From 084b859e3e40d37bad0f19302a84a2ed0a94f34d Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sun, 9 Feb 2025 13:00:04 +0530 Subject: [PATCH] feat: add option to remove reasoning tag from TTS output --- src/assets/locale/ar/settings.json | 3 ++ src/assets/locale/da/settings.json | 3 ++ src/assets/locale/de/settings.json | 3 ++ src/assets/locale/en/settings.json | 3 ++ src/assets/locale/es/settings.json | 6 ++-- src/assets/locale/fa/settings.json | 3 ++ src/assets/locale/fr/settings.json | 3 ++ src/assets/locale/it/settings.json | 3 ++ src/assets/locale/ja-JP/settings.json | 3 ++ src/assets/locale/ko/settings.json | 3 ++ src/assets/locale/ml/settings.json | 3 ++ src/assets/locale/no/settings.json | 3 ++ src/assets/locale/pt-BR/settings.json | 3 ++ src/assets/locale/ru/settings.json | 3 ++ src/assets/locale/sv/settings.json | 3 ++ src/assets/locale/uk/settings.json | 3 ++ src/assets/locale/zh/settings.json | 3 ++ src/components/Common/Playground/Message.tsx | 4 +-- src/components/Option/Settings/tts-mode.tsx | 15 ++++++++++ src/hooks/useTTS.tsx | 12 +++++++- src/services/tts.ts | 31 ++++++++++++++++---- 21 files changed, 106 insertions(+), 10 deletions(-) diff --git a/src/assets/locale/ar/settings.json b/src/assets/locale/ar/settings.json index a93eafd..10079f9 100644 --- a/src/assets/locale/ar/settings.json +++ b/src/assets/locale/ar/settings.json @@ -119,6 +119,9 @@ }, "ssmlEnabled": { "label": "تمكين SSML (لغة ترميز توليف الكلام)" + }, + "removeReasoningTagTTS": { + "label": "إزالة علامة التفكير من تحويل النص إلى كلام" } } }, diff --git a/src/assets/locale/da/settings.json b/src/assets/locale/da/settings.json index 74a2346..a790db0 100644 --- a/src/assets/locale/da/settings.json +++ b/src/assets/locale/da/settings.json @@ -113,6 +113,9 @@ }, "ssmlEnabled": { "label": "Aktiver SSML (Speech Synthesis Markup Language)" + }, + "removeReasoningTagTTS": { + "label": "Fjern Ræsonnement Tag fra TTS" } } }, diff --git a/src/assets/locale/de/settings.json b/src/assets/locale/de/settings.json index d8a106d..81e5ca0 100644 --- a/src/assets/locale/de/settings.json +++ b/src/assets/locale/de/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "SSML (Speech Synthesis Markup Language) aktivieren" + }, + "removeReasoningTagTTS": { + "label": "Reasoning-Tag aus Text-zu-Sprache entfernen" } } }, diff --git a/src/assets/locale/en/settings.json b/src/assets/locale/en/settings.json index c7c75b7..4fe21cc 100644 --- a/src/assets/locale/en/settings.json +++ b/src/assets/locale/en/settings.json @@ -122,6 +122,9 @@ }, "responseSplitting": { "label": "Response Splitting" + }, + "removeReasoningTagTTS": { + "label": "Remove Reasoning Tag from TTS" } } }, diff --git a/src/assets/locale/es/settings.json b/src/assets/locale/es/settings.json index 4ff0ac6..a56037d 100644 --- a/src/assets/locale/es/settings.json +++ b/src/assets/locale/es/settings.json @@ -116,8 +116,10 @@ }, "ssmlEnabled": { "label": "Habilitar SSML (Speech Synthesis Markup Language)" - } - } + }, + "removeReasoningTagTTS": { + "label": "Eliminar Etiqueta de Razonamiento del TTS" + } } }, "manageModels": { "title": "Administar de Modelos", diff --git a/src/assets/locale/fa/settings.json b/src/assets/locale/fa/settings.json index a66db85..badea7a 100644 --- a/src/assets/locale/fa/settings.json +++ b/src/assets/locale/fa/settings.json @@ -113,6 +113,9 @@ }, "ssmlEnabled": { "label": "فعال کردن SSML (Speech Synthesis Markup Language)" + }, + "removeReasoningTagTTS": { + "label": "حذف برچسب استدلال از تبدیل متن به گفتار" } } }, diff --git a/src/assets/locale/fr/settings.json b/src/assets/locale/fr/settings.json index 8dcfcba..565b628 100644 --- a/src/assets/locale/fr/settings.json +++ b/src/assets/locale/fr/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Activer SSML (langage de balisage de synthèse vocale)" + }, + "removeReasoningTagTTS": { + "label": "Supprimer la balise de raisonnement de la synthèse vocale" } } }, diff --git a/src/assets/locale/it/settings.json b/src/assets/locale/it/settings.json index 7f1e71d..461fd40 100644 --- a/src/assets/locale/it/settings.json +++ b/src/assets/locale/it/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Abilita SSML (Speech Synthesis Markup Language)" + }, + "removeReasoningTagTTS": { + "label": "Rimuovi Tag di Ragionamento dal TTS" } } }, diff --git a/src/assets/locale/ja-JP/settings.json b/src/assets/locale/ja-JP/settings.json index 8c06300..ff1b8bc 100644 --- a/src/assets/locale/ja-JP/settings.json +++ b/src/assets/locale/ja-JP/settings.json @@ -119,6 +119,9 @@ }, "ssmlEnabled": { "label": "SSML (Speech Synthesis Markup Language) を有効にする" + }, + "removeReasoningTagTTS": { + "label": "テキスト読み上げから推論タグを削除" } } }, diff --git a/src/assets/locale/ko/settings.json b/src/assets/locale/ko/settings.json index f7950ea..a1eb539 100644 --- a/src/assets/locale/ko/settings.json +++ b/src/assets/locale/ko/settings.json @@ -119,6 +119,9 @@ }, "ssmlEnabled": { "label": "SSML (Speech Synthesis Markup Language) 활성화" + }, + "removeReasoningTagTTS": { + "label": "TTS에서 추론 태그 제거" } } }, diff --git a/src/assets/locale/ml/settings.json b/src/assets/locale/ml/settings.json index 13700f3..9b7fc44 100644 --- a/src/assets/locale/ml/settings.json +++ b/src/assets/locale/ml/settings.json @@ -119,6 +119,9 @@ }, "ssmlEnabled": { "label": "SSML (സ്പീച്ച് സിന്തസിസ് മാർക്കപ്പ് ലാംഗ്വേജ്) പ്രവർത്തനക്ഷമമാക്കുക" + }, + "removeReasoningTagTTS": { + "label": "ടിടിഎസിൽ നിന്ന് റീസണിംഗ് ടാഗ് നീക്കം ചെയ്യുക" } } }, diff --git a/src/assets/locale/no/settings.json b/src/assets/locale/no/settings.json index 4b9c66d..c7e0b1c 100644 --- a/src/assets/locale/no/settings.json +++ b/src/assets/locale/no/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Aktiver SSML (Speech Synthesis Markup Language)" + }, + "removeReasoningTagTTS": { + "label": "Fjern Resonneringsmerke fra TTS" } } }, diff --git a/src/assets/locale/pt-BR/settings.json b/src/assets/locale/pt-BR/settings.json index dc186ef..d2d087f 100644 --- a/src/assets/locale/pt-BR/settings.json +++ b/src/assets/locale/pt-BR/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Ativar SSML (Linguagem de Marcação de Síntese de Fala)" + }, + "removeReasoningTagTTS": { + "label": "Remover Tag de Raciocínio do TTS" } } }, diff --git a/src/assets/locale/ru/settings.json b/src/assets/locale/ru/settings.json index 80592e4..e8ad036 100644 --- a/src/assets/locale/ru/settings.json +++ b/src/assets/locale/ru/settings.json @@ -117,6 +117,9 @@ }, "ssmlEnabled": { "label": "Включить SSML (язык разметки синтеза речи)" + }, + "removeReasoningTagTTS": { + "label": "Удалить тег рассуждения из TTS" } } }, diff --git a/src/assets/locale/sv/settings.json b/src/assets/locale/sv/settings.json index 84f38dd..1cbee7f 100644 --- a/src/assets/locale/sv/settings.json +++ b/src/assets/locale/sv/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Aktivera SSML (Speech Synthesis Markup Language)" + }, + "removeReasoningTagTTS": { + "label": "Ta bort resonemangstagg från Text till Tal" } } }, diff --git a/src/assets/locale/uk/settings.json b/src/assets/locale/uk/settings.json index 3d937a8..e803248 100644 --- a/src/assets/locale/uk/settings.json +++ b/src/assets/locale/uk/settings.json @@ -116,6 +116,9 @@ }, "ssmlEnabled": { "label": "Ввімкнути SSML (Мова Розмітки для Синтезу Голосу)" + }, + "removeReasoningTagTTS": { + "label": "Видалити тег міркування з TTS" } } }, diff --git a/src/assets/locale/zh/settings.json b/src/assets/locale/zh/settings.json index ccaf4eb..f201179 100644 --- a/src/assets/locale/zh/settings.json +++ b/src/assets/locale/zh/settings.json @@ -119,6 +119,9 @@ }, "ssmlEnabled": { "label": "启用SSML(语音合成标记语言)" + }, + "removeReasoningTagTTS": { + "label": "从语音合成中移除推理标签" } } }, diff --git a/src/components/Common/Playground/Message.tsx b/src/components/Common/Playground/Message.tsx index 1bdd5c8..90fccb9 100644 --- a/src/components/Common/Playground/Message.tsx +++ b/src/components/Common/Playground/Message.tsx @@ -18,7 +18,7 @@ import { useTTS } from "@/hooks/useTTS" import { tagColors } from "@/utils/color" import { removeModelSuffix } from "@/db/models" import { GenerationInfo } from "./GenerationInfo" -import { parseReasoning, removeReasoning } from "@/libs/reasoning" +import { parseReasoning, } from "@/libs/reasoning" import { humanizeMilliseconds } from "@/utils/humanize-milliseconds" type Props = { message: string @@ -213,7 +213,7 @@ export const PlaygroundMessage = (props: Props) => { cancel() } else { speak({ - utterance: removeReasoning(props.message) + utterance: props.message }) } }} diff --git a/src/components/Option/Settings/tts-mode.tsx b/src/components/Option/Settings/tts-mode.tsx index 26c7bb8..75f0e07 100644 --- a/src/components/Option/Settings/tts-mode.tsx +++ b/src/components/Option/Settings/tts-mode.tsx @@ -17,6 +17,7 @@ export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => { ttsProvider: "", voice: "", ssmlEnabled: false, + removeReasoningTagTTS: true, elevenLabsApiKey: "", elevenLabsVoiceId: "", elevenLabsModel: "", @@ -209,6 +210,20 @@ export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => { +
+ + {t("generalSettings.tts.removeReasoningTagTTS.label")} + +
+ +
+
+
diff --git a/src/hooks/useTTS.tsx b/src/hooks/useTTS.tsx index 9ac8607..9c94b86 100644 --- a/src/hooks/useTTS.tsx +++ b/src/hooks/useTTS.tsx @@ -4,6 +4,7 @@ import { getElevenLabsApiKey, getElevenLabsModel, getElevenLabsVoiceId, + getRemoveReasoningTagTTS, getTTSProvider, getVoice, isSSMLEnabled @@ -11,6 +12,7 @@ import { import { markdownToSSML } from "@/utils/markdown-to-ssml" import { generateSpeech } from "@/services/elevenlabs" import { splitMessageContent } from "@/utils/tts" +import { removeReasoning } from "@/libs/reasoning" export interface VoiceOptions { utterance: string @@ -26,6 +28,11 @@ export const useTTS = () => { try { const voice = await getVoice() const provider = await getTTSProvider() + const isRemoveReasoning = await getRemoveReasoningTagTTS() + + if (isRemoveReasoning) { + utterance = removeReasoning(utterance) + } if (provider === "browser") { const isSSML = await isSSMLEnabled() @@ -115,7 +122,10 @@ export const useTTS = () => { return } - if (import.meta.env.BROWSER === "chrome" || import.meta.env.BROWSER === "edge") { + if ( + import.meta.env.BROWSER === "chrome" || + import.meta.env.BROWSER === "edge" + ) { chrome.tts.stop() } else { window.speechSynthesis.cancel() diff --git a/src/services/tts.ts b/src/services/tts.ts index 98b8a40..bd7cc26 100644 --- a/src/services/tts.ts +++ b/src/services/tts.ts @@ -1,6 +1,9 @@ import { Storage } from "@plasmohq/storage" const storage = new Storage() +const storage2 = new Storage({ + area: "local" +}) const DEFAULT_TTS_PROVIDER = "browser" @@ -98,10 +101,22 @@ export const getResponseSplitting = async () => { return data } +export const getRemoveReasoningTagTTS = async () => { + const data = await storage2.get("removeReasoningTagTTS") + if (!data || data.length === 0 || data === "") { + return true + } + return data === "true" +} + export const setResponseSplitting = async (responseSplitting: string) => { await storage.set("ttsResponseSplitting", responseSplitting) } +export const setRemoveReasoningTagTTS = async (removeReasoningTagTTS: boolean) => { + await storage2.set("removeReasoningTagTTS", removeReasoningTagTTS.toString()) +} + export const getTTSSettings = async () => { const [ ttsEnabled, @@ -112,7 +127,8 @@ export const getTTSSettings = async () => { elevenLabsApiKey, elevenLabsVoiceId, elevenLabsModel, - responseSplitting + responseSplitting, + removeReasoningTagTTS ] = await Promise.all([ isTTSEnabled(), getTTSProvider(), @@ -122,7 +138,8 @@ export const getTTSSettings = async () => { getElevenLabsApiKey(), getElevenLabsVoiceId(), getElevenLabsModel(), - getResponseSplitting() + getResponseSplitting(), + getRemoveReasoningTagTTS() ]) return { @@ -134,7 +151,8 @@ export const getTTSSettings = async () => { elevenLabsApiKey, elevenLabsVoiceId, elevenLabsModel, - responseSplitting + responseSplitting, + removeReasoningTagTTS } } @@ -146,7 +164,8 @@ export const setTTSSettings = async ({ elevenLabsApiKey, elevenLabsVoiceId, elevenLabsModel, - responseSplitting + responseSplitting, + removeReasoningTagTTS }: { ttsEnabled: boolean ttsProvider: string @@ -156,6 +175,7 @@ export const setTTSSettings = async ({ elevenLabsVoiceId: string elevenLabsModel: string responseSplitting: string + removeReasoningTagTTS: boolean }) => { await Promise.all([ setTTSEnabled(ttsEnabled), @@ -165,6 +185,7 @@ export const setTTSSettings = async ({ setElevenLabsApiKey(elevenLabsApiKey), setElevenLabsVoiceId(elevenLabsVoiceId), setElevenLabsModel(elevenLabsModel), - setResponseSplitting(responseSplitting) + setResponseSplitting(responseSplitting), + setRemoveReasoningTagTTS(removeReasoningTagTTS) ]) }