feat: Add ElevenLabs TTS provider and response splitting options
This commit is contained in:
parent
aa49f03f63
commit
3ddb7f1ad8
@ -113,6 +113,9 @@
|
|||||||
},
|
},
|
||||||
"ssmlEnabled": {
|
"ssmlEnabled": {
|
||||||
"label": "Enable SSML (Speech Synthesis Markup Language)"
|
"label": "Enable SSML (Speech Synthesis Markup Language)"
|
||||||
|
},
|
||||||
|
"responseSplitting": {
|
||||||
|
"label": "Response Splitting"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import { SaveButton } from "@/components/Common/SaveButton"
|
import { SaveButton } from "@/components/Common/SaveButton"
|
||||||
|
import { getModels, getVoices } from "@/services/elevenlabs"
|
||||||
import { getTTSSettings, setTTSSettings } from "@/services/tts"
|
import { getTTSSettings, setTTSSettings } from "@/services/tts"
|
||||||
import { useWebUI } from "@/store/webui"
|
import { useWebUI } from "@/store/webui"
|
||||||
import { useForm } from "@mantine/form"
|
import { useForm } from "@mantine/form"
|
||||||
import { useQuery } from "@tanstack/react-query"
|
import { useQuery } from "@tanstack/react-query"
|
||||||
import { Select, Skeleton, Switch } from "antd"
|
import { Input, message, Select, Skeleton, Switch } from "antd"
|
||||||
import { useTranslation } from "react-i18next"
|
import { useTranslation } from "react-i18next"
|
||||||
|
|
||||||
export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => {
|
export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => {
|
||||||
@ -15,7 +16,11 @@ export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => {
|
|||||||
ttsEnabled: false,
|
ttsEnabled: false,
|
||||||
ttsProvider: "",
|
ttsProvider: "",
|
||||||
voice: "",
|
voice: "",
|
||||||
ssmlEnabled: false
|
ssmlEnabled: false,
|
||||||
|
elevenLabsApiKey: "",
|
||||||
|
elevenLabsVoiceId: "",
|
||||||
|
elevenLabsModel: "",
|
||||||
|
responseSplitting: ""
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -28,6 +33,27 @@ export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
const { data: elevenLabsData } = useQuery({
|
||||||
|
queryKey: ["fetchElevenLabsData", form.values.elevenLabsApiKey],
|
||||||
|
queryFn: async () => {
|
||||||
|
try {
|
||||||
|
if (
|
||||||
|
form.values.ttsProvider === "elevenlabs" &&
|
||||||
|
form.values.elevenLabsApiKey
|
||||||
|
) {
|
||||||
|
const voices = await getVoices(form.values.elevenLabsApiKey)
|
||||||
|
const models = await getModels(form.values.elevenLabsApiKey)
|
||||||
|
return { voices, models }
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e)
|
||||||
|
message.error("Error fetching ElevenLabs data")
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
},
|
||||||
|
enabled:
|
||||||
|
form.values.ttsProvider === "elevenlabs" && !!form.values.elevenLabsApiKey
|
||||||
|
})
|
||||||
if (status === "pending" || status === "error") {
|
if (status === "pending" || status === "error") {
|
||||||
return <Skeleton active />
|
return <Skeleton active />
|
||||||
}
|
}
|
||||||
@ -72,29 +98,103 @@ export const TTSModeSettings = ({ hideBorder }: { hideBorder?: boolean }) => {
|
|||||||
<Select
|
<Select
|
||||||
placeholder={t("generalSettings.tts.ttsProvider.placeholder")}
|
placeholder={t("generalSettings.tts.ttsProvider.placeholder")}
|
||||||
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
||||||
options={[{ label: "Browser TTS", value: "browser" }]}
|
options={[
|
||||||
|
{ label: "Browser TTS", value: "browser" },
|
||||||
|
{
|
||||||
|
label: "ElevenLabs",
|
||||||
|
value: "elevenlabs"
|
||||||
|
}
|
||||||
|
]}
|
||||||
{...form.getInputProps("ttsProvider")}
|
{...form.getInputProps("ttsProvider")}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
{form.values.ttsProvider === "browser" && (
|
||||||
<span className="text-gray-700 dark:text-neutral-50 ">
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
{t("generalSettings.tts.ttsVoice.label")}
|
<span className="text-gray-700 dark:text-neutral-50 ">
|
||||||
</span>
|
{t("generalSettings.tts.ttsVoice.label")}
|
||||||
<div>
|
</span>
|
||||||
<Select
|
<div>
|
||||||
placeholder={t("generalSettings.tts.ttsVoice.placeholder")}
|
<Select
|
||||||
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
placeholder={t("generalSettings.tts.ttsVoice.placeholder")}
|
||||||
options={data?.browserTTSVoices?.map(
|
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
||||||
(voice) => ({
|
options={data?.browserTTSVoices?.map((voice) => ({
|
||||||
label: `${voice.voiceName} - ${voice.lang}`.trim(),
|
label: `${voice.voiceName} - ${voice.lang}`.trim(),
|
||||||
value: voice.voiceName
|
value: voice.voiceName
|
||||||
})
|
}))}
|
||||||
)}
|
{...form.getInputProps("voice")}
|
||||||
{...form.getInputProps("voice")}
|
/>
|
||||||
/>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
)}
|
||||||
|
{form.values.ttsProvider === "elevenlabs" && (
|
||||||
|
<>
|
||||||
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
|
<span className="text-gray-700 dark:text-neutral-50">
|
||||||
|
API Key
|
||||||
|
</span>
|
||||||
|
<Input.Password
|
||||||
|
placeholder="sk_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||||
|
className=" mt-4 sm:mt-0 !w-[300px] sm:w-[200px]"
|
||||||
|
required
|
||||||
|
{...form.getInputProps("elevenLabsApiKey")}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{elevenLabsData && (
|
||||||
|
<>
|
||||||
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
|
<span className="text-gray-700 dark:text-neutral-50">
|
||||||
|
TTS Voice
|
||||||
|
</span>
|
||||||
|
<Select
|
||||||
|
options={elevenLabsData.voices.map((v) => ({
|
||||||
|
label: v.name,
|
||||||
|
value: v.voice_id
|
||||||
|
}))}
|
||||||
|
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
||||||
|
placeholder="Select a voice"
|
||||||
|
{...form.getInputProps("elevenLabsVoiceId")}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
|
<span className="text-gray-700 dark:text-neutral-50">
|
||||||
|
TTS Model
|
||||||
|
</span>
|
||||||
|
<Select
|
||||||
|
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
||||||
|
placeholder="Select a model"
|
||||||
|
options={elevenLabsData.models.map((m) => ({
|
||||||
|
label: m.name,
|
||||||
|
value: m.model_id
|
||||||
|
}))}
|
||||||
|
{...form.getInputProps("elevenLabsModel")}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
|
<span className="text-gray-700 dark:text-neutral-50 ">
|
||||||
|
{t("generalSettings.tts.responseSplitting.label")}
|
||||||
|
</span>
|
||||||
|
<div>
|
||||||
|
<Select
|
||||||
|
placeholder={t(
|
||||||
|
"generalSettings.tts.responseSplitting.placeholder"
|
||||||
|
)}
|
||||||
|
className="w-full mt-4 sm:mt-0 sm:w-[200px]"
|
||||||
|
options={[
|
||||||
|
{ label: "None", value: "none" },
|
||||||
|
{ label: "Punctuation", value: "punctuation" },
|
||||||
|
{ label: "Paragraph", value: "paragraph" }
|
||||||
|
]}
|
||||||
|
{...form.getInputProps("responseSplitting")}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
)}
|
||||||
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
<div className="flex sm:flex-row flex-col space-y-4 sm:space-y-0 sm:justify-between">
|
||||||
<span className="text-gray-700 dark:text-neutral-50 ">
|
<span className="text-gray-700 dark:text-neutral-50 ">
|
||||||
{t("generalSettings.tts.ssmlEnabled.label")}
|
{t("generalSettings.tts.ssmlEnabled.label")}
|
||||||
|
@ -1,44 +1,101 @@
|
|||||||
import { useEffect, useState } from "react"
|
import { useEffect, useState } from "react"
|
||||||
import { notification } from "antd"
|
import { notification } from "antd"
|
||||||
import { getVoice, isSSMLEnabled } from "@/services/tts"
|
import {
|
||||||
|
getElevenLabsApiKey,
|
||||||
|
getElevenLabsModel,
|
||||||
|
getElevenLabsVoiceId,
|
||||||
|
getTTSProvider,
|
||||||
|
getVoice,
|
||||||
|
isSSMLEnabled
|
||||||
|
} from "@/services/tts"
|
||||||
import { markdownToSSML } from "@/utils/markdown-to-ssml"
|
import { markdownToSSML } from "@/utils/markdown-to-ssml"
|
||||||
type VoiceOptions = {
|
import { generateSpeech } from "@/services/elevenlabs"
|
||||||
|
import { splitMessageContent } from "@/utils/tts"
|
||||||
|
|
||||||
|
export interface VoiceOptions {
|
||||||
utterance: string
|
utterance: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export const useTTS = () => {
|
export const useTTS = () => {
|
||||||
const [isSpeaking, setIsSpeaking] = useState(false)
|
const [isSpeaking, setIsSpeaking] = useState(false)
|
||||||
|
const [audioElement, setAudioElement] = useState<HTMLAudioElement | null>(
|
||||||
|
null
|
||||||
|
)
|
||||||
|
|
||||||
const speak = async ({ utterance }: VoiceOptions) => {
|
const speak = async ({ utterance }: VoiceOptions) => {
|
||||||
try {
|
try {
|
||||||
const voice = await getVoice()
|
const voice = await getVoice()
|
||||||
const isSSML = await isSSMLEnabled()
|
const provider = await getTTSProvider()
|
||||||
if (isSSML) {
|
|
||||||
utterance = markdownToSSML(utterance)
|
if (provider === "browser") {
|
||||||
}
|
const isSSML = await isSSMLEnabled()
|
||||||
if (import.meta.env.BROWSER === "chrome") {
|
if (isSSML) {
|
||||||
chrome.tts.speak(utterance, {
|
utterance = markdownToSSML(utterance)
|
||||||
voiceName: voice,
|
|
||||||
onEvent(event) {
|
|
||||||
if (event.type === "start") {
|
|
||||||
setIsSpeaking(true)
|
|
||||||
} else if (event.type === "end") {
|
|
||||||
setIsSpeaking(false)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
// browser tts
|
|
||||||
window.speechSynthesis.speak(new SpeechSynthesisUtterance(utterance))
|
|
||||||
window.speechSynthesis.onvoiceschanged = () => {
|
|
||||||
const voices = window.speechSynthesis.getVoices()
|
|
||||||
const voice = voices.find((v) => v.name === voice)
|
|
||||||
const utter = new SpeechSynthesisUtterance(utterance)
|
|
||||||
utter.voice = voice
|
|
||||||
window.speechSynthesis.speak(utter)
|
|
||||||
}
|
}
|
||||||
|
if (import.meta.env.BROWSER === "chrome") {
|
||||||
|
chrome.tts.speak(utterance, {
|
||||||
|
voiceName: voice,
|
||||||
|
onEvent(event) {
|
||||||
|
if (event.type === "start") {
|
||||||
|
setIsSpeaking(true)
|
||||||
|
} else if (event.type === "end") {
|
||||||
|
setIsSpeaking(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
window.speechSynthesis.speak(new SpeechSynthesisUtterance(utterance))
|
||||||
|
window.speechSynthesis.onvoiceschanged = () => {
|
||||||
|
const voices = window.speechSynthesis.getVoices()
|
||||||
|
const voice = voices.find((v) => v.name === voice)
|
||||||
|
const utter = new SpeechSynthesisUtterance(utterance)
|
||||||
|
utter.voice = voice
|
||||||
|
window.speechSynthesis.speak(utter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (provider === "elevenlabs") {
|
||||||
|
const apiKey = await getElevenLabsApiKey()
|
||||||
|
const modelId = await getElevenLabsModel()
|
||||||
|
const voiceId = await getElevenLabsVoiceId()
|
||||||
|
const sentences = splitMessageContent(utterance)
|
||||||
|
let nextAudioData: ArrayBuffer | null = null
|
||||||
|
if (!apiKey || !modelId || !voiceId) {
|
||||||
|
throw new Error("Missing ElevenLabs configuration")
|
||||||
|
}
|
||||||
|
for (let i = 0; i < sentences.length; i++) {
|
||||||
|
setIsSpeaking(true)
|
||||||
|
|
||||||
|
let currentAudioData =
|
||||||
|
nextAudioData ||
|
||||||
|
(await generateSpeech(apiKey, sentences[i], voiceId, modelId))
|
||||||
|
|
||||||
|
if (i < sentences.length - 1) {
|
||||||
|
generateSpeech(apiKey, sentences[i + 1], voiceId, modelId)
|
||||||
|
.then((nextAudioData) => {
|
||||||
|
nextAudioData = nextAudioData
|
||||||
|
})
|
||||||
|
.catch(console.error)
|
||||||
|
}
|
||||||
|
|
||||||
|
const blob = new Blob([currentAudioData], { type: "audio/mpeg" })
|
||||||
|
const url = URL.createObjectURL(blob)
|
||||||
|
const audio = new Audio(url)
|
||||||
|
setAudioElement(audio)
|
||||||
|
|
||||||
|
await new Promise((resolve) => {
|
||||||
|
audio.onended = resolve
|
||||||
|
audio.play()
|
||||||
|
})
|
||||||
|
|
||||||
|
URL.revokeObjectURL(url)
|
||||||
|
}
|
||||||
|
|
||||||
|
setIsSpeaking(false)
|
||||||
|
setAudioElement(null)
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
setIsSpeaking(false)
|
||||||
|
setAudioElement(null)
|
||||||
notification.error({
|
notification.error({
|
||||||
message: "Error",
|
message: "Error",
|
||||||
description: "Something went wrong while trying to play the audio"
|
description: "Something went wrong while trying to play the audio"
|
||||||
@ -47,6 +104,14 @@ export const useTTS = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const cancel = () => {
|
const cancel = () => {
|
||||||
|
if (audioElement) {
|
||||||
|
audioElement.pause()
|
||||||
|
audioElement.currentTime = 0
|
||||||
|
setAudioElement(null)
|
||||||
|
setIsSpeaking(false)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if (import.meta.env.BROWSER === "chrome") {
|
if (import.meta.env.BROWSER === "chrome") {
|
||||||
chrome.tts.stop()
|
chrome.tts.stop()
|
||||||
} else {
|
} else {
|
||||||
|
49
src/services/elevenlabs.ts
Normal file
49
src/services/elevenlabs.ts
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
/**
 * A voice entry as typed by this module for the result of `getVoices`.
 * Only the fields declared here are relied on; the API payload may carry more.
 */
export interface Voice {
  /** Identifier placed in the text-to-speech request path by `generateSpeech`. */
  voice_id: string;
  /** Human-readable voice name. */
  name: string;
}
|
||||||
|
|
||||||
|
/**
 * A model entry as typed by this module for the result of `getModels`.
 * Only the fields declared here are relied on; the API payload may carry more.
 */
export interface Model {
  /** Identifier sent as `model_id` in the text-to-speech request body. */
  model_id: string;
  /** Human-readable model name. */
  name: string;
}
|
||||||
|
|
||||||
|
/** Root of the ElevenLabs REST API (v1); the request URLs below are built on top of it. */
const BASE_URL = 'https://api.elevenlabs.io/v1';
|
||||||
|
|
||||||
|
/**
 * Fetches the voices available to the given ElevenLabs account.
 *
 * @param apiKey - ElevenLabs API key, sent in the `xi-api-key` header.
 * @returns the `voices` array from the response body, or an empty array when
 *   the body carries no `voices` field (so callers can always `.map` the result).
 * @throws whatever `axios.get` rejects with (network failure, non-2xx status).
 */
export const getVoices = async (apiKey: string): Promise<Voice[]> => {
  const response = await axios.get<{ voices?: Voice[] }>(`${BASE_URL}/voices`, {
    headers: { 'xi-api-key': apiKey }
  });
  return response.data?.voices ?? [];
};
|
||||||
|
|
||||||
|
/**
 * Fetches the speech models available to the given ElevenLabs account.
 *
 * @param apiKey - ElevenLabs API key, sent in the `xi-api-key` header.
 * @returns the response body when it is an array, otherwise an empty array
 *   (so callers can always `.map` the result).
 * @throws whatever `axios.get` rejects with (network failure, non-2xx status).
 */
export const getModels = async (apiKey: string): Promise<Model[]> => {
  const response = await axios.get<Model[]>(`${BASE_URL}/models`, {
    headers: { 'xi-api-key': apiKey }
  });
  return Array.isArray(response.data) ? response.data : [];
};
|
||||||
|
|
||||||
|
/**
 * Requests synthesized speech for a piece of text from ElevenLabs.
 *
 * @param apiKey - ElevenLabs API key, sent in the `xi-api-key` header.
 * @param text - Text to synthesize.
 * @param voiceId - Voice identifier; becomes the last segment of the request path.
 * @param modelId - Model identifier; sent as `model_id` in the JSON body.
 * @returns the raw response body as an `ArrayBuffer`.
 * @throws whatever `axios.post` rejects with (network failure, non-2xx status).
 */
export const generateSpeech = async (
  apiKey: string,
  text: string,
  voiceId: string,
  modelId: string
): Promise<ArrayBuffer> => {
  // Encode the id so a stored value containing "/", "?" or "#" cannot change
  // which endpoint is hit; ordinary alphanumeric ids are left untouched.
  const url = `${BASE_URL}/text-to-speech/${encodeURIComponent(voiceId)}`;

  const response = await axios.post<ArrayBuffer>(
    url,
    {
      text,
      model_id: modelId
    },
    {
      headers: {
        'xi-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      // Ask axios for the binary payload rather than a decoded string.
      responseType: 'arraybuffer'
    }
  );
  return response.data;
};
|
@ -4,7 +4,7 @@ const storage = new Storage()
|
|||||||
|
|
||||||
const DEFAULT_TTS_PROVIDER = "browser"
|
const DEFAULT_TTS_PROVIDER = "browser"
|
||||||
|
|
||||||
const AVAILABLE_TTS_PROVIDERS = ["browser"] as const
|
const AVAILABLE_TTS_PROVIDERS = ["browser", "elevenlabs"] as const
|
||||||
|
|
||||||
export const getTTSProvider = async (): Promise<
|
export const getTTSProvider = async (): Promise<
|
||||||
(typeof AVAILABLE_TTS_PROVIDERS)[number]
|
(typeof AVAILABLE_TTS_PROVIDERS)[number]
|
||||||
@ -63,22 +63,78 @@ export const setSSMLEnabled = async (isSSMLEnabled: boolean) => {
|
|||||||
await storage.set("isSSMLEnabled", isSSMLEnabled.toString())
|
await storage.set("isSSMLEnabled", isSSMLEnabled.toString())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the saved ElevenLabs API key.
 *
 * @returns whatever `storage` holds under the "elevenLabsApiKey" key
 *   (presumably undefined when nothing was saved — depends on `storage.get`).
 */
export const getElevenLabsApiKey = async () => {
  const data = await storage.get("elevenLabsApiKey")
  return data
}
|
||||||
|
|
||||||
|
/**
 * Persists the ElevenLabs API key under the "elevenLabsApiKey" storage key.
 *
 * @param elevenLabsApiKey - Key to store; stored as given, without validation.
 */
export const setElevenLabsApiKey = async (elevenLabsApiKey: string) => {
  await storage.set("elevenLabsApiKey", elevenLabsApiKey)
}
|
||||||
|
|
||||||
|
/**
 * Reads the saved ElevenLabs voice id.
 *
 * @returns whatever `storage` holds under the "elevenLabsVoiceId" key
 *   (presumably undefined when nothing was saved — depends on `storage.get`).
 */
export const getElevenLabsVoiceId = async () => {
  const data = await storage.get("elevenLabsVoiceId")
  return data
}
|
||||||
|
|
||||||
|
/**
 * Persists the selected ElevenLabs voice id under the "elevenLabsVoiceId" storage key.
 *
 * @param elevenLabsVoiceId - Voice id to store; stored as given, without validation.
 */
export const setElevenLabsVoiceId = async (elevenLabsVoiceId: string) => {
  await storage.set("elevenLabsVoiceId", elevenLabsVoiceId)
}
|
||||||
|
|
||||||
|
/**
 * Reads the saved ElevenLabs model id.
 *
 * @returns whatever `storage` holds under the "elevenLabsModel" key
 *   (presumably undefined when nothing was saved — depends on `storage.get`).
 */
export const getElevenLabsModel = async () => {
  const data = await storage.get("elevenLabsModel")
  return data
}
|
||||||
|
|
||||||
|
/**
 * Persists the selected ElevenLabs model id under the "elevenLabsModel" storage key.
 *
 * @param elevenLabsModel - Model id to store; stored as given, without validation.
 */
export const setElevenLabsModel = async (elevenLabsModel: string) => {
  await storage.set("elevenLabsModel", elevenLabsModel)
}
|
||||||
|
|
||||||
|
/**
 * Reads the saved response-splitting mode for TTS.
 *
 * @returns the value stored under "ttsResponseSplitting", or "punctuation"
 *   when nothing usable is stored.
 */
export const getResponseSplitting = async () => {
  const data = await storage.get("ttsResponseSplitting")
  // `!data` already covers undefined, null and the empty string; the length
  // check additionally treats any other empty value as "not set".
  if (!data || data.length === 0) {
    return "punctuation"
  }
  return data
}
|
||||||
|
|
||||||
|
/**
 * Persists the response-splitting mode under the "ttsResponseSplitting" storage key.
 *
 * @param responseSplitting - Mode to store; stored as given, without validation.
 */
export const setResponseSplitting = async (responseSplitting: string) => {
  await storage.set("ttsResponseSplitting", responseSplitting)
}
|
||||||
|
|
||||||
export const getTTSSettings = async () => {
|
export const getTTSSettings = async () => {
|
||||||
const [ttsEnabled, ttsProvider, browserTTSVoices, voice, ssmlEnabled] =
|
const [
|
||||||
await Promise.all([
|
ttsEnabled,
|
||||||
isTTSEnabled(),
|
ttsProvider,
|
||||||
getTTSProvider(),
|
browserTTSVoices,
|
||||||
getBrowserTTSVoices(),
|
voice,
|
||||||
getVoice(),
|
ssmlEnabled,
|
||||||
isSSMLEnabled()
|
elevenLabsApiKey,
|
||||||
])
|
elevenLabsVoiceId,
|
||||||
|
elevenLabsModel,
|
||||||
|
responseSplitting
|
||||||
|
] = await Promise.all([
|
||||||
|
isTTSEnabled(),
|
||||||
|
getTTSProvider(),
|
||||||
|
getBrowserTTSVoices(),
|
||||||
|
getVoice(),
|
||||||
|
isSSMLEnabled(),
|
||||||
|
getElevenLabsApiKey(),
|
||||||
|
getElevenLabsVoiceId(),
|
||||||
|
getElevenLabsModel(),
|
||||||
|
getResponseSplitting()
|
||||||
|
])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
ttsEnabled,
|
ttsEnabled,
|
||||||
ttsProvider,
|
ttsProvider,
|
||||||
browserTTSVoices,
|
browserTTSVoices,
|
||||||
voice,
|
voice,
|
||||||
ssmlEnabled
|
ssmlEnabled,
|
||||||
|
elevenLabsApiKey,
|
||||||
|
elevenLabsVoiceId,
|
||||||
|
elevenLabsModel,
|
||||||
|
responseSplitting
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,17 +142,29 @@ export const setTTSSettings = async ({
|
|||||||
ttsEnabled,
|
ttsEnabled,
|
||||||
ttsProvider,
|
ttsProvider,
|
||||||
voice,
|
voice,
|
||||||
ssmlEnabled
|
ssmlEnabled,
|
||||||
|
elevenLabsApiKey,
|
||||||
|
elevenLabsVoiceId,
|
||||||
|
elevenLabsModel,
|
||||||
|
responseSplitting
|
||||||
}: {
|
}: {
|
||||||
ttsEnabled: boolean
|
ttsEnabled: boolean
|
||||||
ttsProvider: string
|
ttsProvider: string
|
||||||
voice: string
|
voice: string
|
||||||
ssmlEnabled: boolean
|
ssmlEnabled: boolean
|
||||||
|
elevenLabsApiKey: string
|
||||||
|
elevenLabsVoiceId: string
|
||||||
|
elevenLabsModel: string
|
||||||
|
responseSplitting: string
|
||||||
}) => {
|
}) => {
|
||||||
await Promise.all([
|
await Promise.all([
|
||||||
setTTSEnabled(ttsEnabled),
|
setTTSEnabled(ttsEnabled),
|
||||||
setTTSProvider(ttsProvider),
|
setTTSProvider(ttsProvider),
|
||||||
setVoice(voice),
|
setVoice(voice),
|
||||||
setSSMLEnabled(ssmlEnabled)
|
setSSMLEnabled(ssmlEnabled),
|
||||||
|
setElevenLabsApiKey(elevenLabsApiKey),
|
||||||
|
setElevenLabsVoiceId(elevenLabsVoiceId),
|
||||||
|
setElevenLabsModel(elevenLabsModel),
|
||||||
|
setResponseSplitting(responseSplitting)
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
112
src/utils/tts.ts
Normal file
112
src/utils/tts.ts
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
// Inspired by https://github.com/open-webui/open-webui/blob/2299f4843003759290cc6bf823595c6578ee4470/src/lib/utils/index.ts
|
||||||
|
|
||||||
|
// Matches a triple-backtick fenced block, lazily and across newlines, every occurrence (global flag).
const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
||||||
|
|
||||||
|
/**
 * Removes emoji from text so they are not sent to the speech engine.
 *
 * Matches pictographic characters, regional-indicator (flag) letters and
 * skin-tone modifiers, together with any variation selector (U+FE0F) or
 * zero-width joiner (U+200D) that directly follows them. Other characters
 * outside the Basic Multilingual Plane (mathematical letters, rare CJK
 * ideographs, ...) are left intact.
 *
 * @param text - Input text.
 * @returns the text with emoji removed; surrounding whitespace is not collapsed.
 */
export const sanitizeEmojis = (text: string): string => {
  const EMOJI_PATTERN =
    /[\p{Extended_Pictographic}\u{1F1E6}-\u{1F1FF}\u{1F3FB}-\u{1F3FF}][\uFE0F\u200D]*/gu;
  return text.replace(EMOJI_PATTERN, '');
};
|
||||||
|
|
||||||
|
/**
 * Strips Markdown syntax from text so only the readable words remain.
 *
 * The replacements run in the order written; later steps rely on earlier
 * ones having already removed their constructs. Hyphens inside ordinary text
 * ("well-known", "-5") are preserved; only horizontal-rule lines and list
 * bullets made of hyphens are removed.
 *
 * @param text - Markdown source.
 * @returns plain text with runs of blank lines collapsed to a single newline.
 */
export const sanitizeMarkdown = (text: string): string => {
  return text
    // Fenced code blocks are dropped entirely.
    .replace(/(```[\s\S]*?```)/g, '')
    // Table rows (lines that start and end with a pipe).
    .replace(/^\|.*\|$/gm, '')
    // Horizontal rules / setext underlines: a line of three or more -, * or _.
    .replace(/^[ \t]*(?:[-*_][ \t]*){3,}$/gm, '')
    // Bold (**text** or __text__).
    .replace(/(?:\*\*|__)(.*?)(?:\*\*|__)/g, '$1')
    // Italic (*text* or _text_).
    .replace(/(?:[*_])(.*?)(?:[*_])/g, '$1')
    // Strikethrough.
    .replace(/~~(.*?)~~/g, '$1')
    // Inline code: keep the content, drop the backticks.
    .replace(/`([^`]+)`/g, '$1')
    // Links and images: keep the label / alt text only.
    .replace(/!?\[([^\]]*)\](?:\([^)]+\)|\[[^\]]*\])/g, '$1')
    // Reference-style link definitions.
    .replace(/^\[[^\]]+\]:\s*.*$/gm, '')
    // Heading markers.
    .replace(/^#{1,6}\s+/gm, '')
    // Unordered list bullets.
    .replace(/^\s*[-*+]\s+/gm, '')
    // Ordered list numbers.
    .replace(/^\s*(?:\d+\.)\s+/gm, '')
    // Blockquote markers.
    .replace(/^\s*>[> ]*/gm, '')
    // Definition-list colons.
    .replace(/^\s*:\s+/gm, '')
    // Footnote references.
    .replace(/\[\^[^\]]*\]/g, '')
    // Leftover emphasis markers that had no matching partner.
    .replace(/[*_~]/g, '')
    // Collapse blank-line runs left behind by the removals above.
    .replace(/\n{2,}/g, '\n');
};
|
||||||
|
|
||||||
|
/**
 * Prepares a piece of message text for speech: trims it, removes emoji via
 * `sanitizeEmojis`, then strips Markdown via `sanitizeMarkdown`.
 *
 * @param content - Raw message text.
 * @returns the cleaned text (may be empty).
 */
export const sanitizeText = (content: string): string => {
  return sanitizeMarkdown(sanitizeEmojis(content.trim()));
};
|
||||||
|
|
||||||
|
/**
 * Splits text into sentences at whitespace that follows ".", "!" or "?".
 *
 * Fenced code blocks are swapped for NUL-delimited index placeholders before
 * splitting, so punctuation inside a block cannot create a sentence boundary,
 * and are swapped back before each piece goes through `sanitizeText`.
 *
 * @param text - Raw message text.
 * @returns the sanitized, non-empty sentences in their original order.
 */
export const parseTextIntoSentences = (text: string): string[] => {
  const codeBlocks: string[] = [];

  const processedText = text.replace(CODE_BLOCK_PATTERN, (match) => {
    const placeholder = `\u0000${codeBlocks.length}\u0000`;
    codeBlocks.push(match);
    return placeholder;
  });

  return processedText
    .split(/(?<=[.!?])\s+/)
    .map((sentence) =>
      sentence.replace(
        /\u0000(\d+)\u0000/g,
        // Leave the sequence untouched when the index does not refer to a
        // stashed block, instead of inserting the literal text "undefined".
        (placeholder: string, idx: string) => codeBlocks[Number(idx)] ?? placeholder
      )
    )
    .map(sanitizeText)
    .filter(Boolean);
};
|
||||||
|
|
||||||
|
/**
 * Splits text into paragraphs at runs of newlines.
 *
 * Fenced code blocks are swapped for NUL-delimited index placeholders before
 * splitting, so newlines inside a block cannot create a paragraph boundary,
 * and are swapped back before each piece goes through `sanitizeText`.
 *
 * @param text - Raw message text.
 * @returns the sanitized, non-empty paragraphs in their original order.
 */
export const parseTextIntoParagraphs = (text: string): string[] => {
  const codeBlocks: string[] = [];

  const processedText = text.replace(CODE_BLOCK_PATTERN, (match) => {
    const placeholder = `\u0000${codeBlocks.length}\u0000`;
    codeBlocks.push(match);
    return placeholder;
  });

  return processedText
    .split(/\n+/)
    .map((paragraph) =>
      paragraph.replace(
        /\u0000(\d+)\u0000/g,
        // Leave the sequence untouched when the index does not refer to a
        // stashed block, instead of inserting the literal text "undefined".
        (placeholder: string, idx: string) => codeBlocks[Number(idx)] ?? placeholder
      )
    )
    .map(sanitizeText)
    .filter(Boolean);
};
|
||||||
|
|
||||||
|
/**
 * Splits text into sentences and merges short ones into larger chunks.
 *
 * A sentence is appended (space-separated) to the previous chunk while that
 * chunk has fewer than 4 whitespace-separated words or fewer than 50
 * characters; otherwise it starts a new chunk.
 *
 * @param text - Raw message text.
 * @returns the chunks in their original order.
 */
export const optimizeSentencesForSpeech = (text: string): string[] => {
  const chunks: string[] = [];

  for (const sentence of parseTextIntoSentences(text)) {
    const lastIndex = chunks.length - 1;
    const previous = lastIndex >= 0 ? chunks[lastIndex] : undefined;

    const previousIsShort =
      previous !== undefined &&
      (previous.split(/\s+/).length < 4 || previous.length < 50);

    if (previous !== undefined && previousIsShort) {
      chunks[lastIndex] = `${previous} ${sentence}`;
    } else {
      chunks.push(sentence);
    }
  }

  return chunks;
};
|
||||||
|
|
||||||
|
/**
 * Splits message content into the pieces that are spoken one after another.
 *
 * @param content - Raw message text.
 * @param splitBy - "punctuation" (sentence chunks via `optimizeSentencesForSpeech`),
 *   "paragraph" (via `parseTextIntoParagraphs`) or "none" (the whole sanitized
 *   text as one piece). Any other value, including an empty string, is treated
 *   as "punctuation" so an unset or unknown setting never yields silence.
 * @returns the pieces in speaking order.
 */
export const splitMessageContent = (content: string, splitBy: string = 'punctuation'): string[] => {
  switch (splitBy) {
    case 'paragraph':
      return parseTextIntoParagraphs(content);
    case 'none':
      return [sanitizeText(content)];
    case 'punctuation':
    default:
      return optimizeSentencesForSpeech(content);
  }
};
|
@ -51,7 +51,7 @@ export default defineConfig({
|
|||||||
outDir: "build",
|
outDir: "build",
|
||||||
|
|
||||||
manifest: {
|
manifest: {
|
||||||
version: "1.3.10",
|
version: "1.4.0",
|
||||||
name:
|
name:
|
||||||
process.env.TARGET === "firefox"
|
process.env.TARGET === "firefox"
|
||||||
? "Page Assist - A Web UI for Local AI Models"
|
? "Page Assist - A Web UI for Local AI Models"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user