feat: Add ElevenLabs TTS provider and response splitting options
This commit is contained in:
@@ -1,44 +1,101 @@
|
||||
import { useEffect, useState } from "react"
|
||||
import { notification } from "antd"
|
||||
import { getVoice, isSSMLEnabled } from "@/services/tts"
|
||||
import {
|
||||
getElevenLabsApiKey,
|
||||
getElevenLabsModel,
|
||||
getElevenLabsVoiceId,
|
||||
getTTSProvider,
|
||||
getVoice,
|
||||
isSSMLEnabled
|
||||
} from "@/services/tts"
|
||||
import { markdownToSSML } from "@/utils/markdown-to-ssml"
|
||||
type VoiceOptions = {
|
||||
import { generateSpeech } from "@/services/elevenlabs"
|
||||
import { splitMessageContent } from "@/utils/tts"
|
||||
|
||||
export interface VoiceOptions {
|
||||
utterance: string
|
||||
}
|
||||
|
||||
export const useTTS = () => {
|
||||
const [isSpeaking, setIsSpeaking] = useState(false)
|
||||
const [audioElement, setAudioElement] = useState<HTMLAudioElement | null>(
|
||||
null
|
||||
)
|
||||
|
||||
const speak = async ({ utterance }: VoiceOptions) => {
|
||||
try {
|
||||
const voice = await getVoice()
|
||||
const isSSML = await isSSMLEnabled()
|
||||
if (isSSML) {
|
||||
utterance = markdownToSSML(utterance)
|
||||
}
|
||||
if (import.meta.env.BROWSER === "chrome") {
|
||||
chrome.tts.speak(utterance, {
|
||||
voiceName: voice,
|
||||
onEvent(event) {
|
||||
if (event.type === "start") {
|
||||
setIsSpeaking(true)
|
||||
} else if (event.type === "end") {
|
||||
setIsSpeaking(false)
|
||||
}
|
||||
}
|
||||
})
|
||||
} else {
|
||||
// browser tts
|
||||
window.speechSynthesis.speak(new SpeechSynthesisUtterance(utterance))
|
||||
window.speechSynthesis.onvoiceschanged = () => {
|
||||
const voices = window.speechSynthesis.getVoices()
|
||||
const voice = voices.find((v) => v.name === voice)
|
||||
const utter = new SpeechSynthesisUtterance(utterance)
|
||||
utter.voice = voice
|
||||
window.speechSynthesis.speak(utter)
|
||||
const provider = await getTTSProvider()
|
||||
|
||||
if (provider === "browser") {
|
||||
const isSSML = await isSSMLEnabled()
|
||||
if (isSSML) {
|
||||
utterance = markdownToSSML(utterance)
|
||||
}
|
||||
if (import.meta.env.BROWSER === "chrome") {
|
||||
chrome.tts.speak(utterance, {
|
||||
voiceName: voice,
|
||||
onEvent(event) {
|
||||
if (event.type === "start") {
|
||||
setIsSpeaking(true)
|
||||
} else if (event.type === "end") {
|
||||
setIsSpeaking(false)
|
||||
}
|
||||
}
|
||||
})
|
||||
} else {
|
||||
window.speechSynthesis.speak(new SpeechSynthesisUtterance(utterance))
|
||||
window.speechSynthesis.onvoiceschanged = () => {
|
||||
const voices = window.speechSynthesis.getVoices()
|
||||
const voice = voices.find((v) => v.name === voice)
|
||||
const utter = new SpeechSynthesisUtterance(utterance)
|
||||
utter.voice = voice
|
||||
window.speechSynthesis.speak(utter)
|
||||
}
|
||||
}
|
||||
} else if (provider === "elevenlabs") {
|
||||
const apiKey = await getElevenLabsApiKey()
|
||||
const modelId = await getElevenLabsModel()
|
||||
const voiceId = await getElevenLabsVoiceId()
|
||||
const sentences = splitMessageContent(utterance)
|
||||
let nextAudioData: ArrayBuffer | null = null
|
||||
if (!apiKey || !modelId || !voiceId) {
|
||||
throw new Error("Missing ElevenLabs configuration")
|
||||
}
|
||||
for (let i = 0; i < sentences.length; i++) {
|
||||
setIsSpeaking(true)
|
||||
|
||||
let currentAudioData =
|
||||
nextAudioData ||
|
||||
(await generateSpeech(apiKey, sentences[i], voiceId, modelId))
|
||||
|
||||
if (i < sentences.length - 1) {
|
||||
generateSpeech(apiKey, sentences[i + 1], voiceId, modelId)
|
||||
.then((nextAudioData) => {
|
||||
nextAudioData = nextAudioData
|
||||
})
|
||||
.catch(console.error)
|
||||
}
|
||||
|
||||
const blob = new Blob([currentAudioData], { type: "audio/mpeg" })
|
||||
const url = URL.createObjectURL(blob)
|
||||
const audio = new Audio(url)
|
||||
setAudioElement(audio)
|
||||
|
||||
await new Promise((resolve) => {
|
||||
audio.onended = resolve
|
||||
audio.play()
|
||||
})
|
||||
|
||||
URL.revokeObjectURL(url)
|
||||
}
|
||||
|
||||
setIsSpeaking(false)
|
||||
setAudioElement(null)
|
||||
}
|
||||
} catch (error) {
|
||||
setIsSpeaking(false)
|
||||
setAudioElement(null)
|
||||
notification.error({
|
||||
message: "Error",
|
||||
description: "Something went wrong while trying to play the audio"
|
||||
@@ -47,6 +104,14 @@ export const useTTS = () => {
|
||||
}
|
||||
|
||||
const cancel = () => {
|
||||
if (audioElement) {
|
||||
audioElement.pause()
|
||||
audioElement.currentTime = 0
|
||||
setAudioElement(null)
|
||||
setIsSpeaking(false)
|
||||
return
|
||||
}
|
||||
|
||||
if (import.meta.env.BROWSER === "chrome") {
|
||||
chrome.tts.stop()
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user