feat: Add react-speech-recognition for speech-to-text functionality in SidepanelForm

Author: n4ze3m
Date:   2024-05-25 11:39:05 +05:30
Parent: 014565a14e
Commit: f9f48109c2

4 changed files with 90 additions and 51 deletions
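
Both chat forms (PlaygroundForm and SidepanelForm) swap the project's local ~/hooks/useSpeechRecognition hook for the react-speech-recognition package: its useSpeechRecognition hook exposes transcript, listening, resetTranscript and browserSupportsSpeechRecognition, while the SpeechRecognition controller starts and stops the microphone. A minimal sketch of that pattern in isolation, with a hard-coded language and a placeholder component name (the real forms read speechToTextLanguage from settings code outside these hunks):

import React from "react"
import SpeechRecognition, {
  useSpeechRecognition
} from "react-speech-recognition"

// Placeholder: the real forms take this value from user settings.
const speechToTextLanguage = "en-US"

export const MicButtonSketch = () => {
  const {
    transcript,
    listening,
    resetTranscript,
    browserSupportsSpeechRecognition
  } = useSpeechRecognition()

  // The Web Speech API is not available everywhere; hide the control if unsupported.
  if (!browserSupportsSpeechRecognition) {
    return null
  }

  return (
    <div>
      <button
        type="button"
        onClick={async () => {
          if (listening) {
            // End the current recognition session.
            await SpeechRecognition.stopListening()
          } else {
            // Clear the previous transcript, then listen until explicitly stopped.
            resetTranscript()
            await SpeechRecognition.startListening({
              continuous: true,
              language: speechToTextLanguage
            })
          }
        }}>
        {listening ? "Stop dictation" : "Start dictation"}
      </button>
      <p>{transcript}</p>
    </div>
  )
}

As in the commit, continuous: true keeps recognition running until SpeechRecognition.stopListening() is called, either from the microphone button or when the form is submitted.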

bun.lockb (binary file, not shown)

package.json

@@ -46,6 +46,7 @@
     "react-i18next": "^14.1.0",
     "react-markdown": "8.0.0",
     "react-router-dom": "6.10.0",
+    "react-speech-recognition": "^3.10.0",
     "react-syntax-highlighter": "^15.5.0",
     "react-toastify": "^10.0.4",
     "rehype-mathjax": "4.0.3",
@@ -64,6 +65,7 @@
     "@types/pubsub-js": "^1.8.6",
     "@types/react": "18.2.48",
     "@types/react-dom": "18.2.18",
+    "@types/react-speech-recognition": "^3.9.5",
     "@types/react-syntax-highlighter": "^15.5.11",
     "@types/turndown": "^5.0.4",
     "autoprefixer": "^10.4.17",

PlaygroundForm

@@ -6,14 +6,14 @@ import { toBase64 } from "~/libs/to-base64"
 import { useMessageOption } from "~/hooks/useMessageOption"
 import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
 import { Image } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { getVariable } from "~/utils/select-varaible"
 import { useTranslation } from "react-i18next"
 import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
-import { SelectedKnowledge } from "../Knowledge/SelectedKnwledge"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"
 
 type Props = {
   dropedFile: File | undefined
@@ -84,7 +84,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
   useDynamicTextareaSize(textareaRef, form.values.message, 300)
 
-  const { isListening, start, stop, transcript } = useSpeechRecognition()
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition()
 
   const { sendWhenEnter, setSendWhenEnter } = useWebUI()
 
   React.useEffect(() => {
@@ -135,6 +141,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
        sendWhenEnter
      ) {
        e.preventDefault()
+       stopListening()
        form.onSubmit(async (value) => {
          if (value.message.trim().length === 0) {
            return
@@ -159,6 +166,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       })()
     }
   }
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }
+
   return (
     <div className="px-3 pt-3 md:px-6 md:pt-6 bg-gray-50 dark:bg-[#262626] border rounded-t-xl dark:border-gray-600">
       <div
@@ -186,6 +200,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
         <div className="flex bg-white dark:bg-transparent">
           <form
             onSubmit={form.onSubmit(async (value) => {
+              stopListening()
               if (!selectedModel || selectedModel.length === 0) {
                 form.setFieldError("message", t("formError.noModel"))
                 return
@@ -260,30 +275,33 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
           </div>
           <div className="flex !justify-end gap-3">
             <KnowledgeSelect />
-            <Tooltip title={t("tooltip.speechToText")}>
-              <button
-                type="button"
-                onClick={() => {
-                  if (isListening) {
-                    stop()
-                  } else {
-                    start({
-                      lang: speechToTextLanguage,
-                      continuous: true
-                    })
-                  }
-                }}
-                className={`flex items-center justify-center dark:text-gray-300`}>
-                {!isListening ? (
-                  <MicIcon className="h-5 w-5" />
-                ) : (
-                  <div className="relative">
-                    <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
-                    <MicIcon className="h-5 w-5" />
-                  </div>
-                )}
-              </button>
-            </Tooltip>
+            {browserSupportsSpeechRecognition && (
+              <Tooltip title={t("tooltip.speechToText")}>
+                <button
+                  type="button"
+                  onClick={async () => {
+                    if (isListening) {
+                      SpeechRecognition.stopListening()
+                    } else {
+                      resetTranscript()
+                      SpeechRecognition.startListening({
+                        continuous: true,
+                        language: speechToTextLanguage
+                      })
+                    }
+                  }}
+                  className={`flex items-center justify-center dark:text-gray-300`}>
+                  {!isListening ? (
+                    <MicIcon className="h-5 w-5" />
+                  ) : (
+                    <div className="relative">
+                      <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
+                      <MicIcon className="h-5 w-5" />
+                    </div>
+                  )}
+                </button>
+              </Tooltip>
+            )}
             {!selectedKnowledge && (
               <Tooltip title={t("tooltip.uploadImage")}>
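
None of the hunks above show where transcript actually reaches the message field; the React.useEffect blocks visible as context are the likely place, but their bodies fall outside the diff. With react-speech-recognition the usual approach is to mirror the transcript into form state as it changes; a hypothetical helper along those lines (not part of this commit, and the form.setFieldValue("message", ...) wiring named in the comment is an assumption):

import { useEffect } from "react"
import { useSpeechRecognition } from "react-speech-recognition"

// Hypothetical helper, not from this commit: push the live transcript into any
// string setter, e.g. (value) => form.setFieldValue("message", value) for the
// form object these components already use.
export function useTranscriptSync(setMessage: (value: string) => void) {
  const { transcript, listening } = useSpeechRecognition()

  useEffect(() => {
    // Only overwrite the field while dictation is active and has produced text.
    if (listening && transcript.length > 0) {
      setMessage(transcript)
    }
  }, [transcript, listening, setMessage])
}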

SidepanelForm

@@ -5,12 +5,13 @@ import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
 import { useMessage } from "~/hooks/useMessage"
 import { toBase64 } from "~/libs/to-base64"
 import { Checkbox, Dropdown, Image, Tooltip } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { useTranslation } from "react-i18next"
 import { ModelSelect } from "@/components/Common/ModelSelect"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"
 
 type Props = {
   dropedFile: File | undefined
@@ -29,6 +30,20 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
       image: ""
     }
   })
+
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition({
+  })
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }
 
   const onInputChange = async (
     e: React.ChangeEvent<HTMLInputElement> | File
@@ -59,6 +74,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
       ) {
         e.preventDefault()
         form.onSubmit(async (value) => {
+          await stopListening()
           if (value.message.trim().length === 0) {
             return
           }
@@ -98,7 +114,6 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     streaming,
     setChatMode
   } = useMessage()
-  const { isListening, start, stop, transcript } = useSpeechRecognition()
 
   React.useEffect(() => {
     if (dropedFile) {
@@ -161,6 +176,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
             return
           }
         }
+        await stopListening()
         form.reset()
         textAreaFocus()
         await sendMessage({
@@ -196,30 +212,33 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
         />
         <div className="flex mt-4 justify-end gap-3">
           <ModelSelect />
-          <Tooltip title={t("tooltip.speechToText")}>
-            <button
-              type="button"
-              onClick={() => {
-                if (isListening) {
-                  stop()
-                } else {
-                  start({
-                    lang: speechToTextLanguage,
-                    continuous: true
-                  })
-                }
-              }}
-              className={`flex items-center justify-center dark:text-gray-300`}>
-              {!isListening ? (
-                <MicIcon className="h-5 w-5" />
-              ) : (
-                <div className="relative">
-                  <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
-                  <MicIcon className="h-5 w-5" />
-                </div>
-              )}
-            </button>
-          </Tooltip>
+          {browserSupportsSpeechRecognition && (
+            <Tooltip title={t("tooltip.speechToText")}>
+              <button
+                type="button"
+                onClick={async () => {
+                  if (isListening) {
+                    SpeechRecognition.stopListening()
+                  } else {
+                    resetTranscript()
+                    SpeechRecognition.startListening({
+                      continuous: true,
+                      language: speechToTextLanguage
+                    })
+                  }
+                }}
+                className={`flex items-center justify-center dark:text-gray-300`}>
+                {!isListening ? (
+                  <MicIcon className="h-5 w-5" />
+                ) : (
+                  <div className="relative">
+                    <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
+                    <MicIcon className="h-5 w-5" />
+                  </div>
+                )}
+              </button>
+            </Tooltip>
+          )}
           <Tooltip title={t("tooltip.uploadImage")}>
             <button
               type="button"