feat: Add react-speech-recognition for speech-to-text functionality in SidepanelForm

Author: n4ze3m
Date: 2024-05-25 11:39:05 +05:30
parent 014565a14e
commit f9f48109c2

4 changed files with 90 additions and 51 deletions
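The change swaps the extension's custom ~/hooks/useSpeechRecognition hook for the react-speech-recognition package in both the Playground form and the Sidepanel form. Condensed, the pattern the diffs below converge on looks roughly like the sketch that follows; it is illustrative only (the MicToggleSketch name, the button labels, and the "en-US" default are stand-ins, and the real components read speechToTextLanguage from the app's settings):

import React from "react"
import SpeechRecognition, { useSpeechRecognition } from "react-speech-recognition"

// Illustrative mic toggle. speechToTextLanguage stands in for the app's
// configured speech-to-text language setting.
export const MicToggleSketch = ({
  speechToTextLanguage = "en-US"
}: {
  speechToTextLanguage?: string
}) => {
  const {
    transcript, // live recognized text
    listening: isListening, // true while the recognizer is active
    resetTranscript, // clears the previous transcript
    browserSupportsSpeechRecognition // false when the Web Speech API is missing
  } = useSpeechRecognition()

  // Both forms hide the mic button entirely on unsupported browsers.
  if (!browserSupportsSpeechRecognition) {
    return null
  }

  return (
    <>
      <button
        type="button"
        onClick={() => {
          if (isListening) {
            SpeechRecognition.stopListening()
          } else {
            resetTranscript()
            SpeechRecognition.startListening({
              continuous: true, // keep listening until explicitly stopped
              language: speechToTextLanguage // BCP 47 tag, e.g. "en-US"
            })
          }
        }}>
        {isListening ? "Stop" : "Speak"}
      </button>
      <p>{transcript}</p>
    </>
  )
}

startListening and stopListening are static methods on the library's default export, while the hook exposes the live transcript and the listening flag, which is why the diffs import both.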

bun.lockb (binary file, changes not shown)

package.json

@@ -46,6 +46,7 @@
     "react-i18next": "^14.1.0",
     "react-markdown": "8.0.0",
     "react-router-dom": "6.10.0",
+    "react-speech-recognition": "^3.10.0",
     "react-syntax-highlighter": "^15.5.0",
     "react-toastify": "^10.0.4",
     "rehype-mathjax": "4.0.3",

@@ -64,6 +65,7 @@
     "@types/pubsub-js": "^1.8.6",
     "@types/react": "18.2.48",
     "@types/react-dom": "18.2.18",
+    "@types/react-speech-recognition": "^3.9.5",
     "@types/react-syntax-highlighter": "^15.5.11",
     "@types/turndown": "^5.0.4",
     "autoprefixer": "^10.4.17",

PlaygroundForm component

@@ -6,14 +6,14 @@ import { toBase64 } from "~/libs/to-base64"
 import { useMessageOption } from "~/hooks/useMessageOption"
 import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
 import { Image } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { getVariable } from "~/utils/select-varaible"
 import { useTranslation } from "react-i18next"
 import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
-import { SelectedKnowledge } from "../Knowledge/SelectedKnwledge"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"

 type Props = {
   dropedFile: File | undefined

@@ -84,7 +84,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
   useDynamicTextareaSize(textareaRef, form.values.message, 300)

-  const { isListening, start, stop, transcript } = useSpeechRecognition()
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition()

   const { sendWhenEnter, setSendWhenEnter } = useWebUI()

   React.useEffect(() => {

@@ -135,6 +141,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
         sendWhenEnter
       ) {
         e.preventDefault()
+        stopListening()
         form.onSubmit(async (value) => {
           if (value.message.trim().length === 0) {
             return

@@ -159,6 +166,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       })()
     }
   }

+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }

   return (
     <div className="px-3 pt-3 md:px-6 md:pt-6 bg-gray-50 dark:bg-[#262626] border rounded-t-xl dark:border-gray-600">
       <div

@@ -186,6 +200,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       <div className="flex bg-white dark:bg-transparent">
         <form
           onSubmit={form.onSubmit(async (value) => {
+            stopListening()
             if (!selectedModel || selectedModel.length === 0) {
               form.setFieldError("message", t("formError.noModel"))
               return

@@ -260,16 +275,18 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
           </div>
           <div className="flex !justify-end gap-3">
             <KnowledgeSelect />
+            {browserSupportsSpeechRecognition && (
             <Tooltip title={t("tooltip.speechToText")}>
               <button
                 type="button"
-                onClick={() => {
+                onClick={async () => {
                   if (isListening) {
-                    stop()
+                    SpeechRecognition.stopListening()
                   } else {
-                    start({
-                      lang: speechToTextLanguage,
-                      continuous: true
+                    resetTranscript()
+                    SpeechRecognition.startListening({
+                      continuous: true,
+                      language: speechToTextLanguage
                     })
                   }
                 }}

@@ -284,6 +301,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
                 )}
               </button>
             </Tooltip>
+            )}
             {!selectedKnowledge && (
               <Tooltip title={t("tooltip.uploadImage")}>

SidepanelForm component

@@ -5,12 +5,13 @@ import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
 import { useMessage } from "~/hooks/useMessage"
 import { toBase64 } from "~/libs/to-base64"
 import { Checkbox, Dropdown, Image, Tooltip } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { useTranslation } from "react-i18next"
 import { ModelSelect } from "@/components/Common/ModelSelect"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"

 type Props = {
   dropedFile: File | undefined

@@ -29,6 +30,20 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
       image: ""
     }
   })

+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition({
+  })
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }

   const onInputChange = async (
     e: React.ChangeEvent<HTMLInputElement> | File

@@ -59,6 +74,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     ) {
       e.preventDefault()
       form.onSubmit(async (value) => {
+        await stopListening()
         if (value.message.trim().length === 0) {
           return
         }

@@ -98,7 +114,6 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     streaming,
     setChatMode
   } = useMessage()
-  const { isListening, start, stop, transcript } = useSpeechRecognition()

   React.useEffect(() => {
     if (dropedFile) {

@@ -161,6 +176,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
           return
         }
       }
+      await stopListening()
       form.reset()
       textAreaFocus()
       await sendMessage({

@@ -196,16 +212,18 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
         />
         <div className="flex mt-4 justify-end gap-3">
           <ModelSelect />
+          {browserSupportsSpeechRecognition && (
           <Tooltip title={t("tooltip.speechToText")}>
             <button
               type="button"
-              onClick={() => {
+              onClick={async () => {
                 if (isListening) {
-                  stop()
+                  SpeechRecognition.stopListening()
                 } else {
-                  start({
-                    lang: speechToTextLanguage,
-                    continuous: true
+                  resetTranscript()
+                  SpeechRecognition.startListening({
+                    continuous: true,
+                    language: speechToTextLanguage
                   })
                 }
               }}

@@ -220,6 +238,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
               )}
             </button>
           </Tooltip>
+          )}
           <Tooltip title={t("tooltip.uploadImage")}>
             <button
               type="button"