feat: Add react-speech-recognition for speech-to-text functionality in SidepanelForm

This commit is contained in:
n4ze3m 2024-05-25 11:39:05 +05:30
parent 014565a14e
commit f9f48109c2
4 changed files with 90 additions and 51 deletions

BIN
bun.lockb

Binary file not shown.

View File

@@ -46,6 +46,7 @@
"react-i18next": "^14.1.0",
"react-markdown": "8.0.0",
"react-router-dom": "6.10.0",
"react-speech-recognition": "^3.10.0",
"react-syntax-highlighter": "^15.5.0",
"react-toastify": "^10.0.4",
"rehype-mathjax": "4.0.3",
@@ -64,6 +65,7 @@
"@types/pubsub-js": "^1.8.6",
"@types/react": "18.2.48",
"@types/react-dom": "18.2.18",
"@types/react-speech-recognition": "^3.9.5",
"@types/react-syntax-highlighter": "^15.5.11",
"@types/turndown": "^5.0.4",
"autoprefixer": "^10.4.17",

View File

@@ -6,14 +6,14 @@ import { toBase64 } from "~/libs/to-base64"
import { useMessageOption } from "~/hooks/useMessageOption"
import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
import { Image } from "antd"
import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
import { useWebUI } from "~/store/webui"
import { defaultEmbeddingModelForRag } from "~/services/ollama"
import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
import { getVariable } from "~/utils/select-varaible"
import { useTranslation } from "react-i18next"
import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
import { SelectedKnowledge } from "../Knowledge/SelectedKnwledge"
import { useSpeechRecognition } from "react-speech-recognition"
import SpeechRecognition from "react-speech-recognition"
type Props = {
dropedFile: File | undefined
@@ -84,7 +84,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
useDynamicTextareaSize(textareaRef, form.values.message, 300)
const { isListening, start, stop, transcript } = useSpeechRecognition()
const {
transcript,
listening: isListening,
resetTranscript,
browserSupportsSpeechRecognition
} = useSpeechRecognition()
const { sendWhenEnter, setSendWhenEnter } = useWebUI()
React.useEffect(() => {
@@ -135,6 +141,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
sendWhenEnter
) {
e.preventDefault()
stopListening()
form.onSubmit(async (value) => {
if (value.message.trim().length === 0) {
return
@@ -159,6 +166,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
})()
}
}
// Stop an in-progress speech-to-text session; no-op when the mic is idle.
// Awaits the library call so callers that `await stopListening()` really
// wait until the recognizer has shut down before submitting the form.
const stopListening = async () => {
  if (isListening) {
    await SpeechRecognition.stopListening()
  }
}
return (
<div className="px-3 pt-3 md:px-6 md:pt-6 bg-gray-50 dark:bg-[#262626] border rounded-t-xl dark:border-gray-600">
<div
@@ -186,6 +200,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
<div className="flex bg-white dark:bg-transparent">
<form
onSubmit={form.onSubmit(async (value) => {
stopListening()
if (!selectedModel || selectedModel.length === 0) {
form.setFieldError("message", t("formError.noModel"))
return
@@ -260,16 +275,18 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
</div>
<div className="flex !justify-end gap-3">
<KnowledgeSelect />
{browserSupportsSpeechRecognition && (
<Tooltip title={t("tooltip.speechToText")}>
<button
type="button"
onClick={() => {
onClick={async () => {
if (isListening) {
stop()
SpeechRecognition.stopListening()
} else {
start({
lang: speechToTextLanguage,
continuous: true
resetTranscript()
SpeechRecognition.startListening({
continuous: true,
language: speechToTextLanguage
})
}
}}
@@ -284,6 +301,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
)}
</button>
</Tooltip>
)}
{!selectedKnowledge && (
<Tooltip title={t("tooltip.uploadImage")}>

View File

@@ -5,12 +5,13 @@ import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
import { useMessage } from "~/hooks/useMessage"
import { toBase64 } from "~/libs/to-base64"
import { Checkbox, Dropdown, Image, Tooltip } from "antd"
import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
import { useWebUI } from "~/store/webui"
import { defaultEmbeddingModelForRag } from "~/services/ollama"
import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
import { useTranslation } from "react-i18next"
import { ModelSelect } from "@/components/Common/ModelSelect"
import { useSpeechRecognition } from "react-speech-recognition"
import SpeechRecognition from "react-speech-recognition"
type Props = {
dropedFile: File | undefined
@@ -29,6 +30,20 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
image: ""
}
})
const {
transcript,
listening: isListening,
resetTranscript,
browserSupportsSpeechRecognition
} = useSpeechRecognition({
})
// Stop an in-progress speech-to-text session; no-op when the mic is idle.
// Awaits the library call so callers that `await stopListening()` really
// wait until the recognizer has shut down before submitting the form.
const stopListening = async () => {
  if (isListening) {
    await SpeechRecognition.stopListening()
  }
}
const onInputChange = async (
e: React.ChangeEvent<HTMLInputElement> | File
@@ -59,6 +74,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
) {
e.preventDefault()
form.onSubmit(async (value) => {
await stopListening()
if (value.message.trim().length === 0) {
return
}
@@ -98,7 +114,6 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
streaming,
setChatMode
} = useMessage()
const { isListening, start, stop, transcript } = useSpeechRecognition()
React.useEffect(() => {
if (dropedFile) {
@@ -161,6 +176,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
return
}
}
await stopListening()
form.reset()
textAreaFocus()
await sendMessage({
@@ -196,16 +212,18 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
/>
<div className="flex mt-4 justify-end gap-3">
<ModelSelect />
{browserSupportsSpeechRecognition && (
<Tooltip title={t("tooltip.speechToText")}>
<button
type="button"
onClick={() => {
onClick={async () => {
if (isListening) {
stop()
SpeechRecognition.stopListening()
} else {
start({
lang: speechToTextLanguage,
continuous: true
resetTranscript()
SpeechRecognition.startListening({
continuous: true,
language: speechToTextLanguage
})
}
}}
@@ -220,6 +238,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
)}
</button>
</Tooltip>
)}
<Tooltip title={t("tooltip.uploadImage")}>
<button
type="button"