feat: Add react-speech-recognition for speech-to-text functionality in SidepanelForm

Author: n4ze3m
Date: 2024-05-25 11:39:05 +05:30
parent 014565a14e
commit f9f48109c2

4 changed files with 90 additions and 51 deletions
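The change swaps the extension's custom ~/hooks/useSpeechRecognition hook for the react-speech-recognition package in both the Playground form and the Sidepanel form. Condensed, the pattern the diffs below converge on looks roughly like the sketch that follows; it is illustrative only (the MicToggleSketch name, the button labels, and the "en-US" default are stand-ins, and the real components read speechToTextLanguage from the app's settings):

import React from "react"
import SpeechRecognition, { useSpeechRecognition } from "react-speech-recognition"

// Illustrative mic toggle. speechToTextLanguage stands in for the app's
// configured speech-to-text language setting.
export const MicToggleSketch = ({
  speechToTextLanguage = "en-US"
}: {
  speechToTextLanguage?: string
}) => {
  const {
    transcript, // live recognized text
    listening: isListening, // true while the recognizer is active
    resetTranscript, // clears the previous transcript
    browserSupportsSpeechRecognition // false when the Web Speech API is missing
  } = useSpeechRecognition()

  // Both forms hide the mic button entirely on unsupported browsers.
  if (!browserSupportsSpeechRecognition) {
    return null
  }

  return (
    <>
      <button
        type="button"
        onClick={() => {
          if (isListening) {
            SpeechRecognition.stopListening()
          } else {
            resetTranscript()
            SpeechRecognition.startListening({
              continuous: true, // keep listening until explicitly stopped
              language: speechToTextLanguage // BCP 47 tag, e.g. "en-US"
            })
          }
        }}>
        {isListening ? "Stop" : "Speak"}
      </button>
      <p>{transcript}</p>
    </>
  )
}

startListening and stopListening are static methods on the library's default export, while the hook exposes the live transcript and the listening flag, which is why the diffs import both.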

bun.lockb (binary file, changes not shown)

package.json

@@ -46,6 +46,7 @@
     "react-i18next": "^14.1.0",
     "react-markdown": "8.0.0",
     "react-router-dom": "6.10.0",
+    "react-speech-recognition": "^3.10.0",
     "react-syntax-highlighter": "^15.5.0",
     "react-toastify": "^10.0.4",
     "rehype-mathjax": "4.0.3",

@@ -64,6 +65,7 @@
     "@types/pubsub-js": "^1.8.6",
     "@types/react": "18.2.48",
     "@types/react-dom": "18.2.18",
+    "@types/react-speech-recognition": "^3.9.5",
     "@types/react-syntax-highlighter": "^15.5.11",
     "@types/turndown": "^5.0.4",
     "autoprefixer": "^10.4.17",

PlaygroundForm component

@@ -6,14 +6,14 @@ import { toBase64 } from "~/libs/to-base64"
 import { useMessageOption } from "~/hooks/useMessageOption"
 import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
 import { Image } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { getVariable } from "~/utils/select-varaible"
 import { useTranslation } from "react-i18next"
 import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
-import { SelectedKnowledge } from "../Knowledge/SelectedKnwledge"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"

 type Props = {
   dropedFile: File | undefined

@@ -84,7 +84,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
   useDynamicTextareaSize(textareaRef, form.values.message, 300)

-  const { isListening, start, stop, transcript } = useSpeechRecognition()
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition()

   const { sendWhenEnter, setSendWhenEnter } = useWebUI()

   React.useEffect(() => {

@@ -135,6 +141,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
         sendWhenEnter
       ) {
         e.preventDefault()
+        stopListening()
         form.onSubmit(async (value) => {
           if (value.message.trim().length === 0) {
             return

@@ -159,6 +166,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       })()
     }
   }

+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }

   return (
     <div className="px-3 pt-3 md:px-6 md:pt-6 bg-gray-50 dark:bg-[#262626] border rounded-t-xl dark:border-gray-600">
       <div

@@ -186,6 +200,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       <div className="flex bg-white dark:bg-transparent">
         <form
           onSubmit={form.onSubmit(async (value) => {
+            stopListening()
             if (!selectedModel || selectedModel.length === 0) {
               form.setFieldError("message", t("formError.noModel"))
               return

@@ -260,16 +275,18 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
           </div>
           <div className="flex !justify-end gap-3">
             <KnowledgeSelect />
+            {browserSupportsSpeechRecognition && (
             <Tooltip title={t("tooltip.speechToText")}>
               <button
                 type="button"
-                onClick={() => {
+                onClick={async () => {
                   if (isListening) {
-                    stop()
+                    SpeechRecognition.stopListening()
                   } else {
-                    start({
-                      lang: speechToTextLanguage,
-                      continuous: true
+                    resetTranscript()
+                    SpeechRecognition.startListening({
+                      continuous: true,
+                      language: speechToTextLanguage
                     })
                   }
                 }}

@@ -284,6 +301,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
                 )}
               </button>
             </Tooltip>
+            )}
             {!selectedKnowledge && (
               <Tooltip title={t("tooltip.uploadImage")}>

SidepanelForm component

@@ -5,12 +5,13 @@ import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
 import { useMessage } from "~/hooks/useMessage"
 import { toBase64 } from "~/libs/to-base64"
 import { Checkbox, Dropdown, Image, Tooltip } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { useTranslation } from "react-i18next"
 import { ModelSelect } from "@/components/Common/ModelSelect"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"

 type Props = {
   dropedFile: File | undefined

@@ -29,6 +30,20 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
       image: ""
     }
   })

+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition({
+  })
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }

   const onInputChange = async (
     e: React.ChangeEvent<HTMLInputElement> | File

@@ -59,6 +74,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     ) {
       e.preventDefault()
       form.onSubmit(async (value) => {
+        await stopListening()
         if (value.message.trim().length === 0) {
           return
         }

@@ -98,7 +114,6 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     streaming,
     setChatMode
   } = useMessage()
-  const { isListening, start, stop, transcript } = useSpeechRecognition()

   React.useEffect(() => {
     if (dropedFile) {

@@ -161,6 +176,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
           return
         }
       }
+      await stopListening()
       form.reset()
       textAreaFocus()
       await sendMessage({

@@ -196,16 +212,18 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
         />
         <div className="flex mt-4 justify-end gap-3">
           <ModelSelect />
+          {browserSupportsSpeechRecognition && (
           <Tooltip title={t("tooltip.speechToText")}>
             <button
               type="button"
-              onClick={() => {
+              onClick={async () => {
                 if (isListening) {
-                  stop()
+                  SpeechRecognition.stopListening()
                 } else {
-                  start({
-                    lang: speechToTextLanguage,
-                    continuous: true
+                  resetTranscript()
+                  SpeechRecognition.startListening({
+                    continuous: true,
+                    language: speechToTextLanguage
                   })
                 }
               }}

@@ -220,6 +238,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
               )}
             </button>
           </Tooltip>
+          )}
           <Tooltip title={t("tooltip.uploadImage")}>
             <button
               type="button"