feat: Add react-speech-recognition for speech-to-text functionality in SidepanelForm
parent 014565a14e
commit f9f48109c2
@@ -46,6 +46,7 @@
     "react-i18next": "^14.1.0",
     "react-markdown": "8.0.0",
     "react-router-dom": "6.10.0",
+    "react-speech-recognition": "^3.10.0",
     "react-syntax-highlighter": "^15.5.0",
     "react-toastify": "^10.0.4",
     "rehype-mathjax": "4.0.3",
@@ -64,6 +65,7 @@
     "@types/pubsub-js": "^1.8.6",
     "@types/react": "18.2.48",
     "@types/react-dom": "18.2.18",
+    "@types/react-speech-recognition": "^3.9.5",
     "@types/react-syntax-highlighter": "^15.5.11",
     "@types/turndown": "^5.0.4",
     "autoprefixer": "^10.4.17",
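Both the runtime package and its TypeScript typings are added to the dependency manifest: react-speech-recognition ^3.10.0 as a regular dependency and @types/react-speech-recognition ^3.9.5 alongside the other @types entries (most likely under devDependencies). Assuming npm is the package manager in use, this corresponds to npm install react-speech-recognition and npm install --save-dev @types/react-speech-recognition. The hunks that follow migrate PlaygroundForm, and then SidepanelForm, from the project's own ~/hooks/useSpeechRecognition hook to this library.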
@@ -6,14 +6,14 @@ import { toBase64 } from "~/libs/to-base64"
 import { useMessageOption } from "~/hooks/useMessageOption"
 import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
 import { Image } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { getVariable } from "~/utils/select-varaible"
 import { useTranslation } from "react-i18next"
 import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
-import { SelectedKnowledge } from "../Knowledge/SelectedKnwledge"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"
 
 type Props = {
   dropedFile: File | undefined
@@ -84,7 +84,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
 
   useDynamicTextareaSize(textareaRef, form.values.message, 300)
 
-  const { isListening, start, stop, transcript } = useSpeechRecognition()
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition()
+
   const { sendWhenEnter, setSendWhenEnter } = useWebUI()
 
   React.useEffect(() => {
@@ -135,6 +141,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       sendWhenEnter
     ) {
       e.preventDefault()
+      stopListening()
       form.onSubmit(async (value) => {
         if (value.message.trim().length === 0) {
           return
@@ -159,6 +166,13 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
       })()
     }
   }
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }
+
   return (
     <div className="px-3 pt-3 md:px-6 md:pt-6 bg-gray-50 dark:bg-[#262626] border rounded-t-xl  dark:border-gray-600">
       <div
@@ -186,6 +200,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
         <div className="flex bg-white dark:bg-transparent">
           <form
             onSubmit={form.onSubmit(async (value) => {
+              stopListening()
              if (!selectedModel || selectedModel.length === 0) {
                 form.setFieldError("message", t("formError.noModel"))
                 return
@@ -260,30 +275,33 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
                 </div>
                 <div className="flex !justify-end gap-3">
                   <KnowledgeSelect />
-                  <Tooltip title={t("tooltip.speechToText")}>
-                    <button
-                      type="button"
-                      onClick={() => {
-                        if (isListening) {
-                          stop()
-                        } else {
-                          start({
-                            lang: speechToTextLanguage,
-                            continuous: true
-                          })
-                        }
-                      }}
-                      className={`flex items-center justify-center dark:text-gray-300`}>
-                      {!isListening ? (
-                        <MicIcon className="h-5 w-5" />
-                      ) : (
-                        <div className="relative">
-                          <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
-                          <MicIcon className="h-5 w-5" />
-                        </div>
-                      )}
-                    </button>
-                  </Tooltip>
+                  {browserSupportsSpeechRecognition && (
+                    <Tooltip title={t("tooltip.speechToText")}>
+                      <button
+                        type="button"
+                        onClick={async () => {
+                          if (isListening) {
+                            SpeechRecognition.stopListening()
+                          } else {
+                            resetTranscript()
+                            SpeechRecognition.startListening({
+                              continuous: true,
+                              language: speechToTextLanguage
+                            })
+                          }
+                        }}
+                        className={`flex items-center justify-center dark:text-gray-300`}>
+                        {!isListening ? (
+                          <MicIcon className="h-5 w-5" />
+                        ) : (
+                          <div className="relative">
+                            <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
+                            <MicIcon className="h-5 w-5" />
+                          </div>
+                        )}
+                      </button>
+                    </Tooltip>
+                  )}
 
                   {!selectedKnowledge && (
                     <Tooltip title={t("tooltip.uploadImage")}>
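The hunks above destructure transcript and isListening from the new hook, but the code that actually writes the transcript into the textarea lies outside the context shown here. A plausible sketch of that wiring follows; the form.setFieldValue call and the "message" field name are assumptions inferred from the surrounding form API, not lines taken from the commit:

  // Hypothetical sketch: mirror the live transcript into the message field while dictating.
  React.useEffect(() => {
    if (isListening && transcript.length > 0) {
      form.setFieldValue("message", transcript) // assumed helper on the existing form object
    }
  }, [isListening, transcript])

The remaining hunks apply the same migration to the side panel's SidepanelForm component.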
@@ -5,12 +5,13 @@ import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
 import { useMessage } from "~/hooks/useMessage"
 import { toBase64 } from "~/libs/to-base64"
 import { Checkbox, Dropdown, Image, Tooltip } from "antd"
-import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
 import { useWebUI } from "~/store/webui"
 import { defaultEmbeddingModelForRag } from "~/services/ollama"
 import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
 import { useTranslation } from "react-i18next"
 import { ModelSelect } from "@/components/Common/ModelSelect"
+import { useSpeechRecognition } from "react-speech-recognition"
+import SpeechRecognition from "react-speech-recognition"
 
 type Props = {
   dropedFile: File | undefined
@@ -29,6 +30,20 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
       image: ""
     }
   })
+  const {
+    transcript,
+    listening: isListening,
+    resetTranscript,
+    browserSupportsSpeechRecognition
+  } = useSpeechRecognition({
+
+  })
+
+  const stopListening = async () => {
+    if (isListening) {
+      SpeechRecognition.stopListening()
+    }
+  }
 
   const onInputChange = async (
     e: React.ChangeEvent<HTMLInputElement> | File
@@ -59,6 +74,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     ) {
       e.preventDefault()
       form.onSubmit(async (value) => {
+        await stopListening()
         if (value.message.trim().length === 0) {
           return
         }
@@ -98,7 +114,6 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
     streaming,
     setChatMode
   } = useMessage()
-  const { isListening, start, stop, transcript } = useSpeechRecognition()
 
   React.useEffect(() => {
     if (dropedFile) {
@@ -161,6 +176,7 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
                   return
                 }
               }
+              await stopListening()
               form.reset()
               textAreaFocus()
               await sendMessage({
@@ -196,30 +212,33 @@ export const SidepanelForm = ({ dropedFile }: Props) => {
               />
               <div className="flex mt-4 justify-end gap-3">
                 <ModelSelect />
-                <Tooltip title={t("tooltip.speechToText")}>
-                  <button
-                    type="button"
-                    onClick={() => {
-                      if (isListening) {
-                        stop()
-                      } else {
-                        start({
-                          lang: speechToTextLanguage,
-                          continuous: true
-                        })
-                      }
-                    }}
-                    className={`flex items-center justify-center dark:text-gray-300`}>
-                    {!isListening ? (
-                      <MicIcon className="h-5 w-5" />
-                    ) : (
-                      <div className="relative">
-                        <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
-                        <MicIcon className="h-5 w-5" />
-                      </div>
-                    )}
-                  </button>
-                </Tooltip>
+                {browserSupportsSpeechRecognition && (
+                  <Tooltip title={t("tooltip.speechToText")}>
+                    <button
+                      type="button"
+                      onClick={async () => {
+                        if (isListening) {
+                          SpeechRecognition.stopListening()
+                        } else {
+                          resetTranscript()
+                          SpeechRecognition.startListening({
+                            continuous: true,
+                            language: speechToTextLanguage
+                          })
+                        }
+                      }}
+                      className={`flex items-center justify-center dark:text-gray-300`}>
+                      {!isListening ? (
+                        <MicIcon className="h-5 w-5" />
+                      ) : (
+                        <div className="relative">
+                          <span className="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
+                          <MicIcon className="h-5 w-5" />
+                        </div>
+                      )}
+                    </button>
+                  </Tooltip>
+                )}
                 <Tooltip title={t("tooltip.uploadImage")}>
                   <button
                     type="button"
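Taken together, both forms now follow the same pattern: render the mic control only when browserSupportsSpeechRecognition is true, reset the previous transcript before starting a continuous session in the configured language, and stop listening when the message is submitted. A minimal, self-contained sketch of that pattern follows; it is an illustration rather than code from the commit, and the MicButton name, the speechToTextLanguage prop, and the plain-text labels are placeholders.

import React from "react"
import SpeechRecognition, { useSpeechRecognition } from "react-speech-recognition"

// Illustrative sketch of the mic toggle both forms now share (not part of the commit).
export const MicButton = ({ speechToTextLanguage }: { speechToTextLanguage: string }) => {
  const {
    transcript,
    listening: isListening,
    resetTranscript,
    browserSupportsSpeechRecognition
  } = useSpeechRecognition()

  // The Web Speech API is not available in every browser; hide the control when unsupported.
  if (!browserSupportsSpeechRecognition) {
    return null
  }

  return (
    <div>
      <button
        type="button"
        onClick={() => {
          if (isListening) {
            SpeechRecognition.stopListening()
          } else {
            // Drop the previous dictation before starting a new continuous session.
            resetTranscript()
            SpeechRecognition.startListening({
              continuous: true,
              language: speechToTextLanguage
            })
          }
        }}>
        {isListening ? "Stop dictation" : "Start dictation"}
      </button>
      {/* transcript updates live while listening; the real forms consume it outside the hunks shown above. */}
      <p>{transcript}</p>
    </div>
  )
}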