Refactor useMessage hook and remove unused code
This commit is contained in:
		
							parent
							
								
									2381588e72
								
							
						
					
					
						commit
						3904a74701
					
				| @ -1,89 +1,22 @@ | ||||
| import React from "react" | ||||
| import { cleanUrl } from "~/libs/clean-url" | ||||
| import { | ||||
|   defaultEmbeddingChunkOverlap, | ||||
|   defaultEmbeddingChunkSize, | ||||
|   defaultEmbeddingModelForRag, | ||||
|   getOllamaURL, | ||||
|   promptForRag, | ||||
|   systemPromptForNonRag | ||||
| } from "~/services/ollama" | ||||
| import { useStoreMessage, type ChatHistory, type Message } from "~/store" | ||||
| import { useStoreMessage, type Message } from "~/store" | ||||
| import { ChatOllama } from "@langchain/community/chat_models/ollama" | ||||
| import { | ||||
|   HumanMessage, | ||||
|   AIMessage, | ||||
|   type MessageContent, | ||||
|   SystemMessage | ||||
| } from "@langchain/core/messages" | ||||
| import { HumanMessage, SystemMessage } from "@langchain/core/messages" | ||||
| import { getDataFromCurrentTab } from "~/libs/get-html" | ||||
| import { PageAssistHtmlLoader } from "~/loader/html" | ||||
| import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | ||||
| import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" | ||||
| import { | ||||
|   createChatWithWebsiteChain, | ||||
|   groupMessagesByConversation | ||||
| } from "~/chain/chat-with-website" | ||||
| import { MemoryVectorStore } from "langchain/vectorstores/memory" | ||||
| import { chromeRunTime } from "~/libs/runtime" | ||||
/**
 * Shape of a bot response object.
 *
 * - `bot` carries the response `text` and an array of `sourceDocuments`.
 * - `history` is a `ChatHistory` value (type imported from `~/store`).
 * - `history_id` is a string identifier that travels with the history.
 */
export type BotResponse = {
  history_id: string
  history: ChatHistory
  bot: {
    sourceDocuments: any[]
    text: string
  }
}
| 
 | ||||
/**
 * Converts plain chat records into LangChain message instances.
 *
 * - `user` records become a `HumanMessage`; when the record has a truthy
 *   `image`, an `image_url` part is placed before the text part.
 * - `assistant` records become an `AIMessage` with a single text part.
 * - Records with any other role (i.e. `system`) are left out of the result.
 *
 * @param messages chat records in conversation order
 * @returns the converted messages, in the same relative order
 */
const generateHistory = (
  messages: {
    role: "user" | "assistant" | "system"
    content: string
    image?: string
  }[]
) =>
  messages.flatMap((entry) => {
    switch (entry.role) {
      case "user": {
        // Image (if any) goes first, followed by the text part.
        const parts: MessageContent = entry.image
          ? [
              {
                type: "image_url",
                image_url: entry.image
              },
              {
                type: "text",
                text: entry.content
              }
            ]
          : [
              {
                type: "text",
                text: entry.content
              }
            ]
        return [new HumanMessage({ content: parts })]
      }
      case "assistant":
        return [
          new AIMessage({
            content: [
              {
                type: "text",
                text: entry.content
              }
            ]
          })
        ]
      default:
        // Other roles contribute nothing to the history.
        return []
    }
  })
| import { memoryEmbedding } from "@/utils/memory-embeddings" | ||||
| 
 | ||||
| export const useMessage = () => { | ||||
|   const { | ||||
| @ -129,47 +62,18 @@ export const useMessage = () => { | ||||
|     setStreaming(false) | ||||
|   } | ||||
| 
 | ||||
|   const memoryEmbedding = async ( | ||||
|     url: string, | ||||
|     html: string, | ||||
|     ollamaEmbedding: OllamaEmbeddings | ||||
|   ) => { | ||||
|     const loader = new PageAssistHtmlLoader({ | ||||
|       html, | ||||
|       url | ||||
|     }) | ||||
|     const docs = await loader.load() | ||||
|     const chunkSize = await defaultEmbeddingChunkSize() | ||||
|     const chunkOverlap = await defaultEmbeddingChunkOverlap() | ||||
|     const textSplitter = new RecursiveCharacterTextSplitter({ | ||||
|       chunkSize, | ||||
|       chunkOverlap | ||||
|     }) | ||||
| 
 | ||||
|     const chunks = await textSplitter.splitDocuments(docs) | ||||
| 
 | ||||
|     const store = new MemoryVectorStore(ollamaEmbedding) | ||||
| 
 | ||||
|     setIsEmbedding(true) | ||||
| 
 | ||||
|     await store.addDocuments(chunks) | ||||
|     setKeepTrackOfEmbedding({ | ||||
|       ...keepTrackOfEmbedding, | ||||
|       [url]: store | ||||
|     }) | ||||
|     setIsEmbedding(false) | ||||
| 
 | ||||
|     return store | ||||
|   } | ||||
| 
 | ||||
|   const chatWithWebsiteMode = async (message: string) => { | ||||
|     try { | ||||
|       let isAlreadyExistEmbedding: MemoryVectorStore | ||||
|       let embedURL: string, embedHTML: string | ||||
|       let embedURL: string, embedHTML: string, embedType: string | ||||
|       let embedPDF: { content: string; page: number }[] = [] | ||||
| 
 | ||||
|       if (messages.length === 0) { | ||||
|         const { content: html, url, type } = await getDataFromCurrentTab() | ||||
|         const { content: html, url, type, pdf } = await getDataFromCurrentTab() | ||||
|         embedHTML = html | ||||
|         embedURL = url | ||||
|         embedType = type | ||||
|         embedPDF = pdf | ||||
|         setCurrentURL(url) | ||||
|         isAlreadyExistEmbedding = keepTrackOfEmbedding[currentURL] | ||||
|       } else { | ||||
| @ -212,11 +116,16 @@ export const useMessage = () => { | ||||
|       if (isAlreadyExistEmbedding) { | ||||
|         vectorstore = isAlreadyExistEmbedding | ||||
|       } else { | ||||
|         vectorstore = await memoryEmbedding( | ||||
|           embedURL, | ||||
|           embedHTML, | ||||
|           ollamaEmbedding | ||||
|         ) | ||||
|         vectorstore = await memoryEmbedding({ | ||||
|           html: embedHTML, | ||||
|           keepTrackOfEmbedding: keepTrackOfEmbedding, | ||||
|           ollamaEmbedding: ollamaEmbedding, | ||||
|           pdf: embedPDF, | ||||
|           setIsEmbedding: setIsEmbedding, | ||||
|           setKeepTrackOfEmbedding: setKeepTrackOfEmbedding, | ||||
|           type: embedType, | ||||
|           url: embedURL | ||||
|         }) | ||||
|       } | ||||
| 
 | ||||
|       const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } = | ||||
|  | ||||
| @ -1,14 +1,11 @@ | ||||
| import i18n from "i18next"; | ||||
| import LanguageDetector from "i18next-browser-languagedetector"; | ||||
| import { initReactI18next } from "react-i18next"; | ||||
| import { en } from "./lang/en"; | ||||
| import { ml } from "./lang/ml"; | ||||
| 
 | ||||
| i18n | ||||
|     .use(LanguageDetector) | ||||
|     .use(initReactI18next) | ||||
|     .init({ | ||||
|         debug: true, | ||||
|         resources: { | ||||
|             en: en, | ||||
|             ml: ml | ||||
|  | ||||
| @ -26,10 +26,7 @@ export const getPdf = async (data: ArrayBuffer) => { | ||||
| 
 | ||||
| const _getHtml = async () => { | ||||
|   const url = window.location.href | ||||
|   // check the content type
 | ||||
|   if (document.contentType === "application/pdf") { | ||||
| 
 | ||||
| 
 | ||||
|     return { url, content: "", type: "pdf" } | ||||
|   } | ||||
|   const html = Array.from(document.querySelectorAll("script")).reduce( | ||||
| @ -40,6 +37,7 @@ const _getHtml = async () => { | ||||
|   ) | ||||
|   return { url, content: html, type: "html" } | ||||
| } | ||||
| 
 | ||||
| export const getDataFromCurrentTab = async () => { | ||||
|   const result = new Promise((resolve) => { | ||||
|     chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => { | ||||
| @ -66,7 +64,10 @@ export const getDataFromCurrentTab = async () => { | ||||
|   if (type === "pdf") { | ||||
|     const res = await fetch(url) | ||||
|     const data = await res.arrayBuffer() | ||||
|     let pdfHtml: string[] = [] | ||||
|     let pdfHtml: { | ||||
|       content: string | ||||
|       page: number | ||||
|     }[] = [] | ||||
|     const pdf = await getPdf(data) | ||||
| 
 | ||||
|     for (let i = 1; i <= pdf.numPages; i += 1) { | ||||
| @ -79,18 +80,22 @@ export const getDataFromCurrentTab = async () => { | ||||
| 
 | ||||
|       const text = content?.items.map((item: any) => item.str).join("\n") | ||||
|         .replace(/\x00/g, "").trim(); | ||||
|       pdfHtml.push(`<div class="pdf-page">${text}</div>`) | ||||
|       pdfHtml.push({ | ||||
|         content: text, | ||||
|         page: i | ||||
|       }) | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     return { | ||||
|       url, | ||||
|       content: pdfHtml.join(""), | ||||
|       type: "html" | ||||
|       content: "", | ||||
|       pdf: pdfHtml, | ||||
|       type: "pdf" | ||||
|     } | ||||
| 
 | ||||
|   } | ||||
| 
 | ||||
|   return { url, content, type } | ||||
|   return { url, content, type, pdf: [] } | ||||
| } | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										37
									
								
								src/loader/pdf.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								src/loader/pdf.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | ||||
| import { BaseDocumentLoader } from "langchain/document_loaders/base" | ||||
| import { Document } from "@langchain/core/documents" | ||||
/** Constructor input for {@link PageAssistPDFLoader}. */
export interface WebLoaderParams {
    /** Extracted page entries: each has the page `content` and its `page` number. */
    pdf: { content: string, page: number }[]
    /** URL recorded as the `source` in each page's metadata. */
    url: string
}

/**
 * Document loader that turns already-extracted PDF page text into a single
 * LangChain `Document`.
 */
export class PageAssistPDFLoader
    extends BaseDocumentLoader
    implements WebLoaderParams {
    pdf: { content: string, page: number }[]
    url: string

    constructor({ pdf, url }: WebLoaderParams) {
        super()
        this.pdf = pdf
        this.url = url
    }

    /**
     * Builds one document per page, then merges them.
     *
     * @returns an array holding exactly one `Document`: its `pageContent` is
     * every page's content joined with a blank line ("\n\n"), and its
     * `metadata` is the array of per-page `{ source, page }` objects.
     */
    async load(): Promise<Document<Record<string, any>>[]> {
        const perPage: Document[] = this.pdf.map(
            ({ content, page }) =>
                new Document({
                    pageContent: content,
                    metadata: { source: this.url, page }
                })
        )

        const mergedText = perPage.map((doc) => doc.pageContent).join("\n\n")
        const mergedMetadata = perPage.map((doc) => doc.metadata)

        return [
            new Document({
                pageContent: mergedText,
                metadata: mergedMetadata,
            }),
        ];
    }
}
							
								
								
									
										63
									
								
								src/utils/memory-embeddings.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/utils/memory-embeddings.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,63 @@ | ||||
| import { PageAssistHtmlLoader } from "~/loader/html" | ||||
| import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | ||||
| import { MemoryVectorStore } from "langchain/vectorstores/memory" | ||||
| import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" | ||||
| import { defaultEmbeddingChunkOverlap, defaultEmbeddingChunkSize } from "@/services/ollama" | ||||
| import { PageAssistPDFLoader } from "@/loader/pdf" | ||||
| 
 | ||||
| 
 | ||||
/**
 * Picks the document loader for the given content type.
 *
 * @param html markup passed to the HTML loader
 * @param pdf page entries passed to the PDF loader
 * @param type `"pdf"` selects the PDF loader; any other value selects the HTML loader
 * @param url source URL handed to whichever loader is created
 * @returns a `PageAssistPDFLoader` or a `PageAssistHtmlLoader`
 */
export const getLoader = ({ html, pdf, type, url }: {
    url: string,
    html: string,
    type: string,
    pdf: { content: string, page: number }[]
}) => {
    const wantsPdf = type === "pdf"
    if (!wantsPdf) {
        return new PageAssistHtmlLoader({ html, url })
    }
    return new PageAssistPDFLoader({ pdf, url })
}
| 
 | ||||
/**
 * Loads a page (HTML or PDF), splits it into chunks, embeds the chunks into a
 * new in-memory vector store and records that store under `url`.
 *
 * `setIsEmbedding(true)` is called first and `setIsEmbedding(false)` is
 * guaranteed to run afterwards, even if loading, splitting or embedding
 * throws, so a failure cannot leave the caller's embedding flag stuck on.
 *
 * @param html markup used when `type` is not `"pdf"`
 * @param keepTrackOfEmbedding existing url-to-store map; it is copied, not mutated
 * @param ollamaEmbedding embeddings instance backing the vector store
 * @param pdf page entries used when `type` is `"pdf"`
 * @param setIsEmbedding callback that receives the in-progress flag
 * @param setKeepTrackOfEmbedding callback that receives the updated url-to-store map
 * @param type content type forwarded to `getLoader`
 * @param url key under which the new store is recorded
 * @returns the populated vector store
 * @throws rethrows any error raised while loading, splitting or embedding
 */
export const memoryEmbedding = async (
    { html,
        keepTrackOfEmbedding, ollamaEmbedding, pdf, setIsEmbedding, setKeepTrackOfEmbedding, type, url }: {
            url: string,
            html: string,
            type: string,
            pdf: { content: string, page: number }[],
            keepTrackOfEmbedding: Record<string, MemoryVectorStore>,
            ollamaEmbedding: OllamaEmbeddings,
            setIsEmbedding: (value: boolean) => void,
            setKeepTrackOfEmbedding: (value: Record<string, MemoryVectorStore>) => void
        }
) => {
    setIsEmbedding(true)

    try {
        const loader = getLoader({ html, pdf, type, url })
        const docs = await loader.load()
        const chunkSize = await defaultEmbeddingChunkSize()
        const chunkOverlap = await defaultEmbeddingChunkOverlap()
        const textSplitter = new RecursiveCharacterTextSplitter({
            chunkSize,
            chunkOverlap
        })

        const chunks = await textSplitter.splitDocuments(docs)

        const store = new MemoryVectorStore(ollamaEmbedding)

        await store.addDocuments(chunks)
        setKeepTrackOfEmbedding({
            ...keepTrackOfEmbedding,
            [url]: store
        })
        return store
    } finally {
        // Runs on success and on failure, so the flag never stays on.
        setIsEmbedding(false)
    }
}
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user