feat: Add support for Mammoth library for docx file uploads
This commit is contained in:
		
							parent
							
								
									845b725970
								
							
						
					
					
						commit
						8899a42331
					
				| @ -36,6 +36,7 @@ | ||||
|     "i18next-browser-languagedetector": "^7.2.0", | ||||
|     "langchain": "^0.1.28", | ||||
|     "lucide-react": "^0.350.0", | ||||
|     "mammoth": "^1.7.2", | ||||
|     "ml-distance": "^4.0.1", | ||||
|     "pdfjs-dist": "4.0.379", | ||||
|     "property-information": "^6.4.1", | ||||
|  | ||||
| @ -32,7 +32,7 @@ | ||||
|         "uploadFile": { | ||||
|             "label": "Upload File", | ||||
|             "uploadText": "Drag and drop a file here or click to upload", | ||||
|             "uploadHint": "Supported file types: .pdf, .csv, .txt, .md", | ||||
|             "uploadHint": "Supported file types: .pdf, .csv, .txt, .md, .docx", | ||||
|             "required": "File is required" | ||||
|         }, | ||||
|         "submit": "Submit", | ||||
|  | ||||
| @ -32,7 +32,7 @@ | ||||
|         "uploadFile": { | ||||
|             "label": "ഫയല് അപ്ലോഡ് ചെയ്യുക", | ||||
|             "uploadText": "ഇവിടെ ഒരു ഫയല് എടുത്തിടുക അല്ലെങ്കില് അപ്ലോഡ് ചെയ്യാന് ക്ലിക്ക് ചെയ്യുക", | ||||
|             "uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md", | ||||
|             "uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md, .docx", | ||||
|             "required": "ഫയല് ആവശ്യമാണ്" | ||||
|         }, | ||||
|         "submit": "സമര്പ്പിക്കുക", | ||||
|  | ||||
| @ -32,7 +32,7 @@ | ||||
|         "uploadFile": { | ||||
|             "label": "Загрузить файл", | ||||
|             "uploadText": "Перетащите файл сюда или нажмите, чтобы загрузить", | ||||
|             "uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md", | ||||
|             "uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md, .docx", | ||||
|             "required": "Файл обязателен" | ||||
|         }, | ||||
|         "submit": "Отправить", | ||||
|  | ||||
| @ -90,15 +90,16 @@ export const AddKnowledge = ({ open, setOpen }: Props) => { | ||||
|             return e?.fileList | ||||
|           }}> | ||||
|           <Upload.Dragger | ||||
|             accept={".pdf, .csv, .txt, .md"} | ||||
|             accept={".pdf, .csv, .txt, .md, .docx"} | ||||
|             multiple={true} | ||||
|             maxCount={10} | ||||
|             beforeUpload={(file) => { | ||||
|               const allowedTypes = [ | ||||
|                 "application/pdf", | ||||
|                 // "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 | ||||
|                 "text/csv", | ||||
|                 "text/plain" | ||||
|                 "text/plain", | ||||
|                 "text/markdown", | ||||
|                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | ||||
|               ] | ||||
|                 .map((type) => type.toLowerCase()) | ||||
|                 .join(", ") | ||||
|  | ||||
| @ -7,6 +7,7 @@ type HistoryInfo = { | ||||
|   id: string | ||||
|   title: string | ||||
|   is_rag: boolean | ||||
|   message_source?: "copilot" | "web-ui" | ||||
|   createdAt: number | ||||
| } | ||||
| 
 | ||||
| @ -224,10 +225,10 @@ export const generateID = () => { | ||||
|   }) | ||||
| } | ||||
| 
 | ||||
| export const saveHistory = async (title: string, is_rag?: boolean) => { | ||||
| export const saveHistory = async (title: string, is_rag?: boolean, message_source?: "copilot" | "web-ui") => { | ||||
|   const id = generateID() | ||||
|   const createdAt = Date.now() | ||||
|   const history = { id, title, createdAt, is_rag } | ||||
|   const history = { id, title, createdAt, is_rag, message_source } | ||||
|   const db = new PageAssitDatabase() | ||||
|   await db.addChatHistory(history) | ||||
|   return history | ||||
| @ -465,3 +466,17 @@ export const importPrompts = async (prompts: Prompts) => { | ||||
|     await db.addPrompt(prompt) | ||||
|   } | ||||
| } | ||||
| 
 | ||||
/**
 * Looks up a chat that was started from the copilot and loads its messages.
 *
 * Takes the first entry returned by `db.getChatHistories()` whose
 * `message_source` is "copilot", then fetches that chat's messages with
 * `db.getChatHistory(history.id)`.
 *
 * NOTE(review): "recent" relies on `getChatHistories()` returning newest-first;
 * that ordering is not visible here — confirm against the database class.
 *
 * @returns `{ history, messages }` for the matched chat, or `null` when there
 *          are no stored histories or none has `message_source === "copilot"`.
 */
| export const getRecentChatFromCopilot = async () => { | ||||
|   const db = new PageAssitDatabase() | ||||
|   const chatHistories = await db.getChatHistories() | ||||
|   if (chatHistories.length === 0) return null | ||||
|   const history = chatHistories.find( | ||||
|     (history) => history.message_source === "copilot" | ||||
|   ) | ||||
|   if (!history) return null | ||||
| 
 | ||||
|   const messages = await db.getChatHistory(history.id) | ||||
| 
 | ||||
|   return { history, messages } | ||||
| } | ||||
| @ -11,7 +11,8 @@ export const saveMessageOnError = async ({ | ||||
|   historyId, | ||||
|   selectedModel, | ||||
|   setHistoryId, | ||||
|   isRegenerating | ||||
|   isRegenerating, | ||||
|   message_source = "web-ui" | ||||
| }: { | ||||
|   e: any | ||||
|   setHistory: (history: ChatHistory) => void | ||||
| @ -22,7 +23,8 @@ export const saveMessageOnError = async ({ | ||||
|   historyId: string | null | ||||
|   selectedModel: string | ||||
|   setHistoryId: (historyId: string) => void | ||||
|   isRegenerating: boolean | ||||
|   isRegenerating: boolean, | ||||
|   message_source?: "copilot" | "web-ui" | ||||
| }) => { | ||||
|   if ( | ||||
|     e?.name === "AbortError" || | ||||
| @ -65,7 +67,7 @@ export const saveMessageOnError = async ({ | ||||
|         2 | ||||
|       ) | ||||
|     } else { | ||||
|       const newHistoryId = await saveHistory(userMessage) | ||||
|       const newHistoryId = await saveHistory(userMessage, false, message_source) | ||||
|       if (!isRegenerating) { | ||||
|         await saveMessage( | ||||
|           newHistoryId.id, | ||||
| @ -103,7 +105,8 @@ export const saveMessageOnSuccess = async ({ | ||||
|   message, | ||||
|   image, | ||||
|   fullText, | ||||
|   source | ||||
|   source, | ||||
|   message_source = "web-ui" | ||||
| }: { | ||||
|   historyId: string | null | ||||
|   setHistoryId: (historyId: string) => void | ||||
| @ -112,7 +115,8 @@ export const saveMessageOnSuccess = async ({ | ||||
|   message: string | ||||
|   image: string | ||||
|   fullText: string | ||||
|   source: any[] | ||||
|   source: any[], | ||||
|   message_source?: "copilot" | "web-ui" | ||||
| }) => { | ||||
|   if (historyId) { | ||||
|     if (!isRegenerate) { | ||||
| @ -136,7 +140,7 @@ export const saveMessageOnSuccess = async ({ | ||||
|       2 | ||||
|     ) | ||||
|   } else { | ||||
|     const newHistoryId = await saveHistory(message) | ||||
|     const newHistoryId = await saveHistory(message, false, message_source) | ||||
|     await saveMessage( | ||||
|       newHistoryId.id, | ||||
|       selectedModel, | ||||
|  | ||||
| @ -329,7 +329,8 @@ export const useMessage = () => { | ||||
|         message, | ||||
|         image, | ||||
|         fullText, | ||||
|         source | ||||
|         source, | ||||
|         message_source: "copilot" | ||||
|       }) | ||||
| 
 | ||||
|       setIsProcessing(false) | ||||
| @ -345,7 +346,8 @@ export const useMessage = () => { | ||||
|         setHistory, | ||||
|         setHistoryId, | ||||
|         userMessage: message, | ||||
|         isRegenerating: isRegenerate | ||||
|         isRegenerating: isRegenerate, | ||||
|         message_source: "copilot" | ||||
|       }) | ||||
| 
 | ||||
|       if (!errorSave) { | ||||
| @ -535,7 +537,8 @@ export const useMessage = () => { | ||||
|         message, | ||||
|         image, | ||||
|         fullText, | ||||
|         source: [] | ||||
|         source: [], | ||||
|         message_source: "copilot" | ||||
|       }) | ||||
| 
 | ||||
|       setIsProcessing(false) | ||||
| @ -551,7 +554,8 @@ export const useMessage = () => { | ||||
|         setHistory, | ||||
|         setHistoryId, | ||||
|         userMessage: message, | ||||
|         isRegenerating: isRegenerate | ||||
|         isRegenerating: isRegenerate, | ||||
|         message_source: "copilot" | ||||
|       }) | ||||
| 
 | ||||
|       if (!errorSave) { | ||||
|  | ||||
| @ -9,6 +9,18 @@ import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | ||||
| import { PageAssistVectorStore } from "./PageAssistVectorStore" | ||||
| import { PageAssisCSVUrlLoader } from "@/loader/csv" | ||||
| import { PageAssisTXTUrlLoader } from "@/loader/txt" | ||||
| import { PageAssistDocxLoader } from "@/loader/docx" | ||||
| 
 | ||||
/**
 * Promise wrapper around `FileReader.readAsArrayBuffer`.
 *
 * @param file - The file whose bytes should be read.
 * @returns A promise that resolves with `reader.result` (cast to
 *          `ArrayBuffer`) when the reader's `onload` fires, and rejects with
 *          whatever the reader passes to `onerror`.
 */
| const readAsArrayBuffer = (file: File): Promise<ArrayBuffer> => { | ||||
|   return new Promise((resolve, reject) => { | ||||
|     const reader = new FileReader() | ||||
|     reader.onload = () => { | ||||
|       resolve(reader.result as ArrayBuffer) | ||||
|     } | ||||
|     reader.onerror = reject | ||||
|     reader.readAsArrayBuffer(file) | ||||
|   }) | ||||
| } | ||||
| 
 | ||||
| export const processKnowledge = async (msg: any, id: string): Promise<void> => { | ||||
|   console.log(`Processing knowledge with id: ${id}`) | ||||
| @ -58,6 +70,26 @@ export const processKnowledge = async (msg: any, id: string): Promise<void> => { | ||||
|           knownledge_id: knowledge.id, | ||||
|           file_id: doc.source_id | ||||
|         }) | ||||
|       } else if (doc.type === "docx" || doc.type === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") { | ||||
|         try { | ||||
|           const loader = new PageAssistDocxLoader({ | ||||
|             fileName: doc.filename, | ||||
|             buffer: await toArrayBufferFromBase64( | ||||
|               doc.content | ||||
|             ) | ||||
|           }) | ||||
| 
 | ||||
|           let docs = await loader.load() | ||||
| 
 | ||||
|           const chunks = await textSplitter.splitDocuments(docs) | ||||
| 
 | ||||
|           await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, { | ||||
|             knownledge_id: knowledge.id, | ||||
|             file_id: doc.source_id | ||||
|           }) | ||||
|         } catch (error) { | ||||
|           console.error(`Error processing knowledge with id: ${id}`, error) | ||||
|         } | ||||
|       } else { | ||||
|         const loader = new PageAssisTXTUrlLoader({ | ||||
|           name: doc.filename, | ||||
|  | ||||
							
								
								
									
										33
									
								
								src/loader/docx.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/loader/docx.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,33 @@ | ||||
| import { BaseDocumentLoader } from "langchain/document_loaders/base" | ||||
| import { Document } from "@langchain/core/documents" | ||||
| import * as mammoth from "mammoth" | ||||
| 
 | ||||
/**
 * Constructor arguments for `PageAssistDocxLoader`.
 *
 * `fileName` is stored as the `source` metadata of the produced document;
 * `buffer` holds the raw bytes handed to mammoth for text extraction.
 */
| export interface WebLoaderParams { | ||||
|     fileName: string | ||||
|     buffer: ArrayBuffer | ||||
| } | ||||
| 
 | ||||
/**
 * Document loader that turns an in-memory .docx file into a LangChain
 * `Document` by extracting its raw text with mammoth.
 *
 * The loader keeps the file name and byte buffer it was constructed with and
 * does no work until `load()` is called.
 */
| export class PageAssistDocxLoader | ||||
|     extends BaseDocumentLoader | ||||
|     implements WebLoaderParams { | ||||
|     fileName: string | ||||
|     buffer: ArrayBuffer | ||||
| 
 | ||||
|     constructor({ fileName, buffer }: WebLoaderParams) { | ||||
|         super() | ||||
|         this.fileName = fileName | ||||
|         this.buffer = buffer | ||||
|     } | ||||
| 
 | ||||
/**
 * Extracts the raw text from the stored buffer via `mammoth.extractRawText`.
 *
 * @returns A single-element array holding one `Document` whose `pageContent`
 *          is the extracted text and whose metadata is `{ source: fileName }`;
 *          an empty array when the extracted text is empty (falsy).
 */
|     public async load(): Promise<Document[]> { | ||||
|         const data = await mammoth.extractRawText({ | ||||
|             arrayBuffer: this.buffer | ||||
|         }) | ||||
|         const text = data.value | ||||
|         const meta = { source: this.fileName } | ||||
|         if (text) { | ||||
|             return [new Document({ pageContent: text, metadata: meta })] | ||||
|         } | ||||
|         return [] | ||||
|     } | ||||
| } | ||||
| @ -1,16 +1,34 @@ | ||||
| import { | ||||
|   formatToChatHistory, | ||||
|   formatToMessage, | ||||
|   getRecentChatFromCopilot | ||||
| } from "@/db" | ||||
| import React from "react" | ||||
| import { SidePanelBody } from "~/components/Sidepanel/Chat/body" | ||||
| import { SidepanelForm } from "~/components/Sidepanel/Chat/form" | ||||
| import { SidepanelHeader } from "~/components/Sidepanel/Chat/header" | ||||
| import { useMessage } from "~/hooks/useMessage" | ||||
| 
 | ||||
|  const SidepanelChat = () => { | ||||
| const SidepanelChat = () => { | ||||
|   const drop = React.useRef<HTMLDivElement>(null) | ||||
|   const [dropedFile, setDropedFile] = React.useState<File | undefined>() | ||||
|   const [dropState, setDropState] = React.useState< | ||||
|     "idle" | "dragging" | "error" | ||||
|   >("idle") | ||||
|   const {chatMode} = useMessage() | ||||
|   const { chatMode, messages, setHistory, setHistoryId, setMessages } = | ||||
|     useMessage() | ||||
| 
 | ||||
|   const setRecentMessagesOnLoad = async () => { | ||||
|     if (messages.length === 0) { | ||||
|       const recentChat = await getRecentChatFromCopilot() | ||||
|       if (recentChat) { | ||||
|         setHistoryId(recentChat.history.id) | ||||
|         setHistory(formatToChatHistory(recentChat.messages)) | ||||
|         setMessages(formatToMessage(recentChat.messages)) | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   React.useEffect(() => { | ||||
|     if (!drop.current) { | ||||
|       return | ||||
| @ -67,6 +85,7 @@ import { useMessage } from "~/hooks/useMessage" | ||||
|       } | ||||
|     } | ||||
|   }, []) | ||||
| 
 | ||||
|   return ( | ||||
|     <div | ||||
|       ref={drop} | ||||
|  | ||||
| @ -10,6 +10,7 @@ export const toBase64 = (file: File | Blob): Promise<string> => { | ||||
|   }) | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| export const toArrayBufferFromBase64 = async (base64: string) => { | ||||
|   const res = await fetch(base64) | ||||
|   const blob = await res.blob() | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user