feat: Add support for Mammoth library for docx file uploads
This commit is contained in:
parent
845b725970
commit
8899a42331
@ -36,6 +36,7 @@
|
||||
"i18next-browser-languagedetector": "^7.2.0",
|
||||
"langchain": "^0.1.28",
|
||||
"lucide-react": "^0.350.0",
|
||||
"mammoth": "^1.7.2",
|
||||
"ml-distance": "^4.0.1",
|
||||
"pdfjs-dist": "4.0.379",
|
||||
"property-information": "^6.4.1",
|
||||
|
@ -32,7 +32,7 @@
|
||||
"uploadFile": {
|
||||
"label": "Upload File",
|
||||
"uploadText": "Drag and drop a file here or click to upload",
|
||||
"uploadHint": "Supported file types: .pdf, .csv, .txt, .md",
|
||||
"uploadHint": "Supported file types: .pdf, .csv, .txt, .md, .docx",
|
||||
"required": "File is required"
|
||||
},
|
||||
"submit": "Submit",
|
||||
|
@ -32,7 +32,7 @@
|
||||
"uploadFile": {
|
||||
"label": "ഫയല് അപ്ലോഡ് ചെയ്യുക",
|
||||
"uploadText": "ഇവിടെ ഒരു ഫയല് എടുത്തിടുക അല്ലെങ്കില് അപ്ലോഡ് ചെയ്യാന് ക്ലിക്ക് ചെയ്യുക",
|
||||
"uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md",
|
||||
"uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md, .docx",
|
||||
"required": "ഫയല് ആവശ്യമാണ്"
|
||||
},
|
||||
"submit": "സമര്പ്പിക്കുക",
|
||||
|
@ -32,7 +32,7 @@
|
||||
"uploadFile": {
|
||||
"label": "Загрузить файл",
|
||||
"uploadText": "Перетащите файл сюда или нажмите, чтобы загрузить",
|
||||
"uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md",
|
||||
"uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md, .docx",
|
||||
"required": "Файл обязателен"
|
||||
},
|
||||
"submit": "Отправить",
|
||||
|
@ -90,15 +90,16 @@ export const AddKnowledge = ({ open, setOpen }: Props) => {
|
||||
return e?.fileList
|
||||
}}>
|
||||
<Upload.Dragger
|
||||
accept={".pdf, .csv, .txt, .md"}
|
||||
accept={".pdf, .csv, .txt, .md, .docx"}
|
||||
multiple={true}
|
||||
maxCount={10}
|
||||
beforeUpload={(file) => {
|
||||
const allowedTypes = [
|
||||
"application/pdf",
|
||||
// "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"text/csv",
|
||||
"text/plain"
|
||||
"text/plain",
|
||||
"text/markdown",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
]
|
||||
.map((type) => type.toLowerCase())
|
||||
.join(", ")
|
||||
|
@ -7,6 +7,7 @@ type HistoryInfo = {
|
||||
id: string
|
||||
title: string
|
||||
is_rag: boolean
|
||||
message_source?: "copilot" | "web-ui"
|
||||
createdAt: number
|
||||
}
|
||||
|
||||
@ -224,10 +225,10 @@ export const generateID = () => {
|
||||
})
|
||||
}
|
||||
|
||||
export const saveHistory = async (title: string, is_rag?: boolean) => {
|
||||
export const saveHistory = async (title: string, is_rag?: boolean, message_source?: "copilot" | "web-ui") => {
|
||||
const id = generateID()
|
||||
const createdAt = Date.now()
|
||||
const history = { id, title, createdAt, is_rag }
|
||||
const history = { id, title, createdAt, is_rag, message_source }
|
||||
const db = new PageAssitDatabase()
|
||||
await db.addChatHistory(history)
|
||||
return history
|
||||
@ -465,3 +466,17 @@ export const importPrompts = async (prompts: Prompts) => {
|
||||
await db.addPrompt(prompt)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Finds the first stored chat history whose `message_source` is "copilot"
 * and loads the messages that belong to it.
 *
 * @returns `{ history, messages }` for the matching history, or `null` when
 * no histories are stored or none of them carries the "copilot" source.
 */
export const getRecentChatFromCopilot = async () => {
  const database = new PageAssitDatabase()
  const allHistories = await database.getChatHistories()

  // Nothing stored yet, so there is nothing to restore.
  if (allHistories.length === 0) {
    return null
  }

  const copilotHistory = allHistories.find(
    (entry) => entry.message_source === "copilot"
  )
  if (!copilotHistory) {
    return null
  }

  const messages = await database.getChatHistory(copilotHistory.id)
  return { history: copilotHistory, messages }
}
|
@ -11,7 +11,8 @@ export const saveMessageOnError = async ({
|
||||
historyId,
|
||||
selectedModel,
|
||||
setHistoryId,
|
||||
isRegenerating
|
||||
isRegenerating,
|
||||
message_source = "web-ui"
|
||||
}: {
|
||||
e: any
|
||||
setHistory: (history: ChatHistory) => void
|
||||
@ -22,7 +23,8 @@ export const saveMessageOnError = async ({
|
||||
historyId: string | null
|
||||
selectedModel: string
|
||||
setHistoryId: (historyId: string) => void
|
||||
isRegenerating: boolean
|
||||
isRegenerating: boolean,
|
||||
message_source?: "copilot" | "web-ui"
|
||||
}) => {
|
||||
if (
|
||||
e?.name === "AbortError" ||
|
||||
@ -65,7 +67,7 @@ export const saveMessageOnError = async ({
|
||||
2
|
||||
)
|
||||
} else {
|
||||
const newHistoryId = await saveHistory(userMessage)
|
||||
const newHistoryId = await saveHistory(userMessage, false, message_source)
|
||||
if (!isRegenerating) {
|
||||
await saveMessage(
|
||||
newHistoryId.id,
|
||||
@ -103,7 +105,8 @@ export const saveMessageOnSuccess = async ({
|
||||
message,
|
||||
image,
|
||||
fullText,
|
||||
source
|
||||
source,
|
||||
message_source = "web-ui"
|
||||
}: {
|
||||
historyId: string | null
|
||||
setHistoryId: (historyId: string) => void
|
||||
@ -112,7 +115,8 @@ export const saveMessageOnSuccess = async ({
|
||||
message: string
|
||||
image: string
|
||||
fullText: string
|
||||
source: any[]
|
||||
source: any[],
|
||||
message_source?: "copilot" | "web-ui"
|
||||
}) => {
|
||||
if (historyId) {
|
||||
if (!isRegenerate) {
|
||||
@ -136,7 +140,7 @@ export const saveMessageOnSuccess = async ({
|
||||
2
|
||||
)
|
||||
} else {
|
||||
const newHistoryId = await saveHistory(message)
|
||||
const newHistoryId = await saveHistory(message, false, message_source)
|
||||
await saveMessage(
|
||||
newHistoryId.id,
|
||||
selectedModel,
|
||||
|
@ -329,7 +329,8 @@ export const useMessage = () => {
|
||||
message,
|
||||
image,
|
||||
fullText,
|
||||
source
|
||||
source,
|
||||
message_source: "copilot"
|
||||
})
|
||||
|
||||
setIsProcessing(false)
|
||||
@ -345,7 +346,8 @@ export const useMessage = () => {
|
||||
setHistory,
|
||||
setHistoryId,
|
||||
userMessage: message,
|
||||
isRegenerating: isRegenerate
|
||||
isRegenerating: isRegenerate,
|
||||
message_source: "copilot"
|
||||
})
|
||||
|
||||
if (!errorSave) {
|
||||
@ -535,7 +537,8 @@ export const useMessage = () => {
|
||||
message,
|
||||
image,
|
||||
fullText,
|
||||
source: []
|
||||
source: [],
|
||||
message_source: "copilot"
|
||||
})
|
||||
|
||||
setIsProcessing(false)
|
||||
@ -551,7 +554,8 @@ export const useMessage = () => {
|
||||
setHistory,
|
||||
setHistoryId,
|
||||
userMessage: message,
|
||||
isRegenerating: isRegenerate
|
||||
isRegenerating: isRegenerate,
|
||||
message_source: "copilot"
|
||||
})
|
||||
|
||||
if (!errorSave) {
|
||||
|
@ -9,6 +9,18 @@ import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { PageAssistVectorStore } from "./PageAssistVectorStore"
|
||||
import { PageAssisCSVUrlLoader } from "@/loader/csv"
|
||||
import { PageAssisTXTUrlLoader } from "@/loader/txt"
|
||||
import { PageAssistDocxLoader } from "@/loader/docx"
|
||||
|
||||
/**
 * Reads the complete contents of a file into an `ArrayBuffer`.
 *
 * Delegates to the promise-based `Blob.arrayBuffer()` instead of wrapping a
 * `FileReader` by hand, so a failed read rejects with the underlying error
 * rather than with a bare progress event.
 *
 * @param file - The file whose bytes should be read.
 * @returns A promise resolving to the file's bytes.
 */
const readAsArrayBuffer = (file: File): Promise<ArrayBuffer> => {
  return file.arrayBuffer()
}
|
||||
|
||||
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
||||
console.log(`Processing knowledge with id: ${id}`)
|
||||
@ -58,6 +70,26 @@ export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
||||
knownledge_id: knowledge.id,
|
||||
file_id: doc.source_id
|
||||
})
|
||||
} else if (doc.type === "docx" || doc.type === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
|
||||
try {
|
||||
const loader = new PageAssistDocxLoader({
|
||||
fileName: doc.filename,
|
||||
buffer: await toArrayBufferFromBase64(
|
||||
doc.content
|
||||
)
|
||||
})
|
||||
|
||||
let docs = await loader.load()
|
||||
|
||||
const chunks = await textSplitter.splitDocuments(docs)
|
||||
|
||||
await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
|
||||
knownledge_id: knowledge.id,
|
||||
file_id: doc.source_id
|
||||
})
|
||||
} catch (error) {
|
||||
console.error(`Error processing knowledge with id: ${id}`, error)
|
||||
}
|
||||
} else {
|
||||
const loader = new PageAssisTXTUrlLoader({
|
||||
name: doc.filename,
|
||||
|
33
src/loader/docx.ts
Normal file
33
src/loader/docx.ts
Normal file
@ -0,0 +1,33 @@
|
||||
import { BaseDocumentLoader } from "langchain/document_loaders/base"
|
||||
import { Document } from "@langchain/core/documents"
|
||||
import * as mammoth from "mammoth"
|
||||
|
||||
/**
 * Constructor options accepted by the docx loader in this module.
 */
export interface WebLoaderParams {
  /** Name of the uploaded file; the loader records it as the `source` metadata. */
  fileName: string
  /** Raw bytes of the document, handed to Mammoth for text extraction. */
  buffer: ArrayBuffer
}
|
||||
|
||||
/**
 * Document loader that turns an in-memory .docx file into LangChain
 * documents by extracting its raw text with Mammoth.
 */
export class PageAssistDocxLoader
  extends BaseDocumentLoader
  implements WebLoaderParams
{
  fileName: string
  buffer: ArrayBuffer

  /**
   * @param params - File name and raw bytes of the document to load.
   */
  constructor(params: WebLoaderParams) {
    super()
    this.fileName = params.fileName
    this.buffer = params.buffer
  }

  /**
   * Extracts the raw text of the buffered document.
   *
   * @returns A single-element array holding the extracted text with the file
   * name as `source` metadata, or an empty array when no text was extracted.
   */
  public async load(): Promise<Document[]> {
    const { value: rawText } = await mammoth.extractRawText({
      arrayBuffer: this.buffer
    })

    // An empty extraction yields no documents at all.
    if (!rawText) {
      return []
    }

    const metadata = { source: this.fileName }
    return [new Document({ pageContent: rawText, metadata })]
  }
}
|
@ -1,16 +1,34 @@
|
||||
import {
|
||||
formatToChatHistory,
|
||||
formatToMessage,
|
||||
getRecentChatFromCopilot
|
||||
} from "@/db"
|
||||
import React from "react"
|
||||
import { SidePanelBody } from "~/components/Sidepanel/Chat/body"
|
||||
import { SidepanelForm } from "~/components/Sidepanel/Chat/form"
|
||||
import { SidepanelHeader } from "~/components/Sidepanel/Chat/header"
|
||||
import { useMessage } from "~/hooks/useMessage"
|
||||
|
||||
const SidepanelChat = () => {
|
||||
const SidepanelChat = () => {
|
||||
const drop = React.useRef<HTMLDivElement>(null)
|
||||
const [dropedFile, setDropedFile] = React.useState<File | undefined>()
|
||||
const [dropState, setDropState] = React.useState<
|
||||
"idle" | "dragging" | "error"
|
||||
>("idle")
|
||||
const {chatMode} = useMessage()
|
||||
const { chatMode, messages, setHistory, setHistoryId, setMessages } =
|
||||
useMessage()
|
||||
|
||||
const setRecentMessagesOnLoad = async () => {
|
||||
if (messages.length === 0) {
|
||||
const recentChat = await getRecentChatFromCopilot()
|
||||
if (recentChat) {
|
||||
setHistoryId(recentChat.history.id)
|
||||
setHistory(formatToChatHistory(recentChat.messages))
|
||||
setMessages(formatToMessage(recentChat.messages))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
React.useEffect(() => {
|
||||
if (!drop.current) {
|
||||
return
|
||||
@ -67,6 +85,7 @@ import { useMessage } from "~/hooks/useMessage"
|
||||
}
|
||||
}
|
||||
}, [])
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={drop}
|
||||
|
@ -10,6 +10,7 @@ export const toBase64 = (file: File | Blob): Promise<string> => {
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
export const toArrayBufferFromBase64 = async (base64: string) => {
|
||||
const res = await fetch(base64)
|
||||
const blob = await res.blob()
|
||||
|
Loading…
x
Reference in New Issue
Block a user