feat: Add support for Mammoth library for docx file uploads
This commit is contained in:
parent
845b725970
commit
8899a42331
@ -36,6 +36,7 @@
|
|||||||
"i18next-browser-languagedetector": "^7.2.0",
|
"i18next-browser-languagedetector": "^7.2.0",
|
||||||
"langchain": "^0.1.28",
|
"langchain": "^0.1.28",
|
||||||
"lucide-react": "^0.350.0",
|
"lucide-react": "^0.350.0",
|
||||||
|
"mammoth": "^1.7.2",
|
||||||
"ml-distance": "^4.0.1",
|
"ml-distance": "^4.0.1",
|
||||||
"pdfjs-dist": "4.0.379",
|
"pdfjs-dist": "4.0.379",
|
||||||
"property-information": "^6.4.1",
|
"property-information": "^6.4.1",
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
"uploadFile": {
|
"uploadFile": {
|
||||||
"label": "Upload File",
|
"label": "Upload File",
|
||||||
"uploadText": "Drag and drop a file here or click to upload",
|
"uploadText": "Drag and drop a file here or click to upload",
|
||||||
"uploadHint": "Supported file types: .pdf, .csv, .txt, .md",
|
"uploadHint": "Supported file types: .pdf, .csv, .txt, .md, .docx",
|
||||||
"required": "File is required"
|
"required": "File is required"
|
||||||
},
|
},
|
||||||
"submit": "Submit",
|
"submit": "Submit",
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
"uploadFile": {
|
"uploadFile": {
|
||||||
"label": "ഫയല് അപ്ലോഡ് ചെയ്യുക",
|
"label": "ഫയല് അപ്ലോഡ് ചെയ്യുക",
|
||||||
"uploadText": "ഇവിടെ ഒരു ഫയല് എടുത്തിടുക അല്ലെങ്കില് അപ്ലോഡ് ചെയ്യാന് ക്ലിക്ക് ചെയ്യുക",
|
"uploadText": "ഇവിടെ ഒരു ഫയല് എടുത്തിടുക അല്ലെങ്കില് അപ്ലോഡ് ചെയ്യാന് ക്ലിക്ക് ചെയ്യുക",
|
||||||
"uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md",
|
"uploadHint": "പിന്തുണയുള്ള ഫയല് തരങ്ങള്: .pdf, .csv, .txt, .md, .docx",
|
||||||
"required": "ഫയല് ആവശ്യമാണ്"
|
"required": "ഫയല് ആവശ്യമാണ്"
|
||||||
},
|
},
|
||||||
"submit": "സമര്പ്പിക്കുക",
|
"submit": "സമര്പ്പിക്കുക",
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
"uploadFile": {
|
"uploadFile": {
|
||||||
"label": "Загрузить файл",
|
"label": "Загрузить файл",
|
||||||
"uploadText": "Перетащите файл сюда или нажмите, чтобы загрузить",
|
"uploadText": "Перетащите файл сюда или нажмите, чтобы загрузить",
|
||||||
"uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md",
|
"uploadHint": "Поддерживаемые типы файлов: .pdf, .csv, .txt, .md, .docx",
|
||||||
"required": "Файл обязателен"
|
"required": "Файл обязателен"
|
||||||
},
|
},
|
||||||
"submit": "Отправить",
|
"submit": "Отправить",
|
||||||
|
@ -90,15 +90,16 @@ export const AddKnowledge = ({ open, setOpen }: Props) => {
|
|||||||
return e?.fileList
|
return e?.fileList
|
||||||
}}>
|
}}>
|
||||||
<Upload.Dragger
|
<Upload.Dragger
|
||||||
accept={".pdf, .csv, .txt, .md"}
|
accept={".pdf, .csv, .txt, .md, .docx"}
|
||||||
multiple={true}
|
multiple={true}
|
||||||
maxCount={10}
|
maxCount={10}
|
||||||
beforeUpload={(file) => {
|
beforeUpload={(file) => {
|
||||||
const allowedTypes = [
|
const allowedTypes = [
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
// "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
||||||
"text/csv",
|
"text/csv",
|
||||||
"text/plain"
|
"text/plain",
|
||||||
|
"text/markdown",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
]
|
]
|
||||||
.map((type) => type.toLowerCase())
|
.map((type) => type.toLowerCase())
|
||||||
.join(", ")
|
.join(", ")
|
||||||
|
@ -7,6 +7,7 @@ type HistoryInfo = {
|
|||||||
id: string
|
id: string
|
||||||
title: string
|
title: string
|
||||||
is_rag: boolean
|
is_rag: boolean
|
||||||
|
message_source?: "copilot" | "web-ui"
|
||||||
createdAt: number
|
createdAt: number
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,10 +225,10 @@ export const generateID = () => {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
export const saveHistory = async (title: string, is_rag?: boolean) => {
|
export const saveHistory = async (title: string, is_rag?: boolean, message_source?: "copilot" | "web-ui") => {
|
||||||
const id = generateID()
|
const id = generateID()
|
||||||
const createdAt = Date.now()
|
const createdAt = Date.now()
|
||||||
const history = { id, title, createdAt, is_rag }
|
const history = { id, title, createdAt, is_rag, message_source }
|
||||||
const db = new PageAssitDatabase()
|
const db = new PageAssitDatabase()
|
||||||
await db.addChatHistory(history)
|
await db.addChatHistory(history)
|
||||||
return history
|
return history
|
||||||
@ -465,3 +466,17 @@ export const importPrompts = async (prompts: Prompts) => {
|
|||||||
await db.addPrompt(prompt)
|
await db.addPrompt(prompt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the first stored chat history whose `message_source` is "copilot"
 * and loads the messages that belong to it.
 *
 * NOTE(review): "recent" relies on the order in which `getChatHistories`
 * returns its entries, which is not visible here — confirm it is newest-first.
 *
 * @returns The matching history together with its messages, or `null` when
 * no copilot history is stored.
 */
export const getRecentChatFromCopilot = async () => {
  const database = new PageAssitDatabase()
  const storedHistories = await database.getChatHistories()

  // An empty list simply produces no match, so one check covers both cases.
  const copilotHistory = storedHistories.find(
    (entry) => entry.message_source === "copilot"
  )
  if (!copilotHistory) {
    return null
  }

  const messages = await database.getChatHistory(copilotHistory.id)
  return { history: copilotHistory, messages }
}
|
@ -11,7 +11,8 @@ export const saveMessageOnError = async ({
|
|||||||
historyId,
|
historyId,
|
||||||
selectedModel,
|
selectedModel,
|
||||||
setHistoryId,
|
setHistoryId,
|
||||||
isRegenerating
|
isRegenerating,
|
||||||
|
message_source = "web-ui"
|
||||||
}: {
|
}: {
|
||||||
e: any
|
e: any
|
||||||
setHistory: (history: ChatHistory) => void
|
setHistory: (history: ChatHistory) => void
|
||||||
@ -22,7 +23,8 @@ export const saveMessageOnError = async ({
|
|||||||
historyId: string | null
|
historyId: string | null
|
||||||
selectedModel: string
|
selectedModel: string
|
||||||
setHistoryId: (historyId: string) => void
|
setHistoryId: (historyId: string) => void
|
||||||
isRegenerating: boolean
|
isRegenerating: boolean,
|
||||||
|
message_source?: "copilot" | "web-ui"
|
||||||
}) => {
|
}) => {
|
||||||
if (
|
if (
|
||||||
e?.name === "AbortError" ||
|
e?.name === "AbortError" ||
|
||||||
@ -65,7 +67,7 @@ export const saveMessageOnError = async ({
|
|||||||
2
|
2
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
const newHistoryId = await saveHistory(userMessage)
|
const newHistoryId = await saveHistory(userMessage, false, message_source)
|
||||||
if (!isRegenerating) {
|
if (!isRegenerating) {
|
||||||
await saveMessage(
|
await saveMessage(
|
||||||
newHistoryId.id,
|
newHistoryId.id,
|
||||||
@ -103,7 +105,8 @@ export const saveMessageOnSuccess = async ({
|
|||||||
message,
|
message,
|
||||||
image,
|
image,
|
||||||
fullText,
|
fullText,
|
||||||
source
|
source,
|
||||||
|
message_source = "web-ui"
|
||||||
}: {
|
}: {
|
||||||
historyId: string | null
|
historyId: string | null
|
||||||
setHistoryId: (historyId: string) => void
|
setHistoryId: (historyId: string) => void
|
||||||
@ -112,7 +115,8 @@ export const saveMessageOnSuccess = async ({
|
|||||||
message: string
|
message: string
|
||||||
image: string
|
image: string
|
||||||
fullText: string
|
fullText: string
|
||||||
source: any[]
|
source: any[],
|
||||||
|
message_source?: "copilot" | "web-ui"
|
||||||
}) => {
|
}) => {
|
||||||
if (historyId) {
|
if (historyId) {
|
||||||
if (!isRegenerate) {
|
if (!isRegenerate) {
|
||||||
@ -136,7 +140,7 @@ export const saveMessageOnSuccess = async ({
|
|||||||
2
|
2
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
const newHistoryId = await saveHistory(message)
|
const newHistoryId = await saveHistory(message, false, message_source)
|
||||||
await saveMessage(
|
await saveMessage(
|
||||||
newHistoryId.id,
|
newHistoryId.id,
|
||||||
selectedModel,
|
selectedModel,
|
||||||
|
@ -329,7 +329,8 @@ export const useMessage = () => {
|
|||||||
message,
|
message,
|
||||||
image,
|
image,
|
||||||
fullText,
|
fullText,
|
||||||
source
|
source,
|
||||||
|
message_source: "copilot"
|
||||||
})
|
})
|
||||||
|
|
||||||
setIsProcessing(false)
|
setIsProcessing(false)
|
||||||
@ -345,7 +346,8 @@ export const useMessage = () => {
|
|||||||
setHistory,
|
setHistory,
|
||||||
setHistoryId,
|
setHistoryId,
|
||||||
userMessage: message,
|
userMessage: message,
|
||||||
isRegenerating: isRegenerate
|
isRegenerating: isRegenerate,
|
||||||
|
message_source: "copilot"
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!errorSave) {
|
if (!errorSave) {
|
||||||
@ -535,7 +537,8 @@ export const useMessage = () => {
|
|||||||
message,
|
message,
|
||||||
image,
|
image,
|
||||||
fullText,
|
fullText,
|
||||||
source: []
|
source: [],
|
||||||
|
message_source: "copilot"
|
||||||
})
|
})
|
||||||
|
|
||||||
setIsProcessing(false)
|
setIsProcessing(false)
|
||||||
@ -551,7 +554,8 @@ export const useMessage = () => {
|
|||||||
setHistory,
|
setHistory,
|
||||||
setHistoryId,
|
setHistoryId,
|
||||||
userMessage: message,
|
userMessage: message,
|
||||||
isRegenerating: isRegenerate
|
isRegenerating: isRegenerate,
|
||||||
|
message_source: "copilot"
|
||||||
})
|
})
|
||||||
|
|
||||||
if (!errorSave) {
|
if (!errorSave) {
|
||||||
|
@ -9,6 +9,18 @@ import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
|||||||
import { PageAssistVectorStore } from "./PageAssistVectorStore"
|
import { PageAssistVectorStore } from "./PageAssistVectorStore"
|
||||||
import { PageAssisCSVUrlLoader } from "@/loader/csv"
|
import { PageAssisCSVUrlLoader } from "@/loader/csv"
|
||||||
import { PageAssisTXTUrlLoader } from "@/loader/txt"
|
import { PageAssisTXTUrlLoader } from "@/loader/txt"
|
||||||
|
import { PageAssistDocxLoader } from "@/loader/docx"
|
||||||
|
|
||||||
|
/**
 * Reads the full contents of a file into memory as an ArrayBuffer.
 *
 * Delegates to the standard `Blob.arrayBuffer()` method rather than wiring up
 * a FileReader by hand, so a failed read rejects with the underlying error
 * instead of a bare ProgressEvent and no type assertion on the result is
 * needed.
 *
 * @param file - The file whose bytes should be read.
 * @returns A promise that resolves to the file's raw bytes.
 */
const readAsArrayBuffer = (file: File): Promise<ArrayBuffer> => {
  return file.arrayBuffer()
}
|
||||||
|
|
||||||
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
||||||
console.log(`Processing knowledge with id: ${id}`)
|
console.log(`Processing knowledge with id: ${id}`)
|
||||||
@ -58,6 +70,26 @@ export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
|||||||
knownledge_id: knowledge.id,
|
knownledge_id: knowledge.id,
|
||||||
file_id: doc.source_id
|
file_id: doc.source_id
|
||||||
})
|
})
|
||||||
|
} else if (doc.type === "docx" || doc.type === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
|
||||||
|
try {
|
||||||
|
const loader = new PageAssistDocxLoader({
|
||||||
|
fileName: doc.filename,
|
||||||
|
buffer: await toArrayBufferFromBase64(
|
||||||
|
doc.content
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
let docs = await loader.load()
|
||||||
|
|
||||||
|
const chunks = await textSplitter.splitDocuments(docs)
|
||||||
|
|
||||||
|
await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
|
||||||
|
knownledge_id: knowledge.id,
|
||||||
|
file_id: doc.source_id
|
||||||
|
})
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error processing knowledge with id: ${id}`, error)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
const loader = new PageAssisTXTUrlLoader({
|
const loader = new PageAssisTXTUrlLoader({
|
||||||
name: doc.filename,
|
name: doc.filename,
|
||||||
|
33
src/loader/docx.ts
Normal file
33
src/loader/docx.ts
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import { BaseDocumentLoader } from "langchain/document_loaders/base"
|
||||||
|
import { Document } from "@langchain/core/documents"
|
||||||
|
import * as mammoth from "mammoth"
|
||||||
|
|
||||||
|
/**
 * Constructor arguments for {@link PageAssistDocxLoader}.
 */
export interface WebLoaderParams {
  /** File name; stored as the `source` metadata of the loaded document. */
  fileName: string
  /** Binary content that is handed to mammoth for text extraction. */
  buffer: ArrayBuffer
}

/**
 * Document loader that extracts the raw text of an in-memory .docx file with
 * mammoth and wraps it in a single LangChain document.
 */
export class PageAssistDocxLoader
  extends BaseDocumentLoader
  implements WebLoaderParams
{
  fileName: string
  buffer: ArrayBuffer

  /**
   * @param params - File name and binary content of the document to load.
   */
  constructor(params: WebLoaderParams) {
    super()
    this.fileName = params.fileName
    this.buffer = params.buffer
  }

  /**
   * Extracts the raw text from the buffer.
   *
   * @returns A one-element array holding the extracted text with
   * `{ source: fileName }` metadata, or an empty array when mammoth yields
   * no text.
   */
  public async load(): Promise<Document[]> {
    const { value: rawText } = await mammoth.extractRawText({
      arrayBuffer: this.buffer
    })

    // Nothing extracted: report no documents rather than an empty one.
    if (!rawText) {
      return []
    }

    return [
      new Document({
        pageContent: rawText,
        metadata: { source: this.fileName }
      })
    ]
  }
}
|
@ -1,3 +1,8 @@
|
|||||||
|
import {
|
||||||
|
formatToChatHistory,
|
||||||
|
formatToMessage,
|
||||||
|
getRecentChatFromCopilot
|
||||||
|
} from "@/db"
|
||||||
import React from "react"
|
import React from "react"
|
||||||
import { SidePanelBody } from "~/components/Sidepanel/Chat/body"
|
import { SidePanelBody } from "~/components/Sidepanel/Chat/body"
|
||||||
import { SidepanelForm } from "~/components/Sidepanel/Chat/form"
|
import { SidepanelForm } from "~/components/Sidepanel/Chat/form"
|
||||||
@ -10,7 +15,20 @@ import { useMessage } from "~/hooks/useMessage"
|
|||||||
const [dropState, setDropState] = React.useState<
|
const [dropState, setDropState] = React.useState<
|
||||||
"idle" | "dragging" | "error"
|
"idle" | "dragging" | "error"
|
||||||
>("idle")
|
>("idle")
|
||||||
const {chatMode} = useMessage()
|
const { chatMode, messages, setHistory, setHistoryId, setMessages } =
|
||||||
|
useMessage()
|
||||||
|
|
||||||
|
/**
 * Loads the conversation returned by `getRecentChatFromCopilot` into the
 * chat state, but only when no messages are currently held.
 *
 * NOTE(review): no call site is visible in this excerpt — confirm that this
 * function is actually invoked (e.g. from a mount effect).
 */
const setRecentMessagesOnLoad = async () => {
  // Leave an already populated conversation untouched.
  if (messages.length !== 0) {
    return
  }

  const recentChat = await getRecentChatFromCopilot()
  if (!recentChat) {
    return
  }

  const { history, messages: storedMessages } = recentChat
  setHistoryId(history.id)
  setHistory(formatToChatHistory(storedMessages))
  setMessages(formatToMessage(storedMessages))
}
|
||||||
|
|
||||||
React.useEffect(() => {
|
React.useEffect(() => {
|
||||||
if (!drop.current) {
|
if (!drop.current) {
|
||||||
return
|
return
|
||||||
@ -67,6 +85,7 @@ import { useMessage } from "~/hooks/useMessage"
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
ref={drop}
|
ref={drop}
|
||||||
|
@ -10,6 +10,7 @@ export const toBase64 = (file: File | Blob): Promise<string> => {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export const toArrayBufferFromBase64 = async (base64: string) => {
|
export const toArrayBufferFromBase64 = async (base64: string) => {
|
||||||
const res = await fetch(base64)
|
const res = await fetch(base64)
|
||||||
const blob = await res.blob()
|
const blob = await res.blob()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user