feat: Add support for Mammoth library for docx file uploads
This commit is contained in:
@@ -9,6 +9,18 @@ import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { PageAssistVectorStore } from "./PageAssistVectorStore"
|
||||
import { PageAssisCSVUrlLoader } from "@/loader/csv"
|
||||
import { PageAssisTXTUrlLoader } from "@/loader/txt"
|
||||
import { PageAssistDocxLoader } from "@/loader/docx"
|
||||
|
||||
const readAsArrayBuffer = (file: File): Promise<ArrayBuffer> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
const reader = new FileReader()
|
||||
reader.onload = () => {
|
||||
resolve(reader.result as ArrayBuffer)
|
||||
}
|
||||
reader.onerror = reject
|
||||
reader.readAsArrayBuffer(file)
|
||||
})
|
||||
}
|
||||
|
||||
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
||||
console.log(`Processing knowledge with id: ${id}`)
|
||||
@@ -58,6 +70,26 @@ export const processKnowledge = async (msg: any, id: string): Promise<void> => {
|
||||
knownledge_id: knowledge.id,
|
||||
file_id: doc.source_id
|
||||
})
|
||||
} else if (doc.type === "docx" || doc.type === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
|
||||
try {
|
||||
const loader = new PageAssistDocxLoader({
|
||||
fileName: doc.filename,
|
||||
buffer: await toArrayBufferFromBase64(
|
||||
doc.content
|
||||
)
|
||||
})
|
||||
|
||||
let docs = await loader.load()
|
||||
|
||||
const chunks = await textSplitter.splitDocuments(docs)
|
||||
|
||||
await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
|
||||
knownledge_id: knowledge.id,
|
||||
file_id: doc.source_id
|
||||
})
|
||||
} catch (error) {
|
||||
console.error(`Error processing knowledge with id: ${id}`, error)
|
||||
}
|
||||
} else {
|
||||
const loader = new PageAssisTXTUrlLoader({
|
||||
name: doc.filename,
|
||||
|
||||
Reference in New Issue
Block a user