feat: Add support for Mammoth library for docx file uploads

This commit is contained in:
n4ze3m
2024-05-24 18:26:28 +05:30
parent 845b725970
commit 8899a42331
13 changed files with 130 additions and 20 deletions

33
src/loader/docx.ts Normal file
View File

@@ -0,0 +1,33 @@
import { BaseDocumentLoader } from "langchain/document_loaders/base"
import { Document } from "@langchain/core/documents"
import * as mammoth from "mammoth"
/**
 * Constructor input for {@link PageAssistDocxLoader}.
 */
export interface WebLoaderParams {
  /** Name of the uploaded file; the loader records it as the `source` metadata of the produced document. */
  fileName: string
  /** In-memory file contents; the loader hands this to `mammoth.extractRawText` as its `arrayBuffer` input. */
  buffer: ArrayBuffer
}
/**
 * Document loader that turns an in-memory file into a LangChain `Document`
 * by extracting its raw text with mammoth.
 *
 * The whole extracted text becomes a single document; no splitting is done here.
 */
export class PageAssistDocxLoader
  extends BaseDocumentLoader
  implements WebLoaderParams
{
  /** File name, stored as the `source` metadata of the loaded document. */
  fileName: string
  /** File contents passed to mammoth for text extraction. */
  buffer: ArrayBuffer

  /**
   * @param params - The file name and the file contents to load from.
   */
  constructor({ fileName: name, buffer: contents }: WebLoaderParams) {
    super()
    this.fileName = name
    this.buffer = contents
  }

  /**
   * Extracts the raw text of the buffer via `mammoth.extractRawText`.
   *
   * @returns A one-element array holding a `Document` whose `pageContent` is
   * the extracted text and whose metadata is `{ source: fileName }`, or an
   * empty array when the extracted text is empty.
   *
   * Errors raised by mammoth are not caught here, so they reject the
   * returned promise.
   */
  public async load(): Promise<Document[]> {
    const { value: rawText } = await mammoth.extractRawText({
      arrayBuffer: this.buffer
    })

    // Nothing extracted: report "no documents" rather than an empty document.
    if (!rawText) {
      return []
    }

    return [
      new Document({
        pageContent: rawText,
        metadata: { source: this.fileName }
      })
    ]
  }
}