Add dependencies and update code for PDF parsing and searching

This commit is contained in:
n4ze3m
2024-02-25 00:12:46 +05:30
parent f87953ba5c
commit 06b32176a9
22 changed files with 577 additions and 249 deletions

View File

@@ -1,7 +1,14 @@
import { BaseDocumentLoader } from "langchain/document_loaders/base"
import { Document } from "@langchain/core/documents"
import { compile } from "html-to-text"
import { chromeRunTime } from "~libs/runtime"
/**
 * Reports whether the resource at `url` is served as a PDF.
 *
 * Awaits `chromeRunTime(url)` first (imported from `~libs/runtime`; what it
 * does is not visible from this file), then fetches the URL and inspects the
 * `Content-Type` response header. Only the media-type essence is compared, so
 * values that carry parameters (e.g. `application/pdf; charset=binary`) or use
 * different casing are still recognised.
 *
 * @param url - Address of the resource to probe.
 * @returns `true` when the response's media type is `application/pdf`,
 *   otherwise `false` (including when no `Content-Type` header is present).
 * @throws Propagates any rejection from `chromeRunTime` or `fetch`.
 */
const isPDFFetch = async (url: string): Promise<boolean> => {
  await chromeRunTime(url)
  const response = await fetch(url)
  const contentType = response.headers.get("content-type") ?? ""
  // Only the headers are needed: release the unread body instead of
  // downloading a potentially large document just to learn its type.
  void response.body?.cancel().catch(() => undefined)
  // Drop any parameters after the first ";" before comparing.
  const [mediaType = ""] = contentType.split(";")
  return mediaType.trim().toLowerCase() === "application/pdf"
}
export interface WebLoaderParams {
html: string
url: string