Update dependencies and fix import paths
This commit is contained in:
201
src/libs/PageAssistVectorStore.ts
Normal file
201
src/libs/PageAssistVectorStore.ts
Normal file
@@ -0,0 +1,201 @@
|
||||
import { similarity as ml_distance_similarity } from "ml-distance"
|
||||
import { VectorStore } from "@langchain/core/vectorstores"
|
||||
import type { EmbeddingsInterface } from "@langchain/core/embeddings"
|
||||
import { Document } from "@langchain/core/documents"
|
||||
import { getVector, insertVector } from "@/db/vector"
|
||||
|
||||
/**
 * Shape of one stored vector record: the chunk text, its embedding, the
 * metadata of the document it came from and, when available, the id of the
 * file that produced it.
 */
interface PageAssistVector {
  content: string
  embedding: number[]
  metadata: Record<string, any>
  /**
   * Id of the originating file. `addVectors` writes this field on every
   * record, so it is declared here to keep the read-side type in sync.
   */
  file_id?: string
}
|
||||
|
||||
/**
 * Constructor options for `PageAssistVectorStore`.
 *
 * - `knownledge_id` (spelling kept as-is, callers depend on it) selects the
 *   `vector:<knownledge_id>` storage key.
 * - `file_id`, when given, is stamped on every record added via the store.
 * - `similarity` overrides the scoring function; cosine is used otherwise.
 */
export interface MemoryVectorStoreArgs {
  knownledge_id: string
  file_id?: string
  similarity?: typeof ml_distance_similarity.cosine
}
|
||||
|
||||
/**
 * `VectorStore` implementation whose records are written and read through
 * the `insertVector` / `getVector` helpers from `@/db/vector` instead of
 * being kept in an in-process array. All records of one knowledge base are
 * stored under the key `vector:<knownledge_id>`.
 */
export class PageAssistVectorStore extends VectorStore {
  declare FilterType: (doc: Document) => boolean

  /** Knowledge-base id; used to build the `vector:<id>` storage key. */
  knownledge_id: string

  /** Optional source-file id, copied onto every record added via this instance. */
  file_id?: string

  /** Scoring function applied to (query, stored embedding); cosine by default. */
  similarity: typeof ml_distance_similarity.cosine

  // NOTE(review): still reports "memory" although records are persisted via
  // `@/db/vector`; left unchanged because callers may compare against it.
  _vectorstoreType(): string {
    return "memory"
  }

  /**
   * @param embeddings Embedding provider used by `addDocuments`.
   * @param args Store options; see `MemoryVectorStoreArgs`.
   */
  constructor(embeddings: EmbeddingsInterface, args: MemoryVectorStoreArgs) {
    super(embeddings, args)

    this.similarity = args?.similarity ?? ml_distance_similarity.cosine
    // The static factories accept an optional config, so `args` can be
    // undefined at runtime even though the signature requires it.
    this.knownledge_id = args?.knownledge_id!
    this.file_id = args?.file_id
  }

  /**
   * Embeds the page content of each document and stores the resulting
   * vectors together with the documents.
   * @param documents Documents to embed and store.
   * @returns Promise that resolves once the records have been inserted.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const texts = documents.map(({ pageContent }) => pageContent)
    const vectors = await this.embeddings.embedDocuments(texts)
    return this.addVectors(vectors, documents)
  }

  /**
   * Pairs each vector with the document at the same index and inserts the
   * records under `vector:<knownledge_id>`.
   * @param vectors Embeddings, one per document.
   * @param documents Documents corresponding to `vectors` by position.
   * @returns Promise that resolves once `insertVector` has completed.
   */
  async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
    const records = vectors.map((embedding, idx) => ({
      content: documents[idx].pageContent,
      embedding,
      metadata: documents[idx].metadata,
      file_id: this.file_id
    }))
    await insertVector(`vector:${this.knownledge_id}`, records)
  }

  /**
   * Scores every stored record of this knowledge base against `query` and
   * returns the `k` best matches, highest similarity first.
   * @param query Query embedding.
   * @param k Maximum number of results.
   * @param filter Optional predicate; records whose document it rejects are skipped.
   * @returns Tuples of `[Document, similarity]`.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document, number][]> {
    const passesFilter = (record: PageAssistVector): boolean => {
      if (!filter) {
        return true
      }
      return filter(
        new Document({
          metadata: record.metadata,
          pageContent: record.content
        })
      )
    }

    const stored = await getVector(`vector:${this.knownledge_id}`)
    // What `getVector` returns for a key with no data is not visible from
    // this file; a missing record is treated as "no vectors" so a search
    // on a not-yet-populated knowledge base yields [] instead of throwing.
    const candidates: PageAssistVector[] = (stored?.vectors ?? []).filter(
      passesFilter
    )

    return (
      candidates
        .map((record) => ({
          record,
          score: this.similarity(query, record.embedding)
        }))
        // Descending by score. The comparator must return a positive value
        // when `a` ranks after `b`; returning only -1 or 0 is inconsistent
        // and leaves the order unspecified.
        .sort((a, b) => b.score - a.score)
        .slice(0, k)
        .map(({ record, score }): [Document, number] => [
          new Document({
            metadata: record.metadata,
            pageContent: record.content
          }),
          score
        ])
    )
  }

  /**
   * Builds a store from raw texts: wraps each text in a `Document` and
   * delegates to `fromDocuments`.
   * @param texts Texts to add.
   * @param metadatas One metadata object per text, or a single object shared by all.
   * @param embeddings Embedding provider.
   * @param dbConfig Store options.
   * @returns The populated store.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    const docs = texts.map(
      (pageContent, i) =>
        new Document({
          pageContent,
          metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas
        })
    )
    return PageAssistVectorStore.fromDocuments(docs, embeddings, dbConfig)
  }

  /**
   * Builds a store and adds the given documents to it.
   * @param docs Documents to embed and store.
   * @param embeddings Embedding provider.
   * @param dbConfig Store options.
   * @returns The populated store.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    const instance = new this(embeddings, dbConfig)
    await instance.addDocuments(docs)
    return instance
  }

  /**
   * Creates a store bound to already-persisted records; nothing is
   * embedded or inserted.
   * @param embeddings Embedding provider.
   * @param dbConfig Store options.
   * @returns The new store instance.
   */
  static async fromExistingIndex(
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    return new this(embeddings, dbConfig)
  }
}
|
||||
419
src/libs/db.ts
419
src/libs/db.ts
@@ -1,419 +0,0 @@
|
||||
import {
|
||||
type ChatHistory as ChatHistoryType,
|
||||
type Message as MessageType
|
||||
} from "~/store/option"
|
||||
|
||||
/** One entry of the chat-history index (the list stored under `chatHistories`). */
type HistoryInfo = {
  id: string
  title: string
  // Optional: `saveHistory` forwards its optional `is_rag` argument as-is,
  // so an entry may be stored without this flag. Declaring it required made
  // that call fail under strict null checks.
  is_rag?: boolean
  createdAt: number
}

/** Web-search details that can be attached to a message via `Message.search`. */
type WebSearch = {
  search_engine: string
  search_url: string
  search_query: string
  search_results: {
    title: string
    link: string
  }[]
}

/** A single chat message; messages are stored in a list keyed by `history_id`. */
type Message = {
  id: string
  history_id: string
  name: string
  role: string
  content: string
  images?: string[]
  sources?: string[]
  search?: WebSearch
  createdAt: number
}

/** Record of a shared chat (the list stored under `webshares`). */
type Webshare = {
  id: string
  title: string
  url: string
  api_url: string
  share_id: string
  createdAt: number
}

/** A saved prompt (the list stored under `prompts`). */
type Prompt = {
  id: string
  title: string
  content: string
  is_system: boolean
  createdBy?: string
  createdAt: number
}

type MessageHistory = Message[]

type ChatHistory = HistoryInfo[]

type Prompts = Prompt[]
|
||||
|
||||
/**
 * Thin data-access layer over `chrome.storage.local`.
 *
 * Keys used by the methods below:
 * - `chatHistories`: list of `HistoryInfo`, newest first
 * - `<history id>`: list of `Message` for that chat, newest first
 * - `prompts`, `webshares`: lists, newest first
 * - `user_id`: string
 *
 * Every mutating method awaits its storage write before resolving, so a
 * caller that awaits the method can rely on the data being written and a
 * failed write surfaces as a rejection instead of a floating promise.
 * Reads keep the callback form of `get`.
 */
export class PageAssitDatabase {
  db: chrome.storage.StorageArea

  constructor() {
    this.db = chrome.storage.local
  }

  /**
   * Reads `key` and resolves with `fallback` when the stored value is
   * missing or falsy.
   */
  private read<T>(key: string, fallback: T): Promise<T> {
    return new Promise((resolve) => {
      this.db.get(key, (result) => {
        resolve(result[key] || fallback)
      })
    })
  }

  /** Returns the messages stored for chat `id` (empty list if none). */
  async getChatHistory(id: string): Promise<MessageHistory> {
    return this.read<MessageHistory>(id, [])
  }

  /** Returns the chat-history index (empty list if none). */
  async getChatHistories(): Promise<ChatHistory> {
    return this.read<ChatHistory>("chatHistories", [])
  }

  /** Prepends `history` to the chat-history index. */
  async addChatHistory(history: HistoryInfo) {
    const chatHistories = await this.getChatHistories()
    await this.db.set({ chatHistories: [history, ...chatHistories] })
  }

  /** Prepends `message` to the list stored under `message.history_id`. */
  async addMessage(message: Message) {
    const history_id = message.history_id
    const chatHistory = await this.getChatHistory(history_id)
    await this.db.set({ [history_id]: [message, ...chatHistory] })
  }

  /** Removes the entry with the given id from the chat-history index. */
  async removeChatHistory(id: string) {
    const chatHistories = await this.getChatHistories()
    await this.db.set({
      chatHistories: chatHistories.filter((history) => history.id !== id)
    })
  }

  /** Removes one message from the chat stored under `history_id`. */
  async removeMessage(history_id: string, message_id: string) {
    const chatHistory = await this.getChatHistory(history_id)
    await this.db.set({
      [history_id]: chatHistory.filter((message) => message.id !== message_id)
    })
  }

  /** Wipes the whole storage area. */
  async clear() {
    await this.db.clear()
  }

  /** Deletes every chat's message list and then the index itself. */
  async deleteChatHistory() {
    const chatHistories = await this.getChatHistories()
    // `remove` accepts a list of keys, so one awaited call covers both the
    // per-chat message lists and the index.
    await this.db.remove([
      ...chatHistories.map((history) => history.id),
      "chatHistories"
    ])
  }

  /** Deletes the message list stored under `history_id`. */
  async deleteMessage(history_id: string) {
    await this.db.remove(history_id)
  }

  /** Returns all saved prompts (empty list if none). */
  async getAllPrompts(): Promise<Prompts> {
    return this.read<Prompts>("prompts", [])
  }

  /** Prepends `prompt` to the saved prompts. */
  async addPrompt(prompt: Prompt) {
    const prompts = await this.getAllPrompts()
    await this.db.set({ prompts: [prompt, ...prompts] })
  }

  /** Removes the prompt with the given id. */
  async deletePrompt(id: string) {
    const prompts = await this.getAllPrompts()
    await this.db.set({
      prompts: prompts.filter((prompt) => prompt.id !== id)
    })
  }

  /** Overwrites title, content and system flag of the prompt with `id`. */
  async updatePrompt(
    id: string,
    title: string,
    content: string,
    is_system: boolean
  ) {
    const prompts = await this.getAllPrompts()
    const newPrompts = prompts.map((prompt) =>
      prompt.id === id ? { ...prompt, title, content, is_system } : prompt
    )
    await this.db.set({ prompts: newPrompts })
  }

  /** Returns the prompt with `id`, or `undefined` when there is none. */
  async getPromptById(id: string) {
    const prompts = await this.getAllPrompts()
    return prompts.find((prompt) => prompt.id === id)
  }

  /** Returns whatever is stored under `id`, or an empty list. */
  async getWebshare(id: string) {
    return this.read<unknown>(id, [])
  }

  /** Returns all webshare records (empty list if none). */
  async getAllWebshares(): Promise<Webshare[]> {
    return this.read<Webshare[]>("webshares", [])
  }

  /** Prepends `webshare` to the stored webshares. */
  async addWebshare(webshare: Webshare) {
    const webshares = await this.getAllWebshares()
    await this.db.set({ webshares: [webshare, ...webshares] })
  }

  /** Removes the webshare with the given id. */
  async deleteWebshare(id: string) {
    const webshares = await this.getAllWebshares()
    await this.db.set({
      webshares: webshares.filter((webshare) => webshare.id !== id)
    })
  }

  /** Returns the stored user id, or an empty string when unset. */
  async getUserID() {
    return this.read<string>("user_id", "")
  }

  /** Stores `id` as the user id. */
  async setUserID(id: string) {
    await this.db.set({ user_id: id })
  }
}
|
||||
|
||||
/**
 * Produces a random id shaped like `pa_xxxx-xxxx-xxx-xxxx`, where every
 * `x` of the template is replaced by a random lowercase hex digit.
 */
export const generateID = () => {
  const template = "pa_xxxx-xxxx-xxx-xxxx"
  let id = ""
  for (const ch of template) {
    // Only the `x` placeholders are randomised; prefix and dashes are kept.
    id += ch === "x" ? Math.floor(Math.random() * 16).toString(16) : ch
  }
  return id
}
|
||||
|
||||
/**
 * Creates a chat-history entry with a fresh id and the current timestamp,
 * stores it through `PageAssitDatabase.addChatHistory` and returns it.
 * @param title Title of the chat.
 * @param is_rag Optional flag stored on the entry unchanged.
 */
export const saveHistory = async (title: string, is_rag?: boolean) => {
  const history = {
    id: generateID(),
    title,
    createdAt: Date.now(),
    is_rag
  }
  await new PageAssitDatabase().addChatHistory(history)
  return history
}
|
||||
|
||||
/**
 * Builds a message record with a fresh id and stores it through
 * `PageAssitDatabase.addMessage`.
 * @param history_id Chat the message belongs to.
 * @param name Display name of the author.
 * @param role Role string stored on the message.
 * @param content Message text.
 * @param images Images attached to the message.
 * @param source Optional sources, stored under `sources`.
 * @param time Optional offset added to the current timestamp.
 * @returns The stored message record.
 */
export const saveMessage = async (
  history_id: string,
  name: string,
  role: string,
  content: string,
  images: string[],
  source?: any[],
  time?: number
) => {
  const id = generateID()
  const now = Date.now()
  // A falsy `time` (undefined or 0) leaves the timestamp untouched.
  const createdAt = time ? now + time : now
  const message = {
    id,
    history_id,
    name,
    role,
    content,
    images,
    createdAt,
    sources: source
  }
  await new PageAssitDatabase().addMessage(message)
  return message
}
|
||||
|
||||
/**
 * Converts stored messages into chat-history entries ordered by ascending
 * `createdAt`.
 *
 * The input array is copied before sorting so the caller's list is not
 * reordered as a side effect.
 * @param messages Stored messages in any order.
 * @returns Entries carrying `content`, `role` and `images`, oldest first.
 */
export const formatToChatHistory = (
  messages: MessageHistory
): ChatHistoryType => {
  return [...messages]
    .sort((a, b) => a.createdAt - b.createdAt)
    .map((message) => ({
      content: message.content,
      role: message.role as "user" | "assistant" | "system",
      images: message.images
    }))
}
|
||||
|
||||
/**
 * Converts stored messages into UI message objects ordered by ascending
 * `createdAt`.
 *
 * The input array is copied before sorting so the caller's list is not
 * reordered as a side effect.
 * @param messages Stored messages in any order.
 * @returns One object per message; `isBot` is true for role `"assistant"`,
 *   and missing `sources` / `images` become empty arrays.
 */
export const formatToMessage = (messages: MessageHistory): MessageType[] => {
  return [...messages]
    .sort((a, b) => a.createdAt - b.createdAt)
    .map((message) => ({
      isBot: message.role === "assistant",
      message: message.content,
      name: message.name,
      sources: message?.sources || [],
      images: message.images || []
    }))
}
|
||||
|
||||
/**
 * Deletes a chat: first its message list, then its entry in the
 * chat-history index.
 * @param history_id Id of the chat to delete.
 * @returns The same `history_id`.
 */
export const deleteByHistoryId = async (history_id: string) => {
  const store = new PageAssitDatabase()
  await store.deleteMessage(history_id)
  await store.removeChatHistory(history_id)
  return history_id
}
|
||||
|
||||
/**
 * Renames the chat-history entry with the given id.
 *
 * The write is awaited so the returned promise only resolves once the new
 * index has been handed to storage, and a failed write rejects instead of
 * being lost.
 * @param id Id of the entry to rename.
 * @param title New title.
 */
export const updateHistory = async (id: string, title: string) => {
  const db = new PageAssitDatabase()
  const chatHistories = await db.getChatHistories()
  const newChatHistories = chatHistories.map((history) =>
    history.id === id ? { ...history, title } : history
  )
  await db.db.set({ chatHistories: newChatHistories })
}
|
||||
|
||||
/**
 * Drops the first element of the message list stored under `history_id`
 * and writes the remainder back. Messages are prepended on insert, so the
 * first element is the most recently added one.
 * @param history_id Id of the chat to trim.
 */
export const removeMessageUsingHistoryId = async (history_id: string) => {
  const store = new PageAssitDatabase()
  const [, ...remaining] = await store.getChatHistory(history_id)
  await store.db.set({ [history_id]: remaining })
}
|
||||
|
||||
/** Convenience wrapper: returns all saved prompts from a fresh `PageAssitDatabase`. */
export const getAllPrompts = async () => {
  const prompts = await new PageAssitDatabase().getAllPrompts()
  return prompts
}
|
||||
|
||||
/**
 * Replaces the content of one message of a chat.
 *
 * `index` counts from the oldest message, while the stored list is newest
 * first, so the position is mirrored before the update.
 * @param history_id Id of the chat.
 * @param index Zero-based position counted from the end of the stored list.
 * @param message New content.
 */
export const updateMessageByIndex = async (
  history_id: string,
  index: number,
  message: string
) => {
  const store = new PageAssitDatabase()
  const stored = await store.getChatHistory(history_id)
  // Same element the reversed list would expose at `index`.
  const mirrored = stored.length - 1 - index
  stored[mirrored].content = message
  await store.db.set({ [history_id]: stored })
}
|
||||
|
||||
/**
 * Truncates a chat so that only the messages up to and including position
 * `index` (counted from the end of the stored list) remain.
 * @param history_id Id of the chat.
 * @param index Zero-based position of the last message to keep, counted
 *   from the end of the stored list.
 */
export const deleteChatForEdit = async (history_id: string, index: number) => {
  const store = new PageAssitDatabase()
  const stored = await store.getChatHistory(history_id)
  // Flip the list, cut, then flip back to the stored order.
  const flipped = stored.reverse()
  const kept = flipped.slice(0, index + 1)
  kept.reverse()
  await store.db.set({ [history_id]: kept })
}
|
||||
|
||||
/**
 * Creates a prompt with a fresh id and the current timestamp, stores it
 * and returns it.
 * @param content Prompt text.
 * @param title Prompt title.
 * @param is_system System-prompt flag; `false` when left undefined.
 */
export const savePrompt = async ({
  content,
  title,
  is_system = false
}: {
  title: string
  content: string
  is_system: boolean
}) => {
  const store = new PageAssitDatabase()
  const prompt = {
    id: generateID(),
    title,
    content,
    is_system,
    createdAt: Date.now()
  }
  await store.addPrompt(prompt)
  return prompt
}
|
||||
|
||||
/**
 * Deletes the prompt with the given id.
 * @returns The same `id`.
 */
export const deletePromptById = async (id: string) => {
  await new PageAssitDatabase().deletePrompt(id)
  return id
}
|
||||
|
||||
/**
 * Updates title, content and system flag of an existing prompt.
 * @returns The `id` of the prompt that was targeted.
 */
export const updatePrompt = async ({
  content,
  id,
  title,
  is_system
}: {
  id: string
  title: string
  content: string
  is_system: boolean
}) => {
  await new PageAssitDatabase().updatePrompt(id, title, content, is_system)
  return id
}
|
||||
|
||||
/**
 * Looks up a prompt by id.
 * @returns `null` for a missing or blank id, otherwise the result of
 *   `PageAssitDatabase.getPromptById` (the prompt, or `undefined`).
 */
export const getPromptById = async (id: string) => {
  const hasUsableId = Boolean(id) && id.trim() !== ""
  if (!hasUsableId) {
    return null
  }
  const prompt = await new PageAssitDatabase().getPromptById(id)
  return prompt
}
|
||||
|
||||
/** Convenience wrapper: returns all webshare records from a fresh `PageAssitDatabase`. */
export const getAllWebshares = async () => {
  const webshares = await new PageAssitDatabase().getAllWebshares()
  return webshares
}
|
||||
|
||||
/**
 * Deletes the webshare record with the given id.
 * @returns The same `id`.
 */
export const deleteWebshare = async (id: string) => {
  await new PageAssitDatabase().deleteWebshare(id)
  return id
}
|
||||
|
||||
/**
 * Creates a webshare record with a fresh id and the current timestamp,
 * stores it and returns it.
 */
export const saveWebshare = async ({
  title,
  url,
  api_url,
  share_id
}: {
  title: string
  url: string
  api_url: string
  share_id: string
}) => {
  const store = new PageAssitDatabase()
  const webshare = {
    id: generateID(),
    title,
    url,
    share_id,
    createdAt: Date.now(),
    api_url
  }
  await store.addWebshare(webshare)
  return webshare
}
|
||||
|
||||
/**
 * Returns the stored user id, creating and storing one on first use.
 *
 * A new id has the form `user_xxxx-xxxx-xxx-xxxx-xxxx` with random hex
 * digits. Storing it is awaited so a failing write rejects this call
 * instead of becoming an unhandled floating promise.
 */
export const getUserId = async () => {
  const db = new PageAssitDatabase()
  const id = (await db.getUserID()) as string
  if (id && id.trim() !== "") {
    return id
  }

  const user_id = "user_xxxx-xxxx-xxx-xxxx-xxxx".replace(/[x]/g, () =>
    Math.floor(Math.random() * 16).toString(16)
  )
  await db.setUserID(user_id)
  return user_id
}
|
||||
@@ -1,28 +1,4 @@
|
||||
import { pdfDist } from "./pdfjs"
|
||||
|
||||
/**
 * Opens a PDF from raw bytes via `pdfDist.getDocument` and resolves with
 * the loaded document.
 *
 * When the loader asks for a password the user is prompted; an empty or
 * cancelled prompt throws from the password handler.
 * @param data Raw PDF bytes.
 */
export const getPdf = async (data: ArrayBuffer) => {
  const loadingTask = pdfDist.getDocument({
    data,
    useWorkerFetch: false,
    isEvalSupported: false,
    useSystemFonts: true,
  });

  loadingTask.onPassword = (callback: any) => {
    const password = prompt("Enter the password: ");
    if (password) {
      callback(password);
      return;
    }
    throw new Error("Password required to open the PDF.");
  };

  return await loadingTask.promise;
}
|
||||
import { getPdf } from "./pdf"
|
||||
|
||||
const _getHtml = async () => {
|
||||
const url = window.location.href
|
||||
|
||||
29
src/libs/pdf.ts
Normal file
29
src/libs/pdf.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { pdfDist } from "./pdfjs"
|
||||
|
||||
/**
 * Opens a PDF from raw bytes via `pdfDist.getDocument` and resolves with
 * the loaded document.
 *
 * When the loader asks for a password the user is prompted; an empty or
 * cancelled prompt throws from the password handler.
 * @param data Raw PDF bytes.
 */
export const getPdf = async (data: ArrayBuffer) => {
  const loadingTask = pdfDist.getDocument({
    data,
    useWorkerFetch: false,
    isEvalSupported: false,
    useSystemFonts: true
  })

  loadingTask.onPassword = (callback: any) => {
    const password = prompt("Enter the password: ")
    if (password) {
      callback(password)
      return
    }
    throw new Error("Password required to open the PDF.")
  }

  return await loadingTask.promise
}
|
||||
|
||||
/**
 * Fetches the given string as a URL, reads the response body as bytes and
 * opens it with `getPdf`.
 * @param base64 Location passed to `fetch` — presumably a base64 `data:`
 *   URL, judging by the parameter name; confirm against callers.
 */
export const processPdf = async (base64: string) => {
  const response = await fetch(base64)
  const bytes = await response.arrayBuffer()
  return await getPdf(bytes)
}
|
||||
55
src/libs/process-knowledge.ts
Normal file
55
src/libs/process-knowledge.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { getKnowledgeById, updateKnowledgeStatus } from "@/db/knowledge"
|
||||
import { PageAssistPDFUrlLoader } from "@/loader/pdf-url"
|
||||
import {
|
||||
defaultEmbeddingChunkOverlap,
|
||||
defaultEmbeddingChunkSize
|
||||
} from "@/services/ollama"
|
||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { PageAssistVectorStore } from "./PageAssistVectorStore"
|
||||
|
||||
/**
 * Embeds the PDF sources of a knowledge base and stores the vectors.
 *
 * Steps: load the knowledge record, mark it "processing", split every PDF
 * source into chunks, embed and store them through `PageAssistVectorStore`,
 * then mark the record "finished". Any error marks it "failed". Sources
 * whose type is not a PDF type are skipped.
 * @param msg Not read by this function.
 * @param id Id of the knowledge record to process.
 */
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
  console.log(`Processing knowledge with id: ${id}`)
  try {
    const knowledge = await getKnowledgeById(id)
    if (!knowledge) {
      console.error(`Knowledge with id ${id} not found`)
      return
    }

    await updateKnowledgeStatus(id, "processing")

    const ollamaEmbedding = new OllamaEmbeddings({
      model: knowledge.embedding_model
    })
    const chunkSize = await defaultEmbeddingChunkSize()
    const chunkOverlap = await defaultEmbeddingChunkOverlap()
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap
    })

    for (const doc of knowledge.source) {
      const isPdf = doc.type === "pdf" || doc.type === "application/pdf"
      if (!isPdf) {
        continue
      }

      const loader = new PageAssistPDFUrlLoader({
        name: doc.filename,
        url: doc.content
      })
      const pages = await loader.load()
      const chunks = await textSplitter.splitDocuments(pages)
      // Sources are handled one at a time; each call inserts that
      // source's chunks tagged with its `source_id`.
      await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
        knownledge_id: knowledge.id,
        file_id: doc.source_id
      })
    }

    await updateKnowledgeStatus(id, "finished")
  } catch (error) {
    console.error(`Error processing knowledge with id: ${id}`, error)
    await updateKnowledgeStatus(id, "failed")
  } finally {
    console.log(`Finished processing knowledge with id: ${id}`)
  }
}
|
||||
Reference in New Issue
Block a user