Update dependencies and fix import paths

This commit is contained in:
n4ze3m
2024-04-05 20:28:29 +05:30
parent d91d4c4761
commit ac347a3970
43 changed files with 1142 additions and 99 deletions

View File

@@ -0,0 +1,201 @@
import { similarity as ml_distance_similarity } from "ml-distance"
import { VectorStore } from "@langchain/core/vectorstores"
import type { EmbeddingsInterface } from "@langchain/core/embeddings"
import { Document } from "@langchain/core/documents"
import { getVector, insertVector } from "@/db/vector"
/**
* Interface representing a vector in memory. It includes the content
* (text), the corresponding embedding (vector), and any associated
* metadata.
*/
interface PageAssistVector {
  // Raw text of the chunk this vector was computed from.
  content: string
  // Embedding for `content`, produced by the store's EmbeddingsInterface.
  embedding: number[]
  // Arbitrary per-document metadata carried through from the source Document.
  metadata: Record<string, any>
  // NOTE(review): addVectors() also persists a `file_id` field that is not
  // declared here — confirm whether the interface should include it.
}
/**
* Interface for the arguments that can be passed to the
* `MemoryVectorStore` constructor. It includes an optional `similarity`
* function.
*/
export interface MemoryVectorStoreArgs {
  // Knowledge-base id; also used to build the storage key `vector:<id>`.
  // Spelling "knownledge" (sic) is part of the public interface — do not rename.
  knownledge_id: string
  // Optional id of the source file the vectors belong to.
  file_id?: string
  // Similarity metric; defaults to cosine similarity when omitted.
  similarity?: typeof ml_distance_similarity.cosine
}
/**
* Class that extends `VectorStore` to store vectors in memory. Provides
* methods for adding documents, performing similarity searches, and
* creating instances from texts, documents, or an existing index.
*/
/**
 * Vector store persisted via the extension's local vector database
 * (`@/db/vector`). Vectors are stored under the key `vector:<knownledge_id>`
 * and similarity search is performed in memory over the fetched vectors.
 *
 * The "knownledge_id" (sic) spelling is preserved because it is part of the
 * public constructor args and of the persisted storage-key format.
 */
export class PageAssistVectorStore extends VectorStore {
  declare FilterType: (doc: Document) => boolean

  knownledge_id: string
  file_id?: string
  // Similarity metric used to score query/vector pairs (cosine by default).
  similarity: typeof ml_distance_similarity.cosine

  _vectorstoreType(): string {
    return "memory"
  }

  constructor(embeddings: EmbeddingsInterface, args: MemoryVectorStoreArgs) {
    super(embeddings, args)
    this.similarity = args?.similarity ?? ml_distance_similarity.cosine
    this.knownledge_id = args?.knownledge_id!
    this.file_id = args?.file_id
  }

  /**
   * Embeds the page content of each document and persists the resulting
   * vectors.
   * @param documents Documents to add to the store.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const texts = documents.map(({ pageContent }) => pageContent)
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents
    )
  }

  /**
   * Persists vector/document pairs under `vector:<knownledge_id>`, tagging
   * each record with this store's `file_id`.
   * @param vectors Embeddings, index-aligned with `documents`.
   * @param documents Source documents for content/metadata.
   */
  async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
    const memoryVectors = vectors.map((embedding, idx) => ({
      content: documents[idx].pageContent,
      embedding,
      metadata: documents[idx].metadata,
      file_id: this.file_id
    }))
    await insertVector(`vector:${this.knownledge_id}`, memoryVectors)
  }

  /**
   * Scores every stored vector against `query`, optionally filtered, and
   * returns the top `k` (Document, score) pairs in descending similarity.
   * @param query Query embedding.
   * @param k Maximum number of results.
   * @param filter Optional document predicate applied before scoring.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document, number][]> {
    const filterFunction = (memoryVector: PageAssistVector) => {
      if (!filter) {
        return true
      }
      const doc = new Document({
        metadata: memoryVector.metadata,
        pageContent: memoryVector.content
      })
      return filter(doc)
    }
    const pgVector = await getVector(`vector:${this.knownledge_id}`)
    const filteredMemoryVectors = pgVector.vectors.filter(filterFunction)
    const searches = filteredMemoryVectors
      .map((vector, index) => ({
        similarity: this.similarity(query, vector.embedding),
        index
      }))
      // FIX: the previous comparator returned 0 whenever a < b, which is an
      // inconsistent comparator and produced engine-dependent ordering.
      // A numeric descending comparator guarantees the true top-k.
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, k)
    const result: [Document, number][] = searches.map((search) => [
      new Document({
        metadata: filteredMemoryVectors[search.index].metadata,
        pageContent: filteredMemoryVectors[search.index].content
      }),
      search.similarity
    ])
    return result
  }

  /**
   * Builds a store from raw texts. `metadatas` may be one object (shared by
   * all texts) or an array index-aligned with `texts`.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    const docs: Document[] = []
    for (let i = 0; i < texts.length; i += 1) {
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas
      const newDoc = new Document({
        pageContent: texts[i],
        metadata
      })
      docs.push(newDoc)
    }
    return PageAssistVectorStore.fromDocuments(docs, embeddings, dbConfig)
  }

  /**
   * Builds a store and immediately embeds + persists `docs`.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    const instance = new this(embeddings, dbConfig)
    await instance.addDocuments(docs)
    return instance
  }

  /**
   * Opens a store over an already-persisted index without adding anything.
   */
  static async fromExistingIndex(
    embeddings: EmbeddingsInterface,
    dbConfig?: MemoryVectorStoreArgs
  ): Promise<PageAssistVectorStore> {
    const instance = new this(embeddings, dbConfig)
    return instance
  }
}

View File

@@ -1,419 +0,0 @@
import {
type ChatHistory as ChatHistoryType,
type Message as MessageType
} from "~/store/option"
// Metadata for one chat conversation shown in the history list.
type HistoryInfo = {
  id: string
  title: string
  is_rag: boolean
  createdAt: number
}
// Web-search context attached to a message (engine, query, and result links).
type WebSearch = {
  search_engine: string
  search_url: string
  search_query: string
  search_results: {
    title: string
    link: string
  }[]
}
// A single chat message belonging to the history identified by `history_id`.
type Message = {
  id: string
  history_id: string
  name: string
  role: string
  content: string
  images?: string[]
  sources?: string[]
  search?: WebSearch
  createdAt: number
}
// A shared-chat record (local id plus the remote share id and API endpoint).
type Webshare = {
  id: string
  title: string
  url: string
  api_url: string
  share_id: string
  createdAt: number
}
// A saved prompt; `is_system` marks prompts used as system messages.
type Prompt = {
  id: string
  title: string
  content: string
  is_system: boolean
  createdBy?: string
  createdAt: number
}
// Collection aliases used by the storage layer below.
type MessageHistory = Message[]
type ChatHistory = HistoryInfo[]
type Prompts = Prompt[]
/**
 * Thin persistence layer over `chrome.storage.local` for chat histories,
 * messages, prompts, webshares, and the user id.
 *
 * Storage layout: each chat history's messages live under their own key
 * (the history id); list-like collections live under fixed keys
 * ("chatHistories", "prompts", "webshares", "user_id").
 *
 * NOTE(review): most write methods call `this.db.set(...)` without awaiting
 * or a completion callback — writes are fire-and-forget, so a read issued
 * immediately after may observe stale data. Confirm this is acceptable.
 * (Class name "PageAssitDatabase" (sic) is public API — do not rename.)
 */
export class PageAssitDatabase {
  db: chrome.storage.StorageArea
  constructor() {
    this.db = chrome.storage.local
  }
  // Returns the messages stored under `id`, or [] when the key is absent.
  async getChatHistory(id: string): Promise<MessageHistory> {
    return new Promise((resolve, reject) => {
      this.db.get(id, (result) => {
        resolve(result[id] || [])
      })
    })
  }
  // Returns all conversation summaries (newest first by construction).
  async getChatHistories(): Promise<ChatHistory> {
    return new Promise((resolve, reject) => {
      this.db.get("chatHistories", (result) => {
        resolve(result.chatHistories || [])
      })
    })
  }
  // Prepends a new history entry to the stored list.
  async addChatHistory(history: HistoryInfo) {
    const chatHistories = await this.getChatHistories()
    const newChatHistories = [history, ...chatHistories]
    this.db.set({ chatHistories: newChatHistories })
  }
  // Prepends a message to its conversation (messages are stored newest-first).
  async addMessage(message: Message) {
    const history_id = message.history_id
    const chatHistory = await this.getChatHistory(history_id)
    const newChatHistory = [message, ...chatHistory]
    this.db.set({ [history_id]: newChatHistory })
  }
  // Removes a history entry from the list; the message key is left to
  // deleteMessage() (see deleteByHistoryId below).
  async removeChatHistory(id: string) {
    const chatHistories = await this.getChatHistories()
    const newChatHistories = chatHistories.filter(
      (history) => history.id !== id
    )
    this.db.set({ chatHistories: newChatHistories })
  }
  // Deletes a single message from a conversation by its id.
  async removeMessage(history_id: string, message_id: string) {
    const chatHistory = await this.getChatHistory(history_id)
    const newChatHistory = chatHistory.filter(
      (message) => message.id !== message_id
    )
    this.db.set({ [history_id]: newChatHistory })
  }
  // Wipes the entire storage area — prompts, webshares, and user id included.
  async clear() {
    this.db.clear()
  }
  // Deletes every conversation's message key plus the history list itself.
  async deleteChatHistory() {
    const chatHistories = await this.getChatHistories()
    for (const history of chatHistories) {
      this.db.remove(history.id)
    }
    this.db.remove("chatHistories")
  }
  // Deletes all messages of one conversation (the key named by history_id).
  async deleteMessage(history_id: string) {
    await this.db.remove(history_id)
  }
  async getAllPrompts(): Promise<Prompts> {
    return new Promise((resolve, reject) => {
      this.db.get("prompts", (result) => {
        resolve(result.prompts || [])
      })
    })
  }
  async addPrompt(prompt: Prompt) {
    const prompts = await this.getAllPrompts()
    const newPrompts = [prompt, ...prompts]
    this.db.set({ prompts: newPrompts })
  }
  async deletePrompt(id: string) {
    const prompts = await this.getAllPrompts()
    const newPrompts = prompts.filter((prompt) => prompt.id !== id)
    this.db.set({ prompts: newPrompts })
  }
  // Updates a prompt in place; note the map callback mutates the matched
  // prompt object directly before the list is re-stored.
  async updatePrompt(
    id: string,
    title: string,
    content: string,
    is_system: boolean
  ) {
    const prompts = await this.getAllPrompts()
    const newPrompts = prompts.map((prompt) => {
      if (prompt.id === id) {
        prompt.title = title
        prompt.content = content
        prompt.is_system = is_system
      }
      return prompt
    })
    this.db.set({ prompts: newPrompts })
  }
  async getPromptById(id: string) {
    const prompts = await this.getAllPrompts()
    return prompts.find((prompt) => prompt.id === id)
  }
  // Resolves the value stored under `id`, or [] when absent.
  async getWebshare(id: string) {
    return new Promise((resolve, reject) => {
      this.db.get(id, (result) => {
        resolve(result[id] || [])
      })
    })
  }
  async getAllWebshares(): Promise<Webshare[]> {
    return new Promise((resolve, reject) => {
      this.db.get("webshares", (result) => {
        resolve(result.webshares || [])
      })
    })
  }
  async addWebshare(webshare: Webshare) {
    const webshares = await this.getAllWebshares()
    const newWebshares = [webshare, ...webshares]
    this.db.set({ webshares: newWebshares })
  }
  async deleteWebshare(id: string) {
    const webshares = await this.getAllWebshares()
    const newWebshares = webshares.filter((webshare) => webshare.id !== id)
    this.db.set({ webshares: newWebshares })
  }
  // Resolves the stored user id, or "" when none has been set yet.
  async getUserID() {
    return new Promise((resolve, reject) => {
      this.db.get("user_id", (result) => {
        resolve(result.user_id || "")
      })
    })
  }
  async setUserID(id: string) {
    this.db.set({ user_id: id })
  }
}
/**
 * Generates a pseudo-random identifier of the form `pa_xxxx-xxxx-xxx-xxxx`,
 * where every `x` becomes a random lowercase hex digit.
 * Not cryptographically secure — uses Math.random.
 */
export const generateID = () => {
  return "pa_xxxx-xxxx-xxx-xxxx".replace(/[x]/g, () =>
    Math.floor(Math.random() * 16).toString(16)
  )
}
/**
 * Creates a chat-history record (generated id, current timestamp), persists
 * it, and returns it.
 */
export const saveHistory = async (title: string, is_rag?: boolean) => {
  const history = {
    id: generateID(),
    title,
    createdAt: Date.now(),
    is_rag
  }
  await new PageAssitDatabase().addChatHistory(history)
  return history
}
/**
 * Creates and persists a chat message, returning the stored record.
 * @param time Optional millisecond offset added to the creation timestamp
 *   (used to keep message ordering stable when saving several at once).
 */
export const saveMessage = async (
  history_id: string,
  name: string,
  role: string,
  content: string,
  images: string[],
  source?: any[],
  time?: number
) => {
  const message = {
    id: generateID(),
    history_id,
    name,
    role,
    content,
    images,
    createdAt: Date.now() + (time ?? 0),
    sources: source
  }
  await new PageAssitDatabase().addMessage(message)
  return message
}
/**
 * Converts stored messages into the LangChain-style chat-history shape,
 * ordered oldest-first by creation time.
 *
 * FIX: the previous version sorted the caller's array in place; a copy is
 * sorted instead so the input is left untouched.
 */
export const formatToChatHistory = (
  messages: MessageHistory
): ChatHistoryType => {
  return [...messages]
    .sort((a, b) => a.createdAt - b.createdAt)
    .map((message) => ({
      content: message.content,
      role: message.role as "user" | "assistant" | "system",
      images: message.images
    }))
}
/**
 * Converts stored messages into the UI message shape, ordered oldest-first.
 * Missing `sources`/`images` default to empty arrays.
 *
 * FIX: the previous version sorted the caller's array in place; a copy is
 * sorted instead so the input is left untouched.
 */
export const formatToMessage = (messages: MessageHistory): MessageType[] => {
  return [...messages]
    .sort((a, b) => a.createdAt - b.createdAt)
    .map((message) => ({
      isBot: message.role === "assistant",
      message: message.content,
      name: message.name,
      sources: message?.sources || [],
      images: message.images || []
    }))
}
/**
 * Deletes a conversation: removes its message key and its entry in the
 * history list. Returns the deleted id.
 */
export const deleteByHistoryId = async (history_id: string) => {
  const database = new PageAssitDatabase()
  await database.deleteMessage(history_id)
  await database.removeChatHistory(history_id)
  return history_id
}
/**
 * Renames the chat-history entry with the given id and re-stores the list.
 */
export const updateHistory = async (id: string, title: string) => {
  const db = new PageAssitDatabase()
  const histories = await db.getChatHistories()
  for (const history of histories) {
    if (history.id === id) {
      history.title = title
    }
  }
  db.db.set({ chatHistories: histories })
}
/**
 * Drops the most recent message of a conversation (messages are stored
 * newest-first, so shift() removes the latest) and re-stores the rest.
 */
export const removeMessageUsingHistoryId = async (history_id: string) => {
  const db = new PageAssitDatabase()
  const messages = await db.getChatHistory(history_id)
  messages.shift()
  await db.db.set({ [history_id]: messages })
}
/** Convenience wrapper returning every saved prompt. */
export const getAllPrompts = async () => {
  return await new PageAssitDatabase().getAllPrompts()
}
/**
 * Replaces the content of the message at `index` (index counted in
 * oldest-first order) and re-stores the conversation newest-first.
 */
export const updateMessageByIndex = async (
  history_id: string,
  index: number,
  message: string
) => {
  const db = new PageAssitDatabase()
  const oldestFirst = (await db.getChatHistory(history_id)).reverse()
  oldestFirst[index].content = message
  // reverse() again to restore the newest-first storage order
  await db.db.set({ [history_id]: oldestFirst.reverse() })
}
/**
 * Truncates a conversation for message editing: keeps the messages up to and
 * including oldest-first position `index`, discarding everything after.
 */
export const deleteChatForEdit = async (history_id: string, index: number) => {
  const db = new PageAssitDatabase()
  const oldestFirst = (await db.getChatHistory(history_id)).reverse()
  const kept = oldestFirst.slice(0, index + 1)
  await db.db.set({ [history_id]: kept.reverse() })
}
/**
 * Creates and persists a prompt (generated id, current timestamp) and
 * returns the stored record.
 */
export const savePrompt = async ({
  content,
  title,
  is_system = false
}: {
  title: string
  content: string
  is_system: boolean
}) => {
  const prompt = {
    id: generateID(),
    title,
    content,
    is_system,
    createdAt: Date.now()
  }
  await new PageAssitDatabase().addPrompt(prompt)
  return prompt
}
/** Deletes the prompt with the given id and returns that id. */
export const deletePromptById = async (id: string) => {
  await new PageAssitDatabase().deletePrompt(id)
  return id
}
/** Updates a stored prompt's fields and returns its id. */
export const updatePrompt = async ({
  content,
  id,
  title,
  is_system
}: {
  id: string
  title: string
  content: string
  is_system: boolean
}) => {
  await new PageAssitDatabase().updatePrompt(id, title, content, is_system)
  return id
}
/**
 * Looks up a prompt by id. Returns null for a missing/blank id and
 * undefined when no prompt matches.
 */
export const getPromptById = async (id: string) => {
  if (!id || id.trim() === "") return null
  return await new PageAssitDatabase().getPromptById(id)
}
/** Convenience wrapper returning every saved webshare record. */
export const getAllWebshares = async () => {
  return await new PageAssitDatabase().getAllWebshares()
}
/** Deletes the webshare with the given id and returns that id. */
export const deleteWebshare = async (id: string) => {
  await new PageAssitDatabase().deleteWebshare(id)
  return id
}
/**
 * Creates and persists a webshare record (generated id, current timestamp)
 * and returns it.
 */
export const saveWebshare = async ({
  title,
  url,
  api_url,
  share_id
}: {
  title: string
  url: string
  api_url: string
  share_id: string
}) => {
  const webshare = {
    id: generateID(),
    title,
    url,
    share_id,
    createdAt: Date.now(),
    api_url
  }
  await new PageAssitDatabase().addWebshare(webshare)
  return webshare
}
/**
 * Returns the persisted user id, generating and storing a fresh
 * `user_xxxx-xxxx-xxx-xxxx-xxxx` id (random hex digits) on first use.
 */
export const getUserId = async () => {
  const db = new PageAssitDatabase()
  const existing = (await db.getUserID()) as string
  if (existing && existing.trim() !== "") {
    return existing
  }
  const fresh = "user_xxxx-xxxx-xxx-xxxx-xxxx".replace(/[x]/g, () =>
    Math.floor(Math.random() * 16).toString(16)
  )
  db.setUserID(fresh)
  return fresh
}

View File

@@ -1,28 +1,4 @@
import { pdfDist } from "./pdfjs"
/**
 * Loads a PDF document from raw bytes via pdf.js, prompting the user for a
 * password when the document is encrypted.
 * @param data The PDF file contents.
 * @returns The loaded pdf.js document.
 * @throws Error when the PDF requires a password and the user supplies none.
 */
export const getPdf = async (data: ArrayBuffer) => {
  const pdf = pdfDist.getDocument({
    data,
    useWorkerFetch: false,
    isEvalSupported: false,
    useSystemFonts: true,
  });
  // pdf.js invokes this for encrypted documents; pass the user's answer back
  // through the provided callback.
  pdf.onPassword = (callback: any) => {
    const password = prompt("Enter the password: ")
    if (!password) {
      throw new Error("Password required to open the PDF.");
    }
    callback(password);
  };
  const pdfDocument = await pdf.promise;
  return pdfDocument
}
import { getPdf } from "./pdf"
const _getHtml = async () => {
const url = window.location.href

29
src/libs/pdf.ts Normal file
View File

@@ -0,0 +1,29 @@
import { pdfDist } from "./pdfjs"
/**
 * Loads a PDF document from raw bytes via pdf.js, prompting the user for a
 * password when the document is encrypted.
 * @param data The PDF file contents.
 * @returns The loaded pdf.js document.
 * @throws Error when the PDF requires a password and the user supplies none.
 */
export const getPdf = async (data: ArrayBuffer) => {
  const loadingTask = pdfDist.getDocument({
    data,
    useWorkerFetch: false,
    isEvalSupported: false,
    useSystemFonts: true
  })
  // pdf.js invokes this for encrypted documents.
  loadingTask.onPassword = (callback: any) => {
    const password = prompt("Enter the password: ")
    if (!password) {
      throw new Error("Password required to open the PDF.")
    }
    callback(password)
  }
  return await loadingTask.promise
}
/**
 * Fetches a PDF from a data/base64 URL and loads it with getPdf().
 */
export const processPdf = async (base64: string) => {
  const response = await fetch(base64)
  const bytes = await response.arrayBuffer()
  return await getPdf(bytes)
}

View File

@@ -0,0 +1,55 @@
import { getKnowledgeById, updateKnowledgeStatus } from "@/db/knowledge"
import { PageAssistPDFUrlLoader } from "@/loader/pdf-url"
import {
defaultEmbeddingChunkOverlap,
defaultEmbeddingChunkSize
} from "@/services/ollama"
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
import { PageAssistVectorStore } from "./PageAssistVectorStore"
/**
 * Embeds the sources of a knowledge base into the vector store.
 *
 * Flow: look up the knowledge record → mark it "processing" → for each PDF
 * source, load, split into chunks, and embed with the record's Ollama model →
 * mark "finished" (or "failed" on any error).
 *
 * NOTE(review): only sources of type "pdf"/"application/pdf" are handled;
 * other source types are silently skipped — confirm that is intended.
 * @param msg Incoming message from the caller; unused here.
 * @param id Knowledge-base id to process.
 */
export const processKnowledge = async (msg: any, id: string): Promise<void> => {
  console.log(`Processing knowledge with id: ${id}`)
  try {
    const knowledge = await getKnowledgeById(id)
    if (!knowledge) {
      console.error(`Knowledge with id ${id} not found`)
      return
    }
    await updateKnowledgeStatus(id, "processing")
    // Embedding model is chosen per knowledge base.
    const ollamaEmbedding = new OllamaEmbeddings({
      model: knowledge.embedding_model
    })
    // Chunking parameters come from user settings.
    const chunkSize = await defaultEmbeddingChunkSize()
    const chunkOverlap = await defaultEmbeddingChunkOverlap()
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap
    })
    for (const doc of knowledge.source) {
      if (doc.type === "pdf" || doc.type === "application/pdf") {
        // `doc.content` holds the PDF URL/data for the loader.
        const loader = new PageAssistPDFUrlLoader({
          name: doc.filename,
          url: doc.content
        })
        let docs = await loader.load()
        const chunks = await textSplitter.splitDocuments(docs)
        await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
          knownledge_id: knowledge.id,
          file_id: doc.source_id
        })
      }
    }
    await updateKnowledgeStatus(id, "finished")
  } catch (error) {
    // Any failure flips the record to "failed" so the UI can surface it.
    console.error(`Error processing knowledge with id: ${id}`, error)
    await updateKnowledgeStatus(id, "failed")
  } finally {
    console.log(`Finished processing knowledge with id: ${id}`)
  }
}