+ {/* Add new model button */}
+
+
+
+
+
+
+
+ {status === "pending" &&
}
+
+ {status === "success" && (
+
(
+ {t(`status.${text}`)}
+ )
+ },
+ {
+ title: t("columns.embeddings"),
+ dataIndex: "embedding_model",
+ key: "embedding_model"
+ },
+ {
+ title: t("columns.createdAt"),
+ dataIndex: "createdAt",
+ key: "createdAt",
+ render: (text: number) => new Date(text).toLocaleString()
+ },
+ {
+ title: t("columns.action"),
+ key: "action",
+ render: (text: string, record: any) => (
+
+
+
+
+
+ )
+ }
+ ]}
+ expandable={{
+ expandedRowRender: (record) => (
+
+ ),
+ defaultExpandAllRows: false
+ }}
+ bordered
+ dataSource={data}
+ rowKey={(record) => `${record.name}-${record.id}`}
+ />
+ )}
+
+
+
+
+ )
+}
diff --git a/src/components/Option/Playground/PlaygroundForm.tsx b/src/components/Option/Playground/PlaygroundForm.tsx
index 87c6005..4eec48c 100644
--- a/src/components/Option/Playground/PlaygroundForm.tsx
+++ b/src/components/Option/Playground/PlaygroundForm.tsx
@@ -4,7 +4,7 @@ import React from "react"
import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize"
import { toBase64 } from "~/libs/to-base64"
import { useMessageOption } from "~/hooks/useMessageOption"
-import { Checkbox, Dropdown, Switch, Tooltip } from "antd"
+import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd"
import { Image } from "antd"
import { useSpeechRecognition } from "~/hooks/useSpeechRecognition"
import { useWebUI } from "~/store/webui"
@@ -12,6 +12,7 @@ import { defaultEmbeddingModelForRag } from "~/services/ollama"
import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react"
import { getVariable } from "~/utils/select-varaible"
import { useTranslation } from "react-i18next"
+import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect"
type Props = {
dropedFile: File | undefined
@@ -249,6 +250,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
+
+
-
+
{!isSending ? (
{
const queryClient = useQueryClient()
diff --git a/src/components/Option/Settings/other.tsx b/src/components/Option/Settings/other.tsx
index a608f98..3ae298b 100644
--- a/src/components/Option/Settings/other.tsx
+++ b/src/components/Option/Settings/other.tsx
@@ -1,7 +1,7 @@
import { useQueryClient } from "@tanstack/react-query"
import { useDarkMode } from "~/hooks/useDarkmode"
import { useMessageOption } from "~/hooks/useMessageOption"
-import { PageAssitDatabase } from "~/libs/db"
+import { PageAssitDatabase } from "@/db"
import { Select } from "antd"
import { SUPPORTED_LANGUAGES } from "~/utils/supporetd-languages"
import { MoonIcon, SunIcon } from "lucide-react"
diff --git a/src/components/Option/Share/index.tsx b/src/components/Option/Share/index.tsx
index 5b1c3cb..8239a67 100644
--- a/src/components/Option/Share/index.tsx
+++ b/src/components/Option/Share/index.tsx
@@ -3,7 +3,7 @@ import { Form, Input, Skeleton, Table, Tooltip, message } from "antd"
import { Trash2 } from "lucide-react"
import { Trans, useTranslation } from "react-i18next"
import { SaveButton } from "~/components/Common/SaveButton"
-import { deleteWebshare, getAllWebshares, getUserId } from "~/libs/db"
+import { deleteWebshare, getAllWebshares, getUserId } from "@/db"
import { getPageShareUrl, setPageShareUrl } from "~/services/ollama"
import { verifyPageShareURL } from "~/utils/verify-page-share"
diff --git a/src/components/Option/Sidebar.tsx b/src/components/Option/Sidebar.tsx
index 87628ff..e18f567 100644
--- a/src/components/Option/Sidebar.tsx
+++ b/src/components/Option/Sidebar.tsx
@@ -5,7 +5,7 @@ import {
formatToMessage,
deleteByHistoryId,
updateHistory
-} from "~/libs/db"
+} from "@/db"
import { Empty, Skeleton } from "antd"
import { useMessageOption } from "~/hooks/useMessageOption"
import { PencilIcon, Trash2 } from "lucide-react"
diff --git a/src/libs/db.ts b/src/db/index.ts
similarity index 100%
rename from src/libs/db.ts
rename to src/db/index.ts
diff --git a/src/db/knowledge.ts b/src/db/knowledge.ts
new file mode 100644
index 0000000..b853ead
--- /dev/null
+++ b/src/db/knowledge.ts
@@ -0,0 +1,192 @@
+import { deleteVector, deleteVectorByFileId } from "./vector"
+
+export type Source = {
+ source_id: string
+ type: string
+ filename?: string
+ content: string
+}
+
+export type Knowledge = {
+ id: string
+ db_type: string
+ title: string
+ status: string
+ embedding_model: string
+ source: Source[]
+ knownledge: any
+ createdAt: number
+}
+export const generateID = () => {
+ return "pa_knowledge_xxxx-xxxx-xxx-xxxx".replace(/[x]/g, () => {
+ const r = Math.floor(Math.random() * 16)
+ return r.toString(16)
+ })
+}
+export class PageAssistKnowledge {
+ db: chrome.storage.StorageArea
+
+ constructor() {
+ this.db = chrome.storage.local
+ }
+
+ getAll = async (): Promise<Knowledge[]> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(null, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ const data = Object.keys(result).map((key) => result[key])
+ resolve(data)
+ }
+ })
+ })
+ }
+
+ getById = async (id: string): Promise<Knowledge> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(id, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve(result[id])
+ }
+ })
+ })
+ }
+
+ create = async (knowledge: Knowledge): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.set({ [knowledge.id]: knowledge }, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ })
+ }
+
+ update = async (knowledge: Knowledge): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.set({ [knowledge.id]: knowledge }, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ })
+ }
+
+ delete = async (id: string): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.remove(id, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ })
+ }
+
+ deleteSource = async (id: string, source_id: string): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(id, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ const data = result[id] as Knowledge
+ data.source = data.source.filter((s) => s.source_id !== source_id)
+ this.db.set({ [id]: data }, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ }
+ })
+ })
+ }
+}
+
+export const createKnowledge = async ({
+ source,
+ title,
+ embedding_model
+}: {
+ title: string
+ source: Source[]
+ embedding_model: string
+}) => {
+ const db = new PageAssistKnowledge()
+ const id = generateID()
+ const knowledge: Knowledge = {
+ id,
+ title,
+ db_type: "knowledge",
+ source,
+ status: "pending",
+ knownledge: {},
+ embedding_model,
+ createdAt: Date.now()
+ }
+ await db.create(knowledge)
+ return knowledge
+}
+
+export const getKnowledgeById = async (id: string) => {
+ const db = new PageAssistKnowledge()
+ return db.getById(id)
+}
+
+export const updateKnowledgeStatus = async (id: string, status: string) => {
+ const db = new PageAssistKnowledge()
+ const knowledge = await db.getById(id)
+ await db.update({
+ ...knowledge,
+ status
+ })
+}
+
+export const getAllKnowledge = async (status?: string) => {
+ const db = new PageAssistKnowledge()
+ const data = await db.getAll()
+
+ if (status) {
+ return data
+ .filter((d) => d.db_type === "knowledge")
+ .filter((d) => d.status === status)
+ .map((d) => {
+ d.source.forEach((s) => {
+ delete s.content
+ })
+ return d
+ })
+ .sort((a, b) => b.createdAt - a.createdAt)
+ }
+
+ return data
+ .filter((d) => d.db_type === "knowledge")
+ .map((d) => {
+ d.source.forEach((s) => {
+ delete s.content
+ })
+ return d
+ })
+ .sort((a, b) => b.createdAt - a.createdAt)
+}
+
+export const deleteKnowledge = async (id: string) => {
+ const db = new PageAssistKnowledge()
+ await db.delete(id)
+ await deleteVector(`vector:${id}`)
+}
+
+export const deleteSource = async (id: string, source_id: string) => {
+ const db = new PageAssistKnowledge()
+ await db.deleteSource(id, source_id)
+ await deleteVectorByFileId(`vector:${id}`, source_id)
+}
diff --git a/src/db/vector.ts b/src/db/vector.ts
new file mode 100644
index 0000000..0feb3b7
--- /dev/null
+++ b/src/db/vector.ts
@@ -0,0 +1,131 @@
+interface PageAssistVector {
+ file_id: string
+ content: string
+ embedding: number[]
+ metadata: Record<string, any>
+}
+
+export type VectorData = {
+ id: string
+ vectors: PageAssistVector[]
+}
+
+export class PageAssistVectorDb {
+ db: chrome.storage.StorageArea
+
+ constructor() {
+ this.db = chrome.storage.local
+ }
+
+ insertVector = async (
+ id: string,
+ vector: PageAssistVector[]
+ ): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(id, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ const data = result[id] as VectorData
+ if (!data) {
+ console.log("Creating new vector")
+ this.db.set({ [id]: { id, vectors: [vector] } }, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ } else {
+ console.log("Concatenating vectors")
+ this.db.set(
+ {
+ [id]: {
+ ...data,
+ vectors: data.vectors.concat(vector)
+ }
+ },
+ () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ }
+ )
+ }
+ }
+ })
+ })
+ }
+
+ deleteVector = async (id: string): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.remove(id, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ })
+ }
+
+ deleteVectorByFileId = async (id: string, file_id: string): Promise<void> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(id, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ const data = result[id] as VectorData
+ data.vectors = data.vectors.filter((v) => v.file_id !== file_id)
+ this.db.set({ [id]: data }, () => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve()
+ }
+ })
+ }
+ })
+ })
+ }
+
+ getVector = async (id: string): Promise<VectorData> => {
+ return new Promise((resolve, reject) => {
+ this.db.get(id, (result) => {
+ if (chrome.runtime.lastError) {
+ reject(chrome.runtime.lastError)
+ } else {
+ resolve(result[id] as VectorData)
+ }
+ })
+ })
+ }
+}
+
+export const insertVector = async (
+ id: string,
+ vector: PageAssistVector[]
+): Promise<void> => {
+ const db = new PageAssistVectorDb()
+ return db.insertVector(id, vector)
+}
+
+export const getVector = async (id: string): Promise<VectorData> => {
+ const db = new PageAssistVectorDb()
+ return db.getVector(id)
+}
+
+export const deleteVector = async (id: string): Promise<void> => {
+ const db = new PageAssistVectorDb()
+ return db.deleteVector(id)
+}
+
+export const deleteVectorByFileId = async (
+ id: string,
+ file_id: string
+): Promise<void> => {
+ const db = new PageAssistVectorDb()
+ return db.deleteVectorByFileId(id, file_id)
+}
diff --git a/src/entries/background.ts b/src/entries/background.ts
index d4f075b..5767b7c 100644
--- a/src/entries/background.ts
+++ b/src/entries/background.ts
@@ -1,4 +1,4 @@
-
+import { processKnowledge } from "@/libs/process-knowledge"
import { getOllamaURL, isOllamaRunning } from "../services/ollama"
const progressHuman = (completed: number, total: number) => {
return ((completed / total) * 100).toFixed(0) + "%"
@@ -78,13 +78,16 @@ export default defineBackground({
main() {
chrome.runtime.onMessage.addListener(async (message) => {
if (message.type === "sidepanel") {
- chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
- const tab = tabs[0]
- chrome.sidePanel.open({
- // tabId: tab.id!,
- windowId: tab.windowId!,
- })
- })
+ chrome.tabs.query(
+ { active: true, currentWindow: true },
+ async (tabs) => {
+ const tab = tabs[0]
+ chrome.sidePanel.open({
+ // tabId: tab.id!,
+ windowId: tab.windowId!
+ })
+ }
+ )
} else if (message.type === "pull_model") {
const ollamaURL = await getOllamaURL()
@@ -93,8 +96,7 @@ export default defineBackground({
if (!isRunning) {
chrome.action.setBadgeText({ text: "E" })
chrome.action.setBadgeBackgroundColor({ color: "#FF0000" })
- chrome.action.setTitle({ title: "Ollama is not running"
- })
+ chrome.action.setTitle({ title: "Ollama is not running" })
setTimeout(() => {
clearBadge()
}, 5000)
@@ -111,12 +113,15 @@ export default defineBackground({
chrome.commands.onCommand.addListener((command) => {
switch (command) {
case "execute_side_panel":
- chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
- const tab = tabs[0]
- chrome.sidePanel.open({
- windowId: tab.windowId!
- })
- })
+ chrome.tabs.query(
+ { active: true, currentWindow: true },
+ async (tabs) => {
+ const tab = tabs[0]
+ chrome.sidePanel.open({
+ windowId: tab.windowId!
+ })
+ }
+ )
break
default:
break
@@ -131,14 +136,17 @@ export default defineBackground({
chrome.contextMenus.onClicked.addListener((info, tab) => {
if (info.menuItemId === "open-side-panel-pa") {
- chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
- const tab = tabs[0]
- await chrome.sidePanel.open({
- windowId: tab.windowId!,
- })
- })
+ chrome.tabs.query(
+ { active: true, currentWindow: true },
+ async (tabs) => {
+ const tab = tabs[0]
+ await chrome.sidePanel.open({
+ windowId: tab.windowId!
+ })
+ }
+ )
}
})
},
persistent: true
-})
\ No newline at end of file
+})
diff --git a/src/hooks/chat-helper/index.ts b/src/hooks/chat-helper/index.ts
index 51d320c..15562ec 100644
--- a/src/hooks/chat-helper/index.ts
+++ b/src/hooks/chat-helper/index.ts
@@ -1,4 +1,4 @@
-import { saveHistory, saveMessage } from "@/libs/db"
+import { saveHistory, saveMessage } from "@/db"
import { ChatHistory } from "@/store/option"
export const saveMessageOnError = async ({
diff --git a/src/hooks/useMessageOption.tsx b/src/hooks/useMessageOption.tsx
index f0b0d63..880757e 100644
--- a/src/hooks/useMessageOption.tsx
+++ b/src/hooks/useMessageOption.tsx
@@ -15,7 +15,7 @@ import {
getPromptById,
removeMessageUsingHistoryId,
updateMessageByIndex
-} from "~/libs/db"
+} from "@/db"
import { useNavigate } from "react-router-dom"
import { notification } from "antd"
import { getSystemPromptForWeb } from "~/web/web"
diff --git a/src/i18n/lang/en.ts b/src/i18n/lang/en.ts
index 182fc03..dd5a801 100644
--- a/src/i18n/lang/en.ts
+++ b/src/i18n/lang/en.ts
@@ -3,12 +3,13 @@ import playground from "@/assets/locale/en/playground.json";
import common from "@/assets/locale/en/common.json";
import sidepanel from "@/assets/locale/en/sidepanel.json";
import settings from "@/assets/locale/en/settings.json";
-
+import knownledge from "@/assets/locale/en/knownledge.json";
export const en = {
option,
playground,
common,
sidepanel,
- settings
+ settings,
+ knownledge
}
\ No newline at end of file
diff --git a/src/i18n/lang/ja.ts b/src/i18n/lang/ja.ts
index 2026585..37095a3 100644
--- a/src/i18n/lang/ja.ts
+++ b/src/i18n/lang/ja.ts
@@ -3,6 +3,7 @@ import playground from "@/assets/locale/ja-JP/playground.json";
import common from "@/assets/locale/ja-JP/common.json";
import sidepanel from "@/assets/locale/ja-JP/sidepanel.json";
import settings from "@/assets/locale/ja-JP/settings.json";
+import knownledge from "@/assets/locale/ja-JP/knownledge.json";
export const ja = {
@@ -10,5 +11,6 @@ export const ja = {
playground,
common,
sidepanel,
- settings
+ settings,
+ knownledge
}
\ No newline at end of file
diff --git a/src/i18n/lang/ml.ts b/src/i18n/lang/ml.ts
index 315e9c4..1cb8e7d 100644
--- a/src/i18n/lang/ml.ts
+++ b/src/i18n/lang/ml.ts
@@ -3,12 +3,13 @@ import playground from "@/assets/locale/ml/playground.json";
import common from "@/assets/locale/ml/common.json";
import sidepanel from "@/assets/locale/ml/sidepanel.json";
import settings from "@/assets/locale/ml/settings.json";
-
+import knownledge from "@/assets/locale/ml/knownledge.json";
export const ml = {
option,
playground,
common,
sidepanel,
- settings
+ settings,
+ knownledge
}
\ No newline at end of file
diff --git a/src/i18n/lang/zh.ts b/src/i18n/lang/zh.ts
index 0b816dc..2d55196 100644
--- a/src/i18n/lang/zh.ts
+++ b/src/i18n/lang/zh.ts
@@ -3,6 +3,7 @@ import playground from "@/assets/locale/zh/playground.json";
import common from "@/assets/locale/zh/common.json";
import sidepanel from "@/assets/locale/zh/sidepanel.json";
import settings from "@/assets/locale/zh/settings.json";
+import knownledge from "@/assets/locale/zh/knownledge.json";
export const zh = {
@@ -10,5 +11,6 @@ export const zh = {
playground,
common,
sidepanel,
- settings
+ settings,
+ knownledge
}
\ No newline at end of file
diff --git a/src/libs/PageAssistVectorStore.ts b/src/libs/PageAssistVectorStore.ts
new file mode 100644
index 0000000..f3ae7b4
--- /dev/null
+++ b/src/libs/PageAssistVectorStore.ts
@@ -0,0 +1,201 @@
+import { similarity as ml_distance_similarity } from "ml-distance"
+import { VectorStore } from "@langchain/core/vectorstores"
+import type { EmbeddingsInterface } from "@langchain/core/embeddings"
+import { Document } from "@langchain/core/documents"
+import { getVector, insertVector } from "@/db/vector"
+
+/**
+ * Interface representing a vector in memory. It includes the content
+ * (text), the corresponding embedding (vector), and any associated
+ * metadata.
+ */
+interface PageAssistVector {
+ content: string
+ embedding: number[]
+ metadata: Record<string, any>
+}
+
+/**
+ * Interface for the arguments that can be passed to the
+ * `MemoryVectorStore` constructor. It includes an optional `similarity`
+ * function.
+ */
+export interface MemoryVectorStoreArgs {
+ knownledge_id: string
+ file_id?: string
+ similarity?: typeof ml_distance_similarity.cosine
+}
+
+/**
+ * Class that extends `VectorStore` to store vectors in memory. Provides
+ * methods for adding documents, performing similarity searches, and
+ * creating instances from texts, documents, or an existing index.
+ */
+export class PageAssistVectorStore extends VectorStore {
+ declare FilterType: (doc: Document) => boolean
+
+ knownledge_id: string
+
+ file_id?: string
+
+ // memoryVectors: PageAssistVector[] = []
+
+ similarity: typeof ml_distance_similarity.cosine
+
+ _vectorstoreType(): string {
+ return "memory"
+ }
+
+ constructor(embeddings: EmbeddingsInterface, args: MemoryVectorStoreArgs) {
+ super(embeddings, args)
+
+ this.similarity = args?.similarity ?? ml_distance_similarity.cosine
+
+ this.knownledge_id = args?.knownledge_id!
+
+ this.file_id = args?.file_id
+ }
+
+ /**
+ * Method to add documents to the memory vector store. It extracts the
+ * text from each document, generates embeddings for them, and adds the
+ * resulting vectors to the store.
+ * @param documents Array of `Document` instances to be added to the store.
+ * @returns Promise that resolves when all documents have been added.
+ */
+ async addDocuments(documents: Document[]): Promise<void> {
+ const texts = documents.map(({ pageContent }) => pageContent)
+ return this.addVectors(
+ await this.embeddings.embedDocuments(texts),
+ documents
+ )
+ }
+
+ /**
+ * Method to add vectors to the memory vector store. It creates
+ * `PageAssistVector` instances for each vector and document pair and adds
+ * them to the store.
+ * @param vectors Array of vectors to be added to the store.
+ * @param documents Array of `Document` instances corresponding to the vectors.
+ * @returns Promise that resolves when all vectors have been added.
+ */
+ async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
+ const memoryVectors = vectors.map((embedding, idx) => ({
+ content: documents[idx].pageContent,
+ embedding,
+ metadata: documents[idx].metadata,
+ file_id: this.file_id
+ }))
+ console.log(`vector:${this.knownledge_id}`)
+ await insertVector(`vector:${this.knownledge_id}`, memoryVectors)
+ }
+
+ /**
+ * Method to perform a similarity search in the memory vector store. It
+ * calculates the similarity between the query vector and each vector in
+ * the store, sorts the results by similarity, and returns the top `k`
+ * results along with their scores.
+ * @param query Query vector to compare against the vectors in the store.
+ * @param k Number of top results to return.
+ * @param filter Optional filter function to apply to the vectors before performing the search.
+ * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
+ */
+ async similaritySearchVectorWithScore(
+ query: number[],
+ k: number,
+ filter?: this["FilterType"]
+ ): Promise<[Document, number][]> {
+ const filterFunction = (memoryVector: PageAssistVector) => {
+ if (!filter) {
+ return true
+ }
+
+ const doc = new Document({
+ metadata: memoryVector.metadata,
+ pageContent: memoryVector.content
+ })
+ return filter(doc)
+ }
+ const pgVector = await getVector(`vector:${this.knownledge_id}`)
+ const filteredMemoryVectors = pgVector.vectors.filter(filterFunction)
+ const searches = filteredMemoryVectors
+ .map((vector, index) => ({
+ similarity: this.similarity(query, vector.embedding),
+ index
+ }))
+ .sort((a, b) => (a.similarity > b.similarity ? -1 : 0))
+ .slice(0, k)
+
+ const result: [Document, number][] = searches.map((search) => [
+ new Document({
+ metadata: filteredMemoryVectors[search.index].metadata,
+ pageContent: filteredMemoryVectors[search.index].content
+ }),
+ search.similarity
+ ])
+
+ return result
+ }
+
+ /**
+ * Static method to create a `MemoryVectorStore` instance from an array of
+ * texts. It creates a `Document` for each text and metadata pair, and
+ * adds them to the store.
+ * @param texts Array of texts to be added to the store.
+ * @param metadatas Array or single object of metadata corresponding to the texts.
+ * @param embeddings `Embeddings` instance used to generate embeddings for the texts.
+ * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
+ * @returns Promise that resolves with a new `MemoryVectorStore` instance.
+ */
+ static async fromTexts(
+ texts: string[],
+ metadatas: object[] | object,
+ embeddings: EmbeddingsInterface,
+ dbConfig?: MemoryVectorStoreArgs
+ ): Promise<PageAssistVectorStore> {
+ const docs: Document[] = []
+ for (let i = 0; i < texts.length; i += 1) {
+ const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas
+ const newDoc = new Document({
+ pageContent: texts[i],
+ metadata
+ })
+ docs.push(newDoc)
+ }
+ return PageAssistVectorStore.fromDocuments(docs, embeddings, dbConfig)
+ }
+
+ /**
+ * Static method to create a `MemoryVectorStore` instance from an array of
+ * `Document` instances. It adds the documents to the store.
+ * @param docs Array of `Document` instances to be added to the store.
+ * @param embeddings `Embeddings` instance used to generate embeddings for the documents.
+ * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
+ * @returns Promise that resolves with a new `MemoryVectorStore` instance.
+ */
+ static async fromDocuments(
+ docs: Document[],
+ embeddings: EmbeddingsInterface,
+ dbConfig?: MemoryVectorStoreArgs
+ ): Promise<PageAssistVectorStore> {
+ const instance = new this(embeddings, dbConfig)
+ await instance.addDocuments(docs)
+ return instance
+ }
+
+ /**
+ * Static method to create a `MemoryVectorStore` instance from an existing
+ * index. It creates a new `MemoryVectorStore` instance without adding any
+ * documents or vectors.
+ * @param embeddings `Embeddings` instance used to generate embeddings for the documents.
+ * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance.
+ * @returns Promise that resolves with a new `MemoryVectorStore` instance.
+ */
+ static async fromExistingIndex(
+ embeddings: EmbeddingsInterface,
+ dbConfig?: MemoryVectorStoreArgs
+ ): Promise<PageAssistVectorStore> {
+ const instance = new this(embeddings, dbConfig)
+ return instance
+ }
+}
diff --git a/src/libs/get-html.ts b/src/libs/get-html.ts
index 465e480..bed1f43 100644
--- a/src/libs/get-html.ts
+++ b/src/libs/get-html.ts
@@ -1,28 +1,4 @@
-import { pdfDist } from "./pdfjs"
-
-export const getPdf = async (data: ArrayBuffer) => {
- const pdf = pdfDist.getDocument({
- data,
- useWorkerFetch: false,
- isEvalSupported: false,
- useSystemFonts: true,
- });
-
- pdf.onPassword = (callback: any) => {
- const password = prompt("Enter the password: ")
- if (!password) {
- throw new Error("Password required to open the PDF.");
- }
- callback(password);
- };
-
-
- const pdfDocument = await pdf.promise;
-
-
- return pdfDocument
-
-}
+import { getPdf } from "./pdf"
const _getHtml = async () => {
const url = window.location.href
diff --git a/src/libs/pdf.ts b/src/libs/pdf.ts
new file mode 100644
index 0000000..d1eede4
--- /dev/null
+++ b/src/libs/pdf.ts
@@ -0,0 +1,29 @@
+import { pdfDist } from "./pdfjs"
+
+export const getPdf = async (data: ArrayBuffer) => {
+ const pdf = pdfDist.getDocument({
+ data,
+ useWorkerFetch: false,
+ isEvalSupported: false,
+ useSystemFonts: true
+ })
+
+ pdf.onPassword = (callback: any) => {
+ const password = prompt("Enter the password: ")
+ if (!password) {
+ throw new Error("Password required to open the PDF.")
+ }
+ callback(password)
+ }
+
+ const pdfDocument = await pdf.promise
+
+ return pdfDocument
+}
+
+export const processPdf = async (base64: string) => {
+ const res = await fetch(base64)
+ const data = await res.arrayBuffer()
+ const pdf = await getPdf(data)
+ return pdf
+}
diff --git a/src/libs/process-knowledge.ts b/src/libs/process-knowledge.ts
new file mode 100644
index 0000000..d98cf17
--- /dev/null
+++ b/src/libs/process-knowledge.ts
@@ -0,0 +1,55 @@
+import { getKnowledgeById, updateKnowledgeStatus } from "@/db/knowledge"
+import { PageAssistPDFUrlLoader } from "@/loader/pdf-url"
+import {
+ defaultEmbeddingChunkOverlap,
+ defaultEmbeddingChunkSize
+} from "@/services/ollama"
+import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
+import { PageAssistVectorStore } from "./PageAssistVectorStore"
+
+export const processKnowledge = async (msg: any, id: string): Promise<void> => {
+ console.log(`Processing knowledge with id: ${id}`)
+ try {
+ const knowledge = await getKnowledgeById(id)
+
+ if (!knowledge) {
+ console.error(`Knowledge with id ${id} not found`)
+ return
+ }
+
+ await updateKnowledgeStatus(id, "processing")
+
+ const ollamaEmbedding = new OllamaEmbeddings({
+ model: knowledge.embedding_model
+ })
+ const chunkSize = await defaultEmbeddingChunkSize()
+ const chunkOverlap = await defaultEmbeddingChunkOverlap()
+ const textSplitter = new RecursiveCharacterTextSplitter({
+ chunkSize,
+ chunkOverlap
+ })
+
+ for (const doc of knowledge.source) {
+ if (doc.type === "pdf" || doc.type === "application/pdf") {
+ const loader = new PageAssistPDFUrlLoader({
+ name: doc.filename,
+ url: doc.content
+ })
+ let docs = await loader.load()
+ const chunks = await textSplitter.splitDocuments(docs)
+ await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, {
+ knownledge_id: knowledge.id,
+ file_id: doc.source_id
+ })
+ }
+ }
+
+ await updateKnowledgeStatus(id, "finished")
+ } catch (error) {
+ console.error(`Error processing knowledge with id: ${id}`, error)
+ await updateKnowledgeStatus(id, "failed")
+ } finally {
+ console.log(`Finished processing knowledge with id: ${id}`)
+ }
+}
diff --git a/src/loader/pdf-url.ts b/src/loader/pdf-url.ts
new file mode 100644
index 0000000..3124085
--- /dev/null
+++ b/src/loader/pdf-url.ts
@@ -0,0 +1,49 @@
+import { BaseDocumentLoader } from "langchain/document_loaders/base"
+import { Document } from "@langchain/core/documents"
+import { processPdf } from "@/libs/pdf"
+export interface WebLoaderParams {
+ url: string
+ name: string
+}
+
+export class PageAssistPDFUrlLoader
+ extends BaseDocumentLoader
+ implements WebLoaderParams
+{
+ pdf: { content: string; page: number }[]
+ url: string
+ name: string
+
+ constructor({ url, name }: WebLoaderParams) {
+ super()
+ this.url = url
+ this.name = name
+ }
+
+ async load(): Promise<Document<Record<string, any>>[]> {
+ const documents: Document[] = []
+
+ const data = await processPdf(this.url)
+
+ for (let i = 1; i <= data.numPages; i += 1) {
+ const page = await data.getPage(i)
+ const content = await page.getTextContent()
+
+ if (content?.items.length === 0) {
+ continue
+ }
+
+ const text = content?.items
+ .map((item: any) => item.str)
+ .join("\n")
+ .replace(/\x00/g, "")
+ .trim()
+ documents.push({
+ pageContent: text,
+ metadata: { source: this.name, page: i }
+ })
+ }
+
+ return documents
+ }
+}
diff --git a/src/loader/pdf.ts b/src/loader/pdf.ts
index 097460b..d17fd79 100644
--- a/src/loader/pdf.ts
+++ b/src/loader/pdf.ts
@@ -1,37 +1,36 @@
import { BaseDocumentLoader } from "langchain/document_loaders/base"
import { Document } from "@langchain/core/documents"
export interface WebLoaderParams {
- pdf: { content: string, page: number }[]
- url: string
+ pdf: { content: string; page: number }[]
+ url: string
}
export class PageAssistPDFLoader
- extends BaseDocumentLoader
- implements WebLoaderParams {
- pdf: { content: string, page: number }[]
- url: string
+ extends BaseDocumentLoader
+ implements WebLoaderParams
+{
+ pdf: { content: string; page: number }[]
+ url: string
- constructor({ pdf, url }: WebLoaderParams) {
- super()
- this.pdf = pdf
- this.url = url
+ constructor({ pdf, url }: WebLoaderParams) {
+ super()
+ this.pdf = pdf
+ this.url = url
+ }
+
+ async load(): Promise<Document<Record<string, any>>[]> {
+ const documents: Document[] = []
+
+ for (const page of this.pdf) {
+ const metadata = { source: this.url, page: page.page }
+ documents.push(new Document({ pageContent: page.content, metadata }))
}
- async load(): Promise<Document<Record<string, any>>[]> {
- const documents: Document[] = [];
-
- for (const page of this.pdf) {
- const metadata = { source: this.url, page: page.page }
- documents.push(new Document({ pageContent: page.content, metadata }))
- }
-
- return [
- new Document({
- pageContent: documents.map((doc) => doc.pageContent).join("\n\n"),
- metadata: documents.map((doc) => doc.metadata),
- }),
- ];
-
-
- }
+ return [
+ new Document({
+ pageContent: documents.map((doc) => doc.pageContent).join("\n\n"),
+ metadata: documents.map((doc) => doc.metadata)
+ })
+ ]
+ }
}
diff --git a/src/queue/index.ts b/src/queue/index.ts
new file mode 100644
index 0000000..f35a382
--- /dev/null
+++ b/src/queue/index.ts
@@ -0,0 +1,6 @@
+import { processKnowledge } from "@/libs/process-knowledge"
+import PubSub from "pubsub-js"
+
+export const KNOWLEDGE_QUEUE = Symbol("queue")
+
+PubSub.subscribe(KNOWLEDGE_QUEUE, processKnowledge)
diff --git a/src/routes/option-settings-knowledge.tsx b/src/routes/option-settings-knowledge.tsx
index 0d8c573..aedddd3 100644
--- a/src/routes/option-settings-knowledge.tsx
+++ b/src/routes/option-settings-knowledge.tsx
@@ -1,11 +1,12 @@
import { SettingsLayout } from "~/components/Layouts/SettingsOptionLayout"
import OptionLayout from "~/components/Layouts/Layout"
+import { KnowledgeSettings } from "@/components/Option/Knowledge"
export const OptionKnowledgeBase = () => {
return (
- hey
+
)
diff --git a/src/utils/to-source.ts b/src/utils/to-source.ts
new file mode 100644
index 0000000..ecc5454
--- /dev/null
+++ b/src/utils/to-source.ts
@@ -0,0 +1,32 @@
+import { Source } from "@/db/knowledge"
+import { UploadFile } from "antd"
+
+export const toBase64 = (file: File | Blob): Promise<string> => {
+ return new Promise((resolve, reject) => {
+ const reader = new FileReader()
+ reader.readAsDataURL(file)
+ reader.onload = () => resolve(reader.result as string)
+ reader.onerror = (error) => reject(error)
+ })
+}
+
+export const toArrayBufferFromBase64 = async (base64: string) => {
+ const res = await fetch(base64)
+ const blob = await res.blob()
+ return await blob.arrayBuffer()
+}
+
+export const generateSourceId = () => {
+ return "XXXXXXXX-XXXX-4XXX-YXXX-XXXXXXXXXXXX".replace(/[XY]/g, (c) => {
+ const r = (Math.random() * 16) | 0
+ const v = c === "X" ? r : (r & 0x3) | 0x8
+ return v.toString(16)
+ })
+}
+
+export const convertToSource = async (file: UploadFile): Promise<Source> => {
+ let type = file.type
+ let filename = file.name
+ const content = await toBase64(file.originFileObj)
+ return { content, type, filename, source_id: generateSourceId() }
+}
diff --git a/wxt.config.ts b/wxt.config.ts
index 05526d4..954dfb9 100644
--- a/wxt.config.ts
+++ b/wxt.config.ts
@@ -24,7 +24,7 @@ export default defineConfig({
srcDir: "src",
outDir: "build",
manifest: {
- version: "1.1.1",
+ version: "1.1.2",
name: '__MSG_extName__',
description: '__MSG_extDescription__',
default_locale: 'en',