diff --git a/bun.lockb b/bun.lockb new file mode 100644 index 0000000..15224d0 Binary files /dev/null and b/bun.lockb differ diff --git a/package.json b/package.json index 8f62d6c..1988dfa 100644 --- a/package.json +++ b/package.json @@ -35,8 +35,10 @@ "i18next-browser-languagedetector": "^7.2.0", "langchain": "^0.1.28", "lucide-react": "^0.350.0", + "ml-distance": "^4.0.1", "pdfjs-dist": "^4.0.379", "property-information": "^6.4.1", + "pubsub-js": "^1.9.4", "react": "18.2.0", "react-dom": "18.2.0", "react-i18next": "^14.1.0", @@ -55,6 +57,7 @@ "@types/chrome": "0.0.259", "@types/html-to-text": "^9.0.4", "@types/node": "20.11.9", + "@types/pubsub-js": "^1.8.6", "@types/react": "18.2.48", "@types/react-dom": "18.2.18", "@types/react-syntax-highlighter": "^15.5.11", diff --git a/src/assets/locale/en/knownledge.json b/src/assets/locale/en/knownledge.json new file mode 100644 index 0000000..2ff6db7 --- /dev/null +++ b/src/assets/locale/en/knownledge.json @@ -0,0 +1,3 @@ +{ + "addBtn": "Add New Knowledge" +} \ No newline at end of file diff --git a/src/assets/locale/en/playground.json b/src/assets/locale/en/playground.json index 6a91280..1d0794d 100644 --- a/src/assets/locale/en/playground.json +++ b/src/assets/locale/en/playground.json @@ -21,7 +21,8 @@ "searchInternet": "Search Internet", "speechToText": "Speech to Text", "uploadImage": "Upload Image", - "stopStreaming": "Stop Streaming" + "stopStreaming": "Stop Streaming", + "knowledge": "Knowledge" }, "sendWhenEnter": "Send when Enter pressed" } \ No newline at end of file diff --git a/src/assets/locale/en/settings.json b/src/assets/locale/en/settings.json index 2aa6083..677fe2b 100644 --- a/src/assets/locale/en/settings.json +++ b/src/assets/locale/en/settings.json @@ -242,5 +242,9 @@ "koFi": "Support on Ko-fi", "githubSponsor": "Sponsor on GitHub", "githubRepo": "GitHub Repository" + }, + "manageKnowledge": { + "title": "Manage Knowledge", + "heading": "Configure Knowledge Base" } } \ No newline at end of file 
diff --git a/src/assets/locale/ja-JP/knownledge.json b/src/assets/locale/ja-JP/knownledge.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/assets/locale/ja-JP/knownledge.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/assets/locale/ja-JP/playground.json b/src/assets/locale/ja-JP/playground.json index d26dd66..a559d18 100644 --- a/src/assets/locale/ja-JP/playground.json +++ b/src/assets/locale/ja-JP/playground.json @@ -21,7 +21,8 @@ "searchInternet": "インターネットを検索", "speechToText": "音声入力", "uploadImage": "画像をアップロード", - "stopStreaming": "ストリーミングを停止" + "stopStreaming": "ストリーミングを停止", + "knowledge": "知識" }, "sendWhenEnter": "Enterキーを押すと送信" } \ No newline at end of file diff --git a/src/assets/locale/ml/knownledge.json b/src/assets/locale/ml/knownledge.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/assets/locale/ml/knownledge.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/assets/locale/ml/playground.json b/src/assets/locale/ml/playground.json index 7334837..0c6b7ee 100644 --- a/src/assets/locale/ml/playground.json +++ b/src/assets/locale/ml/playground.json @@ -21,7 +21,8 @@ "searchInternet": "ഇന്റര്‍നെറ്റ് തിരയുക", "speechToText": "സംഭാഷണം ടെക്സ്റ്റായി", "uploadImage": "ഇമേജ് അപ്‌ലോഡ് ചെയ്യുക", - "stopStreaming": "സ്ട്രീമിംഗ് നിർത്തുക" + "stopStreaming": "സ്ട്രീമിംഗ് നിർത്തുക", + "knowledge": "അറിവ്" }, "sendWhenEnter": "എന്റര്‍ അമര്‍ത്തുമ്പോള്‍ അയയ്ക്കുക" } \ No newline at end of file diff --git a/src/assets/locale/zh/knownledge.json b/src/assets/locale/zh/knownledge.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/assets/locale/zh/knownledge.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/assets/locale/zh/playground.json b/src/assets/locale/zh/playground.json index 0b9d9a4..84a591c 100644 --- a/src/assets/locale/zh/playground.json +++ b/src/assets/locale/zh/playground.json @@ -21,7 +21,8 @@ "searchInternet": "搜索互联网", "speechToText": 
"语音到文本", "uploadImage": "上传图片", - "stopStreaming": "停止流媒体" + "stopStreaming": "停止流媒体", + "knowledge": "知识" }, "sendWhenEnter": "按Enter发送" } \ No newline at end of file diff --git a/src/assets/tailwind.css b/src/assets/tailwind.css index f55882d..11ff412 100644 --- a/src/assets/tailwind.css +++ b/src/assets/tailwind.css @@ -55,3 +55,13 @@ background-position: 0% 50%; } } +/* Hide scrollbar for Chrome, Safari and Opera */ +.no-scrollbar::-webkit-scrollbar { + display: none; +} + +/* Hide scrollbar for IE, Edge and Firefox */ +.no-scrollbar { + -ms-overflow-style: none; /* IE and Edge */ + scrollbar-width: none; /* Firefox */ +} \ No newline at end of file diff --git a/src/components/Common/ShareBtn.tsx b/src/components/Common/ShareBtn.tsx index 1dfdc5f..98dddb1 100644 --- a/src/components/Common/ShareBtn.tsx +++ b/src/components/Common/ShareBtn.tsx @@ -7,7 +7,7 @@ import React from "react" import { useMutation } from "@tanstack/react-query" import { getPageShareUrl } from "~/services/ollama" import { cleanUrl } from "~/libs/clean-url" -import { getUserId, saveWebshare } from "~/libs/db" +import { getUserId, saveWebshare } from "@/db" import { useTranslation } from "react-i18next" type Props = { diff --git a/src/components/Layouts/Layout.tsx b/src/components/Layouts/Layout.tsx index 23e36db..f5455c1 100644 --- a/src/components/Layouts/Layout.tsx +++ b/src/components/Layouts/Layout.tsx @@ -15,7 +15,7 @@ import { SquarePen, ZapIcon } from "lucide-react" -import { getAllPrompts } from "~/libs/db" +import { getAllPrompts } from "@/db" import { ShareBtn } from "~/components/Common/ShareBtn" import { useTranslation } from "react-i18next" import { OllamaIcon } from "../Icons/Ollama" diff --git a/src/components/Layouts/SettingsOptionLayout.tsx b/src/components/Layouts/SettingsOptionLayout.tsx index 372dc0d..2fcb681 100644 --- a/src/components/Layouts/SettingsOptionLayout.tsx +++ b/src/components/Layouts/SettingsOptionLayout.tsx @@ -68,12 +68,12 @@ export const SettingsLayout 
= ({ children }: { children: React.ReactNode }) => { current={location.pathname} icon={BrainCircuit} /> - {/* */} + /> > +} + +export const AddKnowledge = ({ open, setOpen }: Props) => { + const { t } = useTranslation("knowledge") + const [form] = Form.useForm() + + const onUploadHandler = async (data: { + title: string + file: UploadFile[] + }) => { + const defaultEM = await defaultEmbeddingModelForRag() + + if (!defaultEM) { + throw new Error(t("noEmbeddingModel")) + } + + const source: Source[] = [] + + for (const file of data.file) { + const data = await convertToSource(file) + source.push(data) + } + + const knowledge = await createKnowledge({ + embedding_model: defaultEM, + source, + title: data.title + }) + + return knowledge.id + } + + const { mutate: saveKnowledge, isPending: isSaving } = useMutation({ + mutationFn: onUploadHandler, + onError: (error) => { + message.error(error.message) + }, + onSuccess: async (id) => { + message.success(t("form.success")) + PubSub.publish(KNOWLEDGE_QUEUE, id) + setOpen(false) + } + }) + + return ( + setOpen(false)}> +
+ + + + { + if (Array.isArray(e)) { + return e + } + return e?.fileList + }}> + { + const allowedTypes = [ + "application/pdf", + // "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "text/csv", + "text/plain" + ] + .map((type) => type.toLowerCase()) + .join(", ") + + if (!allowedTypes.includes(file.type.toLowerCase())) { + message.error( + t("form.uploadFile.uploadError", { allowedTypes }) + ) + return Upload.LIST_IGNORE + } + + return false + }}> +
+

+ +

+

+ {t("form.uploadFile.uploadText")} +

+

+ {t("form.uploadFile.uploadHint")} +

+
+
+
+ + + + +
+
+ ) +} diff --git a/src/components/Option/Knowledge/KnowledgeSelect.tsx b/src/components/Option/Knowledge/KnowledgeSelect.tsx new file mode 100644 index 0000000..2ab6ca8 --- /dev/null +++ b/src/components/Option/Knowledge/KnowledgeSelect.tsx @@ -0,0 +1,51 @@ +import { getAllKnowledge } from "@/db/knowledge" +import { useQuery } from "@tanstack/react-query" +import { Dropdown, Tooltip } from "antd" +import { Blocks } from "lucide-react" +import React from "react" +import { useTranslation } from "react-i18next" + +export const KnowledgeSelect: React.FC = () => { + const { t } = useTranslation("playground") + const { data } = useQuery({ + queryKey: ["getAllKnowledge"], + queryFn: async () => { + const data = await getAllKnowledge("finished") + return data + }, + refetchInterval: 1000 + }) + + return ( + ({ + key: d.id, + label: ( +
+
+ +
+ {d.title} +
+ ), + onClick: () => {} + })) || [], + style: { + maxHeight: 500, + overflowY: "scroll" + }, + // hidescrollbars: true + className: "no-scrollbar" + }} + placement={"topLeft"} + trigger={["click"]}> + + + +
+ ) +} diff --git a/src/components/Option/Knowledge/index.tsx b/src/components/Option/Knowledge/index.tsx new file mode 100644 index 0000000..a3198bd --- /dev/null +++ b/src/components/Option/Knowledge/index.tsx @@ -0,0 +1,138 @@ +import { useState } from "react" +import { useTranslation } from "react-i18next" +import { AddKnowledge } from "./AddKnowledge" +import { + useMutation, + useQuery, + useQueryClient +} from "@tanstack/react-query" +import { deleteKnowledge, getAllKnowledge } from "@/db/knowledge" +import { Skeleton, Table, Tag, Tooltip, message } from "antd" +import { Trash2 } from "lucide-react" + +export const KnowledgeSettings = () => { + const { t } = useTranslation(["knownledge", "common"]) + const [open, setOpen] = useState(false) + const queryClient = useQueryClient() + + const { data, status } = useQuery({ + queryKey: ["fetchAllKnowledge"], + queryFn: () => getAllKnowledge(), + refetchInterval: 1000 + }) + + const { mutate: deleteKnowledgeMutation, isPending: isDeleting } = + useMutation({ + mutationFn: deleteKnowledge, + onSuccess: () => { + queryClient.invalidateQueries({ + queryKey: ["fetchAllKnowledge"] + }) + + message.success(t("deleteSuccess")) + }, + onError: (error) => { + message.error(error.message) + } + }) + + return ( +
+
+ {/* Add new model button */} +
+
+
+ +
+
+
+ {status === "pending" && } + + {status === "success" && ( + ( + {t(`status.${text}`)} + ) + }, + { + title: t("columns.embeddings"), + dataIndex: "embedding_model", + key: "embedding_model" + }, + { + title: t("columns.createdAt"), + dataIndex: "createdAt", + key: "createdAt", + render: (text: number) => new Date(text).toLocaleString() + }, + { + title: t("columns.action"), + key: "action", + render: (text: string, record: any) => ( +
+ + + +
+ ) + } + ]} + expandable={{ + expandedRowRender: (record) => ( +
+ ), + defaultExpandAllRows: false + }} + bordered + dataSource={data} + rowKey={(record) => `${record.name}-${record.id}`} + /> + )} + + + + + ) +} diff --git a/src/components/Option/Playground/PlaygroundForm.tsx b/src/components/Option/Playground/PlaygroundForm.tsx index 87c6005..4eec48c 100644 --- a/src/components/Option/Playground/PlaygroundForm.tsx +++ b/src/components/Option/Playground/PlaygroundForm.tsx @@ -4,7 +4,7 @@ import React from "react" import useDynamicTextareaSize from "~/hooks/useDynamicTextareaSize" import { toBase64 } from "~/libs/to-base64" import { useMessageOption } from "~/hooks/useMessageOption" -import { Checkbox, Dropdown, Switch, Tooltip } from "antd" +import { Checkbox, Dropdown, Select, Switch, Tooltip } from "antd" import { Image } from "antd" import { useSpeechRecognition } from "~/hooks/useSpeechRecognition" import { useWebUI } from "~/store/webui" @@ -12,6 +12,7 @@ import { defaultEmbeddingModelForRag } from "~/services/ollama" import { ImageIcon, MicIcon, StopCircleIcon, X } from "lucide-react" import { getVariable } from "~/utils/select-varaible" import { useTranslation } from "react-i18next" +import { KnowledgeSelect } from "../Knowledge/KnowledgeSelect" type Props = { dropedFile: File | undefined @@ -249,6 +250,7 @@ export const PlaygroundForm = ({ dropedFile }: Props) => {
+ + - + {!isSending ? ( { const queryClient = useQueryClient() diff --git a/src/components/Option/Settings/other.tsx b/src/components/Option/Settings/other.tsx index a608f98..3ae298b 100644 --- a/src/components/Option/Settings/other.tsx +++ b/src/components/Option/Settings/other.tsx @@ -1,7 +1,7 @@ import { useQueryClient } from "@tanstack/react-query" import { useDarkMode } from "~/hooks/useDarkmode" import { useMessageOption } from "~/hooks/useMessageOption" -import { PageAssitDatabase } from "~/libs/db" +import { PageAssitDatabase } from "@/db" import { Select } from "antd" import { SUPPORTED_LANGUAGES } from "~/utils/supporetd-languages" import { MoonIcon, SunIcon } from "lucide-react" diff --git a/src/components/Option/Share/index.tsx b/src/components/Option/Share/index.tsx index 5b1c3cb..8239a67 100644 --- a/src/components/Option/Share/index.tsx +++ b/src/components/Option/Share/index.tsx @@ -3,7 +3,7 @@ import { Form, Input, Skeleton, Table, Tooltip, message } from "antd" import { Trash2 } from "lucide-react" import { Trans, useTranslation } from "react-i18next" import { SaveButton } from "~/components/Common/SaveButton" -import { deleteWebshare, getAllWebshares, getUserId } from "~/libs/db" +import { deleteWebshare, getAllWebshares, getUserId } from "@/db" import { getPageShareUrl, setPageShareUrl } from "~/services/ollama" import { verifyPageShareURL } from "~/utils/verify-page-share" diff --git a/src/components/Option/Sidebar.tsx b/src/components/Option/Sidebar.tsx index 87628ff..e18f567 100644 --- a/src/components/Option/Sidebar.tsx +++ b/src/components/Option/Sidebar.tsx @@ -5,7 +5,7 @@ import { formatToMessage, deleteByHistoryId, updateHistory -} from "~/libs/db" +} from "@/db" import { Empty, Skeleton } from "antd" import { useMessageOption } from "~/hooks/useMessageOption" import { PencilIcon, Trash2 } from "lucide-react" diff --git a/src/libs/db.ts b/src/db/index.ts similarity index 100% rename from src/libs/db.ts rename to src/db/index.ts diff 
--git a/src/db/knowledge.ts b/src/db/knowledge.ts new file mode 100644 index 0000000..b853ead --- /dev/null +++ b/src/db/knowledge.ts @@ -0,0 +1,192 @@ +import { deleteVector, deleteVectorByFileId } from "./vector" + +export type Source = { + source_id: string + type: string + filename?: string + content: string +} + +export type Knowledge = { + id: string + db_type: string + title: string + status: string + embedding_model: string + source: Source[] + knownledge: any + createdAt: number +} +export const generateID = () => { + return "pa_knowledge_xxxx-xxxx-xxx-xxxx".replace(/[x]/g, () => { + const r = Math.floor(Math.random() * 16) + return r.toString(16) + }) +} +export class PageAssistKnowledge { + db: chrome.storage.StorageArea + + constructor() { + this.db = chrome.storage.local + } + + getAll = async (): Promise => { + return new Promise((resolve, reject) => { + this.db.get(null, (result) => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + const data = Object.keys(result).map((key) => result[key]) + resolve(data) + } + }) + }) + } + + getById = async (id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.get(id, (result) => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve(result[id]) + } + }) + }) + } + + create = async (knowledge: Knowledge): Promise => { + return new Promise((resolve, reject) => { + this.db.set({ [knowledge.id]: knowledge }, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + }) + } + + update = async (knowledge: Knowledge): Promise => { + return new Promise((resolve, reject) => { + this.db.set({ [knowledge.id]: knowledge }, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + }) + } + + delete = async (id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.remove(id, () => { + if 
(chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + }) + } + + deleteSource = async (id: string, source_id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.get(id, (result) => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + const data = result[id] as Knowledge + data.source = data.source.filter((s) => s.source_id !== source_id) + this.db.set({ [id]: data }, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + } + }) + }) + } +} + +export const createKnowledge = async ({ + source, + title, + embedding_model +}: { + title: string + source: Source[] + embedding_model: string +}) => { + const db = new PageAssistKnowledge() + const id = generateID() + const knowledge: Knowledge = { + id, + title, + db_type: "knowledge", + source, + status: "pending", + knownledge: {}, + embedding_model, + createdAt: Date.now() + } + await db.create(knowledge) + return knowledge +} + +export const getKnowledgeById = async (id: string) => { + const db = new PageAssistKnowledge() + return db.getById(id) +} + +export const updateKnowledgeStatus = async (id: string, status: string) => { + const db = new PageAssistKnowledge() + const knowledge = await db.getById(id) + await db.update({ + ...knowledge, + status + }) +} + +export const getAllKnowledge = async (status?: string) => { + const db = new PageAssistKnowledge() + const data = await db.getAll() + + if (status) { + return data + .filter((d) => d.db_type === "knowledge") + .filter((d) => d.status === status) + .map((d) => { + d.source.forEach((s) => { + delete s.content + }) + return d + }) + .sort((a, b) => b.createdAt - a.createdAt) + } + + return data + .filter((d) => d.db_type === "knowledge") + .map((d) => { + d.source.forEach((s) => { + delete s.content + }) + return d + }) + .sort((a, b) => b.createdAt - a.createdAt) +} + +export const deleteKnowledge = async (id: 
string) => { + const db = new PageAssistKnowledge() + await db.delete(id) + await deleteVector(`vector:${id}`) +} + +export const deleteSource = async (id: string, source_id: string) => { + const db = new PageAssistKnowledge() + await db.deleteSource(id, source_id) + await deleteVectorByFileId(`vector:${id}`, source_id) +} diff --git a/src/db/vector.ts b/src/db/vector.ts new file mode 100644 index 0000000..0feb3b7 --- /dev/null +++ b/src/db/vector.ts @@ -0,0 +1,131 @@ +interface PageAssistVector { + file_id: string + content: string + embedding: number[] + metadata: Record +} + +export type VectorData = { + id: string + vectors: PageAssistVector[] +} + +export class PageAssistVectorDb { + db: chrome.storage.StorageArea + + constructor() { + this.db = chrome.storage.local + } + + insertVector = async ( + id: string, + vector: PageAssistVector[] + ): Promise => { + return new Promise((resolve, reject) => { + this.db.get(id, (result) => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + const data = result[id] as VectorData + if (!data) { + console.log("Creating new vector") + this.db.set({ [id]: { id, vectors: [vector] } }, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + } else { + console.log("Concatenating vectors") + this.db.set( + { + [id]: { + ...data, + vectors: data.vectors.concat(vector) + } + }, + () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + } + ) + } + } + }) + }) + } + + deleteVector = async (id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.remove(id, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + }) + } + + deleteVectorByFileId = async (id: string, file_id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.get(id, (result) => { + if (chrome.runtime.lastError) { + 
reject(chrome.runtime.lastError) + } else { + const data = result[id] as VectorData + data.vectors = data.vectors.filter((v) => v.file_id !== file_id) + this.db.set({ [id]: data }, () => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve() + } + }) + } + }) + }) + } + + getVector = async (id: string): Promise => { + return new Promise((resolve, reject) => { + this.db.get(id, (result) => { + if (chrome.runtime.lastError) { + reject(chrome.runtime.lastError) + } else { + resolve(result[id] as VectorData) + } + }) + }) + } +} + +export const insertVector = async ( + id: string, + vector: PageAssistVector[] +): Promise => { + const db = new PageAssistVectorDb() + return db.insertVector(id, vector) +} + +export const getVector = async (id: string): Promise => { + const db = new PageAssistVectorDb() + return db.getVector(id) +} + +export const deleteVector = async (id: string): Promise => { + const db = new PageAssistVectorDb() + return db.deleteVector(id) +} + +export const deleteVectorByFileId = async ( + id: string, + file_id: string +): Promise => { + const db = new PageAssistVectorDb() + return db.deleteVectorByFileId(id, file_id) +} diff --git a/src/entries/background.ts b/src/entries/background.ts index d4f075b..5767b7c 100644 --- a/src/entries/background.ts +++ b/src/entries/background.ts @@ -1,4 +1,4 @@ - +import { processKnowledge } from "@/libs/process-knowledge" import { getOllamaURL, isOllamaRunning } from "../services/ollama" const progressHuman = (completed: number, total: number) => { return ((completed / total) * 100).toFixed(0) + "%" @@ -78,13 +78,16 @@ export default defineBackground({ main() { chrome.runtime.onMessage.addListener(async (message) => { if (message.type === "sidepanel") { - chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => { - const tab = tabs[0] - chrome.sidePanel.open({ - // tabId: tab.id!, - windowId: tab.windowId!, - }) - }) + chrome.tabs.query( + { active: true, 
currentWindow: true }, + async (tabs) => { + const tab = tabs[0] + chrome.sidePanel.open({ + // tabId: tab.id!, + windowId: tab.windowId! + }) + } + ) } else if (message.type === "pull_model") { const ollamaURL = await getOllamaURL() @@ -93,8 +96,7 @@ export default defineBackground({ if (!isRunning) { chrome.action.setBadgeText({ text: "E" }) chrome.action.setBadgeBackgroundColor({ color: "#FF0000" }) - chrome.action.setTitle({ title: "Ollama is not running" - }) + chrome.action.setTitle({ title: "Ollama is not running" }) setTimeout(() => { clearBadge() }, 5000) @@ -111,12 +113,15 @@ export default defineBackground({ chrome.commands.onCommand.addListener((command) => { switch (command) { case "execute_side_panel": - chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => { - const tab = tabs[0] - chrome.sidePanel.open({ - windowId: tab.windowId! - }) - }) + chrome.tabs.query( + { active: true, currentWindow: true }, + async (tabs) => { + const tab = tabs[0] + chrome.sidePanel.open({ + windowId: tab.windowId! + }) + } + ) break default: break @@ -131,14 +136,17 @@ export default defineBackground({ chrome.contextMenus.onClicked.addListener((info, tab) => { if (info.menuItemId === "open-side-panel-pa") { - chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => { - const tab = tabs[0] - await chrome.sidePanel.open({ - windowId: tab.windowId!, - }) - }) + chrome.tabs.query( + { active: true, currentWindow: true }, + async (tabs) => { + const tab = tabs[0] + await chrome.sidePanel.open({ + windowId: tab.windowId! 
+ }) + } + ) } }) }, persistent: true -}) \ No newline at end of file +}) diff --git a/src/hooks/chat-helper/index.ts b/src/hooks/chat-helper/index.ts index 51d320c..15562ec 100644 --- a/src/hooks/chat-helper/index.ts +++ b/src/hooks/chat-helper/index.ts @@ -1,4 +1,4 @@ -import { saveHistory, saveMessage } from "@/libs/db" +import { saveHistory, saveMessage } from "@/db" import { ChatHistory } from "@/store/option" export const saveMessageOnError = async ({ diff --git a/src/hooks/useMessageOption.tsx b/src/hooks/useMessageOption.tsx index f0b0d63..880757e 100644 --- a/src/hooks/useMessageOption.tsx +++ b/src/hooks/useMessageOption.tsx @@ -15,7 +15,7 @@ import { getPromptById, removeMessageUsingHistoryId, updateMessageByIndex -} from "~/libs/db" +} from "@/db" import { useNavigate } from "react-router-dom" import { notification } from "antd" import { getSystemPromptForWeb } from "~/web/web" diff --git a/src/i18n/lang/en.ts b/src/i18n/lang/en.ts index 182fc03..dd5a801 100644 --- a/src/i18n/lang/en.ts +++ b/src/i18n/lang/en.ts @@ -3,12 +3,13 @@ import playground from "@/assets/locale/en/playground.json"; import common from "@/assets/locale/en/common.json"; import sidepanel from "@/assets/locale/en/sidepanel.json"; import settings from "@/assets/locale/en/settings.json"; - +import knownledge from "@/assets/locale/en/knownledge.json"; export const en = { option, playground, common, sidepanel, - settings + settings, + knownledge } \ No newline at end of file diff --git a/src/i18n/lang/ja.ts b/src/i18n/lang/ja.ts index 2026585..37095a3 100644 --- a/src/i18n/lang/ja.ts +++ b/src/i18n/lang/ja.ts @@ -3,6 +3,7 @@ import playground from "@/assets/locale/ja-JP/playground.json"; import common from "@/assets/locale/ja-JP/common.json"; import sidepanel from "@/assets/locale/ja-JP/sidepanel.json"; import settings from "@/assets/locale/ja-JP/settings.json"; +import knownledge from "@/assets/locale/ja-JP/knownledge.json"; export const ja = { @@ -10,5 +11,6 @@ export const ja = { 
playground, common, sidepanel, - settings + settings, + knownledge } \ No newline at end of file diff --git a/src/i18n/lang/ml.ts b/src/i18n/lang/ml.ts index 315e9c4..1cb8e7d 100644 --- a/src/i18n/lang/ml.ts +++ b/src/i18n/lang/ml.ts @@ -3,12 +3,13 @@ import playground from "@/assets/locale/ml/playground.json"; import common from "@/assets/locale/ml/common.json"; import sidepanel from "@/assets/locale/ml/sidepanel.json"; import settings from "@/assets/locale/ml/settings.json"; - +import knownledge from "@/assets/locale/ml/knownledge.json"; export const ml = { option, playground, common, sidepanel, - settings + settings, + knownledge } \ No newline at end of file diff --git a/src/i18n/lang/zh.ts b/src/i18n/lang/zh.ts index 0b816dc..2d55196 100644 --- a/src/i18n/lang/zh.ts +++ b/src/i18n/lang/zh.ts @@ -3,6 +3,7 @@ import playground from "@/assets/locale/zh/playground.json"; import common from "@/assets/locale/zh/common.json"; import sidepanel from "@/assets/locale/zh/sidepanel.json"; import settings from "@/assets/locale/zh/settings.json"; +import knownledge from "@/assets/locale/zh/knownledge.json"; export const zh = { @@ -10,5 +11,6 @@ export const zh = { playground, common, sidepanel, - settings + settings, + knownledge } \ No newline at end of file diff --git a/src/libs/PageAssistVectorStore.ts b/src/libs/PageAssistVectorStore.ts new file mode 100644 index 0000000..f3ae7b4 --- /dev/null +++ b/src/libs/PageAssistVectorStore.ts @@ -0,0 +1,201 @@ +import { similarity as ml_distance_similarity } from "ml-distance" +import { VectorStore } from "@langchain/core/vectorstores" +import type { EmbeddingsInterface } from "@langchain/core/embeddings" +import { Document } from "@langchain/core/documents" +import { getVector, insertVector } from "@/db/vector" + +/** + * Interface representing a vector in memory. It includes the content + * (text), the corresponding embedding (vector), and any associated + * metadata. 
+ */ +interface PageAssistVector { + content: string + embedding: number[] + metadata: Record +} + +/** + * Interface for the arguments that can be passed to the + * `MemoryVectorStore` constructor. It includes an optional `similarity` + * function. + */ +export interface MemoryVectorStoreArgs { + knownledge_id: string + file_id?: string + similarity?: typeof ml_distance_similarity.cosine +} + +/** + * Class that extends `VectorStore` to store vectors in memory. Provides + * methods for adding documents, performing similarity searches, and + * creating instances from texts, documents, or an existing index. + */ +export class PageAssistVectorStore extends VectorStore { + declare FilterType: (doc: Document) => boolean + + knownledge_id: string + + file_id?: string + + // memoryVectors: PageAssistVector[] = [] + + similarity: typeof ml_distance_similarity.cosine + + _vectorstoreType(): string { + return "memory" + } + + constructor(embeddings: EmbeddingsInterface, args: MemoryVectorStoreArgs) { + super(embeddings, args) + + this.similarity = args?.similarity ?? ml_distance_similarity.cosine + + this.knownledge_id = args?.knownledge_id! + + this.file_id = args?.file_id + } + + /** + * Method to add documents to the memory vector store. It extracts the + * text from each document, generates embeddings for them, and adds the + * resulting vectors to the store. + * @param documents Array of `Document` instances to be added to the store. + * @returns Promise that resolves when all documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent) + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ) + } + + /** + * Method to add vectors to the memory vector store. It creates + * `PageAssistVector` instances for each vector and document pair and adds + * them to the store. + * @param vectors Array of vectors to be added to the store. 
+ * @param documents Array of `Document` instances corresponding to the vectors. + * @returns Promise that resolves when all vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const memoryVectors = vectors.map((embedding, idx) => ({ + content: documents[idx].pageContent, + embedding, + metadata: documents[idx].metadata, + file_id: this.file_id + })) + console.log(`vector:${this.knownledge_id}`) + await insertVector(`vector:${this.knownledge_id}`, memoryVectors) + } + + /** + * Method to perform a similarity search in the memory vector store. It + * calculates the similarity between the query vector and each vector in + * the store, sorts the results by similarity, and returns the top `k` + * results along with their scores. + * @param query Query vector to compare against the vectors in the store. + * @param k Number of top results to return. + * @param filter Optional filter function to apply to the vectors before performing the search. + * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + const filterFunction = (memoryVector: PageAssistVector) => { + if (!filter) { + return true + } + + const doc = new Document({ + metadata: memoryVector.metadata, + pageContent: memoryVector.content + }) + return filter(doc) + } + const pgVector = await getVector(`vector:${this.knownledge_id}`) + const filteredMemoryVectors = pgVector.vectors.filter(filterFunction) + const searches = filteredMemoryVectors + .map((vector, index) => ({ + similarity: this.similarity(query, vector.embedding), + index + })) + .sort((a, b) => (a.similarity > b.similarity ? 
-1 : 0)) + .slice(0, k) + + const result: [Document, number][] = searches.map((search) => [ + new Document({ + metadata: filteredMemoryVectors[search.index].metadata, + pageContent: filteredMemoryVectors[search.index].content + }), + search.similarity + ]) + + return result + } + + /** + * Static method to create a `MemoryVectorStore` instance from an array of + * texts. It creates a `Document` for each text and metadata pair, and + * adds them to the store. + * @param texts Array of texts to be added to the store. + * @param metadatas Array or single object of metadata corresponding to the texts. + * @param embeddings `Embeddings` instance used to generate embeddings for the texts. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. + * @returns Promise that resolves with a new `MemoryVectorStore` instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: EmbeddingsInterface, + dbConfig?: MemoryVectorStoreArgs + ): Promise { + const docs: Document[] = [] + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas + const newDoc = new Document({ + pageContent: texts[i], + metadata + }) + docs.push(newDoc) + } + return PageAssistVectorStore.fromDocuments(docs, embeddings, dbConfig) + } + + /** + * Static method to create a `MemoryVectorStore` instance from an array of + * `Document` instances. It adds the documents to the store. + * @param docs Array of `Document` instances to be added to the store. + * @param embeddings `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. + * @returns Promise that resolves with a new `MemoryVectorStore` instance. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: EmbeddingsInterface, + dbConfig?: MemoryVectorStoreArgs + ): Promise<PageAssistVectorStore> { + const instance = new this(embeddings, dbConfig) + await instance.addDocuments(docs) + return instance + } + + /** + * Static method to create a `PageAssistVectorStore` instance from an existing + * index. It creates a new `PageAssistVectorStore` instance without adding any + * documents or vectors. + * @param embeddings `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `PageAssistVectorStore` instance. + * @returns Promise that resolves with a new `PageAssistVectorStore` instance. + */ + static async fromExistingIndex( + embeddings: EmbeddingsInterface, + dbConfig?: MemoryVectorStoreArgs + ): Promise<PageAssistVectorStore> { + const instance = new this(embeddings, dbConfig) + return instance + } +} diff --git a/src/libs/get-html.ts b/src/libs/get-html.ts index 465e480..bed1f43 100644 --- a/src/libs/get-html.ts +++ b/src/libs/get-html.ts @@ -1,28 +1,4 @@ -import { pdfDist } from "./pdfjs" - -export const getPdf = async (data: ArrayBuffer) => { - const pdf = pdfDist.getDocument({ - data, - useWorkerFetch: false, - isEvalSupported: false, - useSystemFonts: true, - }); - - pdf.onPassword = (callback: any) => { - const password = prompt("Enter the password: ") - if (!password) { - throw new Error("Password required to open the PDF."); - } - callback(password); - }; - - - const pdfDocument = await pdf.promise; - - - return pdfDocument - -} +import { getPdf } from "./pdf" const _getHtml = async () => { const url = window.location.href diff --git a/src/libs/pdf.ts b/src/libs/pdf.ts new file mode 100644 index 0000000..d1eede4 --- /dev/null +++ b/src/libs/pdf.ts @@ -0,0 +1,29 @@ +import { pdfDist } from "./pdfjs" + +export const getPdf = async (data: ArrayBuffer) => { + const pdf = pdfDist.getDocument({ + data, + useWorkerFetch: false, + isEvalSupported: false, + useSystemFonts: true +
}) + + pdf.onPassword = (callback: (password: string) => void) => { + const password = prompt("Enter the password: ") + if (!password) { + throw new Error("Password required to open the PDF.") + } + callback(password) + } + + const pdfDocument = await pdf.promise + + return pdfDocument +} + +export const processPdf = async (base64: string) => { + const res = await fetch(base64) + const data = await res.arrayBuffer() + const pdf = await getPdf(data) + return pdf +} diff --git a/src/libs/process-knowledge.ts b/src/libs/process-knowledge.ts new file mode 100644 index 0000000..d98cf17 --- /dev/null +++ b/src/libs/process-knowledge.ts @@ -0,0 +1,55 @@ +import { getKnowledgeById, updateKnowledgeStatus } from "@/db/knowledge" +import { PageAssistPDFUrlLoader } from "@/loader/pdf-url" +import { + defaultEmbeddingChunkOverlap, + defaultEmbeddingChunkSize +} from "@/services/ollama" +import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" +import { PageAssistVectorStore } from "./PageAssistVectorStore" + +export const processKnowledge = async (msg: unknown, id: string): Promise<void> => { + console.log(`Processing knowledge with id: ${id}`) + try { + const knowledge = await getKnowledgeById(id) + + if (!knowledge) { + console.error(`Knowledge with id ${id} not found`) + return + } + + await updateKnowledgeStatus(id, "processing") + + const ollamaEmbedding = new OllamaEmbeddings({ + model: knowledge.embedding_model + }) + const chunkSize = await defaultEmbeddingChunkSize() + const chunkOverlap = await defaultEmbeddingChunkOverlap() + const textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize, + chunkOverlap + }) + + for (const doc of knowledge.source) { + if (doc.type === "pdf" || doc.type === "application/pdf") { + const loader = new PageAssistPDFUrlLoader({ + name: doc.filename, + url: doc.content + }) + const docs = await loader.load() + const chunks = await textSplitter.splitDocuments(docs)
+ await PageAssistVectorStore.fromDocuments(chunks, ollamaEmbedding, { + knownledge_id: knowledge.id, + file_id: doc.source_id + }) + } + } + + await updateKnowledgeStatus(id, "finished") + } catch (error) { + console.error(`Error processing knowledge with id: ${id}`, error) + await updateKnowledgeStatus(id, "failed") + } finally { + console.log(`Finished processing knowledge with id: ${id}`) + } +} diff --git a/src/loader/pdf-url.ts b/src/loader/pdf-url.ts new file mode 100644 index 0000000..3124085 --- /dev/null +++ b/src/loader/pdf-url.ts @@ -0,0 +1,49 @@ +import { BaseDocumentLoader } from "langchain/document_loaders/base" +import { Document } from "@langchain/core/documents" +import { processPdf } from "@/libs/pdf" +export interface WebLoaderParams { + url: string + name: string +} + +export class PageAssistPDFUrlLoader + extends BaseDocumentLoader + implements WebLoaderParams +{ + pdf: { content: string; page: number }[] + url: string + name: string + + constructor({ url, name }: WebLoaderParams) { + super() + this.url = url + this.name = name + } + + async load(): Promise<Document<Record<string, any>>[]> { + const documents: Document[] = [] + + const data = await processPdf(this.url) + + for (let i = 1; i <= data.numPages; i += 1) { + const page = await data.getPage(i) + const content = await page.getTextContent() + + if (!content?.items?.length) { + continue + } + + const text = content.items + .map((item: any) => item.str) + .join("\n") + .replace(/\x00/g, "") + .trim() + documents.push(new Document({ + pageContent: text, + metadata: { source: this.name, page: i } + })) + } + + return documents + } +} diff --git a/src/loader/pdf.ts b/src/loader/pdf.ts index 097460b..d17fd79 100644 --- a/src/loader/pdf.ts +++ b/src/loader/pdf.ts @@ -1,37 +1,36 @@ import { BaseDocumentLoader } from "langchain/document_loaders/base" import { Document } from "@langchain/core/documents" export interface WebLoaderParams { - pdf: { content: string, page: number }[] - url: string + pdf: { content: string; page: number }[] +
url: string } export class PageAssistPDFLoader - extends BaseDocumentLoader - implements WebLoaderParams { - pdf: { content: string, page: number }[] - url: string + extends BaseDocumentLoader + implements WebLoaderParams +{ + pdf: { content: string; page: number }[] + url: string - constructor({ pdf, url }: WebLoaderParams) { - super() - this.pdf = pdf - this.url = url + constructor({ pdf, url }: WebLoaderParams) { + super() + this.pdf = pdf + this.url = url + } + + async load(): Promise<Document<Record<string, any>>[]> { + const documents: Document[] = [] + + for (const page of this.pdf) { + const metadata = { source: this.url, page: page.page } + documents.push(new Document({ pageContent: page.content, metadata })) } - async load(): Promise<Document<Record<string, any>>[]> { - const documents: Document[] = []; - - for (const page of this.pdf) { - const metadata = { source: this.url, page: page.page } - documents.push(new Document({ pageContent: page.content, metadata })) - } - - return [ - new Document({ - pageContent: documents.map((doc) => doc.pageContent).join("\n\n"), - metadata: documents.map((doc) => doc.metadata), - }), - ]; - - - } + return [ + new Document({ + pageContent: documents.map((doc) => doc.pageContent).join("\n\n"), + metadata: documents.map((doc) => doc.metadata) + }) + ] + } } diff --git a/src/queue/index.ts b/src/queue/index.ts new file mode 100644 index 0000000..f35a382 --- /dev/null +++ b/src/queue/index.ts @@ -0,0 +1,6 @@ +import { processKnowledge } from "@/libs/process-knowledge" +import PubSub from "pubsub-js" + +export const KNOWLEDGE_QUEUE = Symbol("queue") + +PubSub.subscribe(KNOWLEDGE_QUEUE, processKnowledge) diff --git a/src/routes/option-settings-knowledge.tsx b/src/routes/option-settings-knowledge.tsx index 0d8c573..aedddd3 100644 --- a/src/routes/option-settings-knowledge.tsx +++ b/src/routes/option-settings-knowledge.tsx @@ -1,11 +1,12 @@ import { SettingsLayout } from "~/components/Layouts/SettingsOptionLayout" import OptionLayout from "~/components/Layouts/Layout" +import {
KnowledgeSettings } from "@/components/Option/Knowledge" export const OptionKnowledgeBase = () => { return ( <OptionLayout> <SettingsLayout> - hey + <KnowledgeSettings /> </SettingsLayout> </OptionLayout> ) diff --git a/src/utils/to-source.ts b/src/utils/to-source.ts new file mode 100644 index 0000000..ecc5454 --- /dev/null +++ b/src/utils/to-source.ts @@ -0,0 +1,32 @@ +import { Source } from "@/db/knowledge" +import { UploadFile } from "antd" + +export const toBase64 = (file: File | Blob): Promise<string> => { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.readAsDataURL(file) + reader.onload = () => resolve(reader.result as string) + reader.onerror = (error) => reject(error) + }) +} + +export const toArrayBufferFromBase64 = async (base64: string) => { + const res = await fetch(base64) + const blob = await res.blob() + return await blob.arrayBuffer() +} + +export const generateSourceId = () => { + return "XXXXXXXX-XXXX-4XXX-YXXX-XXXXXXXXXXXX".replace(/[XY]/g, (c) => { + const r = (Math.random() * 16) | 0 + const v = c === "X" ? r : (r & 0x3) | 0x8 + return v.toString(16) + }) +} + +export const convertToSource = async (file: UploadFile): Promise<Source> => { + const type = file.type + const filename = file.name + const content = await toBase64(file.originFileObj) + return { content, type, filename, source_id: generateSourceId() } +} diff --git a/wxt.config.ts b/wxt.config.ts index 05526d4..954dfb9 100644 --- a/wxt.config.ts +++ b/wxt.config.ts @@ -24,7 +24,7 @@ export default defineConfig({ srcDir: "src", outDir: "build", manifest: { - version: "1.1.1", + version: "1.1.2", name: '__MSG_extName__', description: '__MSG_extDescription__', default_locale: 'en',