Refactor useMessage hook and remove unused code
This commit is contained in:
parent
2381588e72
commit
3904a74701
@ -1,89 +1,22 @@
|
|||||||
import React from "react"
|
import React from "react"
|
||||||
import { cleanUrl } from "~/libs/clean-url"
|
import { cleanUrl } from "~/libs/clean-url"
|
||||||
import {
|
import {
|
||||||
defaultEmbeddingChunkOverlap,
|
|
||||||
defaultEmbeddingChunkSize,
|
|
||||||
defaultEmbeddingModelForRag,
|
defaultEmbeddingModelForRag,
|
||||||
getOllamaURL,
|
getOllamaURL,
|
||||||
promptForRag,
|
promptForRag,
|
||||||
systemPromptForNonRag
|
systemPromptForNonRag
|
||||||
} from "~/services/ollama"
|
} from "~/services/ollama"
|
||||||
import { useStoreMessage, type ChatHistory, type Message } from "~/store"
|
import { useStoreMessage, type Message } from "~/store"
|
||||||
import { ChatOllama } from "@langchain/community/chat_models/ollama"
|
import { ChatOllama } from "@langchain/community/chat_models/ollama"
|
||||||
import {
|
import { HumanMessage, SystemMessage } from "@langchain/core/messages"
|
||||||
HumanMessage,
|
|
||||||
AIMessage,
|
|
||||||
type MessageContent,
|
|
||||||
SystemMessage
|
|
||||||
} from "@langchain/core/messages"
|
|
||||||
import { getDataFromCurrentTab } from "~/libs/get-html"
|
import { getDataFromCurrentTab } from "~/libs/get-html"
|
||||||
import { PageAssistHtmlLoader } from "~/loader/html"
|
|
||||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
|
||||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||||
import {
|
import {
|
||||||
createChatWithWebsiteChain,
|
createChatWithWebsiteChain,
|
||||||
groupMessagesByConversation
|
groupMessagesByConversation
|
||||||
} from "~/chain/chat-with-website"
|
} from "~/chain/chat-with-website"
|
||||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||||
import { chromeRunTime } from "~/libs/runtime"
|
import { memoryEmbedding } from "@/utils/memory-embeddings"
|
||||||
export type BotResponse = {
|
|
||||||
bot: {
|
|
||||||
text: string
|
|
||||||
sourceDocuments: any[]
|
|
||||||
}
|
|
||||||
history: ChatHistory
|
|
||||||
history_id: string
|
|
||||||
}
|
|
||||||
|
|
||||||
const generateHistory = (
|
|
||||||
messages: {
|
|
||||||
role: "user" | "assistant" | "system"
|
|
||||||
content: string
|
|
||||||
image?: string
|
|
||||||
}[]
|
|
||||||
) => {
|
|
||||||
let history = []
|
|
||||||
for (const message of messages) {
|
|
||||||
if (message.role === "user") {
|
|
||||||
let content: MessageContent = [
|
|
||||||
{
|
|
||||||
type: "text",
|
|
||||||
text: message.content
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
if (message.image) {
|
|
||||||
content = [
|
|
||||||
{
|
|
||||||
type: "image_url",
|
|
||||||
image_url: message.image
|
|
||||||
},
|
|
||||||
{
|
|
||||||
type: "text",
|
|
||||||
text: message.content
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
history.push(
|
|
||||||
new HumanMessage({
|
|
||||||
content: content
|
|
||||||
})
|
|
||||||
)
|
|
||||||
} else if (message.role === "assistant") {
|
|
||||||
history.push(
|
|
||||||
new AIMessage({
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: "text",
|
|
||||||
text: message.content
|
|
||||||
}
|
|
||||||
]
|
|
||||||
})
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return history
|
|
||||||
}
|
|
||||||
|
|
||||||
export const useMessage = () => {
|
export const useMessage = () => {
|
||||||
const {
|
const {
|
||||||
@ -129,47 +62,18 @@ export const useMessage = () => {
|
|||||||
setStreaming(false)
|
setStreaming(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
const memoryEmbedding = async (
|
|
||||||
url: string,
|
|
||||||
html: string,
|
|
||||||
ollamaEmbedding: OllamaEmbeddings
|
|
||||||
) => {
|
|
||||||
const loader = new PageAssistHtmlLoader({
|
|
||||||
html,
|
|
||||||
url
|
|
||||||
})
|
|
||||||
const docs = await loader.load()
|
|
||||||
const chunkSize = await defaultEmbeddingChunkSize()
|
|
||||||
const chunkOverlap = await defaultEmbeddingChunkOverlap()
|
|
||||||
const textSplitter = new RecursiveCharacterTextSplitter({
|
|
||||||
chunkSize,
|
|
||||||
chunkOverlap
|
|
||||||
})
|
|
||||||
|
|
||||||
const chunks = await textSplitter.splitDocuments(docs)
|
|
||||||
|
|
||||||
const store = new MemoryVectorStore(ollamaEmbedding)
|
|
||||||
|
|
||||||
setIsEmbedding(true)
|
|
||||||
|
|
||||||
await store.addDocuments(chunks)
|
|
||||||
setKeepTrackOfEmbedding({
|
|
||||||
...keepTrackOfEmbedding,
|
|
||||||
[url]: store
|
|
||||||
})
|
|
||||||
setIsEmbedding(false)
|
|
||||||
|
|
||||||
return store
|
|
||||||
}
|
|
||||||
|
|
||||||
const chatWithWebsiteMode = async (message: string) => {
|
const chatWithWebsiteMode = async (message: string) => {
|
||||||
try {
|
try {
|
||||||
let isAlreadyExistEmbedding: MemoryVectorStore
|
let isAlreadyExistEmbedding: MemoryVectorStore
|
||||||
let embedURL: string, embedHTML: string
|
let embedURL: string, embedHTML: string, embedType: string
|
||||||
|
let embedPDF: { content: string; page: number }[] = []
|
||||||
|
|
||||||
if (messages.length === 0) {
|
if (messages.length === 0) {
|
||||||
const { content: html, url, type } = await getDataFromCurrentTab()
|
const { content: html, url, type, pdf } = await getDataFromCurrentTab()
|
||||||
embedHTML = html
|
embedHTML = html
|
||||||
embedURL = url
|
embedURL = url
|
||||||
|
embedType = type
|
||||||
|
embedPDF = pdf
|
||||||
setCurrentURL(url)
|
setCurrentURL(url)
|
||||||
isAlreadyExistEmbedding = keepTrackOfEmbedding[currentURL]
|
isAlreadyExistEmbedding = keepTrackOfEmbedding[currentURL]
|
||||||
} else {
|
} else {
|
||||||
@ -212,11 +116,16 @@ export const useMessage = () => {
|
|||||||
if (isAlreadyExistEmbedding) {
|
if (isAlreadyExistEmbedding) {
|
||||||
vectorstore = isAlreadyExistEmbedding
|
vectorstore = isAlreadyExistEmbedding
|
||||||
} else {
|
} else {
|
||||||
vectorstore = await memoryEmbedding(
|
vectorstore = await memoryEmbedding({
|
||||||
embedURL,
|
html: embedHTML,
|
||||||
embedHTML,
|
keepTrackOfEmbedding: keepTrackOfEmbedding,
|
||||||
ollamaEmbedding
|
ollamaEmbedding: ollamaEmbedding,
|
||||||
)
|
pdf: embedPDF,
|
||||||
|
setIsEmbedding: setIsEmbedding,
|
||||||
|
setKeepTrackOfEmbedding: setKeepTrackOfEmbedding,
|
||||||
|
type: embedType,
|
||||||
|
url: embedURL
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } =
|
const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } =
|
||||||
|
@ -1,14 +1,11 @@
|
|||||||
import i18n from "i18next";
|
import i18n from "i18next";
|
||||||
import LanguageDetector from "i18next-browser-languagedetector";
|
|
||||||
import { initReactI18next } from "react-i18next";
|
import { initReactI18next } from "react-i18next";
|
||||||
import { en } from "./lang/en";
|
import { en } from "./lang/en";
|
||||||
import { ml } from "./lang/ml";
|
import { ml } from "./lang/ml";
|
||||||
|
|
||||||
i18n
|
i18n
|
||||||
.use(LanguageDetector)
|
|
||||||
.use(initReactI18next)
|
.use(initReactI18next)
|
||||||
.init({
|
.init({
|
||||||
debug: true,
|
|
||||||
resources: {
|
resources: {
|
||||||
en: en,
|
en: en,
|
||||||
ml: ml
|
ml: ml
|
||||||
|
@ -26,10 +26,7 @@ export const getPdf = async (data: ArrayBuffer) => {
|
|||||||
|
|
||||||
const _getHtml = async () => {
|
const _getHtml = async () => {
|
||||||
const url = window.location.href
|
const url = window.location.href
|
||||||
// check the content type
|
|
||||||
if (document.contentType === "application/pdf") {
|
if (document.contentType === "application/pdf") {
|
||||||
|
|
||||||
|
|
||||||
return { url, content: "", type: "pdf" }
|
return { url, content: "", type: "pdf" }
|
||||||
}
|
}
|
||||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
const html = Array.from(document.querySelectorAll("script")).reduce(
|
||||||
@ -40,6 +37,7 @@ const _getHtml = async () => {
|
|||||||
)
|
)
|
||||||
return { url, content: html, type: "html" }
|
return { url, content: html, type: "html" }
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getDataFromCurrentTab = async () => {
|
export const getDataFromCurrentTab = async () => {
|
||||||
const result = new Promise((resolve) => {
|
const result = new Promise((resolve) => {
|
||||||
chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
|
chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
|
||||||
@ -66,7 +64,10 @@ export const getDataFromCurrentTab = async () => {
|
|||||||
if (type === "pdf") {
|
if (type === "pdf") {
|
||||||
const res = await fetch(url)
|
const res = await fetch(url)
|
||||||
const data = await res.arrayBuffer()
|
const data = await res.arrayBuffer()
|
||||||
let pdfHtml: string[] = []
|
let pdfHtml: {
|
||||||
|
content: string
|
||||||
|
page: number
|
||||||
|
}[] = []
|
||||||
const pdf = await getPdf(data)
|
const pdf = await getPdf(data)
|
||||||
|
|
||||||
for (let i = 1; i <= pdf.numPages; i += 1) {
|
for (let i = 1; i <= pdf.numPages; i += 1) {
|
||||||
@ -79,18 +80,22 @@ export const getDataFromCurrentTab = async () => {
|
|||||||
|
|
||||||
const text = content?.items.map((item: any) => item.str).join("\n")
|
const text = content?.items.map((item: any) => item.str).join("\n")
|
||||||
.replace(/\x00/g, "").trim();
|
.replace(/\x00/g, "").trim();
|
||||||
pdfHtml.push(`<div class="pdf-page">${text}</div>`)
|
pdfHtml.push({
|
||||||
|
content: text,
|
||||||
|
page: i
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
content: pdfHtml.join(""),
|
content: "",
|
||||||
type: "html"
|
pdf: pdfHtml,
|
||||||
|
type: "pdf"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return { url, content, type }
|
return { url, content, type, pdf: [] }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
37
src/loader/pdf.ts
Normal file
37
src/loader/pdf.ts
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import { BaseDocumentLoader } from "langchain/document_loaders/base"
|
||||||
|
import { Document } from "@langchain/core/documents"
|
||||||
|
/**
 * Constructor arguments for {@link PageAssistPDFLoader}.
 */
export interface WebLoaderParams {
  /** Extracted text of each PDF page, paired with that page's number. */
  pdf: { content: string, page: number }[]
  /** Address the PDF was read from; recorded as the document `source`. */
  url: string
}

/**
 * Document loader that wraps already-extracted PDF page text in a single
 * `Document` so it can be split and embedded like any other source.
 */
export class PageAssistPDFLoader
  extends BaseDocumentLoader
  implements WebLoaderParams {
  pdf: { content: string, page: number }[]
  url: string

  constructor({ pdf, url }: WebLoaderParams) {
    super()
    this.pdf = pdf
    this.url = url
  }

  /**
   * Joins the text of every page (separated by a blank line) into one
   * document.
   *
   * The metadata is a plain record (`source` plus the list of page numbers)
   * rather than an array of per-page records: an array has no `source` key
   * and degrades into index-keyed properties once it is spread into chunk
   * metadata.
   *
   * @returns A one-element array holding the merged document. An empty `pdf`
   *   list still yields one document with empty `pageContent`.
   */
  async load(): Promise<Document<Record<string, any>>[]> {
    const pageContent = this.pdf.map((page) => page.content).join("\n\n")
    const metadata = {
      source: this.url,
      pages: this.pdf.map((page) => page.page)
    }

    return [new Document({ pageContent, metadata })]
  }
}
|
63
src/utils/memory-embeddings.ts
Normal file
63
src/utils/memory-embeddings.ts
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
import { PageAssistHtmlLoader } from "~/loader/html"
|
||||||
|
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||||
|
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||||
|
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||||
|
import { defaultEmbeddingChunkOverlap, defaultEmbeddingChunkSize } from "@/services/ollama"
|
||||||
|
import { PageAssistPDFLoader } from "@/loader/pdf"
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Chooses the document loader that matches the captured content.
 *
 * @param html - Page markup, used for every `type` other than `"pdf"`.
 * @param pdf - Per-page PDF text, used only when `type` is `"pdf"`.
 * @param type - Content kind; `"pdf"` selects the PDF loader.
 * @param url - Source address handed to whichever loader is built.
 * @returns A `PageAssistPDFLoader` for PDFs, otherwise a `PageAssistHtmlLoader`.
 */
export const getLoader = ({ html, pdf, type, url }: {
  url: string,
  html: string,
  type: string,
  pdf: { content: string, page: number }[]
}) => {
  const isPdf = type === "pdf"

  return isPdf
    ? new PageAssistPDFLoader({ pdf, url })
    : new PageAssistHtmlLoader({ html, url })
}
|
||||||
|
|
||||||
|
/**
 * Loads the given page content, splits it into chunks, embeds the chunks into
 * a fresh in-memory vector store and records that store under `url`.
 *
 * @param html - Page markup, used when `type` is not `"pdf"`.
 * @param pdf - Per-page PDF text, used when `type` is `"pdf"`.
 * @param type - Content kind forwarded to `getLoader`.
 * @param url - Key under which the new store is recorded.
 * @param keepTrackOfEmbedding - Existing stores; copied into the updated map.
 * @param ollamaEmbedding - Embedding model backing the vector store.
 * @param setIsEmbedding - Called with `true` before work starts and with
 *   `false` when it ends, whether it succeeded or threw.
 * @param setKeepTrackOfEmbedding - Receives the map extended with the new store.
 * @returns The populated vector store.
 */
export const memoryEmbedding = async (
  {
    html,
    keepTrackOfEmbedding,
    ollamaEmbedding,
    pdf,
    setIsEmbedding,
    setKeepTrackOfEmbedding,
    type,
    url
  }: {
    url: string,
    html: string,
    type: string,
    pdf: { content: string, page: number }[],
    keepTrackOfEmbedding: Record<string, MemoryVectorStore>,
    ollamaEmbedding: OllamaEmbeddings,
    setIsEmbedding: (value: boolean) => void,
    setKeepTrackOfEmbedding: (value: Record<string, MemoryVectorStore>) => void
  }
) => {
  setIsEmbedding(true)

  try {
    const loader = getLoader({ html, pdf, type, url })
    const docs = await loader.load()

    // The two settings are independent, so read them concurrently.
    const [chunkSize, chunkOverlap] = await Promise.all([
      defaultEmbeddingChunkSize(),
      defaultEmbeddingChunkOverlap()
    ])
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap
    })
    const chunks = await textSplitter.splitDocuments(docs)

    const store = new MemoryVectorStore(ollamaEmbedding)
    await store.addDocuments(chunks)

    setKeepTrackOfEmbedding({
      ...keepTrackOfEmbedding,
      [url]: store
    })

    return store
  } finally {
    // Always clear the flag: a failure while loading, splitting or embedding
    // must not leave the caller stuck in the "embedding" state.
    setIsEmbedding(false)
  }
}
|
Loading…
x
Reference in New Issue
Block a user