Refactor useMessage hook and remove unused code
This commit is contained in:
parent
2381588e72
commit
3904a74701
@ -1,89 +1,22 @@
|
||||
import React from "react"
|
||||
import { cleanUrl } from "~/libs/clean-url"
|
||||
import {
|
||||
defaultEmbeddingChunkOverlap,
|
||||
defaultEmbeddingChunkSize,
|
||||
defaultEmbeddingModelForRag,
|
||||
getOllamaURL,
|
||||
promptForRag,
|
||||
systemPromptForNonRag
|
||||
} from "~/services/ollama"
|
||||
import { useStoreMessage, type ChatHistory, type Message } from "~/store"
|
||||
import { useStoreMessage, type Message } from "~/store"
|
||||
import { ChatOllama } from "@langchain/community/chat_models/ollama"
|
||||
import {
|
||||
HumanMessage,
|
||||
AIMessage,
|
||||
type MessageContent,
|
||||
SystemMessage
|
||||
} from "@langchain/core/messages"
|
||||
import { HumanMessage, SystemMessage } from "@langchain/core/messages"
|
||||
import { getDataFromCurrentTab } from "~/libs/get-html"
|
||||
import { PageAssistHtmlLoader } from "~/loader/html"
|
||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||
import {
|
||||
createChatWithWebsiteChain,
|
||||
groupMessagesByConversation
|
||||
} from "~/chain/chat-with-website"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
import { chromeRunTime } from "~/libs/runtime"
|
||||
/**
 * Shape of a bot reply bundled with the chat history it belongs to.
 */
export type BotResponse = {
  /** The reply itself: its text plus the documents listed alongside it. */
  bot: {
    text: string
    // Element type is left open (`any`), as in the original declaration.
    sourceDocuments: Array<any>
  }
  /** Chat history value, typed by the store's `ChatHistory`. */
  history: ChatHistory
  /** Identifier string for the history. */
  history_id: string
}
|
||||
|
||||
/**
 * Builds a list of LangChain message objects from plain chat records.
 *
 * - `"user"` records become a `HumanMessage`. When the record carries an
 *   `image`, the content is an `image_url` part followed by a `text` part;
 *   otherwise it is a single `text` part.
 * - `"assistant"` records become an `AIMessage` with a single `text` part.
 * - `"system"` records have no branch here and are left out of the result.
 *
 * @param messages - Chat records in the order they should appear.
 * @returns The converted messages, in input order.
 */
const generateHistory = (
  messages: {
    role: "user" | "assistant" | "system"
    content: string
    image?: string
  }[]
) => {
  const history = []

  for (const { role, content: text, image } of messages) {
    switch (role) {
      case "user": {
        // A truthy image puts the image part first, then the text part.
        const parts: MessageContent = image
          ? [
              { type: "image_url", image_url: image },
              { type: "text", text }
            ]
          : [{ type: "text", text }]
        history.push(new HumanMessage({ content: parts }))
        break
      }
      case "assistant": {
        history.push(
          new AIMessage({
            content: [{ type: "text", text }]
          })
        )
        break
      }
      default:
        // Any other role (i.e. "system") contributes nothing.
        break
    }
  }

  return history
}
|
||||
import { memoryEmbedding } from "@/utils/memory-embeddings"
|
||||
|
||||
export const useMessage = () => {
|
||||
const {
|
||||
@ -129,47 +62,18 @@ export const useMessage = () => {
|
||||
setStreaming(false)
|
||||
}
|
||||
|
||||
/**
 * Loads the given HTML, splits it into chunks, embeds the chunks into a new
 * in-memory vector store and records that store under `url`.
 *
 * The embedding flag is raised just before the chunks are added and is
 * always lowered again, including when adding the documents fails; the
 * error still propagates to the caller in that case.
 *
 * @param url - Key under which the resulting store is tracked; also passed to the loader.
 * @param html - HTML content handed to `PageAssistHtmlLoader`.
 * @param ollamaEmbedding - Embeddings instance used to construct the store.
 * @returns The populated `MemoryVectorStore`.
 */
const memoryEmbedding = async (
  url: string,
  html: string,
  ollamaEmbedding: OllamaEmbeddings
) => {
  const loader = new PageAssistHtmlLoader({
    html,
    url
  })
  const docs = await loader.load()

  // Chunking parameters come from the configured embedding defaults.
  const chunkSize = await defaultEmbeddingChunkSize()
  const chunkOverlap = await defaultEmbeddingChunkOverlap()
  const textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize,
    chunkOverlap
  })

  const chunks = await textSplitter.splitDocuments(docs)

  const store = new MemoryVectorStore(ollamaEmbedding)

  setIsEmbedding(true)
  try {
    await store.addDocuments(chunks)
    setKeepTrackOfEmbedding({
      ...keepTrackOfEmbedding,
      [url]: store
    })
  } finally {
    // Reset even on failure so the flag cannot stay stuck at `true`.
    setIsEmbedding(false)
  }

  return store
}
|
||||
|
||||
const chatWithWebsiteMode = async (message: string) => {
|
||||
try {
|
||||
let isAlreadyExistEmbedding: MemoryVectorStore
|
||||
let embedURL: string, embedHTML: string
|
||||
let embedURL: string, embedHTML: string, embedType: string
|
||||
let embedPDF: { content: string; page: number }[] = []
|
||||
|
||||
if (messages.length === 0) {
|
||||
const { content: html, url, type } = await getDataFromCurrentTab()
|
||||
const { content: html, url, type, pdf } = await getDataFromCurrentTab()
|
||||
embedHTML = html
|
||||
embedURL = url
|
||||
embedType = type
|
||||
embedPDF = pdf
|
||||
setCurrentURL(url)
|
||||
isAlreadyExistEmbedding = keepTrackOfEmbedding[currentURL]
|
||||
} else {
|
||||
@ -212,11 +116,16 @@ export const useMessage = () => {
|
||||
if (isAlreadyExistEmbedding) {
|
||||
vectorstore = isAlreadyExistEmbedding
|
||||
} else {
|
||||
vectorstore = await memoryEmbedding(
|
||||
embedURL,
|
||||
embedHTML,
|
||||
ollamaEmbedding
|
||||
)
|
||||
vectorstore = await memoryEmbedding({
|
||||
html: embedHTML,
|
||||
keepTrackOfEmbedding: keepTrackOfEmbedding,
|
||||
ollamaEmbedding: ollamaEmbedding,
|
||||
pdf: embedPDF,
|
||||
setIsEmbedding: setIsEmbedding,
|
||||
setKeepTrackOfEmbedding: setKeepTrackOfEmbedding,
|
||||
type: embedType,
|
||||
url: embedURL
|
||||
})
|
||||
}
|
||||
|
||||
const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } =
|
||||
|
@ -1,14 +1,11 @@
|
||||
import i18n from "i18next";
|
||||
import LanguageDetector from "i18next-browser-languagedetector";
|
||||
import { initReactI18next } from "react-i18next";
|
||||
import { en } from "./lang/en";
|
||||
import { ml } from "./lang/ml";
|
||||
|
||||
i18n
|
||||
.use(LanguageDetector)
|
||||
.use(initReactI18next)
|
||||
.init({
|
||||
debug: true,
|
||||
resources: {
|
||||
en: en,
|
||||
ml: ml
|
||||
|
@ -26,10 +26,7 @@ export const getPdf = async (data: ArrayBuffer) => {
|
||||
|
||||
const _getHtml = async () => {
|
||||
const url = window.location.href
|
||||
// check the content type
|
||||
if (document.contentType === "application/pdf") {
|
||||
|
||||
|
||||
return { url, content: "", type: "pdf" }
|
||||
}
|
||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
||||
@ -40,6 +37,7 @@ const _getHtml = async () => {
|
||||
)
|
||||
return { url, content: html, type: "html" }
|
||||
}
|
||||
|
||||
export const getDataFromCurrentTab = async () => {
|
||||
const result = new Promise((resolve) => {
|
||||
chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
|
||||
@ -66,7 +64,10 @@ export const getDataFromCurrentTab = async () => {
|
||||
if (type === "pdf") {
|
||||
const res = await fetch(url)
|
||||
const data = await res.arrayBuffer()
|
||||
let pdfHtml: string[] = []
|
||||
let pdfHtml: {
|
||||
content: string
|
||||
page: number
|
||||
}[] = []
|
||||
const pdf = await getPdf(data)
|
||||
|
||||
for (let i = 1; i <= pdf.numPages; i += 1) {
|
||||
@ -79,18 +80,22 @@ export const getDataFromCurrentTab = async () => {
|
||||
|
||||
const text = content?.items.map((item: any) => item.str).join("\n")
|
||||
.replace(/\x00/g, "").trim();
|
||||
pdfHtml.push(`<div class="pdf-page">${text}</div>`)
|
||||
pdfHtml.push({
|
||||
content: text,
|
||||
page: i
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
url,
|
||||
content: pdfHtml.join(""),
|
||||
type: "html"
|
||||
content: "",
|
||||
pdf: pdfHtml,
|
||||
type: "pdf"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return { url, content, type }
|
||||
return { url, content, type, pdf: [] }
|
||||
}
|
||||
|
||||
|
37
src/loader/pdf.ts
Normal file
37
src/loader/pdf.ts
Normal file
@ -0,0 +1,37 @@
|
||||
import { BaseDocumentLoader } from "langchain/document_loaders/base"
|
||||
import { Document } from "@langchain/core/documents"
|
||||
/**
 * Input needed to load a PDF: its pages and a URL.
 */
export interface WebLoaderParams {
  /** One entry per page: the page's text content and its page number. */
  pdf: Array<{ content: string; page: number }>
  /** URL string accompanying the pages. */
  url: string
}
|
||||
|
||||
/**
 * Document loader that turns a list of PDF pages into one combined document.
 */
export class PageAssistPDFLoader
  extends BaseDocumentLoader
  implements WebLoaderParams
{
  pdf: { content: string; page: number }[]
  url: string

  /**
   * @param params - The PDF pages and the URL to record as their source.
   */
  constructor(params: WebLoaderParams) {
    super()
    this.url = params.url
    this.pdf = params.pdf
  }

  /**
   * Builds one `Document` per page (metadata `{ source, page }`), then
   * merges them into a single `Document` whose content is the page texts
   * joined by a blank line.
   *
   * @returns An array holding exactly one combined document.
   */
  async load(): Promise<Document<Record<string, any>>[]> {
    const perPage = this.pdf.map(
      ({ content, page }) =>
        new Document({
          pageContent: content,
          metadata: { source: this.url, page }
        })
    )

    const combinedText = perPage.map((doc) => doc.pageContent).join("\n\n")
    // NOTE(review): metadata of the merged document is an ARRAY of the
    // per-page metadata records, not a single record — confirm that
    // downstream consumers expect this shape.
    const combinedMetadata = perPage.map((doc) => doc.metadata)

    return [
      new Document({
        pageContent: combinedText,
        metadata: combinedMetadata
      })
    ]
  }
}
|
63
src/utils/memory-embeddings.ts
Normal file
63
src/utils/memory-embeddings.ts
Normal file
@ -0,0 +1,63 @@
|
||||
import { PageAssistHtmlLoader } from "~/loader/html"
|
||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||
import { defaultEmbeddingChunkOverlap, defaultEmbeddingChunkSize } from "@/services/ollama"
|
||||
import { PageAssistPDFLoader } from "@/loader/pdf"
|
||||
|
||||
|
||||
/**
 * Picks the document loader for the given content type.
 *
 * @param options.url - URL passed to whichever loader is created.
 * @param options.html - HTML content, used when `type` is not `"pdf"`.
 * @param options.type - Content type; exactly `"pdf"` selects the PDF loader.
 * @param options.pdf - PDF pages, used when `type` is `"pdf"`.
 * @returns A `PageAssistPDFLoader` for `"pdf"`, otherwise a `PageAssistHtmlLoader`.
 */
export const getLoader = ({
  html,
  pdf,
  type,
  url
}: {
  url: string
  html: string
  type: string
  pdf: { content: string; page: number }[]
}) =>
  type === "pdf"
    ? new PageAssistPDFLoader({ pdf, url })
    : new PageAssistHtmlLoader({ html, url })
|
||||
|
||||
/**
 * Loads page content (HTML or PDF), splits it into chunks, embeds the chunks
 * into a new in-memory vector store and records that store under `url`.
 *
 * `setIsEmbedding(true)` is called up front and `setIsEmbedding(false)` is
 * guaranteed to run afterwards, whether the work succeeds or throws. Errors
 * from loading, splitting or embedding still propagate to the caller.
 *
 * @param options.url - Key under which the store is tracked; also given to the loader.
 * @param options.html - HTML content for the HTML loader.
 * @param options.type - Content type forwarded to `getLoader`.
 * @param options.pdf - PDF pages for the PDF loader.
 * @param options.keepTrackOfEmbedding - Current map of URL to store; spread into the updated map.
 * @param options.ollamaEmbedding - Embeddings instance used to construct the store.
 * @param options.setIsEmbedding - Setter for the "embedding in progress" flag.
 * @param options.setKeepTrackOfEmbedding - Setter receiving the updated URL-to-store map.
 * @returns The populated `MemoryVectorStore`.
 */
export const memoryEmbedding = async ({
  html,
  keepTrackOfEmbedding,
  ollamaEmbedding,
  pdf,
  setIsEmbedding,
  setKeepTrackOfEmbedding,
  type,
  url
}: {
  url: string
  html: string
  type: string
  pdf: { content: string; page: number }[]
  keepTrackOfEmbedding: Record<string, MemoryVectorStore>
  ollamaEmbedding: OllamaEmbeddings
  setIsEmbedding: (value: boolean) => void
  setKeepTrackOfEmbedding: (value: Record<string, MemoryVectorStore>) => void
}) => {
  setIsEmbedding(true)

  try {
    const loader = getLoader({ html, pdf, type, url })
    const docs = await loader.load()

    // Chunking parameters come from the configured embedding defaults.
    const chunkSize = await defaultEmbeddingChunkSize()
    const chunkOverlap = await defaultEmbeddingChunkOverlap()
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap
    })

    const chunks = await textSplitter.splitDocuments(docs)

    const store = new MemoryVectorStore(ollamaEmbedding)
    await store.addDocuments(chunks)

    setKeepTrackOfEmbedding({
      ...keepTrackOfEmbedding,
      [url]: store
    })

    return store
  } finally {
    // Always clear the flag so a failed embedding cannot leave it stuck at `true`.
    setIsEmbedding(false)
  }
}
|
Loading…
x
Reference in New Issue
Block a user