Update dependencies and add YouTube transcript support
This commit is contained in:
parent
6d559eda2f
commit
7a72961562
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "pageassist",
|
"name": "pageassist",
|
||||||
"displayName": "Page Assist - A Web UI for Local AI Models",
|
"displayName": "Page Assist - A Web UI for Local AI Models",
|
||||||
"version": "1.0.7",
|
"version": "1.0.8",
|
||||||
"description": "Use your locally running AI models to assist you in your web browsing.",
|
"description": "Use your locally running AI models to assist you in your web browsing.",
|
||||||
"author": "n4ze3m",
|
"author": "n4ze3m",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@ -26,7 +26,7 @@
|
|||||||
"dayjs": "^1.11.10",
|
"dayjs": "^1.11.10",
|
||||||
"html-to-text": "^9.0.5",
|
"html-to-text": "^9.0.5",
|
||||||
"langchain": "^0.1.9",
|
"langchain": "^0.1.9",
|
||||||
"lucide-react": "^0.340.0",
|
"lucide-react": "^0.350.0",
|
||||||
"plasmo": "0.84.1",
|
"plasmo": "0.84.1",
|
||||||
"property-information": "^6.4.1",
|
"property-information": "^6.4.1",
|
||||||
"react": "18.2.0",
|
"react": "18.2.0",
|
||||||
@ -38,6 +38,7 @@
|
|||||||
"rehype-mathjax": "4.0.3",
|
"rehype-mathjax": "4.0.3",
|
||||||
"remark-gfm": "3.0.1",
|
"remark-gfm": "3.0.1",
|
||||||
"remark-math": "5.1.1",
|
"remark-math": "5.1.1",
|
||||||
|
"yt-transcript": "^0.0.2",
|
||||||
"zustand": "^4.5.0"
|
"zustand": "^4.5.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
@ -38,7 +38,7 @@ export const EmptySidePanel = () => {
|
|||||||
}
|
}
|
||||||
}, [ollamaInfo])
|
}, [ollamaInfo])
|
||||||
|
|
||||||
const { setSelectedModel, selectedModel, chatMode, setChatMode } =
|
const { setSelectedModel, selectedModel, chatMode, setChatMode, } =
|
||||||
useMessage()
|
useMessage()
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -20,9 +20,12 @@ import { getHtmlOfCurrentTab } from "~libs/get-html"
|
|||||||
import { PageAssistHtmlLoader } from "~loader/html"
|
import { PageAssistHtmlLoader } from "~loader/html"
|
||||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||||
import { createChatWithWebsiteChain, groupMessagesByConversation } from "~chain/chat-with-website"
|
import {
|
||||||
|
createChatWithWebsiteChain,
|
||||||
|
groupMessagesByConversation
|
||||||
|
} from "~chain/chat-with-website"
|
||||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||||
|
import { chromeRunTime } from "~libs/runtime"
|
||||||
export type BotResponse = {
|
export type BotResponse = {
|
||||||
bot: {
|
bot: {
|
||||||
text: string
|
text: string
|
||||||
@ -134,11 +137,11 @@ export const useMessage = () => {
|
|||||||
url
|
url
|
||||||
})
|
})
|
||||||
const docs = await loader.load()
|
const docs = await loader.load()
|
||||||
const chunkSize = await defaultEmbeddingChunkSize();
|
const chunkSize = await defaultEmbeddingChunkSize()
|
||||||
const chunkOverlap = await defaultEmbeddingChunkOverlap();
|
const chunkOverlap = await defaultEmbeddingChunkOverlap()
|
||||||
const textSplitter = new RecursiveCharacterTextSplitter({
|
const textSplitter = new RecursiveCharacterTextSplitter({
|
||||||
chunkSize,
|
chunkSize,
|
||||||
chunkOverlap,
|
chunkOverlap
|
||||||
})
|
})
|
||||||
|
|
||||||
const chunks = await textSplitter.splitDocuments(docs)
|
const chunks = await textSplitter.splitDocuments(docs)
|
||||||
@ -158,64 +161,65 @@ export const useMessage = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const chatWithWebsiteMode = async (message: string) => {
|
const chatWithWebsiteMode = async (message: string) => {
|
||||||
const ollamaUrl = await getOllamaURL()
|
|
||||||
const { html, url } = await getHtmlOfCurrentTab()
|
|
||||||
const isAlreadyExistEmbedding = keepTrackOfEmbedding[url]
|
|
||||||
let newMessage: Message[] = [
|
|
||||||
...messages,
|
|
||||||
{
|
|
||||||
isBot: false,
|
|
||||||
name: "You",
|
|
||||||
message,
|
|
||||||
sources: []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
isBot: true,
|
|
||||||
name: selectedModel,
|
|
||||||
message: "▋",
|
|
||||||
sources: []
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
const appendingIndex = newMessage.length - 1
|
|
||||||
setMessages(newMessage)
|
|
||||||
const embeddingModle = await defaultEmbeddingModelForRag()
|
|
||||||
const ollamaEmbedding = new OllamaEmbeddings({
|
|
||||||
model: embeddingModle || selectedModel,
|
|
||||||
baseUrl: cleanUrl(ollamaUrl)
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
const ollamaChat = new ChatOllama({
|
|
||||||
model: selectedModel,
|
|
||||||
baseUrl: cleanUrl(ollamaUrl)
|
|
||||||
})
|
|
||||||
|
|
||||||
let vectorstore: MemoryVectorStore
|
|
||||||
|
|
||||||
if (isAlreadyExistEmbedding) {
|
|
||||||
vectorstore = isAlreadyExistEmbedding
|
|
||||||
} else {
|
|
||||||
vectorstore = await memoryEmbedding(url, html, ollamaEmbedding)
|
|
||||||
}
|
|
||||||
|
|
||||||
const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } =
|
|
||||||
await promptForRag()
|
|
||||||
|
|
||||||
const sanitizedQuestion = message.trim().replaceAll("\n", " ")
|
|
||||||
|
|
||||||
const chain = createChatWithWebsiteChain({
|
|
||||||
llm: ollamaChat,
|
|
||||||
question_llm: ollamaChat,
|
|
||||||
question_template: questionPrompt,
|
|
||||||
response_template: systemPrompt,
|
|
||||||
retriever: vectorstore.asRetriever()
|
|
||||||
})
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
let isAlreadyExistEmbedding: MemoryVectorStore
|
||||||
|
const { html, url } = await getHtmlOfCurrentTab()
|
||||||
|
isAlreadyExistEmbedding = keepTrackOfEmbedding[url]
|
||||||
|
let newMessage: Message[] = [
|
||||||
|
...messages,
|
||||||
|
{
|
||||||
|
isBot: false,
|
||||||
|
name: "You",
|
||||||
|
message,
|
||||||
|
sources: []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
isBot: true,
|
||||||
|
name: selectedModel,
|
||||||
|
message: "▋",
|
||||||
|
sources: []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
const appendingIndex = newMessage.length - 1
|
||||||
|
setMessages(newMessage)
|
||||||
|
const ollamaUrl = await getOllamaURL()
|
||||||
|
const embeddingModle = await defaultEmbeddingModelForRag()
|
||||||
|
|
||||||
|
const ollamaEmbedding = new OllamaEmbeddings({
|
||||||
|
model: embeddingModle || selectedModel,
|
||||||
|
baseUrl: cleanUrl(ollamaUrl)
|
||||||
|
})
|
||||||
|
|
||||||
|
const ollamaChat = new ChatOllama({
|
||||||
|
model: selectedModel,
|
||||||
|
baseUrl: cleanUrl(ollamaUrl)
|
||||||
|
})
|
||||||
|
|
||||||
|
let vectorstore: MemoryVectorStore
|
||||||
|
|
||||||
|
if (isAlreadyExistEmbedding) {
|
||||||
|
vectorstore = isAlreadyExistEmbedding
|
||||||
|
} else {
|
||||||
|
vectorstore = await memoryEmbedding(url, html, ollamaEmbedding)
|
||||||
|
}
|
||||||
|
|
||||||
|
const { ragPrompt: systemPrompt, ragQuestionPrompt: questionPrompt } =
|
||||||
|
await promptForRag()
|
||||||
|
|
||||||
|
const sanitizedQuestion = message.trim().replaceAll("\n", " ")
|
||||||
|
|
||||||
|
const chain = createChatWithWebsiteChain({
|
||||||
|
llm: ollamaChat,
|
||||||
|
question_llm: ollamaChat,
|
||||||
|
question_template: questionPrompt,
|
||||||
|
response_template: systemPrompt,
|
||||||
|
retriever: vectorstore.asRetriever()
|
||||||
|
})
|
||||||
|
|
||||||
const chunks = await chain.stream({
|
const chunks = await chain.stream({
|
||||||
question: sanitizedQuestion,
|
question: sanitizedQuestion,
|
||||||
chat_history: groupMessagesByConversation(history),
|
chat_history: groupMessagesByConversation(history)
|
||||||
})
|
})
|
||||||
let count = 0
|
let count = 0
|
||||||
for await (const chunk of chunks) {
|
for await (const chunk of chunks) {
|
||||||
@ -258,7 +262,8 @@ export const useMessage = () => {
|
|||||||
{
|
{
|
||||||
isBot: true,
|
isBot: true,
|
||||||
name: selectedModel,
|
name: selectedModel,
|
||||||
message: `Something went wrong. Check out the following logs:
|
message: `Error in chat with website mode. Check out the following logs:
|
||||||
|
|
||||||
~~~
|
~~~
|
||||||
${e?.message}
|
${e?.message}
|
||||||
~~~
|
~~~
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
const _getHtml = () => {
|
const _getHtml = () => {
|
||||||
const url = window.location.href
|
const url = window.location.href
|
||||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
const html = Array.from(document.querySelectorAll("script")).reduce(
|
||||||
@ -29,3 +30,4 @@ export const getHtmlOfCurrentTab = async () => {
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,6 +2,20 @@ import { BaseDocumentLoader } from "langchain/document_loaders/base"
|
|||||||
import { Document } from "@langchain/core/documents"
|
import { Document } from "@langchain/core/documents"
|
||||||
import { compile } from "html-to-text"
|
import { compile } from "html-to-text"
|
||||||
import { chromeRunTime } from "~libs/runtime"
|
import { chromeRunTime } from "~libs/runtime"
|
||||||
|
import { YtTranscript } from "yt-transcript"
|
||||||
|
|
||||||
|
const YT_REGEX =
|
||||||
|
/(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=)?([a-zA-Z0-9_-]+)/
|
||||||
|
|
||||||
|
const isYoutubeLink = (url: string) => {
|
||||||
|
return YT_REGEX.test(url)
|
||||||
|
}
|
||||||
|
|
||||||
|
const getTranscript = async (url: string) => {
|
||||||
|
const ytTranscript = new YtTranscript({ url })
|
||||||
|
return await ytTranscript.getTranscript()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
export interface WebLoaderParams {
|
export interface WebLoaderParams {
|
||||||
html: string
|
html: string
|
||||||
@ -21,6 +35,29 @@ export class PageAssistHtmlLoader
|
|||||||
}
|
}
|
||||||
|
|
||||||
async load(): Promise<Document<Record<string, any>>[]> {
|
async load(): Promise<Document<Record<string, any>>[]> {
|
||||||
|
if (isYoutubeLink(this.url)) {
|
||||||
|
const transcript = await getTranscript(this.url)
|
||||||
|
if (!transcript) {
|
||||||
|
throw new Error("Transcript not found for this video.")
|
||||||
|
}
|
||||||
|
|
||||||
|
let text = ""
|
||||||
|
|
||||||
|
transcript.forEach((item) => {
|
||||||
|
text += item.text + " "
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
metadata: {
|
||||||
|
source: this.url,
|
||||||
|
audio: { chunks: transcript }
|
||||||
|
},
|
||||||
|
pageContent: text
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
const htmlCompiler = compile({
|
const htmlCompiler = compile({
|
||||||
wordwrap: false
|
wordwrap: false
|
||||||
})
|
})
|
||||||
@ -30,6 +67,29 @@ export class PageAssistHtmlLoader
|
|||||||
}
|
}
|
||||||
|
|
||||||
async loadByURL(): Promise<Document<Record<string, any>>[]> {
|
async loadByURL(): Promise<Document<Record<string, any>>[]> {
|
||||||
|
if (isYoutubeLink(this.url)) {
|
||||||
|
const transcript = await getTranscript(this.url)
|
||||||
|
if (!transcript) {
|
||||||
|
throw new Error("Transcript not found for this video.")
|
||||||
|
}
|
||||||
|
|
||||||
|
let text = ""
|
||||||
|
|
||||||
|
transcript.forEach((item) => {
|
||||||
|
text += item.text + " "
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
metadata: {
|
||||||
|
source: this.url,
|
||||||
|
audio: { chunks: transcript }
|
||||||
|
},
|
||||||
|
pageContent: text
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
await chromeRunTime(this.url)
|
await chromeRunTime(this.url)
|
||||||
const fetchHTML = await fetch(this.url)
|
const fetchHTML = await fetch(this.url)
|
||||||
const html = await fetchHTML.text()
|
const html = await fetchHTML.text()
|
||||||
|
23
yarn.lock
23
yarn.lock
@ -4967,10 +4967,10 @@ lru-cache@^6.0.0:
|
|||||||
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.2.0.tgz#0bd445ca57363465900f4d1f9bd8db343a4d95c3"
|
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.2.0.tgz#0bd445ca57363465900f4d1f9bd8db343a4d95c3"
|
||||||
integrity sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q==
|
integrity sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q==
|
||||||
|
|
||||||
lucide-react@^0.340.0:
|
lucide-react@^0.350.0:
|
||||||
version "0.340.0"
|
version "0.350.0"
|
||||||
resolved "https://registry.yarnpkg.com/lucide-react/-/lucide-react-0.340.0.tgz#67a6fac6a5e257f2036dffae0dd94d6ccb28ce8e"
|
resolved "https://registry.yarnpkg.com/lucide-react/-/lucide-react-0.350.0.tgz#78b45342f4daff4535290e37b1ea7eb0961a3dab"
|
||||||
integrity sha512-mWzYhbyy2d+qKuKHh+GWElPwa+kIquTnKbmSLGWOuZy+bjfZCkYD8DQWVFlqI4mQwc4HNxcqcOvtQ7ZS2PwURg==
|
integrity sha512-5IZVKsxxG8Nn81gpsz4XLNgCAXkppCh0Y0P0GLO39h5iVD2WEaB9of6cPkLtzys1GuSfxJxmwsDh487y7LAf/g==
|
||||||
|
|
||||||
magic-string@^0.30.0:
|
magic-string@^0.30.0:
|
||||||
version "0.30.6"
|
version "0.30.6"
|
||||||
@ -7772,6 +7772,13 @@ ws@^8.11.0:
|
|||||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4"
|
resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4"
|
||||||
integrity sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==
|
integrity sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==
|
||||||
|
|
||||||
|
xml-js@^1.6.11:
|
||||||
|
version "1.6.11"
|
||||||
|
resolved "https://registry.yarnpkg.com/xml-js/-/xml-js-1.6.11.tgz#927d2f6947f7f1c19a316dd8eea3614e8b18f8e9"
|
||||||
|
integrity sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==
|
||||||
|
dependencies:
|
||||||
|
sax "^1.2.4"
|
||||||
|
|
||||||
xml-name-validator@^4.0.0:
|
xml-name-validator@^4.0.0:
|
||||||
version "4.0.0"
|
version "4.0.0"
|
||||||
resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz#79a006e2e63149a8600f15430f0a4725d1524835"
|
resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz#79a006e2e63149a8600f15430f0a4725d1524835"
|
||||||
@ -7817,6 +7824,14 @@ yaml@^2.2.1, yaml@^2.3.4:
|
|||||||
resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.3.4.tgz#53fc1d514be80aabf386dc6001eb29bf3b7523b2"
|
resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.3.4.tgz#53fc1d514be80aabf386dc6001eb29bf3b7523b2"
|
||||||
integrity sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==
|
integrity sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==
|
||||||
|
|
||||||
|
yt-transcript@^0.0.2:
|
||||||
|
version "0.0.2"
|
||||||
|
resolved "https://registry.yarnpkg.com/yt-transcript/-/yt-transcript-0.0.2.tgz#1c54aede89bb8a03bbca3ba58520dbbd9c828571"
|
||||||
|
integrity sha512-+cNRqW6tSQNDkQDVrWNT6hc6X2TnaQLvUJIepzn9r7XdEvPtUDkfsyhptW5+j0EPIEpnlsKyA/epCUrE4QKn2g==
|
||||||
|
dependencies:
|
||||||
|
axios "^1.6.7"
|
||||||
|
xml-js "^1.6.11"
|
||||||
|
|
||||||
zod-to-json-schema@^3.22.3:
|
zod-to-json-schema@^3.22.3:
|
||||||
version "3.22.4"
|
version "3.22.4"
|
||||||
resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.22.4.tgz#f8cc691f6043e9084375e85fb1f76ebafe253d70"
|
resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.22.4.tgz#f8cc691f6043e9084375e85fb1f76ebafe253d70"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user