Update dependencies and add YouTube transcript support
This commit is contained in:
		
							parent
							
								
									6d559eda2f
								
							
						
					
					
						commit
						7a72961562
					
				| @ -1,7 +1,7 @@ | |||||||
| { | { | ||||||
|   "name": "pageassist", |   "name": "pageassist", | ||||||
|   "displayName": "Page Assist - A Web UI for Local AI Models", |   "displayName": "Page Assist - A Web UI for Local AI Models", | ||||||
|   "version": "1.0.7", |   "version": "1.0.8", | ||||||
|   "description": "Use your locally running AI models to assist you in your web browsing.", |   "description": "Use your locally running AI models to assist you in your web browsing.", | ||||||
|   "author": "n4ze3m", |   "author": "n4ze3m", | ||||||
|   "scripts": { |   "scripts": { | ||||||
| @ -26,7 +26,7 @@ | |||||||
|     "dayjs": "^1.11.10", |     "dayjs": "^1.11.10", | ||||||
|     "html-to-text": "^9.0.5", |     "html-to-text": "^9.0.5", | ||||||
|     "langchain": "^0.1.9", |     "langchain": "^0.1.9", | ||||||
|     "lucide-react": "^0.340.0", |     "lucide-react": "^0.350.0", | ||||||
|     "plasmo": "0.84.1", |     "plasmo": "0.84.1", | ||||||
|     "property-information": "^6.4.1", |     "property-information": "^6.4.1", | ||||||
|     "react": "18.2.0", |     "react": "18.2.0", | ||||||
| @ -38,6 +38,7 @@ | |||||||
|     "rehype-mathjax": "4.0.3", |     "rehype-mathjax": "4.0.3", | ||||||
|     "remark-gfm": "3.0.1", |     "remark-gfm": "3.0.1", | ||||||
|     "remark-math": "5.1.1", |     "remark-math": "5.1.1", | ||||||
|  |     "yt-transcript": "^0.0.2", | ||||||
|     "zustand": "^4.5.0" |     "zustand": "^4.5.0" | ||||||
|   }, |   }, | ||||||
|   "devDependencies": { |   "devDependencies": { | ||||||
|  | |||||||
| @ -38,7 +38,7 @@ export const EmptySidePanel = () => { | |||||||
|     } |     } | ||||||
|   }, [ollamaInfo]) |   }, [ollamaInfo]) | ||||||
| 
 | 
 | ||||||
|   const { setSelectedModel, selectedModel, chatMode, setChatMode } = |   const { setSelectedModel, selectedModel, chatMode, setChatMode,  } = | ||||||
|     useMessage() |     useMessage() | ||||||
| 
 | 
 | ||||||
|   return ( |   return ( | ||||||
|  | |||||||
| @ -20,9 +20,12 @@ import { getHtmlOfCurrentTab } from "~libs/get-html" | |||||||
| import { PageAssistHtmlLoader } from "~loader/html" | import { PageAssistHtmlLoader } from "~loader/html" | ||||||
| import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | ||||||
| import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" | import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" | ||||||
| import { createChatWithWebsiteChain, groupMessagesByConversation } from "~chain/chat-with-website" | import { | ||||||
|  |   createChatWithWebsiteChain, | ||||||
|  |   groupMessagesByConversation | ||||||
|  | } from "~chain/chat-with-website" | ||||||
| import { MemoryVectorStore } from "langchain/vectorstores/memory" | import { MemoryVectorStore } from "langchain/vectorstores/memory" | ||||||
| 
 | import { chromeRunTime } from "~libs/runtime" | ||||||
| export type BotResponse = { | export type BotResponse = { | ||||||
|   bot: { |   bot: { | ||||||
|     text: string |     text: string | ||||||
| @ -134,11 +137,11 @@ export const useMessage = () => { | |||||||
|       url |       url | ||||||
|     }) |     }) | ||||||
|     const docs = await loader.load() |     const docs = await loader.load() | ||||||
|     const chunkSize = await defaultEmbeddingChunkSize(); |     const chunkSize = await defaultEmbeddingChunkSize() | ||||||
|     const chunkOverlap = await defaultEmbeddingChunkOverlap(); |     const chunkOverlap = await defaultEmbeddingChunkOverlap() | ||||||
|     const textSplitter = new RecursiveCharacterTextSplitter({ |     const textSplitter = new RecursiveCharacterTextSplitter({ | ||||||
|       chunkSize, |       chunkSize, | ||||||
|       chunkOverlap, |       chunkOverlap | ||||||
|     }) |     }) | ||||||
| 
 | 
 | ||||||
|     const chunks = await textSplitter.splitDocuments(docs) |     const chunks = await textSplitter.splitDocuments(docs) | ||||||
| @ -158,9 +161,10 @@ export const useMessage = () => { | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   const chatWithWebsiteMode = async (message: string) => { |   const chatWithWebsiteMode = async (message: string) => { | ||||||
|     const ollamaUrl = await getOllamaURL() |     try { | ||||||
|  |       let isAlreadyExistEmbedding: MemoryVectorStore | ||||||
|       const { html, url } = await getHtmlOfCurrentTab() |       const { html, url } = await getHtmlOfCurrentTab() | ||||||
|     const isAlreadyExistEmbedding = keepTrackOfEmbedding[url] |       isAlreadyExistEmbedding = keepTrackOfEmbedding[url] | ||||||
|       let newMessage: Message[] = [ |       let newMessage: Message[] = [ | ||||||
|         ...messages, |         ...messages, | ||||||
|         { |         { | ||||||
| @ -179,13 +183,14 @@ export const useMessage = () => { | |||||||
| 
 | 
 | ||||||
|       const appendingIndex = newMessage.length - 1 |       const appendingIndex = newMessage.length - 1 | ||||||
|       setMessages(newMessage) |       setMessages(newMessage) | ||||||
|  |       const ollamaUrl = await getOllamaURL() | ||||||
|       const embeddingModle = await defaultEmbeddingModelForRag() |       const embeddingModle = await defaultEmbeddingModelForRag() | ||||||
|  | 
 | ||||||
|       const ollamaEmbedding = new OllamaEmbeddings({ |       const ollamaEmbedding = new OllamaEmbeddings({ | ||||||
|         model: embeddingModle || selectedModel, |         model: embeddingModle || selectedModel, | ||||||
|         baseUrl: cleanUrl(ollamaUrl) |         baseUrl: cleanUrl(ollamaUrl) | ||||||
|       }) |       }) | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
|       const ollamaChat = new ChatOllama({ |       const ollamaChat = new ChatOllama({ | ||||||
|         model: selectedModel, |         model: selectedModel, | ||||||
|         baseUrl: cleanUrl(ollamaUrl) |         baseUrl: cleanUrl(ollamaUrl) | ||||||
| @ -212,10 +217,9 @@ export const useMessage = () => { | |||||||
|         retriever: vectorstore.asRetriever() |         retriever: vectorstore.asRetriever() | ||||||
|       }) |       }) | ||||||
| 
 | 
 | ||||||
|     try { |  | ||||||
|       const chunks = await chain.stream({ |       const chunks = await chain.stream({ | ||||||
|         question: sanitizedQuestion, |         question: sanitizedQuestion, | ||||||
|         chat_history: groupMessagesByConversation(history), |         chat_history: groupMessagesByConversation(history) | ||||||
|       }) |       }) | ||||||
|       let count = 0 |       let count = 0 | ||||||
|       for await (const chunk of chunks) { |       for await (const chunk of chunks) { | ||||||
| @ -258,7 +262,8 @@ export const useMessage = () => { | |||||||
|         { |         { | ||||||
|           isBot: true, |           isBot: true, | ||||||
|           name: selectedModel, |           name: selectedModel, | ||||||
|           message: `Something went wrong. Check out the following logs:
 |           message: `Error in chat with website mode. Check out the following logs:
 | ||||||
|  |            | ||||||
| ~~~ | ~~~ | ||||||
| ${e?.message} | ${e?.message} | ||||||
|  ~~~ |  ~~~ | ||||||
|  | |||||||
| @ -1,3 +1,4 @@ | |||||||
|  | 
 | ||||||
| const _getHtml = () => { | const _getHtml = () => { | ||||||
|   const url = window.location.href |   const url = window.location.href | ||||||
|   const html = Array.from(document.querySelectorAll("script")).reduce( |   const html = Array.from(document.querySelectorAll("script")).reduce( | ||||||
| @ -29,3 +30,4 @@ export const getHtmlOfCurrentTab = async () => { | |||||||
| 
 | 
 | ||||||
|   return result |   return result | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | |||||||
| @ -2,6 +2,20 @@ import { BaseDocumentLoader } from "langchain/document_loaders/base" | |||||||
| import { Document } from "@langchain/core/documents" | import { Document } from "@langchain/core/documents" | ||||||
| import { compile } from "html-to-text" | import { compile } from "html-to-text" | ||||||
| import { chromeRunTime } from "~libs/runtime" | import { chromeRunTime } from "~libs/runtime" | ||||||
|  | import { YtTranscript } from "yt-transcript" | ||||||
|  | 
 | ||||||
|  | const YT_REGEX = | ||||||
|  |   /(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=)?([a-zA-Z0-9_-]+)/ | ||||||
|  | 
 | ||||||
|  | const isYoutubeLink = (url: string) => { | ||||||
|  |   return YT_REGEX.test(url) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const getTranscript = async (url: string) => { | ||||||
|  |   const ytTranscript = new YtTranscript({ url }) | ||||||
|  |   return await ytTranscript.getTranscript() | ||||||
|  | } | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| export interface WebLoaderParams { | export interface WebLoaderParams { | ||||||
|   html: string |   html: string | ||||||
| @ -21,6 +35,29 @@ export class PageAssistHtmlLoader | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async load(): Promise<Document<Record<string, any>>[]> { |   async load(): Promise<Document<Record<string, any>>[]> { | ||||||
|  |     if (isYoutubeLink(this.url)) { | ||||||
|  |       const transcript = await getTranscript(this.url) | ||||||
|  |       if (!transcript) { | ||||||
|  |         throw new Error("Transcript not found for this video.") | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       let text = "" | ||||||
|  | 
 | ||||||
|  |       transcript.forEach((item) => { | ||||||
|  |         text += item.text + " " | ||||||
|  |       }) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |       return [ | ||||||
|  |         { | ||||||
|  |           metadata: { | ||||||
|  |             source: this.url, | ||||||
|  |             audio: { chunks: transcript } | ||||||
|  |           }, | ||||||
|  |           pageContent: text | ||||||
|  |         } | ||||||
|  |       ] | ||||||
|  |     } | ||||||
|     const htmlCompiler = compile({ |     const htmlCompiler = compile({ | ||||||
|       wordwrap: false |       wordwrap: false | ||||||
|     }) |     }) | ||||||
| @ -30,6 +67,29 @@ export class PageAssistHtmlLoader | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   async loadByURL(): Promise<Document<Record<string, any>>[]> { |   async loadByURL(): Promise<Document<Record<string, any>>[]> { | ||||||
|  |     if (isYoutubeLink(this.url)) { | ||||||
|  |       const transcript = await getTranscript(this.url) | ||||||
|  |       if (!transcript) { | ||||||
|  |         throw new Error("Transcript not found for this video.") | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       let text = "" | ||||||
|  | 
 | ||||||
|  |       transcript.forEach((item) => { | ||||||
|  |         text += item.text + " " | ||||||
|  |       }) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |       return [ | ||||||
|  |         { | ||||||
|  |           metadata: { | ||||||
|  |             source: this.url, | ||||||
|  |             audio: { chunks: transcript } | ||||||
|  |           }, | ||||||
|  |           pageContent: text | ||||||
|  |         } | ||||||
|  |       ] | ||||||
|  |     } | ||||||
|     await chromeRunTime(this.url) |     await chromeRunTime(this.url) | ||||||
|     const fetchHTML = await fetch(this.url) |     const fetchHTML = await fetch(this.url) | ||||||
|     const html = await fetchHTML.text() |     const html = await fetchHTML.text() | ||||||
|  | |||||||
							
								
								
									
										23
									
								
								yarn.lock
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								yarn.lock
									
									
									
									
									
								
							| @ -4967,10 +4967,10 @@ lru-cache@^6.0.0: | |||||||
|   resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.2.0.tgz#0bd445ca57363465900f4d1f9bd8db343a4d95c3" |   resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.2.0.tgz#0bd445ca57363465900f4d1f9bd8db343a4d95c3" | ||||||
|   integrity sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q== |   integrity sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q== | ||||||
| 
 | 
 | ||||||
| lucide-react@^0.340.0: | lucide-react@^0.350.0: | ||||||
|   version "0.340.0" |   version "0.350.0" | ||||||
|   resolved "https://registry.yarnpkg.com/lucide-react/-/lucide-react-0.340.0.tgz#67a6fac6a5e257f2036dffae0dd94d6ccb28ce8e" |   resolved "https://registry.yarnpkg.com/lucide-react/-/lucide-react-0.350.0.tgz#78b45342f4daff4535290e37b1ea7eb0961a3dab" | ||||||
|   integrity sha512-mWzYhbyy2d+qKuKHh+GWElPwa+kIquTnKbmSLGWOuZy+bjfZCkYD8DQWVFlqI4mQwc4HNxcqcOvtQ7ZS2PwURg== |   integrity sha512-5IZVKsxxG8Nn81gpsz4XLNgCAXkppCh0Y0P0GLO39h5iVD2WEaB9of6cPkLtzys1GuSfxJxmwsDh487y7LAf/g== | ||||||
| 
 | 
 | ||||||
| magic-string@^0.30.0: | magic-string@^0.30.0: | ||||||
|   version "0.30.6" |   version "0.30.6" | ||||||
| @ -7772,6 +7772,13 @@ ws@^8.11.0: | |||||||
|   resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4" |   resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4" | ||||||
|   integrity sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ== |   integrity sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ== | ||||||
| 
 | 
 | ||||||
|  | xml-js@^1.6.11: | ||||||
|  |   version "1.6.11" | ||||||
|  |   resolved "https://registry.yarnpkg.com/xml-js/-/xml-js-1.6.11.tgz#927d2f6947f7f1c19a316dd8eea3614e8b18f8e9" | ||||||
|  |   integrity sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g== | ||||||
|  |   dependencies: | ||||||
|  |     sax "^1.2.4" | ||||||
|  | 
 | ||||||
| xml-name-validator@^4.0.0: | xml-name-validator@^4.0.0: | ||||||
|   version "4.0.0" |   version "4.0.0" | ||||||
|   resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz#79a006e2e63149a8600f15430f0a4725d1524835" |   resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz#79a006e2e63149a8600f15430f0a4725d1524835" | ||||||
| @ -7817,6 +7824,14 @@ yaml@^2.2.1, yaml@^2.3.4: | |||||||
|   resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.3.4.tgz#53fc1d514be80aabf386dc6001eb29bf3b7523b2" |   resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.3.4.tgz#53fc1d514be80aabf386dc6001eb29bf3b7523b2" | ||||||
|   integrity sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA== |   integrity sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA== | ||||||
| 
 | 
 | ||||||
|  | yt-transcript@^0.0.2: | ||||||
|  |   version "0.0.2" | ||||||
|  |   resolved "https://registry.yarnpkg.com/yt-transcript/-/yt-transcript-0.0.2.tgz#1c54aede89bb8a03bbca3ba58520dbbd9c828571" | ||||||
|  |   integrity sha512-+cNRqW6tSQNDkQDVrWNT6hc6X2TnaQLvUJIepzn9r7XdEvPtUDkfsyhptW5+j0EPIEpnlsKyA/epCUrE4QKn2g== | ||||||
|  |   dependencies: | ||||||
|  |     axios "^1.6.7" | ||||||
|  |     xml-js "^1.6.11" | ||||||
|  | 
 | ||||||
| zod-to-json-schema@^3.22.3: | zod-to-json-schema@^3.22.3: | ||||||
|   version "3.22.4" |   version "3.22.4" | ||||||
|   resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.22.4.tgz#f8cc691f6043e9084375e85fb1f76ebafe253d70" |   resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.22.4.tgz#f8cc691f6043e9084375e85fb1f76ebafe253d70" | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user