Update dependencies and fix whitespace formatting in isTTSEnabled function in tts.ts
This commit is contained in:
parent
476323d928
commit
c914233610
@ -50,6 +50,7 @@
|
||||
"rehype-mathjax": "4.0.3",
|
||||
"remark-gfm": "3.0.1",
|
||||
"remark-math": "5.1.1",
|
||||
"turndown": "^7.1.3",
|
||||
"yt-transcript": "^0.0.2",
|
||||
"zustand": "^4.5.0"
|
||||
},
|
||||
@ -63,6 +64,7 @@
|
||||
"@types/react": "18.2.48",
|
||||
"@types/react-dom": "18.2.18",
|
||||
"@types/react-syntax-highlighter": "^15.5.11",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"autoprefixer": "^10.4.17",
|
||||
"postcss": "^8.4.33",
|
||||
"prettier": "3.2.4",
|
||||
|
@ -5,7 +5,7 @@ import { Tooltip } from "antd"
|
||||
import { BoxesIcon, CogIcon, EraserIcon, HistoryIcon } from "lucide-react"
|
||||
import { useTranslation } from "react-i18next"
|
||||
export const SidepanelHeader = () => {
|
||||
const { clearChat, isEmbedding, messages } = useMessage()
|
||||
const { clearChat, isEmbedding, messages, streaming } = useMessage()
|
||||
const { t } = useTranslation(["sidepanel", "common"])
|
||||
|
||||
return (
|
||||
@ -25,16 +25,15 @@ export const SidepanelHeader = () => {
|
||||
<BoxesIcon className="h-5 w-5 text-gray-500 dark:text-gray-400 animate-bounce animate-infinite" />
|
||||
</Tooltip>
|
||||
) : null}
|
||||
{messages.length > 0 && (
|
||||
<Tooltip title={t("tooltip.clear")}>
|
||||
<button
|
||||
onClick={() => {
|
||||
clearChat()
|
||||
}}
|
||||
className="flex items-center space-x-1 focus:outline-none focus-visible:ring-2 focus-visible:ring-pink-700">
|
||||
<EraserIcon className="h-5 w-5 text-gray-500 dark:text-gray-400" />
|
||||
</button>
|
||||
</Tooltip>
|
||||
{messages.length > 0 && !streaming && (
|
||||
<button
|
||||
title={t("tooltip.clear")}
|
||||
onClick={() => {
|
||||
clearChat()
|
||||
}}
|
||||
className="flex items-center space-x-1 focus:outline-none focus-visible:ring-2 focus-visible:ring-pink-700">
|
||||
<EraserIcon className="h-5 w-5 text-gray-500 dark:text-gray-400" />
|
||||
</button>
|
||||
)}
|
||||
{/* <Tooltip title={t("tooltip.history")}>
|
||||
<Link to="/history">
|
||||
|
@ -81,6 +81,7 @@ export const useMessage = () => {
|
||||
signal: AbortSignal,
|
||||
embeddingSignal: AbortSignal
|
||||
) => {
|
||||
setStreaming(true)
|
||||
const url = await getOllamaURL()
|
||||
|
||||
const ollama = new ChatOllama({
|
||||
@ -320,6 +321,7 @@ export const useMessage = () => {
|
||||
history: ChatHistory,
|
||||
signal: AbortSignal
|
||||
) => {
|
||||
setStreaming(true)
|
||||
const url = await getOllamaURL()
|
||||
|
||||
if (image.length > 0) {
|
||||
|
@ -1,17 +1,25 @@
|
||||
import { defaultExtractContent } from "@/parser/default"
|
||||
import { getPdf } from "./pdf"
|
||||
import {
|
||||
isTweet,
|
||||
isTwitterTimeline,
|
||||
parseTweet,
|
||||
parseTwitterTimeline,
|
||||
} from "@/parser/twitter"
|
||||
import { isGoogleDocs, parseGoogleDocs } from "@/parser/google-docs"
|
||||
import { cleanUnwantedUnicode } from "@/utils/clean"
|
||||
|
||||
const _getHtml = async () => {
|
||||
const _getHtml = () => {
|
||||
const url = window.location.href
|
||||
if (document.contentType === "application/pdf") {
|
||||
return { url, content: "", type: "pdf" }
|
||||
}
|
||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
||||
(acc, script) => {
|
||||
return acc.replace(script.outerHTML, "")
|
||||
},
|
||||
document.documentElement.outerHTML
|
||||
)
|
||||
return { url, content: html, type: "html" }
|
||||
|
||||
return {
|
||||
content: document.documentElement.outerHTML,
|
||||
url,
|
||||
type: "html"
|
||||
}
|
||||
}
|
||||
|
||||
export const getDataFromCurrentTab = async () => {
|
||||
@ -34,7 +42,6 @@ export const getDataFromCurrentTab = async () => {
|
||||
type: string
|
||||
}>
|
||||
|
||||
|
||||
const { content, type, url } = await result
|
||||
|
||||
if (type === "pdf") {
|
||||
@ -47,31 +54,58 @@ export const getDataFromCurrentTab = async () => {
|
||||
const pdf = await getPdf(data)
|
||||
|
||||
for (let i = 1; i <= pdf.numPages; i += 1) {
|
||||
const page = await pdf.getPage(i);
|
||||
const content = await page.getTextContent();
|
||||
const page = await pdf.getPage(i)
|
||||
const content = await page.getTextContent()
|
||||
|
||||
if (content?.items.length === 0) {
|
||||
continue;
|
||||
continue
|
||||
}
|
||||
|
||||
const text = content?.items.map((item: any) => item.str).join("\n")
|
||||
.replace(/\x00/g, "").trim();
|
||||
const text = content?.items
|
||||
.map((item: any) => item.str)
|
||||
.join("\n")
|
||||
.replace(/\x00/g, "")
|
||||
.trim()
|
||||
pdfHtml.push({
|
||||
content: text,
|
||||
page: i
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
url,
|
||||
content: "",
|
||||
pdf: pdfHtml,
|
||||
type: "pdf"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return { url, content, type, pdf: [] }
|
||||
if (isTwitterTimeline(url)) {
|
||||
const data = parseTwitterTimeline(content)
|
||||
return {
|
||||
url,
|
||||
content: data,
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
} else if (isTweet(url)) {
|
||||
const data = parseTweet(content)
|
||||
return {
|
||||
url,
|
||||
content: data,
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
} else if (isGoogleDocs(url)) {
|
||||
const data = await parseGoogleDocs()
|
||||
if (data) {
|
||||
return {
|
||||
url,
|
||||
content: cleanUnwantedUnicode(data),
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
}
|
||||
}
|
||||
const data = defaultExtractContent(content)
|
||||
return { url, content: data, type, pdf: [] }
|
||||
}
|
||||
|
||||
|
@ -59,24 +59,24 @@ export class PageAssistHtmlLoader
|
||||
]
|
||||
}
|
||||
|
||||
let html = this.html
|
||||
// let html = this.html
|
||||
|
||||
if (isWikipedia(this.url)) {
|
||||
console.log("Wikipedia URL detected")
|
||||
html = parseWikipedia(html)
|
||||
}
|
||||
|
||||
// else if (isTwitter(this.url)) {
|
||||
// console.log("Twitter URL detected")
|
||||
// html = parseTweet(html, this.url)
|
||||
// if (isWikipedia(this.url)) {
|
||||
// console.log("Wikipedia URL detected")
|
||||
// html = parseWikipedia(html)
|
||||
// }
|
||||
|
||||
const htmlCompiler = compile({
|
||||
wordwrap: false
|
||||
})
|
||||
const text = htmlCompiler(html)
|
||||
// // else if (isTwitter(this.url)) {
|
||||
// // console.log("Twitter URL detected")
|
||||
// // html = parseTweet(html, this.url)
|
||||
// // }
|
||||
|
||||
// const htmlCompiler = compile({
|
||||
// wordwrap: false
|
||||
// })
|
||||
// const text = htmlCompiler(html)
|
||||
const metadata = { source: this.url }
|
||||
return [new Document({ pageContent: text, metadata })]
|
||||
return [new Document({ pageContent: this.html, metadata })]
|
||||
}
|
||||
|
||||
async loadByURL(): Promise<Document<Record<string, any>>[]> {
|
||||
|
10
src/parser/default.ts
Normal file
10
src/parser/default.ts
Normal file
@ -0,0 +1,10 @@
|
||||
import * as cheerio from "cheerio"
|
||||
import TurndownService from "turndown"
|
||||
// Shared HTML-to-Markdown converter. It is never reassigned, so it is bound
// with `const` rather than `let`.
const turndownService = new TurndownService()

/**
 * Converts an HTML document to Markdown.
 *
 * The markup handed to Turndown is chosen in this order: the inner HTML of
 * the `[role="main"]` match, then of `<main>`, then the whole document.
 *
 * @param html - Raw HTML markup to convert.
 * @returns The Markdown that Turndown produces for the selected markup.
 */
export const defaultExtractContent = (html: string): string => {
  const $ = cheerio.load(html)

  // `||` rather than `??` is deliberate: a missing element (null) and an
  // empty one ("") should both fall through to the next candidate.
  const mainContent = $('[role="main"]').html() || $("main").html() || $.html()

  return turndownService.turndown(mainContent)
}
|
119
src/parser/google-docs.ts
Normal file
119
src/parser/google-docs.ts
Normal file
@ -0,0 +1,119 @@
|
||||
|
||||
/**
 * Reports whether a URL refers to a Google Docs document.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when `url` contains `docs.google.com/document`.
 */
export const isGoogleDocs = (url: string): boolean => {
  // No `g` flag: a global regex keeps `lastIndex` between `.test()` calls,
  // which serves no purpose for a yes/no check and becomes a bug as soon as
  // the pattern is hoisted and reused.
  return /docs\.google\.com\/document/.test(url)
}
|
||||
|
||||
/**
 * Searches the page's `window.KX_kixApp` object for the first value whose
 * string form starts with the "\x03" control character and returns it.
 *
 * The search visits own properties recursively with a depth limit of 5,
 * skips properties named "prototype" and `Window` instances, and never
 * descends into the same object twice.
 *
 * NOTE(review): this function is passed as `func` to
 * `chrome.scripting.executeScript`, so it presumably has to stay
 * self-contained (no references to module scope) — confirm before extracting
 * helpers out of it.
 *
 * @returns `{ content }` holding the matched value, or `{ content: null }`
 * when nothing matched or any error occurred.
 */
const getGoogleDocs = () => {
  type Match = { path: string[]; value: unknown }
  type Predicate = (name: string, value: unknown) => unknown

  try {
    /**
     * Collects every value reachable from `obj` that satisfies `predicate`.
     * A matching value is recorded and not descended into.
     */
    const traverse = (
      obj: Record<string, unknown>,
      predicate: Predicate,
      maxDepth: number,
      propNames: string[] = Object.getOwnPropertyNames(obj)
    ): Match[] => {
      const seen = new Set<unknown>()
      const matches: Match[] = []

      const visit = (
        name: string,
        value: unknown,
        path: string[],
        depth = 0
      ): void => {
        if (name === "prototype" || value instanceof Window || depth > maxDepth) {
          return
        }

        const currentPath = [...path, name]

        try {
          if (predicate(name, value)) {
            matches.push({ path: currentPath, value })
            return
          }
        } catch {
          // Best effort: a predicate that throws counts as "no match".
        }

        if (value == null || seen.has(value)) {
          return
        }
        seen.add(value)

        if (Array.isArray(value)) {
          value.forEach((item, index) => {
            try {
              visit(index.toString(), item, currentPath, depth + 1)
            } catch {
              // Best effort: skip entries that cannot be inspected.
            }
          })
          return
        }

        if (value instanceof Object) {
          const record = value as Record<string, unknown>
          const isElementLike =
            record.nodeType === 1 && typeof record.nodeName === "string"

          // NOTE(review): element-like values are enumerated with the ROOT
          // object's property names, exactly as before — confirm this is
          // intended rather than a typo for `value`.
          const names = Object.getOwnPropertyNames(isElementLike ? obj : value)

          names.forEach((prop) => {
            try {
              // The property read stays inside the try: getters may throw.
              visit(prop, record[prop], currentPath, depth + 1)
            } catch {
              // Best effort: skip properties that cannot be read.
            }
          })
        }
      }

      propNames.forEach((prop) => {
        try {
          visit(prop, obj[prop], [])
        } catch {
          // Best effort: skip properties that cannot be read.
        }
      })

      return matches
    }

    const kixApp = (window as Window & { KX_kixApp?: Record<string, unknown> })
      .KX_kixApp
    if (kixApp == null) {
      // Same outcome the old code reached via a caught TypeError.
      return { content: null }
    }

    const startsWithEtx: Predicate = (_name, value) =>
      Boolean(value) &&
      (value as { toString(): string }).toString().charAt(0) === "\x03"

    const [firstMatch] = traverse(kixApp, startsWithEtx, 5)

    return { content: firstMatch?.value || null }
  } catch {
    return { content: null }
  }
}
|
||||
|
||||
/**
 * Runs `getGoogleDocs` in the MAIN world of the active tab of the current
 * window and returns the content it found.
 *
 * The promise always settles. Previously it stayed pending forever when no
 * active tab was reported, when `executeScript` rejected, or when it
 * returned no results, which stalled any caller awaiting this function.
 *
 * @returns The extracted content, or `undefined` when nothing could be
 * extracted (null is normalised to `undefined` to match the declared type).
 */
export const parseGoogleDocs = async (): Promise<string | undefined> => {
  const result = await new Promise<{ content?: string | null }>((resolve) => {
    chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
      try {
        const tabId = tabs[0]?.id
        if (tabId == null) {
          resolve({ content: null })
          return
        }

        const data = await chrome.scripting.executeScript({
          target: { tabId },
          world: "MAIN",
          func: getGoogleDocs
        })

        // An empty result list or a null result both mean "nothing found".
        resolve(data[0]?.result ?? { content: null })
      } catch {
        // Injection failed; report "no content" so the caller can fall back
        // instead of waiting on a promise that never settles.
        resolve({ content: null })
      }
    })
  })

  return result.content ?? undefined
}
|
5
src/parser/google-sheets.ts
Normal file
5
src/parser/google-sheets.ts
Normal file
@ -0,0 +1,5 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
|
||||
/**
 * Placeholder parser for Google Sheets pages.
 *
 * Currently it only parses the markup with cheerio and discards the result;
 * nothing is returned.
 *
 * @param html - Raw HTML markup of the page.
 */
export const parseGoogleSheets = (html: string): void => {
  // TODO: extraction is not implemented yet.
  cheerio.load(html)
}
|
@ -2,89 +2,101 @@ import * as cheerio from "cheerio"
|
||||
|
||||
/**
 * Reports whether a URL points at a single post on twitter.com or x.com
 * (`/<handle>/status/<digits>`).
 *
 * The host must sit at the start of the string or directly after `/` or `.`,
 * so unrelated hosts that merely end in "x.com" (e.g. `dropbox.com`) are
 * rejected while subdomains such as `mobile.twitter.com` are accepted.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when `url` is a twitter.com / x.com status URL.
 */
export const isTweet = (url: string): boolean => {
  // One non-global pattern for both hosts: nothing after an early return can
  // be skipped, and no `lastIndex` state is carried between calls.
  return /(?:^|[/.])(?:twitter|x)\.com\/[a-zA-Z0-9_]+\/status\/[0-9]+/.test(url)
}
|
||||
|
||||
/**
 * Reports whether a URL is the home timeline of twitter.com or x.com.
 *
 * The URL is parsed rather than compared as a whole string, so query
 * strings, fragments, a trailing slash and the `www.` / `mobile.` host
 * prefixes no longer cause a miss. The two URLs accepted before
 * (`https://twitter.com/home`, `https://x.com/home`) are still accepted.
 *
 * @param url - The URL string to inspect.
 * @returns `true` for a home-timeline URL, `false` otherwise (including
 * strings that are not valid absolute URLs).
 */
export const isTwitterTimeline = (url: string): boolean => {
  let parsed: URL
  try {
    parsed = new URL(url)
  } catch {
    return false
  }

  const host = parsed.hostname.replace(/^(?:www|mobile)\./, "")
  const path = parsed.pathname.replace(/\/+$/, "")

  return (host === "twitter.com" || host === "x.com") && path === "/home"
}
|
||||
|
||||
/**
 * Reports whether a URL on twitter.com or x.com has a handle-like first path
 * segment (`/<letters, digits or underscores>`).
 *
 * The pattern only requires that first segment, so status and `/home` URLs
 * match as well. The host must sit at the start of the string or directly
 * after `/` or `.`, which rejects unrelated hosts ending in "x.com"
 * (e.g. `netflix.com`).
 *
 * @param url - The URL string to inspect.
 * @returns `true` when `url` matches the profile pattern.
 */
export const isTwitterProfile = (url: string): boolean => {
  // One non-global pattern for both hosts: nothing after an early return can
  // be skipped, and no `lastIndex` state is carried between calls.
  return /(?:^|[/.])(?:twitter|x)\.com\/[a-zA-Z0-9_]+/.test(url)
}
|
||||
|
||||
export const isTwitterTimeline = (url: string) => {
|
||||
const TIMELINE_REGEX = /twitter\.com\/home/g
|
||||
return TIMELINE_REGEX.test(url)
|
||||
/**
 * Renders the posts found in timeline markup as Markdown.
 *
 * Each `[data-testid=tweetText]` element becomes one entry, paired by
 * position with the `[data-testid=User-Name]` elements. Identical entries
 * are emitted once, keeping the first occurrence.
 *
 * NOTE(review): pairing by index presumably assumes every post has both a
 * text and a user-name element — confirm against real markup.
 *
 * @param html - Raw HTML markup of the timeline page.
 * @returns The entries joined with newlines; an empty string when no post
 * text was found.
 */
export const parseTwitterTimeline = (html: string): string => {
  const $ = cheerio.load(html)
  const postElements = $("[data-testid=tweetText]")
  const authorElements = $("[data-testid=User-Name]")

  const entries = postElements
    .map((index, element) => {
      const author = $(authorElements[index]).text()
      const post = $(element).text()
      return `## Author: ${author}\n\n${post}\n\n---\n\n`
    })
    .get()

  // A Set keeps first-occurrence order, matching the previous
  // `indexOf`-based filter without its quadratic scan.
  return [...new Set(entries)].join("\n")
}
|
||||
|
||||
export const isTwitter = (url: string) => {
|
||||
return isTweet(url) || isTwitterProfile(url) || isTwitterTimeline(url)
|
||||
/**
 * Renders a single-post page as Markdown.
 *
 * Each `[data-testid=tweetText]` element becomes one entry, paired by
 * position with the `[data-testid=User-Name]` elements. The first entry is
 * labelled "Post:", every later one "Reply:".
 *
 * NOTE(review): pairing by index presumably assumes every post has both a
 * text and a user-name element — confirm against real markup.
 *
 * @param html - Raw HTML markup of the post page.
 * @returns The entries joined with newlines; an empty string when no post
 * text was found.
 */
export const parseTweet = (html: string): string => {
  const $ = cheerio.load(html)
  const postElements = $("[data-testid=tweetText]")
  const authorElements = $("[data-testid=User-Name]")

  return postElements
    .map((index, element) => {
      const author = $(authorElements[index]).text()
      const post = $(element).text()
      const label = index === 0 ? "Post:" : "Reply:"
      // "## " with a space: without it the line is not a Markdown heading,
      // and it would disagree with the "## Author:" timeline format.
      return `## Author: ${author}\n\n${label} ${post}\n\n---\n\n`
    })
    .get()
    .join("\n")
}
|
||||
|
||||
export const isTwitterNotification = (url: string) => {
|
||||
const NOTIFICATION_REGEX = /twitter\.com\/notifications/g
|
||||
return NOTIFICATION_REGEX.test(url)
|
||||
}
|
||||
|
||||
export const parseTweet = (html: string, url: string) => {
|
||||
if (!html) {
|
||||
return ""
|
||||
}
|
||||
|
||||
export const parseTweetProfile = (html: string) => {
|
||||
const $ = cheerio.load(html)
|
||||
|
||||
if (isTweet(url)) {
|
||||
console.log("tweet")
|
||||
const tweet = $("div[data-testid='tweet']")
|
||||
const tweetContent = tweet.find("div[lang]")
|
||||
const tweetMedia = tweet.find("div[role='group']")
|
||||
const author = tweet.find("a[role='link']").text()
|
||||
const date = tweet.find("time").text()
|
||||
return `<div>${author} ${tweetContent.text()} ${tweetMedia.html()} ${date}</div>`
|
||||
}
|
||||
const profileName = $("[data-testid=UserProfileHeader_Items]")
|
||||
.find("h1")
|
||||
.text()
|
||||
const profileBio = $("[data-testid=UserProfileHeader_Items]").find("p").text()
|
||||
const profileLocation = $("[data-testid=UserProfileHeader_Items]")
|
||||
.find("span")
|
||||
.text()
|
||||
const profileJoinDate = $("[data-testid=UserProfileHeader_Items]")
|
||||
.find("span")
|
||||
.text()
|
||||
const profileFollowers = $(
|
||||
"[data-testid=UserProfileHeader_Items] span"
|
||||
).text()
|
||||
const profileFollowing = $(
|
||||
"[data-testid=UserProfileHeader_Items] span"
|
||||
).text()
|
||||
|
||||
if (isTwitterTimeline(url)) {
|
||||
console.log("timeline")
|
||||
const timeline = $("div[data-testid='primaryColumn']")
|
||||
const timelineContent = timeline.find("div[data-testid='tweet']")
|
||||
console.log(timelineContent.html())
|
||||
const tweet = timelineContent
|
||||
.map((i, el) => {
|
||||
const author = $(el).find("a[role='link']").text()
|
||||
const content = $(el).find("div[lang]").text()
|
||||
const media = $(el).find("div[role='group']").html()
|
||||
const date = $(el).find("time").text()
|
||||
return `<div>${author} ${content} ${media} ${date}</div>`
|
||||
})
|
||||
.get()
|
||||
.join("")
|
||||
console.log(tweet)
|
||||
return `<div>${tweet}</div>`
|
||||
}
|
||||
const postElements = $("[data-testid=tweetText]")
|
||||
const authorElements = $("[data-testid=User-Name]")
|
||||
|
||||
if (isTwitterNotification(url)) {
|
||||
console.log("notification")
|
||||
const notification = $("div[data-testid='primaryColumn']")
|
||||
const notificationContent = notification.find("div[data-testid='tweet']")
|
||||
return `<div>${notificationContent.html()}</div>`
|
||||
}
|
||||
if (isTwitterProfile(url)) {
|
||||
console.log("profile")
|
||||
const profile = $("div[data-testid='primaryColumn']")
|
||||
const profileContent = profile.find(
|
||||
"div[data-testid='UserProfileHeader_Items']"
|
||||
)
|
||||
const profileTweets = profile.find("div[data-testid='tweet']")
|
||||
return `<div>${profileContent.html()}</div><div>${profileTweets.html()}</div>`
|
||||
}
|
||||
console.log("no match")
|
||||
const timeline = $("div[data-testid='primaryColumn']")
|
||||
const timelineContent = timeline.find("div[data-testid='tweet']")
|
||||
const tweet = timelineContent.map((i, el) => {
|
||||
const author = $(el).find("a[role='link']").text()
|
||||
const content = $(el).find("div[lang]").text()
|
||||
const media = $(el).find("div[role='group']").html()
|
||||
const date = $(el).find("time").text()
|
||||
return `<div>${author} ${content} ${media} ${date}</div>`
|
||||
})
|
||||
const posts = postElements
|
||||
.map((index, element) => {
|
||||
const post = $(element).text()
|
||||
const author = $(authorElements[index]).text()
|
||||
return {
|
||||
author,
|
||||
post
|
||||
}
|
||||
})
|
||||
.get()
|
||||
|
||||
return `<div>${tweet}</div>`
|
||||
return `## Profile: ${profileName}\n\nBio: ${profileBio}\n\nLocation: ${profileLocation}\n\nJoin Date: ${profileJoinDate}\n\nFollowers: ${profileFollowers}\n\nFollowing: ${profileFollowing}\n\nPosts: ${posts.map((post) => `Author: ${post.author}\n\nPost: ${post.post}\n\n---\n\n`).join("\n")}`
|
||||
}
|
||||
|
4
src/utils/clean.ts
Normal file
4
src/utils/clean.ts
Normal file
@ -0,0 +1,4 @@
|
||||
/**
 * Strips the characters U+200B through U+200D and U+FEFF from `text`, then
 * trims leading and trailing whitespace.
 *
 * @param text - The string to clean.
 * @returns The cleaned, trimmed string.
 */
export const cleanUnwantedUnicode = (text: string) => {
  const withoutInvisibles = text.replace(/[\u200B-\u200D\uFEFF]/g, "")
  return withoutInvisibles.trim()
}
|
Loading…
x
Reference in New Issue
Block a user