Update dependencies and fix whitespace formatting in isTTSEnabled function in tts.ts
This commit is contained in:
parent
476323d928
commit
c914233610
@ -50,6 +50,7 @@
|
|||||||
"rehype-mathjax": "4.0.3",
|
"rehype-mathjax": "4.0.3",
|
||||||
"remark-gfm": "3.0.1",
|
"remark-gfm": "3.0.1",
|
||||||
"remark-math": "5.1.1",
|
"remark-math": "5.1.1",
|
||||||
|
"turndown": "^7.1.3",
|
||||||
"yt-transcript": "^0.0.2",
|
"yt-transcript": "^0.0.2",
|
||||||
"zustand": "^4.5.0"
|
"zustand": "^4.5.0"
|
||||||
},
|
},
|
||||||
@ -63,6 +64,7 @@
|
|||||||
"@types/react": "18.2.48",
|
"@types/react": "18.2.48",
|
||||||
"@types/react-dom": "18.2.18",
|
"@types/react-dom": "18.2.18",
|
||||||
"@types/react-syntax-highlighter": "^15.5.11",
|
"@types/react-syntax-highlighter": "^15.5.11",
|
||||||
|
"@types/turndown": "^5.0.4",
|
||||||
"autoprefixer": "^10.4.17",
|
"autoprefixer": "^10.4.17",
|
||||||
"postcss": "^8.4.33",
|
"postcss": "^8.4.33",
|
||||||
"prettier": "3.2.4",
|
"prettier": "3.2.4",
|
||||||
|
@ -5,7 +5,7 @@ import { Tooltip } from "antd"
|
|||||||
import { BoxesIcon, CogIcon, EraserIcon, HistoryIcon } from "lucide-react"
|
import { BoxesIcon, CogIcon, EraserIcon, HistoryIcon } from "lucide-react"
|
||||||
import { useTranslation } from "react-i18next"
|
import { useTranslation } from "react-i18next"
|
||||||
export const SidepanelHeader = () => {
|
export const SidepanelHeader = () => {
|
||||||
const { clearChat, isEmbedding, messages } = useMessage()
|
const { clearChat, isEmbedding, messages, streaming } = useMessage()
|
||||||
const { t } = useTranslation(["sidepanel", "common"])
|
const { t } = useTranslation(["sidepanel", "common"])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@ -25,16 +25,15 @@ export const SidepanelHeader = () => {
|
|||||||
<BoxesIcon className="h-5 w-5 text-gray-500 dark:text-gray-400 animate-bounce animate-infinite" />
|
<BoxesIcon className="h-5 w-5 text-gray-500 dark:text-gray-400 animate-bounce animate-infinite" />
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
) : null}
|
) : null}
|
||||||
{messages.length > 0 && (
|
{messages.length > 0 && !streaming && (
|
||||||
<Tooltip title={t("tooltip.clear")}>
|
|
||||||
<button
|
<button
|
||||||
|
title={t("tooltip.clear")}
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
clearChat()
|
clearChat()
|
||||||
}}
|
}}
|
||||||
className="flex items-center space-x-1 focus:outline-none focus-visible:ring-2 focus-visible:ring-pink-700">
|
className="flex items-center space-x-1 focus:outline-none focus-visible:ring-2 focus-visible:ring-pink-700">
|
||||||
<EraserIcon className="h-5 w-5 text-gray-500 dark:text-gray-400" />
|
<EraserIcon className="h-5 w-5 text-gray-500 dark:text-gray-400" />
|
||||||
</button>
|
</button>
|
||||||
</Tooltip>
|
|
||||||
)}
|
)}
|
||||||
{/* <Tooltip title={t("tooltip.history")}>
|
{/* <Tooltip title={t("tooltip.history")}>
|
||||||
<Link to="/history">
|
<Link to="/history">
|
||||||
|
@ -81,6 +81,7 @@ export const useMessage = () => {
|
|||||||
signal: AbortSignal,
|
signal: AbortSignal,
|
||||||
embeddingSignal: AbortSignal
|
embeddingSignal: AbortSignal
|
||||||
) => {
|
) => {
|
||||||
|
setStreaming(true)
|
||||||
const url = await getOllamaURL()
|
const url = await getOllamaURL()
|
||||||
|
|
||||||
const ollama = new ChatOllama({
|
const ollama = new ChatOllama({
|
||||||
@ -320,6 +321,7 @@ export const useMessage = () => {
|
|||||||
history: ChatHistory,
|
history: ChatHistory,
|
||||||
signal: AbortSignal
|
signal: AbortSignal
|
||||||
) => {
|
) => {
|
||||||
|
setStreaming(true)
|
||||||
const url = await getOllamaURL()
|
const url = await getOllamaURL()
|
||||||
|
|
||||||
if (image.length > 0) {
|
if (image.length > 0) {
|
||||||
|
@ -1,17 +1,25 @@
|
|||||||
|
import { defaultExtractContent } from "@/parser/default"
|
||||||
import { getPdf } from "./pdf"
|
import { getPdf } from "./pdf"
|
||||||
|
import {
|
||||||
|
isTweet,
|
||||||
|
isTwitterTimeline,
|
||||||
|
parseTweet,
|
||||||
|
parseTwitterTimeline,
|
||||||
|
} from "@/parser/twitter"
|
||||||
|
import { isGoogleDocs, parseGoogleDocs } from "@/parser/google-docs"
|
||||||
|
import { cleanUnwantedUnicode } from "@/utils/clean"
|
||||||
|
|
||||||
const _getHtml = async () => {
|
const _getHtml = () => {
|
||||||
const url = window.location.href
|
const url = window.location.href
|
||||||
if (document.contentType === "application/pdf") {
|
if (document.contentType === "application/pdf") {
|
||||||
return { url, content: "", type: "pdf" }
|
return { url, content: "", type: "pdf" }
|
||||||
}
|
}
|
||||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
|
||||||
(acc, script) => {
|
return {
|
||||||
return acc.replace(script.outerHTML, "")
|
content: document.documentElement.outerHTML,
|
||||||
},
|
url,
|
||||||
document.documentElement.outerHTML
|
type: "html"
|
||||||
)
|
}
|
||||||
return { url, content: html, type: "html" }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getDataFromCurrentTab = async () => {
|
export const getDataFromCurrentTab = async () => {
|
||||||
@ -34,7 +42,6 @@ export const getDataFromCurrentTab = async () => {
|
|||||||
type: string
|
type: string
|
||||||
}>
|
}>
|
||||||
|
|
||||||
|
|
||||||
const { content, type, url } = await result
|
const { content, type, url } = await result
|
||||||
|
|
||||||
if (type === "pdf") {
|
if (type === "pdf") {
|
||||||
@ -47,31 +54,58 @@ export const getDataFromCurrentTab = async () => {
|
|||||||
const pdf = await getPdf(data)
|
const pdf = await getPdf(data)
|
||||||
|
|
||||||
for (let i = 1; i <= pdf.numPages; i += 1) {
|
for (let i = 1; i <= pdf.numPages; i += 1) {
|
||||||
const page = await pdf.getPage(i);
|
const page = await pdf.getPage(i)
|
||||||
const content = await page.getTextContent();
|
const content = await page.getTextContent()
|
||||||
|
|
||||||
if (content?.items.length === 0) {
|
if (content?.items.length === 0) {
|
||||||
continue;
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
const text = content?.items.map((item: any) => item.str).join("\n")
|
const text = content?.items
|
||||||
.replace(/\x00/g, "").trim();
|
.map((item: any) => item.str)
|
||||||
|
.join("\n")
|
||||||
|
.replace(/\x00/g, "")
|
||||||
|
.trim()
|
||||||
pdfHtml.push({
|
pdfHtml.push({
|
||||||
content: text,
|
content: text,
|
||||||
page: i
|
page: i
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url,
|
url,
|
||||||
content: "",
|
content: "",
|
||||||
pdf: pdfHtml,
|
pdf: pdfHtml,
|
||||||
type: "pdf"
|
type: "pdf"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
if (isTwitterTimeline(url)) {
|
||||||
return { url, content, type, pdf: [] }
|
const data = parseTwitterTimeline(content)
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
content: data,
|
||||||
|
type: "html",
|
||||||
|
pdf: []
|
||||||
|
}
|
||||||
|
} else if (isTweet(url)) {
|
||||||
|
const data = parseTweet(content)
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
content: data,
|
||||||
|
type: "html",
|
||||||
|
pdf: []
|
||||||
|
}
|
||||||
|
} else if (isGoogleDocs(url)) {
|
||||||
|
const data = await parseGoogleDocs()
|
||||||
|
if (data) {
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
content: cleanUnwantedUnicode(data),
|
||||||
|
type: "html",
|
||||||
|
pdf: []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const data = defaultExtractContent(content)
|
||||||
|
return { url, content: data, type, pdf: [] }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,24 +59,24 @@ export class PageAssistHtmlLoader
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
let html = this.html
|
// let html = this.html
|
||||||
|
|
||||||
if (isWikipedia(this.url)) {
|
// if (isWikipedia(this.url)) {
|
||||||
console.log("Wikipedia URL detected")
|
// console.log("Wikipedia URL detected")
|
||||||
html = parseWikipedia(html)
|
// html = parseWikipedia(html)
|
||||||
}
|
|
||||||
|
|
||||||
// else if (isTwitter(this.url)) {
|
|
||||||
// console.log("Twitter URL detected")
|
|
||||||
// html = parseTweet(html, this.url)
|
|
||||||
// }
|
// }
|
||||||
|
|
||||||
const htmlCompiler = compile({
|
// // else if (isTwitter(this.url)) {
|
||||||
wordwrap: false
|
// // console.log("Twitter URL detected")
|
||||||
})
|
// // html = parseTweet(html, this.url)
|
||||||
const text = htmlCompiler(html)
|
// // }
|
||||||
|
|
||||||
|
// const htmlCompiler = compile({
|
||||||
|
// wordwrap: false
|
||||||
|
// })
|
||||||
|
// const text = htmlCompiler(html)
|
||||||
const metadata = { source: this.url }
|
const metadata = { source: this.url }
|
||||||
return [new Document({ pageContent: text, metadata })]
|
return [new Document({ pageContent: this.html, metadata })]
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadByURL(): Promise<Document<Record<string, any>>[]> {
|
async loadByURL(): Promise<Document<Record<string, any>>[]> {
|
||||||
|
10
src/parser/default.ts
Normal file
10
src/parser/default.ts
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import * as cheerio from "cheerio"
|
||||||
|
import TurndownService from "turndown"
|
||||||
|
let turndownService = new TurndownService()
|
||||||
|
|
||||||
|
/**
 * Converts a page's HTML into Markdown.
 *
 * The fragment to convert is picked in order of preference: the inner HTML
 * selected by `[role="main"]`, then by `main`, and finally the serialized
 * document when neither selection yields a non-empty string.
 *
 * @param html - Raw HTML of the page.
 * @returns The Markdown produced by the module-level Turndown service.
 */
export const defaultExtractContent = (html: string) => {
  const dom = cheerio.load(html)

  let fragment = dom('[role="main"]').html()
  if (!fragment) {
    fragment = dom("main").html()
  }
  if (!fragment) {
    fragment = dom.html()
  }

  return turndownService.turndown(fragment)
}
|
119
src/parser/google-docs.ts
Normal file
119
src/parser/google-docs.ts
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
|
||||||
|
/**
 * Reports whether a URL points at a Google Docs document.
 *
 * The pattern is anchored to the start of the URL (with an optional
 * `http(s)://` scheme) so that `docs.google.com/document` appearing inside
 * another site's path or query string is not mistaken for a Docs page. The
 * `g` flag is intentionally absent: it is pointless for a single `.test()`
 * and makes a regex stateful through `lastIndex`.
 *
 * @param url - The URL to inspect, normally the tab's `location.href`.
 * @returns `true` when the URL is on `docs.google.com/document`.
 */
export const isGoogleDocs = (url: string): boolean => {
  const GOOGLE_DOCS_REGEX = /^(?:https?:\/\/)?docs\.google\.com\/document/i
  return GOOGLE_DOCS_REGEX.test(url)
}
|
||||||
|
|
||||||
|
/**
 * Looks for the document text inside the page's `window.KX_kixApp` object.
 *
 * The object graph is walked to a maximum depth of 5 and the first value
 * whose string form starts with the control character "\x03" is returned as
 * `content`. When nothing matches, or anything throws, `content` is `null`.
 *
 * NOTE(review): this function is handed to `chrome.scripting.executeScript`
 * as `func`, so it presumably has to stay self-contained (no references to
 * module scope) — confirm before extracting the nested helper.
 */
const getGoogleDocs = () => {
  try {
    /**
     * Depth-limited walk over `obj`, collecting every value accepted by
     * `predicate` together with the property path that led to it.
     */
    function traverse(
      obj: { [x: string]: any },
      predicate: { (_: any, value: any): boolean; (arg0: any, arg1: any): any },
      maxDepth: number,
      propNames = Object.getOwnPropertyNames(obj)
    ) {
      // Values already expanded, so shared or cyclic references are walked once.
      const visited = new Set()
      const results = []
      // Counts every visit attempt, including the ones rejected immediately.
      let iterations = 0

      const traverseObj = (
        name: string,
        value: unknown,
        path: any[],
        depth = 0
      ) => {
        iterations++
        // Never descend into prototypes, Window objects, or past the depth limit.
        if (name === "prototype" || value instanceof Window || depth > maxDepth)
          return

        const currentPath = [...path, name]

        try {
          if (predicate(name, value)) {
            // A match is recorded and not expanded any further.
            results.push({ path: currentPath, value })
            return
          }
          // A throwing predicate is treated as "no match".
        } catch (error) {}

        if (value != null && !visited.has(value)) {
          visited.add(value)
          if (Array.isArray(value)) {
            value.forEach((val, index) => {
              try {
                traverseObj(index.toString(), val, currentPath, depth + 1)
              } catch (error) {}
            })
          } else if (value instanceof Object) {
            // NOTE(review): for values that look like DOM elements
            // (nodeType === 1) this enumerates the ROOT object's property
            // names (`obj`), not the element's own — confirm this is intended.
            const propNamesForValue =
              value &&
              // @ts-ignore
              value.nodeType === 1 &&
              // @ts-ignore
              typeof value.nodeName === "string"
                ? Object.getOwnPropertyNames(obj)
                : Object.getOwnPropertyNames(value)

            propNamesForValue.forEach((prop) => {
              try {
                // Property reads are guarded; presumably some getters throw.
                traverseObj(prop, value[prop], currentPath, depth + 1)
              } catch (error) {}
            })
          }
        }
      }

      propNames.forEach((prop) => {
        try {
          traverseObj(prop, obj[prop], [])
        } catch (error) {}
      })

      return { results, iterations }
    }

    const result = traverse(
      // @ts-ignore
      window.KX_kixApp,
      // Accept any truthy value whose string form begins with "\x03".
      (_: any, value: { toString: () => string }) =>
        value && "\x03" === value.toString().charAt(0),
      5
    )
    // Only the first match is used.
    if (result.results?.[0]?.value) {
      return {
        content: result.results[0].value
      }
    }

    return {
      content: null
    }
  } catch (error) {
    // Any failure (for example a missing KX_kixApp) is reported as "no content".
    return {
      content: null
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Runs `getGoogleDocs` inside the active tab (MAIN world) and returns the
 * text it found.
 *
 * The wrapping promise always settles. A missing active tab, a rejected
 * `executeScript` call, an empty result list, or an undefined frame result
 * all resolve to "no content" instead of leaving the caller awaiting forever
 * or throwing while destructuring.
 *
 * @returns The extracted document text, or `undefined` when nothing could be
 *   extracted (a `null` content from the injected script is normalized to
 *   `undefined`; callers are expected to test the result for truthiness).
 */
export const parseGoogleDocs = async (): Promise<string | undefined> => {
  const result = await new Promise<{ content?: string | null } | undefined>(
    (resolve) => {
      chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
        try {
          const tabId = tabs?.[0]?.id
          if (tabId === undefined) {
            // No active tab to inject into.
            resolve(undefined)
            return
          }

          const frames = await chrome.scripting.executeScript({
            target: { tabId },
            world: "MAIN",
            func: getGoogleDocs
          })

          // Only the first injection result is used, as before.
          resolve(frames?.[0]?.result ?? undefined)
        } catch (error) {
          // Injection can fail (restricted page, closed tab, ...); report it
          // and fall back to "no content" so the caller can continue.
          console.error("parseGoogleDocs: script injection failed", error)
          resolve(undefined)
        }
      })
    }
  )

  return result?.content ?? undefined
}
|
5
src/parser/google-sheets.ts
Normal file
5
src/parser/google-sheets.ts
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
import * as cheerio from 'cheerio';
|
||||||
|
|
||||||
|
/**
 * Placeholder parser for Google Sheets pages.
 *
 * It currently only loads the HTML into cheerio and discards the parsed
 * document; nothing is returned.
 *
 * @param html - Raw HTML of the page.
 */
export const parseGoogleSheets = (html: string) => {
  // The parsed document is not used yet.
  cheerio.load(html);
};
|
@ -2,89 +2,101 @@ import * as cheerio from "cheerio"
|
|||||||
|
|
||||||
/**
 * Matches the origin of a Twitter/X URL: optional scheme, optional
 * subdomains, then exactly `twitter.com/` or `x.com/`. Anchoring at the
 * start keeps hosts that merely END in "x.com" (netflix.com, dropbox.com,
 * ...) from being treated as X. There is no `g` flag, so the regex is
 * stateless.
 */
const TWITTER_ORIGIN = /^(?:https?:\/\/)?(?:[a-z0-9-]+\.)*(?:twitter|x)\.com\//i

/**
 * Returns the part of `url` after the Twitter/X origin (without the leading
 * slash), or `null` when the URL is not on twitter.com / x.com.
 */
const twitterPathOf = (url: string): string | null => {
  const origin = TWITTER_ORIGIN.exec(url)
  return origin ? url.slice(origin[0].length) : null
}

/**
 * Reports whether the URL is a single tweet/post page, i.e.
 * `<twitter.com|x.com>/<handle>/status/<digits>`.
 *
 * @param url - The URL to inspect.
 */
export const isTweet = (url: string) => {
  const path = twitterPathOf(url)
  return path !== null && /^[a-zA-Z0-9_]+\/status\/[0-9]+/.test(path)
}

/**
 * Reports whether the URL is exactly the home timeline on twitter.com or
 * x.com.
 *
 * @param url - The URL to inspect.
 */
export const isTwitterTimeline = (url: string) => {
  return url === "https://twitter.com/home" || url === "https://x.com/home"
}

/**
 * Reports whether the URL has at least one handle-like path segment on
 * twitter.com or x.com. Like the previous pattern, this is also true for
 * tweet and `/home` URLs, so callers should test the more specific
 * predicates first.
 *
 * @param url - The URL to inspect.
 */
export const isTwitterProfile = (url: string) => {
  const path = twitterPathOf(url)
  return path !== null && /^[a-zA-Z0-9_]+/.test(path)
}
|
||||||
|
|
||||||
export const isTwitterTimeline = (url: string) => {
|
export const parseTwitterTimeline = (html: string) => {
|
||||||
const TIMELINE_REGEX = /twitter\.com\/home/g
|
|
||||||
return TIMELINE_REGEX.test(url)
|
|
||||||
}
|
|
||||||
|
|
||||||
export const isTwitter = (url: string) => {
|
|
||||||
return isTweet(url) || isTwitterProfile(url) || isTwitterTimeline(url)
|
|
||||||
}
|
|
||||||
|
|
||||||
export const isTwitterNotification = (url: string) => {
|
|
||||||
const NOTIFICATION_REGEX = /twitter\.com\/notifications/g
|
|
||||||
return NOTIFICATION_REGEX.test(url)
|
|
||||||
}
|
|
||||||
|
|
||||||
export const parseTweet = (html: string, url: string) => {
|
|
||||||
if (!html) {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
const $ = cheerio.load(html)
|
const $ = cheerio.load(html)
|
||||||
|
const postElements = $("[data-testid=tweetText]")
|
||||||
|
const authorElements = $("[data-testid=User-Name]")
|
||||||
|
|
||||||
if (isTweet(url)) {
|
const posts = postElements
|
||||||
console.log("tweet")
|
.map((index, element) => {
|
||||||
const tweet = $("div[data-testid='tweet']")
|
const post = $(element).text()
|
||||||
const tweetContent = tweet.find("div[lang]")
|
const author = $(authorElements[index]).text()
|
||||||
const tweetMedia = tweet.find("div[role='group']")
|
return {
|
||||||
const author = tweet.find("a[role='link']").text()
|
author,
|
||||||
const date = tweet.find("time").text()
|
post
|
||||||
return `<div>${author} ${tweetContent.text()} ${tweetMedia.html()} ${date}</div>`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isTwitterTimeline(url)) {
|
|
||||||
console.log("timeline")
|
|
||||||
const timeline = $("div[data-testid='primaryColumn']")
|
|
||||||
const timelineContent = timeline.find("div[data-testid='tweet']")
|
|
||||||
console.log(timelineContent.html())
|
|
||||||
const tweet = timelineContent
|
|
||||||
.map((i, el) => {
|
|
||||||
const author = $(el).find("a[role='link']").text()
|
|
||||||
const content = $(el).find("div[lang]").text()
|
|
||||||
const media = $(el).find("div[role='group']").html()
|
|
||||||
const date = $(el).find("time").text()
|
|
||||||
return `<div>${author} ${content} ${media} ${date}</div>`
|
|
||||||
})
|
})
|
||||||
.get()
|
.get()
|
||||||
.join("")
|
|
||||||
console.log(tweet)
|
|
||||||
return `<div>${tweet}</div>`
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isTwitterNotification(url)) {
|
return posts
|
||||||
console.log("notification")
|
.map((post) => {
|
||||||
const notification = $("div[data-testid='primaryColumn']")
|
return `## Author: ${post.author}\n\n${post.post}\n\n---\n\n`
|
||||||
const notificationContent = notification.find("div[data-testid='tweet']")
|
|
||||||
return `<div>${notificationContent.html()}</div>`
|
|
||||||
}
|
|
||||||
if (isTwitterProfile(url)) {
|
|
||||||
console.log("profile")
|
|
||||||
const profile = $("div[data-testid='primaryColumn']")
|
|
||||||
const profileContent = profile.find(
|
|
||||||
"div[data-testid='UserProfileHeader_Items']"
|
|
||||||
)
|
|
||||||
const profileTweets = profile.find("div[data-testid='tweet']")
|
|
||||||
return `<div>${profileContent.html()}</div><div>${profileTweets.html()}</div>`
|
|
||||||
}
|
|
||||||
console.log("no match")
|
|
||||||
const timeline = $("div[data-testid='primaryColumn']")
|
|
||||||
const timelineContent = timeline.find("div[data-testid='tweet']")
|
|
||||||
const tweet = timelineContent.map((i, el) => {
|
|
||||||
const author = $(el).find("a[role='link']").text()
|
|
||||||
const content = $(el).find("div[lang]").text()
|
|
||||||
const media = $(el).find("div[role='group']").html()
|
|
||||||
const date = $(el).find("time").text()
|
|
||||||
return `<div>${author} ${content} ${media} ${date}</div>`
|
|
||||||
})
|
})
|
||||||
|
.filter((value, index, self) => self.indexOf(value) === index)
|
||||||
return `<div>${tweet}</div>`
|
.join("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Renders a tweet page (the tweet plus the replies present in the HTML) as
 * Markdown.
 *
 * Every `[data-testid=tweetText]` element becomes one section. The first is
 * labelled "Post:", all later ones "Reply:". The heading is written as
 * `## Author:` — with the space a Markdown ATX heading requires — which also
 * matches the heading emitted for timeline posts.
 *
 * NOTE(review): authors are paired with posts purely by position in the two
 * selections; this assumes `User-Name` and `tweetText` elements line up
 * one-to-one — confirm against pages with quoted or media-only tweets.
 *
 * @param html - Raw HTML of the tweet page.
 * @returns The Markdown sections joined with newlines; an empty string when
 *   no tweet text is found.
 */
export const parseTweet = (html: string) => {
  const $ = cheerio.load(html)
  const postElements = $("[data-testid=tweetText]")
  const authorElements = $("[data-testid=User-Name]")

  const posts = postElements
    .map((index, element) => {
      const post = $(element).text()
      const author = $(authorElements[index]).text()
      return {
        author,
        post,
        isReply: index !== 0
      }
    })
    .get()

  return posts
    .map((post) => {
      return `## Author: ${post.author}\n\n${post.isReply ? "Reply:" : "Post:"} ${post.post}\n\n---\n\n`
    })
    .join("\n")
}
|
||||||
|
|
||||||
|
/**
 * Renders a profile page as Markdown: a profile header followed by the
 * posts found in the HTML.
 *
 * NOTE(review): "Location" and "Join Date" are both the concatenated text of
 * every `span` under the profile header, and "Followers" / "Following" are
 * both the text of the `[data-testid=UserProfileHeader_Items] span`
 * selection, so these fields are not distinguished from each other — confirm
 * whether more specific selectors were intended.
 *
 * @param html - Raw HTML of the profile page.
 * @returns The profile summary and posts as one Markdown string.
 */
export const parseTweetProfile = (html: string) => {
  const $ = cheerio.load(html)

  // Header fields, each read once from the profile header items.
  const header = $("[data-testid=UserProfileHeader_Items]")
  const name = header.find("h1").text()
  const bio = header.find("p").text()
  const headerSpans = header.find("span").text()
  const spanSelection = $("[data-testid=UserProfileHeader_Items] span").text()

  // Posts are paired with authors by their position in the two selections.
  const authorNodes = $("[data-testid=User-Name]")
  const postsSection = $("[data-testid=tweetText]")
    .toArray()
    .map((node, position) => {
      const post = $(node).text()
      const author = $(authorNodes[position]).text()
      return `Author: ${author}\n\nPost: ${post}\n\n---\n\n`
    })
    .join("\n")

  return `## Profile: ${name}\n\nBio: ${bio}\n\nLocation: ${headerSpans}\n\nJoin Date: ${headerSpans}\n\nFollowers: ${spanSelection}\n\nFollowing: ${spanSelection}\n\nPosts: ${postsSection}`
}
|
||||||
|
4
src/utils/clean.ts
Normal file
4
src/utils/clean.ts
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/**
 * Removes the characters U+200B through U+200D and U+FEFF from the text,
 * then trims surrounding whitespace.
 *
 * @param text - The text to clean.
 * @returns The cleaned, trimmed text.
 */
export const cleanUnwantedUnicode = (text: string) => {
  const stripped = text.replace(/[\u200B-\u200D\uFEFF]/g, "")
  return stripped.trim()
}
|
Loading…
x
Reference in New Issue
Block a user