Update dependencies and fix whitespace formatting in isTTSEnabled function in tts.ts

This commit is contained in:
n4ze3m
2024-04-15 11:32:30 +05:30
parent 476323d928
commit c914233610
11 changed files with 302 additions and 115 deletions

View File

@@ -1,17 +1,25 @@
import { defaultExtractContent } from "@/parser/default"
import { getPdf } from "./pdf"
import {
isTweet,
isTwitterTimeline,
parseTweet,
parseTwitterTimeline,
} from "@/parser/twitter"
import { isGoogleDocs, parseGoogleDocs } from "@/parser/google-docs"
import { cleanUnwantedUnicode } from "@/utils/clean"
const _getHtml = async () => {
const _getHtml = () => {
const url = window.location.href
if (document.contentType === "application/pdf") {
return { url, content: "", type: "pdf" }
}
const html = Array.from(document.querySelectorAll("script")).reduce(
(acc, script) => {
return acc.replace(script.outerHTML, "")
},
document.documentElement.outerHTML
)
return { url, content: html, type: "html" }
return {
content: document.documentElement.outerHTML,
url,
type: "html"
}
}
export const getDataFromCurrentTab = async () => {
@@ -34,7 +42,6 @@ export const getDataFromCurrentTab = async () => {
type: string
}>
const { content, type, url } = await result
if (type === "pdf") {
@@ -47,31 +54,58 @@ export const getDataFromCurrentTab = async () => {
const pdf = await getPdf(data)
for (let i = 1; i <= pdf.numPages; i += 1) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();
const page = await pdf.getPage(i)
const content = await page.getTextContent()
if (content?.items.length === 0) {
continue;
continue
}
const text = content?.items.map((item: any) => item.str).join("\n")
.replace(/\x00/g, "").trim();
const text = content?.items
.map((item: any) => item.str)
.join("\n")
.replace(/\x00/g, "")
.trim()
pdfHtml.push({
content: text,
page: i
})
}
return {
url,
content: "",
pdf: pdfHtml,
type: "pdf"
}
}
return { url, content, type, pdf: [] }
if (isTwitterTimeline(url)) {
const data = parseTwitterTimeline(content)
return {
url,
content: data,
type: "html",
pdf: []
}
} else if (isTweet(url)) {
const data = parseTweet(content)
return {
url,
content: data,
type: "html",
pdf: []
}
} else if (isGoogleDocs(url)) {
const data = await parseGoogleDocs()
if (data) {
return {
url,
content: cleanUnwantedUnicode(data),
type: "html",
pdf: []
}
}
}
const data = defaultExtractContent(content)
return { url, content: data, type, pdf: [] }
}