Update dependencies and fix whitespace formatting in isTTSEnabled function in tts.ts
This commit is contained in:
@@ -1,17 +1,25 @@
|
||||
import { defaultExtractContent } from "@/parser/default"
|
||||
import { getPdf } from "./pdf"
|
||||
import {
|
||||
isTweet,
|
||||
isTwitterTimeline,
|
||||
parseTweet,
|
||||
parseTwitterTimeline,
|
||||
} from "@/parser/twitter"
|
||||
import { isGoogleDocs, parseGoogleDocs } from "@/parser/google-docs"
|
||||
import { cleanUnwantedUnicode } from "@/utils/clean"
|
||||
|
||||
const _getHtml = async () => {
|
||||
const _getHtml = () => {
|
||||
const url = window.location.href
|
||||
if (document.contentType === "application/pdf") {
|
||||
return { url, content: "", type: "pdf" }
|
||||
}
|
||||
const html = Array.from(document.querySelectorAll("script")).reduce(
|
||||
(acc, script) => {
|
||||
return acc.replace(script.outerHTML, "")
|
||||
},
|
||||
document.documentElement.outerHTML
|
||||
)
|
||||
return { url, content: html, type: "html" }
|
||||
|
||||
return {
|
||||
content: document.documentElement.outerHTML,
|
||||
url,
|
||||
type: "html"
|
||||
}
|
||||
}
|
||||
|
||||
export const getDataFromCurrentTab = async () => {
|
||||
@@ -34,7 +42,6 @@ export const getDataFromCurrentTab = async () => {
|
||||
type: string
|
||||
}>
|
||||
|
||||
|
||||
const { content, type, url } = await result
|
||||
|
||||
if (type === "pdf") {
|
||||
@@ -47,31 +54,58 @@ export const getDataFromCurrentTab = async () => {
|
||||
const pdf = await getPdf(data)
|
||||
|
||||
for (let i = 1; i <= pdf.numPages; i += 1) {
|
||||
const page = await pdf.getPage(i);
|
||||
const content = await page.getTextContent();
|
||||
const page = await pdf.getPage(i)
|
||||
const content = await page.getTextContent()
|
||||
|
||||
if (content?.items.length === 0) {
|
||||
continue;
|
||||
continue
|
||||
}
|
||||
|
||||
const text = content?.items.map((item: any) => item.str).join("\n")
|
||||
.replace(/\x00/g, "").trim();
|
||||
const text = content?.items
|
||||
.map((item: any) => item.str)
|
||||
.join("\n")
|
||||
.replace(/\x00/g, "")
|
||||
.trim()
|
||||
pdfHtml.push({
|
||||
content: text,
|
||||
page: i
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
url,
|
||||
content: "",
|
||||
pdf: pdfHtml,
|
||||
type: "pdf"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return { url, content, type, pdf: [] }
|
||||
if (isTwitterTimeline(url)) {
|
||||
const data = parseTwitterTimeline(content)
|
||||
return {
|
||||
url,
|
||||
content: data,
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
} else if (isTweet(url)) {
|
||||
const data = parseTweet(content)
|
||||
return {
|
||||
url,
|
||||
content: data,
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
} else if (isGoogleDocs(url)) {
|
||||
const data = await parseGoogleDocs()
|
||||
if (data) {
|
||||
return {
|
||||
url,
|
||||
content: cleanUnwantedUnicode(data),
|
||||
type: "html",
|
||||
pdf: []
|
||||
}
|
||||
}
|
||||
}
|
||||
const data = defaultExtractContent(content)
|
||||
return { url, content: data, type, pdf: [] }
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user