Add dependencies and update code for PDF parsing and searching

This commit is contained in:
n4ze3m
2024-02-25 00:12:46 +05:30
parent f87953ba5c
commit 06b32176a9
22 changed files with 577 additions and 249 deletions

45
src/web/local-google.ts Normal file
View File

@@ -0,0 +1,45 @@
import { cleanUrl } from "~libs/clean-url"
import { chromeRunTime } from "~libs/runtime"
/** Hosts whose pages are excluded from the search results returned by this module. */
const BLOCKED_HOSTS: readonly string[] = ["google.com", "youtube.com", "twitter.com"]
/**
 * Reports whether a search-result link points at one of the blocked hosts.
 *
 * Absolute URLs are matched on their hostname (exact match or sub-domain), so
 * a page that merely mentions a blocked domain in its path or query string is
 * not discarded. Links that cannot be parsed as absolute URLs (for example
 * relative hrefs) fall back to a plain substring check.
 */
const isBlockedLink = (link: string): boolean => {
  let hostname: string
  try {
    hostname = new URL(link).hostname.toLowerCase()
  } catch {
    // Not an absolute URL: there is no hostname to compare against.
    return BLOCKED_HOSTS.some((host) => link.includes(host))
  }
  return BLOCKED_HOSTS.some(
    (host) => hostname === host || hostname.endsWith("." + host)
  )
}

/**
 * Runs a Google web search for `query` and scrapes the result page.
 *
 * The result page is fetched as HTML, parsed with `DOMParser`, and every
 * `div.g` element is turned into a `{ title, link }` pair taken from its first
 * `h3` and first `a[href]`. Entries without a title or link, and entries whose
 * link targets a blocked host, are dropped.
 *
 * @param query Raw (not yet URL-encoded) search terms.
 * @returns The remaining results, in page order.
 * @throws Rejects when the request fails or is aborted after the 10 second
 *   timeout; the error from `fetch` is propagated unchanged.
 */
export const localGoogleSearch = async (
  query: string
): Promise<{ title: string; link: string }[]> => {
  // Encode the query so characters such as `&`, `#` or `+` stay part of the
  // search terms instead of altering the URL structure.
  const searchUrl =
    "https://www.google.com/search?hl=en&q=" + encodeURIComponent(query)

  // NOTE(review): chromeRunTime is defined in ~libs/runtime; presumably it
  // prepares the extension for requests to this URL — confirm there.
  await chromeRunTime(cleanUrl(searchUrl))

  const abortController = new AbortController()
  const timeoutId = setTimeout(() => abortController.abort(), 10000)

  let htmlString: string
  try {
    const response = await fetch(searchUrl, {
      signal: abortController.signal
    })
    htmlString = await response.text()
  } finally {
    // Release the timer once the request has settled either way.
    clearTimeout(timeoutId)
  }

  const doc = new DOMParser().parseFromString(htmlString, "text/html")

  return Array.from(doc.querySelectorAll("div.g")).flatMap((result) => {
    const title = result.querySelector("h3")?.textContent
    const link = result.querySelector("a")?.getAttribute("href")
    if (!title || !link || isBlockedLink(link)) {
      return []
    }
    return [{ title, link }]
  })
}