feat: Add @mozilla/readability dependency for extracting content from web pages

This commit is contained in:
n4ze3m
2024-06-22 00:25:12 +05:30
parent 56cea30058
commit d23b70b979
7 changed files with 136 additions and 3 deletions

19
src/parser/reader.ts Normal file
View File

@@ -0,0 +1,19 @@
import { Readability } from "@mozilla/readability"
import { defaultExtractContent } from "./default"
/**
 * Fetches a web page and runs Mozilla Readability over it to isolate the
 * main article content, then hands that content to `defaultExtractContent`.
 *
 * Relies on the global `fetch` and `DOMParser`, so it must run in an
 * environment that provides both (e.g. a browser or extension page).
 *
 * @param url - Address of the page to download and extract.
 * @returns Whatever `defaultExtractContent` produces for the article HTML
 *   (presumably markdown, per the original inline comment — confirm against
 *   `./default`).
 * @throws Error if the HTTP response is not OK, or if Readability cannot
 *   identify any article content in the page.
 */
export const extractReadabilityContent = async (url: string) => {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}`)
  }
  const html = await response.text()

  // Readability operates on a DOM Document, so parse the raw HTML into a
  // detached document first.
  const doc = new DOMParser().parseFromString(html, "text/html")
  const article = new Readability(doc).parse()

  // parse() yields null when no readable article can be found; fail with a
  // descriptive error instead of a TypeError on `article.content`.
  if (article == null || article.content == null) {
    throw new Error(`Failed to extract readable content from ${url}`)
  }

  // Hand the extracted article HTML to the default extractor.
  return defaultExtractContent(article.content)
}