diff --git a/src/utils/search-provider.ts b/src/utils/search-provider.ts
index 5fb35e6..d623df2 100644
--- a/src/utils/search-provider.ts
+++ b/src/utils/search-provider.ts
@@ -10,5 +10,9 @@ export const SUPPORTED_SERACH_PROVIDERS = [
     {
         label: "Sogou",
         value: "sogou"
+    },
+    {
+        label: "Brave",
+        value: "brave"
     }
 ]
\ No newline at end of file
diff --git a/src/web/search-engines/brave.ts b/src/web/search-engines/brave.ts
new file mode 100644
index 0000000..b387fbf
--- /dev/null
+++ b/src/web/search-engines/brave.ts
@@ -0,0 +1,146 @@
+import { cleanUrl } from "@/libs/clean-url"
+import { urlRewriteRuntime } from "@/libs/runtime"
+import { PageAssistHtmlLoader } from "@/loader/html"
+import {
+  defaultEmbeddingChunkOverlap,
+  defaultEmbeddingChunkSize,
+  defaultEmbeddingModelForRag,
+  getOllamaURL
+} from "@/services/ollama"
+import {
+  getIsSimpleInternetSearch,
+  totalSearchResults
+} from "@/services/search"
+import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
+import type { Document } from "@langchain/core/documents"
+import * as cheerio from "cheerio"
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
+import { MemoryVectorStore } from "langchain/vectorstores/memory"
+
+/** Milliseconds to wait for Brave before the request is aborted. */
+const BRAVE_SEARCH_TIMEOUT_MS = 10000
+
+/** One organic hit scraped from the Brave results page. */
+type BraveSearchResult = {
+  title: string
+  link: string
+  content: string
+}
+
+/**
+ * Fetches the Brave Search results page for `query` and scrapes its hits.
+ *
+ * Only entries that have a link, a title and a description are returned.
+ * Network failures and the timeout abort reject the returned promise.
+ *
+ * @param query Raw, un-encoded search terms.
+ * @returns The scraped hits, in page order.
+ */
+export const localBraveSearch = async (
+  query: string
+): Promise<BraveSearchResult[]> => {
+  // Encode the query so `&`, `#`, `+` and spaces stay inside the `q`
+  // parameter instead of being parsed as URL syntax.
+  const searchUrl =
+    "https://search.brave.com/search?q=" + encodeURIComponent(query)
+
+  // NOTE(review): the second argument is kept as "duckduckgo" from the
+  // original change; confirm whether urlRewriteRuntime expects another value.
+  await urlRewriteRuntime(cleanUrl(searchUrl), "duckduckgo")
+
+  const abortController = new AbortController()
+  const timeoutId = setTimeout(
+    () => abortController.abort(),
+    BRAVE_SEARCH_TIMEOUT_MS
+  )
+
+  let htmlString: string
+  try {
+    const response = await fetch(searchUrl, {
+      signal: abortController.signal
+    })
+    htmlString = await response.text()
+  } finally {
+    // Always release the timer so it cannot fire after the request settled.
+    clearTimeout(timeoutId)
+  }
+
+  const $ = cheerio.load(htmlString)
+  const $snippets = $("div#results").find("div.snippet")
+
+  return Array.from($snippets)
+    .map((snippet) => {
+      const $snippet = $(snippet)
+      return {
+        title: $snippet.find("div.title").text(),
+        link: $snippet.find("a").attr("href"),
+        content: $snippet.find("div.snippet-description").text()
+      }
+    })
+    .filter((result): result is BraveSearchResult =>
+      Boolean(result.link && result.title && result.content)
+    )
+}
+
+/**
+ * Runs a Brave search and turns the hits into `{ url, content }` sources.
+ *
+ * In simple mode the scraped descriptions are returned directly. Otherwise
+ * every hit is loaded, split into chunks, embedded with the configured
+ * Ollama model, and the three chunks most similar to `query` are returned.
+ *
+ * @param query Raw, un-encoded search terms.
+ * @returns Source snippets with the URL they came from.
+ */
+export const webBraveSearch = async (query: string) => {
+  const results = await localBraveSearch(query)
+  const maxResults = await totalSearchResults()
+  const searchResults = results.slice(0, maxResults)
+
+  const isSimpleMode = await getIsSimpleInternetSearch()
+
+  if (isSimpleMode) {
+    // NOTE(review): the result is unused here; the call is kept from the
+    // original change, presumably for a side effect. Confirm it is needed.
+    await getOllamaURL()
+    return searchResults.map((result) => ({
+      url: result.link,
+      content: result.content
+    }))
+  }
+
+  const docs: Document[] = []
+  for (const result of searchResults) {
+    const loader = new PageAssistHtmlLoader({
+      html: "",
+      url: result.link
+    })
+    docs.push(...(await loader.loadByURL()))
+  }
+
+  const ollamaUrl = await getOllamaURL()
+  const embeddingModel = await defaultEmbeddingModelForRag()
+  const ollamaEmbedding = new OllamaEmbeddings({
+    model: embeddingModel || "",
+    baseUrl: cleanUrl(ollamaUrl)
+  })
+
+  const chunkSize = await defaultEmbeddingChunkSize()
+  const chunkOverlap = await defaultEmbeddingChunkOverlap()
+  const textSplitter = new RecursiveCharacterTextSplitter({
+    chunkSize,
+    chunkOverlap
+  })
+
+  const chunks = await textSplitter.splitDocuments(docs)
+
+  const store = new MemoryVectorStore(ollamaEmbedding)
+  await store.addDocuments(chunks)
+
+  const resultsWithEmbeddings = await store.similaritySearch(query, 3)
+
+  return resultsWithEmbeddings.map((result) => ({
+    url: result.metadata.url,
+    content: result.pageContent
+  }))
+}
diff --git a/src/web/web.ts b/src/web/web.ts
index cfadf09..e9c1765 100644
--- a/src/web/web.ts
+++ b/src/web/web.ts
@@ -3,6 +3,7 @@ import { webGoogleSearch } from "./search-engines/google"
 import { webDuckDuckGoSearch } from "./search-engines/duckduckgo"
 import { getSearchProvider } from "@/services/search"
 import { webSogouSearch } from "./search-engines/sogou"
+import { webBraveSearch } from "./search-engines/brave"
 
 const getHostName = (url: string) => {
   try {
@@ -19,6 +20,8 @@ const searchWeb = (provider: string, query: string) => {
       return webDuckDuckGoSearch(query)
     case "sogou":
       return webSogouSearch(query)
+    case "brave":
+      return webBraveSearch(query)
     default:
       return webGoogleSearch(query)
   }