feat: Add support for Brave search provider
This commit is contained in:
parent
11f5eba3ed
commit
845b725970
@ -10,5 +10,9 @@ export const SUPPORTED_SERACH_PROVIDERS = [
|
||||
{
|
||||
label: "Sogou",
|
||||
value: "sogou"
|
||||
},
|
||||
{
|
||||
label: "Brave",
|
||||
value: "brave"
|
||||
}
|
||||
]
|
112
src/web/search-engines/brave.ts
Normal file
112
src/web/search-engines/brave.ts
Normal file
@ -0,0 +1,112 @@
|
||||
import { cleanUrl } from "@/libs/clean-url"
|
||||
import { urlRewriteRuntime } from "@/libs/runtime"
|
||||
import { PageAssistHtmlLoader } from "@/loader/html"
|
||||
import {
|
||||
defaultEmbeddingChunkOverlap,
|
||||
defaultEmbeddingChunkSize,
|
||||
defaultEmbeddingModelForRag,
|
||||
getOllamaURL
|
||||
} from "@/services/ollama"
|
||||
import {
|
||||
getIsSimpleInternetSearch,
|
||||
totalSearchResults
|
||||
} from "@/services/search"
|
||||
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama"
|
||||
import type { Document } from "@langchain/core/documents"
|
||||
import * as cheerio from "cheerio"
|
||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
|
||||
/**
 * Fetches the Brave Search results page for `query` and scrapes the result
 * snippets out of the returned HTML.
 *
 * @param query - Raw (not yet percent-encoded) search text.
 * @returns One `{ title, link, content }` object per `div.snippet` found
 *   under `div#results`; entries missing any of the three fields are dropped.
 * @throws Whatever `fetch` / `response.text()` rejects with, including the
 *   abort error raised when the request is not finished after 10 seconds.
 */
export const localBraveSearch = async (query: string) => {
  // Encode the query so characters such as `&`, `#`, `+` or `%` are sent as
  // part of the search text instead of being parsed as URL syntax.
  const searchUrl =
    "https://search.brave.com/search?q=" + encodeURIComponent(query)

  // NOTE(review): the "duckduckgo" rewrite type is passed for a Brave URL;
  // kept as-is — confirm that urlRewriteRuntime has no Brave-specific type.
  await urlRewriteRuntime(cleanUrl(searchUrl), "duckduckgo")

  const abortController = new AbortController()
  const timeoutId = setTimeout(() => abortController.abort(), 10000)

  let htmlString: string
  try {
    const response = await fetch(searchUrl, {
      signal: abortController.signal
    })
    htmlString = await response.text()
  } finally {
    // Do not leave the abort timer pending once the request has settled.
    clearTimeout(timeoutId)
  }

  const $ = cheerio.load(htmlString)
  const $results = $("div#results")
  const $snippets = $results.find("div.snippet")

  const searchResults = Array.from($snippets)
    .map((result) => {
      const $result = $(result)
      const link = $result.find("a").attr("href")
      const title = $result.find("div.title").text()
      const content = $result.find("div.snippet-description").text()
      return { title, link, content }
    })
    // Keep only entries where link, title and description are all non-empty.
    .filter((result) => result.link && result.title && result.content)

  return searchResults
}
|
||||
|
||||
/**
 * Runs a Brave web search for `query` and turns the hits into
 * `{ url, content }` pairs.
 *
 * The scraped hits are capped at the value returned by `totalSearchResults()`.
 * When `getIsSimpleInternetSearch()` resolves truthy, the scraped snippets
 * are returned directly. Otherwise every hit is loaded through
 * `PageAssistHtmlLoader`, split into chunks, embedded with Ollama into an
 * in-memory vector store, and the 3 chunks most similar to `query` are
 * returned.
 *
 * @param query - Search text; also used as the similarity-search query.
 * @returns Array of `{ url, content }` objects.
 */
export const webBraveSearch = async (query: string) => {
  const allHits = await localBraveSearch(query)
  const maxHits = await totalSearchResults()
  const hits = allHits.slice(0, maxHits)

  const simpleMode = await getIsSimpleInternetSearch()

  if (simpleMode) {
    // Return value is intentionally unused; the call itself is preserved.
    await getOllamaURL()
    return hits.map((hit) => ({
      url: hit.link,
      content: hit.content
    }))
  }

  // Load each hit's page one after another and collect all documents.
  const docs: Document<Record<string, any>>[] = []
  for (const hit of hits) {
    const pageLoader = new PageAssistHtmlLoader({
      html: "",
      url: hit.link
    })

    const pageDocs = await pageLoader.loadByURL()
    for (const pageDoc of pageDocs) {
      docs.push(pageDoc)
    }
  }

  const baseUrl = await getOllamaURL()
  const embeddingModel = await defaultEmbeddingModelForRag()
  const embeddings = new OllamaEmbeddings({
    model: embeddingModel || "",
    baseUrl: cleanUrl(baseUrl)
  })

  const chunkSize = await defaultEmbeddingChunkSize()
  const chunkOverlap = await defaultEmbeddingChunkOverlap()
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize,
    chunkOverlap
  })
  const chunks = await splitter.splitDocuments(docs)

  const vectorStore = new MemoryVectorStore(embeddings)
  await vectorStore.addDocuments(chunks)

  const closestChunks = await vectorStore.similaritySearch(query, 3)

  return closestChunks.map((chunk) => ({
    url: chunk.metadata.url,
    content: chunk.pageContent
  }))
}
|
@ -3,6 +3,7 @@ import { webGoogleSearch } from "./search-engines/google"
|
||||
import { webDuckDuckGoSearch } from "./search-engines/duckduckgo"
|
||||
import { getSearchProvider } from "@/services/search"
|
||||
import { webSogouSearch } from "./search-engines/sogou"
|
||||
import { webBraveSearch } from "./search-engines/brave"
|
||||
|
||||
const getHostName = (url: string) => {
|
||||
try {
|
||||
@ -19,6 +20,8 @@ const searchWeb = (provider: string, query: string) => {
|
||||
return webDuckDuckGoSearch(query)
|
||||
case "sogou":
|
||||
return webSogouSearch(query)
|
||||
case "brave":
|
||||
return webBraveSearch(query)
|
||||
default:
|
||||
return webGoogleSearch(query)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user