feat: Add support for Brave search provider
This commit is contained in:
		
							parent
							
								
									11f5eba3ed
								
							
						
					
					
						commit
						845b725970
					
				| @ -10,5 +10,9 @@ export const SUPPORTED_SERACH_PROVIDERS = [ | |||||||
|     { |     { | ||||||
|         label: "Sogou", |         label: "Sogou", | ||||||
|         value: "sogou" |         value: "sogou" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |         label: "Brave", | ||||||
|  |         value: "brave" | ||||||
|     } |     } | ||||||
| ] | ] | ||||||
							
								
								
									
										112
									
								
								src/web/search-engines/brave.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								src/web/search-engines/brave.ts
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,112 @@ | |||||||
|  | import { cleanUrl } from "@/libs/clean-url" | ||||||
|  | import { urlRewriteRuntime } from "@/libs/runtime" | ||||||
|  | import { PageAssistHtmlLoader } from "@/loader/html" | ||||||
|  | import { | ||||||
|  |     defaultEmbeddingChunkOverlap, | ||||||
|  |     defaultEmbeddingChunkSize, | ||||||
|  |     defaultEmbeddingModelForRag, | ||||||
|  |     getOllamaURL | ||||||
|  | } from "@/services/ollama" | ||||||
|  | import { | ||||||
|  |     getIsSimpleInternetSearch, | ||||||
|  |     totalSearchResults | ||||||
|  | } from "@/services/search" | ||||||
|  | import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" | ||||||
|  | import type { Document } from "@langchain/core/documents" | ||||||
|  | import * as cheerio from "cheerio" | ||||||
|  | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" | ||||||
|  | import { MemoryVectorStore } from "langchain/vectorstores/memory" | ||||||
|  | 
 | ||||||
/**
 * Fetches the Brave Search HTML results page for `query` and extracts the
 * result snippets found on it.
 *
 * The query is percent-encoded before being placed in the URL so that
 * characters such as `&`, `#`, `+` or `%` are sent as part of the search
 * term instead of being interpreted as URL syntax.
 *
 * @param query - Raw (un-encoded) search text.
 * @returns The scraped results as `{ title, link, content }` objects; entries
 * missing any of the three fields are dropped.
 * @throws Rejects with whatever `fetch` / `response.text()` rejects with,
 * including the abort error raised when the 10 second timeout fires.
 */
export const localBraveSearch = async (query: string) => {
    const searchUrl =
        "https://search.brave.com/search?q=" + encodeURIComponent(query)

    // NOTE(review): the second argument is "duckduckgo" although the target
    // host is Brave — kept as-is; confirm against urlRewriteRuntime.
    await urlRewriteRuntime(cleanUrl(searchUrl), "duckduckgo")

    // Abort the request (including reading the body) after 10 seconds.
    const abortController = new AbortController()
    const timeoutId = setTimeout(() => abortController.abort(), 10000)

    let htmlString: string
    try {
        const response = await fetch(searchUrl, {
            signal: abortController.signal
        })
        htmlString = await response.text()
    } finally {
        // Do not leave the abort timer pending once the request has settled.
        clearTimeout(timeoutId)
    }

    const $ = cheerio.load(htmlString)
    const $results = $("div#results")
    const $snippets = $results.find("div.snippet")

    const searchResults = Array.from($snippets)
        .map((result) => {
            const link = $(result).find("a").attr("href")
            const title = $(result).find("div.title").text()
            const content = $(result).find("div.snippet-description").text()
            return { title, link, content }
        })
        // Keep only complete entries; the predicate narrows `link` to string.
        .filter(
            (
                result
            ): result is { title: string; link: string; content: string } =>
                Boolean(result.link && result.title && result.content)
        )

    return searchResults
}
|  | 
 | ||||||
/**
 * Searches Brave for `query` and returns `{ url, content }` entries.
 *
 * The hits from `localBraveSearch` are capped at `totalSearchResults()`.
 * In simple internet search mode the snippet text of each hit is returned
 * directly. Otherwise every hit is loaded through `PageAssistHtmlLoader`,
 * the loaded documents are split into chunks, embedded with Ollama into an
 * in-memory vector store, and the 3 chunks most similar to `query` are
 * returned.
 *
 * @param query - Search text; also used for the similarity lookup.
 * @returns An array of `{ url, content }` objects.
 */
export const webBraveSearch = async (query: string) => {
    const allHits = await localBraveSearch(query)
    const maxHits = await totalSearchResults()
    const hits = allHits.slice(0, maxHits)

    const simpleMode = await getIsSimpleInternetSearch()

    if (simpleMode) {
        // The returned URL is not used in this branch; the call is kept
        // because getOllamaURL may have side effects — TODO confirm.
        await getOllamaURL()
        return hits.map(({ link, content }) => ({ url: link, content }))
    }

    // Pages are loaded one after another, in hit order.
    const pages: Document<Record<string, any>>[] = []
    for (const hit of hits) {
        const pageLoader = new PageAssistHtmlLoader({
            html: "",
            url: hit.link
        })
        const pageDocs = await pageLoader.loadByURL()
        for (const pageDoc of pageDocs) {
            pages.push(pageDoc)
        }
    }

    const baseUrl = await getOllamaURL()
    const embeddingModel = await defaultEmbeddingModelForRag()
    const embeddings = new OllamaEmbeddings({
        model: embeddingModel || "",
        baseUrl: cleanUrl(baseUrl)
    })

    const chunkSize = await defaultEmbeddingChunkSize()
    const chunkOverlap = await defaultEmbeddingChunkOverlap()
    const splitter = new RecursiveCharacterTextSplitter({
        chunkSize,
        chunkOverlap
    })
    const pieces = await splitter.splitDocuments(pages)

    const vectorStore = new MemoryVectorStore(embeddings)
    await vectorStore.addDocuments(pieces)

    // Keep the 3 chunks closest to the query.
    const matches = await vectorStore.similaritySearch(query, 3)

    return matches.map((match) => ({
        url: match.metadata.url,
        content: match.pageContent
    }))
}
| @ -3,6 +3,7 @@ import { webGoogleSearch } from "./search-engines/google" | |||||||
| import { webDuckDuckGoSearch } from "./search-engines/duckduckgo" | import { webDuckDuckGoSearch } from "./search-engines/duckduckgo" | ||||||
| import { getSearchProvider } from "@/services/search" | import { getSearchProvider } from "@/services/search" | ||||||
| import { webSogouSearch } from "./search-engines/sogou" | import { webSogouSearch } from "./search-engines/sogou" | ||||||
|  | import { webBraveSearch } from "./search-engines/brave" | ||||||
| 
 | 
 | ||||||
| const getHostName = (url: string) => { | const getHostName = (url: string) => { | ||||||
|   try { |   try { | ||||||
| @ -19,6 +20,8 @@ const searchWeb = (provider: string, query: string) => { | |||||||
|       return webDuckDuckGoSearch(query) |       return webDuckDuckGoSearch(query) | ||||||
|     case "sogou": |     case "sogou": | ||||||
|       return webSogouSearch(query) |       return webSogouSearch(query) | ||||||
|  |     case "brave": | ||||||
|  |       return webBraveSearch(query) | ||||||
|     default: |     default: | ||||||
|       return webGoogleSearch(query) |       return webGoogleSearch(query) | ||||||
|   } |   } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user