feat: Add Baidu search engine

This commit is contained in:
n4ze3m 2025-02-01 11:22:12 +05:30
parent 2d1e465582
commit 342d544e30
7 changed files with 114 additions and 10 deletions

View File

@ -54,7 +54,7 @@ export const PlaygroundMessage = (props: Props) => {
return ( return (
<div className="group w-full text-gray-800 dark:text-gray-100"> <div className="group w-full text-gray-800 dark:text-gray-100">
<div className="text-base md:max-w-2xl lg:max-w-xl xl:max-w-3xl flex lg:px-0 m-auto w-full"> <div className="text-base md:max-w-2xl lg:max-w-xl xl:max-w-3xl flex lg:px-0 m-auto w-full">
<div className="flex flex-row gap-4 md:gap-6 p-4 md:py-6 lg:px-0 m-auto w-full"> <div className="flex flex-row gap-4 md:gap-6 p-4 m-auto w-full">
<div className="w-8 flex flex-col relative items-end"> <div className="w-8 flex flex-col relative items-end">
<div className="relative h-7 w-7 p-1 rounded-sm text-white flex items-center justify-center text-opacity-100r"> <div className="relative h-7 w-7 p-1 rounded-sm text-white flex items-center justify-center text-opacity-100r">
{props.isBot ? ( {props.isBot ? (
@ -150,7 +150,6 @@ export const PlaygroundMessage = (props: Props) => {
</div> </div>
{/* source if available */} {/* source if available */}
{props.images && {props.images &&
props.images &&
props.images.filter((img) => img.length > 0).length > 0 && ( props.images.filter((img) => img.length > 0).length > 0 && (
<div className="flex md:max-w-2xl lg:max-w-xl xl:max-w-3xl mt-4 m-auto w-full"> <div className="flex md:max-w-2xl lg:max-w-xl xl:max-w-3xl mt-4 m-auto w-full">
{props.images {props.images

View File

@ -11,6 +11,10 @@ export const SUPPORTED_SERACH_PROVIDERS = [
label: "Sogou", label: "Sogou",
value: "sogou" value: "sogou"
}, },
{
label: "Baidu",
value: "baidu"
},
{ {
label: "Brave", label: "Brave",
value: "brave" value: "brave"

View File

@ -0,0 +1,105 @@
import { cleanUrl } from "@/libs/clean-url"
import { PageAssistHtmlLoader } from "@/loader/html"
import { pageAssistEmbeddingModel } from "@/models/embedding"
import {
defaultEmbeddingModelForRag,
getOllamaURL
} from "@/services/ollama"
import {
getIsSimpleInternetSearch,
totalSearchResults
} from "@/services/search"
import { getPageAssistTextSplitter } from "@/utils/text-splitter"
import type { Document } from "@langchain/core/documents"
import { MemoryVectorStore } from "langchain/vectorstores/memory"
/**
 * Runs a Baidu search for `query` and returns the hits as plain objects.
 *
 * Requests `https://www.baidu.com/s` with `tn=json` and `rn` set to the value
 * returned by `totalSearchResults()`, parses the response body as JSON and
 * reads the hits from `feed.entry`.
 *
 * Any failure while fetching or parsing (network error, non-2xx status,
 * invalid JSON, the 10 second timeout) is logged and results in an empty
 * list rather than a rejected promise.
 *
 * @param query - Raw search text; it is URL-encoded before being sent.
 * @returns Objects of the form `{ title, link, content }`, taken from each
 *   entry's `title`, `url` and `abs` fields. Entries without a `url` are
 *   dropped.
 */
export const localBaiduSearch = async (query: string) => {
  const TOTAL_SEARCH_RESULTS = await totalSearchResults()

  const abortController = new AbortController()
  // Abort the request (including reading the body) if it takes over 10 seconds.
  const timeoutId = setTimeout(() => abortController.abort(), 10000)

  let entries: unknown = []
  try {
    const response = await fetch(
      "https://www.baidu.com/s?wd=" +
        encodeURIComponent(query) +
        "&tn=json&rn=" +
        TOTAL_SEARCH_RESULTS,
      {
        signal: abortController.signal
      }
    )
    if (!response.ok) {
      throw new Error(
        `Baidu search failed with HTTP status ${response.status}`
      )
    }
    const jsonRes = await response.json()
    entries = jsonRes?.feed?.entry
  } catch (e) {
    // Best effort: a failed search yields no results instead of throwing.
    console.log(e)
  } finally {
    // Always release the timer so it does not linger after the request ends.
    clearTimeout(timeoutId)
  }

  // A missing or non-list `entry` is treated the same as "no results".
  if (!Array.isArray(entries)) {
    return []
  }

  return entries
    .map((result: any) => ({
      title: result?.title || "",
      link: result?.url,
      content: result?.abs || ""
    }))
    .filter((result) => result?.link)
}
/**
 * Searches Baidu for `query` and returns `{ url, content }` pairs.
 *
 * In simple internet search mode (per `getIsSimpleInternetSearch()`), the
 * content of each hit from `localBaiduSearch` is returned as-is.
 *
 * Otherwise every hit's link is loaded with `PageAssistHtmlLoader`, the
 * loaded documents are split into chunks, embedded into an in-memory vector
 * store, and the 3 chunks most similar to `query` are returned.
 *
 * @param query - The user's search text.
 * @returns A list of `{ url, content }` objects.
 */
export const webBaiduSearch = async (query: string) => {
  const hits = await localBaiduSearch(query)

  if (await getIsSimpleInternetSearch()) {
    // NOTE(review): the resolved URL is discarded here; the call is kept
    // exactly as-is — presumably for a side effect, confirm.
    await getOllamaURL()
    return hits.map((hit) => ({
      url: hit.link,
      content: hit.content
    }))
  }

  // Load the linked pages one after another, keeping search-result order.
  const pages: Document<Record<string, any>>[] = []
  for (const hit of hits) {
    const pageLoader = new PageAssistHtmlLoader({
      html: "",
      url: hit.link
    })
    const loaded = await pageLoader.loadByURL()
    for (const page of loaded) {
      pages.push(page)
    }
  }

  const ollamaBaseUrl = await getOllamaURL()
  const ragModelName = await defaultEmbeddingModelForRag()
  const embeddings = await pageAssistEmbeddingModel({
    model: ragModelName || "",
    baseUrl: cleanUrl(ollamaBaseUrl)
  })

  const splitter = await getPageAssistTextSplitter()
  const pieces = await splitter.splitDocuments(pages)

  const vectorStore = new MemoryVectorStore(embeddings)
  await vectorStore.addDocuments(pieces)

  // Keep only the three chunks closest to the query.
  const nearest = await vectorStore.similaritySearch(query, 3)
  return nearest.map((match) => ({
    url: match.metadata.url,
    content: match.pageContent
  }))
}

View File

@ -1,5 +1,4 @@
import { cleanUrl } from "@/libs/clean-url" import { cleanUrl } from "@/libs/clean-url"
import { urlRewriteRuntime } from "@/libs/runtime"
import { PageAssistHtmlLoader } from "@/loader/html" import { PageAssistHtmlLoader } from "@/loader/html"
import { pageAssistEmbeddingModel } from "@/models/embedding" import { pageAssistEmbeddingModel } from "@/models/embedding"
import { import {
@ -16,7 +15,6 @@ import * as cheerio from "cheerio"
import { MemoryVectorStore } from "langchain/vectorstores/memory" import { MemoryVectorStore } from "langchain/vectorstores/memory"
export const localDuckDuckGoSearch = async (query: string) => { export const localDuckDuckGoSearch = async (query: string) => {
await urlRewriteRuntime(cleanUrl("https://html.duckduckgo.com/html/?q=" + query), "duckduckgo")
const abortController = new AbortController() const abortController = new AbortController()
setTimeout(() => abortController.abort(), 10000) setTimeout(() => abortController.abort(), 10000)

View File

@ -8,7 +8,6 @@ import { getPageAssistTextSplitter } from "@/utils/text-splitter"
import type { Document } from "@langchain/core/documents" import type { Document } from "@langchain/core/documents"
import { MemoryVectorStore } from "langchain/vectorstores/memory" import { MemoryVectorStore } from "langchain/vectorstores/memory"
import { cleanUrl } from "~/libs/clean-url" import { cleanUrl } from "~/libs/clean-url"
import { urlRewriteRuntime } from "~/libs/runtime"
import { PageAssistHtmlLoader } from "~/loader/html" import { PageAssistHtmlLoader } from "~/loader/html"
import { import {
defaultEmbeddingModelForRag, defaultEmbeddingModelForRag,
@ -18,10 +17,6 @@ import {
export const localGoogleSearch = async (query: string) => { export const localGoogleSearch = async (query: string) => {
const baseGoogleDomain = await getGoogleDomain() const baseGoogleDomain = await getGoogleDomain()
await urlRewriteRuntime(
cleanUrl(`https://www.${baseGoogleDomain}/search?hl=en&q=` + query),
"google"
)
const abortController = new AbortController() const abortController = new AbortController()
setTimeout(() => abortController.abort(), 10000) setTimeout(() => abortController.abort(), 10000)

View File

@ -7,6 +7,7 @@ import { webBraveSearch } from "./search-engines/brave"
import { getWebsiteFromQuery, processSingleWebsite } from "./website" import { getWebsiteFromQuery, processSingleWebsite } from "./website"
import { searxngSearch } from "./search-engines/searxng" import { searxngSearch } from "./search-engines/searxng"
import { braveAPISearch } from "./search-engines/brave-api" import { braveAPISearch } from "./search-engines/brave-api"
import { webBaiduSearch } from "./search-engines/baidu"
const getHostName = (url: string) => { const getHostName = (url: string) => {
try { try {
@ -29,6 +30,8 @@ const searchWeb = (provider: string, query: string) => {
return searxngSearch(query) return searxngSearch(query)
case "brave-api": case "brave-api":
return braveAPISearch(query) return braveAPISearch(query)
case "baidu":
return webBaiduSearch(query)
default: default:
return webGoogleSearch(query) return webGoogleSearch(query)
} }

View File

@ -51,7 +51,7 @@ export default defineConfig({
outDir: "build", outDir: "build",
manifest: { manifest: {
version: "1.4.4", version: "1.4.5",
name: name:
process.env.TARGET === "firefox" process.env.TARGET === "firefox"
? "Page Assist - A Web UI for Local AI Models" ? "Page Assist - A Web UI for Local AI Models"