feat: Add Baidu search engine
This commit is contained in:
parent
2d1e465582
commit
342d544e30
@ -54,7 +54,7 @@ export const PlaygroundMessage = (props: Props) => {
|
||||
return (
|
||||
<div className="group w-full text-gray-800 dark:text-gray-100">
|
||||
<div className="text-base md:max-w-2xl lg:max-w-xl xl:max-w-3xl flex lg:px-0 m-auto w-full">
|
||||
<div className="flex flex-row gap-4 md:gap-6 p-4 md:py-6 lg:px-0 m-auto w-full">
|
||||
<div className="flex flex-row gap-4 md:gap-6 p-4 m-auto w-full">
|
||||
<div className="w-8 flex flex-col relative items-end">
|
||||
<div className="relative h-7 w-7 p-1 rounded-sm text-white flex items-center justify-center text-opacity-100r">
|
||||
{props.isBot ? (
|
||||
@ -150,7 +150,6 @@ export const PlaygroundMessage = (props: Props) => {
|
||||
</div>
|
||||
{/* source if available */}
|
||||
{props.images &&
|
||||
props.images &&
|
||||
props.images.filter((img) => img.length > 0).length > 0 && (
|
||||
<div className="flex md:max-w-2xl lg:max-w-xl xl:max-w-3xl mt-4 m-auto w-full">
|
||||
{props.images
|
||||
|
@ -11,6 +11,10 @@ export const SUPPORTED_SERACH_PROVIDERS = [
|
||||
label: "Sogou",
|
||||
value: "sogou"
|
||||
},
|
||||
{
|
||||
label: "Baidu",
|
||||
value: "baidu"
|
||||
},
|
||||
{
|
||||
label: "Brave",
|
||||
value: "brave"
|
||||
|
105
src/web/search-engines/baidu.ts
Normal file
105
src/web/search-engines/baidu.ts
Normal file
@ -0,0 +1,105 @@
|
||||
import { cleanUrl } from "@/libs/clean-url"
|
||||
import { PageAssistHtmlLoader } from "@/loader/html"
|
||||
import { pageAssistEmbeddingModel } from "@/models/embedding"
|
||||
import {
|
||||
defaultEmbeddingModelForRag,
|
||||
getOllamaURL
|
||||
} from "@/services/ollama"
|
||||
import {
|
||||
getIsSimpleInternetSearch,
|
||||
totalSearchResults
|
||||
} from "@/services/search"
|
||||
import { getPageAssistTextSplitter } from "@/utils/text-splitter"
|
||||
import type { Document } from "@langchain/core/documents"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
|
||||
/**
 * Queries Baidu's JSON search endpoint and returns the hits in a normalised
 * `{ title, link, content }` shape.
 *
 * The request is aborted after 10 seconds. Network errors, aborts and JSON
 * parsing failures are logged and treated as "no results", so in those cases
 * the function resolves to an empty array instead of rejecting. A rejection
 * from `totalSearchResults()` is not caught here and propagates to the caller.
 *
 * @param query - Raw search terms; URL-encoded before being sent.
 * @returns Hits that carry a non-empty string URL, in response order.
 */
export const localBaiduSearch = async (
  query: string
): Promise<{ title: string; link: string; content: string }[]> => {
  const TOTAL_SEARCH_RESULTS = await totalSearchResults()

  const abortController = new AbortController()
  const timeoutId = setTimeout(() => abortController.abort(), 10000)

  let payload: unknown = null
  try {
    const response = await fetch(
      "https://www.baidu.com/s?wd=" +
        encodeURIComponent(query) +
        "&tn=json&rn=" +
        TOTAL_SEARCH_RESULTS,
      { signal: abortController.signal }
    )
    payload = await response.json()
  } catch (e) {
    // Best effort: a failed or timed-out search yields an empty result set.
    console.log(e)
  } finally {
    // Always release the timer so it does not outlive the request.
    clearTimeout(timeoutId)
  }

  // The payload is untrusted: only accept `feed.entry` when it really is an
  // array, otherwise iterating it would throw outside the try/catch above.
  const entries = (payload as { feed?: { entry?: unknown } } | null)?.feed
    ?.entry
  const data: unknown[] = Array.isArray(entries) ? entries : []

  const searchResults: { title: string; link: string; content: string }[] = []
  for (const entry of data) {
    const raw = (entry ?? {}) as Record<string, unknown>
    const link = raw.url
    // Entries without a usable URL cannot be loaded later, so drop them.
    if (typeof link !== "string" || link.length === 0) {
      continue
    }
    searchResults.push({
      title: typeof raw.title === "string" ? raw.title : "",
      link,
      content: typeof raw.abs === "string" ? raw.abs : ""
    })
  }

  return searchResults
}
|
||||
|
||||
/**
 * Runs a Baidu search for `query` and returns `{ url, content }` records.
 *
 * In simple internet-search mode the records are taken directly from the
 * search hits. Otherwise every hit's page is loaded through
 * `PageAssistHtmlLoader`, the loaded documents are split into chunks, the
 * chunks are embedded into an in-memory vector store, and the three chunks
 * most similar to `query` are returned.
 *
 * @param query - The search terms.
 * @returns A list of `{ url, content }` objects.
 */
export const webBaiduSearch = async (query: string) => {
  const hits = await localBaiduSearch(query)
  const simpleSearch = await getIsSimpleInternetSearch()

  if (isTruthy(simpleSearch)) {
    // NOTE(review): the resolved URL is unused here; the call is kept as-is
    // in case it has side effects — confirm.
    await getOllamaURL()
    return hits.map(({ link, content }) => ({ url: link, content }))
  }

  // Load the pages one at a time, in hit order.
  const docs: Document<Record<string, any>>[] = []
  for (const hit of hits) {
    const pageLoader = new PageAssistHtmlLoader({ html: "", url: hit.link })
    const loaded = await pageLoader.loadByURL()
    for (const doc of loaded) {
      docs.push(doc)
    }
  }

  const baseUrl = await getOllamaURL()
  const embeddingModelName = await defaultEmbeddingModelForRag()
  const embeddings = await pageAssistEmbeddingModel({
    model: embeddingModelName || "",
    baseUrl: cleanUrl(baseUrl)
  })

  const splitter = await getPageAssistTextSplitter()
  const chunks = await splitter.splitDocuments(docs)

  const vectorStore = new MemoryVectorStore(embeddings)
  await vectorStore.addDocuments(chunks)

  const matches = await vectorStore.similaritySearch(query, 3)
  return matches.map((match) => ({
    url: match.metadata.url,
    content: match.pageContent
  }))

  // Local helper mirroring a plain truthiness check on the mode flag.
  function isTruthy(flag: unknown): boolean {
    return flag ? true : false
  }
}
|
@ -1,5 +1,4 @@
|
||||
import { cleanUrl } from "@/libs/clean-url"
|
||||
import { urlRewriteRuntime } from "@/libs/runtime"
|
||||
import { PageAssistHtmlLoader } from "@/loader/html"
|
||||
import { pageAssistEmbeddingModel } from "@/models/embedding"
|
||||
import {
|
||||
@ -16,7 +15,6 @@ import * as cheerio from "cheerio"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
|
||||
export const localDuckDuckGoSearch = async (query: string) => {
|
||||
await urlRewriteRuntime(cleanUrl("https://html.duckduckgo.com/html/?q=" + query), "duckduckgo")
|
||||
|
||||
const abortController = new AbortController()
|
||||
setTimeout(() => abortController.abort(), 10000)
|
||||
|
@ -8,7 +8,6 @@ import { getPageAssistTextSplitter } from "@/utils/text-splitter"
|
||||
import type { Document } from "@langchain/core/documents"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
import { cleanUrl } from "~/libs/clean-url"
|
||||
import { urlRewriteRuntime } from "~/libs/runtime"
|
||||
import { PageAssistHtmlLoader } from "~/loader/html"
|
||||
import {
|
||||
defaultEmbeddingModelForRag,
|
||||
@ -18,10 +17,6 @@ import {
|
||||
|
||||
export const localGoogleSearch = async (query: string) => {
|
||||
const baseGoogleDomain = await getGoogleDomain()
|
||||
await urlRewriteRuntime(
|
||||
cleanUrl(`https://www.${baseGoogleDomain}/search?hl=en&q=` + query),
|
||||
"google"
|
||||
)
|
||||
const abortController = new AbortController()
|
||||
setTimeout(() => abortController.abort(), 10000)
|
||||
|
||||
|
@ -7,6 +7,7 @@ import { webBraveSearch } from "./search-engines/brave"
|
||||
import { getWebsiteFromQuery, processSingleWebsite } from "./website"
|
||||
import { searxngSearch } from "./search-engines/searxng"
|
||||
import { braveAPISearch } from "./search-engines/brave-api"
|
||||
import { webBaiduSearch } from "./search-engines/baidu"
|
||||
|
||||
const getHostName = (url: string) => {
|
||||
try {
|
||||
@ -29,6 +30,8 @@ const searchWeb = (provider: string, query: string) => {
|
||||
return searxngSearch(query)
|
||||
case "brave-api":
|
||||
return braveAPISearch(query)
|
||||
case "baidu":
|
||||
return webBaiduSearch(query)
|
||||
default:
|
||||
return webGoogleSearch(query)
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ export default defineConfig({
|
||||
outDir: "build",
|
||||
|
||||
manifest: {
|
||||
version: "1.4.4",
|
||||
version: "1.4.5",
|
||||
name:
|
||||
process.env.TARGET === "firefox"
|
||||
? "Page Assist - A Web UI for Local AI Models"
|
||||
|
Loading…
x
Reference in New Issue
Block a user