From 342d544e30fd5f331ba7d49742ba5938a03404b3 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sat, 1 Feb 2025 11:22:12 +0530 Subject: [PATCH 1/3] feat: Add Baidu search engine --- src/components/Common/Playground/Message.tsx | 3 +- src/utils/search-provider.ts | 4 + src/web/search-engines/baidu.ts | 105 +++++++++++++++++++ src/web/search-engines/duckduckgo.ts | 2 - src/web/search-engines/google.ts | 5 - src/web/web.ts | 3 + wxt.config.ts | 2 +- 7 files changed, 114 insertions(+), 10 deletions(-) create mode 100644 src/web/search-engines/baidu.ts diff --git a/src/components/Common/Playground/Message.tsx b/src/components/Common/Playground/Message.tsx index 1172c41..e529cf5 100644 --- a/src/components/Common/Playground/Message.tsx +++ b/src/components/Common/Playground/Message.tsx @@ -54,7 +54,7 @@ export const PlaygroundMessage = (props: Props) => { return (
-
+
{props.isBot ? ( @@ -150,7 +150,6 @@ export const PlaygroundMessage = (props: Props) => {
{/* source if available */} {props.images && - props.images && props.images.filter((img) => img.length > 0).length > 0 && (
{props.images diff --git a/src/utils/search-provider.ts b/src/utils/search-provider.ts index 24004a4..031ee62 100644 --- a/src/utils/search-provider.ts +++ b/src/utils/search-provider.ts @@ -11,6 +11,10 @@ export const SUPPORTED_SERACH_PROVIDERS = [ label: "Sogou", value: "sogou" }, + { + label: "Baidu", + value: "baidu" + }, { label: "Brave", value: "brave" diff --git a/src/web/search-engines/baidu.ts b/src/web/search-engines/baidu.ts new file mode 100644 index 0000000..22da5e8 --- /dev/null +++ b/src/web/search-engines/baidu.ts @@ -0,0 +1,105 @@ +import { cleanUrl } from "@/libs/clean-url" +import { PageAssistHtmlLoader } from "@/loader/html" +import { pageAssistEmbeddingModel } from "@/models/embedding" +import { + defaultEmbeddingModelForRag, + getOllamaURL +} from "@/services/ollama" +import { + getIsSimpleInternetSearch, + totalSearchResults +} from "@/services/search" +import { getPageAssistTextSplitter } from "@/utils/text-splitter" +import type { Document } from "@langchain/core/documents" +import { MemoryVectorStore } from "langchain/vectorstores/memory" + +export const localBaiduSearch = async (query: string) => { + const TOTAL_SEARCH_RESULTS = await totalSearchResults() + + const abortController = new AbortController() + setTimeout(() => abortController.abort(), 10000) + + const jsonRes = await fetch( + "https://www.baidu.com/s?wd=" + encodeURIComponent(query) + "&tn=json&rn=" + TOTAL_SEARCH_RESULTS, + { + signal: abortController.signal + } + ) + .then((response) => response.json()) + .catch((e) => { + console.log(e) + return { + feed: { + entry: [] + } + } + }) + + const data = jsonRes?.feed?.entry || [] + + const searchResults = data.map((result: any) => { + const title = result?.title || "" + const link = result?.url + const content = result?.abs || "" + return { title, link, content } + }) + + + return searchResults.filter((result) => result?.link) +} + +export const webBaiduSearch = async (query: string) => { + const searchResults = await localBaiduSearch(query) + + const isSimpleMode = await getIsSimpleInternetSearch() + + if (isSimpleMode) { + await getOllamaURL() + return searchResults.map((result) => { + return { + url: result.link, + content: result.content + } + }) + } + + const docs: Document>[] = [] + for (const result of searchResults) { + const loader = new PageAssistHtmlLoader({ + html: "", + url: result.link + }) + + const documents = await loader.loadByURL() + + documents.forEach((doc) => { + docs.push(doc) + }) + } + const ollamaUrl = await getOllamaURL() + + const embeddingModle = await defaultEmbeddingModelForRag() + const ollamaEmbedding = await pageAssistEmbeddingModel({ + model: embeddingModle || "", + baseUrl: cleanUrl(ollamaUrl) + }) + + const textSplitter = await getPageAssistTextSplitter() + + const chunks = await textSplitter.splitDocuments(docs) + + const store = new MemoryVectorStore(ollamaEmbedding) + + await store.addDocuments(chunks) + + const resultsWithEmbeddings = await store.similaritySearch(query, 3) + + const searchResult = resultsWithEmbeddings.map((result) => { + return { + url: result.metadata.url, + content: result.pageContent + } + }) + + return searchResult +} diff --git a/src/web/search-engines/duckduckgo.ts b/src/web/search-engines/duckduckgo.ts index 9552b9d..8645724 100644 --- a/src/web/search-engines/duckduckgo.ts +++ b/src/web/search-engines/duckduckgo.ts @@ -1,5 +1,4 @@ import { cleanUrl } from "@/libs/clean-url" -import { urlRewriteRuntime } from "@/libs/runtime" import { PageAssistHtmlLoader } from "@/loader/html" import { pageAssistEmbeddingModel } from "@/models/embedding" import { @@ -16,7 +15,6 @@ import * as cheerio from "cheerio" import { MemoryVectorStore } from "langchain/vectorstores/memory" export const localDuckDuckGoSearch = async (query: string) => { - await urlRewriteRuntime(cleanUrl("https://html.duckduckgo.com/html/?q=" + query), "duckduckgo") const abortController = new AbortController() setTimeout(() => abortController.abort(), 10000) diff --git a/src/web/search-engines/google.ts b/src/web/search-engines/google.ts index 59e7175..6dd46ed 100644 --- a/src/web/search-engines/google.ts +++ b/src/web/search-engines/google.ts @@ -8,7 +8,6 @@ import { getPageAssistTextSplitter } from "@/utils/text-splitter" import type { Document } from "@langchain/core/documents" import { MemoryVectorStore } from "langchain/vectorstores/memory" import { cleanUrl } from "~/libs/clean-url" -import { urlRewriteRuntime } from "~/libs/runtime" import { PageAssistHtmlLoader } from "~/loader/html" import { defaultEmbeddingModelForRag, @@ -18,10 +17,6 @@ import { export const localGoogleSearch = async (query: string) => { const baseGoogleDomain = await getGoogleDomain() - await urlRewriteRuntime( - cleanUrl(`https://www.${baseGoogleDomain}/search?hl=en&q=` + query), - "google" - ) const abortController = new AbortController() setTimeout(() => abortController.abort(), 10000) diff --git a/src/web/web.ts b/src/web/web.ts index 42d8ca9..5b6f6d9 100644 --- a/src/web/web.ts +++ b/src/web/web.ts @@ -7,6 +7,7 @@ import { webBraveSearch } from "./search-engines/brave" import { getWebsiteFromQuery, processSingleWebsite } from "./website" import { searxngSearch } from "./search-engines/searxng" import { braveAPISearch } from "./search-engines/brave-api" +import { webBaiduSearch } from "./search-engines/baidu" const getHostName = (url: string) => { try { @@ -29,6 +30,8 @@ const searchWeb = (provider: string, query: string) => { return searxngSearch(query) case "brave-api": return braveAPISearch(query) + case "baidu": + return webBaiduSearch(query) default: return webGoogleSearch(query) } diff --git a/wxt.config.ts b/wxt.config.ts index f323b7c..9dd31bc 100644 --- a/wxt.config.ts +++ b/wxt.config.ts @@ -51,7 +51,7 @@ export default defineConfig({ outDir: "build", manifest: { - version: "1.4.4", + version: "1.4.5", name: process.env.TARGET === "firefox" ? "Page Assist - A Web UI for Local AI Models" From 80f5812355d113b666b8e035cf2a19c3765198f7 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sat, 1 Feb 2025 12:30:05 +0530 Subject: [PATCH 2/3] fix: Adjust visibility styles for PlaygroundMessage component in preparation for v1.4.5 release --- src/components/Common/Playground/Message.tsx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/components/Common/Playground/Message.tsx b/src/components/Common/Playground/Message.tsx index e529cf5..87b1555 100644 --- a/src/components/Common/Playground/Message.tsx +++ b/src/components/Common/Playground/Message.tsx @@ -197,8 +197,12 @@ export const PlaygroundMessage = (props: Props) => {
{props.isTTSEnabled && ( From 7264f30d81838e1f8386a0ea646a996686d18ef6 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sun, 2 Feb 2025 14:51:52 +0530 Subject: [PATCH 3/3] fix: Remove reasoning tag from tts utterance --- src/components/Common/Playground/Message.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/Common/Playground/Message.tsx b/src/components/Common/Playground/Message.tsx index 87b1555..9894f0f 100644 --- a/src/components/Common/Playground/Message.tsx +++ b/src/components/Common/Playground/Message.tsx @@ -18,7 +18,7 @@ import { useTTS } from "@/hooks/useTTS" import { tagColors } from "@/utils/color" import { removeModelSuffix } from "@/db/models" import { GenerationInfo } from "./GenerationInfo" -import { parseReasoning } from "@/libs/reasoning" +import { parseReasoning, removeReasoning } from "@/libs/reasoning" import { humanizeMilliseconds } from "@/utils/humanize-miliseconds" type Props = { message: string @@ -213,7 +213,7 @@ export const PlaygroundMessage = (props: Props) => { cancel() } else { speak({ - utterance: props.message + utterance: removeReasoning(props.message), }) } }}