From f617a0548367a7ae7f26d3426e7ac665da1f81d9 Mon Sep 17 00:00:00 2001 From: Nex Zhu Date: Mon, 17 Feb 2025 19:03:38 +0800 Subject: [PATCH] feat: improve DEFAULT_WEBSEARCH_PROMPT for IoD and 3W citations --- src/assets/locale/zh/common.json | 2 +- src/services/ollama.ts | 36 +++++++++++++++---- src/types/web.ts | 5 +++ src/web/web.ts | 61 ++++++++++++++++++-------------- 4 files changed, 71 insertions(+), 33 deletions(-) create mode 100644 src/types/web.ts diff --git a/src/assets/locale/zh/common.json b/src/assets/locale/zh/common.json index c4ba2e8..e57d65a 100644 --- a/src/assets/locale/zh/common.json +++ b/src/assets/locale/zh/common.json @@ -38,7 +38,7 @@ } }, "copyToClipboard": "复制到剪贴板", - "webSearch": "正在搜索", + "webSearch": "搜索万维网", "iodSearch": "搜索数联网", "regenerate": "重新生成", "edit": "编辑", diff --git a/src/services/ollama.ts b/src/services/ollama.ts index ee669f1..6e1462c 100644 --- a/src/services/ollama.ts +++ b/src/services/ollama.ts @@ -21,15 +21,39 @@ const DEFAULT_RAG_QUESTION_PROMPT = const DEFAUTL_RAG_SYSTEM_PROMPT = `You are a helpful AI assistant. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say you don't know. DO NOT try to make up an answer. If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context. {context} Question: {question} Helpful answer:` -const DEFAULT_WEBSEARCH_PROMP = `You are an AI model who is expert at searching the web and answering user's queries. +const DEFAULT_WEBSEARCH_PROMPT = `You are an AI model who is expert at searching the web and answering user's queries. Generate a response that is informative and relevant to the user's query based on provided search results. the current date and time are {current_date_time}. -\`search-results\` block provides knowledge from the web search results. You can use this information to generate a meaningful response. 
+\`iod-search-results\` block provides knowledge from the Internet of Data (数联网) search results. Each search result has a format of: +\`{content}\` +Please show the \`doId\` and \`name\` of the search result when you cite the Internet of Data search result, in the following format, in English: +\`[IoD source [id] doId: {doId} "{name}"]({url})\` +Or in Chinese: +\`[数联网引用[id] doId: {doId} "{name}"]({url})\` +For example, in English: +\`[IoD source [1] doId: 10.48550/arXiv.1803.05591v2 "On the insufficiency of existing momentum schemes for Stochastic Optimization"](http://arxiv.org/pdf/1803.05591v2.pdf)\` +Or in Chinese: +\`[数联网引用[1] doId: 10.48550/arXiv.1803.05591v2 "On the insufficiency of existing momentum schemes for Stochastic Optimization"](http://arxiv.org/pdf/1803.05591v2.pdf)\` - - {search_results} - +\`web-search-results\` block provides knowledge from the World Wide Web (万维网) search results. +Please show the \`name\` of the search result when you cite the World Wide Web search result, in the following format, in English: +\`[3W source [id] "{name}"]({url})\` +Or in Chinese: +\`[万维网引用[id] "{name}"]({url})\` +For example, in English: +\`[3W source [1] "On the insufficiency of existing momentum schemes for Stochastic Optimization"](http://arxiv.org/pdf/1803.05591v2.pdf)\` +Or in Chinese: +\`[万维网引用[1] "On the insufficiency of existing momentum schemes for Stochastic Optimization"](http://arxiv.org/pdf/1803.05591v2.pdf)\` + +You can use this information to generate a meaningful response. + + + {iod_search_results} + + + {web_search_results} + ` const DEFAULT_WEBSEARCH_FOLLOWUP_PROMPT = `You will give a follow-up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the AI model to search the internet. 
@@ -409,7 +433,7 @@ export const saveForRag = async ( export const getWebSearchPrompt = async () => { const prompt = await storage.get("webSearchPrompt") if (!prompt || prompt.length === 0) { - return DEFAULT_WEBSEARCH_PROMP + return DEFAULT_WEBSEARCH_PROMPT } return prompt } diff --git a/src/types/web.ts b/src/types/web.ts new file mode 100644 index 0000000..f880201 --- /dev/null +++ b/src/types/web.ts @@ -0,0 +1,5 @@ +export type WebSearchResult = { + url: string + name: string + content: string +} \ No newline at end of file diff --git a/src/web/web.ts b/src/web/web.ts index 089b875..2168f28 100644 --- a/src/web/web.ts +++ b/src/web/web.ts @@ -9,6 +9,7 @@ import { searxngSearch } from "./search-engines/searxng" import { braveAPISearch } from "./search-engines/brave-api" import { webBaiduSearch } from "./search-engines/baidu" import { searchIod } from "./iod" +import type { WebSearchResult } from "~/types/web" import type { IodRegistryEntry } from "~/types/iod" const getHostName = (url: string) => { @@ -20,23 +21,35 @@ const getHostName = (url: string) => { } } -const searchWeb = (provider: string, query: string) => { +async function searchWeb( + provider: string, + query: string +): Promise { + let results = [] switch (provider) { case "duckduckgo": - return webDuckDuckGoSearch(query) + results = await webDuckDuckGoSearch(query) + break case "sogou": - return webSogouSearch(query) + results = await webSogouSearch(query) + break case "brave": - return webBraveSearch(query) + results = await webBraveSearch(query) + break case "searxng": - return searxngSearch(query) + results = await searxngSearch(query) + break case "brave-api": - return braveAPISearch(query) + results = await braveAPISearch(query) + break case "baidu": - return webBaiduSearch(query) + results = await webBaiduSearch(query) + break default: - return webGoogleSearch(query) + results = await webGoogleSearch(query) + break } + return results.map((r) => ({ ...r, name: getHostName(r.url) })) } export 
const getSystemPromptForWeb = async ( @@ -47,10 +60,7 @@ export const getSystemPromptForWeb = async ( ) => { try { const websiteVisit = getWebsiteFromQuery(query) - let webSearchResults: { - url: any - content: string - }[] = [] + let webSearchResults: WebSearchResult[] = [] // let search_results_web = "" if (webSearch) { @@ -87,28 +97,25 @@ export const getSystemPromptForWeb = async ( } const iod_search_results = iodSearchResults .map((res) => ({ - url: `${res.url}`, + doId: res.doId, + name: res.name, + url: res.url, content: res.content || res.description })) .map( (result, idx) => - `${result.content}` + `${result.content}` ) .join("\n") - console.log("iod_search_result:" + iod_search_results) + console.log("iod_search_result: " + iod_search_results) + const web_search_results = webSearchResults .map( (result, idx) => - `${result.content}` + `${result.content}` ) .join("\n") - const search_results = - (iodSearch - ? "<数联网搜索结果>" + iod_search_results + "" - : "") + - (webSearch - ? "<万维网搜索结果>" + web_search_results + "" - : "") + console.log("web_search_result: " + web_search_results) const current_date_time = new Date().toLocaleString() @@ -116,14 +123,15 @@ export const getSystemPromptForWeb = async ( const prompt = system .replace("{current_date_time}", current_date_time) - .replace("{search_results}", search_results) + .replace("{iod_search_results}", iod_search_results) + .replace("{web_search_results}", web_search_results) return { prompt, webSources: webSearchResults.map((result) => { return { url: result.url, - name: getHostName(result.url), + name: result.name, type: "url" } }), @@ -133,7 +141,8 @@ export const getSystemPromptForWeb = async ( console.error(e) return { prompt: "", - source: [] + webSources: [], + iodSources: [] } } }