From 4c5d5cfe995a2e46fe288c07d59140f844d80564 Mon Sep 17 00:00:00 2001 From: Nex Zhu Date: Fri, 14 Feb 2025 23:24:27 +0800 Subject: [PATCH] feat: IoD search process HTML/PDF content --- src/types/iod.ts | 1 + src/web/iod.ts | 9 +++++++-- src/web/web.ts | 42 +++++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/types/iod.ts b/src/types/iod.ts index 6cc661f..48de6e3 100644 --- a/src/types/iod.ts +++ b/src/types/iod.ts @@ -4,4 +4,5 @@ export type IodRegistryEntry = { url?: string pdf_url?: string description: string + content?: string } diff --git a/src/web/iod.ts b/src/web/iod.ts index dd9a980..cb68b2c 100644 --- a/src/web/iod.ts +++ b/src/web/iod.ts @@ -108,6 +108,7 @@ export const searchIod = async (query: string, keywords: string[]) => { } const docs: Document>[] = [] + const resMap = new Map() for (const result of searchResults) { const url = result.url if (!url) continue @@ -190,11 +191,15 @@ export const searchIod = async (query: string, keywords: string[]) => { const resultsWithEmbeddings = await store.similaritySearch(query, 3) const searchResult = resultsWithEmbeddings.map((result) => { + // `source` for PDF type + const key = result.metadata.url || result.metadata.source + if (!key) return null + const fullRes = resMap[key] return { - url: result.metadata.url, + ...fullRes, content: result.pageContent } - }) + }).filter((r) => r) return searchResult } diff --git a/src/web/web.ts b/src/web/web.ts index 83d9ea2..089b875 100644 --- a/src/web/web.ts +++ b/src/web/web.ts @@ -85,22 +85,30 @@ export const getSystemPromptForWeb = async ( // ) // .join("\n") } - const iod_search_results = iodSearchResults.map((res) => ({ - url: `${res.doId}: ${res.name}`, - content: res.description - })).map( - (result, idx) => - `${result.content}` - ) - .join("\n"); - console.log("iod_search_result:"+iod_search_results); - const web_search_results = webSearchResults.map( - (result, idx) => - `${result.content}` - ) - .join("\n"); - const search_results = (iodSearch?"<数联网搜索结果>"+iod_search_results+"":"") - + (webSearch?"<万维网搜索结果>"+web_search_results+"":""); + const iod_search_results = iodSearchResults + .map((res) => ({ + url: `${res.url}`, + content: res.content || res.description + })) + .map( + (result, idx) => + `${result.content}` + ) + .join("\n") + console.log("iod_search_result:" + iod_search_results) + const web_search_results = webSearchResults + .map( + (result, idx) => + `${result.content}` + ) + .join("\n") + const search_results = + (iodSearch + ? "<数联网搜索结果>" + iod_search_results + "" + : "") + + (webSearch + ? "<万维网搜索结果>" + web_search_results + "" + : "") const current_date_time = new Date().toLocaleString() @@ -119,7 +127,7 @@ export const getSystemPromptForWeb = async ( type: "url" } }), - iodSources: iodSearchResults, + iodSources: iodSearchResults } } catch (e) { console.error(e)