feat: process HTML/PDF content in IoD search

This commit is contained in:
Nex Zhu 2025-02-14 23:24:27 +08:00
parent 51188b1428
commit 4c5d5cfe99
3 changed files with 33 additions and 19 deletions

View File

@ -4,4 +4,5 @@ export type IodRegistryEntry = {
url?: string
pdf_url?: string
description: string
content?: string
}

View File

@ -108,6 +108,7 @@ export const searchIod = async (query: string, keywords: string[]) => {
}
const docs: Document<Record<string, any>>[] = []
const resMap = new Map<string, IodRegistryEntry>()
for (const result of searchResults) {
const url = result.url
if (!url) continue
@ -190,11 +191,15 @@ export const searchIod = async (query: string, keywords: string[]) => {
const resultsWithEmbeddings = await store.similaritySearch(query, 3)
const searchResult = resultsWithEmbeddings.map((result) => {
// `source` for PDF type
const key = result.metadata.url || result.metadata.source
if (!key) return null
const fullRes = resMap[key]
return {
url: result.metadata.url,
...fullRes,
content: result.pageContent
}
})
}).filter((r) => r)
return searchResult
}

View File

@ -85,22 +85,30 @@ export const getSystemPromptForWeb = async (
// )
// .join("\n")
}
const iod_search_results = iodSearchResults.map((res) => ({
url: `${res.doId}: ${res.name}`,
content: res.description
})).map(
(result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>`
)
.join("\n");
console.log("iod_search_result:"+iod_search_results);
const web_search_results = webSearchResults.map(
(result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>`
)
.join("\n");
const search_results = (iodSearch?"<数联网搜索结果>"+iod_search_results+"</数联网搜索结果>":"")
+ (webSearch?"<万维网搜索结果>"+web_search_results+"</万维网搜索结果>":"");
const iod_search_results = iodSearchResults
.map((res) => ({
url: `${res.url}`,
content: res.content || res.description
}))
.map(
(result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>`
)
.join("\n")
console.log("iod_search_result:" + iod_search_results)
const web_search_results = webSearchResults
.map(
(result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>`
)
.join("\n")
const search_results =
(iodSearch
? "<数联网搜索结果>" + iod_search_results + "</数联网搜索结果>"
: "") +
(webSearch
? "<万维网搜索结果>" + web_search_results + "</万维网搜索结果>"
: "")
const current_date_time = new Date().toLocaleString()
@ -119,7 +127,7 @@ export const getSystemPromptForWeb = async (
type: "url"
}
}),
iodSources: iodSearchResults,
iodSources: iodSearchResults
}
} catch (e) {
console.error(e)