feat: process HTML/PDF content in IoD search

This commit is contained in:
Nex Zhu 2025-02-14 23:24:27 +08:00
parent 51188b1428
commit 4c5d5cfe99
3 changed files with 33 additions and 19 deletions

View File

@ -4,4 +4,5 @@ export type IodRegistryEntry = {
url?: string url?: string
pdf_url?: string pdf_url?: string
description: string description: string
content?: string
} }

View File

@ -108,6 +108,7 @@ export const searchIod = async (query: string, keywords: string[]) => {
} }
const docs: Document<Record<string, any>>[] = [] const docs: Document<Record<string, any>>[] = []
const resMap = new Map<string, IodRegistryEntry>()
for (const result of searchResults) { for (const result of searchResults) {
const url = result.url const url = result.url
if (!url) continue if (!url) continue
@ -190,11 +191,15 @@ export const searchIod = async (query: string, keywords: string[]) => {
const resultsWithEmbeddings = await store.similaritySearch(query, 3) const resultsWithEmbeddings = await store.similaritySearch(query, 3)
const searchResult = resultsWithEmbeddings.map((result) => { const searchResult = resultsWithEmbeddings.map((result) => {
// Prefer `url` as the lookup key; fall back to `source`, which is the key used for PDF-type documents
const key = result.metadata.url || result.metadata.source
if (!key) return null
const fullRes = resMap[key]
return { return {
url: result.metadata.url, ...fullRes,
content: result.pageContent content: result.pageContent
} }
}) }).filter((r) => r)
return searchResult return searchResult
} }

View File

@ -85,22 +85,30 @@ export const getSystemPromptForWeb = async (
// ) // )
// .join("\n") // .join("\n")
} }
const iod_search_results = iodSearchResults.map((res) => ({ const iod_search_results = iodSearchResults
url: `${res.doId}: ${res.name}`, .map((res) => ({
content: res.description url: `${res.url}`,
})).map( content: res.content || res.description
}))
.map(
(result, idx) => (result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>` `<result source="${result.url}" id="${idx}">${result.content}</result>`
) )
.join("\n"); .join("\n")
console.log("iod_search_result:"+iod_search_results); console.log("iod_search_result:" + iod_search_results)
const web_search_results = webSearchResults.map( const web_search_results = webSearchResults
.map(
(result, idx) => (result, idx) =>
`<result source="${result.url}" id="${idx}">${result.content}</result>` `<result source="${result.url}" id="${idx}">${result.content}</result>`
) )
.join("\n"); .join("\n")
const search_results = (iodSearch?"<数联网搜索结果>"+iod_search_results+"</数联网搜索结果>":"") const search_results =
+ (webSearch?"<万维网搜索结果>"+web_search_results+"</万维网搜索结果>":""); (iodSearch
? "<数联网搜索结果>" + iod_search_results + "</数联网搜索结果>"
: "") +
(webSearch
? "<万维网搜索结果>" + web_search_results + "</万维网搜索结果>"
: "")
const current_date_time = new Date().toLocaleString() const current_date_time = new Date().toLocaleString()
@ -119,7 +127,7 @@ export const getSystemPromptForWeb = async (
type: "url" type: "url"
} }
}), }),
iodSources: iodSearchResults, iodSources: iodSearchResults
} }
} catch (e) { } catch (e) {
console.error(e) console.error(e)