add rerank
This commit is contained in:
parent
d6d28b325f
commit
885d938bbf
44
src/utils/rerank.ts
Normal file
44
src/utils/rerank.ts
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import type { Embeddings } from "@langchain/core/embeddings"
|
||||||
|
import type { Document } from "@langchain/core/documents"
|
||||||
|
import * as ml_distance from "ml-distance"
|
||||||
|
|
||||||
|
export const rerankDocs = async ({
|
||||||
|
query,
|
||||||
|
docs,
|
||||||
|
embedding
|
||||||
|
}: {
|
||||||
|
query: string
|
||||||
|
docs: Document[]
|
||||||
|
embedding: Embeddings
|
||||||
|
}) => {
|
||||||
|
if (docs.length === 0) {
|
||||||
|
return docs
|
||||||
|
}
|
||||||
|
|
||||||
|
const docsWithContent = docs.filter(
|
||||||
|
(doc) => doc.pageContent && doc.pageContent.length > 0
|
||||||
|
)
|
||||||
|
|
||||||
|
const [docEmbeddings, queryEmbedding] = await Promise.all([
|
||||||
|
embedding.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
|
||||||
|
embedding.embedQuery(query)
|
||||||
|
])
|
||||||
|
|
||||||
|
const similarity = docEmbeddings.map((docEmbedding, i) => {
|
||||||
|
// perform cosine similarity between query and document
|
||||||
|
const sim = ml_distance.similarity.cosine(queryEmbedding, docEmbedding)
|
||||||
|
|
||||||
|
return {
|
||||||
|
index: i,
|
||||||
|
similarity: sim
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const sortedDocs = similarity
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.filter((sim) => sim.similarity > 0.5)
|
||||||
|
.slice(0, 15)
|
||||||
|
.map((sim) => docsWithContent[sim.index])
|
||||||
|
|
||||||
|
return sortedDocs
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user