使用sentence_transformers对文本进行重新排序

# -*- coding: utf-8 -*-
# @File:     sentence_sorted.py
# @Author:
# @DateTime: 2025/10/23/16:09

# pip install sentence_transformers
# sentence-transformers      5.1.2
from pathlib import Path
current_path = Path(__file__).resolve().parent
from sentence_transformers import CrossEncoder

# Local snapshot directory of cross-encoder/ms-marco-MiniLM-L6-v2.
# Joined component by component so the path resolves on any OS instead of
# relying on Windows-only backslash separators.
RERANK_MODEL = (
    current_path
    / 'sentence_models'
    / 'models--cross-encoder--ms-marco-MiniLM-L6-v2'
    / 'snapshots'
    / 'c5ee24cb16019beea0893ab7796b1df96625c6b8'
)
# Pass the location as a plain string rather than a Path object.
model = CrossEncoder(str(RERANK_MODEL), max_length=512)


def docs_scores_sorted(docs, query_content: str) -> list:
    """
    Re-rank documents by their score against a query using the re-rank model.

    Every document is paired with the query and scored by the module-level
    ``model``; the documents are then returned from highest to lowest score.

    :param docs: iterable of candidate documents; it is copied into a list
        first, so one-shot iterators are handled correctly
    :param query_content: query text each document is scored against
    :return: list of the documents ordered by descending score, or an empty
        list when ``docs`` is empty
    """
    # Materialize once: the documents are needed both to build the pairs
    # and to be matched back up with their scores.
    candidates = list(docs)
    if not candidates:
        # Nothing to rank, so skip the model call entirely.
        return []

    scores = model.predict([(query_content, doc) for doc in candidates])
    # Sort on the score only, so the documents themselves are never compared
    # and tied documents keep their input order (sorted() is stable).
    ranked = sorted(
        zip(scores, candidates),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [doc for _, doc in ranked]


if __name__ == '__main__':
    # Small demo: rank three candidate sentences against a short query
    # and print them from most to least relevant.
    sample_query = '什么'
    candidates = [
        "对获取的结果再进行一次相似度计算及排序",
        "这个结果排序什么",
        "今天天气真好",
    ]
    print(docs_scores_sorted(candidates, sample_query))

 

第一次运行时,如果不指定本地模型路径(直接传入模型名称,例如 cross-encoder/ms-marco-MiniLM-L6-v2),模型会自动下载到默认缓存目录。

posted @ 2025-10-27 11:05  Wchime  阅读(2)  评论(0)    收藏  举报