Python LangChain usage example
1. Reference documentation (LangChain Chinese development tutorial)
2. Install dependencies
pip install torch transformers langchain loguru scikit-learn numpy sentence-transformers faiss-cpu
3. Example code
from typing import Any, List, Optional

import torch
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from loguru import logger
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


class TestQwen3Model(LLM):
    tokenizer: Optional[AutoTokenizer] = None
    model: Optional[AutoModelForCausalLM] = None

    def __init__(self, model_dir: str):
        super().__init__()  # initialize the LLM base class
        self.__model_dir = model_dir
        logger.info("Loading model from local disk...")
        logger.info("Selecting device...")
        self.__device = self.__new_device()
        logger.info("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.__model_dir,
            trust_remote_code=True,
        )
        logger.info("Loading model...")
        self.model = AutoModelForCausalLM.from_pretrained(
            self.__model_dir,
            device_map=None,  # disable automatic device mapping
            trust_remote_code=True,
            torch_dtype=(
                torch.float16
                if self.__device != torch.device("cpu")
                else torch.float32
            ),
            low_cpu_mem_usage=True,
        ).to(
            self.__device  # type: ignore
        )  # move the model to the selected device explicitly
        logger.info("Loading generation_config...")
        self.model.generation_config = GenerationConfig.from_pretrained(  # type: ignore
            model_dir,
            trust_remote_code=True,
        )
        logger.info("Model loaded")

    @staticmethod
    def __new_device() -> torch.device:
        """Detect the available device and use GPU acceleration when possible."""
        if torch.backends.mps.is_available():
            device = torch.device("mps")
            logger.info("Using Apple Silicon GPU acceleration (MPS)")
        elif torch.cuda.is_available():
            device = torch.device("cuda")
            logger.info(f"Using NVIDIA GPU acceleration: {torch.cuda.get_device_name(0)}")
        else:
            device = torch.device("cpu")
            logger.info("Running on CPU")
        return device

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Build a chat-style prompt with the Qwen3 chat template
        messages = [{"role": "user", "content": prompt}]
        text = self.tokenizer.apply_chat_template(  # type: ignore
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(  # type: ignore
            self.model.device  # type: ignore
        )
        # Run text completion
        generated_ids = self.model.generate(  # type: ignore
            **model_inputs, max_new_tokens=32768
        )
        response = self.tokenizer.batch_decode(  # type: ignore
            generated_ids, skip_special_tokens=True
        )[0]
        return response

    @property
    def _llm_type(self) -> str:
        return "TestQwen3Model"


test = TestQwen3Model(model_dir="folder containing the model weights and config")
# res = test.invoke("Who is Jay Chou?")
# print(res.split("/think>")[-1].strip())


def ask(question: str) -> str:
    res = test.invoke(question.strip())
    # Keep only the text after the closing </think> tag (drop the thinking block)
    return res.split("/think>")[-1].strip()


if __name__ == "__main__":
    import time

    while True:
        question = input("Enter a question (q to quit): ")
        if question.lower() == "q":
            break
        t1 = time.time()
        print(ask(question))
        t2 = time.time()
        print(f"\nTime taken for this answer: {t2 - t1}s")
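The dependency list in step 2 also installs sentence-transformers and faiss-cpu, which the example above does not use. Below is a minimal retrieval sketch of how they could feed context into the ask() helper; it is an illustrative assumption, not part of the original tutorial. The import paths follow the classic langchain package (newer releases may require the langchain-community package), and the document texts, embedding model name, and prompt wording are example choices.

# Minimal retrieval sketch (assumption): embed a few texts with
# sentence-transformers, index them with FAISS, and pass the retrieved
# context to the ask() helper defined above.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

docs = [
    "LangChain is a framework for building applications on top of LLMs.",
    "FAISS is a library for efficient similarity search over dense vectors.",
]
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_texts(docs, embeddings)

question = "What is LangChain?"
# Retrieve the two most similar documents and build a grounded prompt
context = "\n".join(d.page_content for d in vectorstore.similarity_search(question, k=2))
print(ask(f"Answer based on the following context:\n{context}\n\nQuestion: {question}"))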