Calling a Local Large Language Model
- Model: https://www.modelscope.cn/models/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/summary
- Download llama-cli from https://github.com/ggerganov/llama.cpp/releases
- Download the model with modelscope, which avoids connection failures:
modelscope download --model unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf --local_dir ./DeepSeek-R1-Distill-Qwen-32B-GGUF
modelscope download --model unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF DeepSeek-R1-Distill-Qwen-32B-Q6_K.gguf --local_dir ./DeepSeek-R1-Distill-Qwen-32B-GGUF
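- The download can also be scripted from Python. A minimal sketch, assuming a recent modelscope release in which snapshot_download accepts the local_dir and allow_file_pattern parameters (check your installed version):

from modelscope import snapshot_download

# Fetch only the Q4_K_M file into the same local directory as above.
snapshot_download(
    'unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF',
    allow_file_pattern='DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf',
    local_dir='./DeepSeek-R1-Distill-Qwen-32B-GGUF',
)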
- Calling llama-cli directly from bash. Here --n-gpu-layers 20 offloads 20 model layers to the GPU, --cache-type-k q8_0 stores the K cache in 8-bit to save VRAM, and -no-cnv disables interactive conversation mode so llama-cli processes the prompt once and exits:
prompt='<|User|>'$(cat query.txt)'<|Assistant|>You are a helpful medical assistant.'
echo "$prompt"
LD_LIBRARY_PATH=/home/student/deepseek/build/bin \
/home/student/deepseek/build/bin/llama-cli \
--model /home/student/deepseek/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf \
--cache-type-k q8_0 \
--threads 16 \
--prompt "$prompt" \
--n-gpu-layers 20 \
-no-cnv
- Calling from Python, still by shelling out to the same bash command:
import logging
import subprocess

def run_command(command, logger=None):
    '''
    Run a shell command via the subprocess module,
    logging the command that was executed. On failure,
    print the return code and stderr and return None.
    '''
    logger = logger or logging
    logger.info(command)
    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, text=True)
    if result.returncode == 0:
        return result.stdout
    print(f'{result.returncode}, {result.stderr}')
    return None
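Since subprocess.run only returns after llama-cli exits, a long generation shows up all at once. The variant below is a sketch (the run_command_streaming name is mine) that echoes output line by line while the model is still generating; stderr is merged into stdout to avoid a pipe deadlock:

def run_command_streaming(command, logger=None):
    '''Like run_command, but prints output as it is produced.'''
    logger = logger or logging
    logger.info(command)
    lines = []
    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, text=True)
    for line in proc.stdout:   # iterates as llama-cli writes
        print(line, end='')
        lines.append(line)
    return ''.join(lines) if proc.wait() == 0 else None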
def predict(query='what is 1 + 1?'):
    # Note: the query is interpolated inside single quotes, so a query
    # containing a single quote breaks the shell command (see the
    # quoting sketch after this function).
    cmd = f'''\
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
LD_LIBRARY_PATH=/home/student/deepseek/build/bin \
/home/student/deepseek/build/bin/llama-cli \
--model /home/student/deepseek/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf \
--cache-type-k q8_0 \
--threads 16 \
--prompt '<|User|>{query}<|Assistant|>You are a helpful medical assistant.' \
--n-gpu-layers 20 \
-no-cnv '''
    ret = run_command(cmd)
    if ret:
        print(ret)
        return ret
    return None
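As noted in the comment above, the f-string interpolation breaks if query.txt contains a single quote. A safer sketch (predict_safe is a hypothetical name) quotes the whole prompt with shlex.quote before splicing it into the command:

import shlex

def predict_safe(query='what is 1 + 1?'):
    prompt = f'<|User|>{query}<|Assistant|>You are a helpful medical assistant.'
    cmd = (
        'LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 '
        'LD_LIBRARY_PATH=/home/student/deepseek/build/bin '
        '/home/student/deepseek/build/bin/llama-cli '
        '--model /home/student/deepseek/DeepSeek-R1-Distill-Qwen-32B-GGUF/'
        'DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf '
        '--cache-type-k q8_0 --threads 16 --n-gpu-layers 20 -no-cnv '
        f'--prompt {shlex.quote(prompt)}'   # safe even if query contains quotes
    )
    return run_command(cmd)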
if __name__ == '__main__':
    with open('query.txt', 'r') as fp:
        query = fp.read()
    predict(query)
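DeepSeek-R1 distill models emit their chain of thought between <think> and </think> tags before the final answer. A small sketch (split_think is a hypothetical helper) for separating the two in the text returned by predict:

import re

def split_think(text):
    '''Return (reasoning, answer) from R1-style output.'''
    m = re.search(r'<think>(.*?)</think>', text, flags=re.DOTALL)
    if not m:
        return '', text.strip()
    return m.group(1).strip(), text[m.end():].strip()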