ollama部署SmolVLM-256M多模态Ollama API版本
直接windows下安装部署:
https://blog.csdn.net/m0_52919859/article/details/148643913
用podman部署 (docker收费)
1. 安装podman (略)
2. 拉取 Ollama 镜像
podman pull ollama/ollama
3. 挂载 本地 smolVLM模型地址 C:\Users\sdt\Desktop\smolVLM
podman run -d --name ollama -p 11434:11434 -v ollama_data:/root/.ollama -v C:\Users\sdt\Desktop\smolVLM:/models ollama/ollama
4. 运行
podman exec -it ollama sh
5. 在容器内部创建并运行你的模型
# 进入挂载的模型目录
cd /models
# 创建模型(假设你的 modelfile 名为 smolvlm.modelfile)
ollama create smolvlm:256m -f smolvlm.modelfile
# 运行模型
ollama run smolvlm:256m
smolvlm.modelfile内容:
# 针对256M参数模型的专项优化
FROM ./SmolVLM-256M-Instruct-f16.gguf
ADAPTER ./mmproj-SmolVLM-256M-Instruct-f16.gguf
# 优化的模板配置,确保最佳性能
TEMPLATE """
<|im_start|>system
{{ .System }}
<end_of_utterance>
{{- range .Messages }}
<|im_start|>{{ .Role }}:
{{ .Content }}
<end_of_utterance>
{{- end }}
<|im_start|>assistant
"""
# 专为SmolVLM优化的系统提示
SYSTEM "You are a visual assistant powered by SmolVLM-256M. Describe images clearly and answer questions based on visual content with high efficiency."
# 针对256M模型的参数优化
PARAMETER num_ctx 4096
PARAMETER stop "<end_of_utterance>"
PARAMETER stop "<|im_start|>"
PARAMETER temperature 0.01
PARAMETER top_p 0.9
PARAMETER repeat_penalty 1.1
6. 善后及删除
podman stop ollama
podman rm ollama
7. API调用
脚本 smolvlm-api.py
python smolvlm-api.py --image [图片路径] --stream
脚本代码:
#!/usr/bin/env python3
# smolvlm-api.py
import argparse
import base64
from pathlib import Path
import cv2
import tempfile
import os
from VisionModel import VisionModel
# ----------------------------------------------------------------------
def build_args() -> argparse.Namespace:
    """Parse command-line options for the text + image multimodal CLI."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="文本 + 图像 多模态推理小工具",
    )
    # Keep the whole CLI specification in one table: (flag, add_argument kwargs).
    option_table = [
        ("--model", dict(default="smolvlm:256m",
                         help="Vision 模型标签(ollama list)")),
        ("--image", dict(help="要推理的图像文件")),
        ("--camera", dict(action="store_true",
                          help="使用摄像头拍照")),
        ("--prompt", dict(default="",
                          help="首次提问内容;留空则进入交互循环")),
        ("--stream", dict(action="store_true",
                          help="是否流式输出")),
    ]
    for flag, kwargs in option_table:
        parser.add_argument(flag, **kwargs)
    return parser.parse_args()
# ----------------------------------------------------------------------
def capture_from_camera() -> str:
    """Capture one frame from the default camera and return it base64-encoded.

    Shows a live preview window; SPACE takes the picture, ESC cancels.

    Returns:
        Base64-encoded JPEG bytes of the captured frame.

    Raises:
        RuntimeError: if the camera cannot be opened, a frame cannot be read,
            or the captured frame cannot be saved.
        KeyboardInterrupt: if the user presses ESC to cancel.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("无法打开摄像头")
    print("按空格键拍照,按ESC退出...")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError("无法获取摄像头画面")
            cv2.imshow('Camera', frame)
            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                raise KeyboardInterrupt("用户取消拍照")
            elif key == 32:  # SPACE (空格)
                break
    finally:
        # Release the device and close the preview on every exit path;
        # the original leaked the capture when cap.read() failed.
        cap.release()
        cv2.destroyAllWindows()
    # Bug fix: the original called cv2.imwrite on a NamedTemporaryFile path
    # while the handle was still open, which fails on Windows (a named temp
    # file cannot be reopened by name there while open). Create the file with
    # mkstemp and close the descriptor before letting OpenCV write to it.
    fd, tmp_path = tempfile.mkstemp(suffix='.jpg')
    os.close(fd)
    try:
        if not cv2.imwrite(tmp_path, frame):
            raise RuntimeError("无法保存摄像头图像")
        return load_b64(Path(tmp_path))
    finally:
        os.unlink(tmp_path)
# ----------------------------------------------------------------------
def load_b64(img_path: Path) -> str:
    """Read the file at *img_path* and return its bytes as a base64 ASCII string.

    Raises:
        FileNotFoundError: when the path does not point to a regular file.
    """
    if not img_path.is_file():
        raise FileNotFoundError(img_path)
    raw = img_path.read_bytes()
    return base64.b64encode(raw).decode()
# ----------------------------------------------------------------------
def main() -> None:
    """CLI entry point: load the model, obtain an image, then answer prompts."""
    args = build_args()

    # 1) Load the vision model wrapper.
    llm = VisionModel(vision_model_path=args.model, stream=args.stream)

    # 2) Obtain the image as base64 (camera capture or file on disk).
    try:
        if args.camera:
            img_b64 = capture_from_camera()
        elif args.image:
            img_b64 = load_b64(Path(args.image))
        else:
            raise ValueError("请指定 --image 或 --camera 参数")
    except Exception as e:
        print(f"错误: {e}")
        return

    # 3) One-shot mode when a prompt was given on the command line...
    if args.prompt:
        run_once(llm, args.prompt, img_b64)
        return
    # ...otherwise an interactive REPL until Ctrl-C.
    try:
        while True:
            run_once(llm, input("请输入内容(Ctrl-C 退出):"), img_b64)
    except KeyboardInterrupt:
        print("\n已退出")
# ----------------------------------------------------------------------
def run_once(llm, prompt: str, img_b64: str) -> None:
    """Feed one prompt + image to the model and print every generated chunk.

    Handles both streaming (many chunks) and non-streaming (single chunk)
    generators; a final bare print() terminates the answer with a newline.
    """
    pieces = llm.generate(prompt, img_b64)
    for piece in pieces:
        print(piece, end="", flush=True)
    print()
# ----------------------------------------------------------------------
if __name__ == "__main__":
main()
浙公网安备 33010602011771号