Deploying the SmolVLM-256M multimodal model with Ollama (Ollama API version)

For installing and deploying directly on Windows, see:

https://blog.csdn.net/m0_52919859/article/details/148643913


Deploying with Podman (Docker Desktop requires a paid license for commercial use)

1. Install Podman (omitted here)

2. Pull the Ollama image

podman pull ollama/ollama

3. Start the container, mounting the local SmolVLM model directory C:\Users\sdt\Desktop\smolVLM (PowerShell syntax; the backticks are line continuations)

podman run -d `
  --name ollama `
  -p 11434:11434 `
  -v ollama_data:/root/.ollama `
  -v C:\Users\sdt\Desktop\smolVLM:/models `
  ollama/ollama
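
Once the container is up, you can verify that the server answers on the published port. A minimal check in Python (assumes the defaults above; Ollama answers GET / with a plain-text banner):

import requests

# Sanity check: the Ollama server should answer on the published port.
resp = requests.get("http://localhost:11434/", timeout=5)
print(resp.status_code, resp.text)  # expect: 200 Ollama is running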


4. Open a shell inside the running container

podman exec -it ollama sh

5. Create and run your model inside the container

# Change into the mounted model directory
cd /models

# Create the model (assuming your modelfile is named smolvlm.modelfile)
ollama create smolvlm:256m -f smolvlm.modelfile

# Run the model
ollama run smolvlm:256m
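
To confirm the model was registered, you can query Ollama's /api/tags endpoint from the host. A minimal sketch in Python (assumes the port mapping above):

import requests

# List the models known to the Ollama server (GET /api/tags).
resp = requests.get("http://localhost:11434/api/tags", timeout=10)
resp.raise_for_status()
names = [m["name"] for m in resp.json().get("models", [])]
print("smolvlm:256m registered:", "smolvlm:256m" in names)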


Contents of smolvlm.modelfile:
# Tuned specifically for the 256M-parameter model
FROM ./SmolVLM-256M-Instruct-f16.gguf
ADAPTER ./mmproj-SmolVLM-256M-Instruct-f16.gguf

# Template configuration tuned for best results
TEMPLATE """
<|im_start|>system
{{ .System }}
<end_of_utterance>
{{- range .Messages }}
<|im_start|>{{ .Role }}:
{{ .Content }}
<end_of_utterance>
{{- end }}
<|im_start|>assistant
"""

# System prompt tailored to SmolVLM
SYSTEM "You are a visual assistant powered by SmolVLM-256M. Describe images clearly and answer questions based on visual content with high efficiency."

# Parameter tuning for the 256M model
PARAMETER num_ctx 4096
PARAMETER stop "<end_of_utterance>"
PARAMETER stop "<|im_start|>"
PARAMETER temperature 0.01
PARAMETER top_p 0.9
PARAMETER repeat_penalty 1.1
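
The PARAMETER lines above set the model's defaults. Ollama's /api/generate endpoint also accepts per-request overrides through an "options" field, so you can experiment with sampling settings without rebuilding the model. A minimal sketch:

import requests

payload = {
    "model": "smolvlm:256m",
    "prompt": "Say hello.",
    "stream": False,
    "options": {"temperature": 0.3, "num_ctx": 2048},  # per-request overrides
}
resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=120)
print(resp.json()["response"])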

6. Cleanup: stop and remove the container
podman stop ollama
podman rm ollama

Note: the named volume ollama_data (which stores the created model) survives container removal; remove it as well with podman volume rm ollama_data if you want a completely clean slate.

7. Calling the API
Script: smolvlm-api.py
python smolvlm-api.py --image [image path] --stream
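For example, to take a webcam photo and ask a one-off question instead:
python smolvlm-api.py --camera --prompt "Describe the scene" --stream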

  Script source:

#!/usr/bin/env python3
# smolvlm-api.py
import argparse
import base64
from pathlib import Path
import cv2

from VisionModel import VisionModel

# ----------------------------------------------------------------------
def build_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Small CLI tool for text + image multimodal inference",
    )
    p.add_argument("--model",  default="smolvlm:256m",
                   help="Vision 模型标签(ollama list)")
    p.add_argument("--image",  
                   help="要推理的图像文件")
    p.add_argument("--camera", action="store_true",
                   help="使用摄像头拍照")
    p.add_argument("--prompt", default="",
                   help="首次提问内容;留空则进入交互循环")
    p.add_argument("--stream", action="store_true",
                   help="是否流式输出")
    return p.parse_args()

# ----------------------------------------------------------------------
def capture_from_camera() -> str:
    """Capture an image from the webcam and return it base64-encoded."""
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Cannot open camera")

    print("Press SPACE to take a photo, ESC to quit...")
    while True:
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("Failed to read a frame from the camera")

        cv2.imshow('Camera', frame)
        key = cv2.waitKey(1) & 0xFF

        if key == 27:  # ESC
            cap.release()
            cv2.destroyAllWindows()
            raise KeyboardInterrupt("Photo capture cancelled by user")
        elif key == 32:  # SPACE
            break

    cap.release()
    cv2.destroyAllWindows()

    # Encode the captured frame in memory; this avoids the Windows
    # file-locking problem of writing to a still-open NamedTemporaryFile.
    ok, buf = cv2.imencode('.jpg', frame)
    if not ok:
        raise RuntimeError("Failed to encode the captured frame")
    return base64.b64encode(buf.tobytes()).decode()

# ----------------------------------------------------------------------
def load_b64(img_path: Path) -> str:
    """Read an image file and return its contents base64-encoded."""
    if not img_path.is_file():
        raise FileNotFoundError(img_path)
    return base64.b64encode(img_path.read_bytes()).decode()

# ----------------------------------------------------------------------
def main() -> None:
    args = build_args()

    # 1) Load the model wrapper
    llm = VisionModel(vision_model_path=args.model, stream=args.stream)

    # 2) Obtain the image
    try:
        if args.camera:
            img_b64 = capture_from_camera()
        elif args.image:
            img_b64 = load_b64(Path(args.image))
        else:
            raise ValueError("Please specify --image or --camera")
    except Exception as e:
        print(f"Error: {e}")
        return

    # 3) If a prompt was given on the command line, run it once
    if args.prompt:
        run_once(llm, args.prompt, img_b64)
    else:
        # Otherwise enter an interactive loop
        try:
            while True:
                prompt = input("Enter a prompt (Ctrl-C to quit): ")
                run_once(llm, prompt, img_b64)
        except KeyboardInterrupt:
            print("\nExited")


# ----------------------------------------------------------------------
def run_once(llm, prompt: str, img_b64: str) -> None:
    """Call VisionModel and print the result (streaming or not)."""
    for chunk in llm.generate(prompt, img_b64):
        print(chunk, end="", flush=True)
    print()


# ----------------------------------------------------------------------
if __name__ == "__main__":
    main()
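
The script imports VisionModel from a companion module that is not included in this post. A minimal sketch of what such a wrapper could look like, assuming it talks to Ollama's /api/generate endpoint (the class name, constructor arguments, and generate() signature are taken from the usage above; everything else is an assumption):

# VisionModel.py -- minimal sketch, not the original implementation
import json
import requests

class VisionModel:
    """Thin wrapper around Ollama's /api/generate endpoint for vision models."""

    def __init__(self, vision_model_path: str = "smolvlm:256m",
                 stream: bool = False,
                 host: str = "http://localhost:11434"):
        self.model = vision_model_path
        self.stream = stream
        self.url = f"{host}/api/generate"

    def generate(self, prompt: str, img_b64: str):
        """Yield response text chunks (a single chunk when stream=False)."""
        payload = {
            "model": self.model,
            "prompt": prompt,
            "images": [img_b64],  # base64-encoded image bytes
            "stream": self.stream,
        }
        resp = requests.post(self.url, json=payload,
                             stream=self.stream, timeout=300)
        resp.raise_for_status()
        if self.stream:
            # Streaming mode: the server sends one JSON object per line.
            for line in resp.iter_lines():
                if line:
                    yield json.loads(line).get("response", "")
        else:
            yield resp.json().get("response", "")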
