折腾笔记[41]-使用mqtt透传ollama的api

摘要

使用mqtt透传ollama的api端点以应对客户端与服务器网络连接不稳定的情况.

实现

服务端

1. 启动mqtt broker(也可以使用公共broker)

  • mqtt://127.0.0.1:8907, 允许匿名登录
# Pull the Mosquitto broker image.
docker pull m.daocloud.io/docker.io/eclipse-mosquitto:latest
# Create the host-side config directory/file, then hand it to the invoking
# user so the unprivileged redirect below is allowed to write the file.
sudo mkdir -p /home/server/mosquitto
sudo touch /home/server/mosquitto/mosquitto.conf
sudo chown -R "$(id -un)" /home/server/mosquitto
# Broker config: listen on port 8907 on all interfaces, plain MQTT,
# anonymous clients allowed.
cat > /home/server/mosquitto/mosquitto.conf <<'EOD'
listener 8907 0.0.0.0
protocol mqtt
allow_anonymous true
EOD
# Run the broker detached with automatic restart, publishing port 8907 and
# mounting the config file written above into the container.
docker run --restart=always -p 8907:8907 -v "/home/server/mosquitto/mosquitto.conf:/mosquitto/config/mosquitto.conf" -d m.daocloud.io/docker.io/eclipse-mosquitto:latest

2. 配置mqtt透传转发ollama

命令:

# Initialise a uv project in the current directory.
uv init
# Pin the project's Python version to 3.13.
uv python pin 3.13
# Add the two third-party packages the bridge script imports.
uv add paho-mqtt aiohttp
# Create the bridge script (its source is listed below).
vim mqtt_bridge_ollama.py

代码:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
文件: mqtt_bridge_ollama.py
功能: MQTT 透明代理 127.0.0.1:8907 <-> Ollama 127.0.0.1:11435
用法: python mqtt_bridge_ollama.py
"""
import asyncio
import json
import logging
import signal
import sys
import time
from datetime import datetime
from typing import Any, Dict, Optional

import aiohttp
import paho.mqtt.client as mqtt

# ========== Configuration ==========
# Base URL of the Ollama HTTP API that requests are forwarded to.
OLLAMA_BASE = "http://127.0.0.1:11435"
# MQTT broker address in URL form.
MQTT_BROKER = "tcp://127.0.0.1:8907"
# Topic prefix shared by request and response topics.
ROOT_TOPIC = "/api/ollama"
# MQTT quality-of-service level used for subscribing and publishing.
QOS = 1
# ===================================

# INFO-level logging with a fixed tag and a time-of-day timestamp.
logging.basicConfig(
    format="[mqtt-ollama] %(asctime)s  %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)
log = logging.getLogger(__name__)


# ---------- 工具 ----------
def gen_id() -> str:
    """Return the current time as a decimal string of whole microseconds.

    The value is the POSIX timestamp of ``datetime.now()`` scaled to
    microseconds and truncated to an integer.
    """
    epoch_seconds = datetime.now().timestamp()
    return f"{int(epoch_seconds * 1_000_000)}"


# ---------- MQTT ----------
class Bridge:
    """Relay Ollama HTTP requests and responses over MQTT.

    A request is an MQTT message on ``{ROOT_TOPIC}/<method>/<path>`` whose
    payload is a JSON object.  The object, minus the optional ``_req_id``
    key, is sent as the JSON body of ``<method> {OLLAMA_BASE}/<path>``.
    Every non-empty line of the HTTP response body is published to
    ``{ROOT_TOPIC}/response/<req_id>``; if forwarding fails, a single
    ``{"error": "..."}`` object is published there instead.
    """

    # HTTP methods accepted as the first topic segment after ROOT_TOPIC.
    _HTTP_METHODS = frozenset(
        {"GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"}
    )

    def __init__(self) -> None:
        self._mqtt: Optional[mqtt.Client] = None
        self._session: Optional[aiohttp.ClientSession] = None
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        # Set once a shutdown has been requested; created in _start().
        self._stop_event: Optional[asyncio.Event] = None

    # ----- lifecycle -----
    def run(self) -> None:
        """Run the bridge until SIGINT/SIGTERM is received (blocking).

        ``asyncio.run`` creates the loop, cancels tasks that are still
        pending when ``_start`` returns, and closes the loop.
        """
        asyncio.run(self._start())

    async def stop(self) -> None:
        """Request shutdown and release the MQTT client and HTTP session."""
        self._request_stop()
        await self._shutdown()

    def _request_stop(self) -> None:
        """Wake up ``_start`` so it can shut down; must run on the loop thread."""
        log.info("收到退出信号")
        if self._stop_event is not None:
            self._stop_event.set()

    async def _shutdown(self) -> None:
        """Disconnect MQTT and close the HTTP session; safe to call repeatedly."""
        client, self._mqtt = self._mqtt, None
        if client is not None:
            client.disconnect()
            client.loop_stop()
        session, self._session = self._session, None
        if session is not None:
            await session.close()

    def _install_signal_handlers(self) -> None:
        """Route SIGINT/SIGTERM to ``_request_stop`` on the event loop."""
        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                self._loop.add_signal_handler(sig, self._request_stop)
            except NotImplementedError:
                # Loops without add_signal_handler support (e.g. on Windows):
                # use a plain handler that hands over to the loop thread.
                signal.signal(
                    sig,
                    lambda *_: self._loop.call_soon_threadsafe(self._request_stop),
                )

    async def _start(self) -> None:
        """Connect to the broker, retrying every 5 s, and serve until stopped."""
        self._loop = asyncio.get_running_loop()
        self._stop_event = asyncio.Event()
        self._install_signal_handlers()
        # total=0 disables the overall timeout so long generations can stream.
        self._session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=0))

        try:
            while not self._stop_event.is_set():
                try:
                    # NOTE: connect() blocks; nothing else runs on the loop yet.
                    self._mqtt = self._connect_mqtt()
                except Exception as e:  # retry boundary: log and try again
                    log.error("连接失败: %s ,5s 后重试", e)
                    await self._sleep_unless_stopped(5)
                    continue
                log.info("MQTT 连接成功")
                # paho's network thread does the work from here on.
                await self._stop_event.wait()
        finally:
            await self._shutdown()

    async def _sleep_unless_stopped(self, seconds: float) -> None:
        """Sleep for ``seconds`` but return early when a stop is requested."""
        try:
            await asyncio.wait_for(self._stop_event.wait(), timeout=seconds)
        except asyncio.TimeoutError:
            pass  # the full delay elapsed without a stop request

    # ----- MQTT side -----
    @staticmethod
    def _broker_address() -> "tuple[str, int]":
        """Return ``(host, port)`` parsed from ``MQTT_BROKER``."""
        from urllib.parse import urlsplit

        parts = urlsplit(MQTT_BROKER)
        return parts.hostname or "127.0.0.1", parts.port or 8907

    def _connect_mqtt(self) -> mqtt.Client:
        """Create a client, connect it and start its network thread."""
        host, port = self._broker_address()
        client = mqtt.Client(
            mqtt.CallbackAPIVersion.VERSION2, client_id="mqtt-ollama-bridge"
        )
        client.on_connect = self._on_connect
        client.connect(host, port, 60)
        client.loop_start()
        return client

    def _on_connect(self, client: mqtt.Client, *args: Any, properties=None) -> None:
        """Subscribe to all request topics once the broker accepts the connection."""
        # Callback API v2 passes (userdata, flags, reason_code, properties).
        reason_code = args[2] if len(args) > 2 else None
        if getattr(reason_code, "is_failure", False):
            log.error("MQTT 连接被拒绝: %s", reason_code)
            return
        topic = f"{ROOT_TOPIC}/#"
        client.message_callback_add(topic, self._on_message)
        client.subscribe(topic, qos=QOS)
        log.info("订阅成功: %s", topic)

    def _on_message(self, client: mqtt.Client, userdata: Any, msg: mqtt.MQTTMessage) -> None:
        """Hand an incoming request over to the event loop (runs on paho's thread)."""
        # The wildcard subscription also matches the bridge's own replies;
        # drop them, otherwise every reply would be re-forwarded to Ollama.
        if msg.topic.startswith(f"{ROOT_TOPIC}/response/"):
            return
        future = asyncio.run_coroutine_threadsafe(self._handle(msg), self._loop)
        future.add_done_callback(self._log_unhandled)

    @staticmethod
    def _log_unhandled(future: Any) -> None:
        """Log an exception that escaped ``_handle`` instead of losing it."""
        if future.cancelled():
            return
        exc = future.exception()
        if exc is not None:
            log.error("处理消息失败: %s", exc)

    # ----- request handling -----
    async def _handle(self, msg: mqtt.MQTTMessage) -> None:
        """Parse one request message and forward it to Ollama."""
        topic = msg.topic
        log.info("收到 topic=%s", topic)

        # Topic layout: <ROOT_TOPIC>/<method>/<path...>
        suffix = topic[len(ROOT_TOPIC) + 1 :]
        method, sep, path = suffix.partition("/")
        if not sep or method.upper() not in self._HTTP_METHODS:
            log.warning("topic 格式错误")
            return

        # Only a JSON object is usable as a request body; anything else is
        # treated as an empty body.
        payload: Dict[str, Any] = {}
        if msg.payload:
            try:
                decoded = json.loads(msg.payload)
            except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
                decoded = None
            if isinstance(decoded, dict):
                payload = decoded
            else:
                log.warning("payload 不是 JSON 对象, 按空请求体处理")
        req_id: str = str(payload.pop("_req_id", None) or gen_id())

        try:
            await self._forward(method.upper(), path, req_id, payload)
        except Exception as e:  # report any failure back to the requester
            log.error("转发错误: %s", e)
            self._publish(req_id, {"error": str(e)})

    async def _forward(self, method: str, path: str, req_id: str, body: Dict[str, Any]) -> None:
        """Send the HTTP request and publish each non-empty response line."""
        if self._session is None:
            raise RuntimeError("HTTP session is not available")
        url = f"{OLLAMA_BASE}/{path}"
        headers = {"Content-Type": "application/json"} if body else {}
        async with self._session.request(
            method, url, json=body or None, headers=headers
        ) as resp:
            # Line-by-line so streamed replies go out as they arrive.
            async for line in resp.content:
                line = line.rstrip(b"\r\n")
                if line:
                    self._publish(req_id, line.decode("utf-8"))

    def _publish(self, req_id: str, data: Any) -> None:
        """Publish one item to this request's response topic.

        Strings are sent as-is; any other value is JSON-encoded first.
        """
        client = self._mqtt
        if client is None:
            # Happens when a request is still in flight during shutdown.
            log.warning("MQTT 未连接, 丢弃响应: %s", req_id)
            return
        topic = f"{ROOT_TOPIC}/response/{req_id}"
        payload = data if isinstance(data, str) else json.dumps(data)
        client.publish(topic, payload, qos=QOS)


# ---------- main ----------
# Script entry point: build a Bridge and run it when executed directly.
if __name__ == "__main__":
    Bridge().run()

运行:

# Start the bridge as a background job that keeps running after the terminal closes.
nohup uv run mqtt_bridge_ollama.py &

客户端

1. 测试连接mqtt话题

命令:

# Initialise a uv project in the current directory.
uv init
# Pin the project's Python version to 3.13.
uv python pin 3.13
# Add dependencies (the test script below only imports paho-mqtt).
uv add paho-mqtt aiohttp
# Create the test script (its source is listed below).
vim test_mqtt_ollama.py

代码:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
测试:MQTT 透传 → Ollama 流式生成 Rust 代码
用法: python test_mqtt_ollama.py
"""
import json
import queue
import random
import time
import paho.mqtt.client as mqtt

MQTT_HOST = "10.8.8.130"
MQTT_PORT = 8907
ROOT_TOPIC = "/api/ollama"

# The request id names the response topic, so this client only receives
# replies to its own request.
req_id = f"rust_demo_{random.randint(1000, 9999)}"
# Raw response payloads, filled by the paho network thread.
q = queue.Queue()
# Receives True once the subscription is acknowledged, False on failure.
ready = queue.Queue()


def on_connect(cli, _ud, _flags, rc, _properties=None):
    """Subscribe to this request's response topic after a successful connect."""
    if rc == 0:
        print("✅ MQTT 连接成功")
        cli.subscribe(f"{ROOT_TOPIC}/response/{req_id}", qos=1)
    else:
        print("❌ 连接失败,rc =", rc)
        ready.put(False)


def on_subscribe(_cli, _ud, _mid, reason_codes, _properties=None):
    """Report whether the broker granted the subscription."""
    ready.put(not any(getattr(code, "is_failure", False) for code in reason_codes))


def on_message(cli, _ud, msg):
    """Queue every incoming payload for the main thread."""
    q.put(msg.payload)


client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2)
client.on_connect = on_connect
client.on_subscribe = on_subscribe
client.on_message = on_message
client.connect(MQTT_HOST, MQTT_PORT, 60)
client.loop_start()

try:
    # Wait for the subscription to be acknowledged before publishing;
    # otherwise the first replies could arrive before we are subscribed.
    try:
        subscribed = ready.get(timeout=10)
    except queue.Empty:
        subscribed = False
    if not subscribed:
        raise SystemExit("❌ 订阅响应主题失败或超时")

    # Send the request.
    payload = {
        "_req_id": req_id,
        "model": "modelscope.cn/Qwen/Qwen3-30B-A3B-GGUF:Qwen3-30B-A3B-Q8_0.gguf",
        "prompt": "请只输出一段最简 Rust 代码,打印 hello ollama,不要任何解释。",
        "stream": True
    }
    client.publish(f"{ROOT_TOPIC}/post/api/generate",
                   json.dumps(payload), qos=1)

    # Receive the streamed answer.
    print("\n--- 流式回答开始 ---")
    while True:
        try:
            pkt = q.get(timeout=10)
        except queue.Empty:
            print("\n⚠️  10s 没收到新包,退出")
            break

        try:
            data = json.loads(pkt)
        except ValueError:
            # Not valid UTF-8 JSON (including empty payloads): skip it.
            continue
        if not isinstance(data, dict):
            continue

        # The bridge reports forwarding failures as {"error": "..."}.
        if "error" in data:
            print(f"\n❌ 服务端错误: {data['error']}")
            break

        # Print only the generated text fragment.
        print(data.get("response", ""), end="", flush=True)
        if data.get("done"):
            break
    print("\n--- 流式回答结束 ---")
finally:
    # Always stop the network thread and close the connection.
    client.loop_stop()
    client.disconnect()

go版本:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"math/rand"
	"time"

	mqtt "github.com/eclipse/paho.mqtt.golang"
)

// Connection settings for the test client.
const (
	MQTT_HOST  = "10.8.8.130"  // broker host
	MQTT_PORT  = 8907          // broker TCP port
	ROOT_TOPIC = "/api/ollama" // topic prefix for requests and responses
)

// main publishes one streaming generate request through the MQTT bridge and
// prints the "response" fragments of the replies until a reply has
// "done": true, an error reply arrives, or no reply arrives for 10 seconds.
func main() {
	// Random request ID; it is also the MQTT client ID and names the
	// response topic.
	reqId := fmt.Sprintf("rust_demo_%d", rand.Intn(9000)+1000)

	// Replies handed over from the paho callback to main.
	msgQueue := make(chan mqtt.Message, 10)
	// Receives a value each time the response topic has been subscribed.
	subscribed := make(chan struct{}, 1)

	// MQTT connection options.
	opts := mqtt.NewClientOptions()
	opts.AddBroker(fmt.Sprintf("tcp://%s:%d", MQTT_HOST, MQTT_PORT))
	opts.SetClientID(reqId)
	opts.SetAutoReconnect(true)

	// On (re)connect: subscribe to the response topic, then signal main.
	opts.SetOnConnectHandler(func(c mqtt.Client) {
		fmt.Println("✅ MQTT 连接成功")
		topic := fmt.Sprintf("%s/response/%s", ROOT_TOPIC, reqId)
		if token := c.Subscribe(topic, 1, nil); token.Wait() && token.Error() != nil {
			log.Fatal(token.Error())
		}
		// Non-blocking: only the first signal matters to main.
		select {
		case subscribed <- struct{}{}:
		default:
		}
	})

	// On message: queue it for main.
	opts.SetDefaultPublishHandler(func(c mqtt.Client, m mqtt.Message) {
		msgQueue <- m
	})

	// Connect and start the network loop.
	client := mqtt.NewClient(opts)
	if token := client.Connect(); token.Wait() && token.Error() != nil {
		log.Fatal("❌ 连接失败:", token.Error())
	}
	defer client.Disconnect(250)

	// Wait for the subscription before publishing; otherwise the first
	// replies could arrive before the client is subscribed.
	select {
	case <-subscribed:
	case <-time.After(10 * time.Second):
		log.Println("❌ 订阅响应主题超时")
		return
	}

	// Build and send the request.
	payload := map[string]interface{}{
		"_req_id": reqId,
		"model":   "modelscope.cn/Qwen/Qwen3-30B-A3B-GGUF:Qwen3-30B-A3B-Q8_0.gguf",
		"prompt":  "请只输出一段最简 Rust 代码,打印 hello ollama,不要任何解释。",
		"stream":  true,
	}
	jsonBytes, err := json.Marshal(payload)
	if err != nil {
		log.Println("❌ 序列化请求失败:", err)
		return
	}
	topic := fmt.Sprintf("%s/post/api/generate", ROOT_TOPIC)
	if token := client.Publish(topic, 1, false, jsonBytes); token.Wait() && token.Error() != nil {
		log.Println("❌ 发布请求失败:", token.Error())
		return
	}

	// Receive the streamed answer.
	fmt.Println("\n--- 流式回答开始 ---")
	done := false
	for !done {
		select {
		case msg := <-msgQueue:
			var data map[string]interface{}
			if err := json.Unmarshal(msg.Payload(), &data); err != nil {
				// Not a JSON object: skip it.
				continue
			}
			// The bridge reports forwarding failures as {"error": "..."}.
			if errMsg, ok := data["error"]; ok {
				fmt.Printf("\n❌ 服务端错误: %v\n", errMsg)
				return
			}
			// Print the fragment only when it is present and a string, so a
			// missing key never shows up as "<nil>".
			if text, ok := data["response"].(string); ok {
				fmt.Print(text)
			}
			if v, ok := data["done"].(bool); ok && v {
				done = true
			}
		case <-time.After(10 * time.Second):
			fmt.Println("\n⚠️  10s 没收到新包,退出")
			return
		}
	}
	fmt.Println("\n--- 流式回答结束 ---")
}

输出:

✅ MQTT 连接成功

--- 流式回答开始 ---
<think>
好的,用户让我提供一个最简的Rust代码,打印“hello ollama”,而且不要任何解释。首先,我需要确认用户的需求是什么。他们可能想要一个非常基础的示例,可能用于测试环境或者快速演示。

Rust的Hello World通常使用println!宏。所以最简单的代码应该是fn main() { println!("hello ollama"); }。不过用户要求最简,可能需要更简短的写法。但Rust的语法必须有函数定义,所以fn main()是必须的。

有没有可能更简?比如使用main函数的另一种形式?或者有没有其他方式?比如使用宏展开?不过可能不会更简。比如,直接写println!会报错,因为需要在函数体内。所以必须有main函数。

那正确的最简代码应该是fn main(){println!("hello ollama");}。去掉空格的话,可能更简,但用户可能希望有适当的格式。不过用户要求最简,所以可能不需要空格。比如fn main(){println!("hello ollama");}这样。

检查是否有其他可能的错误。比如,是否需要use std::println;?不过println!是宏,通常自动引入。所以不需要额外的use语句。因此,这段代码应该可以直接运行。

确认用户不要任何解释,所以只需要输出代码。所以最终答案应该是这个代码段。
</think>

fn main(){println!("hello ollama");}
--- 流式回答结束 ---
posted @ 2026-01-18 17:18  qsBye  阅读(2)  评论(0)    收藏  举报