使用 Golang 测试大语言模型的输出速度性能

使用 Golang 测试大语言模型的输出速度性能

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// Request is the JSON body sent to Ollama's /api/generate endpoint.
type Request struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`

	// Think toggles the model's thinking phase. Ollama reads it as a
	// top-level request field, not as an entry in Options. A pointer is used
	// so that an explicit false is still serialized despite omitempty.
	Think *bool `json:"think,omitempty"`

	// Options carries model parameters (for example temperature).
	Options map[string]any `json:"options,omitempty"`
}

// Response holds the generation statistics decoded from the reply.
type Response struct {
	EvalCount    int   `json:"eval_count"`    // number of generated tokens
	EvalDuration int64 `json:"eval_duration"` // generation time in nanoseconds
}

// tokensPerSecond converts a token count and a duration in nanoseconds into a
// tokens-per-second rate. It returns 0 when the duration is not positive, so
// an empty or failed generation never prints NaN or +Inf.
func tokensPerSecond(evalCount int, evalDuration int64) float64 {
	if evalDuration <= 0 {
		return 0
	}
	return float64(evalCount) / (float64(evalDuration) / 1e9)
}

// main sends a single non-streaming generate request to a local Ollama
// server and prints the wall-clock time, the number of generated tokens and
// the generation speed reported by the server.
func main() {
	const (
		url = "http://localhost:11434/api/generate"
		// requestTimeout bounds the whole request so an unresponsive server
		// cannot hang the program forever.
		requestTimeout = 5 * time.Minute
	)

	// Alternative model to benchmark: "qwen3.5:0.8b".
	think := false // disable think mode
	payload := Request{
		Model:  "qwen3:0.6b",
		Prompt: "你好,请介绍一下你自己",
		Stream: false,
		Think:  &think,
	}

	data, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	client := &http.Client{Timeout: requestTimeout}
	start := time.Now()
	resp, err := client.Post(url, "application/json", bytes.NewReader(data))
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer resp.Body.Close()

	// A non-200 reply carries an error payload, not generation statistics.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("Error: unexpected status", resp.Status)
		return
	}

	var res Response
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		fmt.Println("Error:", err)
		return
	}

	duration := time.Since(start)

	fmt.Printf("总耗时:%v\n", duration)
	fmt.Printf("生成令牌:%d\n", res.EvalCount)
	fmt.Printf("生成速度:%.2f tokens/s\n", tokensPerSecond(res.EvalCount, res.EvalDuration))
}

硬件

CPU:AMD Ryzen 7 7735H with Radeon Graphics @ 3.2GHz
内存:32 GB

输出结果

总耗时:2.712756201s
生成令牌:122
生成速度:67.33 tokens/s
posted @ 2026-03-23 11:30  jiftle  阅读(2)  评论(0)    收藏  举报