使用 Golang 编写程序测试大语言模型的输出速度性能
使用 Golang 编写程序测试大语言模型的输出速度性能
package main
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"time"
)
// Request is the JSON body posted to the Ollama /api/generate endpoint.
type Request struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`
	// Options carries model runtime parameters; it is omitted from the JSON
	// when empty.
	Options map[string]any `json:"options,omitempty"`
	// Think toggles the model's thinking mode. Ollama documents "think" as a
	// top-level request field rather than an entry in Options. It is a
	// pointer so that an explicit false is still serialized, while nil
	// leaves the field out of the request.
	Think *bool `json:"think,omitempty"`
}

// Response holds the generation statistics read from the reply.
type Response struct {
	// EvalCount is reported below as the number of generated tokens.
	EvalCount int `json:"eval_count"`
	// EvalDuration is treated as nanoseconds when computing the speed.
	EvalDuration int64 `json:"eval_duration"`
}

// tokensPerSecond converts a token count and a duration in nanoseconds into
// tokens per second. It returns 0 when either value is not positive, so a
// missing or empty reply never produces NaN or +Inf.
func tokensPerSecond(evalCount int, evalDuration int64) float64 {
	if evalCount <= 0 || evalDuration <= 0 {
		return 0
	}
	return float64(evalCount) / (float64(evalDuration) / 1e9)
}

// postGenerate posts the already-encoded request body to url and decodes the
// reply into a Response. A transport failure, a non-200 status, or an
// undecodable body is returned as an error together with a zero Response.
func postGenerate(client *http.Client, url string, body []byte) (Response, error) {
	resp, err := client.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return Response{}, fmt.Errorf("posting to %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: Ollama is expected to report failures as
		// {"error": "..."}; fall back to the bare status if the body does
		// not have that shape.
		var apiErr struct {
			Error string `json:"error"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&apiErr); err == nil && apiErr.Error != "" {
			return Response{}, fmt.Errorf("server returned %s: %s", resp.Status, apiErr.Error)
		}
		return Response{}, fmt.Errorf("server returned %s", resp.Status)
	}

	var res Response
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		return Response{}, fmt.Errorf("decoding response: %w", err)
	}
	return res, nil
}

// main sends one non-streaming generation request to a local Ollama server
// and prints the wall-clock time, the generated token count, and the
// generation speed in tokens per second.
func main() {
	url := "http://localhost:11434/api/generate"
	think := false // turn thinking mode off
	payload := Request{
		Model: "qwen3:0.6b",
		// Model: "qwen3.5:0.8b",
		Prompt: "你好,请介绍一下你自己",
		Stream: false,
		Think:  &think,
	}

	data, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	// Generation can be slow (the first call may also load the model), so the
	// timeout is generous, but it still keeps a hung server from blocking
	// forever.
	client := &http.Client{Timeout: 5 * time.Minute}

	// The timer covers the HTTP round trip and response decoding only.
	start := time.Now()
	res, err := postGenerate(client, url, data)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	duration := time.Since(start)

	fmt.Printf("总耗时:%v\n", duration)
	fmt.Printf("生成令牌:%d\n", res.EvalCount)
	fmt.Printf("生成速度:%.2f tokens/s\n", tokensPerSecond(res.EvalCount, res.EvalDuration))
}
硬件
AMD Ryzen 7 7735H with Radeon Graphics @ 3.2GHz
内存:32 GB
输出结果
总耗时:2.712756201s
生成令牌:122
生成速度:67.33 tokens/s

浙公网安备 33010602011771号