golang测试模型的token输出速度
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"os/exec"
"runtime"
"strconv"
"strings"
"time"
)
// Request is the JSON body sent to Ollama's /api/generate endpoint.
type Request struct {
	Model   string         `json:"model"`             // model tag to run, e.g. "qwen3:0.6b"
	Prompt  string         `json:"prompt"`            // prompt text sent to the model
	Stream  bool           `json:"stream"`            // false = one complete JSON response instead of a stream
	Options map[string]any `json:"options,omitempty"` // extra generation options; omitted from JSON when nil
}
// Response holds only the fields of Ollama's generate reply that the
// benchmark reads; all other fields in the server response are ignored.
type Response struct {
	EvalCount    int    `json:"eval_count"`    // tokens produced during evaluation
	EvalDuration int64  `json:"eval_duration"` // evaluation time in nanoseconds (divided by 1e9 below)
	Response     string `json:"response"`      // generated text (decoded but not printed)
}
// VersionResp models the reply of Ollama's /api/version endpoint.
type VersionResp struct {
	Version string `json:"version"` // server version string, e.g. "0.18.2"
}
// main prints host system details, the local Ollama server version,
// and then runs a single generation benchmark, with a blank line
// separating the sections.
func main() {
	sections := []func(){printSystemInfo, printOllamaVersion, runBenchmark}
	for i, section := range sections {
		if i > 0 {
			fmt.Println()
		}
		section()
	}
}
// printSystemInfo prints a framed summary of CPU, memory, and GPU info.
func printSystemInfo() {
	fmt.Println("=== System Info ===")
	for _, emit := range []func(){printCPU, printMemory, printGPU} {
		emit()
	}
	fmt.Println("===================")
}
// printCPU prints the CPU model string and core-count summary.
func printCPU() {
	cpuModel, coreInfo := getCPUInfo()
	rows := []struct{ label, value string }{
		{"CPU", cpuModel},
		{"Cores", coreInfo},
	}
	for _, row := range rows {
		fmt.Printf("%-8s: %s\n", row.label, row.value)
	}
}
// getCPUInfo dispatches to the OS-specific CPU probe and returns the
// model string and a core-count summary, or "unknown" for an
// unsupported OS.
func getCPUInfo() (model, cores string) {
	probes := map[string]func() (string, string){
		"linux":   getCPUInfoLinux,
		"windows": getCPUInfoWindows,
		"darwin":  getCPUInfoDarwin,
	}
	if probe, ok := probes[runtime.GOOS]; ok {
		return probe()
	}
	return "unknown", "unknown"
}
// getCPUInfoLinux reads the CPU model from /proc/cpuinfo and derives
// physical/logical core counts from nproc and lscpu.
//
// Fixes: model previously stayed "" (not "unknown") when the grep
// output had no colon, and the nproc error was ignored, which could
// yield a bogus "L" / "0L" cores string when nproc failed.
func getCPUInfoLinux() (model, cores string) {
	model, cores = "unknown", "unknown"

	// CPU model: first "model name : ..." line of /proc/cpuinfo.
	if out, err := exec.Command("grep", "-m1", "model name", "/proc/cpuinfo").Output(); err == nil {
		if _, val, ok := strings.Cut(string(out), ":"); ok {
			model = strings.TrimSpace(val)
		}
	}

	// Logical CPU count.
	logicalOut, logicalErr := exec.Command("nproc").Output()
	logical := strings.TrimSpace(string(logicalOut))

	// Physical cores per socket via lscpu; if unavailable, fall back to
	// the logical count alone.
	physicalOut, err := exec.Command("bash", "-c", "lscpu | grep 'Core(s) per socket' | awk '{print $4}'").Output()
	if err != nil {
		if logicalErr == nil && logical != "" {
			cores = fmt.Sprintf("%sL", logical)
		}
		return
	}

	socketOut, _ := exec.Command("bash", "-c", "lscpu | grep 'Socket(s)' | awk '{print $2}'").Output()
	p, _ := strconv.Atoi(strings.TrimSpace(string(physicalOut)))
	s, _ := strconv.Atoi(strings.TrimSpace(string(socketOut)))
	l, _ := strconv.Atoi(logical)
	if p > 0 && s > 0 {
		cores = fmt.Sprintf("%dP / %dL", p*s, l)
	} else {
		cores = fmt.Sprintf("%dL", l)
	}
	return
}
// getCPUInfoWindows queries wmic for the CPU model and core counts.
//
// Fix: when wmic output had fewer lines/fields than expected, model
// and cores were silently left as empty strings; they now default to
// "unknown" like the other platforms.
func getCPUInfoWindows() (model, cores string) {
	model, cores = "unknown", "unknown"

	// CPU model: first data line after the "Name" header row.
	if out, err := exec.Command("wmic", "cpu", "get", "name").Output(); err == nil {
		lines := strings.Split(strings.TrimSpace(string(out)), "\n")
		if len(lines) >= 2 {
			model = strings.TrimSpace(lines[1])
		}
	}

	// Physical and logical counts appear on one data line, in the
	// column order requested below.
	out, err := exec.Command("wmic", "cpu", "get", "NumberOfCores,NumberOfLogicalProcessors").Output()
	if err != nil {
		return
	}
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) >= 2 {
		if parts := strings.Fields(lines[1]); len(parts) >= 2 {
			cores = fmt.Sprintf("%sP / %sL", parts[0], parts[1])
		}
	}
	return
}
// getCPUInfoDarwin reads the CPU brand string and physical/logical
// core counts from sysctl on macOS.
func getCPUInfoDarwin() (model, cores string) {
	model, cores = "unknown", "unknown"

	// Brand string, e.g. "Apple M1".
	if brand, err := exec.Command("sysctl", "-n", "machdep.cpu.brand_string").Output(); err == nil {
		model = strings.TrimSpace(string(brand))
	}

	// Core counts; both probes must succeed to report anything.
	pOut, pErr := exec.Command("sysctl", "-n", "hw.physicalcpu").Output()
	lOut, lErr := exec.Command("sysctl", "-n", "hw.logicalcpu").Output()
	if pErr == nil && lErr == nil {
		physical := strings.TrimSpace(string(pOut))
		logical := strings.TrimSpace(string(lOut))
		cores = fmt.Sprintf("%sP / %sL", physical, logical)
	}
	return
}
// printMemory prints the total system RAM.
func printMemory() {
	fmt.Printf("%-8s: %s\n", "Memory", getMemoryTotal())
}
// getMemoryTotal dispatches to the OS-specific memory probe and
// returns a human-readable total, or "unknown" for an unsupported OS.
func getMemoryTotal() string {
	probes := map[string]func() string{
		"linux":   getMemoryTotalLinux,
		"windows": getMemoryTotalWindows,
		"darwin":  getMemoryTotalDarwin,
	}
	if probe, ok := probes[runtime.GOOS]; ok {
		return probe()
	}
	return "unknown"
}
// getMemoryTotalLinux reads the MemTotal line of /proc/meminfo
// (value in KiB) and formats it as gigabytes.
func getMemoryTotalLinux() string {
	out, err := exec.Command("grep", "MemTotal", "/proc/meminfo").Output()
	if err != nil {
		return "unknown"
	}
	fields := strings.Fields(string(out))
	if len(fields) < 2 {
		return "unknown"
	}
	kib, _ := strconv.ParseFloat(fields[1], 64)
	return fmt.Sprintf("%.2f GB", kib/1024/1024)
}
// getMemoryTotalWindows queries wmic for TotalVisibleMemorySize
// (value in KiB) and formats it as gigabytes.
func getMemoryTotalWindows() string {
	out, err := exec.Command("wmic", "os", "get", "TotalVisibleMemorySize").Output()
	if err != nil {
		return "unknown"
	}
	// First line is the column header; the value follows on line two.
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) < 2 {
		return "unknown"
	}
	kib, _ := strconv.ParseFloat(strings.TrimSpace(lines[1]), 64)
	return fmt.Sprintf("%.2f GB", kib/1024/1024)
}
// getMemoryTotalDarwin reads total RAM in bytes from sysctl hw.memsize
// on macOS and formats it as gigabytes.
//
// Fixes: the local variable previously shadowed the imported "bytes"
// package, and a ParseFloat failure was silently formatted as 0.00 GB
// instead of "unknown".
func getMemoryTotalDarwin() string {
	out, err := exec.Command("sysctl", "-n", "hw.memsize").Output()
	if err != nil {
		return "unknown"
	}
	memBytes, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64)
	if err != nil {
		return "unknown"
	}
	return fmt.Sprintf("%.2f GB", memBytes/1024/1024/1024)
}
// printGPU lists NVIDIA GPUs and their VRAM via nvidia-smi, or a
// placeholder line when the tool is missing or fails.
func printGPU() {
	out, err := exec.Command("nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader").Output()
	if err != nil {
		fmt.Printf("%-8s: No NVIDIA GPU detected or nvidia-smi not available\n", "GPU")
		return
	}
	for i, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
		// Each CSV line is "<name>, <mem> MiB".
		fields := strings.Split(line, ",")
		if len(fields) < 2 {
			continue
		}
		gpuName := strings.TrimSpace(fields[0])
		vram := strings.TrimSpace(strings.TrimSuffix(fields[1], " MiB"))
		// First GPU gets a two-line entry; extras are condensed.
		if i == 0 {
			fmt.Printf("%-8s[%d]: %s\n", "GPU", i, gpuName)
			fmt.Printf("%-8s: %s MB\n", "VRAM", vram)
			continue
		}
		fmt.Printf("%-8s[%d]: %s | VRAM: %s MB\n", "GPU", i, gpuName, vram)
	}
}
// printOllamaVersion queries the local Ollama server's /api/version
// endpoint and prints the reported version. Failures are reported on
// the same labeled line instead of aborting the program.
//
// Fixes: http.Get uses a client with no timeout, so a hung server
// would stall the run forever; the HTTP status code was never checked.
func printOllamaVersion() {
	fmt.Println("=== Ollama Info ===")
	url := "http://localhost:11434/api/version"
	// Bound the request so a wedged server cannot hang the program.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		fmt.Printf("%-8s: failed to connect (%v)\n", "Version", err)
		return
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("%-8s: failed to read response (%v)\n", "Version", err)
		return
	}
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("%-8s: unexpected status %s\n", "Version", resp.Status)
		return
	}
	var v VersionResp
	if err := json.Unmarshal(body, &v); err != nil {
		// Show the raw body so a non-JSON reply is still diagnosable.
		fmt.Printf("%-8s: unknown (%s)\n", "Version", string(body))
		return
	}
	fmt.Printf("%-8s: %s\n", "Version", v.Version)
	fmt.Println("===================")
}
// runBenchmark sends one non-streaming generate request to the local
// Ollama server and reports token throughput computed from the
// server-side eval_count / eval_duration fields.
//
// Fix: the HTTP status code was never checked, so an error reply
// (model not found, etc.) decoded into zero values and printed
// "0 tokens / 0.00 tokens/s" as if the benchmark had succeeded.
func runBenchmark() {
	const (
		model  = "qwen3.5:9b"
		prompt = "你好,请介绍一下你自己"
	)
	fmt.Println("=== Benchmark ===")
	fmt.Printf("%-12s: %s\n", "Model", model)
	fmt.Printf("%-12s: %s\n", "Prompt", prompt)
	fmt.Println("-------------------")

	url := "http://localhost:11434/api/generate"
	payload := Request{
		Model:  model,
		Prompt: prompt,
		Stream: false,
		Options: map[string]any{
			// NOTE(review): "think" is passed via options here — confirm
			// the target Ollama version honors it at this location.
			"think": false,
		},
	}
	data, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Marshal error:", err)
		return
	}

	start := time.Now()
	// Deliberately no client timeout: large models may take minutes.
	resp, err := http.Post(url, "application/json", bytes.NewReader(data))
	if err != nil {
		fmt.Println("Request error:", err)
		return
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(resp.Body)
		fmt.Printf("Request failed: %s (%s)\n", resp.Status, strings.TrimSpace(string(body)))
		return
	}
	var res Response
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		fmt.Println("Decode error:", err)
		return
	}
	duration := time.Since(start)

	// eval_duration is in nanoseconds; guard the division by zero.
	evalSec := float64(res.EvalDuration) / 1e9
	var tokensPerSec float64
	if evalSec > 0 {
		tokensPerSec = float64(res.EvalCount) / evalSec
	}
	fmt.Printf("%-12s: %v\n", "Total Time", duration.Round(time.Millisecond))
	fmt.Printf("%-12s: %d\n", "Eval Tokens", res.EvalCount)
	fmt.Printf("%-12s: %.2f tokens/s\n", "Eval Speed", tokensPerSec)
	fmt.Printf("%-12s: %v\n", "Server Eval", time.Duration(res.EvalDuration))
	fmt.Println("=================")
}
测试结果
=== System Info ===
CPU : AMD Ryzen 5 5600G with Radeon Graphics
Cores : 6P / 12L
Memory : 27.30 GB
GPU : No NVIDIA GPU detected or nvidia-smi not available
===================
=== Ollama Info ===
Version : 0.18.2
===================
=== Benchmark ===
Model : qwen3.5:9b
Prompt : 你好,请介绍一下你自己
-------------------
Total Time : 1m33.855s
Eval Tokens : 400
Eval Speed : 4.32 tokens/s
Server Eval : 1m32.676335318s
=================
=== System Info ===
CPU : AMD Ryzen 5 5600G with Radeon Graphics
Cores : 6P / 12L
Memory : 27.30 GB
GPU : No NVIDIA GPU detected or nvidia-smi not available
===================
=== Ollama Info ===
Version : 0.18.2
===================
=== Benchmark ===
Model : qwen3.5:0.8b
Prompt : 你好,请介绍一下你自己
-------------------
Total Time : 33.877s
Eval Tokens : 484
Eval Speed : 15.66 tokens/s
Server Eval : 30.914075743s
=================
=== System Info ===
CPU : AMD Ryzen 5 5600G with Radeon Graphics
Cores : 6P / 12L
Memory : 27.30 GB
GPU : No NVIDIA GPU detected or nvidia-smi not available
===================
=== Ollama Info ===
Version : 0.18.2
===================
=== Benchmark ===
Model : qwen3:0.6b
Prompt : 你好,请介绍一下你自己
-------------------
Total Time : 3.721s
Eval Tokens : 165
Eval Speed : 58.01 tokens/s
Server Eval : 2.844187074s
=================
=== System Info ===
CPU : AMD Ryzen 5 5600G with Radeon Graphics
Cores : 6P / 12L
Memory : 27.30 GB
GPU : No NVIDIA GPU detected or nvidia-smi not available
===================
=== Ollama Info ===
Version : 0.18.2
===================
=== Benchmark ===
Model : llama3.1:8b
Prompt : 你好,请介绍一下你自己
-------------------
Total Time : 6.548s
Eval Tokens : 47
Eval Speed : 7.53 tokens/s
Server Eval : 6.24072249s
=================
模型性能对比
以下是将新提供的数据整合到原有对比表格中的结果:
| 指标 | qwen3.5:9b | qwen3.5:0.8b | qwen3:0.6b | llama3.1:8b | qwen3.5:27b | qwen2.5:0.5b |
|---|---|---|---|---|---|---|
| Model | qwen3.5:9b | qwen3.5:0.8b | qwen3:0.6b | llama3.1:8b | qwen3.5:27b | qwen2.5:0.5b |
| Prompt | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 | 你好,请介绍一下你自己 |
| Total Time | 1m33.855s | 33.877s | 3.721s | 6.548s | 24m16.474s | 2.969s |
| Eval Tokens | 400 | 484 | 165 | 47 | 1622 | 70 |
| Eval Speed (tokens/s) | 4.32 | 15.66 | 58.01 | 7.53 | 1.13 | 55.88 |
| Server Eval | 1m32.676335318s | 30.914075743s | 2.844187074s | 6.24072249s | 23m59.117758659s | 1.252627949s |
表格说明:
- Model:测试的模型名称。
- Prompt:使用的提示文本。
- Total Time:模型完成提示所需的总时间。
- Eval Tokens:评估过程中处理的token数量。
- Eval Speed:评估速度,以tokens/s为单位。
- Server Eval:服务器端评估所需的时间。

浙公网安备 33010602011771号