落落2009

导航

 
from locust import HttpUser,task,between
import time
import json


class QwenTPUTUser(HttpUser):
    """Locust user that measures token throughput (TPUT) of a chat endpoint.

    Each task iteration posts a fixed chat-completion request and, on an
    HTTP 200 reply, reports a custom "TPUT" entry to Locust's statistics
    whose response length is the total token count of the completion.
    """

    host = 'http://201.201.201.35:9997/v1'
    wait_time = between(0.1, 0.5)

    @task
    def test_tput(self):
        """Send one chat-completion request and record its token throughput.

        Non-200 replies are left to Locust's built-in HTTP statistics and
        produce no "TPUT" entry. A 200 reply whose body is not JSON or lacks
        ``usage.total_tokens`` is recorded as a failed "TPUT" sample.
        """
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments during the request.
        start_time = time.perf_counter()
        body = {
            "model": "qwen2.5-instruct",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "我很开心,陪我聊聊天吧,今天吃几个鸡蛋"
                }
            ]
        }
        headers = {'Content-Type': 'application/json'}

        response = self.client.post(
            '/chat/completions',
            data=json.dumps(body),
            headers=headers)

        if response.status_code != 200:
            return

        try:
            # Response.json() accepts keyword arguments only; passing the
            # response object positionally raises TypeError.
            resp_data = response.json()
            total_tokens = resp_data['usage']['total_tokens']
        except (ValueError, KeyError, TypeError) as err:
            # Body is not JSON or carries no usage info: report a failed
            # sample instead of crashing the task.
            self.environment.events.request.fire(
                request_type="POST",
                name="TPUT",
                response_time=(time.perf_counter() - start_time) * 1000,
                response_length=0,
                exception=err,
                context={},
            )
            return

        duration = time.perf_counter() - start_time
        # Guard against a zero-length interval on coarse clocks.
        tput = total_tokens / duration if duration > 0 else 0.0
        self.environment.events.request.fire(
            request_type="POST",
            name="TPUT",
            response_time=duration * 1000,  # convert to milliseconds
            response_length=total_tokens,  # token count stands in for size
            exception=None,
            context={"tput": tput},  # carry the TPUT value alongside
        )

posted on 2025-06-20 16:59  落落2009  阅读(13)  评论(0)    收藏  举报