# Real-time speech recognition in Python (2025-12-29).
# Based on the iFLYTEK (科大讯飞) large-model speech recognition service.
# Test code.
import base64
import hashlib
import hmac
import time
import uuid
import urllib.parse
import json
import threading
import pyaudio
from websocket import WebSocketApp
# Credentials handed to build_signature(); "xxx" is a placeholder to be
# replaced with real values before running.
APP_ID = "xxx"
API_SECRET = "xxx"
API_KEY = "xxx"
# Base WebSocket endpoint; build_ws_url() appends the signed query string.
WS_HOST = "wss://office-api-ast-dx.iflyaisol.com/ast/communicate/v1"
# Sent verbatim as the `lang`, `audio_encode` and `samplerate` query
# parameters. SAMPLE_RATE is also the rate the microphone stream is opened at.
# NOTE(review): "autodialect" presumably selects automatic dialect detection
# on the server side -- confirm against the service documentation.
LANG = "autodialect"
AUDIO_ENCODE = "pcm_s16le"
SAMPLE_RATE = 16000
# Random hex identifier generated once at import time; sent as the `uuid`
# query parameter.
UUID = uuid.uuid4().hex
# Audio chunking: 1280 bytes of 16-bit mono PCM at 16 kHz is 640 samples,
# i.e. 0.04 s of audio, which is the value of FRAME_INTERVAL (seconds).
FRAME_BYTES = 1280
FRAME_INTERVAL = 0.04
# Random identifier generated once at import time; included as `sessionId`
# in the end-of-stream message.
session_id = str(uuid.uuid4())
# ---------------------
# 签名生成
# ---------------------
def build_signature(app_id, api_key, api_secret, lang, audio_encode, samplerate, uuid):
    """Build the handshake parameters and their HMAC-SHA1 signature.

    The parameters are sorted by key, every key and value is percent-encoded
    with no "safe" characters, and the pairs are joined as ``k=v&k=v``. That
    base string is signed with ``api_secret`` using HMAC-SHA1 and the raw
    digest is Base64-encoded.

    Args:
        app_id: Value sent as ``appId``.
        api_key: Value sent as ``accessKeyId``.
        api_secret: Secret used as the HMAC key (never sent itself).
        lang: Value sent as ``lang``.
        audio_encode: Value sent as ``audio_encode``.
        samplerate: Sample rate; converted with ``str()`` before use.
        uuid: Value sent as ``uuid``. Note that inside this function the
            name shadows the ``uuid`` module.

    Returns:
        A ``(params, signature)`` tuple: the unsorted parameter dict
        (including the generated ``utc`` timestamp) and the Base64 signature
        string.
    """
    # The timestamp is labelled with a fixed "+0800" offset, so the wall-clock
    # part must really be UTC+8. Deriving it from UTC (instead of the host's
    # local time) keeps the timestamp correct on machines in any time zone.
    utc8_offset_seconds = 8 * 3600
    utc_raw = time.strftime(
        "%Y-%m-%dT%H:%M:%S+0800",
        time.gmtime(time.time() + utc8_offset_seconds),
    )
    params = {
        "accessKeyId": api_key,
        "appId": app_id,
        "lang": lang,
        "utc": utc_raw,
        "uuid": uuid,
        "audio_encode": audio_encode,
        "samplerate": str(samplerate),
    }
    # Sign the parameters in key order; safe="" also escapes "/" so that
    # every reserved character is percent-encoded.
    base_string = "&".join(
        f"{urllib.parse.quote(k, safe='')}={urllib.parse.quote(v, safe='')}"
        for k, v in sorted(params.items())
    )
    digest = hmac.new(
        api_secret.encode("utf-8"),
        base_string.encode("utf-8"),
        hashlib.sha1,
    ).digest()
    signature = base64.b64encode(digest).decode("utf-8")
    return params, signature
def build_ws_url():
    """Return the signed WebSocket URL built from the module configuration.

    The query string carries every parameter returned by build_signature()
    in its original order, followed by the ``signature`` parameter; all keys
    and values are percent-encoded with no safe characters.
    """
    signed_params, sig = build_signature(
        APP_ID, API_KEY, API_SECRET, LANG, AUDIO_ENCODE, SAMPLE_RATE, UUID
    )

    def enc(text):
        return urllib.parse.quote(text, safe="")

    pairs = [f"{enc(key)}={enc(value)}" for key, value in signed_params.items()]
    pairs.append(f"signature={enc(sig)}")
    return f"{WS_HOST}?{'&'.join(pairs)}"
# ---------------------
# WebSocket 事件处理
# ---------------------
final_results = {} # Buffer of final recognition results: seg_id -> text
def on_open(ws):
    """Start streaming microphone audio once the WebSocket is connected.

    A daemon thread captures 16-bit mono PCM from the default input device
    and sends it to ``ws`` as binary messages of ``FRAME_BYTES`` bytes. When
    capture stops for any reason the audio resources are released and an
    end-of-stream JSON message (``{"end": true, "sessionId": ...}``) is sent.

    Args:
        ws: The connected WebSocket object passed in by ``WebSocketApp``.
    """
    print("[WS] 连接已打开,开始麦克风采集...")

    def run():
        # PyAudio's Stream.read() takes a number of frames (samples), not
        # bytes, so convert the byte budget using the sample width; otherwise
        # each message would carry twice FRAME_BYTES.
        frames_per_chunk = FRAME_BYTES // pyaudio.get_sample_size(pyaudio.paInt16)
        pa = None
        stream = None
        try:
            # Opened inside the try so a failure to open the device still
            # reaches the cleanup below.
            pa = pyaudio.PyAudio()
            stream = pa.open(format=pyaudio.paInt16,
                             channels=1,
                             rate=SAMPLE_RATE,
                             input=True,
                             frames_per_buffer=frames_per_chunk)
            while True:
                # The blocking read returns once one chunk of audio has been
                # captured, so it already paces the loop at real time; an
                # extra sleep would make the loop fall behind the microphone.
                data = stream.read(frames_per_chunk, exception_on_overflow=False)
                if not data:
                    break
                ws.send(data, opcode=0x2)  # 0x2 = binary frame
        except KeyboardInterrupt:
            print("[WS] 采集中断")
        except Exception as e:
            print(f"[WS] 采集异常: {e}")
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            if pa is not None:
                pa.terminate()
            end_msg = {"end": True, "sessionId": session_id}
            # The socket may already be closed (that is often why the loop
            # ended), so a failed send must not escape the thread.
            try:
                ws.send(json.dumps(end_msg))
                print("[WS] 已发送结束标识")
            except Exception as e:
                print(f"[WS] 发送结束标识失败: {e}")

    threading.Thread(target=run, daemon=True).start()
def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _extract_text(st):
    """Join every non-empty ``w`` found under ``st["rt"][*]["ws"][*]["cw"][*]``."""
    return "".join(
        cw["w"]
        for rt in st.get("rt") or []
        for ws_item in _as_dict(rt).get("ws") or []
        for cw in ws_item.get("cw") or []
        if _as_dict(cw).get("w")
    )


def on_message(ws, message):
    """Handle one message received from the recognition service.

    * ``msg_type == "result"`` and ``res_type == "asr"``: when the segment is
      a final result (``type`` 0; 1 means intermediate), its text is stored
      in ``final_results`` under its ``seg_id`` and the whole transcript,
      ordered by ``seg_id``, is printed. Intermediate results are ignored.
    * ``msg_type == "result"`` and ``res_type == "frc"``: the error
      description and detail are printed.
    * Anything else is printed as-is.

    Missing, ``null`` or non-dict ``data``/``cn``/``st`` fields are treated
    as empty instead of raising.

    Args:
        ws: The WebSocket object (unused).
        message: Raw message payload; expected to be JSON text.
    """
    try:
        obj = json.loads(message)
    except (ValueError, TypeError):
        print("[WS] 非JSON消息:", message[:200])
        return
    if not isinstance(obj, dict):
        # Valid JSON but not an object: nothing to dispatch on.
        print("[WS] 其他消息:", obj)
        return

    msg_type = obj.get("msg_type")
    res_type = obj.get("res_type")
    data = _as_dict(obj.get("data"))

    if msg_type == "result" and res_type == "asr":
        st = _as_dict(_as_dict(data.get("cn")).get("st"))
        # Accept both "0" and 0 so a change in the JSON type of the field
        # does not silently drop every final result.
        if str(st.get("type")) != "0":
            return  # only final results are processed
        final_results[data.get("seg_id")] = _extract_text(st)
        # Concatenate the segments in seg_id order; a missing seg_id (None)
        # sorts last instead of breaking the comparison.
        ordered_ids = sorted(final_results, key=lambda k: (k is None, k))
        ordered_text = "".join(final_results[i] for i in ordered_ids)
        print(f"[FINAL] {ordered_text}")
    elif msg_type == "result" and res_type == "frc":
        desc = data.get("desc")
        detail = data.get("detail")
        print(f"[ERR] 异常: {desc}, detail={detail}")
    else:
        print("[WS] 其他消息:", obj)
def on_error(ws, error):
    """Print an error reported for the WebSocket connection.

    Args:
        ws: The WebSocket object (unused).
        error: The error value to print.
    """
    label = "[WS] 连接错误:"
    print(label, error)
def on_close(ws, close_status_code, close_msg):
    """Print the status code and message the connection was closed with.

    Args:
        ws: The WebSocket object (unused).
        close_status_code: Close status code to print.
        close_msg: Close message to print.
    """
    summary = f"[WS] 连接关闭: code={close_status_code}, msg={close_msg}"
    print(summary)
def main():
    """Build the signed URL, attach the event handlers and run the client.

    Blocks in ``run_forever()`` until the WebSocket connection ends.
    """
    ws_url = build_ws_url()
    print("WebSocket URL:", ws_url)
    handlers = {
        "on_open": on_open,
        "on_message": on_message,
        "on_error": on_error,
        "on_close": on_close,
    }
    client = WebSocketApp(ws_url, **handlers)
    client.run_forever()


if __name__ == "__main__":
    main()

# (Web page footer residue, not part of the program: 浙公网安备 33010602011771号)