【人工智障】之一 语音识别
首先,配置 APP_ID、API_KEY(AK)和 SECRET_KEY(SK)
import os

from aip import AipSpeech

# Credentials for the AipSpeech client: application id, API key (AK) and
# secret key (SK).
# NOTE(review): real credentials should not be committed to source control.
# Each value can be overridden through an environment variable; the literals
# are kept only as fallbacks so the behaviour is unchanged when the variables
# are not set.
APP_ID = os.environ.get('AIP_APP_ID', '11312730')
API_KEY = os.environ.get('AIP_API_KEY', 'I7rDa8SGYVM4yFHGjuzdgOrO')
SECRET_KEY = os.environ.get('AIP_SECRET_KEY', 'BguQbuYvyeqhFsVFfNNe2hieYOsvp5yL')
语音合成
# Build the speech client from the module-level APP_ID / API_KEY / SECRET_KEY.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Ask the service to synthesise the text (a tongue twister, written as
# adjacent string literals that Python concatenates into one string).
result = client.synthesis(
    '我是李文强,给大家说句绕口令,'
    '八百标兵奔北坡, 炮兵并排北边跑, 炮兵怕把标兵碰, '
    '标兵怕碰炮兵炮. 八百标兵奔北坡, 北坡八百炮兵炮. '
    '标兵怕碰炮兵炮, 炮兵怕把标兵碰.,',
    'zh',
    1,
    {
        'vol': 5,
        'per': 4,
    },
)

# On success the call returns the audio as binary data; on failure it returns
# a dict describing the error (see the service's error-code table).
if isinstance(result, dict):
    # Report the failure instead of silently producing no file.
    print(f"speech synthesis failed: {result}")
else:
    with open('audio.mp3', 'wb') as f:
        f.write(result)
语音识别
def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    ffmpeg writes 16 kHz, mono, signed 16-bit little-endian PCM to
    ``<filePath>.pcm`` (overwriting any existing file), which is then read
    back in full.

    Args:
        filePath: Path of the source audio file (e.g. an mp3).

    Returns:
        The content of the generated ``.pcm`` file as bytes.

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Imported locally so the function does not depend on a module-level import.
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handled correctly, and fail loudly if the conversion
    # does not succeed instead of reading a stale or missing .pcm file.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", filePath,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# Recognise a local file.
res = client.asr(get_file_content('audio.mp3'), 'pcm', 16000, {
    'dev_pid': 1536,
})

# A failed recognition has no usable "result" list; report the whole response
# rather than crashing on indexing None.
candidates = res.get("result")
if candidates:
    print(candidates[0])
else:
    print(f"speech recognition failed: {res}")
给我说(必须先在当前目录提供一个名为 audio.mp3 的文件)
import os
import subprocess

from aip import AipSpeech

# Credentials for the AipSpeech client: application id, API key (AK) and
# secret key (SK).
# NOTE(review): real credentials should not be committed to source control.
# Each value can be overridden through an environment variable; the literals
# are kept only as fallbacks so the behaviour is unchanged when the variables
# are not set.
APP_ID = os.environ.get('AIP_APP_ID', '11312730')
API_KEY = os.environ.get('AIP_API_KEY', 'I7rDa8SGYVM4yFHGjuzdgOrO')
SECRET_KEY = os.environ.get('AIP_SECRET_KEY', 'BguQbuYvyeqhFsVFfNNe2hieYOsvp5yL')

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def audio2text(filename):
    """Recognise the speech in an audio file and return the recognised text.

    The file is first converted with ffmpeg to 16 kHz, mono, signed 16-bit
    little-endian PCM (written to ``<filename>.pcm``), and the PCM bytes are
    sent to the recognition service. The first recognition candidate is
    printed and returned.

    Args:
        filename: Path of the source audio file (e.g. an mp3).

    Returns:
        The first entry of the ``"result"`` list in the service response.

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        RuntimeError: If the response contains no recognition result.
    """
    pcm_path = f"{filename}.pcm"
    # Argument list instead of a shell string: safe for paths with spaces or
    # shell metacharacters, and check=True surfaces conversion failures.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", filename,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, "rb") as fp:
        res = client.asr(fp.read(), "pcm", 16000, {
            'dev_pid': 1536,
        })

    # A failed recognition has no usable "result" list; raise a descriptive
    # error instead of an opaque TypeError from indexing None.
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")

    recognized = candidates[0]
    print(recognized)
    return recognized


def text2audio(text):
    """Synthesise ``text`` to ``audio.mp3`` and open the file for playback.

    Args:
        text: The text to be spoken.

    Returns:
        None. On success ``audio.mp3`` in the current directory is
        overwritten; on failure the error response is printed and nothing is
        written or played.
    """
    result = client.synthesis(text, 'zh', 1, {
        "spd": 4,
        "vol": 5,
        "pit": 8,
        "per": 4,
    })
    # The service returns a dict only when synthesis failed.
    if isinstance(result, dict):
        print(f"speech synthesis failed: {result}")
        return

    with open("audio.mp3", "wb") as f:
        f.write(result)
    # Hands the file name to the shell so it is opened by the associated
    # player. NOTE(review): this appears to rely on Windows file associations
    # and is not portable - confirm the target platform.
    os.system("audio.mp3")


# Listen to audio.mp3, then answer: a fixed reply when asked for the name,
# otherwise repeat back what was heard.
text = audio2text("audio.mp3")
if "你叫什么名字" in text:
    text2audio("我的名字叫**")
else:
    text2audio(f"你刚刚是说,{text}")