# PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/py_test_audio.py
# 音频已生成: nihaochilema.wav
# '你吃了吗' 的音频已保存为 nihaochilema.wav
import speech_recognition as sr
import pyttsx3
import threading
import time
import os
class VoiceRecognitionSystem:
    """Speech-to-text helper with spoken (text-to-speech) feedback.

    Combines a ``speech_recognition`` recognizer, a microphone source and a
    ``pyttsx3`` engine. Audio can come from the live microphone or from an
    audio file; recognized text is handled by simple keyword commands.

    Attributes:
        recognizer: The ``sr.Recognizer`` used for capture and recognition.
        microphone: The ``sr.Microphone`` source, created on first access.
        tts_engine: The ``pyttsx3`` engine used for spoken output.
        is_listening: True while the continuous listening loop should run.
        recognized_text: The most recent non-empty recognition result.
    """

    # Backing store for the lazily created microphone (see ``microphone``).
    _microphone = None

    def __init__(self):
        """Create the recognizer and the speech engine."""
        self.recognizer = sr.Recognizer()
        self.tts_engine = pyttsx3.init()
        self.is_listening = False
        self.recognized_text = ""
        # Configure the speech engine (speaking rate and output volume).
        self.tts_engine.setProperty('rate', 150)
        self.tts_engine.setProperty('volume', 0.9)

    @property
    def microphone(self):
        """Return the microphone source, creating it on first use.

        The microphone is not built in ``__init__`` so that file-based
        recognition keeps working on machines where constructing
        ``sr.Microphone`` fails (for example when PyAudio is not installed).
        """
        if self._microphone is None:
            self._microphone = sr.Microphone()
        return self._microphone

    @microphone.setter
    def microphone(self, value):
        """Allow callers to supply their own audio source."""
        self._microphone = value

    def calibrate_microphone(self) -> None:
        """Calibrate the recognizer against ambient noise for two seconds."""
        print("正在校准麦克风,请保持安静...")
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source, duration=2)
        print("麦克风校准完成")

    def listen_for_speech(self):
        """Capture one phrase from the microphone.

        Returns:
            The captured audio, or ``None`` if no speech started within the
            timeout or any other error occurred while listening.
        """
        with self.microphone as source:
            print("正在监听...")
            try:
                return self.recognizer.listen(
                    source, timeout=5, phrase_time_limit=10
                )
            except sr.WaitTimeoutError:
                print("监听超时,未检测到语音")
                return None
            except Exception as e:
                # Best effort: report the problem and let the caller retry.
                print(f"监听出错: {e}")
                return None

    def recognize_speech(self, audio) -> str:
        """Convert captured audio to text.

        Args:
            audio: Audio returned by ``listen_for_speech``; may be ``None``.

        Returns:
            The recognized text, or ``""`` when ``audio`` is ``None`` or
            recognition fails for any reason.
        """
        if audio is None:
            return ""
        try:
            # Use the Google speech recognition API (Mandarin, zh-CN).
            text = self.recognizer.recognize_google(audio, language='zh-CN')
        except sr.UnknownValueError:
            print("无法理解音频内容")
            return ""
        except sr.RequestError as e:
            print(f"无法请求语音识别服务: {e}")
            return ""
        except Exception as e:
            print(f"识别过程中出错: {e}")
            return ""
        print(f"识别结果: {text}")
        return text

    def recognize_from_file(self, audio_file_path) -> str:
        """Recognize speech stored in an audio file.

        Args:
            audio_file_path: Path of the audio file to read.

        Returns:
            The recognized text, or ``""`` if the file does not exist or
            reading/recognition fails.
        """
        if not os.path.exists(audio_file_path):
            print(f"音频文件不存在: {audio_file_path}")
            return ""
        try:
            with sr.AudioFile(audio_file_path) as source:
                print(f"正在读取音频文件: {audio_file_path}")
                audio_data = self.recognizer.record(source)
            # The file is closed before the recognition request is made.
            text = self.recognizer.recognize_google(
                audio_data, language='zh-CN'
            )
        except sr.UnknownValueError:
            print("无法理解音频文件内容")
            return ""
        except sr.RequestError as e:
            print(f"无法请求语音识别服务: {e}")
            return ""
        except Exception as e:
            print(f"识别音频文件时出错: {e}")
            return ""
        print(f"文件识别结果: {text}")
        return text

    def speak_text(self, text) -> None:
        """Speak ``text`` through the TTS engine; do nothing if it is empty."""
        if not text:
            return
        print(f"语音输出: {text}")
        self.tts_engine.say(text)
        self.tts_engine.runAndWait()

    def start_listening(self) -> None:
        """Listen, recognize and process phrases in a loop.

        The loop ends when ``is_listening`` becomes False (set by the stop
        command in ``process_recognized_text``) or on Ctrl+C.
        """
        self.is_listening = True
        print("语音识别系统已启动,按Ctrl+C停止")
        try:
            while self.is_listening:
                audio = self.listen_for_speech()
                if audio is None:
                    continue
                text = self.recognize_speech(audio)
                if text:
                    self.recognized_text = text
                    # Custom handling logic can be added here.
                    self.process_recognized_text(text)
        except KeyboardInterrupt:
            print("\n语音识别系统已停止")
        finally:
            # Never leave the flag set, even if an unexpected error escapes.
            self.is_listening = False

    def process_recognized_text(self, text) -> None:
        """React to recognized text with a simple keyword-based command set.

        Greetings and time queries get a spoken reply, a stop/end command
        additionally clears ``is_listening``, and anything else is echoed.
        """
        if "你好" in text or "您好" in text:
            self.speak_text("你好!很高兴见到你。")
        elif "时间" in text:
            current_time = time.strftime("%H:%M:%S")
            self.speak_text(f"现在是{current_time}")
        elif "停止" in text or "结束" in text:
            self.speak_text("好的,我将停止语音识别")
            self.is_listening = False
        else:
            # Echo back whatever was recognized.
            self.speak_text(f"你说的是:{text}")
def main():
    """Run the interactive demo: live microphone input or audio-file input.

    Prompts for an input mode. Mode 1 calibrates the microphone and starts
    the continuous listening loop; mode 2 asks for an audio file path,
    recognizes it and speaks the result. Any other choice prints an error.
    """
    # Create the voice recognition system instance.
    voice_system = VoiceRecognitionSystem()
    print("请选择输入方式:")
    print("1. 实时麦克风输入")
    print("2. 音频文件输入")
    # Strip whitespace so input such as "1 " is still accepted.
    choice = input("请输入选择(1或2): ").strip()
    if choice == "1":
        # Calibrate the microphone, then start recognizing speech.
        voice_system.calibrate_microphone()
        voice_system.start_listening()
    elif choice == "2":
        # Recognize speech from an audio file.
        audio_file = input("请输入音频文件路径(支持wav格式): ").strip()
        if not audio_file:
            print("未提供音频文件路径")
            return
        result = voice_system.recognize_from_file(audio_file)
        if result:
            print(f"识别完成: {result}")
            # Speak the recognition result back to the user.
            voice_system.speak_text(f"识别结果是:{result}")
        else:
            print("未能识别音频内容")
    else:
        print("无效选择")
# Run the interactive demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()