python 模拟语音识别

公告

Posted on 2026-03-25 15:37 打杂滴阅读(1) 评论(0) 收藏举报

1. 生成模拟语音：用 pyttsx3 把文本“你吃了吗”合成为 wav 音频文件

import pyttsx3

import os

def generate_audio_from_text(text: str, output_file: str = "output.wav", *,
                             rate: int = 150, volume: float = 0.9) -> str:
    """Synthesize *text* into an audio file using pyttsx3.

    Args:
        text: Text to convert to speech.
        output_file: Name of the audio file to write.
        rate: Value for the engine's ``rate`` property. Defaults to 150,
            the value this function previously hard-coded.
        volume: Value for the engine's ``volume`` property (0-1). Defaults
            to 0.9, the value this function previously hard-coded.

    Returns:
        ``output_file``, exactly as passed in.
    """
    engine = pyttsx3.init()

    # Pick the first voice the engine reports, if it reports any.
    # NOTE(review): the first voice is used regardless of the language of
    # ``text`` -- confirm it can actually speak the input on the target host.
    voices = engine.getProperty('voices')
    if voices:
        engine.setProperty('voice', voices[0].id)

    engine.setProperty('rate', rate)
    engine.setProperty('volume', volume)

    # Queue the file synthesis, then block until the engine has processed it.
    engine.save_to_file(text, output_file)
    engine.runAndWait()

    print(f"音频已生成: {output_file}")
    return output_file

def main():
    """Generate the demo WAV file for a fixed phrase and report where it went."""
    phrase = "你吃了吗"
    wav_name = "nihaochilema.wav"

    generate_audio_from_text(phrase, wav_name)
    print(f"'{phrase}' 的音频已保存为 {wav_name}")


if __name__ == "__main__":
    main()

--------------

PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/py_test_audio.py
音频已生成: nihaochilema.wav
'你吃了吗' 的音频已保存为 nihaochilema.wav

---------------------------------------------------------------------

2.语音识别

import speech_recognition as sr

import pyttsx3

import threading

import time

import os

class VoiceRecognitionSystem:
    """Speech recognition from a microphone or an audio file, with spoken replies.

    Recognition goes through ``speech_recognition``'s Google recognizer;
    replies are spoken with a ``pyttsx3`` engine.
    """

    def __init__(self, language: str = 'zh-CN'):
        """Set up the recognizer and the text-to-speech engine.

        Args:
            language: Language code handed to ``recognize_google``. Defaults
                to ``'zh-CN'``, the value that used to be hard-coded.
        """
        self.recognizer = sr.Recognizer()
        # The microphone is created on first use (see the ``microphone``
        # property) so that file-only recognition does not have to open an
        # audio input device.
        self._microphone = None
        self.tts_engine = pyttsx3.init()
        self.language = language
        # True while start_listening() should keep looping.
        self.is_listening = False
        # Most recent non-empty text recognized by start_listening().
        self.recognized_text = ""

        # Configure the speech engine.
        self.tts_engine.setProperty('rate', 150)
        self.tts_engine.setProperty('volume', 0.9)

    @property
    def microphone(self):
        """The ``sr.Microphone`` used for live input, created on first access."""
        if self._microphone is None:
            self._microphone = sr.Microphone()
        return self._microphone

    @microphone.setter
    def microphone(self, value):
        # Keeps ``instance.microphone = ...`` working as it did when this was
        # a plain attribute.
        self._microphone = value

    def calibrate_microphone(self):
        """Sample two seconds of ambient noise to calibrate the recognizer."""
        print("正在校准麦克风，请保持安静...")
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source, duration=2)
        print("麦克风校准完成")

    def listen_for_speech(self):
        """Capture one phrase from the microphone.

        Returns:
            The captured audio, or ``None`` if nothing was heard within the
            5-second timeout or listening raised an error.
        """
        with self.microphone as source:
            print("正在监听...")
            try:
                return self.recognizer.listen(
                    source, timeout=5, phrase_time_limit=10
                )
            except sr.WaitTimeoutError:
                print("监听超时，未检测到语音")
            except Exception as e:
                # Best-effort: report the problem and let the caller retry.
                print(f"监听出错: {e}")
        return None

    def recognize_speech(self, audio) -> str:
        """Transcribe captured audio.

        Args:
            audio: Audio returned by ``listen_for_speech``; may be ``None``.

        Returns:
            The recognized text, or ``""`` when ``audio`` is ``None`` or
            recognition fails.
        """
        if audio is None:
            return ""

        try:
            # Use the Google speech recognition API.
            text = self.recognizer.recognize_google(
                audio, language=getattr(self, 'language', 'zh-CN')
            )
        except sr.UnknownValueError:
            print("无法理解音频内容")
            return ""
        except sr.RequestError as e:
            print(f"无法请求语音识别服务: {e}")
            return ""
        except Exception as e:
            print(f"识别过程中出错: {e}")
            return ""

        print(f"识别结果: {text}")
        return text

    def recognize_from_file(self, audio_file_path: str) -> str:
        """Transcribe the speech contained in an audio file.

        Args:
            audio_file_path: Path of the audio file to read.

        Returns:
            The recognized text, or ``""`` if the file does not exist or
            reading/recognition fails.
        """
        if not os.path.exists(audio_file_path):
            print(f"音频文件不存在: {audio_file_path}")
            return ""

        try:
            with sr.AudioFile(audio_file_path) as source:
                print(f"正在读取音频文件: {audio_file_path}")
                audio_data = self.recognizer.record(source)
            text = self.recognizer.recognize_google(
                audio_data, language=getattr(self, 'language', 'zh-CN')
            )
        except sr.UnknownValueError:
            print("无法理解音频文件内容")
            return ""
        except sr.RequestError as e:
            print(f"无法请求语音识别服务: {e}")
            return ""
        except Exception as e:
            # Also covers failures while opening/reading the file itself.
            print(f"识别音频文件时出错: {e}")
            return ""

        print(f"文件识别结果: {text}")
        return text

    def speak_text(self, text: str) -> None:
        """Speak *text* through the TTS engine; do nothing for empty text."""
        if not text:
            return
        print(f"语音输出: {text}")
        self.tts_engine.say(text)
        self.tts_engine.runAndWait()

    def start_listening(self) -> None:
        """Listen and respond in a loop until told to stop.

        The loop ends when ``is_listening`` becomes false (set by the stop
        command in ``process_recognized_text``) or on Ctrl+C.
        """
        self.is_listening = True
        print("语音识别系统已启动，按Ctrl+C停止")

        try:
            while self.is_listening:
                audio = self.listen_for_speech()
                if audio is None:
                    continue
                text = self.recognize_speech(audio)
                if text:
                    self.recognized_text = text
                    # Custom handling of recognized text can be hooked in here.
                    self.process_recognized_text(text)
        except KeyboardInterrupt:
            print("\n语音识别系统已停止")
        finally:
            # Leave the flag cleared however the loop ended.
            self.is_listening = False

    def process_recognized_text(self, text: str) -> None:
        """React to recognized text with a spoken response.

        Simple keyword matching: a greeting, the current time, a stop
        command (which also clears ``is_listening``), otherwise the text is
        echoed back.
        """
        if "你好" in text or "您好" in text:
            self.speak_text("你好！很高兴见到你。")
        elif "时间" in text:
            current_time = time.strftime("%H:%M:%S")
            self.speak_text(f"现在是{current_time}")
        elif "停止" in text or "结束" in text:
            self.speak_text("好的，我将停止语音识别")
            self.is_listening = False
        else:
            # Echo back what was recognized.
            self.speak_text(f"你说的是：{text}")

def main():
    """Interactive entry point: choose microphone or file input and run it.

    Choice ``1`` calibrates the microphone and starts the listening loop.
    Choice ``2`` asks for an audio file path, prints the recognized text and
    speaks it back. Anything else is reported as an invalid choice.
    """
    # Create the voice recognition system instance.
    voice_system = VoiceRecognitionSystem()

    print("请选择输入方式:")
    print("1. 实时麦克风输入")
    print("2. 音频文件输入")
    # Strip whitespace so an answer such as "2 " is still accepted.
    choice = input("请输入选择(1或2): ").strip()

    if choice == "1":
        voice_system.calibrate_microphone()
        voice_system.start_listening()
        return

    if choice != "2":
        print("无效选择")
        return

    # Strip whitespace and surrounding quotes: pasted paths often arrive
    # wrapped in quotes, which would make the existence check fail.
    audio_file = input("请输入音频文件路径(支持wav格式): ").strip().strip("\"'")
    if not audio_file:
        print("未提供音频文件路径")
        return

    result = voice_system.recognize_from_file(audio_file)
    if not result:
        print("未能识别音频内容")
        return

    print(f"识别完成: {result}")
    # Speak the result back.
    voice_system.speak_text(f"识别结果是：{result}")


if __name__ == "__main__":
    main()

-----------

PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/pyvoice.py
请选择输入方式:
1. 实时麦克风输入
2. 音频文件输入
请输入选择(1或2): 2
请输入音频文件路径(支持wav格式): ./nihaochilema.wav
正在读取音频文件: ./nihaochilema.wav
文件识别结果: 你吃了吗
识别完成: 你吃了吗
语音输出: 识别结果是：你吃了吗

再次测试（将 py_test_audio.py 中的 text 改为“我喜欢吃馒头”后重新生成音频，输出文件名不变）：

PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/py_test_audio.py
音频已生成: nihaochilema.wav
'我喜欢吃馒头' 的音频已保存为 nihaochilema.wav
PS F:\python> & C:/Users/Administrator/AppData/Local/Microsoft/WindowsApps/python3.13.exe f:/python/pyvoice.py
请选择输入方式:
1. 实时麦克风输入
2. 音频文件输入
请输入选择(1或2): 2
请输入音频文件路径(支持wav格式): ./nihaochilema.wav
正在读取音频文件: ./nihaochilema.wav
文件识别结果: 我喜欢吃馒头
识别完成: 我喜欢吃馒头
语音输出: 识别结果是：我喜欢吃馒头

刷新页面返回顶部

努力，奋斗

公告

python 模拟语音识别