C#实现连续语音转文字
一、基础实现方案(System.Speech)
1. 环境配置
// 安装NuGet包
Install-Package System.Speech
// 添加语言包(控制面板)
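控制面板 -> 语言 -> 添加中文(简体)语音识别包(若系统未安装 zh-CN 识别器,new SpeechRecognitionEngine(new CultureInfo("zh-CN")) 会抛出 ArgumentException)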
2. 核心代码实现
using System.Speech.Recognition;
using System.Windows.Forms;
/// <summary>
/// Continuous zh-CN dictation from the default audio device, built on
/// <see cref="SpeechRecognitionEngine"/>.
/// </summary>
/// <remarks>
/// Results can be consumed in two ways: subscribe to <see cref="SpeechRecognized"/> /
/// <see cref="SpeechRecognitionRejected"/>, and/or pass a WinForms text box to the
/// constructor to have each result appended to it on the UI thread.
/// </remarks>
public class ContinuousRecognizer : IDisposable
{
    // Results at or below this confidence are dropped.
    private const double MinConfidence = 0.7;

    // Optional WinForms sink for recognized text; null when the caller only uses events.
    private readonly TextBoxBase _resultBox;
    private SpeechRecognitionEngine _engine;
    private bool _isRunning;

    /// <summary>Raised for every result whose confidence exceeds the threshold.</summary>
    public event EventHandler<SpeechRecognizedEventArgs> SpeechRecognized;

    /// <summary>Raised when the engine rejects an utterance.</summary>
    public event EventHandler<SpeechRecognitionRejectedEventArgs> SpeechRecognitionRejected;

    /// <summary>Creates a recognizer that reports results through events only.</summary>
    public ContinuousRecognizer() : this(null)
    {
    }

    /// <summary>Creates a recognizer that also appends results to a text box.</summary>
    /// <param name="resultBox">Text box that receives one line per result; may be null.</param>
    public ContinuousRecognizer(TextBoxBase resultBox)
    {
        _resultBox = resultBox;
        InitializeEngine();
    }

    /// <summary>Builds the engine: zh-CN culture, dictation grammar, default microphone.</summary>
    private void InitializeEngine()
    {
        var engine = new SpeechRecognitionEngine(new System.Globalization.CultureInfo("zh-CN"));
        try
        {
            // Free-form dictation grammar (continuous recognition).
            engine.LoadGrammar(new DictationGrammar());
            engine.SetInputToDefaultAudioDevice();
            engine.SpeechRecognized += OnRecognized;
            engine.SpeechRecognitionRejected += OnRejected;
        }
        catch
        {
            // Do not leak the native recognizer when setup fails (e.g. no microphone).
            engine.Dispose();
            throw;
        }

        _engine = engine;
    }

    /// <summary>Starts continuous recognition; does nothing if already running.</summary>
    /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
    public void StartRecognition()
    {
        ThrowIfDisposed();
        if (_isRunning)
        {
            return;
        }

        // Multiple: keep recognizing until explicitly stopped.
        _engine.RecognizeAsync(RecognizeMode.Multiple);
        _isRunning = true;
    }

    /// <summary>Stops recognition after the current utterance; does nothing if not running.</summary>
    public void StopRecognition()
    {
        if (!_isRunning || _engine == null)
        {
            return;
        }

        _engine.RecognizeAsyncStop();
        _isRunning = false;
    }

    private void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        if (e.Result.Confidence > MinConfidence)
        {
            AppendLine(e.Result.Text);
            SpeechRecognized?.Invoke(this, e);
        }
    }

    private void OnRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        AppendLine("[识别失败]");
        SpeechRecognitionRejected?.Invoke(this, e);
    }

    /// <summary>Appends one line to the text box, marshaling to its UI thread when needed.</summary>
    private void AppendLine(string text)
    {
        var box = _resultBox;
        if (box == null || box.IsDisposed || !box.IsHandleCreated)
        {
            return;
        }

        string line = text + Environment.NewLine;
        if (box.InvokeRequired)
        {
            // BeginInvoke (not Invoke) so the recognition thread never blocks on the UI thread.
            box.BeginInvoke(new Action(() => box.AppendText(line)));
        }
        else
        {
            box.AppendText(line);
        }
    }

    private void ThrowIfDisposed()
    {
        if (_engine == null)
        {
            throw new ObjectDisposedException(nameof(ContinuousRecognizer));
        }
    }

    /// <summary>Cancels any running recognition and releases the engine. Safe to call twice.</summary>
    public void Dispose()
    {
        var engine = _engine;
        if (engine == null)
        {
            return;
        }

        _engine = null;
        engine.SpeechRecognized -= OnRecognized;
        engine.SpeechRecognitionRejected -= OnRejected;
        if (_isRunning)
        {
            // Cancel rather than Stop: do not wait for an in-flight utterance during teardown.
            engine.RecognizeAsyncCancel();
            _isRunning = false;
        }

        engine.Dispose();
    }
}
二、性能优化
1. 音频预处理
// Noise-suppression front end: captures with NAudio, denoises, then feeds the recognizer.
/// <summary>
/// Captures microphone audio, runs it through noise suppression and hands each
/// denoised buffer to a <see cref="SpeechRecognitionEngine"/>.
/// </summary>
public class AudioPreprocessor : IDisposable
{
    // Capture format; the same values are declared to the recognizer below.
    private const int SampleRate = 16000;
    private const int BitsPerSample = 16;
    private const int Channels = 1;

    private readonly WaveInEvent _waveIn;
    private readonly SpeechRecognitionEngine _engine;
    private readonly System.Speech.AudioFormat.SpeechAudioFormatInfo _format;

    /// <param name="engine">Recognizer that receives the denoised audio; not owned by this class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="engine"/> is null.</exception>
    public AudioPreprocessor(SpeechRecognitionEngine engine)
    {
        if (engine == null)
        {
            throw new ArgumentNullException(nameof(engine));
        }

        _engine = engine;
        _waveIn = new WaveInEvent { WaveFormat = new WaveFormat(SampleRate, BitsPerSample, Channels) };
        _format = new System.Speech.AudioFormat.SpeechAudioFormatInfo(
            SampleRate,
            System.Speech.AudioFormat.AudioBitsPerSample.Sixteen,
            System.Speech.AudioFormat.AudioChannel.Mono);
        _waveIn.DataAvailable += OnDataAvailable;
    }

    /// <summary>Begins capturing; buffers start arriving in <c>OnDataAvailable</c>.</summary>
    public void Start()
    {
        _waveIn.StartRecording();
    }

    /// <summary>Stops capturing.</summary>
    public void Stop()
    {
        _waveIn.StopRecording();
    }

    private void OnDataAvailable(object sender, WaveInEventArgs e)
    {
        if (e.BytesRecorded <= 0)
        {
            return;
        }

        // Apply WebRTC noise suppression.
        // NOTE(review): assumes NoiseSuppress returns PCM in the same 16 kHz/16-bit/mono
        // layout it was given — confirm against WebRTCNative.
        byte[] denoisedData = WebRTCNative.NoiseSuppress(e.Buffer, e.BytesRecorded);
        if (denoisedData == null || denoisedData.Length == 0)
        {
            return;
        }

        using (var ms = new MemoryStream(denoisedData))
        {
            // The buffer is headerless PCM, so the format must be stated explicitly;
            // SetInputToWaveStream expects a WAV container with a RIFF header.
            _engine.SetInputToAudioStream(ms, _format);
            _engine.Recognize();
        }
    }

    /// <summary>Detaches the handler and releases the capture device.</summary>
    public void Dispose()
    {
        _waveIn.DataAvailable -= OnDataAvailable;
        _waveIn.Dispose();
    }
}
2. 多线程处理
// Producer-consumer model: callers enqueue audio, one worker thread recognizes it.
/// <summary>
/// Queues audio buffers and recognizes them sequentially on a dedicated background thread.
/// </summary>
public class RecognitionProcessor : IDisposable
{
    private readonly BlockingCollection<byte[]> _audioQueue = new BlockingCollection<byte[]>();
    private readonly Thread _processingThread;
    private bool _disposed;

    /// <summary>Raised on the worker thread with the text of each recognized chunk.</summary>
    public event Action<string> TextRecognized;

    public RecognitionProcessor()
    {
        // Background thread: must not keep the process alive after the main thread exits.
        _processingThread = new Thread(ProcessAudio) { IsBackground = true };
        _processingThread.Start();
    }

    /// <summary>Queues one audio buffer for recognition.</summary>
    /// <param name="audioData">Audio bytes for a single recognition pass.</param>
    /// <exception cref="ArgumentNullException"><paramref name="audioData"/> is null.</exception>
    public void Enqueue(byte[] audioData)
    {
        if (audioData == null)
        {
            // Fail at the call site instead of crashing the worker thread later.
            throw new ArgumentNullException(nameof(audioData));
        }

        _audioQueue.Add(audioData);
    }

    private void ProcessAudio()
    {
        // One engine for the thread's lifetime; creating one per chunk is slow and leaked it.
        using (var recognizer = new SpeechRecognitionEngine())
        {
            recognizer.LoadGrammar(new DictationGrammar());

            foreach (var chunk in _audioQueue.GetConsumingEnumerable())
            {
                if (chunk.Length == 0)
                {
                    continue;
                }

                try
                {
                    using (var stream = new MemoryStream(chunk, false))
                    {
                        // NOTE(review): assumes each chunk is a complete WAV (RIFF) buffer —
                        // confirm with the producer; headerless PCM needs SetInputToAudioStream.
                        recognizer.SetInputToWaveStream(stream);
                        RecognitionResult result = recognizer.Recognize();
                        if (result != null)
                        {
                            TextRecognized?.Invoke(result.Text);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // One bad chunk must not terminate the worker; report it and move on.
                    System.Diagnostics.Debug.WriteLine("Recognition failed for chunk: " + ex);
                }
                finally
                {
                    // Detach from the stream that is about to be (or was) disposed.
                    recognizer.SetInputToNull();
                }
            }
        }
    }

    /// <summary>Stops accepting audio, drains the queue and waits for the worker to exit.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _audioQueue.CompleteAdding();
        // Joining from the worker itself (e.g. inside a TextRecognized handler) would deadlock.
        if (Thread.CurrentThread != _processingThread)
        {
            _processingThread.Join();
            _audioQueue.Dispose();
        }
    }
}
三、高级功能实现
1. 自定义语法识别
// Build a command grammar in code with GrammarBuilder (no SRGS file is involved).
// Each spoken phrase carries an explicit semantic value, stored under the "action" key.
var actionChoices = new Choices(new GrammarBuilder[]
{
    new SemanticResultValue("打开", "打开"),
    new SemanticResultValue("关闭", "关闭"),
    new SemanticResultValue("保存", "保存"),
});
var grammarBuilder = new GrammarBuilder();
// The grammar's culture must match the recognizer's, otherwise LoadGrammar rejects it.
grammarBuilder.Culture = _recognizer.RecognizerInfo.Culture;
// Wrap the choices themselves in the key; passing a plain string here would instead
// require that string to be spoken and make it the semantic value.
grammarBuilder.Append(new SemanticResultKey("action", (GrammarBuilder)actionChoices));
var grammar = new Grammar(grammarBuilder);
_recognizer.LoadGrammar(grammar);
// Event handling
_recognizer.SpeechRecognized += (s, e) =>
{
    var semantics = e.Result.Semantics;
    // Results from other loaded grammars (e.g. dictation) carry no "action" key.
    if (!semantics.ContainsKey("action"))
    {
        return;
    }

    var action = semantics["action"].Value as string;
    if (string.Equals(action, "打开", StringComparison.Ordinal))
    {
        OpenApplication();
    }
};
2. 实时转写服务
// Streams recognized text to a server over a WebSocket.
/// <summary>
/// Sends every recognized utterance over a WebSocket as a JSON text message
/// with <c>Text</c> and <c>Timestamp</c> fields.
/// </summary>
public class StreamingTranscriber : IDisposable
{
    private readonly SpeechRecognizer _recognizer;
    private readonly ClientWebSocket _webSocket;
    // ClientWebSocket allows only one outstanding send; this serializes them.
    private readonly SemaphoreSlim _sendGate = new SemaphoreSlim(1, 1);

    /// <param name="webSocket">Connected socket to send on; not owned by this class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webSocket"/> is null.</exception>
    public StreamingTranscriber(ClientWebSocket webSocket)
    {
        if (webSocket == null)
        {
            throw new ArgumentNullException(nameof(webSocket));
        }

        _webSocket = webSocket;
        _recognizer = new SpeechRecognizer();
        // Without a loaded grammar the recognizer has nothing to match and raises no results.
        _recognizer.LoadGrammar(new DictationGrammar());
        _recognizer.SpeechRecognized += OnRecognized;
    }

    // async void is acceptable only because this is an event handler; every exception
    // must be handled here, since an escaped one would crash the process.
    private async void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Serialize before waiting on the gate so the timestamp reflects recognition time.
        var json = JsonSerializer.Serialize(new
        {
            Text = e.Result.Text,
            Timestamp = DateTime.Now
        });

        try
        {
            await SendAsync(json).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("Failed to send transcript: " + ex);
        }
    }

    /// <summary>Sends one complete text message, one at a time; skipped if the socket is not open.</summary>
    private async Task SendAsync(string json)
    {
        await _sendGate.WaitAsync().ConfigureAwait(false);
        try
        {
            if (_webSocket.State != WebSocketState.Open)
            {
                return;
            }

            await _webSocket.SendAsync(
                new ArraySegment<byte>(Encoding.UTF8.GetBytes(json)),
                WebSocketMessageType.Text,
                true,
                CancellationToken.None).ConfigureAwait(false);
        }
        finally
        {
            _sendGate.Release();
        }
    }

    /// <summary>Stops listening and releases the recognizer. The socket is left to its owner.</summary>
    public void Dispose()
    {
        _recognizer.SpeechRecognized -= OnRecognized;
        _recognizer.Dispose();
        _sendGate.Dispose();
    }
}
四、部署与调试
1. 硬件要求
| 组件 | 最低配置 | 推荐配置 |
|---|---|---|
| CPU | i5-4代(4核) | i7-10代(8核) |
| 内存 | 8GB | 16GB+ |
| 声卡 | 普通USB声卡 | 支持ASIO的专业声卡 |
| 麦克风 | 16kHz采样率 | 高信噪比指向性麦克风 |
2. 调试技巧
// Enable verbose Speech SDK logging.
/// <summary>
/// Creates an Azure Speech SDK configuration with file logging enabled and
/// profanity filtering turned off.
/// </summary>
/// <param name="logFilePath">File the Speech SDK writes its diagnostic log to.</param>
/// <returns>
/// The configured <c>SpeechConfig</c>. Recognizers must be created from this instance;
/// the settings are not global, so discarding the return value has no effect.
/// </returns>
public static SpeechConfig EnableDebugLogging(string logFilePath = "speech-sdk.log")
{
    // Placeholders: supply the real key/region from configuration, never from source.
    var config = SpeechConfig.FromSubscription("YourKey", "YourRegion");
    // Setting a log file name is what switches SDK logging on.
    config.SetProperty(PropertyId.Speech_LogFilename, logFilePath);
    config.SetProfanity(ProfanityOption.Raw);
    return config;
}
// Performance monitoring.
/// <summary>Writes the recognition throughput (characters per second) to the debug output.</summary>
/// <param name="duration">Time spent recognizing.</param>
/// <param name="wordCount">Number of characters recognized in that time.</param>
public void LogPerformance(TimeSpan duration, int wordCount)
{
    if (duration <= TimeSpan.Zero)
    {
        // Dividing by a zero duration would print Infinity or NaN.
        Debug.WriteLine("识别速度: N/A");
        return;
    }

    Debug.WriteLine($"识别速度: {wordCount/duration.TotalSeconds}字/秒");
}
五、扩展应用场景
1. 会议记录系统
// Meeting minutes: turns each recognized utterance into a slide.
/// <summary>
/// Feeds recognized speech into a <c>PptxBuilder</c>, one slide per utterance.
/// </summary>
/// <remarks>
/// NOTE(review): relies on <c>ContinuousRecognizer</c> exposing a <c>SpeechRecognized</c>
/// event whose args carry <c>Result.Text</c> — confirm against that class.
/// </remarks>
public class MeetingRecorder : IDisposable
{
    private readonly ContinuousRecognizer _recognizer;
    private readonly PptxBuilder _pptBuilder;

    public MeetingRecorder()
    {
        _recognizer = new ContinuousRecognizer();
        _pptBuilder = new PptxBuilder();
        _recognizer.SpeechRecognized += (s, e) =>
        {
            string text = e.Result.Text;
            // Skip blank results so they do not become empty slides.
            if (!string.IsNullOrWhiteSpace(text))
            {
                _pptBuilder.AddSlide(text);
            }
        };
    }

    /// <summary>Starts listening; nothing is recorded until this is called.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Releases the owned recognizer.</summary>
    public void Dispose()
    {
        _recognizer.Dispose();
    }
}
2. 工业设备控制
// Critical voice command handling.
/// <summary>
/// Listens for the spoken emergency-stop command and halts the machine when it is heard.
/// </summary>
/// <remarks>
/// NOTE(review): relies on <c>ContinuousRecognizer</c> exposing a <c>SpeechRecognized</c>
/// event whose args carry <c>Result.Text</c> — confirm against that class.
/// </remarks>
public class VoiceController : IDisposable
{
    // Phrase that triggers the stop; matched anywhere inside the recognized text.
    private const string EmergencyStopPhrase = "紧急停止";

    private readonly ContinuousRecognizer _recognizer;

    public VoiceController()
    {
        _recognizer = new ContinuousRecognizer();
        _recognizer.SpeechRecognized += (s, e) =>
        {
            string text = e.Result.Text;
            if (text != null && text.Contains(EmergencyStopPhrase))
            {
                Machine.Stop();
            }
        };
    }

    /// <summary>Starts listening; commands are ignored until this is called.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Releases the owned recognizer.</summary>
    public void Dispose()
    {
        _recognizer.Dispose();
    }
}
参考代码 语音转文字连续识别的C#小程序 www.youwenfan.com/contentcnj/70643.html
六、常见问题解决
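- 识别率低
  - 添加领域特定词汇表
  - 调整 InitialSilenceTimeout 和 EndSilenceTimeout 参数
  - 使用 UpdateRecognizerSetting("AdaptationOn", 1) 启用自适应训练(SpeechRecognitionEngine 没有 SetProperty 方法)
- 内存泄漏
  - 实现对象池模式管理识别引擎
  - 限制同时处理的音频流数量
  - 及时对识别引擎和音频流调用 Dispose();定期调用 GC.Collect() 只能作为临时手段,不能替代正确释放资源
- 多设备冲突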
// Identify the capture endpoint explicitly.
// SpeechRecognitionEngine.SetInputToDefaultAudioDevice() takes no arguments and always binds
// to the Windows default recording device; System.Speech has no API that accepts an MMDevice.
// To recognize from a specific device, either make it the system default recording device,
// or capture from it yourself (e.g. NAudio WasapiCapture) and pass the PCM stream to
// SetInputToAudioStream.
var deviceEnumerator = new MMDeviceEnumerator();
var inputDevice = deviceEnumerator.GetDefaultAudioEndpoint(DataFlow.Capture, Role.Communications);
// Log which endpoint Windows reports so a device conflict is visible while debugging.
System.Diagnostics.Debug.WriteLine("Capture endpoint: " + inputDevice.FriendlyName);
_engine.SetInputToDefaultAudioDevice();

浙公网安备 33010602011771号