C#实现连续语音转文字

一、基础实现方案(System.Speech)

1. 环境配置

// 安装NuGet包
Install-Package System.Speech

// 添加语言包(控制面板)
控制面板 -> 语言 -> 添加中文语音包

2. 核心代码实现

using System.Speech.Recognition;
using System.Windows.Forms;

/// <summary>
/// Continuous (dictation-mode) speech-to-text built on a System.Speech
/// <see cref="SpeechRecognitionEngine"/> created for the zh-CN culture.
/// </summary>
/// <remarks>
/// Every recognition result is forwarded through <see cref="SpeechRecognized"/>;
/// results whose confidence exceeds <see cref="MinConfidence"/> are additionally
/// appended to <c>txtResult</c>, and rejected utterances append a failure marker.
/// NOTE(review): <c>txtResult</c> is not declared in this snippet; it is assumed to be
/// a WinForms TextBox reachable from this class - confirm against the hosting form.
/// </remarks>
public class ContinuousRecognizer : IDisposable
{
    /// <summary>Results at or below this confidence are not written to the result box.</summary>
    private const double MinConfidence = 0.7;

    private SpeechRecognitionEngine _engine;
    private bool _isRunning;
    private bool _disposed;

    /// <summary>
    /// Raised for every result the engine recognizes, regardless of confidence,
    /// so that non-UI consumers can apply their own filtering.
    /// </summary>
    public event EventHandler<SpeechRecognizedEventArgs> SpeechRecognized;

    /// <summary>Creates the engine, loads the dictation grammar and binds the default microphone.</summary>
    public ContinuousRecognizer()
    {
        InitializeEngine();
    }

    private void InitializeEngine()
    {
        _engine = new SpeechRecognitionEngine(new System.Globalization.CultureInfo("zh-CN"));
        try
        {
            // Free-form dictation grammar; recognition itself is started later in StartRecognition.
            _engine.LoadGrammar(new DictationGrammar());

            // Use the system default capture device.
            _engine.SetInputToDefaultAudioDevice();

            _engine.SpeechRecognized += OnRecognized;
            _engine.SpeechRecognitionRejected += OnRejected;
        }
        catch
        {
            // Do not leak the native recognizer when setup fails (e.g. no microphone).
            _engine.Dispose();
            _engine = null;
            throw;
        }
    }

    /// <summary>Starts continuous recognition; does nothing when already running.</summary>
    /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
    public void StartRecognition()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(ContinuousRecognizer));
        }

        if (_isRunning)
        {
            return;
        }

        // RecognizeMode.Multiple keeps recognizing until explicitly stopped.
        _engine.RecognizeAsync(RecognizeMode.Multiple);
        _isRunning = true;
    }

    /// <summary>Stops recognition after the current operation completes; does nothing when not running.</summary>
    public void StopRecognition()
    {
        if (!_isRunning)
        {
            return;
        }

        _engine.RecognizeAsyncStop();
        _isRunning = false;
    }

    private void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        SpeechRecognized?.Invoke(this, e);

        if (e.Result.Confidence > MinConfidence)
        {
            AppendLine(e.Result.Text);
        }
    }

    private void OnRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        AppendLine("[识别失败]");
    }

    /// <summary>Appends one line to the result box, marshaling to its owning thread when required.</summary>
    private void AppendLine(string line)
    {
        if (txtResult == null || txtResult.IsDisposed)
        {
            return;
        }

        if (txtResult.InvokeRequired)
        {
            // BeginInvoke (not Invoke) so the recognition callback never blocks on the UI thread.
            txtResult.BeginInvoke(new Action(() => txtResult.AppendText(line + Environment.NewLine)));
        }
        else
        {
            txtResult.AppendText(line + Environment.NewLine);
        }
    }

    /// <summary>Detaches handlers, cancels any running recognition and releases the engine.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (_engine == null)
        {
            return;
        }

        _engine.SpeechRecognized -= OnRecognized;
        _engine.SpeechRecognitionRejected -= OnRejected;

        if (_isRunning)
        {
            // Cancel rather than stop: pending results are not needed during teardown.
            _engine.RecognizeAsyncCancel();
            _isRunning = false;
        }

        _engine.Dispose();
        _engine = null;
    }
}

二、性能优化

1. 音频预处理

// Noise-suppression stage placed in front of the recognizer (capture via NAudio).
/// <summary>
/// Captures audio with NAudio's <c>WaveInEvent</c>, runs each captured buffer through
/// <c>WebRTCNative.NoiseSuppress</c> and feeds the result to a
/// <see cref="SpeechRecognitionEngine"/>.
/// </summary>
/// <remarks>
/// Each capture buffer is recognized on its own with a blocking <c>Recognize()</c> call,
/// as in the original design.
/// NOTE(review): capture buffers are short, so per-buffer recognition may split utterances;
/// consider accumulating audio before recognizing - confirm against requirements.
/// </remarks>
public class AudioPreprocessor : IDisposable
{
    private readonly WaveInEvent _waveIn;
    private readonly SpeechRecognitionEngine _engine;

    /// <param name="engine">Engine that receives the denoised audio; owned by the caller.</param>
    /// <exception cref="ArgumentNullException"><paramref name="engine"/> is null.</exception>
    public AudioPreprocessor(SpeechRecognitionEngine engine)
    {
        if (engine == null)
        {
            throw new ArgumentNullException(nameof(engine));
        }

        _engine = engine;
        _waveIn = new WaveInEvent();
        _waveIn.DataAvailable += OnDataAvailable;
    }

    /// <summary>Begins capturing; without this call no buffers are ever delivered.</summary>
    public void Start()
    {
        _waveIn.StartRecording();
    }

    /// <summary>Stops capturing.</summary>
    public void Stop()
    {
        _waveIn.StopRecording();
    }

    private void OnDataAvailable(object sender, WaveInEventArgs e)
    {
        if (e.BytesRecorded <= 0)
        {
            return;
        }

        // Apply the WebRTC noise-suppression routine to the valid part of the buffer.
        byte[] denoisedData = WebRTCNative.NoiseSuppress(e.Buffer, e.BytesRecorded);
        if (denoisedData == null || denoisedData.Length == 0)
        {
            return;
        }

        // Capture buffers are header-less PCM, so the format must be supplied explicitly;
        // SetInputToWaveStream would expect a RIFF/WAV header that is not present here.
        // The enum casts rely on AudioBitsPerSample (8/16) and AudioChannel (1/2)
        // using the numeric bit depth and channel count as their values.
        var format = new System.Speech.AudioFormat.SpeechAudioFormatInfo(
            _waveIn.WaveFormat.SampleRate,
            (System.Speech.AudioFormat.AudioBitsPerSample)_waveIn.WaveFormat.BitsPerSample,
            (System.Speech.AudioFormat.AudioChannel)_waveIn.WaveFormat.Channels);

        using (var ms = new MemoryStream(denoisedData))
        {
            try
            {
                _engine.SetInputToAudioStream(ms, format);
                _engine.Recognize();
            }
            finally
            {
                // Detach the engine from the stream before the stream is disposed.
                _engine.SetInputToNull();
            }
        }
    }

    /// <summary>Unhooks the capture callback and releases the capture device.</summary>
    public void Dispose()
    {
        _waveIn.DataAvailable -= OnDataAvailable;
        _waveIn.Dispose();
    }
}

2. 多线程处理

// Producer/consumer pipeline: callers enqueue audio, one worker thread recognizes it.
/// <summary>
/// Queues audio chunks and recognizes them sequentially on a dedicated background thread
/// using a single, reused <see cref="SpeechRecognitionEngine"/>.
/// </summary>
/// <remarks>
/// NOTE(review): each chunk is assumed to be a complete WAV (RIFF) payload - confirm
/// against the producer; header-less PCM would need SetInputToAudioStream with an explicit format.
/// </remarks>
public class RecognitionProcessor : IDisposable
{
    private readonly BlockingCollection<byte[]> _audioQueue = new BlockingCollection<byte[]>();
    private readonly Thread _processingThread;
    private bool _disposed;

    /// <summary>Raised on the worker thread for every chunk that produced a recognition result.</summary>
    public event Action<RecognitionResult> Recognized;

    /// <summary>Starts the worker thread immediately.</summary>
    public RecognitionProcessor()
    {
        _processingThread = new Thread(ProcessAudio)
        {
            // Background thread: must not keep the process alive after the app exits.
            IsBackground = true,
            Name = "RecognitionProcessor"
        };
        _processingThread.Start();
    }

    /// <summary>Adds one audio chunk to the work queue.</summary>
    /// <param name="audioData">Audio bytes to recognize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="audioData"/> is null.</exception>
    public void Enqueue(byte[] audioData)
    {
        if (audioData == null)
        {
            throw new ArgumentNullException(nameof(audioData));
        }

        _audioQueue.Add(audioData);
    }

    private void ProcessAudio()
    {
        // One engine for the lifetime of the worker; creating one per chunk is
        // expensive and the original never disposed them.
        using (var recognizer = new SpeechRecognitionEngine())
        {
            recognizer.LoadGrammar(new DictationGrammar());

            // Blocks until an item is available; ends once CompleteAdding has been
            // called and the queue is drained.
            foreach (var chunk in _audioQueue.GetConsumingEnumerable())
            {
                if (chunk.Length == 0)
                {
                    continue;
                }

                using (var stream = new MemoryStream(chunk))
                {
                    try
                    {
                        // Recognize() has no Stream overload; the input is bound first.
                        recognizer.SetInputToWaveStream(stream);
                        RecognitionResult result = recognizer.Recognize();
                        if (result != null)
                        {
                            Recognized?.Invoke(result);
                        }
                    }
                    catch (FormatException ex)
                    {
                        // A malformed chunk must not take down the worker thread.
                        System.Diagnostics.Debug.WriteLine("Skipping unreadable audio chunk: " + ex.Message);
                    }
                    finally
                    {
                        // Detach the engine from the stream before the stream is disposed.
                        recognizer.SetInputToNull();
                    }
                }
            }
        }
    }

    /// <summary>Stops accepting work, lets the worker drain the queue, then releases resources.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _audioQueue.CompleteAdding();

        // Joining from the worker itself (e.g. Dispose called inside a Recognized handler) would deadlock.
        if (Thread.CurrentThread != _processingThread)
        {
            _processingThread.Join();
            _audioQueue.Dispose();
        }
    }
}

三、高级功能实现

1. 自定义语法识别

// Build a small command grammar in code with GrammarBuilder.
var actions = new Choices("打开", "关闭", "保存");

var grammarBuilder = new GrammarBuilder();
// The grammar culture has to match the recognizer it is loaded into.
grammarBuilder.Culture = _recognizer.RecognizerInfo.Culture;
// Tag the spoken command itself with the "action" key, so Semantics["action"]
// carries the chosen word rather than a separate literal phrase.
grammarBuilder.Append(new SemanticResultKey("action", new GrammarBuilder(actions)));

var grammar = new Grammar(grammarBuilder);
_recognizer.LoadGrammar(grammar);

// Event handling
_recognizer.SpeechRecognized += (s, e) =>
{
    var semantics = e.Result.Semantics;

    // Results produced by other loaded grammars do not carry the "action" key.
    if (!semantics.ContainsKey("action"))
    {
        return;
    }

    string action = semantics["action"].Value?.ToString();
    if (action == "打开")
    {
        OpenApplication();
    }
};

2. 实时转写服务

// Streams recognition results to a peer over a WebSocket.
/// <summary>
/// Listens to a System.Speech <see cref="SpeechRecognizer"/> and sends each recognized
/// phrase over the supplied WebSocket as a UTF-8 JSON text message with
/// <c>Text</c> and <c>Timestamp</c> fields.
/// </summary>
/// <remarks>The WebSocket is owned by the caller and is not closed or disposed here.</remarks>
public class StreamingTranscriber : IDisposable
{
    private readonly SpeechRecognizer _recognizer;
    private readonly ClientWebSocket _webSocket;

    // Recognition events can arrive while a previous send is still in flight;
    // sends are serialized so only one is outstanding on the socket at a time.
    private readonly SemaphoreSlim _sendLock = new SemaphoreSlim(1, 1);

    /// <param name="webSocket">Socket used for sending; expected to be connected by the caller.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webSocket"/> is null.</exception>
    public StreamingTranscriber(ClientWebSocket webSocket)
    {
        if (webSocket == null)
        {
            throw new ArgumentNullException(nameof(webSocket));
        }

        _webSocket = webSocket;
        _recognizer = new SpeechRecognizer();
        // A grammar must be loaded for the recognizer to produce results for this application.
        _recognizer.LoadGrammar(new DictationGrammar());
        _recognizer.SpeechRecognized += OnRecognized;
    }

    // async void is acceptable only because this is an event handler; every exception
    // is handled inside, since one escaping here could not be observed by any caller.
    private async void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        try
        {
            var json = JsonSerializer.Serialize(new
            {
                Text = e.Result.Text,
                Timestamp = DateTime.Now
            });
            var payload = new ArraySegment<byte>(Encoding.UTF8.GetBytes(json));

            await _sendLock.WaitAsync();
            try
            {
                // Drop the message instead of throwing when the peer is gone.
                if (_webSocket.State != WebSocketState.Open)
                {
                    return;
                }

                await _webSocket.SendAsync(
                    payload,
                    WebSocketMessageType.Text,
                    true,
                    CancellationToken.None);
            }
            finally
            {
                _sendLock.Release();
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("Failed to send transcription: " + ex.Message);
        }
    }

    /// <summary>Stops listening and releases the recognizer and the send lock.</summary>
    public void Dispose()
    {
        _recognizer.SpeechRecognized -= OnRecognized;
        _recognizer.Dispose();
        _sendLock.Dispose();
    }
}

四、部署与调试

1. 硬件要求

组件 最低配置 推荐配置
CPU i5-4代(4核) i7-10代(8核)
内存 8GB 16GB+
声卡 普通USB声卡 支持ASIO的专业声卡
麦克风 16kHz采样率 高信噪比指向性麦克风

2. 调试技巧

// Enable verbose Speech SDK logging.
/// <summary>
/// Builds an Azure Speech SDK <c>SpeechConfig</c> with SDK file logging switched on
/// and profanity filtering set to raw (unmasked) output.
/// </summary>
/// <param name="logFilePath">File the SDK writes its diagnostic log to.</param>
/// <returns>
/// The configured instance. The settings only take effect for recognizers that are
/// created from this returned object, so callers should use it rather than build a new one.
/// </returns>
public static SpeechConfig EnableDebugLogging(string logFilePath = "speech-sdk.log")
{
    // Placeholder credentials: replace with real values supplied from configuration.
    var config = SpeechConfig.FromSubscription("YourKey", "YourRegion");

    // Speech_LogFilename is the SDK's documented switch for diagnostic logging.
    config.SetProperty(PropertyId.Speech_LogFilename, logFilePath);
    config.SetProfanity(ProfanityOption.Raw);

    return config;
}

// Performance monitoring
/// <summary>Writes the recognition throughput (words per second) to the debug output.</summary>
/// <param name="duration">Time spent recognizing.</param>
/// <param name="wordCount">Number of words recognized in that time.</param>
public void LogPerformance(TimeSpan duration, int wordCount)
{
    double seconds = duration.TotalSeconds;

    // A zero or negative duration would otherwise log Infinity/NaN or a negative rate.
    double rate = seconds > 0 ? wordCount / seconds : 0;

    Debug.WriteLine($"识别速度: {rate}字/秒");
}

五、扩展应用场景

1. 会议记录系统

// Keeps a slide deck in sync with what is being said.
/// <summary>
/// Adds one slide to a <c>PptxBuilder</c> for every phrase reported by a
/// <c>ContinuousRecognizer</c>.
/// </summary>
/// <remarks>
/// Requires <c>ContinuousRecognizer</c> to expose a public <c>SpeechRecognized</c> event.
/// Recognition does not run until <see cref="Start"/> is called.
/// </remarks>
public class MeetingRecorder : IDisposable
{
    private readonly ContinuousRecognizer _recognizer;
    private readonly PptxBuilder _pptBuilder;

    /// <summary>Creates the recognizer and slide builder and wires them together.</summary>
    public MeetingRecorder()
    {
        _recognizer = new ContinuousRecognizer();
        _pptBuilder = new PptxBuilder();

        _recognizer.SpeechRecognized += (s, e) =>
        {
            _pptBuilder.AddSlide(e.Result.Text);
        };
    }

    /// <summary>Starts listening; slides are only produced while recognition is running.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Releases the owned recognizer, which also ends event delivery.</summary>
    public void Dispose()
    {
        _recognizer.Dispose();
    }
}

2. 工业设备控制

// Handles safety-critical voice commands.
/// <summary>
/// Calls <c>Machine.Stop()</c> whenever a recognized phrase contains the
/// emergency-stop command "紧急停止".
/// </summary>
/// <remarks>
/// Requires <c>ContinuousRecognizer</c> to expose a public <c>SpeechRecognized</c> event.
/// Recognition does not run until <see cref="Start"/> is called.
/// </remarks>
public class VoiceController : IDisposable
{
    private readonly ContinuousRecognizer _recognizer;

    /// <summary>Creates the recognizer and registers the emergency-stop handler.</summary>
    public VoiceController()
    {
        _recognizer = new ContinuousRecognizer();
        _recognizer.SpeechRecognized += (s, e) =>
        {
            // No confidence threshold is applied in this handler: any result
            // containing the phrase triggers the stop.
            if (e.Result.Text.Contains("紧急停止"))
            {
                Machine.Stop();
            }
        };
    }

    /// <summary>Starts listening for commands.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening for commands.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Releases the owned recognizer, which also ends event delivery.</summary>
    public void Dispose()
    {
        _recognizer.Dispose();
    }
}

参考代码 语音转文字连续识别的C#小程序 www.youwenfan.com/contentcnj/70643.html

六、常见问题解决

  1. 识别率低：添加领域特定词汇表；调整 InitialSilenceTimeout、EndSilenceTimeout 参数；使用 SetProperty("Adaptation", "True") 启用自适应训练。

  2. 内存泄漏：实现对象池模式管理识别引擎；限制同时处理的音频流数量；定期调用 GC.Collect()。

  3. 多设备冲突

    // Explicitly pick the audio capture device instead of relying on the system default.
    var deviceEnumerator = new MMDeviceEnumerator();
    // Ask for the default capture endpoint registered for the Communications role.
    var inputDevice = deviceEnumerator.GetDefaultAudioEndpoint(DataFlow.Capture, Role.Communications);
    // NOTE(review): System.Speech documents SetInputToDefaultAudioDevice() as parameterless,
    // so passing inputDevice looks like it will not compile - confirm. Routing a specific
    // device presumably requires capturing from it and feeding the engine via SetInputToAudioStream.
    _engine.SetInputToDefaultAudioDevice(inputDevice);
    
posted @ 2025-10-20 08:57  u95900090  阅读(10)  评论(0)    收藏  举报