TTS 合成语音
1. Python 脚本:
import sys
import os
import logging
import torch
import re
from TTS.utils.radam import RAdam
from collections import defaultdict
from TTS.api import TTS
from pydub import AudioSegment
from pydub.effects import normalize
import io
# Re-wrap the binary buffers behind stdout and stderr in fresh text wrappers
# so that everything written to either stream is encoded as UTF-8.
for _stream_name in ("stdout", "stderr"):
    _utf8_stream = io.TextIOWrapper(getattr(sys, _stream_name).buffer, encoding='utf-8')
    setattr(sys, _stream_name, _utf8_stream)
del _stream_name, _utf8_stream
# Register RAdam, defaultdict and dict with torch's serialization safe-globals
# list. NOTE(review): presumably needed so that model checkpoints referencing
# these types can be loaded -- confirm against the model-loading code.
torch.serialization.add_safe_globals([RAdam, defaultdict, dict])
class HybridTTS:
def __init__(self):
    """Prepare a new instance: run the logging setup, then the config setup."""
    self.setup_logging()
    config_setup = self.setup_config
    config_setup()
def setup_logging(self):
    """Configure root logging at INFO level with a file and a stream handler.

    Records go to ``tts_hybrid.log`` (UTF-8) and to a ``UnicodeStreamHandler``
    bound to ``logging.StreamHandler``'s default stream. Because this goes
    through ``logging.basicConfig``, it has no effect on the root logger when
    handlers are already installed.
    """
    class UnicodeStreamHandler(logging.StreamHandler):
        """Stream handler that degrades unencodable characters instead of failing."""

        def emit(self, record):
            """Write *record*; on an encoding failure retry with replacements."""
            try:
                msg = self.format(record)
                stream = self.stream
                try:
                    stream.write(msg + self.terminator)
                except UnicodeEncodeError:
                    # The stream cannot represent some characters. Round-trip
                    # the message through the stream's own encoding with
                    # errors='replace' so the retry contains only encodable
                    # text. (A UTF-8 round trip would leave the message
                    # unchanged and the retry would fail the same way.)
                    encoding = getattr(stream, 'encoding', None) or 'utf-8'
                    safe_msg = msg.encode(encoding, 'replace').decode(encoding, 'replace')
                    stream.write(safe_msg + self.terminator)
                self.flush()
            except Exception:
                # Any remaining failure, including one raised by the retry,
                # is routed to logging's standard error hook.
                self.handleError(record)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler("tts_hybrid.log", encoding='utf-8'),
            UnicodeStreamHandler(),
        ],
    )
def setup_config(self):
    """Build the settings dictionary and store it on ``self.config``."""
    # Train-number prefix letter -> Chinese character substituted for it.
    prefix_readings = {
        'G': '高', 'D': '动', 'C': '城',
        'K': '快', 'T': '特', 'Z': '直'
    }

    settings = {}
    # Mode-switch thresholds.
    settings["max_direct_length"] = 120
    settings["min_chunk_length"] = 20
    # Chunking parameters.
    settings["chunk_size"] = 50
    settings["crossfade_ms"] = 50
    # Model parameters.
    settings["max_decoder_steps"] = 5000
    settings["gate_threshold"] = 0.6
    # Audio parameters.
    settings["sample_rate"] = 22050
    settings["bitrate"] = "192k"
    # Special handling of train numbers.
    settings["train_number_map"] = prefix_readings

    self.config = settings
def convert_train_numbers(self, text):
    """Rewrite train numbers in *text* using the configured prefix map.

    A train number is one uppercase ASCII letter followed by digits, with an
    optional trailing "次". The letter is swapped for its entry in
    ``self.config['train_number_map']``; letters without an entry are kept.
    The digits and the optional suffix are left untouched.
    """
    train_number = re.compile(r'([A-Z])(\d+)(次)?')

    def spell_out(found):
        # The suffix group is optional; default it to '' when it is absent.
        prefix, digits, marker = found.groups(default='')
        reading = self.config['train_number_map'].get(prefix, prefix)
        return f"{reading}{digits}{marker}"

    return train_number.sub(spell_out, text)
def clean_text(self, text):
    """Clean *text* and make sure it ends with terminal punctuation.

    Steps, in order:
      1. Rewrite train numbers via ``self.convert_train_numbers``.
      2. Drop every character outside the allowed set (CJK ideographs in
         U+4E00-U+9FA5, the listed punctuation, whitespace and digits).
      3. Apply the fixed polyphone substitutions.
      4. Strip trailing whitespace and append '。' unless the text already
         ends with one of ``,。!?``.

    Returns the cleaned string; an empty or whitespace-only result becomes '。'.
    """
    # Handle train numbers first, while the prefix letters are still present.
    text = self.convert_train_numbers(text)

    # General filtering.
    pattern = re.compile(r'[^\u4e00-\u9fa5,。!?、;:"\'()《》【】\s0-9年月日点分秒]')
    cleaned = pattern.sub('', text)

    # Polyphone handling. This runs after the filter, so the Latin letters
    # introduced by "同xing" are not stripped again.
    cleaned = cleaned.replace("同行", "同xing")
    cleaned = cleaned.replace("长时间", "常时间")

    # The filter keeps whitespace, so the text may end in spaces or newlines.
    # Strip them first; otherwise the check below would look at the
    # whitespace and add a stray '。' after text that is already terminated.
    cleaned = cleaned.rstrip()

    # Ensure the last character is terminal punctuation.
    if not cleaned or cleaned[-1] not in ',。!?':
        cleaned += '。'
    return cleaned
def should_chunk(self, text):
    """Tell whether *text* should be split into chunks.

    Returns True when any one of these holds:
      * the text is longer than ``self.config["max_direct_length"]``;
      * it contains a blank line (two consecutive newlines);
      * it contains more than three '。' characters.
    """
    if len(text) > self.config["max_direct_length"]:
        return True
    if '\n\n' in text:
        return True
    return text.count('。') > 3
def split_text(
