CosyVoice2 部署调试记录
方案1:conda
conda create -n cosyvoice python=3.10 conda activate cosyvoice # pynini is required by WeTextProcessing, use conda to install it as it can be executed on all platform. conda install -y -c conda-forge pynini==2.1.5 pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com # If you encounter sox compatibility issues # ubuntu sudo apt-get install sox libsox-dev # centos sudo yum install sox sox-devel
方案2:
python3.10
注意:
不要安装ttsfrd .
代码自动使用 WeTextProcessing 代替
调用代码:
import sys
sys.path.append('/mnt/workspace/CosyVoice')
sys.path.append('/mnt/workspace/CosyVoice/third_party/Matcha-TTS')
from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
from cosyvoice.utils.file_utils import load_wav
import torchaudio
import torch
torch.cuda.set_per_process_memory_fraction(0.8)
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=False)
prompt_speech_16k = load_wav('wav/gu.wav', 16000)
# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248
for i, j in enumerate(cosyvoice.inference_cross_lingual('当人有自由思考的机会,创造力会出现,而在这个星球,每个人都应该是自由思考的。不幸的是,它并不存在。我们在
匣子里思考,我们在有限的范围内思[ 1]考,我们需要从这里走出去。我们必须自由的思考,让我们的思想从狭隘的意识形态里解放。', prompt_speech_16k, stream=False,speed=0.9 )):
torchaudio.save('./wav/ret_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
'''
prompt_speech_16k = load_wav('asset/zero_shot_prompt.wav', 16000)
for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望
你以后能够做的比我还好呦。', prompt_speech_16k, stream=False)):
torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248
for i, j in enumerate(cosyvoice.inference_cross_lingual('在他讲述那个荒诞故事的过程中,他突然[laughter]停下来,因为他自己也被逗笑了[laughter]。', prompt_speech_16k, stream=False)):
torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
# instruct usage
for i, j in enumerate(cosyvoice.inference_instruct2('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '用四
川话说这句话', prompt_speech_16k, stream=False)):
torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
'''
~
~
~
~
创业?
浙公网安备 33010602011771号