CosyVoice2 部署调试记录

方案1:conda

conda create -n cosyvoice python=3.10
conda activate cosyvoice
# pynini is required by WeTextProcessing; install it with conda because the conda-forge package works on all platforms.
conda install -y -c conda-forge pynini==2.1.5
pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com

# If you encounter sox compatibility issues
# ubuntu
sudo apt-get install sox libsox-dev
# centos
sudo yum install sox sox-devel

 

方案2:

python3.10 

 

注意:

不要安装 ttsfrd。

代码会自动使用 WeTextProcessing 代替。

 

调用代码:

import sys
sys.path.append('/mnt/workspace/CosyVoice')
sys.path.append('/mnt/workspace/CosyVoice/third_party/Matcha-TTS')
from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
from cosyvoice.utils.file_utils import load_wav
import torchaudio
import torch

# Limit this process to 80% of the GPU memory instead of letting it use the whole device.
torch.cuda.set_per_process_memory_fraction(0.8)

# Load the CosyVoice2-0.5B checkpoint with JIT, TensorRT and fp16 all switched off.
cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=False)

# Reference (prompt) audio for the target voice; 16000 is passed as the sample-rate argument.
prompt_speech_16k = load_wav('wav/gu.wav', 16000)

# Text to synthesize. It is split into two adjacent literals (implicit concatenation)
# so that no string literal spans a physical line break, which is a SyntaxError.
# NOTE(review): the "[ 1]" inside the text looks like a copy/paste artifact rather than
# a supported control token -- confirm before relying on the output.
tts_text = (
    '当人有自由思考的机会,创造力会出现,而在这个星球,每个人都应该是自由思考的。不幸的是,它并不存在。我们在'
    '匣子里思考,我们在有限的范围内思[ 1]考,我们需要从这里走出去。我们必须自由的思考,让我们的思想从狭隘的意识形态里解放。'
)

# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248
# Each yielded item carries a 'tts_speech' tensor; save every item as its own wav file.
for i, j in enumerate(cosyvoice.inference_cross_lingual(tts_text, prompt_speech_16k, stream=False, speed=0.9)):
    torchaudio.save('./wav/ret_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)


# Disabled alternative examples, kept inside a bare triple-quoted string so they do not run.
# The string literals inside have been rejoined onto single lines so each example is
# valid Python when copied out of the string.
'''
prompt_speech_16k = load_wav('asset/zero_shot_prompt.wav', 16000)
for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k, stream=False)):
    torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)

# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248
for i, j in enumerate(cosyvoice.inference_cross_lingual('在他讲述那个荒诞故事的过程中,他突然[laughter]停下来,因为他自己也被逗笑了[laughter]。', prompt_speech_16k, stream=False)):
    torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)

# instruct usage
for i, j in enumerate(cosyvoice.inference_instruct2('收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。', '用四川话说这句话', prompt_speech_16k, stream=False)):
    torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)

'''

~                                                                                                                                                                
~                                                                                                                                                                
~                                                                                                                                                                
~                                               

  

posted on 2025-02-04 14:40  asp114  阅读(1083)  评论(0)    收藏  举报

导航