语音切割
# Characters that mark the end of a sentence; adjust to suit the input text.
# NOTE: ';' and '?' each appear twice, and the multi-character entries
# ('...' and '、、、') cannot equal a single character, so they never match
# when the text is checked one character at a time.
# cutlist = "。!?".decode('utf-8')
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
# Check whether a character is a sentence delimiter: True if it is, otherwise False.
def FindToken(cutlist, char):
    """Report whether *char* is one of the delimiters in *cutlist*.

    Args:
        cutlist: Container of delimiters (anything that supports ``in``).
        char: The candidate to look up.

    Returns:
        True if *char* is contained in *cutlist*, otherwise False.
    """
    return char in cutlist
# Core function that performs the sentence splitting
def Cut(cutlist, lines):
    """Split *lines* into sentences, ending each one at a delimiter.

    Args:
        cutlist: Container of delimiter characters (anything that
            supports ``in``).
        lines: The text to split, as a string or any iterable of
            single-character strings.

    Returns:
        A list of sentence strings in their original order. Each sentence
        keeps its trailing delimiter. Text after the last delimiter is
        returned as a final sentence instead of being discarded.
    """
    sentences = []  # completed sentences, returned to the caller
    current = []  # characters collected since the previous delimiter
    for ch in lines:
        current.append(ch)
        if ch in cutlist:
            # A delimiter closes the sentence and stays attached to it.
            sentences.append(''.join(current))
            current = []
    if current:
        # Flush the tail that was not terminated by a delimiter.
        sentences.append(''.join(current))
    return sentences
import math
import os
import sys

from aip import AipSpeech

# Not used by this script; kept so the module-level name still exists.
r_s = []

# An earlier variant (not used here) split every input line into sentences
# with Cut() and collected the stripped, non-empty sentences in r_s.

# Read the whole input text. Newlines are kept in this variant; only lines
# that are empty once spaces are removed are skipped.
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        if not lines.replace(' ', ''):
            continue
        parts.append(lines)
str_ = ''.join(parts)

# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration instead.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

bd_str_per_limit = 1024  # maximum number of characters sent per request
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop invariants: one client and one output location serve every chunk.
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
    # Send only the current chunk; sending the whole text every time would
    # defeat the chunking.
    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })
    # Success returns the audio as binary data; failure returns a dict
    # describing the error.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue
    f_w = '{}g3db{}g3uidbd_str_per_limit{}.mp3'.format(mp3_dir, uid, i)
    with open(f_w, 'wb') as f:
        f.write(result)

# os._exit ends the process immediately without flushing stdio buffers, so
# flush first to avoid losing printed output.
sys.stdout.flush()
os._exit(2)
换行符影响
# Characters that mark the end of a sentence; adjust to suit the input text.
# NOTE: ';' and '?' each appear twice, and the multi-character entries
# ('...' and '、、、') cannot equal a single character, so they never match
# when the text is checked one character at a time.
# cutlist = "。!?".decode('utf-8')
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
# Check whether a character is a sentence delimiter: True if it is, otherwise False.
def FindToken(cutlist, char):
    """Report whether *char* is one of the delimiters in *cutlist*.

    Args:
        cutlist: Container of delimiters (anything that supports ``in``).
        char: The candidate to look up.

    Returns:
        True if *char* is contained in *cutlist*, otherwise False.
    """
    return char in cutlist
# Core function that performs the sentence splitting
def Cut(cutlist, lines):
    """Split *lines* into sentences, ending each one at a delimiter.

    Args:
        cutlist: Container of delimiter characters (anything that
            supports ``in``).
        lines: The text to split, as a string or any iterable of
            single-character strings.

    Returns:
        A list of sentence strings in their original order. Each sentence
        keeps its trailing delimiter. Text after the last delimiter is
        returned as a final sentence instead of being discarded.
    """
    sentences = []  # completed sentences, returned to the caller
    current = []  # characters collected since the previous delimiter
    for ch in lines:
        current.append(ch)
        if ch in cutlist:
            # A delimiter closes the sentence and stays attached to it.
            sentences.append(''.join(current))
            current = []
    if current:
        # Flush the tail that was not terminated by a delimiter.
        sentences.append(''.join(current))
    return sentences
import math
import os
import sys

from aip import AipSpeech

# Not used by this script; kept so the module-level name still exists.
r_s = []

# An earlier variant (not used here) split every input line into sentences
# with Cut() and collected the stripped, non-empty sentences in r_s.

# Read the whole input text with newline characters removed. Lines that are
# empty once spaces are removed are skipped (a line that still carries its
# newline does not count as empty for this check).
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        if not lines.replace(' ', ''):
            continue
        parts.append(lines.replace('\n', ''))
str_ = ''.join(parts)

# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration instead.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

bd_str_per_limit = 1024  # maximum number of characters sent per request
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop invariants: one client and one output location serve every chunk.
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
    print(cut_str)
    print('----------------------------------')
    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })
    # Success returns the audio as binary data; failure returns a dict
    # describing the error.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue
    f_w = '{}g3db{}g3uidbd_str_per_limit_NO_trN_{}.mp3'.format(mp3_dir, uid, i)
    with open(f_w, 'wb') as f:
        f.write(result)

# os._exit ends the process immediately without flushing stdio buffers, so
# flush first to avoid losing printed output.
sys.stdout.flush()
os._exit(2)
# Characters that mark the end of a sentence; adjust to suit the input text.
# NOTE: ';' and '?' each appear twice, and the multi-character entries
# ('...' and '、、、') cannot equal a single character, so they never match
# when the text is checked one character at a time.
# cutlist = "。!?".decode('utf-8')
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
# Check whether a character is a sentence delimiter: True if it is, otherwise False.
def FindToken(cutlist, char):
    """Report whether *char* is one of the delimiters in *cutlist*.

    Args:
        cutlist: Container of delimiters (anything that supports ``in``).
        char: The candidate to look up.

    Returns:
        True if *char* is contained in *cutlist*, otherwise False.
    """
    return char in cutlist
# Core function that performs the sentence splitting
def Cut(cutlist, lines):
    """Split *lines* into sentences, ending each one at a delimiter.

    Args:
        cutlist: Container of delimiter characters (anything that
            supports ``in``).
        lines: The text to split, as a string or any iterable of
            single-character strings.

    Returns:
        A list of sentence strings in their original order. Each sentence
        keeps its trailing delimiter. Text after the last delimiter is
        returned as a final sentence instead of being discarded.
    """
    sentences = []  # completed sentences, returned to the caller
    current = []  # characters collected since the previous delimiter
    for ch in lines:
        current.append(ch)
        if ch in cutlist:
            # A delimiter closes the sentence and stays attached to it.
            sentences.append(''.join(current))
            current = []
    if current:
        # Flush the tail that was not terminated by a delimiter.
        sentences.append(''.join(current))
    return sentences
import math
import os
import sys

from aip import AipSpeech

# Not used by this script; kept so the module-level name still exists.
r_s = []

# An earlier variant (not used here) split every input line into sentences
# with Cut() and collected the stripped, non-empty sentences in r_s.

# Read the whole input text with newline characters removed, skipping lines
# that contain nothing but spaces and/or a newline.
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        if not lines.replace(' ', '').replace('\n', ''):
            continue
        parts.append(lines.replace('\n', ''))
str_ = ''.join(parts)

# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration instead.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

# Maximum number of characters sent per request (an earlier run used 1024).
bd_str_per_limit = 300
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop invariants: one client and one output location serve every chunk.
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'CAKE'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
    print(cut_str)
    print('----------------------------------')
    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })
    # Success returns the audio as binary data; failure returns a dict
    # describing the error.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue
    f_w = '{}g3db{}g3uidnoBRBlankLine{}.mp3'.format(mp3_dir, uid, i)
    with open(f_w, 'wb') as f:
        f.write(result)

# os._exit ends the process immediately without flushing stdio buffers, so
# flush first to avoid losing printed output.
sys.stdout.flush()
os._exit(2)

浙公网安备 33010602011771号