# Python实现语音转文字功能
import os
import requests
import urllib
import calendar
import time
import datetime
from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from aip import AipSpeech
class DouYin:
    """Turn the audio track of a video file into a text transcript.

    The pipeline is: extract the audio track to a WAV file (``transform``),
    cut it into short slices and transcribe each slice (``speech_seg``) by
    sending it to the Baidu speech-recognition client (``speech_recognize``).
    ``run`` transcribes every file listed in ``audio_list``.
    """

    # Credentials handed to ``AipSpeech``. The key values are placeholders and
    # must be replaced (or overridden in a subclass) before real use.
    APP_ID = '57997766'
    API_KEY = '百度语音识别APP_KEY'
    SECRET_KEY = '百度语音识别SECRET_KEY'

    # Length of each audio slice sent to the recognizer, in seconds.
    SECONDS_PER_SPLIT = 59

    # Pause between two recognition requests, in seconds.
    REQUEST_PAUSE_SECONDS = 0.5

    def __init__(self, id_='', audio_list=None):
        """Create a converter.

        Args:
            id_: Label printed by ``run`` in its summary line; stored as
                ``self.id``.
            audio_list: Optional iterable of WAV file names for ``run`` to
                transcribe. Defaults to an empty list.
        """
        # ``run`` reads both attributes, so they must always exist.
        self.id = id_
        self.audio_list = [] if audio_list is None else list(audio_list)
        self._headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            # Adjacent literals are concatenated, so each fragment has to
            # carry its own separating space.
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0;'
                          ' Nexus 5 Build/MRA58N) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.181 Mobile Safari/537.36',
        }

    def transform(self, videoName):
        """Extract the audio of ``videoName`` to a WAV file and transcribe it.

        Args:
            videoName: Video file name, resolved against the current working
                directory (an absolute path is used unchanged).

        The WAV file is named after the current time (``%m%d%H%M%S``) and is
        written to the current working directory.
        """
        audioName = datetime.datetime.now().strftime('%m%d%H%M%S') + '.wav'
        audio = AudioFileClip(os.path.join(os.getcwd(), videoName))
        try:
            audio.write_audiofile(audioName)
        finally:
            # Release the reader held by the clip even if writing fails.
            audio.close()
        self.speech_seg(audioName)

    def speech_seg(self, filename):
        """Split a WAV file into slices and append each transcript to a text file.

        Args:
            filename: Path of the WAV file to transcribe.

        The transcript goes to ``<%m%d%H%M%S>.txt`` in the current working
        directory, one line per slice. Each slice is written to a temporary
        ``<index>.wav`` file that is removed once it has been processed.
        """
        txtFileName = datetime.datetime.now().strftime('%m%d%H%M%S') + '.txt'
        txt_path = os.path.join(os.getcwd(), txtFileName)
        if os.path.exists(txt_path):
            os.remove(txt_path)  # start from an empty transcript

        sound = AudioSegment.from_wav(filename)
        seconds_of_file = sound.duration_seconds
        seconds_per_split_file = self.SECONDS_PER_SPLIT

        # Number of slices: full slices plus one more for any remainder.
        times = int(seconds_of_file // seconds_per_split_file)
        if seconds_of_file % seconds_per_split_file:
            times += 1
        print(f'{filename}可切割 {times} 次')

        interval_ms = seconds_per_split_file * 1000  # slicing is done in ms
        for i in range(times):
            start_ms = i * interval_ms
            if i + 1 == times:
                # The last slice takes whatever is left.
                part = sound[start_ms:]
            else:
                part = sound[start_ms:start_ms + interval_ms]

            data_split_filename = f'{i}.wav'
            try:
                # Resample to 16 kHz mono 16-bit PCM, matching the sample rate
                # passed to the recognizer in ``speech_recognize``.
                mono = part.set_frame_rate(16000).set_channels(1)
                mono.export(data_split_filename, format='wav', codec='pcm_s16le')
                text = self.speech_recognize(data_split_filename)
                # Explicit UTF-8 so the transcript does not depend on the
                # platform's default encoding.
                with open(txt_path, 'a', encoding='utf-8') as ff:
                    ff.write(text + '\n')
                print(f' {str(i)}.wav语音转换成功,开始删除')
            finally:
                # Never leave the temporary slice behind, even on failure.
                if os.path.exists(data_split_filename):
                    os.remove(data_split_filename)
            time.sleep(self.REQUEST_PAUSE_SECONDS)

    def speech_recognize(self, seg_filename):
        """Send one WAV slice to the recognizer and return the transcript.

        Args:
            seg_filename: Path of a 16 kHz WAV file.

        Returns:
            The first recognition result, or the marker string
            ``'=====识别失败====='`` when the response does not report success.
        """
        aipSpeech = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        with open(seg_filename, 'rb') as fp:
            audioPcm = fp.read()
        # NOTE(review): dev_pid 1537 presumably selects the Mandarin model;
        # confirm against the Baidu speech API documentation.
        response = aipSpeech.asr(audioPcm, 'wav', 16000, {'dev_pid': 1537})

        # Use .get() so an unexpected response shape counts as a failed
        # recognition instead of raising KeyError/IndexError.
        results = response.get('result')
        if 'success' in response.get('err_msg', '') and results:
            return results[0]
        print('识别失败!')
        return '=====识别失败====='

    def run(self):
        """Transcribe every file in ``self.audio_list``.

        A failure on one file is reported and does not stop the remaining
        files from being processed.
        """
        print(f'============{self.id}共{len(self.audio_list)}个文件==============')
        for audio_file in self.audio_list:
            try:
                self.speech_seg(audio_file)
            except Exception as exc:
                # Best-effort batch: report the cause and continue.
                print(f'-----{audio_file}-----分析出现问题')
                print(f'{type(exc).__name__}: {exc}')
            else:
                print(f'-----{audio_file}-----分析完成')
if __name__ == "__main__":
    # Script entry point: transcribe the audio track of "1.mp4", looked up
    # in the current working directory.
    converter = DouYin()
    converter.transform("1.mp4")
# 作者:Aleen Pan
# 出处:http://panxiangfu.cnblogs.com/
# 如果您觉得本文对您的学习有所帮助,可通过“微信”或“支付宝”打赏博主,或者点击页面右下角【好文要顶】支持博主。



# 浙公网安备 33010602011771号