Voice Q&A assistant and generating subtitled ("熟肉") videos
Voice Q&A assistant
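The script below is a push-to-talk assistant built from locally running pieces: PyAudio records the question, Whisper transcribes it, the text is sent to a MiniCPM-o model served behind an OpenAI-compatible endpoint at 127.0.0.1:8080, the reply is synthesized by the VITS-Paimon script and played back with pydub, and each question/answer pair is appended to audio-robot-logs.txt. It is driven from the console: press Enter at the 录音? prompt to start recording and Enter again at 停止? to stop. The hard-coded paths (C:/Users/tellw/apps/python, D:/asr-service/VITS-Paimon) and the use of grep in the netstat checks are specific to the machine this was written on.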
from multiprocessing import Process
from threading import Thread
import os
import json
import whisper
import zhconv
from pyaudio import PyAudio,paInt16
import wave
from pydub import AudioSegment
from pydub.playback import play
import sys
import time
import subprocess
import openai
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))
def transcribe(file):
    # Transcribe a Chinese audio file with Whisper and dump the result next to it as JSON.
    print(f'transcribing {file}')
    model=whisper.load_model('small')
    print('whisper model loaded')
    result=model.transcribe(file,language='Chinese')
    print(result)
    with open(f'{file.rsplit(".",1)[0]}.json','w',encoding='utf8') as f:
        json.dump(result,f,ensure_ascii=False,indent=4)
class recorder:
    NUM_SAMPLES=2000
    SAMPLING_RATE=16000
    voice_string=[]
    does=False
    def start(self):
        # Keep reading fixed-size chunks from the default input device until finish() clears the flag.
        print('recording audio...')
        self.does=True
        self.voice_string=[]
        pa=PyAudio()
        stream=pa.open(format=paInt16,channels=1,rate=self.SAMPLING_RATE,input=True,frames_per_buffer=self.NUM_SAMPLES)
        while self.does:
            string_audio_data=stream.read(self.NUM_SAMPLES)
            self.voice_string.append(string_audio_data)
        stream.stop_stream()
        stream.close()
        pa.terminate()
    def finish(self):
        # Stop the recording loop and write the buffered chunks to yy/mm/dd/HHMMSS.wav.
        print('recording audio end')
        self.does=False
        wav_id=time.strftime('%y_%m_%d_%H%M%S')
        if not os.path.exists(wav_id[:8].replace('_','/')):
            os.makedirs(wav_id[:8].replace('_','/'))
        wf=wave.open(f'{wav_id.replace("_","/")}.wav','wb')
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(self.SAMPLING_RATE)
        wf.writeframes(b''.join(self.voice_string))
        wf.close()
        wav=wave.open(f'{wav_id.replace("_","/")}.wav','rb')
        wav_duration=wav.getnframes()/self.SAMPLING_RATE
        wav.close()
        print('wav_id',wav_id)
        return wav_id,wav_duration
rec=recorder()
def start_minicpm_service():
    # Launch the local MiniCPM-o server (an OpenAI-compatible endpoint on port 8080) in a child process.
    os.chdir('C:/Users/tellw/apps/python')
    subprocess.run('python run_minicpm_service.py',shell=True)
def answer2(result,client):
    # Ask the local model to answer the transcribed question.
    completion=client.chat.completions.create(model='Model-7.6B-Q4_0_openbmb_MiniCPM-o-2_6-gguf',messages=[{'role':'system','content':'You are a helpful assistant.'},{'role':'user','content':result}],frequency_penalty=0.2)
    return completion.choices[0].message.content
def synthesize_answer_and_play(answer,wav_id):
    # Synthesize the answer with the VITS-Paimon script, then play the resulting wav.
    os.chdir('D:/asr-service/VITS-Paimon')
    print('synthesizing audios...')
    subprocess.run(f'python custom_synthesize_shell.py {answer.replace(" ","")} {script_dir}/{wav_id.replace("_","/")}_answer',shell=True)
    os.chdir(script_dir)
    print('start to play')
    song=AudioSegment.from_wav(f'{wav_id.replace("_","/")}_answer.wav')
    play(song)
    print('play end')
def exec_shell(cmd, ignore_err=False):
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = process.communicate()
    retcode = process.poll()
    if retcode == 0 or ignore_err:
        return output, err
    else:
        return -1000, f'execute "{cmd}" failed'
if __name__=='__main__':
    while True:
        input('录音?')
        # Recording runs in a daemon thread so the main thread can wait for the stop signal.
        record_thread=Thread(target=rec.start,args=())
        record_thread.daemon=True
        record_thread.start()
        input('停止?')
        st=time.time()
        wav_id,wav_duration=rec.finish()
        # Run Whisper in a separate process so its memory is released after transcription.
        p1=Process(target=transcribe,args=(f'{wav_id.replace("_","/")}.wav',))
        p1.start()
        p1.join()
        # Check whether something already listens on port 8080 (grep is assumed to be on PATH, e.g. from Git for Windows).
        res,_=exec_shell('netstat -ano|grep 8080|grep -i listen',True)
        if len(res.decode())==0:
            gpt_running=False
        else:
            print('chatgpt服务正在运行')
            gpt_running=True
        p=None
        if not gpt_running:
            p=Process(target=start_minicpm_service,args=())
            p.daemon=True
            p.start()
            # Give the server time to load the model.
            time.sleep(60)
        with open(f'{wav_id.replace("_","/")}.json','r',encoding='utf8') as f:
            result=json.load(f)
        client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
        # Whisper sometimes emits traditional characters; normalize to simplified Chinese.
        result=zhconv.convert(result['text'],'zh-hans')
        print(f'responding to {result}')
        answer=answer2(result,client)
        print('answer',answer)
        if not gpt_running:
            # The service was started by this script, so shut it down again.
            output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
            pid=int(output.decode().strip().split('\n')[0].split()[-1])
            print('pid',pid)
            os.kill(pid,9)
            print('结束chatgpt服务')
        p2=Process(target=synthesize_answer_and_play,args=(answer,wav_id))
        p2.start()
        p2.join()
        with open('audio-robot-logs.txt','a',encoding='utf8') as f:
            f.write(f'{wav_id}\n{result}<SPLIT>\n{answer}<SPLIT>\n')
        et=time.time()
        print(f'this query costs {et-st}s')
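Each round trip is appended to audio-robot-logs.txt as the wav id, the recognized question and the model's answer, separated by <SPLIT> markers. Below is a minimal sketch for reading that log back, assuming the format written above; the function name is a placeholder and it assumes neither field contains a literal <SPLIT>.

def read_qa_log(path='audio-robot-logs.txt'):
    # Entries are written as '{wav_id}\n{question}<SPLIT>\n{answer}<SPLIT>\n',
    # so splitting on '<SPLIT>\n' yields alternating header/answer chunks.
    with open(path,encoding='utf8') as f:
        chunks=f.read().split('<SPLIT>\n')
    entries=[]
    for i in range(0,len(chunks)-1,2):
        wav_id,_,question=chunks[i].partition('\n')
        entries.append((wav_id,question,chunks[i+1]))
    return entries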
Generating subtitled videos
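This script batch-processes every mp4/mkv file in a given directory: ffmpeg extracts the audio, Whisper transcribes the Japanese speech, each segment is translated into Chinese by a locally served Sakura model (again an OpenAI-compatible endpoint at 127.0.0.1:8080), the result is written out as an SRT file, and ffmpeg's subtitles filter burns it into a new *_subtitle.mp4. The .json and .srt intermediates are kept and reused, so an interrupted run can pick up where it left off.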
from multiprocessing import Process
import os
import subprocess
import sys
import time
import whisper
import json
import openai
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))
def transcribe(file):
    # Extract the audio track with ffmpeg, then transcribe the Japanese speech with Whisper.
    print(f'transcribing {file}')
    subprocess.run(f'ffmpeg -y -i "{file}" test.wav',shell=True)
    model=whisper.load_model('small')
    print('whisper model loaded')
    result=model.transcribe('test.wav',language='Japanese')
    print(result)
    with open(f'{file.rsplit(".",1)[0]}.json','w',encoding='utf8') as f:
        json.dump(result,f,ensure_ascii=False,indent=4)
def start_jp2cn_service():
    # Launch the local Japanese-to-Chinese translation server (OpenAI-compatible endpoint on port 8080).
    os.chdir('C:/Users/tellw/apps/python')
    subprocess.run('python run_jp2zh_service.py',shell=True)
def translate_jp2zh(jp,client):
    # Translate one subtitle line; fall back to the original Japanese text if the request fails.
    try:
        completion=client.chat.completions.create(model='sakura-1.5b-qwen2.5-v1.0-fp16',messages=[{'role':'system','content':'You are a helpful assistant.'},{'role':'user','content':jp}],frequency_penalty=0.2,timeout=20)
    except Exception as e:
        print(f'{e}')
        return jp
    # Drop a trailing full stop so single-line subtitles read more naturally.
    if completion.choices[0].message.content.endswith('。'):
        return completion.choices[0].message.content[:-1]
    else:
        return completion.choices[0].message.content
def srt_time(t):
    # Format a time in seconds as an SRT timestamp (HH:MM:SS,mmm), keeping the millisecond part.
    h=int(t//3600)
    t%=3600
    m=int(t//60)
    t%=60
    s=int(t)
    ms=int((t-s)*1000)
    return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'
def exec_shell(cmd, ignore_err=False):
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = process.communicate()
    retcode = process.poll()
    if retcode == 0 or ignore_err:
        return output, err
    else:
        return -1000, f'execute "{cmd}" failed'
if __name__=='__main__':
    wd=sys.argv[1]
    os.chdir(wd)
    for file in os.listdir():
        if '.' not in file or file.rsplit('.',1)[1] not in ['mp4','mkv']:
            continue
        # Step 1: transcribe the video; the Whisper JSON is kept so reruns can skip this step.
        if not os.path.exists(f'{file.rsplit(".",1)[0]}.json'):
            p1=Process(target=transcribe,args=(file,))
            p1.start()
            p1.join()
        # Step 2: translate the segments and write the SRT file.
        if not os.path.exists(f'{file.rsplit(".",1)[0]}.srt'):
            p=Process(target=start_jp2cn_service,args=())
            p.daemon=True
            p.start()
            # Give the translation server time to load the model.
            time.sleep(30)
            with open(f'{file.rsplit(".",1)[0]}.json','r',encoding='utf8') as f:
                result=json.load(f)
            srt_txt=''
            client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
            rsl=len(result["segments"])
            for i,segment in enumerate(result['segments']):
                print(f'translating {i/rsl*100:.1f}% {i}/{rsl} {segment["text"]} {time.strftime("%y%m%d%H%M%S")}')
                nc=f'{segment["id"]+1}\n{srt_time(segment["start"])} --> {srt_time(segment["end"])}\n{translate_jp2zh(segment["text"],client)}\n\n'
                print(nc)
                srt_txt+=nc
            with open(f'{file.rsplit(".",1)[0]}.srt','w',encoding='utf8') as f:
                f.write(srt_txt)
            # Stop the translation server started above (grep is assumed to be on PATH).
            output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
            pid=int(output.decode().strip().split('\n')[0].split()[-1])
            print(pid)
            os.kill(pid,9)
            print('结束日语翻译服务')
        # Step 3: burn the subtitles into a new video.
        print(f'generating subtitle video--{file.rsplit(".",1)[0]}_subtitle.mp4')
        subprocess.run(f'ffmpeg -y -i "{file}" -vf subtitles="{file.rsplit(".",1)[0]}.srt" "{file.rsplit(".",1)[0]}_subtitle.mp4"',shell=True)
    if len(sys.argv)>=3 and sys.argv[2]=='shutdown_y':
        subprocess.run('shutdown -s -t 0',shell=True)
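Usage note: assuming the script is saved as, say, gen_subtitled_videos.py (the filename here is a placeholder), it takes the directory holding the raw videos as its first argument and an optional flag as the second, e.g. python gen_subtitled_videos.py D:/videos/raw shutdown_y; passing shutdown_y powers the machine off once every file has been processed.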
Created 2502051411, modified 2502051411