import uuid
import threading
import re
import os
import tempfile
import soundfile as sf
from flask import Blueprint, jsonify, send_file, current_app
from pathlib import Path
from pptx import Presentation
from pptx.util import Inches
from werkzeug.utils import secure_filename
from F5_TTS.src.f5_tts.infer.utils_infer import (
preprocess_ref_audio_text,
infer_process,
remove_silence_for_generated_wav
)
# Blueprint that carries the status/download routes defined in this module.
video_bp = Blueprint('video_bp', __name__)
# Module-level task registry keyed by task id (a uuid4 string). Each value is a
# dict with 'status', 'progress', 'message', 'files' and, once a task has
# completed, 'result_path'.
video_tasks = {}
def _safe_upload_name(raw_name, fallback_stem):
    """Return a filesystem-safe, never-empty name for an uploaded file.

    ``secure_filename`` removes every non-ASCII character, so a name such as
    ``课件.pptx`` collapses to ``pptx`` and a purely non-ASCII name collapses
    to an empty string.  When the stem is lost this way, *fallback_stem* is
    used instead and the sanitised extension (if any) is kept.
    """
    cleaned = secure_filename(raw_name)
    extension = secure_filename(Path(raw_name).suffix)
    if not cleaned or cleaned == extension:
        return f"{fallback_stem}.{extension}" if extension else fallback_stem
    return cleaned


# NOTE: the route decorator below is disabled in the original source; the
# function receives its uploads and models as arguments instead.
#@video_bp.route('/upload', methods=['POST'])
def video_upload(ppt_file, script_file, F5TTS_ema_model, vocoder, ref_audio_path):
    """Store the uploaded files, register a task and start background processing.

    Args:
        ppt_file: Upload object exposing ``filename`` and ``save(path)``
            (presumably a Werkzeug ``FileStorage`` -- confirm against caller).
        script_file: Upload object for the narration script, same interface.
        F5TTS_ema_model: Forwarded unchanged to ``process_video_task``.
        vocoder: Forwarded unchanged to ``process_video_task``.
        ref_audio_path: Forwarded unchanged to ``process_video_task``.

    Returns:
        The new task id (uuid4 string); progress is tracked in ``video_tasks``.
    """
    task_id = str(uuid.uuid4())
    video_tasks[task_id] = {
        'status': 'uploading',
        'progress': 0,
        'message': 'Uploading files...',
        'files': {}
    }

    # Each task gets its own directory below the configured upload folder.
    save_dir = Path(current_app.config['VIDEO_UPLOAD_FOLDER']) / task_id
    save_dir.mkdir(parents=True, exist_ok=True)

    uploads = (
        ('ppt', ppt_file, 'presentation'),
        ('script', script_file, 'script'),
    )
    for key, upload, fallback_stem in uploads:
        # Truthiness also covers a filename of None, not only ''.
        if not upload.filename:
            continue
        target = save_dir / _safe_upload_name(upload.filename, fallback_stem)
        upload.save(target)
        video_tasks[task_id]['files'][key] = str(target)

    video_tasks[task_id].update({
        'status': 'processing',
        'progress': 20,
        'message': 'Processing files...'
    })

    # The worker thread needs the real app object (not the context-local
    # proxy) so that it can push its own application context.
    app_obj = current_app._get_current_object()
    threading.Thread(
        target=process_video_task,
        args=(app_obj, task_id, F5TTS_ema_model, vocoder, ref_audio_path),
    ).start()
    return task_id
def process_video_task(app, task_id, F5TTS_ema_model, vocoder, ref_audio_path):
    """Run the script -> audio -> PPT pipeline for one task (worker thread body).

    Progress and the final outcome are written into ``video_tasks[task_id]``.
    Any exception is caught here, logged, and recorded on the task as
    ``status='failed'`` with the error text in ``message``.

    Args:
        app: Application object used to push an application context.
        task_id: Key of the task entry in ``video_tasks``.
        F5TTS_ema_model: Forwarded unchanged to ``generate_audio_files``.
        vocoder: Forwarded unchanged to ``generate_audio_files``.
        ref_audio_path: Forwarded unchanged to ``generate_audio_files``.
    """
    try:
        with app.app_context():
            task = video_tasks[task_id]
            update_video_task(task_id, 30, 'Validating files...')

            # Path('') is the current directory and therefore always "exists",
            # so a missing upload has to be detected on the raw value, and
            # is_file() is used to reject directories as well.
            ppt_file = task['files'].get('ppt')
            script_file = task['files'].get('script')
            if not ppt_file or not Path(ppt_file).is_file():
                raise FileNotFoundError("PPT file not found")
            if not script_file or not Path(script_file).is_file():
                raise FileNotFoundError("Script file not found")
            ppt_path = Path(ppt_file)
            script_path = Path(script_file)

            update_video_task(task_id, 40, 'Parsing script...')
            page_contents = parse_script(script_path)

            update_video_task(task_id, 60, 'Generating audio...')
            audio_dir = ppt_path.parent / "audio"
            audio_files = generate_audio_files(
                page_contents, audio_dir, F5TTS_ema_model, vocoder, ref_audio_path
            )

            update_video_task(task_id, 80, 'Inserting audio...')
            output_path = ppt_path.parent / f"audio_{ppt_path.name}"
            insert_audio_to_ppt(ppt_path, audio_files, output_path)

            video_tasks[task_id].update({
                'status': 'completed',
                'progress': 100,
                'result_path': str(output_path),
                'message': 'Processing complete'
            })
    except Exception as e:
        # Top-level boundary of the worker thread: keep the traceback in the
        # log and surface a short message through the task entry.
        app.logger.exception("Video task %s failed", task_id)
        failed_task = video_tasks.get(task_id)
        if failed_task is not None:
            failed_task.update({
                'status': 'failed',
                'progress': 100,
                'message': f'Error: {str(e)}'
            })
def update_video_task(task_id, progress, message):
    """Record the latest progress value and message on an existing task entry.

    Raises ``KeyError`` when *task_id* is not present in ``video_tasks``.
    """
    entry = video_tasks[task_id]
    entry['progress'] = progress
    entry['message'] = message
def parse_script(txt_path):
    """Split a narration script into per-page text.

    A line that starts with a marker of the form ``第 N 页`` opens page ``N``;
    anything else on the marker line is discarded.  Every following line is
    stripped and appended to that page followed by a single space.  Lines
    before the first marker are ignored, and a repeated marker for the same
    page resets its text.

    Args:
        txt_path: Path of the UTF-8 script file (with or without a BOM).

    Returns:
        dict mapping page number (int) to the concatenated page text (str).
    """
    page_pattern = re.compile(r"第\s*(\d+)\s*页")
    page_contents = {}
    current_page = None
    # utf-8-sig drops a leading byte-order mark if one is present; with plain
    # utf-8 the BOM stays glued to the first line and the first page marker
    # would not match.
    with open(txt_path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            match = page_pattern.match(line)
            if match:
                current_page = int(match.group(1))
                page_contents[current_page] = ""
            elif current_page is not None:
                page_contents[current_page] += line + " "
    return page_contents
def convert_to_chinese_date(text):
    """Replace every run of digits in *text* with Chinese numerals.

    Rules, applied per run of digits:
      * one digit            -> the matching numeral (``5`` -> ``五``)
      * two digits, 10-19    -> ``十`` plus the ones digit (``12`` -> ``十二``)
      * two digits, 20-99    -> tens numeral, ``十``, ones digit (``25`` -> ``二十五``)
      * anything else, including two-digit runs with a leading zero, is read
        digit by digit (``2024`` -> ``二零二四``, ``05`` -> ``零五``)

    Args:
        text: Input string.

    Returns:
        The string with all digit runs converted; other characters unchanged.
    """
    numerals = "零一二三四五六七八九"

    def number_to_chinese(match):
        # \d also matches full-width and other Unicode decimal digits; int()
        # maps each of them to its 0-9 value so the numeral lookup cannot fail.
        values = [int(ch) for ch in match.group()]
        if len(values) == 2 and values[0] != 0:
            tens, ones = values
            # 10-19 are read without a leading "一".
            head = "" if tens == 1 else numerals[tens]
            tail = numerals[ones] if ones else ""
            return head + "十" + tail
        # Single digits, leading-zero pairs and runs of three or more digits.
        return "".join(numerals[value] for value in values)

    return re.sub(r'\d+', number_to_chinese, text)
def generate_audio_files(page_contents, output_dir, F5TTS_ema_model, vocoder, ref_audio_path):
    """Synthesise one audio file per script page.

    Args:
        page_contents: Mapping of page number to page text.
        output_dir: ``pathlib.Path`` of the directory that receives the audio
            files; created (including parents) when missing.
        F5TTS_ema_model: Passed to ``infer_process``.
        vocoder: Passed to ``infer_process``.
        ref_audio_path: Reference audio passed to ``preprocess_ref_audio_text``.

    Returns:
        dict mapping page number to the ``Path`` of ``page_<n>.mp3``.

    Raises:
        RuntimeError: When any step fails for a page; the message names the
            page and the original exception is chained as ``__cause__``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    audio_files = {}
    # The reference audio does not depend on the page, so it is prepared once
    # (lazily, so an empty script never touches it) and reused.
    reference = None
    for page, text in page_contents.items():
        audio_path = output_dir / f"page_{page}.mp3"
        try:
            if reference is None:
                reference = preprocess_ref_audio_text(ref_audio_path, '')
            ref_audio_data, ref_text = reference

            gen_text = convert_to_chinese_date(text)
            final_wave, final_sample_rate, _ = infer_process(
                ref_audio_data,
                ref_text,
                gen_text,
                F5TTS_ema_model,
                vocoder,
                cross_fade_duration=0.15,
                speed=1.0,
            )

            # Silence removal works on a file, so round-trip through a
            # temporary WAV.  The handle is closed before the file is reopened
            # by name, and the file is removed afterwards instead of leaking.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
                temp_wav = handle.name
            try:
                sf.write(temp_wav, final_wave, final_sample_rate)
                remove_silence_for_generated_wav(temp_wav)
                final_wave, _ = sf.read(temp_wav)
            finally:
                try:
                    os.remove(temp_wav)
                except OSError:
                    # Best-effort cleanup; a leftover temp file must not fail
                    # an otherwise successful page.
                    pass

            sf.write(audio_path, final_wave, final_sample_rate)
            audio_files[page] = audio_path
        except Exception as e:
            raise RuntimeError(f"Audio generation failed (Page {page}): {str(e)}") from e
    return audio_files
def insert_audio_to_ppt(ppt_path, audio_files, output_path):
    """Embed each page's audio file on the matching slide and save a copy.

    Args:
        ppt_path: Source presentation.
        audio_files: Mapping of 1-based page number to audio file path.
        output_path: Where the modified presentation is written.

    Pages outside ``1..len(slides)`` are skipped.
    """
    prs = Presentation(ppt_path)
    slide_count = len(prs.slides)

    # Position and size of the media shape, identical on every slide.
    left = Inches(1)
    top = Inches(3)
    width = Inches(1)
    height = Inches(1)

    for page, audio_path in audio_files.items():
        # The lower bound matters: page 0 or a negative page would otherwise
        # index from the end and attach the audio to the wrong slide.
        if not 1 <= page <= slide_count:
            continue
        slide = prs.slides[page - 1]
        audio_shape = slide.shapes.add_movie(
            str(audio_path),
            left, top, width, height,
            poster_frame_image=None,
            mime_type='audio/mp3'
        )
        # Attempt to enable auto-play; failures are only logged.
        # NOTE(review): the lookup uses the "p:" prefix without a namespace
        # map and a "playMode" attribute -- confirm this ever matches an
        # element produced by add_movie; it may always end in the warning.
        try:
            movie_element = audio_shape._element.find(".//p:movie")
            if movie_element is not None:
                movie_element.set("playMode", "auto")
        except Exception as e:
            current_app.logger.warning(f"Auto-play setup failed: {str(e)}")
    prs.save(output_path)
@video_bp.route('/status/<task_id>')
def video_status(task_id):
    """Return the task entry for *task_id* as JSON, or a 'not_found' marker."""
    try:
        payload = video_tasks[task_id]
    except KeyError:
        payload = {'status': 'not_found'}
    return jsonify(payload)
@video_bp.route('/download/<task_id>')
def video_download(task_id):
    """Send the finished presentation of a completed task as an attachment.

    Responds with a 404 JSON error when the task is unknown, not yet
    completed, or its result file is no longer on disk.
    """
    task = video_tasks.get(task_id)
    if not task or task['status'] != 'completed':
        return jsonify({'error': 'File not available'}), 404

    # The result path is stored as produced by the worker, which may be
    # relative to the process working directory.  Resolve it against that
    # directory instead of a machine-specific hard-coded project folder;
    # send_file would otherwise resolve a relative path against the
    # application root.
    file_path = os.path.abspath(str(task['result_path']))
    if not os.path.isfile(file_path):
        return jsonify({'error': 'File not available'}), 404
    return send_file(file_path, as_attachment=True)
# 浙公网安备 33010602011771号  (web-page footer residue from extraction; not part of the module)