import uuid
import threading
import re
import os
import tempfile
import soundfile as sf
from flask import Blueprint, jsonify, send_file, current_app
from pathlib import Path
from pptx import Presentation
from pptx.util import Inches
from werkzeug.utils import secure_filename
from F5_TTS.src.f5_tts.infer.utils_infer import (
    preprocess_ref_audio_text,
    infer_process,
    remove_silence_for_generated_wav
)

# Blueprint that groups the status/download routes defined in this module.
video_bp = Blueprint('video_bp', __name__)

# In-memory registry of task state, keyed by task id (a UUID string).
# It is written by the upload entry point and by the background worker
# thread, and read by the status/download routes. Being a plain module-level
# dict, its contents are lost when the process exits.
video_tasks = {}

#@video_bp.route('/upload', methods=['POST'])
def _save_upload(file_obj, save_dir, fallback_name):
    """Save an uploaded file into *save_dir* and return its path, or None.

    Returns None when no file was supplied (missing object or empty
    filename). ``secure_filename`` may return an empty string (for example
    when the original name contains no ASCII characters); in that case
    *fallback_name* is used so the upload is not written onto the task
    directory itself.
    """
    if file_obj is None or not file_obj.filename:
        return None
    filename = secure_filename(file_obj.filename) or fallback_name
    target = save_dir / filename
    file_obj.save(target)
    return target


def video_upload(ppt_file, script_file, F5TTS_ema_model, vocoder, ref_audio_path):
    """Store the uploaded files and start background narration processing.

    Reads ``current_app.config['VIDEO_UPLOAD_FOLDER']``, so it must be called
    while a Flask application context is active.

    Args:
        ppt_file: Uploaded presentation; must expose ``filename`` and
            ``save(path)``.
        script_file: Uploaded narration script; same requirements.
        F5TTS_ema_model: Passed through unchanged to the worker thread.
        vocoder: Passed through unchanged to the worker thread.
        ref_audio_path: Passed through unchanged to the worker thread.

    Returns:
        The id (UUID4 string) of the newly registered task. Progress can be
        followed through ``video_tasks[task_id]``.
    """
    task_id = str(uuid.uuid4())
    video_tasks[task_id] = {
        'status': 'uploading',
        'progress': 0,
        'message': 'Uploading files...',
        'files': {}
    }

    # Each task gets its own directory below the configured upload folder.
    save_dir = Path(current_app.config['VIDEO_UPLOAD_FOLDER']) / task_id
    save_dir.mkdir(parents=True, exist_ok=True)

    # Presentation file.
    ppt_path = _save_upload(ppt_file, save_dir, 'presentation.pptx')
    if ppt_path is not None:
        video_tasks[task_id]['files']['ppt'] = str(ppt_path)

    # Narration script.
    script_path = _save_upload(script_file, save_dir, 'script.txt')
    if script_path is not None:
        video_tasks[task_id]['files']['script'] = str(script_path)

    video_tasks[task_id].update({
        'status': 'processing',
        'progress': 20,
        'message': 'Processing files...'
    })

    # The worker runs outside the request, so hand it the real application
    # object rather than the context-bound ``current_app`` proxy.
    app_obj = current_app._get_current_object()
    threading.Thread(
        target=process_video_task,
        args=(app_obj, task_id, F5TTS_ema_model, vocoder, ref_audio_path),
    ).start()

    return task_id

def process_video_task(app, task_id, F5TTS_ema_model, vocoder, ref_audio_path):
    """Run the narration pipeline for one task (intended as a thread target).

    Steps, with the progress value recorded before each one:
    validate the uploaded files (30), parse the script (40), generate one
    audio file per page (60), embed the audio in the presentation (80).
    On success the task is marked 'completed' with ``result_path`` pointing
    at ``audio_<original ppt name>`` in the task directory. Any exception
    marks the task 'failed' and stores the error text in ``message``.

    Args:
        app: Flask application object; its app context is pushed for the
            duration of the work.
        task_id: Key of the task in ``video_tasks``.
        F5TTS_ema_model: Forwarded to ``generate_audio_files``.
        vocoder: Forwarded to ``generate_audio_files``.
        ref_audio_path: Forwarded to ``generate_audio_files``.
    """
    try:
        with app.app_context():
            task = video_tasks[task_id]
            update_video_task(task_id, 30, 'Validating files...')

            # Check the stored values before building Path objects:
            # Path('') is Path('.'), which always exists, so a missing upload
            # would otherwise pass validation and fail later with a
            # misleading error.
            ppt_file = task['files'].get('ppt')
            script_file = task['files'].get('script')

            if not ppt_file or not Path(ppt_file).is_file():
                raise FileNotFoundError("PPT file not found")
            if not script_file or not Path(script_file).is_file():
                raise FileNotFoundError("Script file not found")

            ppt_path = Path(ppt_file)
            script_path = Path(script_file)

            update_video_task(task_id, 40, 'Parsing script...')
            page_contents = parse_script(script_path)

            update_video_task(task_id, 60, 'Generating audio...')
            audio_dir = ppt_path.parent / "audio"
            audio_files = generate_audio_files(
                page_contents, audio_dir, F5TTS_ema_model, vocoder, ref_audio_path
            )

            update_video_task(task_id, 80, 'Inserting audio...')
            output_path = ppt_path.parent / f"audio_{ppt_path.name}"
            insert_audio_to_ppt(ppt_path, audio_files, output_path)

            video_tasks[task_id].update({
                'status': 'completed',
                'progress': 100,
                'result_path': str(output_path),
                'message': 'Processing complete'
            })
    except Exception as e:
        # This is the top of a worker thread: keep the traceback in the log
        # and report the failure through the task record.
        app.logger.exception("Video task %s failed", task_id)
        video_tasks[task_id].update({
            'status': 'failed',
            'progress': 100,
            'message': f'Error: {str(e)}'
        })

def update_video_task(task_id, progress, message):
    """Record a new progress value and status message on an existing task.

    Raises:
        KeyError: If *task_id* is not registered in ``video_tasks``.
    """
    task_entry = video_tasks[task_id]
    task_entry.update(progress=progress, message=message)

def parse_script(txt_path):
    """Parse a narration script into per-page text.

    A line that starts with a page marker of the form ``第 N 页`` opens
    page N; anything else on that marker line is discarded. Every following
    line (stripped, with a trailing space appended) is added to that page
    until the next marker. Lines before the first marker are ignored, and a
    repeated marker for the same page restarts that page's text.

    Args:
        txt_path: Path of the UTF-8 script file (with or without a BOM).

    Returns:
        Dict mapping page number (int) to the concatenated narration text.
    """
    page_pattern = re.compile(r"第\s*(\d+)\s*页")
    page_contents = {}
    current_page = None

    # "utf-8-sig" strips a leading byte-order mark if present. With plain
    # "utf-8" the BOM stays on the first line, the first page marker fails
    # to match, and that page's narration is silently dropped.
    with open(txt_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            match = page_pattern.match(line)
            if match:
                current_page = int(match.group(1))
                page_contents[current_page] = ""
            elif current_page is not None:
                page_contents[current_page] += line + " "

    return page_contents

def convert_to_chinese_date(text):
    """Replace every run of digits in *text* with Chinese numerals.

    Reading rules:
      * two-digit numbers without a leading zero are read as a quantity
        (``12`` -> ``十二``, ``20`` -> ``二十``, ``35`` -> ``三十五``);
      * everything else (single digits, numbers with a leading zero, three
        or more digits) is read digit by digit
        (``5`` -> ``五``, ``05`` -> ``零五``, ``2025`` -> ``二零二五``).

    Non-ASCII decimal digits such as full-width ``１２`` are accepted too.

    Args:
        text: Input string.

    Returns:
        The string with all digit runs converted.
    """
    num_map = {"0": "零", "1": "一", "2": "二", "3": "三", "4": "四",
               "5": "五", "6": "六", "7": "七", "8": "八", "9": "九"}

    def number_to_chinese(match):
        # ``\d`` also matches non-ASCII decimal digits; normalise each one to
        # its ASCII form so the num_map lookup cannot raise KeyError.
        number = "".join(str(int(ch)) for ch in match.group())

        if len(number) == 2 and number[0] != "0":
            # 10-19 are read without a leading "一" (十二, not 一十二).
            tens = "" if number[0] == "1" else num_map[number[0]]
            ones = "" if number[1] == "0" else num_map[number[1]]
            return tens + "十" + ones

        # Single digits, leading-zero pairs and longer numbers: digit by digit.
        return "".join(num_map[digit] for digit in number)

    return re.sub(r'\d+', number_to_chinese, text)

def _remove_silence(wave, sample_rate):
    """Return *wave* after passing it through the silence remover.

    The data is written to a temporary WAV file, processed by
    ``remove_silence_for_generated_wav`` (which is given only the file name;
    the result is read back from that same path), and the temporary file is
    always deleted afterwards.
    """
    # Close the handle right away and work with the name only, so the file
    # can be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp_name = tmp.name
    try:
        sf.write(tmp_name, wave, sample_rate)
        remove_silence_for_generated_wav(tmp_name)
        trimmed, _ = sf.read(tmp_name)
        return trimmed
    finally:
        try:
            os.remove(tmp_name)
        except OSError:
            # Cleanup is best-effort; never mask the real result or error.
            pass


def generate_audio_files(page_contents, output_dir, F5TTS_ema_model, vocoder, ref_audio_path):
    """Synthesize one narration audio file per script page.

    Args:
        page_contents: Mapping of page number to narration text.
        output_dir: ``pathlib.Path`` of the directory for the audio files;
            created if missing (its parent must already exist).
        F5TTS_ema_model: Model object forwarded to ``infer_process``.
        vocoder: Vocoder object forwarded to ``infer_process``.
        ref_audio_path: Reference audio forwarded to
            ``preprocess_ref_audio_text``.

    Returns:
        Dict mapping page number to the path of ``page_<n>.mp3`` inside
        *output_dir*.

    Raises:
        RuntimeError: If reference-audio preparation or the synthesis of any
            page fails; the original exception is chained as the cause.
    """
    output_dir.mkdir(exist_ok=True)
    audio_files = {}
    if not page_contents:
        return audio_files

    # The reference audio is the same for every page, so prepare it once
    # instead of repeating the work inside the loop.
    try:
        ref_audio_data, ref_text = preprocess_ref_audio_text(ref_audio_path, '')
    except Exception as e:
        raise RuntimeError(f"Reference audio preprocessing failed: {str(e)}") from e

    for page, text in page_contents.items():
        audio_path = output_dir / f"page_{page}.mp3"
        try:
            gen_text = convert_to_chinese_date(text)

            final_wave, final_sample_rate, _ = infer_process(
                ref_audio_data,
                ref_text,
                gen_text,
                F5TTS_ema_model,
                vocoder,
                cross_fade_duration=0.15,
                speed=1.0,
            )

            final_wave = _remove_silence(final_wave, final_sample_rate)
            sf.write(audio_path, final_wave, final_sample_rate)

            audio_files[page] = audio_path

        except Exception as e:
            raise RuntimeError(f"Audio generation failed (Page {page}): {str(e)}") from e

    return audio_files

def insert_audio_to_ppt(ppt_path, audio_files, output_path):
    """Embed per-page audio files into a presentation and save a copy.

    Args:
        ppt_path: Path of the source presentation.
        audio_files: Mapping of 1-based page number to audio file path.
            Pages outside ``1..number of slides`` are skipped.
        output_path: Where the modified presentation is written.
    """
    prs = Presentation(ppt_path)
    slide_count = len(prs.slides)

    for page, audio_path in audio_files.items():
        # Page numbers are 1-based. Without the lower bound, page 0 would
        # index prs.slides[-1] and attach its audio to the last slide.
        if not 1 <= page <= slide_count:
            continue

        slide = prs.slides[page - 1]

        # Audio icon: a 1in x 1in shape placed 1in from the left, 3in from
        # the top.
        left = Inches(1)
        top = Inches(3)
        width = Inches(1)
        height = Inches(1)

        audio_shape = slide.shapes.add_movie(
            str(audio_path),
            left, top, width, height,
            poster_frame_image=None,
            mime_type='audio/mp3'
        )

        # Best-effort attempt to enable auto-play; failures are only logged.
        # NOTE(review): find() is called with a "p:" prefix but no namespace
        # map, so this lookup probably raises and always ends in the warning
        # branch. Confirm, and replace with a namespace-aware lookup.
        try:
            movie_element = audio_shape._element.find(".//p:movie")
            if movie_element is not None:
                movie_element.set("playMode", "auto")
        except Exception as e:
            current_app.logger.warning(f"Auto-play setup failed: {str(e)}")

    prs.save(output_path)

@video_bp.route('/status/<task_id>')
def video_status(task_id):
    """Return the stored state of a task as JSON.

    Unknown task ids yield ``{'status': 'not_found'}``.
    """
    try:
        payload = video_tasks[task_id]
    except KeyError:
        payload = {'status': 'not_found'}
    return jsonify(payload)

@video_bp.route('/download/<task_id>')
def video_download(task_id):
    """Send the finished presentation of a completed task as an attachment.

    Responds with a 404 JSON error when the task is unknown, not yet
    completed, or its result file is no longer on disk.
    """
    task = video_tasks.get(task_id)
    if not task or task.get('status') != 'completed':
        return jsonify({'error': 'File not available'}), 404

    # The result path may be relative to the working directory it was saved
    # under. Make it absolute here instead of prefixing a machine-specific
    # project directory, so the lookup does not depend on where send_file
    # anchors relative paths.
    file_path = os.path.abspath(str(task['result_path']))
    if not os.path.isfile(file_path):
        return jsonify({'error': 'File not available'}), 404

    return send_file(file_path, as_attachment=True)
# Posted on 2025-04-30 17:18 by leapss (blog page footer; views: 16, comments: 0).