import re
import ffmpeg
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import io
import tempfile
import soundfile as sf
from cached_path import cached_path
import os
import sys
from pathlib import Path

# Append <directory of this file>/F5_TTS/src to sys.path before the project
# imports that follow.
# NOTE(review): presumably needed so absolute `f5_tts` imports inside the
# vendored package resolve — confirm.
f5_tts_src_path = str(Path(__file__).parent / "F5_TTS" / "src")
sys.path.append(f5_tts_src_path)

from Text.teacher import teach
from F5_TTS.src.f5_tts.infer.utils_infer import (
    preprocess_ref_audio_text,
    infer_process,
    remove_silence_for_generated_wav,
    load_vocoder, 
    load_model
)
from F5_TTS.src.f5_tts.model import DiT

from PPT.text_module import text_bp
from PPT.video_module import video_bp
from PPT.video_module import video_upload

# Flask application object shared by every route in this module.
app = Flask(__name__)
# CORS is enabled for every route and every origin.
CORS(app, resources={r"/*": {"origins": "*"}})
# Blueprints from the PPT package are mounted under /api/text and /api/video.
# NOTE(review): this module also registers its own "/api/video/upload" route —
# confirm it does not collide with a route defined inside video_bp.
app.register_blueprint(text_bp, url_prefix='/api/text')
app.register_blueprint(video_bp, url_prefix='/api/video')
app.config.update({
    # 100 MiB, expressed in bytes.
    'MAX_CONTENT_LENGTH': 100 * 1024 * 1024,
    'TEXT_UPLOAD_FOLDER': 'data/ppt/text',
    'VIDEO_UPLOAD_FOLDER': 'data/ppt/video'
})

# Parent of the directory containing this file, with a trailing "/".
BASE_DIR_PATH =str(Path(__file__).parent.parent)+"/"
# Relative folders: some routes join them onto BASE_DIR_PATH, others use them
# as-is (i.e. relative to the process working directory).
AUDIO_FOLDER_PATH = "data/voice/"
PPT_FOLDER_PATH = "data/ppt/"

# Module-level model handle; starts as None and is assigned by load_f5tts_model().
F5TTS_ema_model = None
# The vocoder is loaded once, when this module is imported.
vocoder = load_vocoder()

def load_f5tts_model():
    """Populate the module-level ``F5TTS_ema_model`` if it is still ``None``.

    The checkpoint location is resolved through ``cached_path`` and handed,
    together with the DiT configuration, to ``load_model``. When a model is
    already present the function returns without doing anything.
    """
    global F5TTS_ema_model

    # Already loaded: nothing to do.
    if F5TTS_ema_model is not None:
        return

    checkpoint = str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))
    dit_config = {
        "dim": 1024,
        "depth": 22,
        "heads": 16,
        "ff_mult": 2,
        "text_dim": 512,
        "conv_layers": 4,
    }
    F5TTS_ema_model = load_model(DiT, dit_config, checkpoint)

# Load the model eagerly when this module is imported, rather than on first request.
load_f5tts_model()

def convert_to_chinese_date(text):
    """Replace every run of digits in ``text`` with its Chinese reading.

    Rules, applied to each maximal run of decimal digits:

    * Two-digit numbers without a leading zero are read as a quantity,
      e.g. ``12`` -> ``十二``, ``10`` -> ``十``, ``45`` -> ``四十五``.
    * Everything else (single digits, runs of three or more digits, and
      two-digit runs with a leading zero such as ``05``) is read digit by
      digit, e.g. ``2024`` -> ``二零二四``, ``05`` -> ``零五``.

    Non-ASCII decimal digits (for example full-width ``１２``) are converted
    the same way as their ASCII counterparts.

    Args:
        text: Input string; non-digit characters are left untouched.

    Returns:
        The string with all digit runs converted.
    """
    cn_digits = "零一二三四五六七八九"

    def number_to_chinese(match):
        # int() accepts every character that r'\d' can match, so indexing by
        # value cannot fail the way a lookup keyed on ASCII characters would.
        values = [int(ch) for ch in match.group()]

        if len(values) == 2 and values[0] != 0:
            tens, ones = values
            # 10-19 are read without a leading "一" (十二, not 一十二).
            head = "十" if tens == 1 else cn_digits[tens] + "十"
            return head + (cn_digits[ones] if ones else "")

        # Single digits, 3+ digit runs, and leading-zero pairs: digit by digit.
        return "".join(cn_digits[value] for value in values)

    return re.sub(r'\d+', number_to_chinese, text)


@app.route('/generateAudio', methods=['POST'])
def synthesize():
    """Synthesize speech for ``gen_text`` using a reference audio clip.

    Form fields:
        gen_text: Text to synthesize (required).
        ref_text: Text accompanying the reference audio (optional, default '').
        ref_audio_name: Name of a file under ``AUDIO_FOLDER_PATH``; used only
            when no ``ref_audio`` file is uploaded.

    Files:
        ref_audio: Uploaded reference audio; takes precedence over
            ``ref_audio_name``.

    Returns:
        The generated audio as a WAV attachment, or a JSON ``{"error": ...}``
        body with status 400 (bad input) or 500 (processing failure).
    """
    # Validate and parse input
    if 'gen_text' not in request.form:
        return jsonify({"error": "Missing required parameter: 'gen_text'"}), 400

    gen_text = request.form['gen_text']
    ref_text = request.form.get('ref_text', '')
    ref_audio_path = None
    # Temporary files created for this request; always removed in `finally`.
    temp_paths = []

    try:
        if 'ref_audio' in request.files:
            # Save uploaded reference audio file to a temporary location
            ref_audio = request.files['ref_audio']
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                ref_audio_path = temp_audio_file.name
            temp_paths.append(ref_audio_path)
            ref_audio.save(ref_audio_path)
        elif 'ref_audio_name' in request.form:
            # Use a reference audio file that already lives in the audio folder
            ref_audio_path = AUDIO_FOLDER_PATH + request.form['ref_audio_name']
            print(ref_audio_path)
            # Reject names that resolve outside the audio folder (e.g. "../x").
            audio_root = os.path.realpath(AUDIO_FOLDER_PATH)
            if not os.path.realpath(ref_audio_path).startswith(audio_root + os.sep):
                return jsonify({"error": "Invalid parameter: 'ref_audio_name'"}), 400
            if not os.path.exists(ref_audio_path):
                return jsonify({"error": f"File not found: {ref_audio_path}"}), 400

        if not ref_audio_path:
            return jsonify({"error": "Missing required parameter: 'ref_audio' or 'ref_audio_name'"}), 400

        # Convert digits in gen_text to their Chinese reading
        gen_text = convert_to_chinese_date(gen_text)

        # Preprocess reference audio and text
        ref_audio_data, ref_text = preprocess_ref_audio_text(ref_audio_path, ref_text)

        # Synthesize speech
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio_data,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            cross_fade_duration=0.15,
            speed=1.0,
        )

        # Remove silences from generated audio. The helper works on a file
        # path, so round-trip through a temporary WAV file. The handle is
        # closed first so the file can be rewritten by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_generated_audio:
            generated_path = temp_generated_audio.name
        temp_paths.append(generated_path)
        sf.write(generated_path, final_wave, final_sample_rate)
        remove_silence_for_generated_wav(generated_path)
        final_wave, _ = sf.read(generated_path)

        # Convert synthesized audio to bytes
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, final_wave, final_sample_rate, format='WAV')
        audio_buffer.seek(0)

        return send_file(
            audio_buffer,
            as_attachment=True,
            download_name="synthesized_audio.wav",
            mimetype="audio/wav"
        )

    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: a failed delete must not mask the real response.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                pass

@app.route('/teacher',methods=['POST'])
def teacher():
    """Synthesize speech for ``gen_text`` after passing it through ``teach()``.

    Form fields:
        gen_text: Source text (required). Digits are converted with
            ``convert_to_chinese_date`` and the result is passed to ``teach``.
        ref_audio_name: Name of a reference audio file under
            ``AUDIO_FOLDER_PATH`` (required).
        ref_text: Text accompanying the reference audio (optional, default '').

    Returns:
        The generated audio as a WAV attachment, or a JSON ``{"error": ...}``
        body with status 400 (bad input) or 500 (processing failure).
    """
    if 'gen_text' not in request.form:
        return jsonify({"error": "Missing required parameter: 'gen_text'"}), 400

    # Without a reference audio name there is nothing to pass to preprocessing;
    # report it as a client error instead of failing later with a 500.
    if 'ref_audio_name' not in request.form:
        return jsonify({"error": "Missing required parameter: 'ref_audio_name'"}), 400

    gen_text = request.form['gen_text']
    ref_text = request.form.get('ref_text', '')

    ref_audio_path = AUDIO_FOLDER_PATH + request.form['ref_audio_name']
    print(ref_audio_path)
    # Reject names that resolve outside the audio folder (e.g. "../x").
    audio_root = os.path.realpath(AUDIO_FOLDER_PATH)
    if not os.path.realpath(ref_audio_path).startswith(audio_root + os.sep):
        return jsonify({"error": "Invalid parameter: 'ref_audio_name'"}), 400
    if not os.path.exists(ref_audio_path):
        return jsonify({"error": f"File not found: {ref_audio_path}"}), 400

    generated_path = None
    try:
        gen_text = convert_to_chinese_date(gen_text)
        gen_text = teach(gen_text)

        ref_audio_data, ref_text = preprocess_ref_audio_text(ref_audio_path, ref_text)

        final_wave, final_sample_rate, _ = infer_process(
            ref_audio_data,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            cross_fade_duration=0.15,
            speed=1.0,
        )

        # Remove silences from generated audio. The helper works on a file
        # path, so round-trip through a temporary WAV file. The handle is
        # closed first so the file can be rewritten by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_generated_audio:
            generated_path = temp_generated_audio.name
        sf.write(generated_path, final_wave, final_sample_rate)
        remove_silence_for_generated_wav(generated_path)
        final_wave, _ = sf.read(generated_path)

        # Convert synthesized audio to bytes
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, final_wave, final_sample_rate, format='WAV')
        audio_buffer.seek(0)

        return send_file(
            audio_buffer,
            as_attachment=True,
            download_name="synthesized_audio.wav",
            mimetype="audio/wav"
        )

    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup of the temporary WAV; never mask the response.
        if generated_path is not None:
            try:
                os.remove(generated_path)
            except OSError:
                pass

@app.route('/voiceList',methods=['GET'])
def list_voice():
    """Return the extension-less names of the regular files in ``AUDIO_FOLDER_PATH``.

    Responds with ``{"voiceList": [...]}``, or ``{"error": ...}`` and status
    500 if the folder cannot be listed.
    """
    try:
        stems = []
        for entry in os.listdir(AUDIO_FOLDER_PATH):
            # Skip sub-directories and anything else that is not a file.
            if not os.path.isfile(os.path.join(AUDIO_FOLDER_PATH, entry)):
                continue
            stem, _extension = os.path.splitext(entry)
            stems.append(stem)
        return jsonify({"voiceList": stems})
    except Exception as err:
        return jsonify({"error": str(err)}), 500
    
@app.route('/getAudio', methods=['GET'])
def get_audio():
    """Send ``<fileName>.mp3`` from the audio folder.

    Query parameters:
        fileName: Audio file name without the ``.mp3`` extension (required).

    Returns:
        The MP3 file inline (``audio/mpeg``), or a JSON ``{"error": ...}`` body
        with status 400 (missing or invalid name), 404 (no such file) or
        500 (unexpected failure).
    """
    try:
        file_name = request.args.get('fileName')  # audio file name from the query string
        if not file_name:
            return jsonify({"error": "缺少 fileName 参数"}), 400

        # Build the full path (the extension is always .mp3) and make sure it
        # still points inside the audio folder after resolving ".." segments.
        audio_dir = os.path.realpath(os.path.join(BASE_DIR_PATH, AUDIO_FOLDER_PATH))
        audio_file_path = os.path.realpath(os.path.join(audio_dir, file_name + ".mp3"))
        if not audio_file_path.startswith(audio_dir + os.sep):
            return jsonify({"error": "非法的 fileName 参数"}), 400

        # A missing file is a client-visible "not found", not a server error.
        if not os.path.isfile(audio_file_path):
            return jsonify({"error": "音频文件不存在"}), 404

        return send_file(audio_file_path, mimetype="audio/mpeg", as_attachment=False)
    except Exception as e:
        print(str(e))
        return jsonify({"error": str(e)}), 500

@app.route('/cutAudio', methods=['POST'])
def cut_audio():
    """Cut a time range out of ``<fileName>.mp3`` into ``cut_<fileName>.mp3``.

    JSON body:
        fileName: Audio file name without the ``.mp3`` extension.
        startTime: Start of the range, as a number passed to ffmpeg's ``ss``.
        endTime: End of the range; must be greater than ``startTime``.

    Returns:
        ``{"audioUrl": ...}`` on success, or a JSON ``{"error": ...}`` body with
        status 400 (bad input) or 500 (ffmpeg or other failure).
    """
    try:
        # silent=True: a missing or malformed JSON body yields None, which is
        # reported as a 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "请求体必须是 JSON 对象"}), 400

        file_name = data.get('fileName')
        if not isinstance(file_name, str) or not file_name:
            return jsonify({"error": "缺少 fileName 参数"}), 400

        try:
            start_time = float(data.get('startTime'))
            end_time = float(data.get('endTime'))
        except (TypeError, ValueError):
            return jsonify({"error": "startTime 和 endTime 必须是数字"}), 400
        # Written as a chained comparison so NaN values are rejected as well.
        if not (0 <= start_time < end_time):
            return jsonify({"error": "endTime 必须大于 startTime"}), 400

        # Build the input and output paths inside the audio folder
        audio_file_path = os.path.join(AUDIO_FOLDER_PATH, file_name + ".mp3")
        output_file_path = os.path.join(AUDIO_FOLDER_PATH, f"cut_{file_name}.mp3")

        # Reject names that resolve outside the audio folder (e.g. "../x").
        audio_root = os.path.realpath(AUDIO_FOLDER_PATH)
        for candidate in (audio_file_path, output_file_path):
            if not os.path.realpath(candidate).startswith(audio_root + os.sep):
                return jsonify({"error": "非法的 fileName 参数"}), 400

        # Cut with FFmpeg. overwrite_output=True so repeating a cut replaces
        # the previous result instead of ffmpeg stopping to ask for confirmation.
        ffmpeg.input(
            audio_file_path, ss=start_time, t=end_time - start_time
        ).output(output_file_path).run(overwrite_output=True)

        # NOTE(review): this URL names "<fileName>_cut.mp3" while the file
        # written above is "cut_<fileName>.mp3", and no "/audio/" route is
        # defined in this module — confirm what the client expects.
        return jsonify({
            'audioUrl': f"/audio/{file_name}_cut.mp3"
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500
    
@app.route('/reUploadAudio', methods=['POST'])
def re_upload_audio():
    """Save an uploaded audio file into the audio folder.

    Files:
        audio: The audio file; its own filename must end in mp3, wav or m4a.

    Form fields:
        fileName: Name to save the file under (optional; defaults to the
            uploaded file's own name).

    Returns:
        A JSON success message, or a JSON ``{"error": ...}`` body with status
        400 (bad input) or 500 (unexpected failure).
    """
    try:
        if 'audio' not in request.files:
            return jsonify({"error": "未找到文件"}), 400

        audio = request.files['audio']

        # The uploaded file must carry a name
        if not audio.filename:
            return jsonify({"error": "文件名不能为空"}), 400

        # Only specific audio formats are accepted. splitext() is used so a
        # bare name such as "mp3" (no dot) is not mistaken for an extension.
        allowed_extensions = {'mp3', 'wav', 'm4a'}
        extension = os.path.splitext(audio.filename)[1].lstrip('.').lower()
        if extension not in allowed_extensions:
            return jsonify({"error": "不支持的文件格式"}), 400

        # Fall back to the upload's own name when fileName is absent or empty.
        target_name = request.form.get('fileName') or audio.filename

        file_path = os.path.join(AUDIO_FOLDER_PATH, target_name)
        # Reject names that resolve outside the audio folder (e.g. "../x").
        audio_root = os.path.realpath(AUDIO_FOLDER_PATH)
        if not os.path.realpath(file_path).startswith(audio_root + os.sep):
            return jsonify({"error": "非法的文件名"}), 400

        audio.save(file_path)

        return jsonify({
            "message": "文件上传成功"
        }), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/addAudio", methods=["POST"])
def add_audio():
    """Store an uploaded WAV under ``temp/`` and convert it to MP3.

    Files:
        audioFile: The audio to store (required).

    Form fields:
        fileName: Base name, without extension, for both output files (required).

    Returns:
        A JSON success message with ``wav_path`` and ``mp3_path``, or a JSON
        ``{"error": ...}`` body with status 400 (bad input) or 500 (save or
        conversion failure).
    """
    try:
        audio_file = request.files.get('audioFile')
        file_name = request.form.get('fileName')
        # Missing fields are a client error, not a server failure.
        if audio_file is None or not file_name:
            return jsonify({"error": "缺少 audioFile 或 fileName 参数"}), 400

        wav_path = os.path.join(BASE_DIR_PATH,AUDIO_FOLDER_PATH, "temp/", file_name+'.wav')
        mp3_path = os.path.join(BASE_DIR_PATH,AUDIO_FOLDER_PATH, file_name+'.mp3')

        # Reject names that resolve outside the audio folder (e.g. "../x").
        audio_root = os.path.realpath(os.path.join(BASE_DIR_PATH, AUDIO_FOLDER_PATH))
        for candidate in (wav_path, mp3_path):
            if not os.path.realpath(candidate).startswith(audio_root + os.sep):
                return jsonify({"error": "非法的 fileName 参数"}), 400

        # The temp folder may not exist yet on a fresh checkout.
        os.makedirs(os.path.dirname(wav_path), exist_ok=True)
        audio_file.save(wav_path)

        try:
            ffmpeg.input(wav_path).output(mp3_path, format="mp3", audio_bitrate="192k").run(overwrite_output=True)
        except Exception as e:
            return jsonify({"error": f"音频转换失败: {str(e)}"}), 500

        return jsonify({"message": "文件上传成功", "wav_path": wav_path, "mp3_path": mp3_path}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route("/generateAudioCourseware",methods=["POST"])
def generate_audio_courseware():
    """Accept a PPT/PPTX upload and store it under ``PPT_FOLDER_PATH``.

    Files:
        ppt: The presentation file; its name must end in ``.ppt`` or ``.pptx``.

    Form fields:
        voice: Selected voice (required; currently only printed).

    Returns:
        A JSON success message, or a JSON ``{"error": ...}`` body with status
        400 (bad input) or 500 (unexpected failure).
    """
    try:
        # Make sure a file was uploaded
        if 'ppt' not in request.files:
            return jsonify({"error": "未接收到PPT文件"}), 400

        ppt = request.files['ppt']

        # Keep only the final path component of the client-supplied name so
        # the upload cannot be written outside the PPT folder.
        filename = os.path.basename((ppt.filename or "").replace("\\", "/"))

        # Check the extension. A name with no extension is rejected too,
        # instead of skipping the check.
        allowed_extensions = {'ppt', 'pptx'}
        extension = os.path.splitext(filename)[1].lstrip('.').lower()
        if extension not in allowed_extensions:
            return jsonify({"error": "仅支持PPT/PPTX格式文件"}), 400

        # Validate the remaining input before anything is written to disk.
        voice = request.form.get('voice')
        if voice is None:
            return jsonify({"error": "缺少 voice 参数"}), 400

        save_path = os.path.join(PPT_FOLDER_PATH, filename)
        ppt.save(save_path)
        print(voice)

        # TODO: avoid clobbering an existing file of the same name, and add the
        # PPT-to-audio conversion step.
        return jsonify({
            "success": True,
            "message": "PPT上传成功",
        }), 200

    except Exception as e:
        return jsonify({"error": f"音频转换失败: {str(e)}"}), 500

@app.route("/api/video/upload",methods=['POST'])
def audio_upload():
    """Hand an uploaded PPT and script to ``video_upload`` and return its task id.

    Files:
        ppt: The presentation file (required).
        script: The script file (required).

    The reference voice is fixed to ``标准.mp3`` in ``AUDIO_FOLDER_PATH``.

    Returns:
        ``{"task_id": ...}`` on success, or a JSON ``{"error": ...}`` body with
        status 400 (missing file) or 500 (failure inside ``video_upload``).
    """
    # Both parts are required. Check explicitly so a missing one is reported
    # as a 400, not as an unbound-variable error turned into a 500.
    ppt_file = request.files.get('ppt')
    if ppt_file is None:
        return jsonify({"error": "未接收到PPT文件"}), 400

    script_file = request.files.get('script')
    if script_file is None:
        return jsonify({"error": "未接收到脚本文件"}), 400

    ref_audio_path = AUDIO_FOLDER_PATH + "标准.mp3"
    try:
        task_id = video_upload(ppt_file, script_file, F5TTS_ema_model, vocoder, ref_audio_path)
        return jsonify({"task_id": task_id})
    except Exception as e:
        return jsonify({"error": f"音频转换失败: {str(e)}"}), 500

# File returned by the /download_video route.
# NOTE(review): hard-coded, machine-specific absolute path — presumably a
# development placeholder; confirm before deploying elsewhere.
VIDEO_FILE = r"C:\Users\leap\Documents\3ed5f3a38bc88882cbc2a342de574d57.mp4"

@app.route('/download_video')
def download_video():
    """Serve the file at ``VIDEO_FILE`` as an MP4 download.

    Returns the file as an attachment named ``processed_video.mp4``, a JSON
    error with status 404 when the file does not exist, or a JSON error with
    status 500 for any other failure.
    """
    send_options = {
        "as_attachment": True,                   # download, do not play inline
        "download_name": "processed_video.mp4",  # name suggested to the client
        "mimetype": 'video/mp4',                 # explicit MIME type
    }
    try:
        response = send_file(VIDEO_FILE, **send_options)
    except FileNotFoundError:
        return jsonify({'error': '视频文件不存在'}), 404
    except Exception as err:
        return jsonify({'error': str(err)}), 500
    return response

# When executed as a script, start Flask's built-in server on 127.0.0.1:7860
# with debug mode off.
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=7860, debug=False)

 

# Scraped blog footer (not part of the program): posted on 2025-04-17 22:39 by leapss · views (11) · comments (0) · favorite · report