import re
import ffmpeg
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import io
import tempfile
import soundfile as sf
from cached_path import cached_path
import os
import sys
from pathlib import Path
f5_tts_src_path = str(Path(__file__).parent / "F5_TTS" / "src")
sys.path.append(f5_tts_src_path)
from Text.teacher import teach
from F5_TTS.src.f5_tts.infer.utils_infer import (
preprocess_ref_audio_text,
infer_process,
remove_silence_for_generated_wav,
load_vocoder,
load_model
)
from F5_TTS.src.f5_tts.model import DiT
from PPT.text_module import text_bp
from PPT.video_module import video_bp
from PPT.video_module import video_upload
# Flask application object; CORS is opened to every origin on every route.
app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

# Blueprints provided by the PPT package.
app.register_blueprint(text_bp, url_prefix='/api/text')
app.register_blueprint(video_bp, url_prefix='/api/video')

app.config.update(
    MAX_CONTENT_LENGTH=100 * 1024 * 1024,  # 100 MiB
    TEXT_UPLOAD_FOLDER='data/ppt/text',
    VIDEO_UPLOAD_FOLDER='data/ppt/video',
)

# Parent of the directory that contains this file, with a trailing slash.
BASE_DIR_PATH = f"{Path(__file__).parent.parent}/"
# Relative folders (note the trailing slashes: some callers concatenate with "+").
AUDIO_FOLDER_PATH = "data/voice/"
PPT_FOLDER_PATH = "data/ppt/"

# Filled in by load_f5tts_model(); the vocoder is loaded eagerly at import time.
F5TTS_ema_model = None
vocoder = load_vocoder()
def load_f5tts_model():
    """Populate the module-level ``F5TTS_ema_model`` if it is still ``None``.

    The checkpoint is resolved through ``cached_path`` and handed to
    ``load_model`` together with the ``DiT`` class and its configuration.
    Calling this again once the global is set does nothing.
    """
    global F5TTS_ema_model
    if F5TTS_ema_model is not None:
        return

    checkpoint = str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))
    dit_config = {
        "dim": 1024,
        "depth": 22,
        "heads": 16,
        "ff_mult": 2,
        "text_dim": 512,
        "conv_layers": 4,
    }
    F5TTS_ema_model = load_model(DiT, dit_config, checkpoint)


# Load the model once when the module is imported.
load_f5tts_model()
def convert_to_chinese_date(text):
    """Replace every run of decimal digits in *text* with Chinese numerals.

    Reading rules, applied per run of digits:

    * two digits without a leading zero are read as a number:
      "10" -> "十", "12" -> "十二", "20" -> "二十", "35" -> "三十五";
    * everything else (one digit, two digits starting with "0", three or
      more digits) is read digit by digit: "7" -> "七", "05" -> "零五",
      "2024" -> "二零二四".

    Non-ASCII decimal digits (for example full-width "１２") are matched by
    the pattern as well and are normalised to 0-9 before conversion.

    Args:
        text: Input string.

    Returns:
        The string with all digit runs replaced; other characters untouched.
    """
    num_map = {"0": "零", "1": "一", "2": "二", "3": "三", "4": "四",
               "5": "五", "6": "六", "7": "七", "8": "八", "9": "九"}

    def number_to_chinese(match):
        # ``\d`` matches any Unicode decimal digit, not only 0-9; int() maps
        # each such character to its numeric value, so the num_map lookup
        # below can never raise KeyError.
        number = "".join(str(int(ch)) for ch in match.group())

        if len(number) == 2 and number[0] != "0":
            # 10-19 drop the leading "一"; a trailing zero is not spoken.
            tens = "" if number[0] == "1" else num_map[number[0]]
            units = "" if number[1] == "0" else num_map[number[1]]
            return tens + "十" + units

        # Single digits, leading-zero pairs ("05") and longer runs (years,
        # codes) are read one digit at a time.
        return "".join(num_map[digit] for digit in number)

    return re.sub(r'\d+', number_to_chinese, text)
@app.route('/generateAudio', methods=['POST'])
def synthesize():
    """Synthesize speech for ``gen_text`` using a reference voice.

    Form fields:
        gen_text: Required. Text to speak; digit runs are converted to
            Chinese numerals first.
        ref_text: Optional transcript of the reference audio (default "").
        ref_audio (file) or ref_audio_name: The reference audio, either
            uploaded or named relative to ``AUDIO_FOLDER_PATH``.

    Returns:
        The generated WAV as an attachment, or a JSON ``{"error": ...}``
        body with status 400 (bad input) or 500 (processing failure).
    """
    if 'gen_text' not in request.form:
        return jsonify({"error": "Missing required parameter: 'gen_text'"}), 400
    gen_text = request.form['gen_text']
    ref_text = request.form.get('ref_text', '')

    # Every temp file created for this request; all are removed in ``finally``.
    temp_paths = []
    try:
        ref_audio_path = None
        if 'ref_audio' in request.files:
            # Reserve a temp name, close the handle, then let the upload
            # write to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                ref_audio_path = temp_audio_file.name
            temp_paths.append(ref_audio_path)
            request.files['ref_audio'].save(ref_audio_path)
        elif 'ref_audio_name' in request.form:
            ref_audio_path = AUDIO_FOLDER_PATH + request.form['ref_audio_name']
            print(ref_audio_path)
            # Reject names that resolve outside the voice folder ("../...").
            voice_root = os.path.abspath(AUDIO_FOLDER_PATH)
            if not os.path.abspath(ref_audio_path).startswith(os.path.join(voice_root, "")):
                return jsonify({"error": "Invalid parameter: 'ref_audio_name'"}), 400
            if not os.path.exists(ref_audio_path):
                return jsonify({"error": f"File not found: {ref_audio_path}"}), 400

        if not ref_audio_path:
            return jsonify({"error": "Missing required parameter: 'ref_audio' or 'ref_audio_name'"}), 400

        gen_text = convert_to_chinese_date(gen_text)
        ref_audio_data, ref_text = preprocess_ref_audio_text(ref_audio_path, ref_text)
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio_data,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            cross_fade_duration=0.15,
            speed=1.0,
        )

        # remove_silence_for_generated_wav works on a file path, so the audio
        # takes a round trip through a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_generated_audio:
            generated_path = temp_generated_audio.name
        temp_paths.append(generated_path)
        sf.write(generated_path, final_wave, final_sample_rate)
        remove_silence_for_generated_wav(generated_path)
        final_wave, _ = sf.read(generated_path)

        # Serve from memory so nothing on disk has to outlive the request.
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, final_wave, final_sample_rate, format='WAV')
        audio_buffer.seek(0)
        return send_file(
            audio_buffer,
            as_attachment=True,
            download_name="synthesized_audio.wav",
            mimetype="audio/wav"
        )
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the real response.
                pass
@app.route('/teacher',methods=['POST'])
def teacher():
    """Rewrite ``gen_text`` with ``teach`` and synthesize the result.

    Form fields:
        gen_text: Required. Digit runs are converted to Chinese numerals,
            then the text is passed through ``teach`` (imported from
            ``Text.teacher``; what it does to the text is not visible here).
        ref_text: Optional transcript of the reference audio (default "").
        ref_audio_name: Required. Reference audio, named relative to
            ``AUDIO_FOLDER_PATH``.

    Returns:
        The generated WAV as an attachment, or a JSON ``{"error": ...}``
        body with status 400 (bad input) or 500 (processing failure).
    """
    if 'gen_text' not in request.form:
        return jsonify({"error": "Missing required parameter: 'gen_text'"}), 400
    gen_text = request.form['gen_text']
    ref_text = request.form.get('ref_text', '')

    # Without a reference voice the pipeline would be handed None and fail
    # with an opaque error, so reject the request up front.
    if 'ref_audio_name' not in request.form:
        return jsonify({"error": "Missing required parameter: 'ref_audio_name'"}), 400
    ref_audio_path = AUDIO_FOLDER_PATH + request.form['ref_audio_name']
    print(ref_audio_path)
    # Reject names that resolve outside the voice folder ("../...").
    voice_root = os.path.abspath(AUDIO_FOLDER_PATH)
    if not os.path.abspath(ref_audio_path).startswith(os.path.join(voice_root, "")):
        return jsonify({"error": "Invalid parameter: 'ref_audio_name'"}), 400
    if not os.path.exists(ref_audio_path):
        return jsonify({"error": f"File not found: {ref_audio_path}"}), 400

    generated_path = None
    try:
        gen_text = convert_to_chinese_date(gen_text)
        gen_text = teach(gen_text)
        ref_audio_data, ref_text = preprocess_ref_audio_text(ref_audio_path, ref_text)
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio_data,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            cross_fade_duration=0.15,
            speed=1.0,
        )

        # remove_silence_for_generated_wav works on a file path, so the audio
        # takes a round trip through a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_generated_audio:
            generated_path = temp_generated_audio.name
        sf.write(generated_path, final_wave, final_sample_rate)
        remove_silence_for_generated_wav(generated_path)
        final_wave, _ = sf.read(generated_path)

        # Serve from memory so nothing on disk has to outlive the request.
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, final_wave, final_sample_rate, format='WAV')
        audio_buffer.seek(0)
        return send_file(
            audio_buffer,
            as_attachment=True,
            download_name="synthesized_audio.wav",
            mimetype="audio/wav"
        )
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        if generated_path is not None:
            try:
                os.remove(generated_path)
            except OSError:
                # Best-effort cleanup; never mask the real response.
                pass
@app.route('/voiceList',methods=['GET'])
def list_voice():
    """Return the names, without extension, of the files in the voice folder.

    Only entries of ``AUDIO_FOLDER_PATH`` for which ``os.path.isfile`` is true
    are listed. Responds with ``{"voiceList": [...]}``, or a JSON error with
    status 500 if the folder cannot be read.
    """
    try:
        names = []
        for entry in os.listdir(AUDIO_FOLDER_PATH):
            if not os.path.isfile(os.path.join(AUDIO_FOLDER_PATH, entry)):
                continue
            stem, _extension = os.path.splitext(entry)
            names.append(stem)
        return jsonify({"voiceList": names})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route('/getAudio', methods=['GET'])
def get_audio():
    """Stream ``<fileName>.mp3`` from the voice folder.

    Query parameters:
        fileName: Required. File name without the ``.mp3`` extension.

    Returns:
        The MP3 inline (``audio/mpeg``), or a JSON error with status 400
        (missing or invalid name), 404 (no such file) or 500 (other failure).
    """
    try:
        file_name = request.args.get('fileName')
        if not file_name:
            return jsonify({"error": "缺少 fileName 参数"}), 400

        voice_root = os.path.normpath(os.path.join(BASE_DIR_PATH, AUDIO_FOLDER_PATH))
        audio_file_path = os.path.normpath(os.path.join(voice_root, file_name + ".mp3"))
        # After normalisation the target must still sit inside the voice
        # folder; this rejects "../" segments and absolute paths.
        if not audio_file_path.startswith(os.path.join(voice_root, "")):
            return jsonify({"error": "fileName 参数无效"}), 400

        return send_file(audio_file_path, mimetype="audio/mpeg", as_attachment=False)
    except FileNotFoundError:
        # A missing file is a client-side miss, not a server fault.
        return jsonify({"error": "音频文件不存在"}), 404
    except Exception as e:
        print(str(e))
        return jsonify({"error": str(e)}), 500
@app.route('/cutAudio', methods=['POST'])
def cut_audio():
    """Cut a time range out of ``<fileName>.mp3`` with FFmpeg.

    JSON body:
        fileName: Name of the source file in the voice folder, without
            the ``.mp3`` extension and without directory components.
        startTime: Start of the range in seconds.
        endTime: End of the range in seconds; must be greater than
            ``startTime``.

    The result is written to ``cut_<fileName>.mp3`` in the same folder,
    replacing any earlier cut of the same file.

    Returns:
        ``{"audioUrl": ...}`` naming the file that was written, or a JSON
        error with status 400 (bad input), 404 (source missing) or 500.
    """
    try:
        # silent=True turns a missing or non-JSON body into None instead of
        # an exception.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "请求体必须是 JSON 对象"}), 400

        file_name = data.get('fileName')
        if (not isinstance(file_name, str) or not file_name
                or file_name in (".", "..")
                or os.path.basename(file_name.replace("\\", "/")) != file_name):
            # The name is spliced into two paths below, so it must be a
            # plain file name.
            return jsonify({"error": "fileName 参数无效"}), 400

        try:
            start_time = float(data.get('startTime'))
            end_time = float(data.get('endTime'))
        except (TypeError, ValueError):
            return jsonify({"error": "startTime/endTime 必须是数字"}), 400
        # Written as a chained comparison so that NaN is rejected too.
        if not (0 <= start_time < end_time):
            return jsonify({"error": "startTime/endTime 范围无效"}), 400

        audio_file_path = os.path.join(AUDIO_FOLDER_PATH, file_name + ".mp3")
        if not os.path.isfile(audio_file_path):
            return jsonify({"error": "音频文件不存在"}), 404

        output_name = f"cut_{file_name}.mp3"
        output_file_path = os.path.join(AUDIO_FOLDER_PATH, output_name)

        # overwrite_output avoids a failure (or a prompt on stdin) when a
        # previous cut of the same file already exists.
        (
            ffmpeg
            .input(audio_file_path, ss=start_time, t=end_time - start_time)
            .output(output_file_path)
            .run(overwrite_output=True)
        )

        # Report the name of the file actually written above.
        return jsonify({
            'audioUrl': f"/audio/{output_name}"
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route('/reUploadAudio', methods=['POST'])
def re_upload_audio():
    """Store an uploaded audio file in the voice folder.

    Form fields:
        audio (file): Required. Its file name must end in ``.mp3``,
            ``.wav`` or ``.m4a``.
        fileName: Optional name to save under; defaults to the uploaded
            file's own name. Directory components are discarded.

    Returns:
        A JSON success message (200), or a JSON error with status 400
        (bad input) or 500 (save failure).
    """
    try:
        if 'audio' not in request.files:
            return jsonify({"error": "未找到文件"}), 400
        audio = request.files['audio']

        # ``filename`` may be empty or None when the client sent no name.
        if not audio.filename:
            return jsonify({"error": "文件名不能为空"}), 400

        # Require an actual ".ext" suffix; a bare name such as "mp3" must
        # not count as having the extension "mp3".
        allowed_extensions = {'mp3', 'wav', 'm4a'}
        uploaded_name = audio.filename
        extension = uploaded_name.rsplit('.', 1)[1].lower() if '.' in uploaded_name else ''
        if extension not in allowed_extensions:
            return jsonify({"error": "不支持的文件格式"}), 400

        # Keep only the final path component so the client cannot direct
        # the write outside the voice folder.
        requested_name = request.form.get('fileName', uploaded_name)
        safe_name = os.path.basename(requested_name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            return jsonify({"error": "文件名不能为空"}), 400

        file_path = os.path.join(AUDIO_FOLDER_PATH, safe_name)
        audio.save(file_path)
        return jsonify({
            "message": "文件上传成功"
        }), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route("/addAudio", methods=["POST"])
def add_audio():
    """Save an uploaded recording and convert it to MP3.

    Form fields:
        audioFile (file): Required. The recording.
        fileName: Required. Base name (no extension, no directory parts)
            for the stored files.

    The upload is saved as ``temp/<fileName>.wav`` under the voice folder
    and converted with FFmpeg to ``<fileName>.mp3`` (192k) in the voice
    folder, overwriting an existing MP3 of that name.

    Returns:
        A JSON success message with both paths (200), or a JSON error with
        status 400 (bad input) or 500 (save or conversion failure).
    """
    try:
        audio_file = request.files.get('audioFile')
        file_name = request.form.get('fileName')
        if audio_file is None or not file_name:
            return jsonify({"error": "缺少 audioFile 或 fileName 参数"}), 400

        # The name is spliced into two write paths, so it must be a plain
        # file name without directory components.
        if (file_name in (".", "..")
                or os.path.basename(file_name.replace("\\", "/")) != file_name):
            return jsonify({"error": "fileName 参数无效"}), 400

        wav_path = os.path.join(BASE_DIR_PATH, AUDIO_FOLDER_PATH, "temp/", file_name + '.wav')
        mp3_path = os.path.join(BASE_DIR_PATH, AUDIO_FOLDER_PATH, file_name + '.mp3')

        # The temp folder may not exist yet (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(wav_path), exist_ok=True)
        audio_file.save(wav_path)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    try:
        (
            ffmpeg
            .input(wav_path)
            .output(mp3_path, format="mp3", audio_bitrate="192k")
            .run(overwrite_output=True)
        )
    except Exception as e:
        return jsonify({"error": f"音频转换失败: {str(e)}"}), 500
    return jsonify({"message": "文件上传成功", "wav_path": wav_path, "mp3_path": mp3_path}), 200
@app.route("/generateAudioCourseware",methods=["POST"])
def generate_audio_courseware():
    """Accept a PPT/PPTX upload and store it in ``PPT_FOLDER_PATH``.

    Form fields:
        ppt (file): Required. File name must end in ``.ppt`` or ``.pptx``.
        voice: Optional voice name (default "default"). It is only printed
            here; no audio is generated by this handler yet.

    Returns:
        A JSON success message (200), or a JSON error with status 400
        (bad input) or 500 (save failure).
    """
    try:
        if 'ppt' not in request.files:
            return jsonify({"error": "未接收到PPT文件"}), 400
        ppt = request.files['ppt']

        # Keep only the final path component so the client cannot direct
        # the write outside the PPT folder.
        filename = os.path.basename((ppt.filename or "").replace("\\", "/"))

        # A name without any "." has no extension and is rejected too.
        allowed_extensions = {'ppt', 'pptx'}
        extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
        if extension not in allowed_extensions:
            return jsonify({"error": "仅支持PPT/PPTX格式文件"}), 400

        # Read the optional parameter before touching the disk, so a request
        # without it no longer fails after the file has been written.
        voice = request.form.get('voice', 'default')
        print(voice)

        save_path = os.path.join(PPT_FOLDER_PATH, filename)
        ppt.save(save_path)

        return jsonify({
            "success": True,
            "message": "PPT上传成功",
        }), 200
    except Exception as e:
        return jsonify({"error": f"音频转换失败: {str(e)}"}), 500
@app.route("/api/video/upload",methods=['POST'])
def audio_upload():
    """Hand an uploaded PPT and script to ``video_upload``.

    Form fields:
        ppt (file): Required.
        script (file): Required.

    ``video_upload`` is called with both files, the loaded TTS model and
    vocoder, and the fixed reference voice ``标准.mp3`` from the voice folder.

    NOTE(review): ``video_bp`` is also registered under ``/api/video``;
    confirm it does not define its own ``/upload`` route.

    Returns:
        ``{"task_id": ...}`` on success, or a JSON error with status 400
        (a file is missing) or 500 (``video_upload`` raised).
    """
    ppt_file = request.files.get('ppt')
    script_file = request.files.get('script')
    if ppt_file is None or script_file is None:
        # Fail with a clear client error rather than reaching video_upload
        # with an unbound local.
        return jsonify({"error": "缺少 ppt 或 script 文件"}), 400

    ref_audio_path = AUDIO_FOLDER_PATH + "标准.mp3"
    try:
        task_id = video_upload(ppt_file, script_file, F5TTS_ema_model, vocoder, ref_audio_path)
        return jsonify({"task_id": task_id})
    except Exception as e:
        return jsonify({"error": f"音频转换失败: {str(e)}"}), 500
# File served by /download_video. The default is a machine-specific path;
# set the DOWNLOAD_VIDEO_FILE environment variable to use another file
# without editing the source.
VIDEO_FILE = os.environ.get(
    "DOWNLOAD_VIDEO_FILE",
    r"C:\Users\leap\Documents\3ed5f3a38bc88882cbc2a342de574d57.mp4",
)


@app.route('/download_video')
def download_video():
    """Send the MP4 at ``VIDEO_FILE`` as a download.

    Returns:
        The file as an attachment named ``processed_video.mp4``, or a JSON
        error with status 404 (file missing) or 500 (other failure).
    """
    try:
        return send_file(
            VIDEO_FILE,
            as_attachment=True,  # download rather than play inline
            download_name="processed_video.mp4",  # name suggested to the client
            mimetype='video/mp4'
        )
    except FileNotFoundError:
        return jsonify({'error': '视频文件不存在'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Run Flask's built-in server on the loopback interface only, port 7860,
    # with debug mode off.
    app.run(debug=False, port=7860, host='127.0.0.1')
# (Web-page footer residue, not part of the program: 浙公网安备 33010602011771号)