语音合成

前端

<template>
  <div class="cartoon-theme" style="padding: 20px">
    <!-- Banner image at the top of the page. -->
    <div class="hero-banner">
      <img src="@/image/sound.png" class="hero-image" alt="语音测试" />
    </div>

    <!-- Two panes: story list and actions on the left, details of the selected story on the right. -->
    <div class="split-layout">
      <div class="left-pane">
        <el-card class="fixed-panel">
          <template #header>
            <div class="card-header">
              <div class="card-title"><span class="card-icon">📚</span><span>故事列表与操作</span></div>
            </div>
          </template>

          <!-- Keyword search. The placeholder is not an accessible name, so aria-label is set too. -->
          <div class="actions-row" style="margin-bottom: 12px">
            <el-input v-model="searchForm.keywords" placeholder="关键词" aria-label="关键词" style="width: 260px" />
            <el-button type="primary" @click="searchStories">搜索</el-button>
            <el-button @click="resetSearch">重置</el-button>
          </div>
          <!-- Progress bar shown while genProgressVisible is true (speech generation in flight). -->
          <div v-if="genProgressVisible" style="margin: 8px 0">
            <el-progress :percentage="genProgress" :text-inside="true" :stroke-width="16" />
          </div>

          <el-table :data="storyList" v-loading="loading" element-loading-text="加载中..." style="width: 100%">
            <el-table-column type="index" :index="indexMethod" label="序号" width="80" />
            <el-table-column prop="title" label="标题" min-width="220" show-overflow-tooltip>
              <template #default="scope">
                <!-- Behaves as a button: click, Enter or Space opens the story in the detail pane. -->
                <div
                  class="story-title-cell"
                  role="button"
                  tabindex="0"
                  @click="viewStory(scope.row)"
                  @keydown.enter.space.prevent="viewStory(scope.row)"
                >{{ scope.row.title }}</div>
              </template>
            </el-table-column>
            <el-table-column prop="keywords" label="关键词" width="200" show-overflow-tooltip />
            <el-table-column prop="status" label="状态" width="100">
              <template #default="scope">
                <el-tag v-if="scope.row.status === 1" type="success">正常</el-tag>
                <el-tag v-else type="danger">禁用</el-tag>
              </template>
            </el-table-column>
            <el-table-column label="语音" width="220">
              <template #default="scope">
                <div class="row-actions">
                  <el-button size="small" type="primary" @click="generateFor(scope.row)">生成语音</el-button>
                  <el-button size="small" @click="querySound(scope.row)">查看数量</el-button>
                </div>
              </template>
            </el-table-column>
          </el-table>

          <div style="margin-top: 12px; display:flex; justify-content:flex-end">
            <el-pagination
              v-model:current-page="pageNum"
              v-model:page-size="pageSize"
              :page-sizes="[10,20,50]"
              layout="total, sizes, prev, pager, next, jumper"
              :total="total"
              @current-change="changePage"
              @size-change="changeSize"
            />
          </div>
        </el-card>
      </div>

      <div class="right-pane">
        <el-card class="fixed-panel">
          <template #header>
            <div class="card-header">
              <div class="card-title"><span class="card-icon">📖</span><span>故事详情</span></div>
            </div>
          </template>
          <div v-if="selectedStory">
            <h2 class="story-detail-title">{{ selectedStory.title }}</h2>
            <div class="story-meta">
              <!-- Keywords are comma separated; blanks are dropped and the index keeps keys unique for duplicates. -->
              <el-tag
                class="story-tags"
                v-for="(keyword, idx) in (selectedStory.keywords || '').split(',').map(k => k.trim()).filter(Boolean)"
                :key="idx + '-' + keyword"
              >{{ keyword }}</el-tag>
              <div class="story-status">
                状态:
                <el-tag :type="selectedStory.status === 1 ? 'success' : 'danger'">{{ selectedStory.status === 1 ? '正常' : '禁用' }}</el-tag>
              </div>
              <div class="story-id">ID: {{ selectedStory.id }}</div>
            </div>

            <!-- Audio player for the selected story: loading notice, player, or empty state. -->
            <div v-if="audioLoading" style="margin-bottom:8px"><el-alert title="音频加载中..." type="info" show-icon /></div>
            <div v-else-if="latestSoundUrl" class="audio-box">
              <audio :src="latestSoundUrl" controls style="width:100%"></audio>
            </div>
            <div v-else class="no-data">暂无语音</div>

            <!-- Rendered with v-html: whatever formatStoryBody returns is inserted as raw HTML. -->
            <div class="story-detail-body" v-html="formatStoryBody(selectedStory.body)"></div>

            <el-divider>插画预览</el-divider>
            <div v-if="imageLoading"><el-alert title="插画加载中..." type="info" show-icon /></div>
            <div v-else>
              <div v-if="imageUrls.length" class="img-grid">
                <el-image
                  v-for="(u,i) in imageUrls"
                  :key="u + i"
                  :src="u"
                  fit="cover"
                  :preview-src-list="imageUrls"
                  style="width: 220px; height: 160px"
                />
              </div>
              <div v-else class="no-data">暂无插画</div>
            </div>
          </div>
          <div v-else class="placeholder">选择左侧列表中的故事进行详情查看</div>
        </el-card>
      </div>
    </div>

    <!-- Result dialog opened after a successful generation request. -->
    <el-dialog v-model="pathDialog" title="生成结果" width="700px">
      <div v-if="savedSound">
        <el-alert title="音频已保存到服务端资源目录" type="info" show-icon />
        <el-divider />
        <div class="path-item">{{ savedSound.filePath }}</div>
        <div v-if="audioUrl" style="margin-top:12px">
          <audio :src="audioUrl" controls style="width:100%"></audio>
        </div>
      </div>
      <div v-else>暂无数据</div>
      <template #footer>
        <el-button @click="pathDialog=false">关闭</el-button>
      </template>
    </el-dialog>
  </div>
</template>

<script>
/**
 * SoundTest page component.
 *
 * Lists stories (paged, filterable by keyword), triggers server-side speech
 * generation for a story, and shows the selected story's text, its most recent
 * audio file and its illustrations.
 *
 * Relies on `this.$axios` and `this.$message` being installed on the app.
 */
export default {
  name: 'SoundTest',
  data() {
    return {
      // Search / paging state for the story table.
      searchForm: { keywords: '' },
      pageNum: 1,
      pageSize: 10,
      total: 0,
      storyList: [],
      loading: false,
      // Result dialog state after a generation request.
      pathDialog: false,
      savedSound: null,
      audioUrl: '',
      // Detail pane state.
      selectedStory: null,
      latestSoundUrl: '',
      audioLoading: false,
      imageLoading: false,
      imageUrls: [],
      // Progress bar state; the timer ids are kept so they can be cancelled.
      genProgress: 0,
      genProgressVisible: false,
      genProgressTimer: null,
      genProgressHideTimer: null
    }
  },
  mounted() {
    this.loadStories()
  },
  beforeUnmount() {
    // Do not leave an interval/timeout running against an unmounted component.
    this.clearProgressTimers()
  },
  methods: {
    /** Maps a zero-based row index on the current page to a 1-based global row number. */
    indexMethod(i) {
      return (this.pageNum - 1) * this.pageSize + i + 1
    },

    /** Loads the current page of stories from POST /story/list into `storyList` / `total`. */
    loadStories() {
      this.loading = true
      const params = {
        pageNum: this.pageNum,
        pageSize: this.pageSize,
        param: { keywords: this.searchForm.keywords }
      }
      this.$axios.post('/story/list', params).then(res => {
        if (res.data.code === 200) {
          this.storyList = res.data.data || []
          this.total = res.data.total || 0
        } else {
          this.storyList = []
          this.total = 0
          this.$message.error(res.data.msg || '获取故事失败')
        }
      }).catch(() => {
        this.$message.error('获取故事失败')
      }).finally(() => {
        this.loading = false
      })
    },

    /**
     * Converts a plain-text story body into HTML for `v-html`.
     *
     * The text is HTML-escaped first so markup inside a story cannot be
     * injected into the page; line breaks then become `<br>`.
     *
     * @param {string} body raw story text
     * @returns {string} escaped HTML, or '' when `body` is empty
     */
    formatStoryBody(body) {
      if (!body) return ''
      const escaped = String(body)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;')
      return escaped.replace(/\r?\n/g, '<br>')
    },

    /** Runs a search from the first page. */
    searchStories() {
      this.pageNum = 1
      this.loadStories()
    },

    /** Clears the keyword filter and reloads from the first page. */
    resetSearch() {
      this.searchForm.keywords = ''
      this.pageNum = 1
      this.loadStories()
    },

    /** Pagination handler: page number changed. */
    changePage(p) {
      this.pageNum = p
      this.loadStories()
    },

    /** Pagination handler: page size changed. */
    changeSize(s) {
      this.pageSize = s
      this.loadStories()
    },

    /**
     * Requests speech generation for a story (POST /sound/generate) and, on
     * success, opens the result dialog with the saved file path and a player.
     *
     * @param {{id: number}} row table row of the story
     */
    generateFor(row) {
      if (!row?.id) return
      const body = { storyId: row.id }
      console.log(`[${this.ts()}] 开始生成: storyId=${row.id}`)
      console.log(`[${this.ts()}] 请求: POST /sound/generate body=` + JSON.stringify(body))
      this.startProgress()
      this.loading = true
      // Generation is slow on the server, hence the 3 minute client timeout.
      this.$axios.post('/sound/generate', body, { timeout: 180000 }).then(res => {
        this.loading = false
        this.stopProgress(true)
        console.log(`[${this.ts()}] 响应: http=${res.status}, appCode=${res.data?.code}`)
        if (res.data.code === 200) {
          this.savedSound = res.data.data || null
          const base = this.$axios?.defaults?.baseURL || ''
          this.audioUrl = this.savedSound && this.savedSound.id ? `${base}/sound/file/${this.savedSound.id}` : ''
          this.pathDialog = true
          console.log(`[${this.ts()}] 生成成功, 路径=${this.savedSound?.filePath}`)
          this.$message.success('生成成功')
        } else {
          console.log(`[${this.ts()}] 生成失败: ${res.data.msg || '未知错误'}`)
          this.$message.error(res.data.msg || '生成失败')
        }
      }).catch(err => {
        this.loading = false
        this.stopProgress(false)
        const isTimeout = err?.code === 'ECONNABORTED' || /timeout/i.test(err?.message || '')
        const msg = isTimeout ? '请求超时,请稍后重试或增大超时' : (err?.response?.data?.msg || '生成失败')
        const http = err?.response?.status
        const payload = err?.response?.data ? JSON.stringify(err.response.data) : ''
        console.log(`[${this.ts()}] 请求异常: http=${http} msg=${msg}`)
        if (payload) console.log(`[${this.ts()}] 错误载荷: ${payload}`)
        this.$message.error(msg)
      })
    },

    /** Cancels the progress tick interval and any pending "hide progress bar" timeout. */
    clearProgressTimers() {
      if (this.genProgressTimer) {
        clearInterval(this.genProgressTimer)
        this.genProgressTimer = null
      }
      if (this.genProgressHideTimer) {
        clearTimeout(this.genProgressHideTimer)
        this.genProgressHideTimer = null
      }
    },

    /**
     * Shows the progress bar and advances it by a random 1-5% every 200 ms,
     * capped at 95%. The value is simulated, not reported by the server.
     */
    startProgress() {
      // Also cancels a pending hide from a previous run so it cannot hide this one.
      this.clearProgressTimers()
      this.genProgressVisible = true
      this.genProgress = 0
      this.genProgressTimer = setInterval(() => {
        const inc = Math.floor(Math.random() * 5) + 1
        if (this.genProgress < 95) this.genProgress = Math.min(95, this.genProgress + inc)
      }, 200)
    },

    /**
     * Stops the simulated progress and hides the bar 500 ms later.
     *
     * @param {boolean} success when true the bar jumps to 100% before hiding
     */
    stopProgress(success) {
      this.clearProgressTimers()
      if (success) this.genProgress = 100
      this.genProgressHideTimer = setTimeout(() => {
        this.genProgressHideTimer = null
        this.genProgressVisible = false
        this.genProgress = 0
      }, 500)
    },

    /** Shows how many sound records exist for the story (GET /sound/by-story/:id). */
    querySound(row) {
      if (!row?.id) return
      this.$axios.get(`/sound/by-story/${row.id}`).then(res => {
        if (res.data.code === 200) {
          const n = (res.data.data || []).length
          this.$message.info(`当前语音数量:${n}`)
        } else {
          this.$message.error(res.data.msg || '查询失败')
        }
      }).catch(() => {
        this.$message.error('查询失败')
      })
    },

    /**
     * Selects a story for the detail pane and loads its latest audio and its
     * illustrations in parallel.
     *
     * Responses are applied only while the story is still the selected one, so
     * a slow response for a previously clicked story cannot overwrite the
     * current selection's audio, images or loading flags.
     *
     * @param {{id: number}} row table row of the story
     */
    viewStory(row) {
      if (!row?.id) return
      const storyId = row.id
      const base = this.$axios?.defaults?.baseURL || ''
      // Compared by id because `this.selectedStory` is a reactive proxy, not `row` itself.
      const isCurrent = () => this.selectedStory?.id === storyId

      this.selectedStory = row

      this.audioLoading = true
      this.latestSoundUrl = ''
      this.$axios.get(`/sound/by-story/${storyId}`).then(res => {
        if (!isCurrent()) return
        this.audioLoading = false
        if (res.data.code === 200) {
          const list = res.data.data || []
          // The record with the highest id is treated as the latest one.
          const latest = list.reduce((a, b) => (a.id > b.id ? a : b), { id: 0 })
          this.latestSoundUrl = latest && latest.id ? `${base}/sound/file/${latest.id}` : ''
        } else {
          this.$message.error(res.data.msg || '查询失败')
        }
      }).catch(() => {
        if (!isCurrent()) return
        this.audioLoading = false
        this.$message.error('查询失败')
      })

      this.imageLoading = true
      this.imageUrls = []
      this.$axios.get(`/illustration/by-story/${storyId}`).then(res => {
        if (!isCurrent()) return
        this.imageLoading = false
        if (res.data.code === 200) {
          this.imageUrls = (res.data.data || []).map(x => `${base}/illustration/file/${x.id}`)
        } else {
          this.$message.error(res.data.msg || '查询失败')
        }
      }).catch(() => {
        if (!isCurrent()) return
        this.imageLoading = false
        this.$message.error('查询失败')
      })
    },

    /** Returns the current local time as HH:mm:ss, used as a console log prefix. */
    ts() {
      const d = new Date()
      const p = n => String(n).padStart(2, '0')
      return `${p(d.getHours())}:${p(d.getMinutes())}:${p(d.getSeconds())}`
    }
  }
}
</script>

<style scoped>
/* Banner / hero area. */
.page-hero { background: linear-gradient(90deg,#e8f5e9,#c8e6c9); border: 1px solid #c8e6c9; border-radius: 16px; padding: 18px; box-shadow: 0 6px 16px rgba(76,175,80,0.12); margin-bottom: 16px; }
.hero-banner { margin-bottom: 16px; height: 180px; border-radius: 16px; overflow: hidden; box-shadow: 0 6px 16px rgba(76,175,80,0.12); border: 1px solid #c8e6c9; }
.hero-image { width: 100%; height: 100%; display: block; object-fit: cover; }
.hero-title { font-size: 22px; font-weight: 700; color: #3e2723; }
.hero-sub { color: #6d4c41; margin-top: 6px; }
.path-item { padding: 6px 8px; border-bottom: 1px solid #eee; font-family: Consolas, Monaco, monospace; word-break: break-all; }

/* Two-pane layout with fixed-height cards. */
.split-layout { display: flex; gap: 16px; align-items: flex-start; }
.left-pane { flex: 1; min-width: 420px; }
.right-pane { width: 48%; }
.actions-row { display: flex; gap: 8px; align-items: center; flex-wrap: nowrap; }
.fixed-panel { height: 900px; display: flex; flex-direction: column; }
/* .el-card__body is rendered inside the el-card component; in a scoped block only :deep() reaches it. */
.fixed-panel :deep(.el-card__body) { overflow: auto; }

/* Story list and detail pane. */
.story-title-cell { cursor: pointer; color: #409eff; }
.story-title-cell:hover { color: #66b1ff; text-decoration: underline; }
.story-detail-title { font-size: 20px; font-weight: 600; color: #303133; margin: 0 0 15px 0; text-align: center; }
.story-meta { display: flex; flex-wrap: wrap; gap: 10px; align-items: center; margin-bottom: 20px; padding-bottom: 15px; border-bottom: 1px solid #ebeef5; }
.story-id { color: #909399; font-size: 12px; margin-left: auto; }
.story-detail-body { font-size: 14px; line-height: 1.8; color: #606266; white-space: pre-wrap; }
.audio-box { margin: 10px 0 16px 0; }
.placeholder, .no-data { color: #909399; padding: 20px; text-align: center; }
.img-grid { display: flex; flex-wrap: wrap; gap: 12px; }

/* Theme overrides for Element Plus components. */
.cartoon-theme .el-card { border-radius: 16px; border: 1px solid rgba(0,0,0,0.06); box-shadow: 0 10px 20px rgba(0,0,0,0.08); }
/* The dialog panel and its header are internal to el-dialog, so they also need :deep(). */
.cartoon-theme :deep(.el-dialog) { border-radius: 16px; overflow: hidden; }
.cartoon-theme :deep(.el-dialog__header) { background: linear-gradient(90deg,#e8f5e9,#c8e6c9); color: #3e2723; }
.cartoon-theme .el-button { border-radius: 18px; font-weight: 600; }
.cartoon-theme .el-button--primary { background: linear-gradient(90deg,#ff9a9e,#fecfef); border-color: #ff9a9e; }
.cartoon-theme { background: url('@/image/storyBG.png') center/cover no-repeat; }

/* Card header with emoji icon. */
.card-header { display: flex; align-items: center; }
.card-title { display: flex; align-items: center; gap: 10px; font-weight: 700; color: #303133; font-size: 18px; }
.card-icon { font-size: 22px; line-height: 1; padding: 4px 8px; border-radius: 999px; background: rgba(255,255,255,0.92); box-shadow: 0 2px 6px rgba(0,0,0,0.15); }
</style>

后端使用的是百度语音合成 API，可以参考百度的官方文档更改语音模型、语气等参数。

package com.example.demo.service;

import com.example.demo.entity.Sound;
import com.example.demo.entity.Story;
import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import okhttp3.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.*;

@Service
public class VoiceApiService {

    @Value("${baidu.tts.api-key}")
    private String apiKey;
    @Value("${baidu.tts.secret-key}")
    private String secretKey;
    @Value("${baidu.tts.voice:4194}")
    private int voice;
    @Value("${baidu.tts.lang:zh}")
    private String lang;
    @Value("${baidu.tts.speed:5}")
    private int speed;
    @Value("${baidu.tts.pitch:5}")
    private int pitch;
    @Value("${baidu.tts.volume:5}")
    private int volume;
    @Value("${baidu.tts.format:mp3-16k}")
    private String format;

    @Autowired
    private StoryService storyService;
    @Autowired
    private SoundService soundService;

    private final OkHttpClient http = new OkHttpClient.Builder().readTimeout(java.time.Duration.ofSeconds(300)).build();

    public com.example.demo.entity.Sound generateForStory(Long storyId) {
        Story story = storyService.getById(storyId);
        if (story == null || StringUtils.isBlank(story.getBody())) throw new RuntimeException("故事不存在或正文为空");
        String text = buildText(story.getTitle(), story.getBody());
        String token = getAccessToken();
        byte[] audio = tryCreateAndFetch(token, text);
        if (audio == null || audio.length == 0) audio = text2audio(token, text);
        if (audio == null || audio.length == 0) throw new RuntimeException("语音生成失败");
        Path saveDir = Paths.get("d:\\Project\\2025aunt\\KIDSTORY2.0\\Spring\\demo\\src\\main\\resources\\sound");
        try { Files.createDirectories(saveDir); } catch (Exception ignore) {}
        long ts = Instant.now().toEpochMilli();
        String fname = "story_" + storyId + "_" + ts + ".mp3";
        Path fpath = saveDir.resolve(fname);
        try (FileOutputStream fos = new FileOutputStream(fpath.toFile())) { fos.write(audio); }
        catch (Exception e) { throw new RuntimeException("保存音频失败: " + fpath.toString()); }
        Sound s = new Sound();
        s.setStoryId(storyId);
        s.setFilePath(fpath.toString());
        soundService.save(s);
        return s;
    }

    private String buildText(String title, String body) {
        StringBuilder sb = new StringBuilder();
        if (title != null && !title.isEmpty()) sb.append(title).append('。');
        if (body != null) sb.append(body);
        String text = sb.toString();
        int max = 1400;
        if (text.length() > max) text = text.substring(0, max);
        return text;
    }

    private String getAccessToken() {
        RequestBody body = new FormBody.Builder()
                .add("grant_type", "client_credentials")
                .add("client_id", apiKey)
                .add("client_secret", secretKey)
                .build();
        Request req = new Request.Builder()
                .url("https://aip.baidubce.com/oauth/2.0/token")
                .post(body)
                .build();
        try (Response resp = http.newCall(req).execute()) {
            if (!resp.isSuccessful()) throw new RuntimeException("获取token失败:" + resp.code());
            String json = resp.body().string();
            com.fasterxml.jackson.databind.JsonNode n = new com.fasterxml.jackson.databind.ObjectMapper().readTree(json);
            return n.get("access_token").asText();
        } catch (Exception e) {
            throw new RuntimeException("获取token异常");
        }
    }

    private byte[] tryCreateAndFetch(String token, String text) {
        Map<String, Object> payload = new HashMap<>();
        payload.put("format", format);
        payload.put("voice", voice);
        payload.put("lang", lang);
        payload.put("speed", speed);
        payload.put("pitch", pitch);
        payload.put("volume", volume);
        payload.put("enable_subtitle", 0);
        payload.put("text", text);
        String url = "https://aip.baidubce.com/rpc/2.0/tts/v1/create?access_token=" + token;
        byte[] jsonBytes;
        try {
            jsonBytes = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsBytes(payload);
        } catch (com.fasterxml.jackson.core.JsonProcessingException e) {
            return null;
        }
        Request req = new Request.Builder()
                .url(url)
                .post(RequestBody.create(okhttp3.MediaType.parse("application/json"), jsonBytes))
                .build();
        try (Response resp = http.newCall(req).execute()) {
            if (!resp.isSuccessful()) return null;
            String json = resp.body().string();
            String audioUrl = extractAudioUrl(json);
            if (audioUrl != null && !audioUrl.isEmpty()) return download(audioUrl);
            String taskId = extractTaskId(json);
            if (taskId == null || taskId.isEmpty()) return null;
            return pollAndDownload(token, taskId);
        } catch (Exception e) {
            return null;
        }
    }

    private String extractAudioUrl(String json) {
        try {
            com.fasterxml.jackson.databind.JsonNode root = new com.fasterxml.jackson.databind.ObjectMapper().readTree(json);
            if (root.has("audio_url")) return root.get("audio_url").asText("");
            if (root.has("audio_address")) return root.get("audio_address").asText("");
            if (root.has("result") && root.get("result").has("audio_url")) return root.get("result").get("audio_url").asText("");
            if (root.has("result") && root.get("result").has("audio_address")) return root.get("result").get("audio_address").asText("");
            return null;
        } catch (Exception e) {
            return null;
        }
    }

    private String extractTaskId(String json) {
        try {
            com.fasterxml.jackson.databind.JsonNode root = new com.fasterxml.jackson.databind.ObjectMapper().readTree(json);
            if (root.has("task_id")) return root.get("task_id").asText("");
            if (root.has("result") && root.get("result").has("task_id")) return root.get("result").get("task_id").asText("");
            return null;
        } catch (Exception e) {
            return null;
        }
    }

    private byte[] pollAndDownload(String token, String taskId) {
        String url = "https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token=" + token;
        Map<String, Object> payload = new HashMap<>();
        payload.put("task_ids", java.util.Collections.singletonList(taskId));
        int times = 0;
        while (times < 30) {
            times++;
            try {
                byte[] bytes;
                try {
                    bytes = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsBytes(payload);
                } catch (com.fasterxml.jackson.core.JsonProcessingException e) {
                    Thread.sleep(1000);
                    continue;
                }
                Request req = new Request.Builder()
                        .url(url)
                        .post(RequestBody.create(okhttp3.MediaType.parse("application/json"), bytes))
                        .build();
                try (Response resp = http.newCall(req).execute()) {
                    if (!resp.isSuccessful()) {
                        Thread.sleep(1000);
                        continue;
                    }
                    String json = resp.body().string();
                    String a = extractAudioUrl(json);
                    if (a != null && !a.isEmpty()) return download(a);
                    Thread.sleep(1000);
                }
            } catch (Exception ignore) {
                try { Thread.sleep(1000); } catch (InterruptedException ignored) {}
            }
        }
        return null;
    }

    private byte[] text2audio(String token, String text) {
        RequestBody body = new FormBody.Builder()
                .add("tex", text)
                .add("tok", token)
                .add("lan", lang)
                .add("ctp", "1")
                .add("spd", String.valueOf(speed))
                .add("pit", String.valueOf(pitch))
                .add("vol", String.valueOf(volume))
                .add("cuid", java.util.UUID.randomUUID().toString())
                .build();
        Request req = new Request.Builder()
                .url("https://tsn.baidu.com/text2audio")
                .post(body)
                .build();
        try (Response resp = http.newCall(req).execute()) {
            if (!resp.isSuccessful()) return null;
            return resp.body().bytes();
        } catch (Exception e) {
            return null;
        }
    }

    private byte[] download(String url) {
        Request req = new Request.Builder().url(url).get().build();
        try (Response resp = http.newCall(req).execute()) {
            if (!resp.isSuccessful()) return null;
            return resp.body().bytes();
        } catch (Exception e) {
            return null;
        }
    }
}
posted @ 2025-10-21 15:57  QixunQiu  阅读(8)  评论(0)    收藏  举报