TTS(Text To Speech)即“从文本到语音”。TTS技术对文本文件进行实时转换,转换时间之短可以以秒计算。在其特有智能语音控制器作用下,文本输出的语音音律流畅,使得听者在听取信息时感觉自然,毫无机器语音输出的冷漠与生涩感。TTS语音合成技术即将覆盖国标一、二级汉字,具有英文接口,自动识别中、英文,支持中英文混读。所有声音采用真人普通话为标准发音,实现了120-150个汉字/秒的快速语音合成,朗读速度达3-4个汉字/秒,使用户可以听到清晰悦耳的音质和连贯流畅的语调。
上面的算是tts的基本概念,baidu搜到的- -!其实如果说到tts的code,很多东西网上都是有现成的,楼主可以多加利用,我以前有搜集一些,希望可以帮助到楼主
CText2Speech类的设计,其定义文件可以类似于下面
///////////////////////////////////////////////////////////////
// active speech engine
//
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
///////////////////////////////////////////////////////////////
// speech message
//
// Window message used for speech-engine notifications. The body is
// parenthesized so the macro expands as a single value inside larger
// expressions (e.g. WM_TTSEVENT * 2, or comparisons).
#define WM_TTSEVENT (WM_USER + 101)
///////////////////////////////////////////////////////////////
// text-to-speech class
//
// Wrapper around the SAPI ISpVoice text-to-speech interface.
//
// Usage: construct, call Initialize(), then Speak(); the destructor calls
// Destroy(). Methods returning HRESULT forward the engine's result code.
// GetErrorString() returns the message stored by the most recent call that
// recorded one.
class CText2Speech
{
public:
    CText2Speech();
    virtual ~CText2Speech();

    // Initializes COM, creates the voice and, if hWnd is a valid window,
    // has WM_TTSEVENT posted to that window for engine events.
    // Returns FALSE (and records an error string) on failure.
    BOOL Initialize(HWND hWnd = NULL);
    // Releases the voice and uninitializes COM.
    void Destroy();

    // Speaks the wide-character text pwcs; dwFlags is forwarded to the engine.
    HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
    HRESULT Pause();
    HRESULT Resume();

    // Speaking rate.
    HRESULT SetRate(long lRateAdjust);
    HRESULT GetRate(long* plRateAdjust);

    // Output volume.
    HRESULT SetVolume(USHORT usVolume);
    HRESULT GetVolume(USHORT* pusVolume);

    // Installed voices.
    // GetVoiceCount returns the number of voices, or (ULONG)-1 on failure.
    ULONG GetVoiceCount();
    // Retrieves the description of the voice at lIndex; the default
    // (ULONG)-1 means "the voice currently in use".
    HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = static_cast<ULONG>(-1));
    // Selects the installed voice whose description equals *ppszDescription
    // (compared case-insensitively).
    HRESULT SetVoice(WCHAR **ppszDescription);

    // Last recorded error message. Const so it can be queried through a
    // const reference or pointer.
    CString GetErrorString() const
    {
        return m_sError;
    }

    // Underlying engine interface; public because event handlers read
    // pending events directly from it.
    CComPtr<ISpVoice> m_IpVoice;

private:
    CString m_sError;
};
文件的开始几行语句:
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
而类库(.dll)形式的实现可以采用以下格式:
using System;
using System.Collections.Generic;
using System.Text;
using SpeechLib;
using System.Collections;
namespace SpeechReco
{
public delegate void SpeechHandler(object sender, SpeechEvent e);
public class SpeechVoice
{
private SpVoiceClass spVoice;
public event SpeechHandler mySpeech;
public event SpeechHandler imgHandler;
private SpeechEvent evt = new SpeechEvent();
private string status="default";
private int roleNum = 0;//选择朗读的角色
// Creates the SAPI voice and hooks up its per-word and end-of-stream events.
public SpeechVoice()
{
    spVoice = new SpVoiceClass();
    // Raised for every word that is read.
    spVoice.Word += spVoice_Word;
    // Raised when the text has been read to the end.
    spVoice.EndStream += spVoice_EndStream;
}
//读完调用此事件
下面是tts应用到的几个函数,很有用
// Initialization and release functions
BOOL Initialize(HWND hWnd = NULL);
void Destroy();
// Speech control functions
HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
HRESULT Pause();
HRESULT Resume();
// Speaking-rate functions
HRESULT SetRate(long lRateAdjust);
HRESULT GetRate(long* plRateAdjust);
// Volume functions
HRESULT SetVolume(USHORT usVolume);
HRESULT GetVolume(USHORT* pusVolume);
// Voice (language) functions
ULONG GetVoiceCount();
HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = -1);
HRESULT SetVoice(WCHAR **ppszDesc);
// Error-message accessor
CString GetErrorString()
CText2Speech类的构造函数用于初始化Text-To-Speech引擎接口指针m_IpVoice和错误字符串;析构函数则调用释放引擎的Destroy()函数释放语音引擎,其代码如下:
// Starts with no voice attached and an empty error string; the engine itself
// is created later by Initialize().
CText2Speech::CText2Speech()
    : m_IpVoice(),
      m_sError(_T(""))
{
}
// Shuts the speech engine down through Destroy() when the object dies.
CText2Speech::~CText2Speech()
{
    this->Destroy();
}
如果运用到单词的朗读,尝试下下面的code或许有用
// EndStream handler: resets the playback status and, when anyone is
// subscribed, raises imgHandler.
void spVoice_EndStream(int StreamNumber, object StreamPosition)
{
    status = "default";
    if (imgHandler == null)
    {
        return;
    }
    imgHandler(this, evt);
}
//每读一个单词触发该事件, CharacterPosition表示单词在整个句子中的位置 length表示单词的长度
初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。该窗口句柄是作为函数的参数传入的。Initialize函数的代码如下:
// Prepares the speech engine for use.
//
// Initializes COM on the calling thread, creates the SAPI voice, registers
// interest in viseme events and, when hWnd is a valid window, asks the voice
// to post WM_TTSEVENT to it.
//
// Returns TRUE on success. On failure returns FALSE and stores a message that
// GetErrorString() returns.
//
// Calling Initialize() again while a voice is already held reuses that voice:
// COM is not initialized a second time (Destroy() uninitializes only once) and
// CComPtr::CoCreateInstance is not called on a non-NULL pointer, which ATL
// asserts against and which would leak the first voice in release builds.
// Event interest and the notification window are (re)applied in either case.
BOOL CText2Speech::Initialize(HWND hWnd)
{
    HRESULT hr = S_OK;

    if (!m_IpVoice)
    {
        if (FAILED(CoInitialize(NULL)))
        {
            m_sError = _T("Error intialization COM");
            return FALSE;
        }

        hr = m_IpVoice.CoCreateInstance(CLSID_SpVoice);
        if (FAILED(hr))
        {
            m_sError = _T("Error creating voice");
            return FALSE;
        }
    }

    // Only viseme events are requested, both as notifications and as queued events.
    hr = m_IpVoice->SetInterest(SPFEI(SPEI_VISEME), SPFEI(SPEI_VISEME));
    if (FAILED(hr))
    {
        m_sError = _T("Error creating interest...seriously");
        return FALSE;
    }

    // A notification window is optional; without one no message is posted.
    if (::IsWindow(hWnd))
    {
        hr = m_IpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0);
        if (FAILED(hr))
        {
            m_sError = _T("Error setting notification window");
            return FALSE;
        }
    }

    return TRUE;
}
语速音量之类的下面的code可以参考下
// Word handler, raised for every word that is read. CharacterPosition is the
// word's position within the sentence and Length is the word's length; both
// are stored in evt before mySpeech is raised.
void spVoice_Word(int StreamNumber, object StreamPosition, int CharacterPosition, int Length)
{
    evt.CharacterPosition = CharacterPosition;
    evt.Length = Length;
    // An event with no subscribers is null; invoking it would throw
    // NullReferenceException. Same guard as in spVoice_EndStream.
    if (mySpeech != null)
    {
        mySpeech(this, evt);
    }
}
// Index of the voice (e.g. male or female speaker) used for reading.
// Assigning it switches the engine to the voice at that index.
public int role
{
    get
    {
        return roleNum;
    }
    set
    {
        ISpeechObjectTokens voices = spVoice.GetVoices("", "");
        spVoice.Voice = voices.Item(value);
        spVoice.Voice.GetDescription(0);
        roleNum = value;
    }
}
// Switches the engine to the first enumerated voice.
// NOTE(review): the original fetched the voice token and discarded it, so the
// call had no effect; assigning it to spVoice.Voice is presumably the intent.
// Whether voice 0 is actually a Chinese voice depends on the voices installed
// on the machine. Confirm.
public void speakChinese()
{
    spVoice.Voice = spVoice.GetVoices(string.Empty, string.Empty).Item(0);
    // Keep the value reported by the role property in step with the voice in use.
    roleNum = 0;
}
/// <summary>
/// Gets the current speaking rate.
/// </summary>
/// <returns>The voice's current Rate value.</returns>
public int getRate()
{
    return spVoice.Rate;
}
/// <summary>
/// Sets the speaking rate.
/// </summary>
/// <param name="rate">New value for the voice's Rate property.</param>
public void setRate(int rate)
{
    this.spVoice.Rate = rate;
}
/// <summary>
/// Sets the output volume.
/// </summary>
/// <param name="volume">New value for the voice's Volume property.</param>
public void setVolume(int volume)
{
    this.spVoice.Volume = volume;
}
释放函数则释放语音引擎接口和COM库,其代码如下:
// Releases the speech engine and balances the CoInitialize() made by
// Initialize().
//
// COM is uninitialized only when a voice is actually held. Calling Destroy()
// twice (for example explicitly and then again from the destructor), or on an
// object that was never initialized, therefore does not issue an unmatched
// CoUninitialize().
void CText2Speech::Destroy()
{
    if (m_IpVoice)
    {
        // The interface must be released before COM is shut down.
        m_IpVoice.Release();
        CoUninitialize();
    }
}
语音、语速、音量函数都是通过m_IpVoice成员直接调用ISpVoice接口的相关方法来实现的:
// Speaks the wide-character text pwcs; dwFlags is forwarded to
// ISpVoice::Speak unchanged.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet, instead of dereferencing a NULL interface.
HRESULT CText2Speech::Speak(const WCHAR *pwcs, DWORD dwFlags)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Speak(pwcs, dwFlags, NULL);
}
// Pauses speech output via ISpVoice::Pause.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::Pause()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Pause();
}
// Resumes paused speech output via ISpVoice::Resume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::Resume()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Resume();
}
// rate
// Sets the speaking rate; lRateAdjust is forwarded to ISpVoice::SetRate.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::SetRate(long lRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->SetRate(lRateAdjust);
}
// Reads the current speaking rate into *plRateAdjust via ISpVoice::GetRate.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::GetRate(long* plRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->GetRate(plRateAdjust);
}
// volume
// Sets the output volume; usVolume is forwarded to ISpVoice::SetVolume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::SetVolume(USHORT usVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->SetVolume(usVolume);
}
// Reads the current output volume into *pusVolume via ISpVoice::GetVolume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::GetVolume(USHORT* pusVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->GetVolume(pusVolume);
}
暂停开始播放,可以类似于下面的code
// Pauses playback, then records the "pause" status.
public void pause()
{
    this.spVoice.Pause();
    this.status = "pause";
}
// Resumes playback, then records the "play" status.
public void resume()
{
    this.spVoice.Resume();
    this.status = "play";
}
语言函数的实现比较复杂。由于IspVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关,而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目,设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。它们的代码如下:
// Returns the number of voice tokens enumerated under SPCAT_VOICES.
// On failure returns (ULONG)-1 and records an error string.
ULONG CText2Speech::GetVoiceCount()
{
    // The function's failure value, spelled out instead of relying on an
    // implicit -1 -> ULONG conversion at every return.
    const ULONG ulFailure = static_cast<ULONG>(-1);

    // Enumerate the available voices.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return ulFailure;
    }

    // Ask the enumerator how many voices it holds.
    ULONG ulCount = ulFailure;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice count");
        return ulFailure;
    }

    return ulCount;
}
// Retrieves the description of a voice.
//
//   ppszDescription - receives the description string produced by
//                     SpGetDescription; SAPI documents that the caller frees
//                     it with CoTaskMemFree.
//   lIndex          - (ULONG)-1 selects the voice currently in use; any other
//                     value is a zero-based index into the SPCAT_VOICES
//                     enumeration.
//
// Returns S_OK on success. Returns E_POINTER for a NULL output pointer or a
// missing voice, E_INVALIDARG for an index past the end of the enumeration,
// or the failing SAPI HRESULT. An error string is recorded on every failure.
//
// Fixes relative to the earlier version:
//   * the SpGetDescription result on the current-voice path is now assigned
//     to hr, so the failure check that follows is no longer dead;
//   * an out-of-range index is rejected at run time (it was only ASSERTed,
//     so release builds kept calling Next() past the end and then passed a
//     NULL token to SpGetDescription);
//   * the token is fetched directly with IEnumSpObjectTokens::Item instead
//     of stepping through the enumerator.
HRESULT CText2Speech::GetVoice(WCHAR **ppszDescription, ULONG lIndex)
{
    if (ppszDescription == NULL)
    {
        m_sError = _T("Error: description pointer is NULL");
        return E_POINTER;
    }

    HRESULT hr = S_OK;
    CComPtr<ISpObjectToken> cpVoiceToken;

    if (lIndex == static_cast<ULONG>(-1))
    {
        // Current voice.
        if (!m_IpVoice)
        {
            m_sError = _T("Voice engine is not initialized");
            return E_POINTER;
        }

        hr = m_IpVoice->GetVoice(&cpVoiceToken);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get current voice");
            return hr;
        }

        hr = SpGetDescription(cpVoiceToken, ppszDescription);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get current voice description");
            return hr;
        }
        return hr;
    }

    // Any other voice: enumerate the installed voices first.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return hr;
    }

    ULONG ulCount = 0;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to voice count");
        return hr;
    }

    // Range control, enforced in release builds too.
    if (lIndex >= ulCount)
    {
        m_sError = _T("Error: voice index out of range");
        return E_INVALIDARG;
    }

    hr = cpEnum->Item(lIndex, &cpVoiceToken);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice token");
        return hr;
    }

    hr = SpGetDescription(cpVoiceToken, ppszDescription);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice description");
        return hr;
    }

    return hr;
}
// Selects the installed voice whose description equals *ppszDescription
// (compared case-insensitively).
//
// Returns the HRESULT of ISpVoice::SetVoice when a match is found. Returns
// E_POINTER for a NULL argument or a missing voice, SPERR_NOT_FOUND when no
// installed voice matches, or the failing SAPI HRESULT. An error string is
// recorded on every failure.
//
// Fixes relative to the earlier version:
//   * each description returned by SpGetDescription is freed with
//     CoTaskMemFree (it was leaked on every loop iteration);
//   * "no voice matched" is reported as a failure instead of returning the
//     success code left over from the enumeration;
//   * _wcsicmp replaces the deprecated wcsicmp alias;
//   * NULL arguments and a missing voice are rejected up front.
HRESULT CText2Speech::SetVoice(WCHAR **ppszDescription)
{
    if (ppszDescription == NULL || *ppszDescription == NULL)
    {
        m_sError = _T("Error: description pointer is NULL");
        return E_POINTER;
    }
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }

    // Enumerate the available voices.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return hr;
    }

    // Get the number of voices.
    ULONG ulCount = 0;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to voice count");
        return hr;
    }

    // Walk the voices until one with a matching description is found.
    for (ULONG i = 0; i < ulCount; ++i)
    {
        CComPtr<ISpObjectToken> cpVoiceToken;
        hr = cpEnum->Next(1, &cpVoiceToken, NULL);
        if (FAILED(hr))
        {
            m_sError = _T("Error to voice token");
            return hr;
        }
        if (hr != S_OK || !cpVoiceToken)
        {
            // The enumerator ran out before the reported count was reached.
            break;
        }

        WCHAR *pszCandidate = NULL;
        hr = SpGetDescription(cpVoiceToken, &pszCandidate);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get voice description");
            return hr;
        }

        const bool bMatch = (_wcsicmp(pszCandidate, *ppszDescription) == 0);
        ::CoTaskMemFree(pszCandidate);

        if (bMatch)
        {
            hr = m_IpVoice->SetVoice(cpVoiceToken);
            if (FAILED(hr))
            {
                m_sError = _T("Error to set voice");
            }
            return hr;
        }
    }

    m_sError = _T("Error: no voice matches the description");
    return SPERR_NOT_FOUND;
}
通过CSpEvent的GetFrom函数可以获得当前的事件信息,eEventId成员中记录了朗读的音节的代号。数组g_iMapVisemeToImage定义了音节代码和对应嘴形位图序列号的对应关系:
// Lookup table indexed by SAPI viseme id (SP_VISEME_0 .. SP_VISEME_21).
// Each entry is the sequence number of the mouth-shape bitmap shown for
// that viseme.
const int g_iMapVisemeToImage[22] =
{
     0,  // SP_VISEME_0   silence
    11,  // SP_VISEME_1   AE, AX, AH
    11,  // SP_VISEME_2   AA
    11,  // SP_VISEME_3   AO
    10,  // SP_VISEME_4   EY, EH, UH
    11,  // SP_VISEME_5   ER
     9,  // SP_VISEME_6   y, IY, IH, IX
     2,  // SP_VISEME_7   w, UW
    13,  // SP_VISEME_8   OW
     9,  // SP_VISEME_9   AW
    12,  // SP_VISEME_10  OY
    11,  // SP_VISEME_11  AY
     9,  // SP_VISEME_12  h
     3,  // SP_VISEME_13  r
     6,  // SP_VISEME_14  l
     7,  // SP_VISEME_15  s, z
     8,  // SP_VISEME_16  SH, CH, JH, ZH
     5,  // SP_VISEME_17  TH, DH
     4,  // SP_VISEME_18  f, v
     7,  // SP_VISEME_19  d, t, n
     9,  // SP_VISEME_20  k, g, NG
     1   // SP_VISEME_21  p, b, m
};
为了响应消息WM_TTSEVENT,需要添加相应的消息响应函数:
// Message map of the reciter dialog: the standard dialog messages, the
// Speak/Stop/Resume buttons and the list-box selection change, plus the
// custom speech-engine notification.
BEGIN_MESSAGE_MAP(CReciterDlg, CDialog)
//{{AFX_MSG_MAP(CReciterDlg)
ON_WM_SYSCOMMAND()
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDC_BUTTON_SPEAK, OnButtonSpeak)
ON_LBN_SELCHANGE(IDC_LIST1, OnSelchangeList1)
ON_BN_CLICKED(IDC_BUTTON_STOP, OnButtonStop)
ON_BN_CLICKED(IDC_BUTTON_RESUME, OnButtonResume)
//}}AFX_MSG_MAP
// Added by hand, outside the AFX_MSG_MAP markers: routes WM_TTSEVENT to OnMouthEvent.
ON_MESSAGE(WM_TTSEVENT, OnMouthEvent)
END_MESSAGE_MAP()
// WM_TTSEVENT handler. Drains the events queued on the voice; for each viseme
// event it looks up the mouth bitmap for that viseme and invalidates the
// mouth rectangle. Other event ids are ignored. Always returns 0.
LRESULT CReciterDlg::OnMouthEvent(WPARAM wParam, LPARAM lParam)
{
    CSpEvent event;
    for (;;)
    {
        if (event.GetFrom(m_Text2Speech.m_IpVoice) != S_OK)
        {
            break;
        }
        if (event.eEventId != SPEI_VISEME)
        {
            continue;
        }
        m_iMouthBmp = g_iMapVisemeToImage[event.Viseme()];
        InvalidateRect(m_cMouthRect, false);
    }
    return 0;
}
上面的算是tts的基本概念,baidu搜到的- -!其实如果说到tts的code,很多东西网上都是有现成的,楼主可以多加利用,我以前有搜集一些,希望可以帮助到楼主
CText2Speech类的设计,其定义文件可以类似于下面
///////////////////////////////////////////////////////////////
// active speech engine
//
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
///////////////////////////////////////////////////////////////
// speech message
//
// Window message used for speech-engine notifications. The body is
// parenthesized so the macro expands as a single value inside larger
// expressions (e.g. WM_TTSEVENT * 2, or comparisons).
#define WM_TTSEVENT (WM_USER + 101)
///////////////////////////////////////////////////////////////
// text-to-speech class
//
// Wrapper around the SAPI ISpVoice text-to-speech interface.
//
// Usage: construct, call Initialize(), then Speak(); the destructor calls
// Destroy(). Methods returning HRESULT forward the engine's result code.
// GetErrorString() returns the message stored by the most recent call that
// recorded one.
class CText2Speech
{
public:
    CText2Speech();
    virtual ~CText2Speech();

    // Initializes COM, creates the voice and, if hWnd is a valid window,
    // has WM_TTSEVENT posted to that window for engine events.
    // Returns FALSE (and records an error string) on failure.
    BOOL Initialize(HWND hWnd = NULL);
    // Releases the voice and uninitializes COM.
    void Destroy();

    // Speaks the wide-character text pwcs; dwFlags is forwarded to the engine.
    HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
    HRESULT Pause();
    HRESULT Resume();

    // Speaking rate.
    HRESULT SetRate(long lRateAdjust);
    HRESULT GetRate(long* plRateAdjust);

    // Output volume.
    HRESULT SetVolume(USHORT usVolume);
    HRESULT GetVolume(USHORT* pusVolume);

    // Installed voices.
    // GetVoiceCount returns the number of voices, or (ULONG)-1 on failure.
    ULONG GetVoiceCount();
    // Retrieves the description of the voice at lIndex; the default
    // (ULONG)-1 means "the voice currently in use".
    HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = static_cast<ULONG>(-1));
    // Selects the installed voice whose description equals *ppszDescription
    // (compared case-insensitively).
    HRESULT SetVoice(WCHAR **ppszDescription);

    // Last recorded error message. Const so it can be queried through a
    // const reference or pointer.
    CString GetErrorString() const
    {
        return m_sError;
    }

    // Underlying engine interface; public because event handlers read
    // pending events directly from it.
    CComPtr<ISpVoice> m_IpVoice;

private:
    CString m_sError;
};
文件的开始几行语句:
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
而类库(.dll)形式的实现可以采用以下格式:
using System;
using System.Collections.Generic;
using System.Text;
using SpeechLib;
using System.Collections;
namespace SpeechReco
{
public delegate void SpeechHandler(object sender, SpeechEvent e);
public class SpeechVoice
{
private SpVoiceClass spVoice;
public event SpeechHandler mySpeech;
public event SpeechHandler imgHandler;
private SpeechEvent evt = new SpeechEvent();
private string status="default";
private int roleNum = 0;//选择朗读的角色
// Creates the SAPI voice and hooks up its per-word and end-of-stream events.
public SpeechVoice()
{
    spVoice = new SpVoiceClass();
    // Raised for every word that is read.
    spVoice.Word += spVoice_Word;
    // Raised when the text has been read to the end.
    spVoice.EndStream += spVoice_EndStream;
}
//读完调用此事件
下面是tts应用到的几个函数,很有用
// Initialization and release functions
BOOL Initialize(HWND hWnd = NULL);
void Destroy();
// Speech control functions
HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
HRESULT Pause();
HRESULT Resume();
// Speaking-rate functions
HRESULT SetRate(long lRateAdjust);
HRESULT GetRate(long* plRateAdjust);
// Volume functions
HRESULT SetVolume(USHORT usVolume);
HRESULT GetVolume(USHORT* pusVolume);
// Voice (language) functions
ULONG GetVoiceCount();
HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = -1);
HRESULT SetVoice(WCHAR **ppszDesc);
// Error-message accessor
CString GetErrorString()
CText2Speech类的构造函数用于初始化Text-To-Speech引擎接口指针m_IpVoice和错误字符串;析构函数则调用释放引擎的Destroy()函数释放语音引擎,其代码如下:
// Starts with no voice attached and an empty error string; the engine itself
// is created later by Initialize().
CText2Speech::CText2Speech()
    : m_IpVoice(),
      m_sError(_T(""))
{
}
// Shuts the speech engine down through Destroy() when the object dies.
CText2Speech::~CText2Speech()
{
    this->Destroy();
}
如果运用到单词的朗读,尝试下下面的code或许有用
// EndStream handler: resets the playback status and, when anyone is
// subscribed, raises imgHandler.
void spVoice_EndStream(int StreamNumber, object StreamPosition)
{
    status = "default";
    if (imgHandler == null)
    {
        return;
    }
    imgHandler(this, evt);
}
//每读一个单词触发该事件, CharacterPosition表示单词在整个句子中的位置 length表示单词的长度
初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。该窗口句柄是作为函数的参数传入的。Initialize函数的代码如下:
// Prepares the speech engine for use.
//
// Initializes COM on the calling thread, creates the SAPI voice, registers
// interest in viseme events and, when hWnd is a valid window, asks the voice
// to post WM_TTSEVENT to it.
//
// Returns TRUE on success. On failure returns FALSE and stores a message that
// GetErrorString() returns.
//
// Calling Initialize() again while a voice is already held reuses that voice:
// COM is not initialized a second time (Destroy() uninitializes only once) and
// CComPtr::CoCreateInstance is not called on a non-NULL pointer, which ATL
// asserts against and which would leak the first voice in release builds.
// Event interest and the notification window are (re)applied in either case.
BOOL CText2Speech::Initialize(HWND hWnd)
{
    HRESULT hr = S_OK;

    if (!m_IpVoice)
    {
        if (FAILED(CoInitialize(NULL)))
        {
            m_sError = _T("Error intialization COM");
            return FALSE;
        }

        hr = m_IpVoice.CoCreateInstance(CLSID_SpVoice);
        if (FAILED(hr))
        {
            m_sError = _T("Error creating voice");
            return FALSE;
        }
    }

    // Only viseme events are requested, both as notifications and as queued events.
    hr = m_IpVoice->SetInterest(SPFEI(SPEI_VISEME), SPFEI(SPEI_VISEME));
    if (FAILED(hr))
    {
        m_sError = _T("Error creating interest...seriously");
        return FALSE;
    }

    // A notification window is optional; without one no message is posted.
    if (::IsWindow(hWnd))
    {
        hr = m_IpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0);
        if (FAILED(hr))
        {
            m_sError = _T("Error setting notification window");
            return FALSE;
        }
    }

    return TRUE;
}
语速音量之类的下面的code可以参考下
// Word handler, raised for every word that is read. CharacterPosition is the
// word's position within the sentence and Length is the word's length; both
// are stored in evt before mySpeech is raised.
void spVoice_Word(int StreamNumber, object StreamPosition, int CharacterPosition, int Length)
{
    evt.CharacterPosition = CharacterPosition;
    evt.Length = Length;
    // An event with no subscribers is null; invoking it would throw
    // NullReferenceException. Same guard as in spVoice_EndStream.
    if (mySpeech != null)
    {
        mySpeech(this, evt);
    }
}
// Index of the voice (e.g. male or female speaker) used for reading.
// Assigning it switches the engine to the voice at that index.
public int role
{
    get
    {
        return roleNum;
    }
    set
    {
        ISpeechObjectTokens voices = spVoice.GetVoices("", "");
        spVoice.Voice = voices.Item(value);
        spVoice.Voice.GetDescription(0);
        roleNum = value;
    }
}
// Switches the engine to the first enumerated voice.
// NOTE(review): the original fetched the voice token and discarded it, so the
// call had no effect; assigning it to spVoice.Voice is presumably the intent.
// Whether voice 0 is actually a Chinese voice depends on the voices installed
// on the machine. Confirm.
public void speakChinese()
{
    spVoice.Voice = spVoice.GetVoices(string.Empty, string.Empty).Item(0);
    // Keep the value reported by the role property in step with the voice in use.
    roleNum = 0;
}
/// <summary>
/// Gets the current speaking rate.
/// </summary>
/// <returns>The voice's current Rate value.</returns>
public int getRate()
{
    return spVoice.Rate;
}
/// <summary>
/// Sets the speaking rate.
/// </summary>
/// <param name="rate">New value for the voice's Rate property.</param>
public void setRate(int rate)
{
    this.spVoice.Rate = rate;
}
/// <summary>
/// Sets the output volume.
/// </summary>
/// <param name="volume">New value for the voice's Volume property.</param>
public void setVolume(int volume)
{
    this.spVoice.Volume = volume;
}
释放函数则释放语音引擎接口和COM库,其代码如下:
// Releases the speech engine and balances the CoInitialize() made by
// Initialize().
//
// COM is uninitialized only when a voice is actually held. Calling Destroy()
// twice (for example explicitly and then again from the destructor), or on an
// object that was never initialized, therefore does not issue an unmatched
// CoUninitialize().
void CText2Speech::Destroy()
{
    if (m_IpVoice)
    {
        // The interface must be released before COM is shut down.
        m_IpVoice.Release();
        CoUninitialize();
    }
}
语音、语速、音量函数都是通过m_IpVoice成员直接调用ISpVoice接口的相关方法来实现的:
// Speaks the wide-character text pwcs; dwFlags is forwarded to
// ISpVoice::Speak unchanged.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet, instead of dereferencing a NULL interface.
HRESULT CText2Speech::Speak(const WCHAR *pwcs, DWORD dwFlags)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Speak(pwcs, dwFlags, NULL);
}
// Pauses speech output via ISpVoice::Pause.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::Pause()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Pause();
}

// Resumes paused speech output via ISpVoice::Resume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::Resume()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->Resume();
}
// rate
// Sets the speaking rate; lRateAdjust is forwarded to ISpVoice::SetRate.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::SetRate(long lRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->SetRate(lRateAdjust);
}
// Reads the current speaking rate into *plRateAdjust via ISpVoice::GetRate.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::GetRate(long* plRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->GetRate(plRateAdjust);
}
// volume
// Sets the output volume; usVolume is forwarded to ISpVoice::SetVolume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::SetVolume(USHORT usVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->SetVolume(usVolume);
}
// Reads the current output volume into *pusVolume via ISpVoice::GetVolume.
// Returns the engine's HRESULT, or E_POINTER (with an error string recorded)
// when no voice exists yet.
HRESULT CText2Speech::GetVolume(USHORT* pusVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }
    return m_IpVoice->GetVolume(pusVolume);
}
暂停开始播放,可以类似于下面的code
// Pauses playback, then records the "pause" status.
public void pause()
{
    this.spVoice.Pause();
    this.status = "pause";
}
// Resumes playback, then records the "play" status.
public void resume()
{
    this.spVoice.Resume();
    this.status = "play";
}
语言函数的实现比较复杂。由于IspVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关,而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目,设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。它们的代码如下:
// Returns the number of voice tokens enumerated under SPCAT_VOICES.
// On failure returns (ULONG)-1 and records an error string.
ULONG CText2Speech::GetVoiceCount()
{
    // The function's failure value, spelled out instead of relying on an
    // implicit -1 -> ULONG conversion at every return.
    const ULONG ulFailure = static_cast<ULONG>(-1);

    // Enumerate the available voices.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return ulFailure;
    }

    // Ask the enumerator how many voices it holds.
    ULONG ulCount = ulFailure;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice count");
        return ulFailure;
    }

    return ulCount;
}
// Retrieves the description of a voice.
//
//   ppszDescription - receives the description string produced by
//                     SpGetDescription; SAPI documents that the caller frees
//                     it with CoTaskMemFree.
//   lIndex          - (ULONG)-1 selects the voice currently in use; any other
//                     value is a zero-based index into the SPCAT_VOICES
//                     enumeration.
//
// Returns S_OK on success. Returns E_POINTER for a NULL output pointer or a
// missing voice, E_INVALIDARG for an index past the end of the enumeration,
// or the failing SAPI HRESULT. An error string is recorded on every failure.
//
// Fixes relative to the earlier version:
//   * the SpGetDescription result on the current-voice path is now assigned
//     to hr, so the failure check that follows is no longer dead;
//   * an out-of-range index is rejected at run time (it was only ASSERTed,
//     so release builds kept calling Next() past the end and then passed a
//     NULL token to SpGetDescription);
//   * the token is fetched directly with IEnumSpObjectTokens::Item instead
//     of stepping through the enumerator.
HRESULT CText2Speech::GetVoice(WCHAR **ppszDescription, ULONG lIndex)
{
    if (ppszDescription == NULL)
    {
        m_sError = _T("Error: description pointer is NULL");
        return E_POINTER;
    }

    HRESULT hr = S_OK;
    CComPtr<ISpObjectToken> cpVoiceToken;

    if (lIndex == static_cast<ULONG>(-1))
    {
        // Current voice.
        if (!m_IpVoice)
        {
            m_sError = _T("Voice engine is not initialized");
            return E_POINTER;
        }

        hr = m_IpVoice->GetVoice(&cpVoiceToken);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get current voice");
            return hr;
        }

        hr = SpGetDescription(cpVoiceToken, ppszDescription);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get current voice description");
            return hr;
        }
        return hr;
    }

    // Any other voice: enumerate the installed voices first.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return hr;
    }

    ULONG ulCount = 0;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to voice count");
        return hr;
    }

    // Range control, enforced in release builds too.
    if (lIndex >= ulCount)
    {
        m_sError = _T("Error: voice index out of range");
        return E_INVALIDARG;
    }

    hr = cpEnum->Item(lIndex, &cpVoiceToken);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice token");
        return hr;
    }

    hr = SpGetDescription(cpVoiceToken, ppszDescription);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice description");
        return hr;
    }

    return hr;
}
// Selects the installed voice whose description equals *ppszDescription
// (compared case-insensitively).
//
// Returns the HRESULT of ISpVoice::SetVoice when a match is found. Returns
// E_POINTER for a NULL argument or a missing voice, SPERR_NOT_FOUND when no
// installed voice matches, or the failing SAPI HRESULT. An error string is
// recorded on every failure.
//
// Fixes relative to the earlier version:
//   * each description returned by SpGetDescription is freed with
//     CoTaskMemFree (it was leaked on every loop iteration);
//   * "no voice matched" is reported as a failure instead of returning the
//     success code left over from the enumeration;
//   * _wcsicmp replaces the deprecated wcsicmp alias;
//   * NULL arguments and a missing voice are rejected up front.
HRESULT CText2Speech::SetVoice(WCHAR **ppszDescription)
{
    if (ppszDescription == NULL || *ppszDescription == NULL)
    {
        m_sError = _T("Error: description pointer is NULL");
        return E_POINTER;
    }
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return E_POINTER;
    }

    // Enumerate the available voices.
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return hr;
    }

    // Get the number of voices.
    ULONG ulCount = 0;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to voice count");
        return hr;
    }

    // Walk the voices until one with a matching description is found.
    for (ULONG i = 0; i < ulCount; ++i)
    {
        CComPtr<ISpObjectToken> cpVoiceToken;
        hr = cpEnum->Next(1, &cpVoiceToken, NULL);
        if (FAILED(hr))
        {
            m_sError = _T("Error to voice token");
            return hr;
        }
        if (hr != S_OK || !cpVoiceToken)
        {
            // The enumerator ran out before the reported count was reached.
            break;
        }

        WCHAR *pszCandidate = NULL;
        hr = SpGetDescription(cpVoiceToken, &pszCandidate);
        if (FAILED(hr))
        {
            m_sError = _T("Error to get voice description");
            return hr;
        }

        const bool bMatch = (_wcsicmp(pszCandidate, *ppszDescription) == 0);
        ::CoTaskMemFree(pszCandidate);

        if (bMatch)
        {
            hr = m_IpVoice->SetVoice(cpVoiceToken);
            if (FAILED(hr))
            {
                m_sError = _T("Error to set voice");
            }
            return hr;
        }
    }

    m_sError = _T("Error: no voice matches the description");
    return SPERR_NOT_FOUND;
}
通过CSpEvent的GetFrom函数可以获得当前的事件信息,eEventId成员中记录了朗读的音节的代号。数组g_iMapVisemeToImage定义了音节代码和对应嘴形位图序列号的对应关系:
// Lookup table indexed by SAPI viseme id (SP_VISEME_0 .. SP_VISEME_21).
// Each entry is the sequence number of the mouth-shape bitmap shown for
// that viseme.
const int g_iMapVisemeToImage[22] =
{
     0,  // SP_VISEME_0   silence
    11,  // SP_VISEME_1   AE, AX, AH
    11,  // SP_VISEME_2   AA
    11,  // SP_VISEME_3   AO
    10,  // SP_VISEME_4   EY, EH, UH
    11,  // SP_VISEME_5   ER
     9,  // SP_VISEME_6   y, IY, IH, IX
     2,  // SP_VISEME_7   w, UW
    13,  // SP_VISEME_8   OW
     9,  // SP_VISEME_9   AW
    12,  // SP_VISEME_10  OY
    11,  // SP_VISEME_11  AY
     9,  // SP_VISEME_12  h
     3,  // SP_VISEME_13  r
     6,  // SP_VISEME_14  l
     7,  // SP_VISEME_15  s, z
     8,  // SP_VISEME_16  SH, CH, JH, ZH
     5,  // SP_VISEME_17  TH, DH
     4,  // SP_VISEME_18  f, v
     7,  // SP_VISEME_19  d, t, n
     9,  // SP_VISEME_20  k, g, NG
     1   // SP_VISEME_21  p, b, m
};
为了响应消息WM_TTSEVENT,需要添加相应的消息响应函数:
// Message map of the reciter dialog: the standard dialog messages, the
// Speak/Stop/Resume buttons and the list-box selection change, plus the
// custom speech-engine notification.
BEGIN_MESSAGE_MAP(CReciterDlg, CDialog)
//{{AFX_MSG_MAP(CReciterDlg)
ON_WM_SYSCOMMAND()
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDC_BUTTON_SPEAK, OnButtonSpeak)
ON_LBN_SELCHANGE(IDC_LIST1, OnSelchangeList1)
ON_BN_CLICKED(IDC_BUTTON_STOP, OnButtonStop)
ON_BN_CLICKED(IDC_BUTTON_RESUME, OnButtonResume)
//}}AFX_MSG_MAP
// Added by hand, outside the AFX_MSG_MAP markers: routes WM_TTSEVENT to OnMouthEvent.
ON_MESSAGE(WM_TTSEVENT, OnMouthEvent)
END_MESSAGE_MAP()
// WM_TTSEVENT handler. Drains the events queued on the voice; for each viseme
// event it looks up the mouth bitmap for that viseme and invalidates the
// mouth rectangle. Other event ids are ignored. Always returns 0.
LRESULT CReciterDlg::OnMouthEvent(WPARAM wParam, LPARAM lParam)
{
    CSpEvent event;
    for (;;)
    {
        if (event.GetFrom(m_Text2Speech.m_IpVoice) != S_OK)
        {
            break;
        }
        if (event.eEventId != SPEI_VISEME)
        {
            continue;
        }
        m_iMouthBmp = g_iMapVisemeToImage[event.Viseme()];
        InvalidateRect(m_cMouthRect, false);
    }
    return 0;
}
浙公网安备 33010602011771号