📄 phonemeextractor.cpp

📁 HL2 source code. Do not use it illegally.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
// extracephonemes.cpp : Defines the entry point for the console application.
//
#define PROTECTED_THINGS_DISABLE

#include <stdio.h>
#include <windows.h>
#include <tchar.h>
#include "sphelper.h"
#include "spddkhlp.h"
// ATL Header Files
#include <atlbase.h>
// Face poser and util includes
#include "utlvector.h"
#include "PhonemeExtractor.h"
#include "PhonemeConverter.h"
#include "sentence.h"
#include "tier0/dbg.h"

// Extract phoneme grammar id
#define EP_GRAM_ID			101
// First rule of dynamic sentence rule set
#define DYN_SENTENCERULE	102
// # of milliseconds to allow for processing before timeout
#define SR_WAVTIMEOUT		4000
// Weight tag for rule to rule word/rule transitions
#define CONFIDENCE_WEIGHT	0.0f

// Uncomment to compile in the code guarded by "#if LOGGING" (see LogReset)
//#define LOGGING		1
// Path of the file that LogReset() opens for writing when LOGGING is enabled
#define LOGFILE		"c:\\fp.log"

//-----------------------------------------------------------------------------
// Purpose: Empties the log file. Does nothing unless LOGGING is defined to a
//  non-zero value at compile time.
//-----------------------------------------------------------------------------
void LogReset( void )
{
#if LOGGING
	// Opening with "w" truncates any existing file; nothing is written here
	FILE *logHandle = fopen( LOGFILE, "w" );
	if ( logHandle != NULL )
	{
		fclose( logHandle );
	}
#endif
}

char *va( const char *fmt, ... );

//-----------------------------------------------------------------------------
// Purpose: Logs the number of words in the sentence, then one line per word
//  with its text and start/end byte values
// Input  : sentence - sentence whose m_Words entries are logged
//-----------------------------------------------------------------------------
void LogWords( CSentence& sentence )
{
	const int wordCount = sentence.m_Words.Size();
	Log( "Wordcount == %i\n", wordCount );

	for ( int idx = 0; idx < wordCount; ++idx )
	{
		const CWordTag *tag = sentence.m_Words[ idx ];
		Log( "Word %s %u to %u\n", tag->m_pszWord, tag->m_uiStartByte, tag->m_uiEndByte );
	}
}

//-----------------------------------------------------------------------------
// Purpose: Would log every phoneme of every word in the sentence; currently
//  disabled by the unconditional return at the top of the body
// Input  : sentence - sentence whose words/phonemes would be logged
//-----------------------------------------------------------------------------
void LogPhonemes( CSentence& sentence )
{
	// Phoneme logging is switched off: leave before doing any work.
	// Everything below is never reached.
	return;

	Log( "Phonemecount == %i\n", sentence.CountPhonemes() );

	const int wordCount = sentence.m_Words.Size();
	for ( int wordIdx = 0; wordIdx < wordCount; ++wordIdx )
	{
		const CWordTag *tag = sentence.m_Words[ wordIdx ];
		const int phonemeCount = tag->m_Phonemes.Size();

		for ( int phonemeIdx = 0; phonemeIdx < phonemeCount; ++phonemeIdx )
		{
			const CPhonemeTag *ph = tag->m_Phonemes[ phonemeIdx ];
			Log( "Phoneme %s %u to %u\n", ph->m_szPhoneme, ph->m_uiStartByte, ph->m_uiEndByte );
		}
	}
}

// 10,000,000 is the number of 100-nanosecond units in one second.
// NOTE(review): presumably used to convert such tick counts to seconds; no use
// is visible in this part of the file - confirm against callers.
// Deliberately no trailing semicolon so the macro can sit inside an expression.
#define NANO_CONVERT 10000000.0f

//-----------------------------------------------------------------------------
// Purpose: Walk list of words and phonemes in a recognition result and create
//  word and phoneme tags in the CSentence object.
//  Only per-word audio offsets are read from the result, so each word's byte
//  range is split among its phonemes in proportion to WeightForPhoneme().
// Input  : *converter - turns each phone id into its text form (IdToPhone)
//			result - recognition result whose phrase elements are walked
//			sentence - reset to its base and refilled when the phrase has at
//				least one element; left untouched otherwise
//-----------------------------------------------------------------------------
void EnumeratePhonemes( ISpPhoneConverter *converter, const ISpRecoResult* result, CSentence& sentence )
{
	// NOTE(review): W2T/W2A are invoked once per word and up to three times per
	//  phoneme below; check the ATL conversion-macro documentation for their
	//  stack usage if very long phrases are expected.
	USES_CONVERSION;

	// Grab access to element container
	ISpPhrase *phrase = ( ISpPhrase * )result;
	if ( !phrase )
		return;

	SPPHRASE *pElements = NULL;
	if ( !SUCCEEDED( phrase->GetPhrase( &pElements ) ) || !pElements )
		return;

	// Only rebuild the sentence if the phrase actually contains elements
	if ( pElements->Rule.ulCountOfElements > 0 )
	{
		sentence.ResetToBase();

		// Walk list of words
		for ( ULONG i = 0; i < pElements->Rule.ulCountOfElements; i++ )
		{
			// Get start/end sample index
			unsigned int wordstart	= pElements->pElements[i].ulAudioStreamOffset + (unsigned int)pElements->ullAudioStreamPosition;
			unsigned int wordend	= wordstart + pElements->pElements[i].ulAudioSizeBytes;

			// Create word tag
			CWordTag *w = new CWordTag( W2T( pElements->pElements[i].pszDisplayText ) );
			Assert( w );
			w->m_uiStartByte = wordstart;
			w->m_uiEndByte   = wordend;

			sentence.AddWordTag( w );

			// Without a pronunciation string or a converter there are no phonemes
			//  to add; the word tag itself is still kept.
			const SPPHONEID *pronunciation = pElements->pElements[i].pszPronunciation;
			if ( !pronunciation || !converter )
				continue;

			// One-id, zero-terminated string handed to IdToPhone
			SPPHONEID pstr[ 2 ];
			pstr[ 1 ] = 0;
			WCHAR wszPhoneme[ SP_MAX_PRON_LENGTH ];

			const SPPHONEID *current;

			// First pass: sum the weights of all phonemes in this word
			float total_weight = 0.0f;
			for ( current = pronunciation; *current; current++ )
			{
				pstr[ 0 ] = *current;
				wszPhoneme[ 0 ] = L'\0';

				converter->IdToPhone( pstr, wszPhoneme );

				total_weight += WeightForPhoneme( W2A( wszPhoneme ) );
			}

			// Decide # of bytes/phoneme weight (stays 0 if the weights sum to 0)
			float psize = 0;
			if ( total_weight )
			{
				psize = ( wordend - wordstart ) / total_weight;
			}

			// Second pass: re-walk the phoneme list and create true phoneme tags
			float startWeight = 0.0f;
			for ( current = pronunciation; *current; current++ )
			{
				pstr[ 0 ] = *current;
				wszPhoneme[ 0 ] = L'\0';

				converter->IdToPhone( pstr, wszPhoneme );

				CPhonemeTag *p = new CPhonemeTag( W2A( wszPhoneme ) );
				Assert( p );

				float weight = WeightForPhoneme( W2A( wszPhoneme ) );

				// Each phoneme starts where the accumulated weight so far ends
				p->m_uiStartByte = wordstart + (int)( startWeight * psize );
				p->m_uiEndByte	 = p->m_uiStartByte + (int)( psize * weight );

				startWeight += weight;

				// Convert to IPA phoneme code
				p->m_nPhonemeCode = TextToPhoneme( p->m_szPhoneme );

				sentence.AddPhonemeTag( w, p );
			}
		}
	}

	// Free memory
	::CoTaskMemFree(pElements);
}

//-----------------------------------------------------------------------------
// Purpose: Per-word record used while building the sentence grammar; AddWordRule
//  appends one of these for each word it is given
//-----------------------------------------------------------------------------
struct WORDRULETYPE
{
	// Set by AddWordRule to DYN_SENTENCERULE + index + 1
	int					ruleId;
	// Grammar state created for this word via CreateNewState
	SPSTATEHANDLE		hRule;
	// The word as passed to AddWordRule
	CSpDynamicString	word;
	// Copy of the word converted with W2T, used in debug output
	char				plaintext[ 256 ];
};

//-----------------------------------------------------------------------------
// Purpose: Appends a new WORDRULETYPE entry for one word of the sentence and
//  creates an empty grammar state for it
// Input  : cpRecoGrammar - grammar in which the new state is created
//			*root - state handle passed to CreateNewState
//			*rules - list that receives the new entry at its tail
//			word - the word; stored as-is and as a converted plain-text copy
//-----------------------------------------------------------------------------
void AddWordRule( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules, CSpDynamicString& word )
{
	USES_CONVERSION;

	int idx = (*rules).AddToTail();
	WORDRULETYPE *newrule = &(*rules)[ idx ];

	newrule->ruleId = DYN_SENTENCERULE + idx + 1;
	newrule->word = word;

	// plaintext is a fixed-size buffer and the word length is not limited here,
	//  so copy with a bound and always terminate. A NULL conversion result is
	//  stored as an empty string.
	const char *plain = W2T( word );
	strncpy( newrule->plaintext, plain ? plain : "", sizeof( newrule->plaintext ) - 1 );
	newrule->plaintext[ sizeof( newrule->plaintext ) - 1 ] = '\0';

	// Create empty rule
	HRESULT hr = cpRecoGrammar->CreateNewState( *root, &newrule->hRule );
	Assert( !FAILED( hr ) );
}

//-----------------------------------------------------------------------------
// Purpose: Adds a transition labelled with from's word, leading from from's
//  state to to's state (or to a NULL target state when to is NULL), and
//  writes a debug line describing it
// Input  : cpRecoGrammar - grammar receiving the transition
//			*from - source rule; required
//			*to - destination rule, or NULL
//-----------------------------------------------------------------------------
void AddWordTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to )
{
	Assert( from );

	// Assert may not stop execution (e.g. if compiled out); without a source
	//  rule there is nothing to add and every line below would dereference NULL.
	if ( !from )
		return;

	if ( !to )
	{
		OutputDebugString( va( "Transition from %s to TERM\r\n", from->plaintext ) );
	}
	else
	{
		OutputDebugString( va( "Transition from %s to %s\r\n", from->plaintext, to->plaintext ) );
	}

	HRESULT hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? to->hRule : NULL, (WCHAR *)from->word, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
	Assert( !FAILED( hr ) );
}

//-----------------------------------------------------------------------------
// Purpose: Adds a transition with no word text (NULL) from from's state to
//  to's state (or to a NULL target state when to is NULL), and writes a debug
//  line describing it
// Input  : cpRecoGrammar - grammar receiving the transition
//			*from - source rule; required
//			*to - destination rule, or NULL
//-----------------------------------------------------------------------------
void AddOptionalTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to )
{
	Assert( from );

	// Assert may not stop execution (e.g. if compiled out); without a source
	//  rule there is nothing to add and every line below would dereference NULL.
	if ( !from )
		return;

	if ( !to )
	{
		OutputDebugString( va( "Opt transition from %s to TERM\r\n", from->plaintext ) );
	}
	else
	{
		OutputDebugString( va( "Opt transition from %s to %s\r\n", from->plaintext, to->plaintext ) );
	}

	HRESULT hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? to->hRule : NULL, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
	Assert( !FAILED( hr ) );
}

#define MAX_WORD_SKIP 1
//-----------------------------------------------------------------------------
// Purpose: Links together all word rule states into a sentence rule CFG and
//  commits the grammar. Besides the word-by-word chain, text-less transitions
//  are added that allow up to MAX_WORD_SKIP words to be skipped.
// Input  : cpRecoGrammar - grammar being built and committed
//			*root - root state the first word rule is linked from
//			*rules - word rules in sentence order
// Output : true if the grammar was committed; false if there are no rules or
//			Commit failed
//-----------------------------------------------------------------------------
bool BuildRules( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules )
{
	HRESULT hr;

	const int numrules = (*rules).Size();

	// Nothing to link: indexing rule 0 of an empty list would be out of range
	if ( numrules <= 0 )
		return false;

	// Add transition from the root state into the first word's state
	hr = cpRecoGrammar->AddWordTransition( *root, (*rules)[ 0 ].hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
	Assert( !FAILED( hr ) );

	// Chain every word to its successor; the last word goes to the end point
	for ( int i = 0; i < numrules; i++ )
	{
		WORDRULETYPE *rule = &(*rules)[ i ];
		WORDRULETYPE *next = ( i < numrules - 1 ) ? &(*rules)[ i + 1 ] : NULL;

		AddWordTransitionRule( cpRecoGrammar, rule, next );
	}

	if ( numrules > 1 )
	{
		for ( int skip = 1; skip <= min( MAX_WORD_SKIP, numrules ); skip++ )
		{
			OutputDebugString( va( "Opt transition from Root to %s\r\n", (*rules)[ 0 ].plaintext ) );

			// NOTE(review): this repeats the root -> first rule transition added
			//  above; its result is left unchecked, as before.
			hr = cpRecoGrammar->AddWordTransition( *root, (*rules)[ 0 ].hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );

			// Now build rules where you can skip 1 to N intervening words
			//  (rule 0 is not given a skip transition here)
			for ( int i = 1; i < numrules - skip; i++ )
			{
				AddOptionalTransitionRule( cpRecoGrammar, &(*rules)[ i ], &(*rules)[ i + skip ] );
			}

			// Go from final rule to end point
			AddOptionalTransitionRule( cpRecoGrammar, &(*rules)[ numrules - 1 ], NULL );
		}
	}

	// Store it
	hr = cpRecoGrammar->Commit(NULL);
	if ( FAILED( hr ) )
		return false;

	return true;
}

//-----------------------------------------------------------------------------
// Purpose: Debugging, prints alternate list if one is created. Each alternate
//  is printed as "[ ALT ]" followed by its text and a line break.
// Input  : cpResult - recognition result to query for alternates
//			(*pfnPrint - printf-style output callback
//-----------------------------------------------------------------------------
void PrintAlternates( ISpRecoResult* cpResult, void (*pfnPrint)( const char *fmt, ... ) )
{
	const ULONG maxAlternates = 32;

	ISpPhraseAlt *rgPhraseAlt[ 32 ];
	memset( rgPhraseAlt, 0, sizeof( rgPhraseAlt ) );

	ISpPhrase *phrase = ( ISpPhrase * )cpResult;
	if ( !phrase )
		return;

	SPPHRASE *pElements = NULL;
	if ( !SUCCEEDED( phrase->GetPhrase( &pElements ) ) || !pElements )
		return;

	if ( pElements->Rule.ulCountOfElements > 0 )
	{
		// Start at zero so a failed call cannot leave a garbage count behind
		ULONG ulCount = 0;

		HRESULT hr = cpResult->GetAlternates(
			pElements->Rule.ulFirstElement,
			pElements->Rule.ulCountOfElements,
			maxAlternates,
			rgPhraseAlt,
			&ulCount);

		Assert( !FAILED( hr ) );

		if ( FAILED( hr ) )
		{
			ulCount = 0;
		}

		for ( ULONG r = 0 ; r < ulCount && r < maxAlternates; r++ )
		{
			if ( !rgPhraseAlt[ r ] )
				continue;

			CSpDynamicString dstrText;
			hr = rgPhraseAlt[ r ]->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
			Assert( !FAILED( hr ) );

			// CopyToChar() returns a separately allocated copy (CoTaskMemAlloc, per
			//  sphelper.h) that the caller must release; it may be NULL.
			char *text = dstrText.CopyToChar();

			pfnPrint( "[ ALT ]" );
			if ( text )
			{
				// Print as data, never as the format string: the recognized text
				//  may contain '%' characters.
				pfnPrint( "%s", text );
				::CoTaskMemFree( text );
			}
			pfnPrint( "\r\n" );
		}
	}

	// The phrase structure returned by GetPhrase is owned by the caller
	::CoTaskMemFree( pElements );

	for ( int i = 0; i < 32; i++ )
	{
		if ( rgPhraseAlt[ i ] )
		{
			rgPhraseAlt[ i ]->Release();
			rgPhraseAlt[ i ] = NULL;
		}
	}
}

//-----------------------------------------------------------------------------
// Purpose: Prints every word of the sentence with its start/end byte values,
//  each followed by an indented line per phoneme of that word
// Input  : sentence - sentence whose words and phonemes are printed
//			(*pfnPrint - printf-style output callback
//-----------------------------------------------------------------------------
void PrintWordsAndPhonemes( CSentence& sentence, void (*pfnPrint)( const char *fmt, ... ) )
{
	pfnPrint( "WORDS\r\n\r\n" );

	for ( int i = 0 ; i < sentence.m_Words.Size(); i++ )
	{
		CWordTag *word = sentence.m_Words[ i ];
		if ( !word )
			continue;

		// Format through the callback itself: no fixed-size scratch buffer to
		//  overflow, and the word text is never interpreted as a format string.
		pfnPrint( "<%u - %u> %s\r\n",
			word->m_uiStartByte, word->m_uiEndByte, word->m_pszWord );

		for ( int j = 0 ; j < word->m_Phonemes.Size(); j++ )
		{
			CPhonemeTag *phoneme = word->m_Phonemes[ j ];
			if ( !phoneme )
				continue;

			pfnPrint( "  <%u - %u> %s\r\n",
				phoneme->m_uiStartByte, phoneme->m_uiEndByte, phoneme->m_szPhoneme );
		}
	}

	pfnPrint( "\r\n" );
}

//-----------------------------------------------------------------------------
// Purpose: Given a wave file and a string of words "text", creates a CFG from the
//  sentence and stores the resulting words/phonemes in CSentence
// Input  : *wavname - 
//			text - 
//			sentence - 
//			(*pfnPrint - 
// Output : SR_RESULT
//-----------------------------------------------------------------------------
SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) )
{
	// Assume failure
	SR_RESULT result = SR_RESULT_ERROR;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -