📄 phonemeextractor.cpp

📁 HL2 source code. Do not use it illegally.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	{
		CWordTag *word = outwords.m_Words[ i ];
		if ( IsUseable( word ) )
			continue;

		count++;
	}

	return count;
}

//-----------------------------------------------------------------------------
// Purpose: Rescales the phonemes of a word from the byte span the word used to
//  occupy onto the span it occupies now. Each phoneme boundary keeps its
//  fractional position (offset from oldStart divided by the old span length).
// Input  : *word - word whose m_uiStartByte / m_uiEndByte are read as the new span
//			oldStart - start byte of the word before it was moved
//			oldEnd - end byte of the word before it was moved
//-----------------------------------------------------------------------------
void RepartitionPhonemes( CWordTag *word, unsigned int oldStart, unsigned int oldEnd )
{
	const float previousSpan = ( float )( oldEnd - oldStart );
	const float currentSpan = ( float )( word->m_uiEndByte - word->m_uiStartByte );

	// Without a positive previous span there is nothing to scale by, so both
	//  fractions are left at zero and every phoneme lands on the word start
	const bool canScale = ( previousSpan > 0.0f );

	const int phonemeCount = word->m_Phonemes.Size();
	for ( int idx = 0; idx < phonemeCount; ++idx )
	{
		CPhonemeTag *phoneme = word->m_Phonemes[ idx ];
		Assert( phoneme );

		// Offsets of the phoneme boundaries relative to the old word start
		const float startOffset = ( float ) ( phoneme->m_uiStartByte - oldStart );
		const float endOffset = ( float ) ( phoneme->m_uiEndByte - oldStart );

		const float startFraction = canScale ? ( startOffset / previousSpan ) : 0.0f;
		const float endFraction = canScale ? ( endOffset / previousSpan ) : 0.0f;

		// Both boundaries are rebased on the word's new start byte
		phoneme->m_uiStartByte = word->m_uiStartByte + ( unsigned int ) ( startFraction * currentSpan );
		phoneme->m_uiEndByte = word->m_uiStartByte + ( unsigned int ) ( endFraction * currentSpan );
	}
}

//-----------------------------------------------------------------------------
// Purpose: Spreads the words with indices [start, end] evenly across the byte
//  range [sampleStart, sampleEnd] and rescales each word's phonemes to follow.
//  Every word gets ( sampleEnd - sampleStart ) / wordCount bytes; the last word
//  additionally absorbs the integer-division remainder so that the partition
//  ends exactly on sampleEnd instead of stopping short of it.
// Input  : outwords - sentence whose words are repositioned in place
//			start - index of the first word to reposition
//			end - index of the last word to reposition (inclusive)
//			sampleStart - first byte of the range to fill
//			sampleEnd - end byte of the range to fill
//-----------------------------------------------------------------------------
void PartitionWords( CSentence& outwords, int start, int end, int sampleStart, int sampleEnd )
{
	int wordCount = end - start + 1;
	Assert( wordCount >= 1 );
	// NOTE(review): Assert is presumably a debug-only check. Bail out explicitly so
	//  an empty range can never reach the division below.
	if ( wordCount < 1 )
		return;

	int stepSize  = ( sampleEnd - sampleStart ) / wordCount;

	int currentStart = sampleStart;

	for ( int i = start; i <= end; i++ )
	{
		CWordTag *word = outwords.m_Words[ i ];
		Assert( word );

		// Remember the previous span so the phonemes can be rescaled from it
		unsigned int oldStart = word->m_uiStartByte;
		unsigned int oldEnd = word->m_uiEndByte;

		word->m_uiStartByte = currentStart;
		// The final word runs to sampleEnd so the remainder of the division is not lost
		word->m_uiEndByte = ( i == end ) ? sampleEnd : ( currentStart + stepSize );

		RepartitionPhonemes( word, oldStart, oldEnd );

		currentStart += stepSize;
	}
}

//-----------------------------------------------------------------------------
// Purpose: Takes the combined extent of two words (smallest start byte to
//  largest end byte) and splits it at its midpoint: w1 receives the first half,
//  w2 the second half. The phonemes of both words are rescaled to the new spans.
// Input  : *w1 - word that ends up with [ start, mid ]
//			*w2 - word that ends up with [ mid, end ]
//-----------------------------------------------------------------------------
void MergeWords( CWordTag *w1, CWordTag *w2 )
{
	// Capture the spans both words had on entry; they are needed after the
	//  words have been moved in order to rescale the phonemes
	const unsigned int firstOldStart = w1->m_uiStartByte;
	const unsigned int firstOldEnd = w1->m_uiEndByte;
	const unsigned int secondOldStart = w2->m_uiStartByte;
	const unsigned int secondOldEnd = w2->m_uiEndByte;

	const unsigned int combinedStart = min( firstOldStart, secondOldStart );
	const unsigned int combinedEnd = max( firstOldEnd, secondOldEnd );
	const unsigned int splitPoint = ( combinedStart + combinedEnd ) / 2;

	w1->m_uiStartByte = combinedStart;
	w1->m_uiEndByte = splitPoint;

	w2->m_uiStartByte = splitPoint;
	w2->m_uiEndByte = combinedEnd;

	RepartitionPhonemes( w1, firstOldStart, firstOldEnd );
	RepartitionPhonemes( w2, secondOldStart, secondOldEnd );
}

//-----------------------------------------------------------------------------
// Purpose: Walks adjacent word pairs and, whenever either word of a pair has a
//  zero-length byte span, merges the pair with MergeWords so the two words
//  share their combined extent. After a merge that moved a word the scan starts
//  over from the first pair.
//
//  A merge can leave both words exactly where they were (for example when the
//  combined extent is 0 or 1 bytes long, so the midpoint equals the start).
//  Restarting in that situation would repeat the same merge forever, so a merge
//  that changes no word span simply advances to the next pair instead.
// Input  : outwords - sentence whose words are adjusted in place
//-----------------------------------------------------------------------------
void FixupZeroLengthWords( CSentence& outwords )
{
	// MergeWords never adds or removes words, so the pair count is fixed
	const int pairCount = outwords.m_Words.Size() - 1;

	int i = 0;
	while ( i < pairCount )
	{
		CWordTag *current = outwords.m_Words[ i ];
		CWordTag *next = outwords.m_Words[ i + 1 ];

		const bool currentIsEmpty = ( current->m_uiEndByte - current->m_uiStartByte <= 0 );
		const bool nextIsEmpty = ( next->m_uiEndByte - next->m_uiStartByte <= 0 );

		if ( !currentIsEmpty && !nextIsEmpty )
		{
			i++;
			continue;
		}

		// Snapshot the spans so we can tell whether the merge achieved anything
		const unsigned int currentStartBefore = current->m_uiStartByte;
		const unsigned int currentEndBefore = current->m_uiEndByte;
		const unsigned int nextStartBefore = next->m_uiStartByte;
		const unsigned int nextEndBefore = next->m_uiEndByte;

		MergeWords( current, next );

		const bool spansChanged =
			current->m_uiStartByte != currentStartBefore ||
			current->m_uiEndByte != currentEndBefore ||
			next->m_uiStartByte != nextStartBefore ||
			next->m_uiEndByte != nextEndBefore;

		if ( spansChanged )
		{
			// Earlier pairs may be affected by the move, rescan from the beginning
			i = 0;
		}
		else
		{
			// Nothing moved; merging this pair again cannot help, so move on
			i++;
		}
	}
}

void ComputeMissingByteSpans( int numsamples, CSentence& outwords )
{
	int numwords = outwords.m_Words.Size();
	// Nothing to do
	if ( numwords <= 0 )
		return;

	int interationcount = 1;

	while( 1 )
	{
		Log( "\nCompute %i\n", interationcount++ );
		LogWords( outwords );

		int wordNumber;

		// Done!
		if ( !CountUnuseableWords( outwords ) )
		{
			FixupZeroLengthWords( outwords );
			break;
		}

		if ( !CountUsableWords( outwords ) )
		{
			// Evenly space words across full sample time
			PartitionWords( outwords, 0, numwords - 1, 0, numsamples );
			break;
		}

		wordNumber = FindFirstUsableWord( outwords );
		// Not the first word
		if ( wordNumber > 0 )
		{
			// Repartition all of the unusables and the first one starting at zero over the range
			CWordTag *firstUsable = outwords.m_Words[ wordNumber ];
			Assert( firstUsable );

			if ( firstUsable->m_uiStartByte != 0 )
			{
				PartitionWords( outwords, 0, wordNumber - 1, 0, firstUsable->m_uiStartByte );
			}
			else
			{
				PartitionWords( outwords, 0, wordNumber, 0, firstUsable->m_uiEndByte );
			}

			// Start over
			continue;
		}

		wordNumber = FindLastUsableWord( outwords );
		// Not the last word
		if ( wordNumber >= 0 && wordNumber < numwords - 1 )
		{
			// Repartition all of the unusables and the first one starting at zero over the range
			CWordTag *lastUsable = outwords.m_Words[ wordNumber ];
			Assert( lastUsable );

			if ( lastUsable->m_uiEndByte != (unsigned int)numsamples )
			{
				PartitionWords( outwords, wordNumber + 1, numwords-1, lastUsable->m_uiEndByte, numsamples );
			}
			else
			{
				PartitionWords( outwords, wordNumber, numwords-1, lastUsable->m_uiStartByte, numsamples );
			}

			// Start over
			continue;
		}

		// If we get here it means that the start and end of the list are okay and we just have to
		//  iterate across the list and fix things in the middle
		int startByte = 0;
		int endByte = 0;
		for ( int i = 0; i < numwords ; i++ )
		{
			CWordTag *word = outwords.m_Words[ i ];
			if ( IsUseable( word ) )
			{
				startByte = word->m_uiEndByte;
				continue;
			}

			// Found the start of a chain of 1 or more unusable words
			// Find the startbyte of the next usable word and count how many words we check
			int wordCount = 1;
			for ( int j = i + 1; j < numwords; j++ )
			{
				CWordTag *next = outwords.m_Words[ j ];
				if ( IsUseable( next ) )
				{
					endByte = next->m_uiStartByte;
					break;
				}

				wordCount++;
			}

			// Now partition words across the gap and go to start again
			PartitionWords( outwords, i, i + wordCount - 1, startByte, endByte );
			break;
		}
	}
}

//-----------------------------------------------------------------------------
// Purpose: Given a wavfile and a list of inwords, determines the word/phonene 
//  sample counts for the sentce
// Input  : *wavfile - 
//			*inwords - 
//			*outphonemes{	text.Clear( - 
// Output : SR_RESULT
//-----------------------------------------------------------------------------
static SR_RESULT SAPI_ExtractPhonemes( 
	const char *wavfile,
	int numsamples,
	void (*pfnPrint)( const char *fmt, ... ),
	CSentence& inwords,
	CSentence& outwords )
{
	LogReset();

	USES_CONVERSION;

	CSpDynamicString text;
	text.Clear();

	HKEY hkwipe;
	LONG lResult = RegOpenKeyEx( HKEY_CURRENT_USER, "Software\\Microsoft\\Speech\\RecoProfiles", 0, KEY_ALL_ACCESS, &hkwipe );
	if ( lResult == ERROR_SUCCESS )
	{
		RecursiveRegDelKey( hkwipe );
		RegCloseKey( hkwipe );
	}

	if ( strlen( inwords.GetText() ) <= 0 )
	{
		inwords.SetTextFromWords();
	}

	// Construct a string from the inwords array
	text.Append( T2W( inwords.GetText() ) );

	// Assume failure
	SR_RESULT result = SR_RESULT_ERROR;

	if ( text.Length() > 0 )
	{
		CSentence sentence;

		pfnPrint( "Processing...\r\n" );

		// Give it a try
		result = ExtractPhonemes( wavfile, text, sentence, pfnPrint );

		pfnPrint( "Finished.\r\n" );
		// PrintWordsAndPhonemes( sentence, pfnPrint );

		// Copy results to outputs
		outwords.Reset();

		outwords.SetText( inwords.GetText() );
		
		Log( "Starting\n" );
		LogWords( inwords );

		if ( SR_RESULT_ERROR != result )
		{
			int i;

			Log( "Hypothesized\n" );
			LogWords( sentence );

			for( i = 0 ; i < sentence.m_Words.Size(); i++ )
			{
				CWordTag *tag = sentence.m_Words[ i ];
				if ( tag )
				{
					// Skip '...' tag
					if ( stricmp( tag->m_pszWord, "..." ) )
					{
						CWordTag *newTag = new CWordTag( *tag );

						outwords.m_Words.AddToTail( newTag );
					}
				}
			}

			// Now insert unrecognized/skipped words from original list
			//
			int frompos = 0, topos = 0;

			int iterationcount = 1;

			while( 1 )
			{
				// End of source list
				if ( frompos >= inwords.m_Words.Size() )
					break;

				const CWordTag *fromTag = inwords.m_Words[ frompos ];

				// Reached end of destination list, just copy words over from from source list until
				//  we run out of source words
				if ( topos >= outwords.m_Words.Size() )
				{
					// Just copy words over
					CWordTag *newWord = new CWordTag( *fromTag );

					// Remove phonemes
					while ( newWord->m_Phonemes.Size() > 0 )
					{
						CPhonemeTag *kill = newWord->m_Phonemes[ 0 ];
						newWord->m_Phonemes.Remove( 0 );
						delete kill;
					}

					outwords.m_Words.AddToTail( newWord );
					frompos++;
					topos++;
					continue;
				}

				// Destination word
				const CWordTag *toTag = outwords.m_Words[ topos ];

				// Words match, just skip ahead
				if ( !stricmp( fromTag->m_pszWord, toTag->m_pszWord ) )
				{
					frompos++;
					topos++;
					continue;
				}

				// The only case we handle is that something in the source wasn't in the destination

				// Find the next source word that appears in the destination
				int skipAhead = frompos + 1;
				bool found = false;
				while ( skipAhead < inwords.m_Words.Size() )
				{
					const CWordTag *sourceWord = inwords.m_Words[ skipAhead ];
					if ( !stricmp( sourceWord->m_pszWord, toTag->m_pszWord ) )
					{
						found = true;
						break;
					}

					skipAhead++;
				}

				// Uh oh destination has words that are not in source, just skip to next destination word?
				if ( !found )
				{
					topos++;
				}
				else
				{
					// Copy words from from source list into destination
					// 
					int skipCount = skipAhead - frompos;

					while ( --skipCount>= 0 )
					{
						const CWordTag *sourceWord = inwords.m_Words[ frompos++ ];
						CWordTag *newWord = new CWordTag( *sourceWord );

						// Remove phonemes
						while ( newWord->m_Phonemes.Size() > 0 )
						{
							CPhonemeTag *kill = newWord->m_Phonemes[ 0 ];
							newWord->m_Phonemes.Remove( 0 );
							delete kill;
						}

						outwords.m_Words.InsertBefore( topos, newWord );
						topos++;
					}

					frompos++;
					topos++;
				}
			}

			Log( "\nDone simple check\n" );

			LogWords( outwords );
			LogPhonemes( outwords );

			ComputeMissingByteSpans( numsamples, outwords );

			Log( "\nFinal check\n" );

			LogWords( outwords );
			LogPhonemes( outwords );
		}
	}
	else
	{
		pfnPrint( "Input sentence is empty!\n" );
	}

	// Return results
	return result;
}


//-----------------------------------------------------------------------------
// Purpose: IPhonemeExtractor implementation that hands extraction requests to
//  SAPI_ExtractPhonemes
//-----------------------------------------------------------------------------
class CPhonemeExtractorSAPI : public IPhonemeExtractor
{
public:
	// Reports the speech API constant for this extractor
	virtual PE_APITYPE	GetAPIType() const { return SPEECH_API_SAPI; }

	// Display name of the extractor (used for menus, etc)
	virtual char const *GetName() const { return "MS SAPI 5.1"; }

	// Passes every argument through to SAPI_ExtractPhonemes unchanged and
	//  returns whatever it reports
	SR_RESULT Extract( 
		const char *wavfile,
		int numsamples,
		void (*pfnPrint)( const char *fmt, ... ),
		CSentence& inwords,
		CSentence& outwords )
	{
		const SR_RESULT extractionResult = SAPI_ExtractPhonemes( wavfile, numsamples, pfnPrint, inwords, outwords );
		return extractionResult;
	}
};

// Expose the interface
// NOTE(review): presumably publishes the class under VPHONEME_EXTRACTOR_INTERFACE - confirm against the macro definition
EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorSAPI, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -