📄 phonemeextractor.cpp

📁 hl2 source code. Do not use it illegal.
💻 CPP
📖 第 1 页 / 共 3 页
字号:

	if ( text.Length() <= 0 )
	{
		pfnPrint( "Error:  no rule / text specified\n" );
		return result;
	}

	USES_CONVERSION;
	HRESULT hr;
	
	CUtlVector < WORDRULETYPE > wordRules;

	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	CComPtr<ISpPhoneConverter>  cpPhoneConv;
    
	// Create basic SAPI stream object
	// NOTE: The helper SpBindToFile can be used to perform the following operations
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Stream object not installed?\n" );
		return result;
	}

	CSpStreamFormat sInputFormat;
	
	// setup stream object with wav file MY_WAVE_AUDIO_FILENAME
	//   for read-only access, since it will only be access by the SR engine
	hr = cpInputStream->BindToFile(
		T2W(wavname),
		SPFM_OPEN_READONLY,
		NULL,
		sInputFormat.WaveFormatExPtr(),
		SPFEI_ALL_EVENTS );

	if ( FAILED( hr ) )
	{
		pfnPrint( "Error: couldn't open wav file %s\n", wavname );
		return result;
	}
	
	// Create in-process speech recognition engine
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 In process recognizer object not installed?\n" );
		return result;
	}

	// Create recognition context to receive events
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer context\n" );
		return result;
	}
	
	// Create a grammar
	hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer grammar\n" );
		return result;
	}

	// Create a phoneme converter ( so we can convert to IPA codes )
	hr = SpCreatePhoneConverter( SpGetUserDefaultUILanguage(), NULL, NULL, &cpPhoneConv );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create phoneme converter for language %i\n", SpGetUserDefaultUILanguage() );
		return result;
	}

	SPSTATEHANDLE hStateRoot;
	// create/re-create Root level rule of grammar
	hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create root rule\n" );
		return result;
	}

	// Inactivate it so we can alter it
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate grammar rules\n" );
		return result;
	}

	// Create the rule set from the words in text
	{
		CSpDynamicString currentWord;
		WCHAR *pos = ( WCHAR * )text;
		WCHAR str[ 2 ];
		str[1]= 0;

		while ( *pos )
		{
			if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ )
			{
				// Add word to rule set
				if ( currentWord.Length() > 0 )
				{
					AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
					currentWord.Clear();
				}
				pos++;
				continue;
			}

			// Skip anything that's inside a [ xxx ] pair.
			if ( *pos == L'[' )
			{
				while ( *pos && *pos != L']' )
				{
					pos++;
				}

				if ( *pos )
				{
					pos++;
				}
				continue;
			}

			str[ 0 ] = *pos;

			currentWord.Append( str );
			pos++;
		}

		if ( currentWord.Length() > 0 )
		{
			AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
		}

		if ( wordRules.Size() <= 0 )
		{
			pfnPrint( "Error:  Text %s contained no usable words\n", text );
			return result;
		}

		// Build all word to word transitions in the grammar
		if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) )
		{
			pfnPrint( "Error:  Rule set for %s could not be generated\n", text );
			return result;
		}
	}

	// check for recognitions and end of stream event
	const ULONGLONG ullInterest = 
		SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | 
		SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ;
	hr = cpRecoContext->SetInterest( ullInterest, ullInterest );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set interest level\n" );
		return result;
	}	
	// use Win32 events for command-line style application
	hr = cpRecoContext->SetNotifyWin32Event();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set win32 notify event\n" );
		return result;
	}
	// connect wav input to recognizer
	// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to associate input stream\n" );
		return result;
	}	

	// Activate the CFG ( rather than using dictation )
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE );
	if ( FAILED( hr ) )
	{
		switch ( hr )
		{
		case E_INVALIDARG:
			pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" );
			break;
		case SP_STREAM_UNINITIALIZED:
			pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" );
			break;
		case SPERR_UNINITIALIZED:
			pfnPrint( "The object has not been properly initialized.\n");
			break;
		case SPERR_UNSUPPORTED_FORMAT:
			pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" );
			break;
		case SPERR_NOT_TOPLEVEL_RULE:
			pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" );
			break;
		default:
			pfnPrint( "Unknown error\n" );
			break;
		}
		pfnPrint( "Error:  SAPI 5.1 Unable to activate rule set\n" );
		return result;
	}

	// while events occur, continue processing
	// timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream
	BOOL fEndStreamReached = FALSE;
	while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT ))
	{
		CSpEvent spEvent;
		// pull all queued events from the reco context's event queue
		
		while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
		{
			// Check event type
			switch (spEvent.eEventId)
			{
			case SPEI_INTERFERENCE:
				{
					SPINTERFERENCE interference = spEvent.Interference();

					switch ( interference )
					{
					case SPINTERFERENCE_NONE:
						pfnPrint( "[ I None ]\r\n" );
						break;
					case SPINTERFERENCE_NOISE:
						pfnPrint( "[ I Noise ]\r\n" );
						break;
					case SPINTERFERENCE_NOSIGNAL:
						pfnPrint( "[ I No Signal ]\r\n" );
						break;
					case SPINTERFERENCE_TOOLOUD:
						pfnPrint( "[ I Too Loud ]\r\n" );
						break;
					case SPINTERFERENCE_TOOQUIET:
						pfnPrint( "[ I Too Quiet ]\r\n" );
						break;
					case SPINTERFERENCE_TOOFAST:
						pfnPrint( "[ I Too Fast ]\r\n" );
						break;
					case SPINTERFERENCE_TOOSLOW:
						pfnPrint( "[ I Too Slow ]\r\n" );
						break;
					default:
						break;
					}
				}
				break;
			case SPEI_PHRASE_START:
				pfnPrint( "Phrase Start\r\n" );
				sentence.MarkNewPhraseBase();
				break;

			case SPEI_HYPOTHESIS:
			case SPEI_RECOGNITION:
			case SPEI_FALSE_RECOGNITION:
				{
                    CComPtr<ISpRecoResult> cpResult;
                    cpResult = spEvent.RecoResult();

                    CSpDynamicString dstrText;
                    if (spEvent.eEventId == SPEI_FALSE_RECOGNITION)
                    {
                        dstrText = L"(Unrecognized)";

						result = SR_RESULT_FAILED;

						// It's possible that the failed recog might have more words, so see if that's the case
						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
					}
                    else
                    {
						// Hypothesis or recognition success
                        cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);

						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );

						if ( spEvent.eEventId == SPEI_RECOGNITION )
						{
							result = SR_RESULT_SUCCESS;
						}

						pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) );
					}
                    
                    cpResult.Release();
				}
				break;
				// end of the wav file was reached by the speech recognition engine
            case SPEI_END_SR_STREAM:
				fEndStreamReached = TRUE;
				break;
			}
			
			// clear any event data/object references
			spEvent.Clear();
		}// END event pulling loop - break on empty event queue OR end stream
	}// END event polling loop - break on event timeout OR end stream
	
	// Deactivate rule
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate rule set\n" );
		return result;
	}

	// close the input stream, since we're done with it
	// NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation
	hr = cpInputStream->Close();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to close input stream\n" );
		return result;
	}

	return result;
}

//-----------------------------------------------------------------------------
// Purpose: HACK HACK:  We have to delete the RecoContext key or sapi starts to train
//  itself on each iteration which was causing some problems.
// Input  : hKey - 
//-----------------------------------------------------------------------------
void RecursiveRegDelKey(HKEY hKey)
{
	char keyname[256]={0};
	DWORD namesize=256;

	//base case: no subkeys when RegEnumKeyEx returns error on index 0
	LONG lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL);
	if (lResult!=ERROR_SUCCESS)
	{
		return;
	}

	DWORD i=1;
	do
	{
		HKEY subkey;
		LONG lResult2;
		LONG lDelResult;
		lResult2=RegOpenKeyEx(hKey,keyname,0,KEY_ALL_ACCESS,&subkey);
		
		if (lResult2==ERROR_SUCCESS)
		{
			RecursiveRegDelKey(subkey);

			RegCloseKey(subkey);
			lDelResult=RegDeleteKey(hKey,keyname);
			namesize=256;
			//use 0 in the next function call because when you delete one, the rest shift down!
			lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL);
		}

		else 
		{
			break;
		}

	} while (lResult!=ERROR_NO_MORE_ITEMS);
}

bool IsUseable( CWordTag *word )
{
	if ( word->m_uiStartByte || word->m_uiEndByte )
		return true;

	return false;
}

int FindLastUsableWord( CSentence& outwords )
{
	int numwords = outwords.m_Words.Size();
	if ( numwords < 1 )
	{
		Assert( 0 );
		return -1;
	}

	for ( int i = numwords-1; i >= 0; i-- )
	{
		CWordTag *check = outwords.m_Words[ i ];
		if ( IsUseable( check ) )
		{
			return i;
		}
	}

	return -1;
}


int FindFirstUsableWord( CSentence& outwords )
{
	int numwords = outwords.m_Words.Size();
	if ( numwords < 1 )
	{
		Assert( 0 );
		return -1;
	}

	for ( int i = 0; i < numwords; i++ )
	{
		CWordTag *check = outwords.m_Words[ i ];
		if ( IsUseable( check ) )
		{
			return i;
		}
	}

	return -1;
}

//-----------------------------------------------------------------------------
// Purpose: Counts words which have either a valid start or end byte
// Input  : *outwords - 
// Output : int
//-----------------------------------------------------------------------------
int CountUsableWords( CSentence& outwords )
{
	int count = 0;
	int numwords = outwords.m_Words.Size();
	// Nothing to do
	if ( numwords <= 0 )
		return count;

	for ( int i = 0; i < numwords; i++ )
	{
		CWordTag *word = outwords.m_Words[ i ];
		if ( !IsUseable( word ) )
			continue;

		count++;
	}

	return count;
}


//-----------------------------------------------------------------------------
// Purpose: Counts words which have either a valid start or end byte
// Input  : *outwords - 
// Output : int
//-----------------------------------------------------------------------------
int CountUnuseableWords( CSentence& outwords )
{
	int count = 0;
	int numwords = outwords.m_Words.Size();
	// Nothing to do
	if ( numwords <= 0 )
		return count;

	for ( int i = 0; i < numwords; i++ )
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -