📄 mnistdoc.cpp

📁 基于神经网络的手写体识别程序
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	
	if ( bFromRandomizedPatternSequence == FALSE )
	{
		iRet = m_iNextTrainingPattern;
	}
	else
	{
		iRet = m_iRandomizedTrainingPatternSequence[ m_iNextTrainingPattern ];
	}
	
	return iRet;
}



// UNIFORM_ZERO_THRU_ONE gives a uniformly-distributed number between zero (inclusive) and one (exclusive).
// The divisor is formed in floating point ( RAND_MAX + 1.0 ): the integer expression RAND_MAX + 1
// overflows on platforms where RAND_MAX equals INT_MAX, which would make the result negative.
// Each expansion calls rand() exactly once.

#define UNIFORM_ZERO_THRU_ONE ( (double)(rand())/(RAND_MAX + 1.0 ) ) 


void CMNistDoc::RandomizeTrainingPatternSequence()
{
	// randomizes the order of m_iRandomizedTrainingPatternSequence, which is a UINT array
	// holding the numbers 0..59999 in random order
	
	CAutoCS tlo( m_csTrainingPatterns );
	
	UINT ii, jj, iiMax, iiTemp;
	
	iiMax = ::GetPreferences().m_nItemsTrainingImages;
	
	ASSERT( iiMax == 60000 );  // requirement of sloppy and unimaginative code
	
	// initialize array in sequential order
	
	for ( ii=0; ii<iiMax; ++ii )
	{
		m_iRandomizedTrainingPatternSequence[ ii ] = ii;  
	}
	
	
	// now at each position, swap with a random position
	
	for ( ii=0; ii<iiMax; ++ii )
	{
		jj = (UINT)( UNIFORM_ZERO_THRU_ONE * iiMax );
		
		ASSERT( jj < iiMax );
		
		iiTemp = m_iRandomizedTrainingPatternSequence[ ii ];
		m_iRandomizedTrainingPatternSequence[ ii ] = m_iRandomizedTrainingPatternSequence[ jj ];
		m_iRandomizedTrainingPatternSequence[ jj ] = iiTemp;
	}
	
}



UINT CMNistDoc::GetNextTrainingPattern(unsigned char *pArray /* =NULL */, int *pLabel /* =NULL */, 
									   BOOL bFlipGrayscale /* =TRUE */, BOOL bFromRandomizedPatternSequence /* =TRUE */, UINT* iSequenceNum /* =NULL */)
{
	// Fetches the next training pattern and advances the training cursor (with wrap-around).
	//
	// pArray / pLabel / bFlipGrayscale are forwarded unchanged to GetTrainingPatternArrayValues.
	// bFromRandomizedPatternSequence: when not FALSE, the cursor is used as an index into
	//     m_iRandomizedTrainingPatternSequence; when FALSE, the cursor itself is the pattern number.
	// iSequenceNum: if not NULL, receives the cursor value in effect before it was advanced.
	//
	// Returns the number of the pattern that was fetched.
	
	CAutoCS lock( m_csTrainingPatterns );
	
	// position in the sequence for this call, captured before the cursor moves
	const UINT sequencePos = m_iNextTrainingPattern;
	
	const UINT iChosenPattern = ( bFromRandomizedPatternSequence != FALSE )
		? m_iRandomizedTrainingPatternSequence[ sequencePos ]
		: sequencePos;

	ASSERT( iChosenPattern < ::GetPreferences().m_nItemsTrainingImages );
	
	GetTrainingPatternArrayValues( iChosenPattern, pArray, pLabel, bFlipGrayscale );
	
	if ( iSequenceNum != NULL )
	{
		*iSequenceNum = sequencePos;
	}
	
	// step the cursor, wrapping back to the start once the last pattern has been used
	
	++m_iNextTrainingPattern;
	
	if ( m_iNextTrainingPattern >= ::GetPreferences().m_nItemsTrainingImages )
	{
		m_iNextTrainingPattern = 0;
	}
	
	return iChosenPattern;
}




UINT CMNistDoc::GetRandomTrainingPattern(unsigned char *pArray /* =NULL */, int *pLabel /* =NULL */, BOOL bFlipGrayscale /* =TRUE */ )
{
	// Picks one training pattern at random and fetches it via GetTrainingPatternArrayValues
	// (pArray, pLabel and bFlipGrayscale are forwarded unchanged).
	// Returns the number of the pattern corresponding to the pattern stored in pArray.
	
	CAutoCS tlo( m_csTrainingPatterns );
	
	const UINT nPatterns = ::GetPreferences().m_nItemsTrainingImages;
	
	// UNIFORM_ZERO_THRU_ONE lies in [0,1), so scaling by nPatterns and truncating yields
	// 0..nPatterns-1.  Scaling by (nPatterns - 1) would never select the last pattern.
	
	UINT patternNum = (UINT)( UNIFORM_ZERO_THRU_ONE * nPatterns );
	
	ASSERT( patternNum < nPatterns );
	
	GetTrainingPatternArrayValues( patternNum, pArray, pLabel, bFlipGrayscale );
	
	return patternNum;
}





void CMNistDoc::GetTrainingPatternArrayValues(int iNumImage /* =0 */, unsigned char *pArray /* =NULL */, int *pLabel /* =NULL */,
											  BOOL bFlipGrayscale /* =TRUE */ )
{
	// Fetches training image number iNumImage and/or its label.
	//
	// pArray: if not NULL, receives g_cImageSize*g_cImageSize gray values
	// pLabel: if not NULL, receives the label for the image
	// bFlipGrayscale: when not FALSE, every gray value read from file is replaced by 255 minus itself
	//
	// When the data files are not open, a synthetic gray wedge is written to pArray and
	// INT_MAX is written to pLabel instead.
	
	CAutoCS lock( m_csTrainingPatterns );
	
	const int nPixels = g_cImageSize*g_cImageSize;
	
	if ( m_bFilesOpen == FALSE )
	{
		// no files are open: return a simple gray wedge and a sentinel label
		
		if ( pArray != NULL )
		{
			for ( int px=0; px<nPixels; ++px )
			{
				pArray[ px ] = px*255/nPixels;
			}
		}
		
		if ( pLabel != NULL )
		{
			*pLabel = INT_MAX;
		}
		
		return;
	}
	
	if ( pArray != NULL )
	{
		// images are stored back to back; 16 compensates for file header info
		const int imageOffset = 16 + iNumImage*nPixels;
		
		m_fileTrainingImages.Seek( imageOffset, CFile::begin );
		m_fileTrainingImages.Read( pArray, nPixels );
		
		if ( bFlipGrayscale != FALSE )
		{
			for ( int px=0; px<nPixels; ++px )
			{
				pArray[ px ] = 255 - pArray[ px ];
			}
		}
	}
	
	if ( pLabel != NULL )
	{
		// one byte per label; the 8 presumably skips the label file's header -- confirm against the file format
		const int labelOffset = 8 + iNumImage;
		char labelByte;
		
		m_fileTrainingLabels.Seek( labelOffset, CFile::begin );
		m_fileTrainingLabels.Read( &labelByte, 1 );  // single byte
		
		*pLabel = labelByte;
	}
}



UINT CMNistDoc::GetNextTestingPatternNumber()
{
	// Reports the current value of the testing cursor, i.e. the pattern number that
	// GetNextTestingPattern will fetch on its next call.  The cursor is not advanced here.
	
	const UINT iUpcoming = m_iNextTestingPattern;
	
	return iUpcoming;
}



UINT CMNistDoc::GetNextTestingPattern(unsigned char *pArray /* =NULL */, int *pLabel /* =NULL */, BOOL bFlipGrayscale /* =TRUE */ )
{
	// Fetches the testing pattern at the current cursor position (pArray, pLabel and
	// bFlipGrayscale are forwarded unchanged to GetTestingPatternArrayValues), then advances
	// the cursor, wrapping to zero after the last testing pattern.
	// Returns the number of the pattern that was fetched.
	
	CAutoCS lock( m_csTestingPatterns );
	
	const UINT iFetched = m_iNextTestingPattern;
	
	GetTestingPatternArrayValues( iFetched, pArray, pLabel, bFlipGrayscale );
	
	++m_iNextTestingPattern;
	
	if ( m_iNextTestingPattern >= ::GetPreferences().m_nItemsTestingImages )
	{
		m_iNextTestingPattern = 0;
	}
	
	return iFetched;
}


void CMNistDoc::GetTestingPatternArrayValues(int iNumImage /* =0 */, unsigned char *pArray /* =NULL */, int *pLabel /* =NULL */,
											 BOOL bFlipGrayscale /* =TRUE */ )
{
	// Fetches testing image number iNumImage and/or its label.
	//
	// pArray: if not NULL, receives g_cImageSize*g_cImageSize gray values
	// pLabel: if not NULL, receives the label for the image
	// bFlipGrayscale: when not FALSE, every gray value read from file is replaced by 255 minus itself
	//
	// When the data files are not open, a synthetic gray wedge is written to pArray and
	// INT_MAX is written to pLabel instead.
	
	CAutoCS lock( m_csTestingPatterns );
	
	const int nPixels = g_cImageSize*g_cImageSize;
	
	if ( m_bFilesOpen == FALSE )
	{
		// no files are open: return a simple gray wedge and a sentinel label
		
		if ( pArray != NULL )
		{
			for ( int px=0; px<nPixels; ++px )
			{
				pArray[ px ] = px*255/nPixels;
			}
		}
		
		if ( pLabel != NULL )
		{
			*pLabel = INT_MAX;
		}
		
		return;
	}
	
	if ( pArray != NULL )
	{
		// images are stored back to back; 16 compensates for file header info
		const int imageOffset = 16 + iNumImage*nPixels;
		
		m_fileTestingImages.Seek( imageOffset, CFile::begin );
		m_fileTestingImages.Read( pArray, nPixels );
		
		if ( bFlipGrayscale != FALSE )
		{
			for ( int px=0; px<nPixels; ++px )
			{
				pArray[ px ] = 255 - pArray[ px ];
			}
		}
	}
	
	if ( pLabel != NULL )
	{
		// one byte per label; the 8 presumably skips the label file's header -- confirm against the file format
		const int labelOffset = 8 + iNumImage;
		char labelByte;
		
		m_fileTestingLabels.Seek( labelOffset, CFile::begin );
		m_fileTestingLabels.Read( &labelByte, 1 );  // single byte
		
		*pLabel = labelByte;
	}
}



void CMNistDoc::GenerateDistortionMap( double severityFactor /* =1.0 */ )
{
	// Generates distortion maps in each of the horizontal (m_DispH) and vertical (m_DispV) directions.
	// Three distortions are applied: an elastic distortion, a scaling, and a rotation.
	// Each is calculated one at a time as a per-pixel displacement, and the displacements
	// are simply added together.
	
	// The input parameter, severityFactor, lets us control the severity of the distortions relative
	// to the values held in the preferences.  For example, if we only want half as harsh a
	// distortion, set severityFactor == 0.5
	
	// First, elastic distortion, per Patrice Simard, "Best Practices For Convolutional Neural Networks..."
	// at page 2.
	// Three-step process: seed array with uniform randoms, filter with a gaussian kernel, normalize (scale)
	
	int row, col;
	
	// Scratch buffers for the random seed fields.  They are owned by vectors so they are
	// released on every exit path (including an allocation failure of the second buffer);
	// the raw pointers below are only views used for indexing through At().
	
	std::vector< double > uniformHStorage( m_cCount );
	std::vector< double > uniformVStorage( m_cCount );
	
	double* uniformH = uniformHStorage.empty() ? NULL : &uniformHStorage[ 0 ];
	double* uniformV = uniformVStorage.empty() ? NULL : &uniformVStorage[ 0 ];
	
	
	for ( col=0; col<m_cCols; ++col )
	{
		for ( row=0; row<m_cRows; ++row )
		{
			At( uniformH, row, col ) = UNIFORM_PLUS_MINUS_ONE;
			At( uniformV, row, col ) = UNIFORM_PLUS_MINUS_ONE;
		}
	}
	
	// filter with gaussian
	
	double fConvolvedH, fConvolvedV;
	double fSampleH, fSampleV;
	double elasticScale = severityFactor * ::GetPreferences().m_dElasticScaling;
	int xxx, yyy, xxxDisp, yyyDisp;
	int iiMid = GAUSSIAN_FIELD_SIZE/2;  // GAUSSIAN_FIELD_SIZE is strictly odd
	
	for ( col=0; col<m_cCols; ++col )
	{
		for ( row=0; row<m_cRows; ++row )
		{
			fConvolvedH = 0.0;
			fConvolvedV = 0.0;
			
			for ( xxx=0; xxx<GAUSSIAN_FIELD_SIZE; ++xxx )
			{
				for ( yyy=0; yyy<GAUSSIAN_FIELD_SIZE; ++yyy )
				{
					// position in the seed field that lines up with kernel tap (yyy, xxx)
					xxxDisp = col - iiMid + xxx;
					yyyDisp = row - iiMid + yyy;
					
					// taps that fall outside the grid contribute zero
					if ( xxxDisp<0 || xxxDisp>=m_cCols || yyyDisp<0 || yyyDisp>=m_cRows )
					{
						fSampleH = 0.0;
						fSampleV = 0.0;
					}
					else
					{
						fSampleH = At( uniformH, yyyDisp, xxxDisp );
						fSampleV = At( uniformV, yyyDisp, xxxDisp );
					}
					
					fConvolvedH += fSampleH * m_GaussianKernel[ yyy ][ xxx ];
					fConvolvedV += fSampleV * m_GaussianKernel[ yyy ][ xxx ];
				}
			}
			
			At( m_DispH, row, col ) = elasticScale * fConvolvedH;
			At( m_DispV, row, col ) = elasticScale * fConvolvedV;
		}
	}
	
	// next, the scaling of the image by a random scale factor
	// Horizontal and vertical directions are scaled independently
	
	double dSFHoriz = severityFactor * ::GetPreferences().m_dMaxScaling / 100.0 * UNIFORM_PLUS_MINUS_ONE;  // m_dMaxScaling is a percentage
	double dSFVert = severityFactor * ::GetPreferences().m_dMaxScaling / 100.0 * UNIFORM_PLUS_MINUS_ONE;  // m_dMaxScaling is a percentage

	
	// Scaling and rotation are taken about the middle of the grid.  The horizontal middle comes
	// from the column count and the vertical middle from the row count; for a square grid the
	// two are the same value.
	
	int iMidRow = m_cRows/2;
	int iMidCol = m_cCols/2;
	
	for ( row=0; row<m_cRows; ++row )
	{
		for ( col=0; col<m_cCols; ++col )
		{
			At( m_DispH, row, col ) += dSFHoriz * ( col-iMidCol );
			At( m_DispV, row, col ) -= dSFVert * ( iMidRow-row );  // negative because of top-down bitmap
		}
	}
	
	
	// finally, apply a rotation
	
	double angle = severityFactor * ::GetPreferences().m_dMaxRotation * UNIFORM_PLUS_MINUS_ONE;
	angle = angle * 3.1415926535897932384626433832795 / 180.0;  // convert from degrees to radians
	
	double cosAngle = cos( angle );
	double sinAngle = sin( angle );
	
	// displacement of a point (x, y) = (col-iMidCol, iMidRow-row) under rotation by angle:
	//   dx = x*(cos-1) - y*sin ,  dy = y*(cos-1) + x*sin
	
	for ( row=0; row<m_cRows; ++row )
	{
		for ( col=0; col<m_cCols; ++col )
		{
			At( m_DispH, row, col ) += ( col-iMidCol ) * ( cosAngle - 1 ) - ( iMidRow-row ) * sinAngle;
			At( m_DispV, row, col ) -= ( iMidRow-row ) * ( cosAngle - 1 ) + ( col-iMidCol ) * sinAngle;  // negative because of top-down bitmap
		}
	}
	
}


void CMNistDoc::ApplyDistortionMap(double *inputVector)
{
	// applies the current distortion map to the input vector
	
	// For the mapped array, we assume that 0.0 == background, and 1.0 == full intensity information
	// This is different from the input vector, in which +1.0 == background (white), and 
	// -1.0 == information (black), so we must convert one to the other
	
	std::vector< std::vector< double > >   mappedVector( m_cRows, std::vector< double >( m_cCols, 0.0 ));
	
	double sourceRow, sourceCol;
	double fracRow, fracCol;
	double w1, w2, w3, w4;
	double sourceValue;
	int row, col;
	int sRow, sCol, sRowp1, sColp1;
	BOOL bSkipOutOfBounds;
	
	for ( row=0; row<m_cRows; ++row )
	{
		for ( col=0; col<m_cCols; ++col )
		{
			// the pixel at sourceRow, sourceCol is an "phantom" pixel that doesn't really exist, and
			// whose value must be manufactured from surrounding real pixels (i.e., since 
			// sourceRow and sourceCol are floating point, not ints, there's not a real pixel there)
			// The idea is that if we can calculate the value of this phantom pixel, then its 
			// displacement will exactly fit into the current pixel at row, col (which are both ints)
			
			sourceRow = (double)row - At( m_DispV, row, col );
			sourceCol = (double)col - At( m_DispH, row, col );
			
			// weights for bi-linear interpolation
			
			fracRow = sourceRow - (int)sourceRow;
			fracCol = sourceCol - (int)sourceCol;
			
			
			w1 = ( 1.0 - fracRow ) * ( 1.0 - fracCol );
			w2 = ( 1.0 - fracRow ) * fracCol;
			w3 = fracRow * ( 1 - fracCol );
			w4 = fracRow * fracCol;
			
			
			// limit indexes

/*
			while (sourceRow >= m_cRows ) sourceRow -= m_cRows;
			while (sourceRow < 0 ) sourceRow += m_cRows;
			
			while (sourceCol >= m_cCols ) sourceCol -= m_cCols;
			while (sourceCol < 0 ) sourceCol += m_cCols;
*/
			bSkipOutOfBounds = FALSE;

			if ( (sourceRow + 1.0) >= m_cRows )	bSkipOutOfBounds = TRUE;
			if ( sourceRow < 0 )				bSkipOutOfBounds = TRUE;
			
			if ( (sourceCol + 1.0) >= m_cCols )	bSkipOutOfBounds = TRUE;
			if ( sourceCol < 0 )				bSkipOutOfBounds = TRUE;
			
			if ( bSkipOutOfBounds == FALSE )
			{
				// the supporting pixels for the "phantom" source pixel are all within the 
				// bounds of the character grid.
				// Manufacture its value by bi-linear interpolation of surrounding pixels
				
				sRow = (int)sourceRow;
				sCol = (int)sourceCol;
				
				sRowp1 = sRow + 1;
				sColp1 = sCol + 1;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -