📄 neuralnetwork.cpp
// NeuralNetwork.cpp: implementation of the NeuralNetwork class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "NeuralNetwork.h"
#include "MNist.h" // for the _Intelocked functions
#include <malloc.h> // for the _alloca function
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
///////////////////////////////////////////////////////////////////////
//
// NeuralNetwork class definition
NeuralNetwork::NeuralNetwork()
{
Initialize();
}
void NeuralNetwork::Initialize()
{
// delete all layers
VectorLayers::iterator it;
for( it=m_Layers.begin(); it<m_Layers.end(); it++ )
{
delete *it;
}
m_Layers.clear();
m_etaLearningRate = .001; // arbitrary, so that brand-new NNs can be serialized with a non-ridiculous number
m_cBackprops = 0;
}
NeuralNetwork::~NeuralNetwork()
{
// call Initialize(); makes sense if you think about it
Initialize();
}
void NeuralNetwork::Calculate(double* inputVector, UINT iCount,
double* outputVector /* =NULL */, UINT oCount /* =0 */,
std::vector< std::vector< double > >* pNeuronOutputs /* =NULL */ )
{
VectorLayers::iterator lit = m_Layers.begin();
VectorNeurons::iterator nit;
// first layer is the input layer: directly set outputs of all of its neurons
// to the input vector
if ( lit<m_Layers.end() )
{
nit = (*lit)->m_Neurons.begin();
int count = 0;
ASSERT( iCount == (*lit)->m_Neurons.size() ); // there should be exactly one neuron per input
while( ( nit < (*lit)->m_Neurons.end() ) && ( count < iCount ) )
{
(*nit)->output = inputVector[ count ];
nit++;
count++;
}
}
for( lit++; lit<m_Layers.end(); lit++ )
{
(*lit)->Calculate();
}
// load up output vector with results
if ( outputVector != NULL )
{
lit = m_Layers.end();
lit--;
nit = (*lit)->m_Neurons.begin();
for ( int ii=0; ii<oCount; ++ii )
{
outputVector[ ii ] = (*nit)->output;
nit++;
}
}
// load up neuron output values with results
if ( pNeuronOutputs != NULL )
{
// check for first time use (re-use is expected)
if ( pNeuronOutputs->empty() != FALSE )
{
// it's empty, so allocate memory for its use
pNeuronOutputs->clear(); // for safekeeping
int ii = 0;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
std::vector< double > layerOut;
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
layerOut.push_back( (*lit)->m_Neurons[ ii ]->output );
}
pNeuronOutputs->push_back( layerOut);
}
}
else
{
// it's not empty, so assume it's been used in a past iteration and memory for
// it has already been allocated internally. Simply store the values
int ii, jj = 0;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
(*pNeuronOutputs)[ jj ][ ii ] = (*lit)->m_Neurons[ ii ]->output ;
}
++jj;
}
}
}
}
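// Illustrative caller sketch (not part of the original file): a single forward pass.
// "NN" denotes a NeuralNetwork whose layers have already been constructed; the
// 841-element (29x29) input and 10-element output sizes are assumptions for
// illustration only and must match however the layers were actually built.
//
//   double input[ 841 ], output[ 10 ];
//   std::vector< std::vector< double > > neuronOutputs;   // optional, reused across calls
//   NN.Calculate( input, 841, output, 10, &neuronOutputs );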
void NeuralNetwork::Backpropagate(double *actualOutput, double *desiredOutput, UINT count,
std::vector< std::vector< double > >* pMemorizedNeuronOutputs )
{
// backpropagates through the neural net
ASSERT( ( actualOutput != NULL ) && ( desiredOutput != NULL ) && ( count < 256 ) );
ASSERT( m_Layers.size() >= 2 ); // there must be at least two layers in the net
if ( ( actualOutput == NULL ) || ( desiredOutput == NULL ) || ( count >= 256 ) )
return;
// check if it's time for a weight sanity check
m_cBackprops++;
if ( (m_cBackprops % 10000) == 0 )
{
// every 10000 backprops
PeriodicWeightSanityCheck();
}
// proceed from the last layer to the first, iteratively
// We calculate the last layer separately, and first, since it provides the needed derivative
// (i.e., dErr_wrt_dXnm1) for the previous layers
// nomenclature:
//
// Err is output error of the entire neural net
// Xn is the output vector on the n-th layer
// Xnm1 is the output vector of the previous layer
// Wn is the vector of weights of the n-th layer
// Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
// F is the squashing function: Xn = F(Yn)
// F' is the derivative of the squashing function
// Conveniently, for F = tanh, F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
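// For example (illustrative only; the actual layer code may differ): with x = F(y) = tanh(y),
//   F'(y) = 1 - tanh(y)^2 = 1 - x*x
// so the derivative can be formed from the stored output alone, e.g.
//   double dF_wrt_dY = 1.0 - x * x;   // x is the neuron's saved output value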
VectorLayers::iterator lit = m_Layers.end() - 1;
std::vector< double > dErr_wrt_dXlast( (*lit)->m_Neurons.size() );
std::vector< std::vector< double > > differentials;
int iSize = m_Layers.size();
differentials.resize( iSize );
int ii;
// start the process by calculating dErr_wrt_dXn for the last layer.
// for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 ) ), this differential is simply
// the difference between the actual output and the target output
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
dErr_wrt_dXlast[ ii ] = actualOutput[ ii ] - desiredOutput[ ii ];
}
// store Xlast and reserve memory for the remaining vectors stored in differentials
differentials[ iSize-1 ] = dErr_wrt_dXlast; // last one
for ( ii=0; ii<iSize-1; ++ii )
{
differentials[ ii ].resize( m_Layers[ii]->m_Neurons.size(), 0.0 );
}
// now iterate through all layers including the last but excluding the first, and ask each of
// them to backpropagate error and adjust their weights, and to return the differential
// dErr_wrt_dXnm1 for use as the input value of dErr_wrt_dXn for the next iterated layer
BOOL bMemorized = ( pMemorizedNeuronOutputs != NULL );
lit = m_Layers.end() - 1; // re-initialized to last layer for clarity, although it should already be this value
ii = iSize - 1;
for ( ; lit>m_Layers.begin(); lit--)
{
if ( bMemorized != FALSE )
{
(*lit)->Backpropagate( differentials[ ii ], differentials[ ii - 1 ],
&(*pMemorizedNeuronOutputs)[ ii ], &(*pMemorizedNeuronOutputs)[ ii - 1 ], m_etaLearningRate );
}
else
{
(*lit)->Backpropagate( differentials[ ii ], differentials[ ii - 1 ],
NULL, NULL, m_etaLearningRate );
}
--ii;
}
differentials.clear();
}
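// Illustrative training-step sketch (not taken from this file): one online gradient-descent
// update pairing Calculate() with Backpropagate(). "NN", "input", "target" and the buffer
// sizes are assumptions for illustration only.
//
//   std::vector< std::vector< double > > memorizedOutputs;   // reused between the two calls
//   double actual[ 10 ];
//   NN.Calculate( input, 841, actual, 10, &memorizedOutputs );
//   NN.Backpropagate( actual, target, 10, &memorizedOutputs );
//
// Passing the same memorized-outputs vector to both calls lets each layer back-propagate
// from the activations saved during the forward pass rather than from its current neuron outputs.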
void NeuralNetwork::PeriodicWeightSanityCheck()
{
// function that simply goes through all weights, and tests them against an arbitrary
// "reasonable" upper limit. If the upper limit is exceeded, a warning is displayed
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++)
{
(*lit)->PeriodicWeightSanityCheck();
}
}
void NeuralNetwork::EraseHessianInformation()
{
// controls each layer to erase (set to value of zero) all its diagonal Hessian info
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->EraseHessianInformation();
}
}
void NeuralNetwork::DivideHessianInformationBy( double divisor )
{
// controls each layer to divide its current diagonal Hessian info by a common divisor.
// A check is also made to ensure that each Hessian is strictly zero-positive
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->DivideHessianInformationBy( divisor );
}
}
void NeuralNetwork::BackpropagateSecondDervatives( double* actualOutputVector,
double* targetOutputVector, UINT count )
{
// calculates the second derivatives (for diagonal Hessian) and backpropagates
// them through neural net
ASSERT( ( actualOutputVector != NULL ) && ( targetOutputVector != NULL ) && ( count < 256 ) );
ASSERT( m_Layers.size() >= 2 ); // there must be at least two layers in the net
if ( ( actualOutputVector == NULL ) || ( targetOutputVector == NULL ) || ( count >= 256 ) )
return;
// we use nearly the same nomenclature as above (e.g., "dErr_wrt_dXnm1") even though everything here
// is actually second derivatives and not first derivatives, since otherwise the ASCII would
// become too confusing. To emphasize that these are second derivatives, we insert a "2"
// such as "d2Err_wrt_dXnm1". We don't insert the second "2" that's conventional for designating
// second derivatives
VectorLayers::iterator lit;
lit = m_Layers.end() - 1; // set to last layer
std::vector< double > d2Err_wrt_dXlast( (*lit)->m_Neurons.size() );
std::vector< std::vector< double > > differentials;
int iSize = m_Layers.size();
differentials.resize( iSize );
int ii;
// start the process by calculating the second derivative d2Err_wrt_dXn for the last layer.
// for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 ) ), this second derivative is
// exactly one
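// (since dErr_wrt_dXn = Xn - Tn for this Err function, differentiating once more with
// respect to Xn gives a second derivative of exactly 1 for every output neuron)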
lit = m_Layers.end() - 1; // point to last layer
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
d2Err_wrt_dXlast[ ii ] = 1.0;
}
// store Xlast and reserve memory for the remaining vectors stored in differentials
differentials[ iSize-1 ] = d2Err_wrt_dXlast; // last one
for ( ii=0; ii<iSize-1; ++ii )
{
differentials[ ii ].resize( m_Layers[ii]->m_Neurons.size(), 0.0 );
}
// now iterate through all layers including the last but excluding the first, starting from
// the last, and ask each of
// them to backpropagate the second derivative and accumulate the diagonal Hessian, and also to
// return the second derivative
// d2Err_wrt_dXnm1 for use as the input value of d2Err_wrt_dXn for the next iterated layer (which
// is the previous layer spatially)
lit = m_Layers.end() - 1; // re-initialized to last layer for clarity, although it should already be this value
ii = iSize - 1;
for ( ; lit>m_Layers.begin(); lit--)
{
(*lit)->BackpropagateSecondDerivatives( differentials[ ii ], differentials[ ii - 1 ] );
--ii;
}
differentials.clear();
}
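// Illustrative sketch (not taken from this file): how the three Hessian helpers are typically
// combined to refresh the diagonal Hessian estimate over a small sample of patterns.
// "NN", "numPatterns", "GetPattern" and the buffer sizes are assumptions for illustration.
//
//   NN.EraseHessianInformation();
//   for ( UINT p = 0; p < numPatterns; ++p )
//   {
//       GetPattern( p, input, target );                        // caller-supplied helper
//       NN.Calculate( input, 841, actual, 10, NULL );
//       NN.BackpropagateSecondDervatives( actual, target, 10 );
//   }
//   NN.DivideHessianInformationBy( (double)numPatterns );      // average the accumulated sums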
void NeuralNetwork::Serialize(CArchive &ar)
{
if (ar.IsStoring())
{
// TODO: add storing code here
ar << m_etaLearningRate;
ar << m_Layers.size();
VectorLayers::iterator lit;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->Serialize( ar );
}