📄 neuralnetwork.cpp
// NeuralNetwork.cpp: implementation of the NeuralNetwork class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "NeuralNetwork.h"
#include "MNist.h" // for the _Intelocked functions
#include <malloc.h> // for the _alloca function
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
///////////////////////////////////////////////////////////////////////
//
// NeuralNetwork class definition
NeuralNetwork::NeuralNetwork()
{
Initialize();
}
void NeuralNetwork::Initialize()
{
// delete all layers
VectorLayers::iterator it;
for( it=m_Layers.begin(); it<m_Layers.end(); it++ )
{
delete *it;
}
m_Layers.clear();
m_etaLearningRate = .001; // arbitrary, so that brand-new NNs can be serialized with a non-ridiculous number
m_cBackprops = 0;
}
NeuralNetwork::~NeuralNetwork()
{
// call Initialize(); makes sense if you think about it
Initialize();
}
void NeuralNetwork::Calculate(double* inputVector, UINT iCount,
double* outputVector /* =NULL */, UINT oCount /* =0 */,
std::vector< std::vector< double > >* pNeuronOutputs /* =NULL */ )
{
VectorLayers::iterator lit = m_Layers.begin();
VectorNeurons::iterator nit;
// first layer is the input layer: directly set outputs of all of its neurons
// to the input vector
if ( lit<m_Layers.end() )
{
nit = (*lit)->m_Neurons.begin();
int count = 0;
ASSERT( iCount == (*lit)->m_Neurons.size() ); // there should be exactly one neuron per input
while( ( nit < (*lit)->m_Neurons.end() ) && ( count < iCount ) )
{
(*nit)->output = inputVector[ count ];
nit++;
count++;
}
}
for( lit++; lit<m_Layers.end(); lit++ )
{
(*lit)->Calculate();
}
// load up output vector with results
if ( outputVector != NULL )
{
lit = m_Layers.end();
lit--;
nit = (*lit)->m_Neurons.begin();
for ( int ii=0; ii<oCount; ++ii )
{
outputVector[ ii ] = (*nit)->output;
nit++;
}
}
// load up neuron output values with results
if ( pNeuronOutputs != NULL )
{
// check for first time use (re-use is expected)
if ( pNeuronOutputs->empty() != FALSE )
{
// it's empty, so allocate memory for its use
pNeuronOutputs->clear(); // for safekeeping
int ii = 0;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
std::vector< double > layerOut;
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
layerOut.push_back( (*lit)->m_Neurons[ ii ]->output );
}
pNeuronOutputs->push_back( layerOut);
}
}
else
{
// it's not empty, so assume it's been used in a past iteration and memory for
// it has already been allocated internally. Simply store the values
int ii, jj = 0;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
(*pNeuronOutputs)[ jj ][ ii ] = (*lit)->m_Neurons[ ii ]->output ;
}
++jj;
}
}
}
}
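// Illustrative caller sketch (not part of the original file): a single forward pass.
// "NN" denotes a NeuralNetwork whose layers have already been constructed; the
// 841-element (29x29) input and 10-element output sizes are assumptions for
// illustration only and must match however the layers were actually built.
//
//   double input[ 841 ], output[ 10 ];
//   std::vector< std::vector< double > > neuronOutputs;   // optional, reused across calls
//   NN.Calculate( input, 841, output, 10, &neuronOutputs );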
void NeuralNetwork::Backpropagate(double *actualOutput, double *desiredOutput, UINT count,
std::vector< std::vector< double > >* pMemorizedNeuronOutputs )
{
// backpropagates through the neural net
ASSERT( ( actualOutput != NULL ) && ( desiredOutput != NULL ) && ( count < 256 ) );
ASSERT( m_Layers.size() >= 2 ); // there must be at least two layers in the net
if ( ( actualOutput == NULL ) || ( desiredOutput == NULL ) || ( count >= 256 ) )
return;
// check if it's time for a weight sanity check
m_cBackprops++;
if ( (m_cBackprops % 10000) == 0 )
{
// every 10000 backprops
PeriodicWeightSanityCheck();
}
// proceed from the last layer to the first, iteratively
// We calculate the last layer separately, and first, since it provides the needed derivative
// (i.e., dErr_wrt_dXnm1) for the previous layers
// nomenclature:
//
// Err is output error of the entire neural net
// Xn is the output vector on the n-th layer
// Xnm1 is the output vector of the previous layer
// Wn is the vector of weights of the n-th layer
// Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
// F is the squashing function: Xn = F(Yn)
// F' is the derivative of the squashing function
// Conveniently, for F = tanh, F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
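// For example (illustrative only; the actual layer code may differ): with x = F(y) = tanh(y),
//   F'(y) = 1 - tanh(y)^2 = 1 - x*x
// so the derivative can be formed from the stored output alone, e.g.
//   double dF_wrt_dY = 1.0 - x * x;   // x is the neuron's saved output value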
VectorLayers::iterator lit = m_Layers.end() - 1;
std::vector< double > dErr_wrt_dXlast( (*lit)->m_Neurons.size() );
std::vector< std::vector< double > > differentials;
int iSize = m_Layers.size();
differentials.resize( iSize );
int ii;
// start the process by calculating dErr_wrt_dXn for the last layer.
// for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 ) ), this differential is simply
// the difference between the actual output and the target output
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
dErr_wrt_dXlast[ ii ] = actualOutput[ ii ] - desiredOutput[ ii ];
}
// store Xlast and reserve memory for the remaining vectors stored in differentials
differentials[ iSize-1 ] = dErr_wrt_dXlast; // last one
for ( ii=0; ii<iSize-1; ++ii )
{
differentials[ ii ].resize( m_Layers[ii]->m_Neurons.size(), 0.0 );
}
// now iterate through all layers including the last but excluding the first, and ask each of
// them to backpropagate error and adjust their weights, and to return the differential
// dErr_wrt_dXnm1 for use as the input value of dErr_wrt_dXn for the next iterated layer
BOOL bMemorized = ( pMemorizedNeuronOutputs != NULL );
lit = m_Layers.end() - 1; // re-initialized to last layer for clarity, although it should already be this value
ii = iSize - 1;
for ( ; lit>m_Layers.begin(); lit--)
{
if ( bMemorized != FALSE )
{
(*lit)->Backpropagate( differentials[ ii ], differentials[ ii - 1 ],
&(*pMemorizedNeuronOutputs)[ ii ], &(*pMemorizedNeuronOutputs)[ ii - 1 ], m_etaLearningRate );
}
else
{
(*lit)->Backpropagate( differentials[ ii ], differentials[ ii - 1 ],
NULL, NULL, m_etaLearningRate );
}
--ii;
}
differentials.clear();
}
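// Illustrative training-step sketch (not taken from this file): one online gradient-descent
// update pairing Calculate() with Backpropagate(). "NN", "input", "target" and the buffer
// sizes are assumptions for illustration only.
//
//   std::vector< std::vector< double > > memorizedOutputs;   // reused between the two calls
//   double actual[ 10 ];
//   NN.Calculate( input, 841, actual, 10, &memorizedOutputs );
//   NN.Backpropagate( actual, target, 10, &memorizedOutputs );
//
// Passing the same memorized-outputs vector to both calls lets each layer back-propagate
// from the activations saved during the forward pass rather than from its current neuron outputs.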
void NeuralNetwork::PeriodicWeightSanityCheck()
{
// function that simply goes through all weights, and tests them against an arbitrary
// "reasonable" upper limit. If the upper limit is exceeded, a warning is displayed
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++)
{
(*lit)->PeriodicWeightSanityCheck();
}
}
void NeuralNetwork::EraseHessianInformation()
{
// controls each layer to erase (set to value of zero) all its diagonal Hessian info
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->EraseHessianInformation();
}
}
void NeuralNetwork::DivideHessianInformationBy( double divisor )
{
// controls each layer to divide its current diagonal Hessian info by a common divisor.
// A check is also made to ensure that each Hessian is strictly zero-positive
VectorLayers::iterator lit;
for ( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->DivideHessianInformationBy( divisor );
}
}
void NeuralNetwork::BackpropagateSecondDervatives( double* actualOutputVector,
double* targetOutputVector, UINT count )
{
// calculates the second derivatives (for diagonal Hessian) and backpropagates
// them through neural net
ASSERT( ( actualOutputVector != NULL ) && ( targetOutputVector != NULL ) && ( count < 256 ) );
ASSERT( m_Layers.size() >= 2 ); // there must be at least two layers in the net
if ( ( actualOutputVector == NULL ) || ( targetOutputVector == NULL ) || ( count >= 256 ) )
return;
// we use nearly the same nomenclature as above (e.g., "dErr_wrt_dXnm1") even though everything here
// is actually second derivatives and not first derivatives, since otherwise the ASCII would
// become too confusing. To emphasize that these are second derivatives, we insert a "2"
// such as "d2Err_wrt_dXnm1". We don't insert the second "2" that's conventional for designating
// second derivatives
VectorLayers::iterator lit;
lit = m_Layers.end() - 1; // set to last layer
std::vector< double > d2Err_wrt_dXlast( (*lit)->m_Neurons.size() );
std::vector< std::vector< double > > differentials;
int iSize = m_Layers.size();
differentials.resize( iSize );
int ii;
// start the process by calculating the second derivative d2Err_wrt_dXn for the last layer.
// for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 ) ), this second derivative is
// exactly one
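// (since dErr_wrt_dXn = Xn - Tn for this Err function, differentiating once more with
// respect to Xn gives a second derivative of exactly 1 for every output neuron)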
lit = m_Layers.end() - 1; // point to last layer
for ( ii=0; ii<(*lit)->m_Neurons.size(); ++ii )
{
d2Err_wrt_dXlast[ ii ] = 1.0;
}
// store Xlast and reserve memory for the remaining vectors stored in differentials
differentials[ iSize-1 ] = d2Err_wrt_dXlast; // last one
for ( ii=0; ii<iSize-1; ++ii )
{
differentials[ ii ].resize( m_Layers[ii]->m_Neurons.size(), 0.0 );
}
// now iterate through all layers including the last but excluding the first, starting from
// the last, and ask each of
// them to backpropagate the second derivative and accumulate the diagonal Hessian, and also to
// return the second derivative
// d2Err_wrt_dXnm1 for use as the input value of d2Err_wrt_dXn for the next iterated layer (which
// is the previous layer spatially)
lit = m_Layers.end() - 1; // re-initialized to last layer for clarity, although it should already be this value
ii = iSize - 1;
for ( ; lit>m_Layers.begin(); lit--)
{
(*lit)->BackpropagateSecondDerivatives( differentials[ ii ], differentials[ ii - 1 ] );
--ii;
}
differentials.clear();
}
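// Illustrative sketch (not taken from this file): how the three Hessian helpers are typically
// combined to refresh the diagonal Hessian estimate over a small sample of patterns.
// "NN", "numPatterns", "GetPattern" and the buffer sizes are assumptions for illustration.
//
//   NN.EraseHessianInformation();
//   for ( UINT p = 0; p < numPatterns; ++p )
//   {
//       GetPattern( p, input, target );                        // caller-supplied helper
//       NN.Calculate( input, 841, actual, 10, NULL );
//       NN.BackpropagateSecondDervatives( actual, target, 10 );
//   }
//   NN.DivideHessianInformationBy( (double)numPatterns );      // average the accumulated sums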
void NeuralNetwork::Serialize(CArchive &ar)
{
if (ar.IsStoring())
{
// TODO: add storing code here
ar << m_etaLearningRate;
ar << m_Layers.size();
VectorLayers::iterator lit;
for( lit=m_Layers.begin(); lit<m_Layers.end(); lit++ )
{
(*lit)->Serialize( ar );
}