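// erroranalysis.cpp
// (A code-viewer "font size" control label followed here; it is web page residue, not part of the source.)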
// ErrorAnalysis.cpp: implementation of the CErrorAnalysis class.
//
//////////////////////////////////////////////////////////////////////
#include "ErrorAnalysis.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor. The body is intentionally empty: no member is
// explicitly initialised here.
CErrorAnalysis::CErrorAnalysis() {}
// Returns the smallest key (error) in CountScore whose candidate admits a
// non-empty interval [LowerBound, UpperBound] after being compared against
// every other candidate, or -1 when no candidate does (including when the
// container is empty).
//
// CountScore is keyed by error; for each element, `second.Error` is used as
// the feature occurrence count and `second.Score` as the score (this is the
// layout produced by Switch()).  Because a multimap iterates in ascending key
// order, the first candidate that qualifies carries the smallest error.
//
// NOTE(review): the bound arithmetic is consistent with treating a
// candidate's score as `Score + w * Count` and asking for which weights `w`
// the candidate W* out-scores every other candidate W -- confirm against the
// algorithm description.
int CErrorAnalysis::ErrorReduction(multimap<int, ScoreError>& CountScore) // here Error is the index and elements are Count and Score
{
    const double InitialUpperBound = 100000000;
    const double InitialLowerBound = -100000000;

    multimap<int, ScoreError>::iterator Candidate = CountScore.begin();
    for ( ; Candidate != CountScore.end(); ++Candidate )
    {
        const int WStarFeatureCount = (*Candidate).second.Error;
        const double WStarScore = (*Candidate).second.Score;
        double UpperBound = InitialUpperBound;
        double LowerBound = InitialLowerBound;
        bool BoundaryExist = true;

        multimap<int, ScoreError>::iterator Other = CountScore.begin();
        for ( ; Other != CountScore.end(); ++Other )
        {
            if ( Other == Candidate )
            {
                continue;
            }

            const int WFeatureCount = (*Other).second.Error;
            const double WScore = (*Other).second.Score;

            if ( WStarFeatureCount == WFeatureCount )
            {
                // Equal counts would make the divisor below zero.  No
                // crossing point exists in that case: the other candidate
                // rules W* out only if its score is strictly higher,
                // otherwise it adds no constraint.
                if ( WScore > WStarScore )
                {
                    BoundaryExist = false;
                    break;
                }
                continue;
            }

            const double Boundary = (WScore - WStarScore) / (WStarFeatureCount - WFeatureCount);
            if ( WStarFeatureCount > WFeatureCount )
            {
                // Boundary acts as a lower bound here.
                if ( Boundary > UpperBound )
                {
                    // The flag must be cleared BEFORE leaving the loop; the
                    // original code had the assignment after `break`, where
                    // it was unreachable.
                    BoundaryExist = false;
                    break;
                }
                if ( Boundary > LowerBound )
                {
                    LowerBound = Boundary;
                }
            }
            else
            {
                // Boundary acts as an upper bound here.
                if ( Boundary < LowerBound )
                {
                    BoundaryExist = false;
                    break;
                }
                if ( Boundary < UpperBound )
                {
                    UpperBound = Boundary;
                }
            }
        } // end of comparison against every other candidate

        // Candidates whose interval collapsed are skipped.
        if ( BoundaryExist )
        {
            return (*Candidate).first;
        }
    }
    return -1;
}
// Re-keys a count-indexed table by error.
//
// For every (count -> {Score, Error}) entry of OldCountScore, an entry
// (Error -> {Score, count}) is inserted into NewCountScore.  In the inserted
// value the `Error` field therefore carries the feature occurrence count,
// not an error.  NewCountScore is not cleared here; entries are only added.
void CErrorAnalysis::Switch(CountScore& OldCountScore, multimap<int, ScoreError>& NewCountScore)
{
    map<int, ScoreError>::iterator Entry = OldCountScore.begin();
    while ( Entry != OldCountScore.end() )
    {
        ScoreError Swapped;
        Swapped.Error = Entry->first;          // occurrence count stored in the Error slot
        Swapped.Score = Entry->second.Score;
        NewCountScore.insert(make_pair(Entry->second.Error, Swapped));
        ++Entry;
    }
}
// For each feature in FeatureCountScore, re-keys its count table by error
// (Switch), asks ErrorReduction for the resulting error, and, when that error
// is strictly lower than OriginalError, records the difference in
// FeatureNode[feature id][HypothesisId].  Features for which ErrorReduction
// returns -1, or which yield no positive gain, leave FeatureNode untouched.
void CErrorAnalysis::ComputeErrors(map<int, CountScore>& FeatureCountScore, int HypothesisId, int OriginalError)
{
    typedef map<int, CountScore>::iterator FeatureIter;

    for ( FeatureIter Feature = FeatureCountScore.begin(); Feature != FeatureCountScore.end(); ++Feature )
    {
        // A fresh table per feature (equivalent to clearing a shared one).
        multimap<int, ScoreError> ByError;
        Switch(Feature->second, ByError);

        const int BestError = ErrorReduction(ByError);
        if ( BestError == -1 )
        {
            continue;
        }

        const int Gain = OriginalError - BestError;
        if ( Gain <= 0 )
        {
            continue;
        }

        FeatureNode[Feature->first][HypothesisId] = Gain;
    }
}
void CErrorAnalysis::GenerateErrorArray(FILE* Filename, int HypothesisNum)
{
if ( !Filename )
{
cout << " Loading Training Data Error! " << endl;
exit(0);
}
char* Buffer = new char[10000000];
char* Pos_Buffer = NULL;
int Error = 0;
int FeatureId = 0;
int HypothesisId = 0;
int MaxScoreError = -1;
double Score = 0;
double MaxScore = 0;
map<int, CountScore> FeatureCountScore;
map<int, double> FeatureCount;
vector<int> ErrorRec;
vector<double> ScoreRec;
vector<FeatureVector> FindZeroTimes;
while ( fgets(Buffer, 10000000, Filename) != NULL && HypothesisId < HypothesisNum)
{
Pos_Buffer = Buffer;
if ( !strstr(Pos_Buffer, "CandEnd") )
{
sscanf(Pos_Buffer, "%lf %d", &Score, &Error);
Pos_Buffer = strstr(Pos_Buffer + 1, " ");
Pos_Buffer = strstr(Pos_Buffer + 1, " ");
FeatureCount.clear();
if ( MaxScoreError == -1 )
{
MaxScoreError = Error;
MaxScore = Score;
}
else if ( Score > MaxScore )
{
MaxScoreError = Error;
MaxScore = Score;
}
while ( sscanf(Pos_Buffer, "%d", &FeatureId) != EOF )
{
if ( FeatureCount.find(FeatureId) == FeatureCount.end() )
{
FeatureCount[FeatureId] = 1;
}
else
{
FeatureCount[FeatureId]++;
}
Pos_Buffer = strstr(Pos_Buffer + 1, " ");
}
map<int, double>::iterator f = FeatureCount.begin();
FindZeroTimes.push_back(FeatureCount);
ScoreRec.push_back(Score);
ErrorRec.push_back(Error);
for (; f != FeatureCount.end(); f++ )
{
if ( FeatureCountScore.find((*f).first) == FeatureCountScore.end() )
{
FeatureCountScore[(*f).first][(*f).second].Score = Score;
FeatureCountScore[(*f).first][(*f).second].Error = Error;
}
else if ( FeatureCountScore[(*f).first].find((*f).second) == FeatureCountScore[(*f).first].end() )
{
FeatureCountScore[(*f).first][(*f).second].Score = Score;
FeatureCountScore[(*f).first][(*f).second].Error = Error;
}
else if ( Score > FeatureCountScore[(*f).first][(*f).second].Score )
{
FeatureCountScore[(*f).first][(*f).second].Score = Score;
FeatureCountScore[(*f).first][(*f).second].Error = Error;
}
}
} // end strstr(Pos_Buffer, "CandEnd") != 0
else
{
// here statisticed 0 time
int HypothesisLen = FindZeroTimes.size();
for (int i = 0; i < HypothesisLen; i++ )
{
map<int, CountScore>::iterator l = FeatureCountScore.begin();
for (; l != FeatureCountScore.end(); l++ )
{
if ( FindZeroTimes[i].find((*l).first) == FindZeroTimes[i].end() )
{
if ( (*l).second.find(0) == (*l).second.end() )
{
(*l).second[0].Score = ScoreRec[i];
(*l).second[0].Error = ErrorRec[i];
}
else if ( (*l).second[0].Score < ScoreRec[i] )
{
(*l).second[0].Score = ScoreRec[i];
(*l).second[0].Error = ErrorRec[i];
}
}
}
} // this block statistic the 0 time
ComputeErrors(FeatureCountScore, HypothesisId, MaxScoreError);
FeatureCountScore.clear();
FindZeroTimes.clear();
ErrorRec.clear();
ScoreRec.clear();
MaxScoreError = -1;
printf("%d hypothesis computing over!\r", HypothesisId++);
}
}
cout << "\n(1) Gennerate feature vector over" << endl;
/*////////////////// this is for test ///////////////////
map<int, FeatureVector>::iterator tt = FeatureNode.begin();
for ( ; tt != FeatureNode.end(); tt++ )
{
map<int, double>::iterator ttt = (*tt).second.begin();
cout << (*tt).first << "-->";
for ( ; ttt != (*tt).second.end(); ttt++ )
{
cout << (*ttt).first << "(" << (*ttt).second << ")" << " ";
}
cout << "\n";
}
exit(0);*/
}
// Destructor. The body is intentionally empty: no explicit clean-up is
// performed here.
CErrorAnalysis::~CErrorAnalysis() {}
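// ---------------------------------------------------------------------
// Code-viewer keyboard shortcut help (web page residue, not source code):
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Switch theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------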