📄 cpd.cpp
字号:
FileListFout << Name << endl;
//............
cout<<Name<<"\t"<<Suffix<<endl;
//..................
//! Copy it into repository
ReturnKiller(PathAndName, Name);
//!Ins_Chunk(PathAndName);
RebuildHashTable();
FileListFin.close();
FileListFout.close();
}
//! Check whether a file is probably a plagiarism of repository content.
//! The file is first run through ReturnKiller() under the scratch name "t.tmp";
//! the file REPOSITORYPATH + "t.tmp" is then handed to Evl_Chunk().
//! (Presumably ReturnKiller() writes its output below REPOSITORYPATH -- confirm.)
//! \param PathAndName a file name string
void HawkEye::Exam(string PathAndName)
{
    ReturnKiller(PathAndName, "t.tmp");

    const string ScratchFile= REPOSITORYPATH + "t.tmp";
    Evl_Chunk(ScratchFile);
}
//! Test two strings for exact, byte-for-byte equality.
//! \param StrA first string
//! \param StrB second string
//! \return true when both strings have the same length and the same contents,
//!         false otherwise
bool HawkEye::Compare(string StrA, string StrB)
{
#ifdef DEBUG_Compare
    cout << "StrA: " << StrA << endl << "StrB: " << StrB << endl;
#endif
    // operator== on std::string compares the lengths and then the bytes, which is
    // what the former hand-written loop did. Using it also keeps this function from
    // writing to the counter `i`, which is declared outside this function and is the
    // loop variable of the for-loops in Evl_Chunk() that reach Compare() via Evaluate().
    return StrA == StrB;
}
//! Compute the hash-table key of a string.
//! The key is built as h = (33*h + (0xFF + c)) % MAX over every char c of the
//! string, starting from h = 0.
//! \param str string to hash
//! \return the accumulated key; 0 for an empty string
long HawkEye::Hash(string str)
{
    const long Multiplier= 33;
    long h= 0;
    // A local index is used on purpose: this function is called from inside loops
    // that run on the counter `i` declared outside this function (see Ins_Chunk()),
    // so it must not modify that counter.
    for(string::size_type Pos= 0; Pos< str.size(); Pos++){
        // The bytes of Chinese characters are negative when `char` is signed
        // (-128..127 rather than 0..255); adding 0xFF keeps the term positive.
        // NOTE(review): the resulting key therefore depends on the signedness of
        // plain `char` on the target platform.
        const long chnum= 0xFF + str[Pos];
        h= (Multiplier*h + chnum) % MAX;
    }
    return h;
}
//! Link a file block into the bucket HashTable[ToBeLinked->HashKey].
//! An empty bucket simply receives the block. Otherwise the block is spliced in
//! directly behind the bucket's first node, i.e. it becomes the second node of
//! the chain. ToBeLinked->Next is only written in the second case, so callers
//! have to initialise it themselves.
//! \param ToBeLinked block to register; HashKey must already be filled in
void HawkEye::Insert(FileBlock *ToBeLinked)
{
#ifdef DEBUG_Insert
    cout << "Into Insert()" << endl;
    OutputFileBlock(ToBeLinked);
#endif

    FileBlock *FirstNode= HashTable[ToBeLinked->HashKey];
    if( FirstNode != NULL ){
        // Bucket already in use: hook the new block in right after its first node
        ToBeLinked->Next= FirstNode->Next;
        FirstNode->Next= ToBeLinked;
    }else{
        // Bucket still empty: the new block becomes its only node
        HashTable[ToBeLinked->HashKey]= ToBeLinked;
    }

#ifdef DEBUG_Insert
    cout << "Out Insert()" << endl;
    OutputFileBlock(ToBeLinked);
#endif
}
//! Split a file into chunks at the cut characters and register every non-empty
//! chunk in the hash table.
//!
//! The file is scanned byte by byte. A chunk ends in front of
//!  - a byte pair that equals DoubleCutChar[k], DoubleCutChar[k+1] for an even k
//!    (both bytes are skipped),
//!  - a byte that occurs in SingleCutChar (that byte is skipped), or
//!  - the end of the file.
//! Cut characters never become part of a chunk and empty chunks are ignored.
//! The pair test is tried first and is applied at every byte position.
//! For each chunk a FileBlock is allocated with `new`, filled with the chunk's
//! zero-based start offset, its length, the file name and Hash(chunk), and passed
//! to Insert().
//!
//! The last byte of the file is classified like every other byte, so a trailing
//! cut character is not glued onto the final chunk and a trailing ordinary byte
//! is kept even when it forms a chunk of its own.
//!
//! No Repo: the path is opened exactly as given, nothing is prepended.
//! If the file cannot be opened, an error is printed, one token is read from
//! cin (so the message stays visible) and the program exits with status 1.
//! \param PathAndName path of the file to index; the original note asks for an
//!        absolute path. It is stored unchanged in every FileBlock.
void HawkEye::Ins_Chunk(string PathAndName)
{
    //!PathAndName= GetFileName(PathAndName) + GetSuffix(PathAndName);
    cout<<"hello in Ins_Chunk"<<endl;
    string Str;             // bytes of the chunk collected so far
    fstream fin;
    fin.open( PathAndName.c_str(), fstream::in );
    if( !fin.good() ){
        cout << "Error: Can't find input file in Ins_Chunk()!: " << PathAndName << endl;
        cin >> Str;
        exit(1);
    }
    long PosCounter= 0;     // number of bytes consumed from the file so far
    char Ch1;
    while( fin.read(&Ch1, sizeof(char)) ){
        PosCounter++;
        // 0: Ch1 is ordinary text, 1: Ch1 is a single cut char,
        // 2: Ch1 and the following byte form a double cut char
        long CutWidth= 0;

        // Look at the next byte without consuming it. At the end of the file
        // peek() returns eof and the pair test is skipped.
        const fstream::int_type Next= fin.peek();
        if( Next != fstream::traits_type::eof() ){
            const char Ch2= fstream::traits_type::to_char_type(Next);
            // k + 1 < size() keeps the second index in range even if the table
            // has an odd number of bytes
            for(string::size_type k= 0; k + 1 < DoubleCutChar.size(); k+= 2)
                if( DoubleCutChar[k] == Ch1 && DoubleCutChar[k+1] == Ch2 ){
                    CutWidth= 2;
                    break;
                }
        }
        if( CutWidth == 0 ){
            for(string::size_type k= 0; k < SingleCutChar.size(); k++)
                if( SingleCutChar[k] == Ch1 ){
                    CutWidth= 1;
                    break;
                }
        }

        if( CutWidth == 0 ){
            // Ordinary byte: extend the current chunk
            Str+= Ch1;
            continue;
        }
        if( CutWidth == 2 ){
            // Consume the second byte of the double cut char as well
            fin.get();
            PosCounter++;
        }
        if( Str.size() == 0 )
            continue;       // consecutive cut chars: nothing to register

        FileBlock *ndPtr= new FileBlock;
        // Step back over the cut char (1 or 2 bytes) and over the chunk itself
        ndPtr->StartPoint= PosCounter - static_cast<long>(Str.size()) - CutWidth;
        ndPtr->Length= Str.size();
        ndPtr->FileName= PathAndName;
        ndPtr->Next= NULL;
        ndPtr->HashKey= Hash(Str);
#ifdef DEBUG_Ins_Chunk
        if( CutWidth == 2 )
            cout << "Before Double Insertion" << endl;
        else
            cout << "Before Single Insertion" << endl;
        cout << Str << endl;
        OutputFileBlock(ndPtr);
#endif
        Insert(ndPtr);
        Str= "";
    }
    fin.close();

    // The final chunk, when the file does not end with a cut char
    if( Str.size() != 0 ){
        FileBlock *ndPtr= new FileBlock;
        // The chunk runs up to the last byte that was read
        ndPtr->StartPoint= PosCounter - static_cast<long>(Str.size());
        ndPtr->Length= Str.size();
        ndPtr->FileName= PathAndName;
        ndPtr->Next= NULL;
        ndPtr->HashKey= Hash(Str);
#ifdef DEBUG_Ins_Chunk
        cout << "Before Last Insertion" << endl;
        cout << "String: " << Str << endl;
        cout << "PosCounter: " << PosCounter << endl;
        cout << Str << endl;
        OutputFileBlock(ndPtr);
#endif
        Insert(ndPtr);
    }
}
//! Maintain and emit the pending report held in OutputMessage.
//!  - CONCATENATE: append str followed by "," to OutputMessage.
//!  - PRINT: if OutputMessage has reached String_Size_Treshold characters,
//!    append str followed by " is Violated " and print the message to cout;
//!    in either case OutputMessage is emptied afterwards.
//!  - any other mode: nothing happens.
//! NOTE(review): Evaluate() calls this with PRINT for the chunk that did NOT
//! match, so that chunk's text ends up in the printed report -- confirm that
//! this is intended.
//! \param str chunk text to add to the report
//! \param pm  CONCATENATE or PRINT
void HawkEye::Process(string str, ProcessMode pm)
{
    if( pm == CONCATENATE ){
        OutputMessage+= str;
        OutputMessage+= ",";
        return;
    }
    if( pm != PRINT )
        return;

    // Short runs are discarded silently; only long enough ones are reported
    const bool WorthReporting= ( OutputMessage.size() >= String_Size_Treshold );
    if( WorthReporting ){
        OutputMessage+= str;
        OutputMessage+= " is Violated ";
        cout << OutputMessage << endl;
    }
    OutputMessage= "";
}
//! Check one chunk against the hash table and update the pending report.
//! The bucket HashTable[Hash(Str)] is walked; for each node the text returned by
//! Suck() is compared with Str by Compare(). The walk stops at the first match,
//! because further matches would not change the outcome.
//!  - match found: Process(Str, CONCATENATE) adds the chunk to the pending report;
//!  - no match and a report is pending: Process(Str, PRINT) closes that report;
//!  - no match and nothing pending: nothing happens.
//! \param Str chunk text to look up
void HawkEye::Evaluate(string Str)
{
    FileBlock *ndPtr= HashTable[Hash(Str)];
#ifdef DEBUG_Evaluate
    if( ndPtr == NULL ){
        cout << "No hash table yet?" << endl;
    }else{
        cout << "String into Evaluate: " << Str << endl;
        cout << "Hash Key in Evaluate(): " << ndPtr->HashKey << endl;
        // Only dump the block for a non-empty bucket; whether OutputFileBlock()
        // accepts NULL is not visible from here.
        OutputFileBlock(ndPtr);
    }
    // OutputHashTable(1003);
#endif
    // Plain local: the flag is recomputed on every call, so it needs no static storage
    bool IsViolated= false;
    while( ndPtr != NULL ){
#ifdef DEBUG_Evaluate
        cout << "And The Original String is: " << Suck(ndPtr) << endl;
        OutputFileBlock(ndPtr);
#endif
        if( Compare( Str, Suck(ndPtr) ) ){
            IsViolated= true;
            break;          // one match is enough, skip the remaining Suck() calls
        }
        ndPtr= ndPtr->Next;
    }
    // Process violation
    if( IsViolated )
        Process(Str, CONCATENATE);
    else if( OutputMessage.size() != 0 )
        Process(Str, PRINT);
}
//! No Repo
void HawkEye::Evl_Chunk(string PathAndName)
{
char Ch1, Ch2;
string Str;
fstream fin;
fin.open( PathAndName.c_str(), fstream::in );
if( !fin.good() ){
cout << "Error: Can't find input file in Evl_Chunk()!" << endl;
cin >> Str;
exit(1);
}
long PosCounter= 0;
Evl_Entry:
while( fin.read(&Ch1, sizeof(char)) ){
PosCounter++;
#ifdef DEBUG_Evl_Chunk
//cout << "Ch1: " << Ch1 << endl;
#endif
// If it can read two chars,check if it is double cut char
// If Ch1 is the last one,that is,there is no char left,skip this step
if( fin.read(&Ch2, sizeof(char)) ){
PosCounter++;
// Is Ch1+Ch2 compose a DoubleCutChar?
for(i= 0; i< DoubleCutChar.size(); i+= 2)
///////////////////////////////Simplify here///////////////////////////////////////
if( DoubleCutChar[i] == Ch1 && DoubleCutChar[i+1] == Ch2 ){
if( Str.size() == 0 ){
goto Evl_Entry;
}else{
#ifdef DEBUG_Evl_Chunk
cout << "Before Double Evaluation" << endl;
cout << Str << endl;
#endif
Evaluate(Str);
Str= "";
goto Evl_Entry;
}
}
#ifdef DEBUG_Evl_Chunk
// cout << "Ch2: " << Ch2 << endl;
#endif
fin.putback(Ch2);
PosCounter--;
#ifdef DEBUG_Evl_Chunk
//cout << "Into Single" << endl;
#endif
// Single
for(i= 0; i< SingleCutChar.size(); i++)
if( SingleCutChar[i] == Ch1 )
if( Str.size() == 0 )
goto Evl_Entry;
else{
#ifdef DEBUG_Evl_Chunk
cout << "Before Single Evaluation" << endl;
cout << Str << endl;
#endif
Evaluate(Str);
Str= "";
goto Evl_Entry;
}
Str+= Ch1;
goto Evl_Entry;
}
}
fin.close();
// The final check for string that ends with no puncture
if( Str.size() != 0 ){
Str+=Ch1;
#ifdef DEBUG_Evl_Chunk
cout << "Before Last Evaluation" << endl;
cout << "String: " << Str << endl;
cout << "PosCounter: " << PosCounter << endl;
cout << Str << endl;
#endif
Evaluate(Str);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -