📄 assrule.cpp
字号:
#include "AssRule.h"
// Ordering predicate for pointers to Item: compares the pointed-to objects
// with Item's operator< rather than comparing the pointer values.
// Used as the comparator when sorting a transaction's items.
bool UDlesser(Item* i1, Item* i2) {
    Item& lhs = *i1;
    Item& rhs = *i2;
    return lhs < rhs;
}
// Ordering predicate for pointers to Rule: compares the pointed-to objects
// with Rule's operator< rather than comparing the pointer values.
// Used as the comparator when sorting the generated rules.
bool ruleLesser(Rule* r1, Rule* r2) {
    Rule& lhs = *r1;
    Rule& rhs = *r2;
    return lhs < rhs;
}
/*
 * First pass over the input data file.
 *
 * Reads the file line by line (records are numbered from 1), skips every
 * record whose number lies in [testStartRec, testEndRec], and passes each
 * token of the remaining records to itemHeaderTable->processNewItem().
 * Once the whole file has been read, the header table is asked to apply
 * its minimum-support processing.
 *
 * Throws AppException (code 9) if the file cannot be opened, or if a
 * std::exception is raised while the file is being processed.
 */
void AssRule::processFirstScan() throw (AppException) {
    string sName;
    string line;
    int recNum = 0;
    try {
        ifstream inFile(dataFileName, ios::in);
        if (!inFile.is_open()) {
            throw AppException(9, "unable to open Input file", __FILE__, __LINE__);
        }
        //cout << "opened file" << endl;
        // Let the read itself control the loop. Testing eof() before reading
        // would process one phantom empty record after the last line and
        // would never terminate if the stream failed without reaching EOF.
        while (getline(inFile, line)) {
            recNum++;
            if ((recNum >= testStartRec) && (recNum <= testEndRec)) {
                // record is inside the excluded range - not part of this scan
                continue;
            }
            //cout << "firstScan Range (" << testStartRec << "," << testEndRec << ") " << recNum << endl;
            trim(line);
            tokenizer<> tok(line);
            for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
                sName = *beg;
                itemHeaderTable->processNewItem(sName);
                //cout << sName << endl;
            }
        }
        inFile.close();
        //process minimum support. Keep only those items in itemHeaderTable
        //which have the minimum support and delete the remaining.
        //cout << *itemHeaderTable << endl;
        itemHeaderTable->processMinimumSupport();
        //cout << *itemHeaderTable << endl;
    } catch (exception &e) {
        throw AppException(9, string("abnormal error in processing Input file - ") + string(e.what()), __FILE__, __LINE__);
    }
}
void AssRule::processSecondScan() throw (AppException) {
string sName;
string line;
int recNum = 0;
try {
ifstream inFile(dataFileName, ios::in);
if (inFile.is_open()) {
//cout << "opened file again" << endl;
while (!inFile.eof()) {
getline(inFile, line);
recNum++;
if ((recNum >= testStartRec) && (recNum <= testEndRec)) {
continue;
}
//cout << "secondScan Range (" << testStartRec << "," << testEndRec << ") " << recNum << endl;
trim(line);
vector<Item*>* itemVec = new vector<Item*>;
tokenizer<> tok(line);
int seq = 1000;
for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
sName = *beg;
int count = itemHeaderTable->getItemCount(sName);
if (count > 0) {
Item* item = new Item(sName, count, seq);
itemVec->push_back(item);
//cout << sName << endl;
}
seq--;
}
sort(itemVec->rbegin(), itemVec->rend(), UDlesser);
// remove duplicate items that might exist in each transaction
// as they have already been accounted for.
vector<Item*>::iterator iter;
iter = unique(itemVec->begin(), itemVec->end());
itemVec->erase(iter, itemVec->end());
for (iter = itemVec->begin(); iter != itemVec->end(); iter++) {
Item* item = *iter;
item->setCount(1);
}
// construct the tree for items read in the transaction
rootNode->insertTree(itemVec, itemHeaderTable);
}
inFile.close();
} else {
throw AppException(9, "unable to open Input file", __FILE__, __LINE__);
}
} catch (exception &e) {
throw AppException(9, string("abnormal error in processing Input file - ") + string(e.what()), __FILE__, __LINE__);
}
}
/*
 * Appends to *subsetVec every subset of the space-separated elements in str,
 * each subset rendered as a space-separated string.
 *
 * The function recurses on str without its last element, then appends a copy
 * of every entry collected so far with the last element added. Called with
 * an empty vector and "a b" it produces: "", "a", "b", "a b" - the empty
 * subset first, the full set last.
 *
 * str        space-separated list of elements
 * subsetVec  output vector; must not be null. Entries already present on
 *            entry are kept and are also extended by the elements of str.
 */
void AssRule::genSubsets(string str, vector<string>* subsetVec) {
    // Keep the position in string::size_type: squeezing it into an int would
    // truncate npos and make the comparison below rely on conversion quirks.
    const string::size_type lastSpace = str.find_last_of(" ");
    if (lastSpace == string::npos) {
        // single element: its subsets are the empty set and the element itself
        subsetVec->push_back(string());
        subsetVec->push_back(str);
        return;
    }

    const string shortStr = str.substr(0, lastSpace);
    const string lastElem = str.substr(lastSpace + 1);
    this->genSubsets(shortStr, subsetVec);

    // Only the entries present right now get extended. Iterating by index
    // keeps the loop valid while the vector grows.
    const vector<string>::size_type existing = subsetVec->size();
    subsetVec->reserve(existing * 2);
    for (vector<string>::size_type i = 0; i < existing; ++i) {
        string extended = (*subsetVec)[i];
        extended.append(" " + lastElem);
        // strips the leading blank produced when extending the empty subset
        trim(extended);
        subsetVec->push_back(extended);
    }
    //cout << "subsetVecSize: " << subsetVec->size() << endl;
}
/*
 * Builds the textual form of a rule, "<sString>><remaining tokens>".
 *
 * Both arguments are tokenized; the tokens of lString that do not occur in
 * sString are joined with single blanks (in the sorted order of std::set)
 * and placed after the '>' separator. The part before '>' is sString
 * exactly as passed in.
 */
string AssRule::getRule(string lString, string sString) {
    // distinct tokens of the full itemset
    tokenizer<> fullTokens(lString);
    const set<string> fullSet(fullTokens.begin(), fullTokens.end());

    // distinct tokens of the left-hand side
    tokenizer<> lhsTokens(sString);
    const set<string> lhsSet(lhsTokens.begin(), lhsTokens.end());

    // tokens that are in the full itemset but not on the left-hand side
    set<string> remainder;
    set_difference(fullSet.begin(), fullSet.end(),
                   lhsSet.begin(), lhsSet.end(),
                   inserter(remainder, remainder.begin()));

    // each token is followed by a blank; trim() drops the trailing one
    string rhs;
    for (set<string>::const_iterator it = remainder.begin(); it != remainder.end(); ++it) {
        rhs += *it;
        rhs += " ";
    }
    trim(rhs);

    return sString + ">" + rhs;
}
void AssRule::extractFrequentItemsets() {
string nullStr;
itemHeaderTable->processItemHeaderTable(rootNode, nullStr);
}
void AssRule::extractAssociationRules() {
fiHashMap::iterator iter;
fiHashMap::iterator curr;
int mapSize = fiMap->size();
long i = 0;
for (iter = fiMap->begin(); iter != fiMap->end(); iter++) {
i++;
FrequentItemset* fi = (*iter).second;
//cout << i << ") " << *fi << " mapsize: " << mapSize << "; ruleSize: " << ruleVec->size() << endl;
string str = fi->getName();
int count = fi->getCount();
if (str.find("Failure") == string::npos) {
continue;
}
vector<string>* subsetVec = new vector<string>;
this->genSubsets(str, subsetVec);
vector<string>::iterator itr;
subsetVec->erase(subsetVec->begin());
subsetVec->erase(subsetVec->end());
for (itr = subsetVec->begin(); itr != subsetVec->end(); itr++) {
string den = *itr;
int denCount = 9999999;
size_t h = string_hash(den);
curr = fiMap->find(h);
if (curr != fiMap->end()) {
fi = (*curr).second;
denCount = fi->getCount();
//cout << "FI found in fiMap: " << den << "; count: " << denCount << endl;
} else {
cout << "Severe Error -> FI cannot be found in fiMap: " << den << endl;
}
float conf = (float)count/denCount;
if (conf >= Config::getInstance()->getConfidence()) {
den = getRule(str, den);
int index = den.find(">", 0);
string thenStr = den.substr(index+1);
string ifStr = den.substr(0, index);
if ( (thenStr.find("Failure") == string::npos) || (ifStr.find("Failure") != string::npos) ) {
continue;
}
Rule* rule = new Rule(den, conf);
ruleVec->push_back(rule);
//cout << *rule << endl;
//cout << setiosflags(ios::fixed | ios::showpoint) << setprecision(4);
//cout << conf << " : " << den << endl;
}
}
delete subsetVec;
}
sort(ruleVec->rbegin(), ruleVec->rend(), ruleLesser);
//*
vector<Rule*>::iterator itr;
for (itr = ruleVec->begin(); itr != ruleVec->end(); itr++) {
//cout << setiosflags(ios::fixed | ios::showpoint) << setprecision(4);
cout << **itr << endl;
}
//*/
}
void AssRule::checkPrediction(string inputStr) {
string str;
bool applicable = true;
bool correctPrediction = true;
map<const size_t, string> strMap;
map<const size_t, string>::iterator curr;
tokenizer<> tok(inputStr);
for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
str = *beg;
size_t h = string_hash(str);
strMap[h] = str;
}
/*
* If it is a failure event without any precursor events then
* we cannot apply any of the association rules.
*/
if (strMap.size() == 1) {
sinRec++;
} else {
vector<Rule*>::iterator itr;
vector<string>::iterator i;
for (itr = ruleVec->begin(); itr != ruleVec->end(); itr++) {
applicable = true;
Rule* r = *itr;
vector<string> ifVec = r->getIfVector();
for (i = ifVec.begin(); i != ifVec.end(); i++) {
size_t h = string_hash(*i);
curr = strMap.find(h);
if (curr == strMap.end()) {
applicable = false;
break;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -