⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assrule.cpp

📁 用data mining技术进行false prediction
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "AssRule.h"

/**
 * Ordering predicate for Item pointers.
 *
 * Dereferences both arguments and defers to Item's operator<, so that a
 * sequence of Item* is ordered by the pointed-to items rather than by
 * address.
 *
 * @param i1 left-hand item (dereferenced unconditionally)
 * @param i2 right-hand item (dereferenced unconditionally)
 * @return true when *i1 compares less than *i2
 */
bool UDlesser(Item* i1, Item* i2) {
    Item& lhs = *i1;
    Item& rhs = *i2;
    return lhs < rhs;
}

/**
 * Ordering predicate for Rule pointers.
 *
 * Dereferences both arguments and defers to Rule's operator<, so that a
 * sequence of Rule* is ordered by the pointed-to rules rather than by
 * address.
 *
 * @param r1 left-hand rule (dereferenced unconditionally)
 * @param r2 right-hand rule (dereferenced unconditionally)
 * @return true when *r1 compares less than *r2
 */
bool ruleLesser(Rule* r1, Rule* r2) {
    Rule& lhs = *r1;
    Rule& rhs = *r2;
    return lhs < rhs;
}

void AssRule::processFirstScan() throw (AppException) {
    string sName;
    string line;
    int recNum = 0;

    try {
        ifstream inFile(dataFileName, ios::in);

        if (inFile.is_open()) {
            //cout << "opened file" << endl;

            while (!inFile.eof()) {
                getline(inFile, line);
                recNum++;
                if ((recNum >= testStartRec) && (recNum <= testEndRec)) {
                    continue;
                }
                //cout << "firstScan Range (" << testStartRec << "," << testEndRec << ") " << recNum << endl;

                trim(line);
                tokenizer<> tok(line);

                for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
                    sName = *beg;
                    itemHeaderTable->processNewItem(sName);
                    //cout << sName << endl;
                }
            }

            inFile.close();
            //process minimum support. Keep only those items in itemHeaderTable
            //which have the minimum support and delete the remaining.
            //cout << *itemHeaderTable << endl;
            itemHeaderTable->processMinimumSupport();
            //cout << *itemHeaderTable << endl;
        } else {
            throw AppException(9, "unable to open Input file", __FILE__, __LINE__);
        }
    } catch (exception &e) {
        throw AppException(9, string("abnormal error in processing Input file - ") + string(e.what()), __FILE__, __LINE__);
    }
};

void AssRule::processSecondScan() throw (AppException) {
    string sName;
    string line;
    int recNum = 0;

    try {
        ifstream inFile(dataFileName, ios::in);

        if (inFile.is_open()) {
            //cout << "opened file again" << endl;

            while (!inFile.eof()) {
                getline(inFile, line);
                recNum++;
                if ((recNum >= testStartRec) && (recNum <= testEndRec)) {
                    continue;
                }
                //cout << "secondScan Range (" << testStartRec << "," << testEndRec << ") " << recNum << endl;

                trim(line);
                vector<Item*>* itemVec = new vector<Item*>;
                tokenizer<> tok(line);
                int seq = 1000;

                for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
                    sName = *beg;
                    int count = itemHeaderTable->getItemCount(sName);
                    if (count > 0) {
                        Item* item = new Item(sName, count, seq);
                        itemVec->push_back(item);
                        //cout << sName << endl;
                    }
                    seq--;
                }

                sort(itemVec->rbegin(), itemVec->rend(), UDlesser);

                // remove duplicate items that might exist in each transaction
                // as they have already been accounted for.

                vector<Item*>::iterator iter;
                iter = unique(itemVec->begin(), itemVec->end());
                itemVec->erase(iter, itemVec->end());

                for (iter = itemVec->begin(); iter != itemVec->end(); iter++) {
                    Item* item = *iter;
                    item->setCount(1);
                }

                // construct the tree for items read in the transaction
                rootNode->insertTree(itemVec, itemHeaderTable);
            }

            inFile.close();

        } else {
            throw AppException(9, "unable to open Input file", __FILE__, __LINE__);
        }
    } catch (exception &e) {
        throw AppException(9, string("abnormal error in processing Input file - ") + string(e.what()), __FILE__, __LINE__);
    }
}

/**
 * Appends every subset of a space-separated item list to subsetVec.
 *
 * The list is split at its last space; the subsets of the leading part are
 * generated recursively, then each entry already in subsetVec is duplicated
 * with the last element appended (and trimmed). For a list without any
 * space the empty string and the list itself are appended.
 *
 * Example: "a b" appends "", "a", "b", "a b" (in that order).
 *
 * @param str       space-separated item names
 * @param subsetVec output vector; results are appended. Note that entries
 *                  already present on entry also take part in the
 *                  duplication step, so callers normally pass an empty
 *                  vector.
 */
void AssRule::genSubsets(string str, vector<string>* subsetVec) {
    // Keep the position as size_type: storing it in an int narrows the value
    // and makes the npos comparison rely on a signed/unsigned conversion.
    const string::size_type index = str.find_last_of(' ');

    if (index == string::npos) {
        subsetVec->push_back(string());
        subsetVec->push_back(str);
        return;
    }

    const string shortStr = str.substr(0, index);
    const string lastElem = str.substr(index + 1);
    this->genSubsets(shortStr, subsetVec);

    // Extend only the entries that exist right now; indexing up to a fixed
    // bound avoids both a heap-allocated scratch vector and iterator
    // invalidation while appending.
    const vector<string>::size_type existing = subsetVec->size();
    subsetVec->reserve(existing * 2);
    for (vector<string>::size_type i = 0; i < existing; ++i) {
        string s = (*subsetVec)[i];
        s.append(" " + lastElem);
        trim(s);    // removes the leading space left when extending ""
        subsetVec->push_back(s);
    }
}

/**
 * Formats a rule string "<sString>><items of lString not in sString>".
 *
 * Both arguments are tokenized into sets of item names. The set difference
 * (tokens of lString that do not occur in sString) is joined with single
 * spaces in the set's sorted order and placed after a '>' separator;
 * sString itself is placed, unchanged, before it.
 *
 * @param lString item list of the complete itemset
 * @param sString item list of the subset forming the left side of the rule
 * @return sString + ">" + the remaining items
 */
string AssRule::getRule(string lString, string sString) {
    tokenizer<> fullTokens(lString);
    set<string> fullItems(fullTokens.begin(), fullTokens.end());

    tokenizer<> subsetTokens(sString);
    set<string> subsetItems(subsetTokens.begin(), subsetTokens.end());

    set<string> remaining;
    set_difference(fullItems.begin(), fullItems.end(),
                   subsetItems.begin(), subsetItems.end(),
                   inserter(remaining, remaining.begin()));

    // Each item is followed by a space; the trailing one is trimmed off.
    string rightSide;
    for (set<string>::const_iterator it = remaining.begin(); it != remaining.end(); ++it) {
        rightSide += *it;
        rightSide += " ";
    }
    trim(rightSide);

    return sString + ">" + rightSide;
}

void AssRule::extractFrequentItemsets() {
    string nullStr;
    itemHeaderTable->processItemHeaderTable(rootNode, nullStr);
}

void AssRule::extractAssociationRules() {
    fiHashMap::iterator iter;
    fiHashMap::iterator curr;
    int mapSize = fiMap->size();
    long i = 0;

    for (iter = fiMap->begin(); iter != fiMap->end(); iter++) {
        i++;
        FrequentItemset* fi = (*iter).second;
        //cout << i << ") " << *fi << " mapsize: " << mapSize << "; ruleSize: " << ruleVec->size() << endl;

        string str = fi->getName();
        int count = fi->getCount();

        if (str.find("Failure") == string::npos) {
            continue;
        }

        vector<string>* subsetVec = new vector<string>;
        this->genSubsets(str, subsetVec);

        vector<string>::iterator itr;
        subsetVec->erase(subsetVec->begin());
        subsetVec->erase(subsetVec->end());

        for (itr = subsetVec->begin(); itr != subsetVec->end(); itr++) {
            string den = *itr;
            int denCount = 9999999;
            size_t h = string_hash(den);
            curr = fiMap->find(h);
            if (curr != fiMap->end()) {
                fi = (*curr).second;
                denCount = fi->getCount();
                //cout << "FI found in fiMap: " << den << "; count: " << denCount << endl;
            } else {
                cout << "Severe Error -> FI cannot be found in fiMap: " << den << endl;
            }

            float conf = (float)count/denCount;
            if (conf >= Config::getInstance()->getConfidence()) {
                den = getRule(str, den);

                int index = den.find(">", 0);
                string thenStr = den.substr(index+1);
                string ifStr = den.substr(0, index);

                if ( (thenStr.find("Failure") == string::npos) || (ifStr.find("Failure") != string::npos) ) {
                    continue;
                }

                Rule* rule = new Rule(den, conf);
                ruleVec->push_back(rule);
                //cout << *rule << endl;
                //cout << setiosflags(ios::fixed | ios::showpoint) << setprecision(4);
                //cout << conf << " : " << den << endl;
            }
        }
        delete subsetVec;
    }

    sort(ruleVec->rbegin(), ruleVec->rend(), ruleLesser);
    //*
    vector<Rule*>::iterator itr;
    for (itr = ruleVec->begin(); itr != ruleVec->end(); itr++) {
        //cout << setiosflags(ios::fixed | ios::showpoint) << setprecision(4);
        cout << **itr << endl;
    }
    //*/
}

void AssRule::checkPrediction(string inputStr) {
    string str;
    bool applicable = true;
    bool correctPrediction = true;
    map<const size_t, string> strMap;
    map<const size_t, string>::iterator curr;

    tokenizer<> tok(inputStr);
    for (tokenizer<>::iterator beg = tok.begin(); beg != tok.end(); ++beg) {
        str = *beg;
        size_t h = string_hash(str);
        strMap[h] = str;
    }

    /*
     *  If it is a failure event without any precursor events then
     *  we cannot apply any of the association rules.
     */
    if (strMap.size() == 1) {
        sinRec++;
    } else {
        vector<Rule*>::iterator itr;
        vector<string>::iterator i;
        for (itr = ruleVec->begin(); itr != ruleVec->end(); itr++) {
            applicable = true;
            Rule* r = *itr;
            vector<string> ifVec = r->getIfVector();

            for (i = ifVec.begin(); i != ifVec.end(); i++) {
                size_t h = string_hash(*i);
                curr = strMap.find(h);
                if (curr == strMap.end()) {
                    applicable = false;
                    break;
                }
            }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -