📄 itemheadertable.cpp
字号:
#include "ItemHeaderTable.h"
#include "Node.h"
#include "Config.h"
bool UDgreater(ItemInfo* i1, ItemInfo* i2) {
return *i1 > *i2;
}
bool UDlesser(ItemInfo* i1, ItemInfo* i2) {
return *i1 < *i2;
}
bool ItemGreater(Item* i1, Item* i2) {
return *i1 > *i2;
}
ItemHeaderTable::ItemHeaderTable(fiHashMap* fiMap) {
this->fiMap = fiMap;
}
ItemHeaderTable::~ItemHeaderTable() {
if (itemInfoMap.size() > 0) {
hashMap::iterator iter;
for (iter = itemInfoMap.begin(); iter != itemInfoMap.end(); iter++) {
delete (*iter).second;
}
delete itemInfoVec;
}
}
void ItemHeaderTable::processNewNode(Node* node) throw (AppException) {
size_t h = string_hash(node->getName());
hashMap::iterator curr = itemInfoMap.find(h);
ItemInfo* itemInfo;
if (curr != itemInfoMap.end()) {
itemInfo = (*curr).second;
Node* startNode = itemInfo->getStartNode();
if (startNode == 0) {
itemInfo->setStartNode(node);
itemInfo->setLastNode(node);
} else {
itemInfo->getLastNode()->setLinkNode(node);
itemInfo->setLastNode(node);
}
} else {
string msg = "ItemHeaderTable does not have an entry for the current node " + node->getName();
throw AppException(2, msg, __FILE__, __LINE__);
}
}
void ItemHeaderTable::processNewItem(string name) {
size_t h = string_hash(name);
hashMap::iterator curr = itemInfoMap.find(h);
ItemInfo* itemInfo;
if (curr != itemInfoMap.end()) {
itemInfo = (*curr).second;
itemInfo->incrementCount();
} else {
itemInfo = new ItemInfo(name);
itemInfoMap[h] = itemInfo;
}
}
void ItemHeaderTable::processNewItem(Item* item) {
size_t h = string_hash(item->getName());
hashMap::iterator curr = itemInfoMap.find(h);
ItemInfo* itemInfo;
if (curr != itemInfoMap.end()) {
itemInfo = (*curr).second;
itemInfo->incrementCount(item->getCount());
//cout << "already in IHT ";
} else {
itemInfo = new ItemInfo(item->getName(), item->getCount());
itemInfoMap[h] = itemInfo;
//cout << "new in IHT ";
}
//cout << *itemInfo << endl;
}
int ItemHeaderTable::getItemCount(string name) {
size_t h = string_hash(name);
hashMap::iterator curr = itemInfoMap.find(h);
ItemInfo* itemInfo;
if (curr != itemInfoMap.end()) {
itemInfo = (*curr).second;
return itemInfo->getCount();
} else {
return 0;
}
}
void ItemHeaderTable::processMinimumSupport() {
itemInfoVec = new vector<ItemInfo*>;
hashMap::iterator iter;
vector<ItemInfo*>::iterator vecIter;
int minSupport = Config::getInstance()->getSupport();
/*
map<const size_t, int, ltstr> testMap;
map<const size_t, int, ltstr>::iterator testIter;
hash<int> int_hash;
for (int i = 0; i < 10; i++) {
size_t h = int_hash(i);
testMap[h] = i;
}
for (testIter = testMap.begin(); testIter != testMap.end();) {
testMap.erase(testIter++);
}
*/
for (iter = itemInfoMap.begin(); iter != itemInfoMap.end(); iter++) {
ItemInfo* itemInfo = (*iter).second;
//cout << "Evaluate " << *itemInfo << endl;
if (itemInfo->getCount() < minSupport) {
delete itemInfo;
//itemInfoMap.erase(iter++);
//itemInfo->resetCount();
//char ch; cin.get(ch);
} else {
this->itemInfoVec->push_back(itemInfo);
}
}
itemInfoMap.clear();
sort(this->itemInfoVec->begin(), this->itemInfoVec->end(), UDgreater);
for (vecIter = this->itemInfoVec->begin(); vecIter != this->itemInfoVec->end(); vecIter++) {
ItemInfo* itemInfo = *vecIter;
//cout << *itemInfo << endl;
//char ch; cin.get(ch);
size_t h = string_hash(itemInfo->getName());
itemInfoMap[h] = itemInfo;
}
}
void ItemHeaderTable::pushItemVector(Node* node, vector<vector<Item*>*>* branchVector) {
vector<Item*>* itemVector;
int count = node->getCount();
int seq = 0;
node = node->getParentNode();
if ( (node != 0) && (node->getName() != "root") ) {
itemVector = new vector<Item*>;
} else {
//cout << "no parent found" << endl;
return;
}
while ( (node != 0) && (node->getName() != "root") ) {
//cout << "curr node: " << node->getName() << endl;
Item* item = new Item(node->getName(), count, seq);
itemVector->push_back(item);
node = node->getParentNode();
seq++;
}
sort(itemVector->begin(), itemVector->end(), ItemGreater);
branchVector->push_back(itemVector);
}
void ItemHeaderTable::processItemHeaderTable(Node* node, string name) throw (AppException) {
vector<vector<Item*>*>* branchVector = new vector<vector<Item*>*>;
vector<ItemInfo*>::reverse_iterator vecIter;
if (node->hasSingleBranchOnly()) {
vecIter = itemInfoVec->rbegin();
ItemInfo* itemInfo = *vecIter;
Node* currNode = itemInfo->getStartNode();
Node* leafNode = currNode->getLeafNode();
if (leafNode == 0) {
throw AppException(21, "LeafNode for the tree cannot be determined", __FILE__, __LINE__);
}
currNode = leafNode;
vector<Item*>* itemVector = new vector<Item*>;
int count = currNode->getCount();
int seq = 0;
while ( (currNode != 0) && (currNode->getName() != "root") ) {
//cout << "curr node: " << *currNode << endl;
Item* item = new Item(currNode->getName(), count, seq);
itemVector->push_back(item);
currNode = currNode->getParentNode();
seq++;
}
sort(itemVector->begin(), itemVector->end(), ItemGreater);
this->extractFrequentItemsets(name, itemVector);
delete node;
vector<Item*>::iterator itr;
for (itr = itemVector->begin(); itr != itemVector->end(); itr++) {
delete *itr;
}
delete itemVector;
} else {
for (vecIter = itemInfoVec->rbegin(); vecIter != itemInfoVec->rend(); vecIter++) {
ItemInfo* itemInfo = *vecIter;
string fiName = itemInfo->getName() + " " + name;
FrequentItemset* fi = new FrequentItemset(fiName, itemInfo->getCount());
string hashStr = fiName;
trim(hashStr);
size_t h = string_hash(hashStr);
(*fiMap)[h] = fi;
//cout << fiName << ": " << itemInfo->getCount() << endl;
Node* node = itemInfo->getStartNode();
while ( (node != 0) && (node->getName() != "root") ) {
//cout << *node << endl;
pushItemVector(node, branchVector);
node = node->getLinkNode();
}
this->processConditionalPattern(fiName, branchVector);
branchVector->clear();
}
}
delete branchVector;
}
void ItemHeaderTable::processConditionalPattern(string name, vector<vector<Item*>*>* branchVector) {
if (branchVector->size() == 0) {
return;
} else {
ItemHeaderTable* iht = new ItemHeaderTable(fiMap);
Node* rootNode = new Node("root", 0, 1);
vector<vector<Item*>*>::iterator branchIter;
for (branchIter = branchVector->begin(); branchIter != branchVector->end(); branchIter++) {
vector<Item*>* itemVector = *branchIter;
iht->processItemVector(itemVector);
}
//cout << "iht after processItemVector" << endl;
//cout << *iht << endl;
iht->processMinimumSupport();
//cout << "iht after processMinimumSupport" << endl;
//cout << *iht << endl;
if (iht->getSize() < 1) {
return;
}
for (branchIter = branchVector->begin(); branchIter != branchVector->end(); branchIter++) {
vector<Item*>* itemVector = *branchIter;
iht->frequencyTrimItemVector(itemVector);
//cout << "itemVector size: " << itemVector->size() << endl;
rootNode->insertTree(itemVector, iht);
delete itemVector;
}
iht->processItemHeaderTable(rootNode, name);
delete iht;
//cout << "finished processConditionalPattern" << endl;
}
}
void ItemHeaderTable::processItemVector(vector<Item*>* itemVector) {
//cout << "itemVec size: " << itemVector->size() << endl;
vector<Item*>::iterator iter;
for (iter = itemVector->begin(); iter != itemVector->end(); iter++) {
Item* item = *iter;
//cout << *item << endl;
this->processNewItem(item);
}
}
void ItemHeaderTable::frequencyTrimItemVector(vector<Item*>* itemVector) {
vector<Item*>::iterator iter;
for (iter = itemVector->begin(); iter != itemVector->end(); iter++) {
Item* item = *iter;
int count = this->getItemCount(item->getName());
if (count == 0) {
delete item;
itemVector->erase(iter);
iter--;
}
}
}
void ItemHeaderTable::extractFrequentItemsets(string name, vector<Item*>* itemVector) {
if (itemVector->size() < 1) {
return;
}
vector<Item*>::reverse_iterator revIter;
vector<Item*>::iterator iter;
for (revIter = itemVector->rbegin(); revIter != itemVector->rend(); revIter++) {
Item* item = *revIter;
string fiName = item->getName() + " " + name;
FrequentItemset* fi = new FrequentItemset(fiName, item->getCount());
string hashStr = fiName;
trim(hashStr);
size_t h = string_hash(hashStr);
(*fiMap)[h] = fi;
//cout << fiName << ": " << item->getCount() << endl;
vector<Item*>* newItemVec = new vector<Item*>;
for (iter = itemVector->begin(); *iter != *revIter; iter++) {
Item* item = *iter;
newItemVec->push_back(item);
}
this->extractFrequentItemsets(fiName, newItemVec);
delete newItemVec;
}
}
int ItemHeaderTable::getSize() const {
return itemInfoMap.size();
}
hashMap ItemHeaderTable::getItemInfoMap() const {
return itemInfoMap;
}
ostream& operator<<(ostream& os, const ItemHeaderTable& iht) {
//os << " Size: " << iht.getSize();
hashMap itemInfoMap = iht.getItemInfoMap();
hashMap::iterator iter;
for (iter = itemInfoMap.begin(); iter != itemInfoMap.end(); iter++) {
ItemInfo* itemInfo = (*iter).second;
os << "Name: " << itemInfo->getName() << " Count: " << itemInfo->getCount() << endl;
}
return os;
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -