📄 cet.cpp
字号:
}
//remove the item from the end of currentPrefix
currentPrefix.pop_back();
myStartPosition++;
}
}//end for (step 3)
return maxNewSupport;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void cleanNode ()Decription: a helper function for recursively removing closed itemsetsin a subtree from the HashtableNote: assuming currentPrefix contains the item at "node"* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */void CET::cleanNode(TreeNode & node)
{
//step 1, if I am closed, remove me from the hash table
if ( node.isClosed ) {
pair<HashClosed::iterator, HashClosed::iterator> p =
closedItemsets.equal_range(node.myTidSum);
for ( HashClosed::iterator pos = p.first; pos != p.second; ++pos ) {
if ( pos->second.first == (node.mySupport)
&& pos->second.second == currentPrefix ) {
closedItemsets.erase(pos);
break;
}
}
node.isClosed = false;
}
//step 2, if I have children, then recursively clean my children
if ( node.myChildren.size() != 0 ) {
for ( Family::iterator pos1 = node.myChildren.begin();
pos1 != node.myChildren.end(); ++pos1 )
{
currentPrefix.push_back(pos1->first);
cleanNode(pos1->second);
currentPrefix.pop_back();
}
}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void deletion ()Decription: delete an itemset from the CET * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
void CET::deletion(const int tid,
const vector<unsigned short> & itemset,
const FP & FPTree)
{
vector<bool> parentIsNew(itemset.size(), false);
deleteHelp(tid, itemset, parentIsNew, 0, FPTree, CETRoot);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void deleteHelp ()Decription: a helper function for deletion() * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
void CET::deleteHelp(const int tid,
const vector<unsigned short> & parentItemset,
const vector<bool> & parentIsNew,
const unsigned short startPosition,
const FP & FPTree,
TreeNode & node)
{
vector<unsigned short> myItemset;
vector<bool> myIsNew;
vector<bool> myNeedErase; //check if this node should be erased
//according to if it is infrequent at parent's level
bool needCount = false;
bool existNewInFrequent = false;
set<unsigned short> itemsToErase;
////debug //cout << "current prefix: "; //for ( unsigned short s = 0; s < currentPrefix.size(); s++ ) // cout << currentPrefix[s]; //cout << endl; //cout << "items: "; //for ( unsigned short s = 0; s < parentItemset.size(); s++ ) // cout << parentItemset[s]; //cout << endl; //cout << "isNew: "; //for ( unsigned short s = 0; s < parentIsNew.size(); s++ ) // cout << parentIsNew[s]; //cout << endl;
//step 1, remove the parentItemset components that were infrequent in the above level
//and copy the itemset to pass down to myItemset.
//myIsNew records if an item becomes infrequent at THIS level
for ( unsigned short s = startPosition; s < parentItemset.size(); s++ ) {
if ( node.myChildren[parentItemset[s]].mySupport == node.childrenSupport )
needCount = true; //need to recount the support of node's children
if ( parentIsNew[s] ) {//just became infrequent in PARENT's level
if ( node.myChildren[parentItemset[s]].mySupport >= SUPPORT ) {
myItemset.push_back(parentItemset[s]);
myIsNew.push_back(true);
myNeedErase.push_back(true);
itemsToErase.insert(parentItemset[s]);
existNewInFrequent = true;
currentPrefix.push_back(parentItemset[s]);
cleanNode(node.myChildren[parentItemset[s]]);
currentPrefix.pop_back();
}
else {
node.myChildren.erase(parentItemset[s]);
}
//actually, there is no difference to erase here or to erase in step 3
}
else {//update the support and tidsum of corresponding items
node.myChildren[parentItemset[s]].mySupport --;
node.myChildren[parentItemset[s]].myTidSum -= tid;
if ( !node.myChildren[parentItemset[s]].isInfrequent ) //even if unpromising
{
myItemset.push_back(parentItemset[s]);
myIsNew.push_back( node.myChildren[parentItemset[s]].mySupport < SUPPORT );
myNeedErase.push_back(false);
if ( node.myChildren[parentItemset[s]].mySupport < SUPPORT ) {
node.myChildren[parentItemset[s]].isInfrequent = true; //temporary
//later, recursive checking will handle it
existNewInFrequent = true;
}
}
}
}
//step 2, recursively update the children of each family
unsigned short myStartPosition = 0;
for ( Family::iterator pos1 = node.myChildren.begin();
pos1 != node.myChildren.end() && myStartPosition < myItemset.size(); ++pos1 )
{
if ( pos1->first > myItemset.back() ) break; //done! (not possible)
//case 1, an old node, not in the myItemset,
//need to proprogate the update down ONE LEVEL
else if ( pos1->first < myItemset[myStartPosition] ) {
if ( pos1->second.isInfrequent || pos1->second.isUnpromising ||
!existNewInFrequent )
{
continue;
}
for ( int j = myStartPosition; j < myItemset.size(); j++ ) {
if ( myIsNew[j] ) {
pos1->second.myChildren.erase(myItemset[j]);
//this children could not be frequent before: it is not
//in the deleted itemset
}
}
}
//case 2, an old node, in the itemset, still frequent after deletion
//need to delete items down
//fact: pos1->first == myItemset[myStartPosition]
else if ( !myIsNew[myStartPosition] ) {
if ( pos1->second.isUnpromising ) {//if originally unpromising
myStartPosition++; //still unpromising
continue;
}
else if ( pos1->second.isClosed ) {//if originally closed
//add the item to the end of currentPrefix
currentPrefix.push_back(pos1->first);
//first, remove old value from the hash table, and set me to be non-closed
pair<HashClosed::iterator, HashClosed::iterator> p =
closedItemsets.equal_range(pos1->second.myTidSum+tid); //old value
for ( HashClosed::iterator pos = p.first; pos != p.second; ++pos ) {
if ( pos->second.first == (pos1->second.mySupport+1)
&& pos->second.second == currentPrefix ) {
closedItemsets.erase(pos);
break;
}
}
pos1->second.isClosed = false;
//second, check left pruning
bool stillClosed = true;
p = closedItemsets.equal_range(pos1->second.myTidSum);
for ( HashClosed::const_iterator pos = p.first; pos != p.second; ++pos ) {
if ( pos->second.first == pos1->second.mySupport
&& isSubset(pos->second.second, currentPrefix)
&& !isPrefix(currentPrefix, pos->second.second) ) {
//notice: avoid my descendents, i.e., my support and tidsum
//have been updated, but not my children
pos1->second.isUnpromising = true;
stillClosed = false;
break;
}
}
if ( !stillClosed ) { //if becomes unpromising, prune
cleanNode(pos1->second);
pos1->second.myChildren.erase(pos1->second.myChildren.begin(),
pos1->second.myChildren.end());
}
else { //else, pass the update down
if ( myStartPosition+1 < myItemset.size() ) {
deleteHelp(tid,
myItemset,
myIsNew,
myStartPosition+1,
FPTree,
pos1->second);
}
if ( pos1->second.childrenSupport < pos1->second.mySupport ) {
pos1->second.isClosed = true;
closedItemsets.insert(make_pair(pos1->second.myTidSum,
make_pair(pos1->second.mySupport,currentPrefix)));
}
}
currentPrefix.pop_back();
}
else {//if originally not closed, not isInfrequent, not isUnpromising
//add the item to the end of currentPrefix
currentPrefix.push_back(pos1->first);
if ( myStartPosition+1 < myItemset.size() ) {
deleteHelp(tid,
myItemset,
myIsNew,
myStartPosition+1,
FPTree,
pos1->second);
}
if ( pos1->second.childrenSupport < pos1->second.mySupport ) {//impossible
pos1->second.isClosed = true;
closedItemsets.insert(make_pair(pos1->second.myTidSum,
make_pair(pos1->second.mySupport,currentPrefix)));
}
currentPrefix.pop_back();
}
myStartPosition++;
}
//case 3, a node in the itemset, which just becomes infrequent
//remove all children, if there are any
else {
if ( myNeedErase[myStartPosition] ) {//became infrequent at parent's level
//do nothing
}
else if ( pos1->second.isUnpromising ) pos1->second.isUnpromising = false;
else {
//add the item to the end of currentPrefix
currentPrefix.push_back(pos1->first);
//recover the OLD support and tidSum, so that can be found
//in the hash table
pos1->second.mySupport ++;
pos1->second.myTidSum += tid;
cleanNode(pos1->second);
pos1->second.mySupport --;
pos1->second.myTidSum -= tid;
pos1->second.myChildren.erase(pos1->second.myChildren.begin(),
pos1->second.myChildren.end());
currentPrefix.pop_back();
pos1->second.isInfrequent = true;
}
myStartPosition++;
}
}//end for
//step 3, if exist nodes to be removed because it became infrequent at parent's level
if ( itemsToErase.size() != 0 ) {
for ( set<unsigned short>::iterator pos9 = itemsToErase.begin();
pos9 != itemsToErase.end(); ++pos9 )
{
node.myChildren.erase(*pos9);
}
}
//step 2, update node.childrenSupport, if necessary
if ( needCount ) {
node.childrenSupport = 0;
for ( Family::iterator pos = node.myChildren.begin();
pos != node.myChildren.end(); ++pos )
{
if ( node.childrenSupport < pos->second.mySupport )
node.childrenSupport = pos->second.mySupport;
}
}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void printMe ()Decription: debugging function * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */void CET::printMe(TreeNode & node, unsigned short level)
{
for ( unsigned short s = 0; s < currentPrefix.size(); s++ ) cout << currentPrefix[s]; cout << ": ";
if ( node.myChildren.size() != 0 ) {
Family::iterator pos;
for ( pos = node.myChildren.begin(); pos != node.myChildren.end(); ++pos ) {
cout << "\"" << pos->first << ' ' << pos->second.mySupport << ' ' << pos->second.myTidSum << ' ' << (pos->second.isInfrequent? 't' : 'f') << ' ' << (pos->second.isUnpromising? 't' : 'f') << ' ' << (pos->second.isClosed? 't' : 'f') << "\" ";
}
cout << endl;
for ( Family::iterator pos1 = node.myChildren.begin(); pos1 != node.myChildren.end(); ++pos1) { currentPrefix.push_back(pos1->first); printMe(pos1->second, level + 1);
currentPrefix.pop_back();
}
}
else
cout << endl;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void printHash ()Decription: debugging function * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */void CET::printHash()
{
for ( HashClosed::iterator pos = closedItemsets.begin(); pos != closedItemsets.end(); ++pos ) { cout << pos->first << " "; cout << pos->second.first << ":"; for ( int i = 0; i < pos->second.second.size(); i++ ) { cout << " " << pos->second.second[i]; } cout << endl; }}
///* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //void checkMe ()////Decription: check if the closed itemsets are correct//* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *///void CET::checkMe()
//{
// HashClosed::iterator pos1;
// for ( pos1 = closedItemsets.begin(); pos1 != closedItemsets.end(); ++pos1 ) {
// pair<HashClosed::iterator, HashClosed::iterator> p =
// closedItemsets.equal_range(pos1->first);
//
// for ( HashClosed::const_iterator pos = p.first; pos != p.second; ++pos ) {
// if ( pos == pos1 ) continue;
//
// else if ( pos->second.first == pos1->second.first
// && isSubset(pos->second.second, pos1->second.second) )
// {
// cout << "error: " << endl;
// cout << pos1->first << " " << pos1->second.first << " ";
// for ( int i = 0; i < pos1->second.second.size(); i++ )
// cout << pos1->second.second[i] << "-";
// cout << endl;
// cout << pos->first << " " << pos->second.first << " ";
// for ( int i = 0; i < pos->second.second.size(); i++ )
// cout << pos->second.second[i] << "-";
// cout << endl;
// }
// }
// }
//}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -