📄 dffast.cc
字号:
while ( init_items_frequency[left_cursor] >= minsup && left_cursor<number_freq_items ) left_cursor++; while ( init_items_frequency[right_cursor] < minsup ) right_cursor--; if ( left_cursor < right_cursor ) { init_items_frequency[left_cursor] = init_items_frequency[right_cursor]; items_numbers[left_cursor] = right_cursor; }//if right_cursor--; }//while ranking = new short[itemrange]; for ( k = 0; k < itemrange; k++ ) ranking[k] = -1; itemsorder = new int[number_freq_items]; items_frequency = new int[number_freq_items]; int maxfrequency, bestitem = 0, bestindex = 0; int *used; used = new int[number_freq_items]; for ( i = 0; i < number_freq_items; i++ ) used[i] = 0; for ( int rank = number_freq_items-1; rank >= 0; rank-- ) { maxfrequency = -1; for ( j = 0; j < number_freq_items; j++ ) if ( used[j] == 0 && init_items_frequency[j] > maxfrequency ) { bestindex = j; bestitem = items_numbers[j]; maxfrequency = init_items_frequency[j]; }//if itemsorder[rank] = bestitem+min_itemnr; ranking[bestitem] = rank; items_frequency[rank] = maxfrequency; used[bestindex] = 1; }//for}//initialcounts::initialsort// constructordata_array::data_array (char *inputdata){ int v = 1; vector[0] = (char) v; for ( int k = 1; k < 8; k++ ) { v = 2*v; vector[k] = (char) v; }//for supervector = new char[initialcounts::number_freq_items]; for ( int col = 0; col < initialcounts::number_freq_items; col++ ) supervector[col] = vector[col & 7]; dataset = new transaction[initialcounts::number_transactions]; inputread (inputdata);}//data_array::data_array// reads the entire datafile from file inputdata and// puts in into a 2-dimensional character array (eight 0/1's per byte)// reads whole file (for the fourth time)!void data_array::inputread (char *inputdata){ int pos, item, items_count; short newitemnr; char c; ifstream fin (inputdata); if ( ! fin ) cout << "No such filename" << endl; int arraywidth = (int)((initialcounts::number_freq_items-1)/8) + 1; int rowcounter = 0; do { next_transaction = new char[arraywidth]; for ( int column = 0; column < arraywidth; column++ ) next_transaction[column] = '\000'; items_count = 0; do { fin.get (c); item = 0; pos = 0; while ( ( c >= '0' ) && ( c <= '9' ) && ! fin.eof ( ) ) { item = 10*item + (int)(c) -(int)('0'); pos++; fin.get (c); }//while if ( pos && initialcounts::ranking[item-initialcounts::min_itemnr] >= 0 ) { newitemnr = initialcounts::ranking[item-initialcounts::min_itemnr]; next_transaction[newitemnr >> 3] |= supervector[newitemnr]; items_count++; }//if } while ( c != '\n' && ! fin.eof ( ) ); if ( items_count >= 2 ) dataset[rowcounter++] = next_transaction; else delete [ ] next_transaction; } while( ! fin.eof ( ) ); fin.close ( );}//data_array::inputread// constructortrie::trie (initialcounts & initialdata, data_array & datagrid){ triesize = initialcounts::number_freq_items; root = (bucket*) calloc (triesize, sizeof (bucket)); for ( int itemnr = 0; itemnr < triesize; itemnr++ ) { root[itemnr].count = minsup; // to avoid problems with short's; // this particular count-field is never used anymore root[itemnr].itemvalue = itemnr; root[itemnr].number_followers = 0; root[itemnr].next = NULL; }//for}//trie::trie// build trie out of transactionsvoid trie::build_up ( ){ int upperbound = initialcounts::number_freq_items-2; bucket *root2; short numberfol; for ( int k = upperbound; k >= 0; k-- ) { root2 = root+k+1; numberfol = triesize-k-1; makeaux0 (root2, numberfol); for ( therow = 0; therow < initialcounts::number_transactions; therow++ ) { globpointer = dataset[therow]; if ( inspect (k) ) count (root2, numberfol); }//for extend (k); }//for}//trie::build_up// extend trie at position kvoid trie::extend (int k){ int i; int temp = 0; for ( i = k+1; i < triesize; i++ ) if ( root[i].aux >= minsup ) temp++; root[k].next = (bucket*) calloc (temp, sizeof (bucket)); root[k].number_followers = temp; copying (root[k].next, root+k+1, triesize-1-k);}//trie::extend// make all aux-fields 0void trie::makeaux0 (bucket *root, int number) { for ( int j = 0; j < number; j++ ) { root[j].aux = 0; if ( root[j].number_followers > 0 ) makeaux0 (root[j].next, root[j].number_followers); }//for}//trie::makeaux0// copy trie structure from q to pvoid trie::copying (struct bucket * p, struct bucket *q, int number_q_buckets){ short temp; // how many buckets does p need? short i; for ( int source = 0; source < number_q_buckets; source++ ) { if ( q->aux >= minsup ) { p->count = q->aux; p->itemvalue = q->itemvalue; temp = 0; for ( i = 0; i < q->number_followers; i++ ) if ( q->next[i].aux >= minsup ) temp++; p->number_followers = temp; if ( temp > 0 ) { p->next = (bucket*) calloc (temp, sizeof (bucket)); copying (p->next, q->next, q->number_followers); }//if else p->next = NULL; p++; // pointer arithmetic }//if q++; }//for}//trie::copying // do the counting: process current transaction recursively through trienodevoid count (bucket *trienode, short number_buckets){ for ( short i = 0; i < number_buckets; i++ ) { if ( inspect (trienode->itemvalue) ) { trienode->aux++; //!!! if ( trienode->number_followers > 0 ) count (trienode->next, trienode->number_followers); }//if trienode++; // pointer arithmetic }//for}//count// print resulting trie and frequency of each pattern lengthvoid trie::printtrie (char *outputdata){ int k; for ( k = 0; k < MAXDEPTH; k++ ) length_count[k] = 0; outfilename = fopen (outputdata, "w"); fprintf (outfilename, "(%d)\n", initialcounts::lines); printout (1, root, triesize); int lpl; for ( lpl = MAXDEPTH-1; lpl >= 0 && length_count[lpl] == 0; lpl-- ) ; if ( initialcounts::lines >= minsup ) printf ("1\n"); for ( k = 1; k <= lpl; k++ ) printf ("%d\n", length_count[k]); fclose(outfilename);}//trie::printtrie// do the printingvoid trie::printout (int depth, struct bucket *trienode, int number_buckets){ for ( int i = 0; i < number_buckets; i++ ) { results[depth] = trienode[i].itemvalue; length_count[depth]++; for ( int j = 1; j <= depth; j++ ) fprintf (outfilename, "%d ", initialcounts::itemsorder[results[j]]); if ( depth > 1 ) fprintf (outfilename, "(%d)\n", trienode[i].count); else fprintf (outfilename, "(%d)\n", initialcounts::items_frequency[i]); if ( trienode[i].number_followers > 0 ) printout (depth+1, trienode[i].next, trienode[i].number_followers); }//for}//trie::printout// main programint main (int argc, char *argv[ ]){// long int time1, time2, c;// time1 = time (&c); minsup = macro_minsup;// printf ("\nReading input starts ...\n"); initialcounts initialdata (input_filename); data_array datagrid (input_filename);// printf ("Reading input finished\n");// time2 = time (&c);// printf ("Execution time so far: %ld\n", time2-time1);// printf ("Counting starts ...\n");// time1 = time (&c); trie ourtrie (initialdata,datagrid); ourtrie.build_up ( );// time2 = time (&c);// printf ("Execution time - counting: %ld\n\n", time2-time1); if ( argc > 3 ) ourtrie.printtrie (output_filename); return 0;}//main
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -