⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apriori.c

📁 数据挖掘中的关联规则算法
💻 C
📖 第 1 页 / 共 3 页
字号:
  /* --- sort and recode items --- */  MSG(fprintf(stderr, "sorting and recoding items ... "));  t   = clock();                /* start the timer */  map = (int*)malloc(is_cnt(itemset) *sizeof(int));  if (!map) error(E_NOMEM);     /* create an item identifier map */  if (rsdef == IST_BODY)        /* if rule support = body support */    k = (int)ceil(tacnt *supp *conf);  else                          /* if rule supp. = body&head support */    k = (int)ceil(tacnt *supp); /* compute the absolut support */  n = is_recode(itemset, k, sort, map);  if (taset) {                  /* sort and recode the items and */    tas_recode(taset, map,n);   /* recode the loaded transactions */    maxcnt = tas_max(taset);    /* get the new maximal t.a. size */  }                             /* (may be smaller than before) */  free(map);                    /* delete the item identifier map */  MSG(fprintf(stderr, "[%d item(s)] ", n));  MSG(fprintf(stderr, "done [%.2fs].", SEC_SINCE(t)));  if (n <= 0) error(E_NOTAS);   /* print a log message and */  MSG(fprintf(stderr, "\n"));   /* check the number of items */  /* --- create a transaction tree --- */  tt = 0;                       /* init. the tree construction time */  if (tree && taset) {          /* if transactions were loaded */    MSG(fprintf(stderr, "creating transaction tree ... "));    t = clock();                /* start the timer */    tatree = tat_create(taset, heap);     if (!tatree) error(E_NOMEM);/* create a transaction tree */    if (filter == 0) {          /* if a tree rebuild is not needed, */      tas_delete(taset, 0); taset = NULL; }  /* delete transactions */    tt = clock() -t;            /* note the time for the construction */    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));  }                             /* print a log message */  /* --- create an item set tree --- */  t = clock(); tc = 0;          /* start the timer */  apps = (char*)malloc(n *sizeof(char));  if (!apps) error(E_NOMEM);    /* get the appearance indicators */  for (apps += i = n; --i >= 0; )    *--apps = is_getapp(itemset, i);  istree = ist_create(n, supp, conf, rsdef, apps, memopt);  if (!istree) error(E_NOMEM);  /* create an item set tree */  for (k = n; --k >= 0; )       /* set single item frequencies */    ist_setcnt(istree, k, is_getfrq(itemset, k));  ist_settac(istree, tacnt);    /* set the number of transactions */  if (maxlen > maxcnt)          /* clamp the rule length */    maxlen = maxcnt;            /* to the maximum set size */  /* --- check item subsets --- */  MSG(fprintf(stderr, "checking subsets of size 1"));  while (ist_height(istree) < maxlen) {    if (filter != 0) {          /* if to filter w.r.t. item usage, */      i = ist_check(istree, apps);     /* check current item usage */      if (i < maxlen) maxlen = i;      /* update the maximum size */      if (ist_height(istree) >= i) break;    }                           /* check the tree height */    k = ist_addlvl(istree);     /* while max. height is not reached, */    if (k <  0) error(E_NOMEM); /* add a level to the item set tree */    if (k != 0) break;          /* if no level was added, abort */    MSG(fprintf(stderr, " %d", ist_height(istree)));    if (tatree) {               /* if a transaction tree was created */      if (((filter < 0)         /* if to filter w.r.t. item usage */      &&   (i < -filter *n))    /* and enough items were removed */      ||  ((filter > 0)         /* or counting time is long enough */      &&   (i < n) && (i *(double)tt < filter *n *tc))) {        n = i; x = clock();     /* note the new number of items */        tas_filter(taset,apps); /* and remove unnecessary items */        tat_delete(tatree);     /* delete the transaction tree */        tatree = tat_create(taset, heap);        if (!tatree) error(E_NOMEM);        tt = clock() -x;        /* rebuild the transaction tree and */      }                         /* note the new construction time */      x  = clock();             /* count the transaction tree */      ist_countx(istree, tatree);      tc = clock() -x; }        /* note the new count time */    else if (taset) {           /* if transactions were loaded */      if (((filter < 0)         /* if to filter w.r.t. item usage */      &&   (i <= -filter *n))   /* and enough items were removed */      ||  ((filter > 0)         /* or counting time is long enough */      &&   (i *(double)tt <= filter *n *tc))) {        n = i; x = clock();     /* note the new number of items */        tas_filter(taset,apps); /* and remove unnecessary items */        tt = clock() -t;        /* from the transactions */      }                         /* note the filtering time */      for (i = tacnt; --i >= 0;)/* traverse and count transactions */        ist_count(istree, tas_tract(taset, i), tas_tsize(taset, i));      tc = clock() -t; }        /* note the new count time */    else {                      /* if to work on the input file, */      rewind(in);               /* reset the file position */      for (maxcnt = 0; (i = is_read(itemset, in)) == 0; ) {        if (filter != 0)        /* (re)read the transactions and */          is_filter(itemset, apps);  /* remove unnecessary items */        k = is_tsize(itemset);  /* update the maximum size */        if (k > maxcnt) maxcnt = k;  /* of a transaction */        ist_count(istree, is_tract(itemset), k);      }                         /* count the transaction in the tree */      if (i < 0) error(i, fn_in, RECCNT(itemset), BUFFER(itemset));      if (maxcnt < maxlen)      /* update the maximal rule length */        maxlen = maxcnt;        /* according to the max. t.a. size */    }                           /* (may be smaller than before) */  }  if (!taset && !tatree) {      /* if transactions were not loaded */    if (in != stdin) fclose(in);/* if not read from standard input, */    in = NULL;                  /* close the input file */  }                             /* clear the file variable */  MSG(fprintf(stderr, " done [%.2fs].\n", SEC_SINCE(t)));  /* --- filter found item sets --- */  if ((target == TT_MFSET) || (target == TT_CLSET)) {    MSG(fprintf(stderr, "filtering %s item sets ... ",        (target == TT_MFSET) ? "maximal" : "closed"));    t = clock();                /* filter the item sets */    ist_filter(istree, (target == TT_MFSET) ? IST_MAXFRQ : IST_CLOSED);    MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));  }                             /* (filter takes longer than print) */  /* --- sort transactions --- */  if (target <= TT_CLSET) {     /* if to find frequent item sets */    if (!taset)                 /* transactions must be loaded */      ext = 0;                  /* for extended support output */    else if (ext) {             /* if extended output is requested */      MSG(fprintf(stderr, "sorting transactions ... "));      t = clock();              /* start the timer */      tas_sort(taset, heap);    /* sort the transactions */      MSG(fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)));    }                           /* (sorting is necessary to find the */  }                             /* number of identical transactions) */  /* --- print item sets/rules/hyperedges --- */  t = clock();                  /* start the timer */  if (fn_out && *fn_out)        /* if an output file name is given, */    out = fopen(fn_out, "w");   /* open the output file */  else {                        /* if no output file name is given, */    out = stdout; fn_out = "<stdout>"; }    /* write to std. output */  MSG(fprintf(stderr, "writing %s ... ", fn_out));  if (!out) error(E_FOPEN, fn_out);  ist_init(istree, minlen, arem, minval);  set = is_tract(itemset);      /* get the transaction buffer */  if (target <= TT_CLSET) {     /* if to find frequent item sets */    for (n = 0; 1; ) {          /* extract item sets from the tree */      k = ist_set(istree, set, &supp, &conf);      if (k <= 0) break;        /* get the next frequent item set */      if (supp > smax) continue;/* check against maximal support */      for (i = 0; i < k; i++) { /* traverse the set's items */        name = is_name(itemset, set[i]);        if (c2scf) { sc_format(buf, name, 0); name = buf; }        fputs(name, out);       /* print the name of the next item */        fputs((i < k-1) ? sep : " ", out);      }                         /* print a separator */      fputs(" (", out);         /* print the item set's support */      if (sout & 1) { fprintf(out, fmt, supp *100);                      fputs((sout & 2) ? "%/" : "%", out); }      if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); }      if (ext) {                /* if to print the extended support */        supp = tas_occur(taset, set, k);        fputs(", ", out);       /* get the number of occurrences */        fprintf(out, fmt, (supp/tacnt) *100);        if (sout & 2) fprintf(out, "/%.0f", supp);      }                         /* print the extended support data */      if (aval) { fputs(", ", out); fprintf(out, fmt, conf *100); }      fputs(")\n", out);        /* print the add. eval. measure, */      n++;                      /* terminate the support output, */    } }                         /* and count the item set */  else if (target == TT_RULE) { /* if to find association rules, */    for (n = 0; 1; ) {          /* extract rules from tree */      k = ist_rule(istree, set, &supp, &conf, &lftval, &minval);      if (k <= 0) break;        /* get the next association rule */      if (supp > smax) continue;/* check against maximal support */      for (i = 0; i < k; i++) { /* traverse the rule's items */        name = is_name(itemset, set[i]);        if (c2scf) { sc_format(buf, name, 0); name = buf; }        fputs(name, out);       /* print the next item */        fputs((i <= 0) ? " <- " : ((i < k-1) ? sep : " "), out);      }                         /* print a separator */      fputs(" (", out);         /* print the rule evaluation */      if (ext && (rsdef == IST_BODY)) {        if (sout & 1) { fprintf(out, fmt, supp *conf *100);                        if (sout & 2) fputc('/', out); }        if (sout & 2) { fprintf(out, "%d", (int)(supp*conf*tacnt+0.5));}        fputs(", ", out);       /* print the support of the rule */      }                         /* from  the support of the body */      if (sout & 1) { fprintf(out, fmt, supp *100);                      if (sout & 2) fputc('/', out); }      if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); }      fputs(", ", out);         /* print the rule support */      if (ext && (rsdef == IST_BOTH)) {        if (sout & 1) { fprintf(out, fmt, (supp/conf) *100);                        if (sout & 2) fputc('/', out); }        if (sout & 2) { fprintf(out,"%d",(int)((supp/conf)*tacnt+0.5));}        fputs(", ", out);       /* print the support of the body */      }                         /* from  the support of the rule */      fprintf(out, fmt, conf *100); /* print the rule confidence */      if (lift) { fputs(", ", out); fprintf(out, fmt, lftval *100); }      if (aval) { fputs(", ", out); fprintf(out, fmt, minval *100); }      fputs(")\n", out);        /* print the value of the additional */      n++;                      /* rule evaluation measure and */    } }                         /* count the association rule */  else {                        /* if to find association hyperedges */    for (n = 0; 1; ) {          /* extract hyperedges from tree */      k = ist_hedge(istree, set, &supp, &conf);      if (k <= 0) break;        /* get the next hyperedge */      if (supp > smax) continue;/* check against maximal support */      for (i = 0; i < k; i++) { /* traverse the edge's items */        name = is_name(itemset, set[i]);        if (c2scf) { sc_format(buf, name, 0); name = buf; }        fputs(name, out);       /* print the name of the next item */        fputs((i < k-1) ? sep : " ", out);      }                         /* print a separator */      fputs(" (", out);      if (sout & 1) { fprintf(out, fmt, supp *100);                      if (sout & 2) fputc('/', out); }      if (sout & 2) { fprintf(out, "%d", (int)(supp *tacnt +0.5)); }      fputs(", ", out); fprintf(out, fmt, conf *100);      fputs(")\n", out);        /* print support and confidence */      n++;                      /* of the hyperedge and */    }                           /* count the hyperedge */  }  /* if (target <= TT_MFSET) .. else .. */  if (fflush(out) != 0) error(E_FWRITE, fn_out);  if (out != stdout) fclose(out);  out = NULL;                   /* close the output file */  MSG(fprintf(stderr, "[%d %s(s)] done ", n, ttypes[target]));  MSG(fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)));  #ifdef BENCH  printf("number of counters      : %d\n", istree->cnt);  printf("necessary counters      : %d\n", istree->nec);  printf("number of child pointers: %d\n", istree->chcnt);  printf("necessary child pointers: %d\n", istree->chnec);  printf("allocated bytes         : %d\n", istree->bytes);  #endif  /* --- clean up --- */  #ifndef NDEBUG                /* if this is a debug version */  free(apps);                   /* delete the item app. vector */  ist_delete(istree);           /* delete the item set tree, */  if (tatree) tat_delete(tatree);     /* the transaction tree, */  if (taset)  tas_delete(taset, 0);   /* the transaction set, */  is_delete(itemset);                 /* and the item set */  #endif  #ifdef STORAGE                /* if storage debugging */  showmem("at end of program"); /* check memory usage */  #endif  return 0;                     /* return 'ok' */}  /* main() */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -