⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tract.c

📁 数据挖掘Apriori算法在VC下的实现
💻 C
📖 第 1 页 / 共 3 页
字号:
  d   = ts_next(iset->tscan, file, NULL, 0);  if (d == TS_ERR)      return E_FREAD;  if (d != TS_REC)      return E_FLDCNT;  iset->app = _appcode(buf);    /* get default appearance code */  if (iset->app < 0)    return E_UNKAPP;  while (1) {                   /* read item/indicator pairs */    d = ts_next(iset->tscan, file, NULL, 0);    if (d <= TS_EOF)            /* read the next item */      return (d == TS_ERR) ? E_FREAD : 0;    if (buf[0] == '\0')         /* check for end of file */      return E_ITEMEXP;         /* and for a missing item */    item = nim_add(iset->nimap, buf, sizeof(ITEM));    if (item == EXISTS) return E_DUPITEM;  /* add the new item */    if (item == NULL)   return E_NOMEM;    /* to the name/id map */    item->frq = 0;              /* clear the frequency counters */    item->xfq = 0;              /* (occurrence and sum of t.a. sizes) */    if (d != TS_FLD)    return E_APPEXP;    d = ts_next(iset->tscan, file, NULL, 0);    if (d == TS_ERR)    return E_FREAD;    if (d == TS_FLD)    return E_FLDCNT;    item->app = _appcode(buf);  /* get the appearance indicator */    if (item->app <  0) return E_UNKAPP;  }  return 0;                     /* return 'ok' */}  /* is_readapp() *//*--------------------------------------------------------------------*/int is_read (ITEMSET *iset, FILE *file){                               /* --- read a transaction */  int  i, d;                    /* loop variable, delimiter type */  char *buf;                    /* read buffer */  ITEM *item;                   /* pointer to item */  assert(iset && file);         /* check the function arguments */  iset->cnt = 0;                /* initialize the item counter */  d   = _get_item(iset, file);  /* read the first item and */  buf = ts_buf(iset->tscan);    /* get the read buffer */  if ((d      == TS_EOF)        /* if at the end of the file */  &&  (buf[0] == '\0'))         /* and no item has been read, */    return 1;                   /* return 'end of file' */  while ((d      == TS_FLD)     /* read the other items */  &&     (buf[0] != '\0'))      /* of the transaction */    d = _get_item(iset, file);  /* up to the end of the record */  if (d == TS_ERR) return d;    /* check for a read error */  if ((buf[0] == '\0') && (d == TS_FLD) && (iset->cnt > 0))    return E_ITEMEXP;           /* check for an empty field */  ta_sort(iset->items, iset->cnt); /* prepare the transaction */  iset->cnt = ta_unique(iset->items, iset->cnt);  for (i = iset->cnt; --i >= 0; ) {    item = nim_byid(iset->nimap, iset->items[i]);    item->frq += 1;             /* count the item and */    item->xfq += iset->cnt;     /* sum the transaction sizes */  }                             /* as an importance indicator */  iset->tac += 1;               /* count the transaction */  return 0;                     /* return 'ok' */}  /* is_read() *//*--------------------------------------------------------------------*/int is_recode (ITEMSET *iset, int minfrq, int dir, int *map){                               /* --- recode items w.r.t. frequency */  int      i, k, n, t;          /* loop variables, buffer */  ITEM     *item;               /* to traverse the items */  SYMCMPFN *cmp;                /* comparison function */  assert(iset);                 /* check the function arguments */  if      (dir >  1) cmp = _asccmpx;  /* get the appropriate */  else if (dir >  0) cmp = _asccmp;   /* comparison function */  else if (dir >= 0) cmp = _nocmp;    /* (ascending/descending) */  else if (dir > -2) cmp = _descmp;   /* and sort the items */  else               cmp = _descmpx;  /* w.r.t. their frequency */  nim_sort(iset->nimap, cmp, (void*)minfrq, map, 1);  for (n = nim_cnt(iset->nimap); --n >= 0; ) {    item = (ITEM*)nim_byid(iset->nimap, n);    if (item->frq < minfrq)     /* determine frequent items and */      item->app = APP_NONE;     /* set all others to 'ignore' */    else if (item->app != APP_NONE)      break;                    /* in addition, skip all items */  }                             /* that have been set to 'ignore' */  if (map) {                    /* if a map vector is provided */    for (i = k = 0; i < iset->cnt; i++) {      t = map[iset->items[i]];  /* traverse the current transaction */      if (t <= n) iset->items[k++] = t;    }                           /* recode all items and */    iset->cnt = k;              /* delete all items to ignore */    ta_sort(iset->items, k);    /* resort the items */  }  return n+1;                   /* return number of frequent items */}  /* is_recode() *//*--------------------------------------------------------------------*/int is_filter (ITEMSET *iset, const char *marks){                               /* --- filter items in transaction */  return iset->cnt = ta_filter(iset->items, iset->cnt, marks);}  /* is_filter() *//*----------------------------------------------------------------------  Item Set Evaluation Functions----------------------------------------------------------------------*/ISEVAL* ise_create (ITEMSET *iset, int tacnt){                               /* --- create an item set evaluation */  int    i;                     /* loop variable */  ISEVAL *eval;                 /* created item set evaluator */  i    = is_cnt(iset);          /* get the number of items */  eval = (ISEVAL*)malloc(sizeof(ISEVAL) +(i+i) *sizeof(double));  if (!eval) return NULL;       /* create an evaluation object */  eval->logfs = eval->lsums +i +1;  /* and organize the memory */  eval->logta = log(tacnt);     /* store log of number of trans. */  while (--i >= 0)              /* compute logarithms of item freqs. */    eval->logfs[i] = log(is_getfrq(iset, i));  eval->lsums[0] = 0;           /* init. first sum of logarithms */  return eval;                  /* return created item set evaluator */}  /* ise_create() *//*--------------------------------------------------------------------*/double ise_eval (ISEVAL *eval, int *ids, int cnt, int pfx, int supp){                               /* --- evaluate an item set */  double sum;                   /* sum of logarithms of frequencies */  sum = (pfx > 0)               /* if there is a prefix, */      ? eval->lsums[pfx-1] : 0; /* get already known logarithm sum */  for ( ; pfx < cnt; pfx++)     /* compute and add remaining terms */    eval->lsums[pfx] = sum += eval->logfs[ids[pfx]];  return (log(supp) -sum +(cnt-1) *eval->logta) * (1.0/LN_2);}  /* ise_eval() */             /* compute logarithm of quotient *//*----------------------------------------------------------------------  Item Set Formatting Functions----------------------------------------------------------------------*/ISFMTR* isf_create (ITEMSET *iset, int scan){                               /* --- create an item set formatter */  int        i, k, n;           /* loop variable, buffers */  int        len, sum;          /* length of an item name and sum */  ISFMTR     *fmt;              /* created item set formatter */  char       buf[4*TS_SIZE+4];  /* buffer for formatting */  const char *name;             /* to traverse the item names */  char       *copy;             /* for copies of formatted names */  n   = is_cnt(iset);           /* get the number of items */  fmt = (ISFMTR*)malloc(sizeof(ISFMTR) + n    *sizeof(int)                                       +(n-1) *sizeof(char*));  if (!fmt) return NULL;        /* create the base structure */  fmt->buf  = NULL;             /* and organize the memory */  fmt->offs = (int*)(fmt->names +n);  for (i = sum = fmt->cnt = 0; i < n; i++) {    name = is_name(iset, i);    /* traverse the item names */    len  = strlen(name);        /* and get their length */    sum += k = (scan) ? sc_format(buf, name, 0) : len;    if (k > len) {              /* if formatting was needed */      copy = (char*)malloc((k+1) *sizeof(char));      if (!copy) { fmt->cnt = i-1; isf_delete(fmt); return NULL; }      name = strcpy(copy, buf); /* copy the formatted name */    }                           /* into a newly created string */    fmt->names[i] = name;       /* store (formatted) item name */  }                             /* afterwards create output buffer */  if (scan) fmt->cnt = n;       /* note the number of items */  fmt->buf = (char*)malloc((sum +n +1) *sizeof(char));  if (!fmt->buf) { isf_delete(fmt); return NULL; }  fmt->offs[0] = 0;             /* init. the first prefix offset */  return fmt;                   /* return created item set formatter */}  /* isf_create() *//*--------------------------------------------------------------------*/void isf_delete (ISFMTR *fmt){                               /* --- delete an item set formatter */  int i;                        /* loop variable */  for (i = fmt->cnt; --i >= 0; )    if ((fmt->names[i]    != NULL)    &&  (fmt->names[i][0] == '"'))      free((void*)fmt->names[i]);  if (fmt->buf) free(fmt->buf); /* delete reformatted item names, */  free(fmt);                    /* the output buffer and the base */}  /* isf_delete() *//*--------------------------------------------------------------------*/const char* isf_format (ISFMTR *fmt, int *ids, int cnt, int pre){                               /* --- format an item set */  char       *p;                /* to traverse the output buffer */  const char *name;             /* to traverse the item names */  p = fmt->buf +fmt->offs[pre]; /* get position for appending */  while (pre < cnt) {           /* traverse the additional items */    name = fmt->names[ids[pre]];/* copy the item name to the output */    while (*name) *p++ = *name++;    *p++ = ' ';                 /* add an item separator */    fmt->offs[++pre] = (int)(p-fmt->buf);  }                             /* record the new offset */  *p = '\0';                    /* terminate the formatted item set */  fmt->len = (int)(p-fmt->buf); /* note the length of the description */  return fmt->buf;              /* return the output buffer */}  /* isf_format() *//*----------------------------------------------------------------------  Transaction Functions----------------------------------------------------------------------*/int ta_unique (int *items, int n){                               /* --- remove duplicate items */  int *s, *d;                   /* to traverse the item vector */  assert(items && (n >= 0));    /* check the function arguments */  if (n <= 1) return n;         /* check for 0 or 1 item */  for (d = s = items; --n > 0;) /* traverse the sorted vector */    if (*++s != *d) *++d = *s;  /* and remove duplicate items */   return (int)(++d -items);     /* return the new number of items */}  /* ta_unique() *//*--------------------------------------------------------------------*/int ta_filter (int *items, int n, const char *marks){                               /* --- filter items in a transaction */  int i, k;                     /* loop variables */  assert(items && (n >= 0));    /* check the function arguments */  for (i = k = 0; i < n; i++)   /* remove all unmarked items */    if (marks[items[i]]) items[k++] = items[i];  return k;                     /* return the new number of items */}  /* ta_filter() *//*--------------------------------------------------------------------*/static int ta_cmp (const void *p1, const void *p2, void *data){                               /* --- compare transactions */  int       k, k1, k2;          /* loop variable, counters */  const int *i1, *i2;           /* to traverse the item identifiers */  assert(p1 && p2);             /* check the function arguments */  i1 = ((const TRACT*)p1)->items;  i2 = ((const TRACT*)p2)->items;  k1 = ((const TRACT*)p1)->cnt; /* get the item vectors */  k2 = ((const TRACT*)p2)->cnt; /* and the numbers of items */  for (k  = (k1 < k2) ? k1 : k2; --k >= 0; i1++, i2++) {    if (*i1 > *i2) return  1;   /* compare corresponding items */    if (*i1 < *i2) return -1;   /* and abort the comparison */  }                             /* if one of them is greater */  if (k1 > k2) return  1;       /* if one of the transactions */  if (k1 < k2) return -1;       /* is not empty, it is greater */  return 0;                     /* otherwise the two trans. are equal */}  /* ta_cmp() *//*--------------------------------------------------------------------*/static int ta_cmpx (const TRACT *ta, const int *items, int n){                               /* --- compare transactions */  int       k, m;               /* loop variable, counter */  const int *p;                 /* to traverse the item identifiers */  assert(ta && items);          /* check the function arguments */  p = ta->items; m = ta->cnt;   /* traverse the item vector */  m = ta->cnt;  for (k = (n < m) ? n : m; --k >= 0; p++, items++) {    if (*p > *items) return  1; /* compare corresponding items */    if (*p < *items) return -1; /* and abort the comparison */  }                             /* if one of them is greater */  if (m > n) return  1;         /* if one of the transactions */  if (m < n) return -1;         /* is not empty, it is greater */  return 0;                     /* otherwise the two trans. are equal */}  /* ta_cmpx() *//*----------------------------------------------------------------------  Transaction Set Functions----------------------------------------------------------------------*/TASET* tas_create (ITEMSET *itemset){                               /* --- create a transaction set */  TASET *taset;                 /* created transaction set */  assert(itemset);              /* check the function argument */  taset = malloc(sizeof(TASET));  if (!taset) return NULL;      /* create a transaction set */  taset->itemset = itemset;     /* and store the item set */  taset->cnt     = taset->vsz = taset->max = taset->total = 0;  taset->tracts  = NULL;        /* initialize the other fields */  return taset;                 /* return the created t.a. set */}  /* tas_create() *//*--------------------------------------------------------------------*/void tas_delete (TASET *taset, int delis){                               /* --- delete a transaction set */  assert(taset);                /* check the function argument */  if (taset->tracts) {          /* if there are loaded transactions */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -