⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tract.c

📁 apriori算法是数据挖掘的经典算法之1,其基于关联规则的思想.这是我的第2个收藏算法
💻 C
📖 第 1 页 / 共 2 页
字号:
int is_read (ITEMSET *iset, FILE *file){                               /* --- read a transaction set */  int  d;                       /* delimiter type */  char *buf;                    /* read buffer */  assert(iset && file);         /* check the function arguments */  iset->cnt = 0;                /* initialize the item counter */  if (tfs_skip(iset->tfscan, file) < 0)    return E_FREAD;             /* skip leading comments */  d   = _get_item(iset, file);  /* read the first item and */  buf = tfs_buf(iset->tfscan);  /* get the read buffer */  if ((d      == TFS_EOF)       /* if at the end of the file */  &&  (buf[0] == '\0'))         /* and no item has been read, */    return 1;                   /* return 'end of file' */  while ((d      == TFS_FLD)    /* read the other items */  &&     (buf[0] != '\0'))      /* of the transaction */    d = _get_item(iset, file);  /* up to the end of the record */  if (d < TFS_EOF)    return d; /* check for a read error and */  if (buf[0] == '\0') return E_ITEMEXP;     /* an empty field */  ta_sort(iset->items, iset->cnt);  iset->cnt = ta_unique(iset->items, iset->cnt);  return 0;                     /* prepare the transaction */}  /* is_read() */              /* and return 'ok' *//*--------------------------------------------------------------------*/int is_recode (ITEMSET *iset, int minfrq, int dir, int *map){                               /* --- recode items w.r.t. frequency */  int  i, k, n, t;              /* loop variables, buffer */  ITEM *item;                   /* to traverse the items */  assert(iset);                 /* check the function arguments */  nim_sort(iset->nimap, (dir >= 0) ? _asccmp : _descmp,    (void*)minfrq, map, 1);     /* sort the items w.r.t. frequency */  for (n = nim_cnt(iset->nimap); --n >= 0; ) {    item = (ITEM*)nim_byid(iset->nimap, n);    if (item->frq < minfrq)     /* determine frequent items and */      item->app = APP_NONE;     /* set all others to 'ignore' */    else if (item->app != APP_NONE)      break;                    /* in addition, skip all items */  }                             /* that have been set to 'ignore' */  if (map) {                    /* if a map vector is provided */    for (i = k = 0; i < iset->cnt; i++) {      t = map[iset->items[i]];  /* traverse the current transaction */      if (t <= n) iset->items[k++] = t;    }                           /* recode all items and */    iset->cnt = k;              /* delete all items to ignore */    ta_sort(iset->items, k);    /* resort the items */  }  return n+1;                   /* return number of frequent items */}  /* is_recode() *//*----------------------------------------------------------------------  Transaction Functions----------------------------------------------------------------------*/int ta_unique (int *items, int n){                               /* --- remove duplicate items */  int *s, *d;                   /* to traverse the item vector */  assert(items && (n >= 0));    /* check the function arguments */  for (d = s = items; --n > 0;) /* traverse the sorted vector */    if (*++s != *d) *++d = *s;  /* and remove duplicate items */   return (int)(++d -items);     /* return the new number of items */}  /* ta_unique() *//*--------------------------------------------------------------------*/static int ta_cmp (const void *p1, const void *p2, void *data){                               /* --- compare transactions */  int       k;                  /* loop variable */  const int *i1, *i2;           /* to traverse the item identifiers */  assert(p1 && p2);             /* check the function arguments */  if (((const TRACT*)p1)->cnt > ((const TRACT*)p2)->cnt) return  1;  if (((const TRACT*)p1)->cnt < ((const TRACT*)p2)->cnt) return -1;  i1 = ((const TRACT*)p1)->items;  /* if the number of items differs, */  i2 = ((const TRACT*)p2)->items;  /* the result can be det. directly */  for (k = ((const TRACT*)p1)->cnt; --k >= 0; i1++, i2++) {    if (*i1 > *i2) return  1;   /* if the number of items is equal, */    if (*i1 < *i2) return -1;   /* compare corresp. items and abort */  }                             /* if one of them is greater */  return 0;                     /* otherwise the two trans. are equal */}  /* ta_cmp() *//*--------------------------------------------------------------------*/static int ta_cmpx (const TRACT *ta, const int *items, int n){                               /* --- compare transactions */  const int *p;                 /* to traverse the item identifiers */  assert(ta && items);          /* check the function arguments */  if (ta->cnt > n)   return  1; /* if the number of items differs, */  if (ta->cnt < n)   return -1; /* the result can be det. directly */  for (p = ta->items; --n >= 0; p++, items++) {    if (*p > *items) return  1; /* traverse the transactions, */    if (*p < *items) return -1; /* compare corresponding items, and */  }                             /* abort if one of them is greater */  return 0;                     /* otherwise the two trans. are equal */}  /* ta_cmpx() *//*----------------------------------------------------------------------  Transaction Set Functions----------------------------------------------------------------------*/TASET* tas_create (ITEMSET *itemset){                               /* --- create a transaction set */  TASET *taset;                 /* created transaction set */  assert(itemset);              /* check the function argument */  taset = malloc(sizeof(TASET));  if (!taset) return NULL;      /* create a transaction set */  taset->itemset = itemset;     /* and store the item set */  taset->cnt     = taset->vsz = taset->max = 0;  taset->tracts  = NULL;        /* initialize the other fields */  return taset;                 /* return the created t.a. set */}  /* tas_create() *//*--------------------------------------------------------------------*/void tas_delete (TASET *taset, int delis){                               /* --- delete a transaction set */  assert(taset);                /* check the function argument */  if (taset->tracts) {          /* if there are loaded transactions */    while (--taset->cnt >= 0)   /* traverse the transaction vector */      free(taset->tracts[taset->cnt]);    free(taset->tracts);        /* delete all transactions */  }                             /* and the transaction vector */  if (delis && taset->itemset) is_delete(taset->itemset);  free(taset);                  /* delete the item set and */}  /* tas_delete() */           /* the transaction set body *//*--------------------------------------------------------------------*/int tas_add (TASET *taset, const int *items, int n){                               /* --- add a transaction */  TRACT *ta;                    /* new transaction */  int   *p;                     /* to traverse the transaction */  TRACT **vec;                  /* new transaction vector */  int   size;                   /* new transaction vector size */  assert(taset);                /* check the function arguments */  size = taset->vsz;            /* get the transaction vector size */  if (taset->cnt >= size) {     /* if the transaction vector is full */    size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE;    vec   = (TRACT**)realloc(taset->tracts, size *sizeof(TRACT*));    if (!vec) return -1;        /* enlarge the transaction vector */    taset->tracts = vec; taset->vsz = size;  }                             /* set the new vector and its size */  if (!items) {                 /* if no transaction is given */    items = is_tract(taset->itemset);    n     = is_tsize(taset->itemset);  }                             /* get it from the item set */  ta = (TRACT*)malloc(sizeof(TRACT) +(n-1) *sizeof(int));  if (!ta) return -1;           /* create a new transaction */  taset->tracts[taset->cnt++]  = ta;  if (n > taset->max)           /* store the transaction and */    taset->max = n;             /* update maximal transaction size */  for (p = ta->items +(ta->cnt = n); --n >= 0; )    *--p = items[n];            /* copy the items of the t.a. */  return 0;                     /* return 'ok' */}  /* tas_add() *//*--------------------------------------------------------------------*/void tas_recode (TASET *taset, int *map, int max){                               /* --- recode items */  int   i, k, n, x;             /* loop variables, buffer */  TRACT *t;                     /* to traverse the transactions */  int   *p;                     /* to traverse the item identifiers */  taset->max = 0;               /* clear the maximal transaction size */  for (n = taset->cnt; --n >= 0; ) {    t = taset->tracts[n];       /* traverse the transactions and */    p = t->items;               /* the items of each transaction */    for (i = k = 0; i < t->cnt; i++) {      x = map[p[i]];            /* recode the items and */      if (x <= max) p[k++] = x; /* remove superfluous items */    }                           /* from the transaction */    if (k > taset->max)         /* update the max. transaction size */      taset->max = k;           /* with the new size of the t.a. */    ta_sort(t->items, t->cnt = k);  }                             /* resort the item identifiers */}  /* tas_recode() *//*--------------------------------------------------------------------*/void tas_sort (TASET *taset){                               /* --- sort a transaction set */  assert(taset);                /* check the function argument */  v_sort(taset->tracts, taset->cnt, ta_cmp, NULL);}  /* tas_sort() *//*--------------------------------------------------------------------*/int tas_occur (TASET *taset, const int *items, int n){                               /* --- count transaction occurrences */  int l, r, m, k = taset->cnt;  /* index variables */  assert(taset && items);       /* check the function arguments */  for (r = m = 0; r < k; ) {    /* find right boundary */    m = (r + k) >> 1;           /* by a binary search */    if (ta_cmpx(taset->tracts[m], items, n) > 0) k = m;    else                                         r = m+1;  }  for (l = m = 0; l < k; ) {    /* find left boundary */    m = (l + k) >> 1;           /* by a binary search */    if (ta_cmpx(taset->tracts[m], items, n) < 0) l = m+1;    else                                         k = m;  }  return r -l;                  /* compute the number of occurrences */}  /* tas_count *//*--------------------------------------------------------------------*/#ifndef NDEBUGvoid tas_show (TASET *taset){                               /* --- show a transaction set */  int   i, k;                   /* loop variables */  TRACT *t;                     /* to traverse the transactions */  assert(taset);                /* check the function argument */  for (i = 0; i < taset->cnt; i++) {    t = taset->tracts[i];       /* traverse the transactions */    for (k = 0; k < t->cnt; k++) {  /* traverse the items */      if (k > 0) putc(' ', stdout); /* print a separator */      printf(is_name(taset->itemset, t->items[k]));    }                           /* print the next item */    putc('\n', stdout);         /* terminate the transaction */  }                             /* finally print the number of t.a. */  printf("%d transaction(s)\n", taset->cnt);}  /* tas_show() */#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -