📄 tract.c
字号:
int is_read (ITEMSET *iset, FILE *file){ /* --- read a transaction set */ int d; /* delimiter type */ char *buf; /* read buffer */ assert(iset && file); /* check the function arguments */ iset->cnt = 0; /* initialize the item counter */ if (tfs_skip(iset->tfscan, file) < 0) return E_FREAD; /* skip leading comments */ d = _get_item(iset, file); /* read the first item and */ buf = tfs_buf(iset->tfscan); /* get the read buffer */ if ((d == TFS_EOF) /* if at the end of the file */ && (buf[0] == '\0')) /* and no item has been read, */ return 1; /* return 'end of file' */ while ((d == TFS_FLD) /* read the other items */ && (buf[0] != '\0')) /* of the transaction */ d = _get_item(iset, file); /* up to the end of the record */ if (d < TFS_EOF) return d; /* check for a read error and */ if (buf[0] == '\0') return E_ITEMEXP; /* an empty field */ ta_sort(iset->items, iset->cnt); iset->cnt = ta_unique(iset->items, iset->cnt); return 0; /* prepare the transaction */} /* is_read() */ /* and return 'ok' *//*--------------------------------------------------------------------*/int is_recode (ITEMSET *iset, int minfrq, int dir, int *map){ /* --- recode items w.r.t. frequency */ int i, k, n, t; /* loop variables, buffer */ ITEM *item; /* to traverse the items */ assert(iset); /* check the function arguments */ nim_sort(iset->nimap, (dir >= 0) ? _asccmp : _descmp, (void*)minfrq, map, 1); /* sort the items w.r.t. frequency */ for (n = nim_cnt(iset->nimap); --n >= 0; ) { item = (ITEM*)nim_byid(iset->nimap, n); if (item->frq < minfrq) /* determine frequent items and */ item->app = APP_NONE; /* set all others to 'ignore' */ else if (item->app != APP_NONE) break; /* in addition, skip all items */ } /* that have been set to 'ignore' */ if (map) { /* if a map vector is provided */ for (i = k = 0; i < iset->cnt; i++) { t = map[iset->items[i]]; /* traverse the current transaction */ if (t <= n) iset->items[k++] = t; } /* recode all items and */ iset->cnt = k; /* delete all items to ignore */ ta_sort(iset->items, k); /* resort the items */ } return n+1; /* return number of frequent items */} /* is_recode() *//*---------------------------------------------------------------------- Transaction Functions----------------------------------------------------------------------*/int ta_unique (int *items, int n){ /* --- remove duplicate items */ int *s, *d; /* to traverse the item vector */ assert(items && (n >= 0)); /* check the function arguments */ for (d = s = items; --n > 0;) /* traverse the sorted vector */ if (*++s != *d) *++d = *s; /* and remove duplicate items */ return (int)(++d -items); /* return the new number of items */} /* ta_unique() *//*--------------------------------------------------------------------*/static int ta_cmp (const void *p1, const void *p2, void *data){ /* --- compare transactions */ int k; /* loop variable */ const int *i1, *i2; /* to traverse the item identifiers */ assert(p1 && p2); /* check the function arguments */ if (((const TRACT*)p1)->cnt > ((const TRACT*)p2)->cnt) return 1; if (((const TRACT*)p1)->cnt < ((const TRACT*)p2)->cnt) return -1; i1 = ((const TRACT*)p1)->items; /* if the number of items differs, */ i2 = ((const TRACT*)p2)->items; /* the result can be det. directly */ for (k = ((const TRACT*)p1)->cnt; --k >= 0; i1++, i2++) { if (*i1 > *i2) return 1; /* if the number of items is equal, */ if (*i1 < *i2) return -1; /* compare corresp. items and abort */ } /* if one of them is greater */ return 0; /* otherwise the two trans. are equal */} /* ta_cmp() *//*--------------------------------------------------------------------*/static int ta_cmpx (const TRACT *ta, const int *items, int n){ /* --- compare transactions */ const int *p; /* to traverse the item identifiers */ assert(ta && items); /* check the function arguments */ if (ta->cnt > n) return 1; /* if the number of items differs, */ if (ta->cnt < n) return -1; /* the result can be det. directly */ for (p = ta->items; --n >= 0; p++, items++) { if (*p > *items) return 1; /* traverse the transactions, */ if (*p < *items) return -1; /* compare corresponding items, and */ } /* abort if one of them is greater */ return 0; /* otherwise the two trans. are equal */} /* ta_cmpx() *//*---------------------------------------------------------------------- Transaction Set Functions----------------------------------------------------------------------*/TASET* tas_create (ITEMSET *itemset){ /* --- create a transaction set */ TASET *taset; /* created transaction set */ assert(itemset); /* check the function argument */ taset = malloc(sizeof(TASET)); if (!taset) return NULL; /* create a transaction set */ taset->itemset = itemset; /* and store the item set */ taset->cnt = taset->vsz = taset->max = 0; taset->tracts = NULL; /* initialize the other fields */ return taset; /* return the created t.a. set */} /* tas_create() *//*--------------------------------------------------------------------*/void tas_delete (TASET *taset, int delis){ /* --- delete a transaction set */ assert(taset); /* check the function argument */ if (taset->tracts) { /* if there are loaded transactions */ while (--taset->cnt >= 0) /* traverse the transaction vector */ free(taset->tracts[taset->cnt]); free(taset->tracts); /* delete all transactions */ } /* and the transaction vector */ if (delis && taset->itemset) is_delete(taset->itemset); free(taset); /* delete the item set and */} /* tas_delete() */ /* the transaction set body *//*--------------------------------------------------------------------*/int tas_add (TASET *taset, const int *items, int n){ /* --- add a transaction */ TRACT *ta; /* new transaction */ int *p; /* to traverse the transaction */ TRACT **vec; /* new transaction vector */ int size; /* new transaction vector size */ assert(taset); /* check the function arguments */ size = taset->vsz; /* get the transaction vector size */ if (taset->cnt >= size) { /* if the transaction vector is full */ size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE; vec = (TRACT**)realloc(taset->tracts, size *sizeof(TRACT*)); if (!vec) return -1; /* enlarge the transaction vector */ taset->tracts = vec; taset->vsz = size; } /* set the new vector and its size */ if (!items) { /* if no transaction is given */ items = is_tract(taset->itemset); n = is_tsize(taset->itemset); } /* get it from the item set */ ta = (TRACT*)malloc(sizeof(TRACT) +(n-1) *sizeof(int)); if (!ta) return -1; /* create a new transaction */ taset->tracts[taset->cnt++] = ta; if (n > taset->max) /* store the transaction and */ taset->max = n; /* update maximal transaction size */ for (p = ta->items +(ta->cnt = n); --n >= 0; ) *--p = items[n]; /* copy the items of the t.a. */ return 0; /* return 'ok' */} /* tas_add() *//*--------------------------------------------------------------------*/void tas_recode (TASET *taset, int *map, int max){ /* --- recode items */ int i, k, n, x; /* loop variables, buffer */ TRACT *t; /* to traverse the transactions */ int *p; /* to traverse the item identifiers */ taset->max = 0; /* clear the maximal transaction size */ for (n = taset->cnt; --n >= 0; ) { t = taset->tracts[n]; /* traverse the transactions and */ p = t->items; /* the items of each transaction */ for (i = k = 0; i < t->cnt; i++) { x = map[p[i]]; /* recode the items and */ if (x <= max) p[k++] = x; /* remove superfluous items */ } /* from the transaction */ if (k > taset->max) /* update the max. transaction size */ taset->max = k; /* with the new size of the t.a. */ ta_sort(t->items, t->cnt = k); } /* resort the item identifiers */} /* tas_recode() *//*--------------------------------------------------------------------*/void tas_sort (TASET *taset){ /* --- sort a transaction set */ assert(taset); /* check the function argument */ v_sort(taset->tracts, taset->cnt, ta_cmp, NULL);} /* tas_sort() *//*--------------------------------------------------------------------*/int tas_occur (TASET *taset, const int *items, int n){ /* --- count transaction occurrences */ int l, r, m, k = taset->cnt; /* index variables */ assert(taset && items); /* check the function arguments */ for (r = m = 0; r < k; ) { /* find right boundary */ m = (r + k) >> 1; /* by a binary search */ if (ta_cmpx(taset->tracts[m], items, n) > 0) k = m; else r = m+1; } for (l = m = 0; l < k; ) { /* find left boundary */ m = (l + k) >> 1; /* by a binary search */ if (ta_cmpx(taset->tracts[m], items, n) < 0) l = m+1; else k = m; } return r -l; /* compute the number of occurrences */} /* tas_count *//*--------------------------------------------------------------------*/#ifndef NDEBUGvoid tas_show (TASET *taset){ /* --- show a transaction set */ int i, k; /* loop variables */ TRACT *t; /* to traverse the transactions */ assert(taset); /* check the function argument */ for (i = 0; i < taset->cnt; i++) { t = taset->tracts[i]; /* traverse the transactions */ for (k = 0; k < t->cnt; k++) { /* traverse the items */ if (k > 0) putc(' ', stdout); /* print a separator */ printf(is_name(taset->itemset, t->items[k])); } /* print the next item */ putc('\n', stdout); /* terminate the transaction */ } /* finally print the number of t.a. */ printf("%d transaction(s)\n", taset->cnt);} /* tas_show() */#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -