📄 tract.c
字号:
d = ts_next(iset->tscan, file, NULL, 0); if (d == TS_ERR) return E_FREAD; if (d != TS_REC) return E_FLDCNT; iset->app = _appcode(buf); /* get default appearance code */ if (iset->app < 0) return E_UNKAPP; while (1) { /* read item/indicator pairs */ d = ts_next(iset->tscan, file, NULL, 0); if (d <= TS_EOF) /* read the next item */ return (d == TS_ERR) ? E_FREAD : 0; if (buf[0] == '\0') /* check for end of file */ return E_ITEMEXP; /* and for a missing item */ item = nim_add(iset->nimap, buf, sizeof(ITEM)); if (item == EXISTS) return E_DUPITEM; /* add the new item */ if (item == NULL) return E_NOMEM; /* to the name/id map */ item->frq = 0; /* clear the frequency counters */ item->xfq = 0; /* (occurrence and sum of t.a. sizes) */ if (d != TS_FLD) return E_APPEXP; d = ts_next(iset->tscan, file, NULL, 0); if (d == TS_ERR) return E_FREAD; if (d == TS_FLD) return E_FLDCNT; item->app = _appcode(buf); /* get the appearance indicator */ if (item->app < 0) return E_UNKAPP; } return 0; /* return 'ok' */} /* is_readapp() *//*--------------------------------------------------------------------*/int is_read (ITEMSET *iset, FILE *file){ /* --- read a transaction */ int i, d; /* loop variable, delimiter type */ char *buf; /* read buffer */ ITEM *item; /* pointer to item */ assert(iset && file); /* check the function arguments */ iset->cnt = 0; /* initialize the item counter */ d = _get_item(iset, file); /* read the first item and */ buf = ts_buf(iset->tscan); /* get the read buffer */ if ((d == TS_EOF) /* if at the end of the file */ && (buf[0] == '\0')) /* and no item has been read, */ return 1; /* return 'end of file' */ while ((d == TS_FLD) /* read the other items */ && (buf[0] != '\0')) /* of the transaction */ d = _get_item(iset, file); /* up to the end of the record */ if (d == TS_ERR) return d; /* check for a read error */ if ((buf[0] == '\0') && (d == TS_FLD) && (iset->cnt > 0)) return E_ITEMEXP; /* check for an empty field */ ta_sort(iset->items, iset->cnt); /* prepare the transaction */ iset->cnt = ta_unique(iset->items, iset->cnt); for (i = iset->cnt; --i >= 0; ) { item = nim_byid(iset->nimap, iset->items[i]); item->frq += 1; /* count the item and */ item->xfq += iset->cnt; /* sum the transaction sizes */ } /* as an importance indicator */ iset->tac += 1; /* count the transaction */ return 0; /* return 'ok' */} /* is_read() *//*--------------------------------------------------------------------*/int is_recode (ITEMSET *iset, int minfrq, int dir, int *map){ /* --- recode items w.r.t. frequency */ int i, k, n, t; /* loop variables, buffer */ ITEM *item; /* to traverse the items */ SYMCMPFN *cmp; /* comparison function */ assert(iset); /* check the function arguments */ if (dir > 1) cmp = _asccmpx; /* get the appropriate */ else if (dir > 0) cmp = _asccmp; /* comparison function */ else if (dir >= 0) cmp = _nocmp; /* (ascending/descending) */ else if (dir > -2) cmp = _descmp; /* and sort the items */ else cmp = _descmpx; /* w.r.t. their frequency */ nim_sort(iset->nimap, cmp, (void*)minfrq, map, 1); for (n = nim_cnt(iset->nimap); --n >= 0; ) { item = (ITEM*)nim_byid(iset->nimap, n); if (item->frq < minfrq) /* determine frequent items and */ item->app = APP_NONE; /* set all others to 'ignore' */ else if (item->app != APP_NONE) break; /* in addition, skip all items */ } /* that have been set to 'ignore' */ if (map) { /* if a map vector is provided */ for (i = k = 0; i < iset->cnt; i++) { t = map[iset->items[i]]; /* traverse the current transaction */ if (t <= n) iset->items[k++] = t; } /* recode all items and */ iset->cnt = k; /* delete all items to ignore */ ta_sort(iset->items, k); /* resort the items */ } return n+1; /* return number of frequent items */} /* is_recode() *//*--------------------------------------------------------------------*/int is_filter (ITEMSET *iset, const char *marks){ /* --- filter items in transaction */ return iset->cnt = ta_filter(iset->items, iset->cnt, marks);} /* is_filter() *//*---------------------------------------------------------------------- Item Set Evaluation Functions----------------------------------------------------------------------*/ISEVAL* ise_create (ITEMSET *iset, int tacnt){ /* --- create an item set evaluation */ int i; /* loop variable */ ISEVAL *eval; /* created item set evaluator */ i = is_cnt(iset); /* get the number of items */ eval = (ISEVAL*)malloc(sizeof(ISEVAL) +(i+i) *sizeof(double)); if (!eval) return NULL; /* create an evaluation object */ eval->logfs = eval->lsums +i +1; /* and organize the memory */ eval->logta = log(tacnt); /* store log of number of trans. */ while (--i >= 0) /* compute logarithms of item freqs. */ eval->logfs[i] = log(is_getfrq(iset, i)); eval->lsums[0] = 0; /* init. first sum of logarithms */ return eval; /* return created item set evaluator */} /* ise_create() *//*--------------------------------------------------------------------*/double ise_eval (ISEVAL *eval, int *ids, int cnt, int pfx, int supp){ /* --- evaluate an item set */ double sum; /* sum of logarithms of frequencies */ sum = (pfx > 0) /* if there is a prefix, */ ? eval->lsums[pfx-1] : 0; /* get already known logarithm sum */ for ( ; pfx < cnt; pfx++) /* compute and add remaining terms */ eval->lsums[pfx] = sum += eval->logfs[ids[pfx]]; return (log(supp) -sum +(cnt-1) *eval->logta) * (1.0/LN_2);} /* ise_eval() */ /* compute logarithm of quotient *//*---------------------------------------------------------------------- Item Set Formatting Functions----------------------------------------------------------------------*/ISFMTR* isf_create (ITEMSET *iset, int scan){ /* --- create an item set formatter */ int i, k, n; /* loop variable, buffers */ int len, sum; /* length of an item name and sum */ ISFMTR *fmt; /* created item set formatter */ char buf[4*TS_SIZE+4]; /* buffer for formatting */ const char *name; /* to traverse the item names */ char *copy; /* for copies of formatted names */ n = is_cnt(iset); /* get the number of items */ fmt = (ISFMTR*)malloc(sizeof(ISFMTR) + n *sizeof(int) +(n-1) *sizeof(char*)); if (!fmt) return NULL; /* create the base structure */ fmt->buf = NULL; /* and organize the memory */ fmt->offs = (int*)(fmt->names +n); for (i = sum = fmt->cnt = 0; i < n; i++) { name = is_name(iset, i); /* traverse the item names */ len = strlen(name); /* and get their length */ sum += k = (scan) ? sc_format(buf, name, 0) : len; if (k > len) { /* if formatting was needed */ copy = (char*)malloc((k+1) *sizeof(char)); if (!copy) { fmt->cnt = i-1; isf_delete(fmt); return NULL; } name = strcpy(copy, buf); /* copy the formatted name */ } /* into a newly created string */ fmt->names[i] = name; /* store (formatted) item name */ } /* afterwards create output buffer */ if (scan) fmt->cnt = n; /* note the number of items */ fmt->buf = (char*)malloc((sum +n +1) *sizeof(char)); if (!fmt->buf) { isf_delete(fmt); return NULL; } fmt->offs[0] = 0; /* init. the first prefix offset */ return fmt; /* return created item set formatter */} /* isf_create() *//*--------------------------------------------------------------------*/void isf_delete (ISFMTR *fmt){ /* --- delete an item set formatter */ int i; /* loop variable */ for (i = fmt->cnt; --i >= 0; ) if ((fmt->names[i] != NULL) && (fmt->names[i][0] == '"')) free((void*)fmt->names[i]); if (fmt->buf) free(fmt->buf); /* delete reformatted item names, */ free(fmt); /* the output buffer and the base */} /* isf_delete() *//*--------------------------------------------------------------------*/const char* isf_format (ISFMTR *fmt, int *ids, int cnt, int pre){ /* --- format an item set */ char *p; /* to traverse the output buffer */ const char *name; /* to traverse the item names */ p = fmt->buf +fmt->offs[pre]; /* get position for appending */ while (pre < cnt) { /* traverse the additional items */ name = fmt->names[ids[pre]];/* copy the item name to the output */ while (*name) *p++ = *name++; *p++ = ' '; /* add an item separator */ fmt->offs[++pre] = (int)(p-fmt->buf); } /* record the new offset */ *p = '\0'; /* terminate the formatted item set */ fmt->len = (int)(p-fmt->buf); /* note the length of the description */ return fmt->buf; /* return the output buffer */} /* isf_format() *//*---------------------------------------------------------------------- Transaction Functions----------------------------------------------------------------------*/int ta_unique (int *items, int n){ /* --- remove duplicate items */ int *s, *d; /* to traverse the item vector */ assert(items && (n >= 0)); /* check the function arguments */ if (n <= 1) return n; /* check for 0 or 1 item */ for (d = s = items; --n > 0;) /* traverse the sorted vector */ if (*++s != *d) *++d = *s; /* and remove duplicate items */ return (int)(++d -items); /* return the new number of items */} /* ta_unique() *//*--------------------------------------------------------------------*/int ta_filter (int *items, int n, const char *marks){ /* --- filter items in a transaction */ int i, k; /* loop variables */ assert(items && (n >= 0)); /* check the function arguments */ for (i = k = 0; i < n; i++) /* remove all unmarked items */ if (marks[items[i]]) items[k++] = items[i]; return k; /* return the new number of items */} /* ta_filter() *//*--------------------------------------------------------------------*/static int ta_cmp (const void *p1, const void *p2, void *data){ /* --- compare transactions */ int k, k1, k2; /* loop variable, counters */ const int *i1, *i2; /* to traverse the item identifiers */ assert(p1 && p2); /* check the function arguments */ i1 = ((const TRACT*)p1)->items; i2 = ((const TRACT*)p2)->items; k1 = ((const TRACT*)p1)->cnt; /* get the item vectors */ k2 = ((const TRACT*)p2)->cnt; /* and the numbers of items */ for (k = (k1 < k2) ? k1 : k2; --k >= 0; i1++, i2++) { if (*i1 > *i2) return 1; /* compare corresponding items */ if (*i1 < *i2) return -1; /* and abort the comparison */ } /* if one of them is greater */ if (k1 > k2) return 1; /* if one of the transactions */ if (k1 < k2) return -1; /* is not empty, it is greater */ return 0; /* otherwise the two trans. are equal */} /* ta_cmp() *//*--------------------------------------------------------------------*/static int ta_cmpx (const TRACT *ta, const int *items, int n){ /* --- compare transactions */ int k, m; /* loop variable, counter */ const int *p; /* to traverse the item identifiers */ assert(ta && items); /* check the function arguments */ p = ta->items; m = ta->cnt; /* traverse the item vector */ m = ta->cnt; for (k = (n < m) ? n : m; --k >= 0; p++, items++) { if (*p > *items) return 1; /* compare corresponding items */ if (*p < *items) return -1; /* and abort the comparison */ } /* if one of them is greater */ if (m > n) return 1; /* if one of the transactions */ if (m < n) return -1; /* is not empty, it is greater */ return 0; /* otherwise the two trans. are equal */} /* ta_cmpx() *//*---------------------------------------------------------------------- Transaction Set Functions----------------------------------------------------------------------*/TASET* tas_create (ITEMSET *itemset){ /* --- create a transaction set */ TASET *taset; /* created transaction set */ assert(itemset); /* check the function argument */ taset = malloc(sizeof(TASET)); if (!taset) return NULL; /* create a transaction set */ taset->itemset = itemset; /* and store the item set */ taset->cnt = taset->vsz = taset->max = taset->total = 0; taset->tracts = NULL; /* initialize the other fields */ return taset; /* return the created t.a. set */} /* tas_create() *//*--------------------------------------------------------------------*/void tas_delete (TASET *taset, int delis){ /* --- delete a transaction set */ assert(taset); /* check the function argument */ if (taset->tracts) { /* if there are loaded transactions */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -