📄 tract.c
字号:
/*---------------------------------------------------------------------- File : tract.c Contents: item and transaction management Author : Christian Borgelt History : 1996.02.14 file created as apriori.c 1996.06.24 function _get_item optimized 1996.07.01 adapted to modified symtab module 1998.01.04 scan functions moved to module 'tabscan' 1998.06.09 vector enlargement modified 1998.06.20 adapted to changed st_create function 1998.08.07 bug in function _get_tract (is_read) fixed 1998.08.08 item appearances added 1998.08.17 item sorting and recoding added 1998.09.02 several assertions added 1999.02.05 long int changed to int 1999.10.22 bug in item appearances reading fixed 1999.11.11 adapted to name/identifier maps 1999.12.01 check of item appearance added to sort function 2000.03.15 removal of infrequent items added 2001.07.14 adapted to modified module tabscan 2001.12.27 item functions made a separate module 2001.11.18 transaction functions made a separate module 2001.12.28 first version of this module completed 2002.01.12 empty field at end of record reported as error 2002.02.06 item sorting reversed (ascending order) 2002.02.19 transaction tree functions added 2003.07.17 functions is_filter, ta_filter, tas_filter added 2003.08.15 bug in function tat_delete fixed 2003.08.21 parameter 'heap' added to tas_sort, tat_create 2003.09.20 empty transactions in input made possible 2003.12.18 padding for 64 bit architecture added 2004.02.26 item frequency counting moved to is_read 2004.11.20 function tat_mark added 2005.06.20 function _nocmp added for neutral sorting 2006.11.26 structures ISFMTR and ISEVAL added 2007.02.13 adapted to modified tabscan module 2008.01.25 bug in function ise_eval fixed (prefix)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <assert.h>#include <math.h>#include "tract.h"#include "scan.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define BLKSIZE 256 /* block size for enlarging vectors */#define LN_2 0.69314718055994530942 /* ln(2) *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*//* --- item appearance indicators --- */static const char *i_body[] = { /* item to appear in bodies only */ "i", "in", "a", "ante", "antecedent", "b", "body", NULL };static const char *i_head[] = { /* item to appear in heads only */ "o", "out", "c", "cons", "consequent", "h", "head", NULL };static const char *i_both[] = { /* item to appear in both */ "io", "inout", "ac", "bh", "both", NULL };static const char *i_ignore[] ={/* item to ignore */ "n", "neither", "none", "ign", "ignore", "-", NULL };/*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static int _appcode (const char *s){ /* --- get appearance indicator code */ const char **p; /* to traverse indicator list */ assert(s); /* check the function argument */ for (p = i_body; *p; p++) /* check 'body' indicators */ if (strcmp(s, *p) == 0) return APP_BODY; for (p = i_head; *p; p++) /* check 'head' indicators */ if (strcmp(s, *p) == 0) return APP_HEAD; for (p = i_both; *p; p++) /* check 'both' indicators */ if (strcmp(s, *p) == 0) return APP_BOTH; for (p = i_ignore; *p; p++) /* check 'ignore' indicators */ if (strcmp(s, *p) == 0) return APP_NONE; return -1; /* if none found, return error code */} /* _appcode() *//*--------------------------------------------------------------------*/static int _get_item (ITEMSET *iset, FILE *file){ /* --- read an item */ int d; /* delimiter type */ char *buf; /* read buffer */ ITEM *item; /* pointer to item */ int *vec; /* new item vector */ int size; /* new item vector size */ assert(iset && file); /* check the function arguments */ d = ts_next(iset->tscan, file, NULL, 0); buf = ts_buf(iset->tscan); /* read the next field (item name) */ if ((d == TS_ERR) || (buf[0] == '\0')) return d; item = nim_byname(iset->nimap, buf); if (!item) { /* look up the name in name/id map */ if (iset->app == APP_NONE) /* if new items are to be ignored, */ return d; /* do not register the item */ item = nim_add(iset->nimap, buf, sizeof(ITEM)); if (!item) return E_NOMEM; /* add the new item to the map, */ item->frq = item->xfq = 0; /* initialize the frequency counters */ item->app = iset->app; /* (occurrence and sum of t.a. sizes) */ } /* and set the appearance indicator */ size = iset->vsz; /* get the item vector size */ if (iset->cnt >= size) { /* if the item vector is full */ size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE; vec = (int*)realloc(iset->items, size *sizeof(int)); if (!vec) return E_NOMEM; /* enlarge the item vector */ iset->items = vec; iset->vsz = size; } /* set the new vector and its size */ iset->items[iset->cnt++] = item->id; return d; /* add the item to the transaction */} /* _get_item() */ /* and return the delimiter type *//*--------------------------------------------------------------------*/static int _nocmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->id > ((const ITEM*)p2)->id) return 1; if (((const ITEM*)p1)->id < ((const ITEM*)p2)->id) return -1; return 0; /* return sign of identifier diff. */} /* _nocmp() *//*--------------------------------------------------------------------*/static int _asccmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return 1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return -1; return 0; /* return sign of frequency diff. */} /* _asccmp() *//*--------------------------------------------------------------------*/static int _descmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return -1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return 1; return 0; /* return sign of frequency diff. */} /* _descmp() *//*--------------------------------------------------------------------*/static int _asccmpx (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return 1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return -1; return 0; /* return sign of frequency diff. */} /* _asccmpx() *//*--------------------------------------------------------------------*/static int _descmpx (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return -1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return 1; return 0; /* return sign of frequency diff. */} /* _descmpx() *//*---------------------------------------------------------------------- Item Set Functions----------------------------------------------------------------------*/ITEMSET* is_create (int cnt){ /* --- create an item set */ ITEMSET *iset; /* created item set */ if (cnt <= 0) cnt = BLKSIZE; /* check and adapt number of items */ iset = malloc(sizeof(ITEMSET)); if (!iset) return NULL; /* create an item set */ iset->tscan = ts_create(); /* and its components */ ts_chars(iset->tscan, TS_NULL, ""); iset->nimap = nim_create(0, 0, (HASHFN*)0, (SYMFN*)0); iset->items = (int*)malloc(cnt *sizeof(int)); if (!iset->tscan || !iset->nimap || !iset->items) { is_delete(iset); return NULL; } iset->tac = iset->cnt = 0; /* initialize the other fields */ iset->app = APP_BOTH; iset->vsz = cnt; iset->chars[0] = ' '; iset->chars[1] = ' '; iset->chars[2] = '\n'; iset->chars[3] = '\0'; return iset; /* return the created item set */} /* is_create() *//*--------------------------------------------------------------------*/void is_delete (ITEMSET *iset){ /* --- delete an item set */ assert(iset); /* check the function argument */ if (iset->items) free(iset->items); if (iset->nimap) nim_delete(iset->nimap); if (iset->tscan) ts_delete(iset->tscan); free(iset); /* delete the components */} /* is_delete() */ /* and the item set body *//*--------------------------------------------------------------------*/void is_chars (ITEMSET *iset, const char *blanks, const char *fldseps, const char *recseps, const char *comment){ /* --- set special characters */ assert(iset); /* check the function argument */ if (blanks) /* set blank characters */ iset->chars[0] = ts_chars(iset->tscan, TS_BLANK, blanks); if (fldseps) /* set field separators */ iset->chars[1] = ts_chars(iset->tscan, TS_FLDSEP, fldseps); if (recseps) /* set record separators */ iset->chars[2] = ts_chars(iset->tscan, TS_RECSEP, recseps); if (comment) /* set comment indicators */ ts_chars(iset->tscan, TS_COMMENT, comment);} /* is_chars() *//*--------------------------------------------------------------------*/int is_item (ITEMSET *iset, const char *name){ /* --- get an item identifier */ ITEM *item = nim_byname(iset->nimap, name); return (item) ? item->id :-1; /* look up the given name */} /* is_item() */ /* in the name/identifier map *//*--------------------------------------------------------------------*/int is_readapp (ITEMSET *iset, FILE *file){ /* --- read appearance indicators */ int d; /* delimiter type */ char *buf; /* read buffer */ ITEM *item; /* to access the item data */ assert(iset && file); /* check the function arguments */ buf = ts_buf(iset->tscan); /* read the first record (one field) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -