📄 tract.c
字号:
/*---------------------------------------------------------------------- File : tract.c Contents: item and transaction management Author : Christian Borgelt History : 14.02.1996 file created as apriori.c 24.06.1996 function _get_item optimized 01.07.1996 adapted to modified symtab module 04.01.1998 scan functions moved to module 'tfscan' 09.06.1998 vector enlargement modified 20.06.1998 adapted to changed st_create function 07.08.1998 bug in function _get_tract (is_read) fixed 08.08.1998 item appearances added 17.08.1998 item sorting and recoding added 02.09.1998 several assertions added 05.02.1999 long int changed to int 22.10.1999 bug in item appearances reading fixed 11.11.1999 adapted to name/identifier maps 01.12.1999 check of item appearance added to sort function 15.03.2000 removal of infrequent items added 14.07.2001 adapted to modified module tfscan 27.12.2001 item functions made a separate module 18.11.2001 transaction functions made a separate module 28.12.2001 first version of this module completed 12.01.2002 empty field at end of record reported as error 06.02.2002 item sorting reversed (ascending order) 19.02.2002 transaction tree functions added 17.07.2003 functions is_filter, ta_filter, tas_filter added 15.08.2003 bug in function tat_delete fixed 21.08.2003 parameter 'heap' added to tas_sort, tat_create 20.09.2003 empty transactions in input made possible 18.12.2003 padding for 64 bit architecture added 26.02.2004 item frequency counting moved to is_read 20.11.2004 function tat_mark added 20.06.2005 function _nocmp added for neutral sorting----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <assert.h>#include "tract.h"#include "vecops.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define BLKSIZE 256 /* block size for enlarging vectors *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*//* --- item appearance indicators --- */static const char *i_body[] = { /* item to appear in bodies only */ "i", "in", "a", "ante", "antecedent", "b", "body", NULL };static const char *i_head[] = { /* item to appear in heads only */ "o", "out", "c", "cons", "consequent", "h", "head", NULL };static const char *i_both[] = { /* item to appear in both */ "io", "inout", "ac", "bh", "both", NULL };static const char *i_ignore[] ={/* item to ignore */ "n", "neither", "none", "ign", "ignore", "-", NULL };/*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static int _appcode (const char *s){ /* --- get appearance indicator code */ const char **p; /* to traverse indicator list */ assert(s); /* check the function argument */ for (p = i_body; *p; p++) /* check 'body' indicators */ if (strcmp(s, *p) == 0) return APP_BODY; for (p = i_head; *p; p++) /* check 'head' indicators */ if (strcmp(s, *p) == 0) return APP_HEAD; for (p = i_both; *p; p++) /* check 'both' indicators */ if (strcmp(s, *p) == 0) return APP_BOTH; for (p = i_ignore; *p; p++) /* check 'ignore' indicators */ if (strcmp(s, *p) == 0) return APP_NONE; return -1; /* if none found, return error code */} /* _appcode() *//*--------------------------------------------------------------------*/static int _get_item (ITEMSET *iset, FILE *file){ /* --- read an item */ int d; /* delimiter type */ char *buf; /* read buffer */ ITEM *item; /* pointer to item */ int *vec; /* new item vector */ int size; /* new item vector size */ assert(iset && file); /* check the function arguments */ d = tfs_getfld(iset->tfscan, file, NULL, 0); buf = tfs_buf(iset->tfscan); /* read the next field (item name) */ if ((d < 0) || (buf[0] == '\0')) return d; item = nim_byname(iset->nimap, buf); if (!item) { /* look up the name in name/id map */ if (iset->app == APP_NONE) /* if new items are to be ignored, */ return d; /* do not register the item */ item = nim_add(iset->nimap, buf, sizeof(ITEM)); if (!item) return E_NOMEM; /* add the new item to the map, */ item->frq = item->xfq = 0; /* initialize the frequency counters */ item->app = iset->app; /* (occurrence and sum of t.a. sizes) */ } /* and set the appearance indicator */ size = iset->vsz; /* get the item vector size */ if (iset->cnt >= size) { /* if the item vector is full */ size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE; vec = (int*)realloc(iset->items, size *sizeof(int)); if (!vec) return E_NOMEM; /* enlarge the item vector */ iset->items = vec; iset->vsz = size; } /* set the new vector and its size */ iset->items[iset->cnt++] = item->id; return d; /* add the item to the transaction */} /* _get_item() */ /* and return the delimiter type *//*--------------------------------------------------------------------*/static int _nocmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; if (((const ITEM*)p1)->id > ((const ITEM*)p2)->id) return 1; if (((const ITEM*)p1)->id < ((const ITEM*)p2)->id) return -1; return 0; /* return sign of identifier diff. */} /* _nocmp() *//*--------------------------------------------------------------------*/static int _asccmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return 1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return -1; return 0; /* return sign of frequency diff. */} /* _asccmp() *//*--------------------------------------------------------------------*/static int _descmp (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return -1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return 1; return 0; /* return sign of frequency diff. */} /* _descmp() *//*--------------------------------------------------------------------*/static int _asccmpx (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return 1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return -1; return 0; /* return sign of frequency diff. */} /* _asccmpx() *//*--------------------------------------------------------------------*/static int _descmpx (const void *p1, const void *p2, void *data){ /* --- compare item frequencies */ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return -1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return 1; return 0; /* return sign of frequency diff. */} /* _descmpx() *//*---------------------------------------------------------------------- Item Set Functions----------------------------------------------------------------------*/ITEMSET* is_create (void){ /* --- create an item set */ ITEMSET *iset; /* created item set */ iset = malloc(sizeof(ITEMSET)); if (!iset) return NULL; /* create an item set */ iset->tfscan = tfs_create(); /* and its components */ iset->nimap = nim_create(0, 0, (HASHFN*)0, (SYMFN*)0); iset->items = (int*)malloc(BLKSIZE *sizeof(int)); if (!iset->tfscan || !iset->nimap || !iset->items) { is_delete(iset); return NULL; } iset->app = APP_BOTH; /* initialize the other fields */ iset->vsz = BLKSIZE; iset->cnt = 0; iset->chars[0] = ' '; iset->chars[1] = ' '; iset->chars[2] = '\n'; iset->chars[3] = '\0'; return iset; /* return the created item set */} /* is_create() *//*--------------------------------------------------------------------*/void is_delete (ITEMSET *iset){ /* --- delete an item set */ assert(iset); /* check the function argument */ if (iset->items) free(iset->items); if (iset->nimap) nim_delete(iset->nimap); if (iset->tfscan) tfs_delete(iset->tfscan); free(iset); /* delete the components */} /* is_delete() */ /* and the item set body *//*--------------------------------------------------------------------*/void is_chars (ITEMSET *iset, const char *blanks, const char *fldseps, const char *recseps, const char *cominds){ /* --- set special characters */ assert(iset); /* check the function argument */ if (blanks) /* set blank characters */ iset->chars[0] = tfs_chars(iset->tfscan, TFS_BLANK, blanks); if (fldseps) /* set field separators */ iset->chars[1] = tfs_chars(iset->tfscan, TFS_FLDSEP, fldseps); if (recseps) /* set record separators */ iset->chars[2] = tfs_chars(iset->tfscan, TFS_RECSEP, recseps); if (cominds) /* set comment indicators */ tfs_chars(iset->tfscan, TFS_COMMENT, cominds);} /* is_chars() *//*--------------------------------------------------------------------*/int is_item (ITEMSET *iset, const char *name){ /* --- get an item identifier */ ITEM *item = nim_byname(iset->nimap, name); return (item) ? item->id :-1; /* look up the given name */} /* is_item() */ /* in the name/identifier map */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -