⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tract.c

📁 数据挖掘Apriori算法在VC下的实现
💻 C
📖 第 1 页 / 共 3 页
字号:
/*----------------------------------------------------------------------  File    : tract.c  Contents: item and transaction management  Author  : Christian Borgelt  History : 1996.02.14 file created as apriori.c            1996.06.24 function _get_item optimized            1996.07.01 adapted to modified symtab module            1998.01.04 scan functions moved to module 'tabscan'            1998.06.09 vector enlargement modified            1998.06.20 adapted to changed st_create function            1998.08.07 bug in function _get_tract (is_read) fixed            1998.08.08 item appearances added            1998.08.17 item sorting and recoding added            1998.09.02 several assertions added            1999.02.05 long int changed to int            1999.10.22 bug in item appearances reading fixed            1999.11.11 adapted to name/identifier maps            1999.12.01 check of item appearance added to sort function            2000.03.15 removal of infrequent items added            2001.07.14 adapted to modified module tabscan            2001.12.27 item functions made a separate module            2001.11.18 transaction functions made a separate module            2001.12.28 first version of this module completed            2002.01.12 empty field at end of record reported as error            2002.02.06 item sorting reversed (ascending order)            2002.02.19 transaction tree functions added            2003.07.17 functions is_filter, ta_filter, tas_filter added            2003.08.15 bug in function tat_delete fixed            2003.08.21 parameter 'heap' added to tas_sort, tat_create            2003.09.20 empty transactions in input made possible            2003.12.18 padding for 64 bit architecture added            2004.02.26 item frequency counting moved to is_read            2004.11.20 function tat_mark added            2005.06.20 function _nocmp added for neutral sorting            2006.11.26 structures ISFMTR and ISEVAL added            2007.02.13 adapted to modified tabscan module            2008.01.25 bug in function ise_eval fixed (prefix)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <assert.h>#include <math.h>#include "tract.h"#include "scan.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define BLKSIZE  256            /* block size for enlarging vectors */#define LN_2     0.69314718055994530942   /* ln(2) *//*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*//* --- item appearance indicators --- */static const char *i_body[] = { /* item to appear in bodies only */  "i",  "in",  "a", "ante", "antecedent", "b", "body", NULL };static const char *i_head[] = { /* item to appear in heads only */  "o",  "out", "c", "cons", "consequent", "h", "head", NULL };static const char *i_both[] = { /* item to appear in both */  "io", "inout", "ac", "bh", "both",                   NULL };static const char *i_ignore[] ={/* item to ignore */  "n", "neither", "none", "ign", "ignore", "-",        NULL };/*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static int _appcode (const char *s){                               /* --- get appearance indicator code */  const char **p;               /* to traverse indicator list */  assert(s);                    /* check the function argument */  for (p = i_body;   *p; p++)   /* check 'body' indicators */    if (strcmp(s, *p) == 0) return APP_BODY;  for (p = i_head;   *p; p++)   /* check 'head' indicators */    if (strcmp(s, *p) == 0) return APP_HEAD;  for (p = i_both;   *p; p++)   /* check 'both' indicators */    if (strcmp(s, *p) == 0) return APP_BOTH;  for (p = i_ignore; *p; p++)   /* check 'ignore' indicators */    if (strcmp(s, *p) == 0) return APP_NONE;  return -1;                    /* if none found, return error code */}  /* _appcode() *//*--------------------------------------------------------------------*/static int _get_item (ITEMSET *iset, FILE *file){                               /* --- read an item */  int  d;                       /* delimiter type */  char *buf;                    /* read buffer */  ITEM *item;                   /* pointer to item */  int  *vec;                    /* new item vector */  int  size;                    /* new item vector size */  assert(iset && file);         /* check the function arguments */  d   = ts_next(iset->tscan, file, NULL, 0);  buf = ts_buf(iset->tscan);    /* read the next field (item name) */  if ((d == TS_ERR) || (buf[0] == '\0')) return d;  item = nim_byname(iset->nimap, buf);  if (!item) {                  /* look up the name in name/id map */    if (iset->app == APP_NONE)  /* if new items are to be ignored, */      return d;                 /* do not register the item */    item = nim_add(iset->nimap, buf, sizeof(ITEM));    if (!item) return E_NOMEM;  /* add the new item to the map, */    item->frq = item->xfq = 0;  /* initialize the frequency counters */    item->app = iset->app;      /* (occurrence and sum of t.a. sizes) */  }                             /* and set the appearance indicator */  size = iset->vsz;             /* get the item vector size */  if (iset->cnt >= size) {      /* if the item vector is full */    size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE;    vec   = (int*)realloc(iset->items, size *sizeof(int));    if (!vec) return E_NOMEM;   /* enlarge the item vector */    iset->items = vec; iset->vsz = size;  }                             /* set the new vector and its size */  iset->items[iset->cnt++] = item->id;  return d;                     /* add the item to the transaction */}  /* _get_item() */            /* and return the delimiter type *//*--------------------------------------------------------------------*/static int _nocmp (const void *p1, const void *p2, void *data){                               /* --- compare item frequencies */  if (((const ITEM*)p1)->app == APP_NONE)    return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1;  if (((const ITEM*)p2)->app == APP_NONE) return -1;  #ifdef ARCH64  if (((const ITEM*)p1)->frq < (long)data)    return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (long)data) return -1;  #else  if (((const ITEM*)p1)->frq < (int)data)    return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (int)data) return -1;  #endif  if (((const ITEM*)p1)->id  > ((const ITEM*)p2)->id) return  1;  if (((const ITEM*)p1)->id  < ((const ITEM*)p2)->id) return -1;  return 0;                     /* return sign of identifier diff. */}  /* _nocmp() *//*--------------------------------------------------------------------*/static int _asccmp (const void *p1, const void *p2, void *data){                               /* --- compare item frequencies */  if (((const ITEM*)p1)->app == APP_NONE)    return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1;  if (((const ITEM*)p2)->app == APP_NONE) return -1;  #ifdef ARCH64  if (((const ITEM*)p1)->frq < (long)data)    return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (long)data) return -1;  #else  if (((const ITEM*)p1)->frq < (int)data)    return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (int)data) return -1;  #endif  if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return  1;  if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return -1;  return 0;                     /* return sign of frequency diff. */}  /* _asccmp() *//*--------------------------------------------------------------------*/static int _descmp (const void *p1, const void *p2, void *data){                               /* --- compare item frequencies */  if (((const ITEM*)p1)->app == APP_NONE)    return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1;  if (((const ITEM*)p2)->app == APP_NONE) return -1;  if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return -1;  if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return  1;  return 0;                     /* return sign of frequency diff. */}  /* _descmp() *//*--------------------------------------------------------------------*/static int _asccmpx (const void *p1, const void *p2, void *data){                               /* --- compare item frequencies */  if (((const ITEM*)p1)->app == APP_NONE)    return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1;  if (((const ITEM*)p2)->app == APP_NONE) return -1;  #ifdef ARCH64  if (((const ITEM*)p1)->frq < (long)data)    return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (long)data) return -1;  #else  if (((const ITEM*)p1)->frq < (int)data)    return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (int)data) return -1;  #endif  if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return  1;  if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return -1;  return 0;                     /* return sign of frequency diff. */}  /* _asccmpx() *//*--------------------------------------------------------------------*/static int _descmpx (const void *p1, const void *p2, void *data){                               /* --- compare item frequencies */  if (((const ITEM*)p1)->app == APP_NONE)    return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1;  if (((const ITEM*)p2)->app == APP_NONE) return -1;  #ifdef ARCH64  if (((const ITEM*)p1)->frq < (long)data)    return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (long)data) return -1;  #else  if (((const ITEM*)p1)->frq < (int)data)    return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1;  if (((const ITEM*)p2)->frq < (int)data) return -1;  #endif  if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return -1;  if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return  1;  return 0;                     /* return sign of frequency diff. */}  /* _descmpx() *//*----------------------------------------------------------------------  Item Set Functions----------------------------------------------------------------------*/ITEMSET* is_create (int cnt){                               /* --- create an item set */  ITEMSET *iset;                /* created item set */  if (cnt <= 0) cnt = BLKSIZE;  /* check and adapt number of items */  iset = malloc(sizeof(ITEMSET));  if (!iset) return NULL;       /* create an item set */  iset->tscan = ts_create();    /* and its components */  ts_chars(iset->tscan, TS_NULL, "");  iset->nimap = nim_create(0, 0, (HASHFN*)0, (SYMFN*)0);  iset->items = (int*)malloc(cnt *sizeof(int));  if (!iset->tscan || !iset->nimap || !iset->items) {    is_delete(iset); return NULL; }  iset->tac = iset->cnt = 0;    /* initialize the other fields */  iset->app = APP_BOTH;  iset->vsz = cnt;  iset->chars[0] = ' ';  iset->chars[1] = ' ';  iset->chars[2] = '\n'; iset->chars[3] = '\0';  return iset;                  /* return the created item set */}  /* is_create() *//*--------------------------------------------------------------------*/void is_delete (ITEMSET *iset){                               /* --- delete an item set */  assert(iset);                 /* check the function argument */  if (iset->items) free(iset->items);  if (iset->nimap) nim_delete(iset->nimap);  if (iset->tscan) ts_delete(iset->tscan);  free(iset);                   /* delete the components */}  /* is_delete() */            /* and the item set body *//*--------------------------------------------------------------------*/void is_chars (ITEMSET *iset, const char *blanks,  const char *fldseps,                              const char *recseps, const char *comment){                               /* --- set special characters */  assert(iset);                 /* check the function argument */  if (blanks)                   /* set blank characters */    iset->chars[0] = ts_chars(iset->tscan, TS_BLANK,  blanks);  if (fldseps)                  /* set field separators */    iset->chars[1] = ts_chars(iset->tscan, TS_FLDSEP, fldseps);  if (recseps)                  /* set record separators */    iset->chars[2] = ts_chars(iset->tscan, TS_RECSEP, recseps);  if (comment)                  /* set comment indicators */    ts_chars(iset->tscan, TS_COMMENT, comment);}  /* is_chars() *//*--------------------------------------------------------------------*/int is_item (ITEMSET *iset, const char *name){                               /* --- get an item identifier */  ITEM *item = nim_byname(iset->nimap, name);  return (item) ? item->id :-1; /* look up the given name */}  /* is_item() */              /* in the name/identifier map *//*--------------------------------------------------------------------*/int is_readapp (ITEMSET *iset, FILE *file){                               /* --- read appearance indicators */  int  d;                       /* delimiter type */  char *buf;                    /* read buffer */  ITEM *item;                   /* to access the item data */  assert(iset && file);         /* check the function arguments */  buf = ts_buf(iset->tscan);    /* read the first record (one field) */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -