📄 tract.c
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <assert.h>#include "tract.h"#include "vecops.h"#ifdef STORAGE#include "storage.h"#endif#define BLKSIZE 256 static const char *i_body[] = { "i", "in", "a", "ante", "antecedent", "b", "body", NULL };static const char *i_head[] = { "o", "out", "c", "cons", "consequent", "h", "head", NULL };static const char *i_both[] = { "io", "inout", "ac", "bh", "both", NULL };static const char *i_ignore[] ={ "n", "neither", "none", "ign", "ignore", "-", NULL };static int _appcode (const char *s){ const char **p; assert(s); for (p = i_body; *p; p++) if (strcmp(s, *p) == 0) return APP_BODY; for (p = i_head; *p; p++) if (strcmp(s, *p) == 0) return APP_HEAD; for (p = i_both; *p; p++) if (strcmp(s, *p) == 0) return APP_BOTH; for (p = i_ignore; *p; p++) if (strcmp(s, *p) == 0) return APP_NONE; return -1; } /*--------------------------------------------------------------------*/static int _get_item (ITEMSET *iset, FILE *file){ int d; char *buf; ITEM *item; int *vec; int size; assert(iset && file); d = tfs_getfld(iset->tfscan, file, NULL, 0); buf = tfs_buf(iset->tfscan); if ((d < 0) || (buf[0] == '\0')) return d; item = nim_byname(iset->nimap, buf); if (!item) { if (iset->app == APP_NONE) return d; item = nim_add(iset->nimap, buf, sizeof(ITEM)); if (!item) return E_NOMEM; item->frq = item->xfq = 0; item->app = iset->app; } size = iset->vsz; if (iset->cnt >= size) { size += (size > BLKSIZE) ? (size >> 1) : BLKSIZE; vec = (int*)realloc(iset->items, size *sizeof(int)); if (!vec) return E_NOMEM; iset->items = vec; iset->vsz = size; } iset->items[iset->cnt++] = item->id; return d; } /*--------------------------------------------------------------------*/static int _nocmp (const void *p1, const void *p2, void *data){ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->id > ((const ITEM*)p2)->id) return 1; if (((const ITEM*)p1)->id < ((const ITEM*)p2)->id) return -1; return 0; } /*--------------------------------------------------------------------*/static int _asccmp (const void *p1, const void *p2, void *data){ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return 1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return -1; return 0; } /*--------------------------------------------------------------------*/static int _descmp (const void *p1, const void *p2, void *data){ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; if (((const ITEM*)p1)->frq > ((const ITEM*)p2)->frq) return -1; if (((const ITEM*)p1)->frq < ((const ITEM*)p2)->frq) return 1; return 0; } /*--------------------------------------------------------------------*/static int _asccmpx (const void *p1, const void *p2, void *data){ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return 1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return -1; return 0; } /*--------------------------------------------------------------------*/static int _descmpx (const void *p1, const void *p2, void *data){ if (((const ITEM*)p1)->app == APP_NONE) return (((const ITEM*)p2)->app == APP_NONE) ? 0 : 1; if (((const ITEM*)p2)->app == APP_NONE) return -1; #ifdef ARCH64 if (((const ITEM*)p1)->frq < (long)data) return (((const ITEM*)p2)->frq < (long)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (long)data) return -1; #else if (((const ITEM*)p1)->frq < (int)data) return (((const ITEM*)p2)->frq < (int)data) ? 0 : 1; if (((const ITEM*)p2)->frq < (int)data) return -1; #endif if (((const ITEM*)p1)->xfq > ((const ITEM*)p2)->xfq) return -1; if (((const ITEM*)p1)->xfq < ((const ITEM*)p2)->xfq) return 1; return 0; } /*---------------------------------------------------------------------- ----------------------------------------------------------------------*/ITEMSET* is_create (void){ ITEMSET *iset; iset = malloc(sizeof(ITEMSET)); if (!iset) return NULL; iset->tfscan = tfs_create(); iset->nimap = nim_create(0, 0, (HASHFN*)0, (SYMFN*)0); iset->items = (int*)malloc(BLKSIZE *sizeof(int)); if (!iset->tfscan || !iset->nimap || !iset->items) { is_delete(iset); return NULL; } iset->app = APP_BOTH; iset->vsz = BLKSIZE; iset->cnt = 0; iset->chars[0] = ' '; iset->chars[1] = ' '; iset->chars[2] = '\n'; iset->chars[3] = '\0'; return iset; } /*--------------------------------------------------------------------*/void is_delete (ITEMSET *iset){ assert(iset); if (iset->items) free(iset->items); if (iset->nimap) nim_delete(iset->nimap); if (iset->tfscan) tfs_delete(iset->tfscan); free(iset); } /*--------------------------------------------------------------------*/void is_chars (ITEMSET *iset, const char *blanks, const char *fldseps, const char *recseps, const char *cominds){ assert(iset); if (blanks) iset->chars[0] = tfs_chars(iset->tfscan, TFS_BLANK, blanks); if (fldseps) iset->chars[1] = tfs_chars(iset->tfscan, TFS_FLDSEP, fldseps); if (recseps) iset->chars[2] = tfs_chars(iset->tfscan, TFS_RECSEP, recseps); if (cominds) tfs_chars(iset->tfscan, TFS_COMMENT, cominds);} /*--------------------------------------------------------------------*/int is_item (ITEMSET *iset, const char *name){ ITEM *item = nim_byname(iset->nimap, name); return (item) ? item->id :-1; } /*--------------------------------------------------------------------*/int is_readapp (ITEMSET *iset, FILE *file){ int d; char *buf; ITEM *item; assert(iset && file); if (tfs_skip(iset->tfscan, file) < 0) return E_FREAD; buf = tfs_buf(iset->tfscan); d = tfs_getfld(iset->tfscan, file, NULL, 0); if (d < 0) return E_FREAD; if (d >= TFS_FLD) return E_FLDCNT; iset->app = _appcode(buf); if (iset->app < 0) return E_UNKAPP; while (d > TFS_EOF) { if (tfs_skip(iset->tfscan, file) < 0) return E_FREAD; d = tfs_getfld(iset->tfscan, file, NULL, 0); if (d <= TFS_EOF) return (d < 0) ? E_FREAD : 0; if (buf[0] == '\0') return E_ITEMEXP; item = nim_add(iset->nimap, buf, sizeof(ITEM)); if (item == EXISTS) return E_DUPITEM; if (item == NULL) return E_NOMEM; item->frq = 0; item->xfq = 0; if (d < TFS_FLD) return E_APPEXP; d = tfs_getfld(iset->tfscan, file, NULL, 0); if (d < 0) return E_FREAD; if (d >= TFS_FLD) return E_FLDCNT; item->app = _appcode(buf); if (item->app < 0) return E_UNKAPP; } return 0; } /*--------------------------------------------------------------------*/int is_read (ITEMSET *iset, FILE *file){ int i, d; char *buf; ITEM *item; assert(iset && file); iset->cnt = 0; if (tfs_skip(iset->tfscan, file) < 0) return E_FREAD; d = _get_item(iset, file); buf = tfs_buf(iset->tfscan); if ((d == TFS_EOF) && (buf[0] == '\0')) return 1; while ((d == TFS_FLD) && (buf[0] != '\0')) d = _get_item(iset, file); if (d < TFS_EOF) return d; if ((buf[0] == '\0') && (d == TFS_FLD) && (iset->cnt > 0)) return E_ITEMEXP; ta_sort(iset->items, iset->cnt); iset->cnt = ta_unique(iset->items, iset->cnt); for (i = iset->cnt; --i >= 0; ) { item = nim_byid(iset->nimap, iset->items[i]); item->frq += 1; item->xfq += iset->cnt; } return 0; } /*--------------------------------------------------------------------*/int is_recode (ITEMSET *iset, int minfrq, int dir, int *map){ int i, k, n, t; ITEM *item; SYMCMPFN *cmp; assert(iset); if (dir > 1) cmp = _asccmpx; else if (dir > 0) cmp = _asccmp; else if (dir >= 0) cmp = _nocmp; else if (dir > -2) cmp = _descmp; else cmp = _descmpx; nim_sort(iset->nimap, cmp, (void*)minfrq, map, 1); for (n = nim_cnt(iset->nimap); --n >= 0; ) { item = (ITEM*)nim_byid(iset->nimap, n); if (item->frq < minfrq) item->app = APP_NONE; else if (item->app != APP_NONE) break; } if (map) { for (i = k = 0; i < iset->cnt; i++) { t = map[iset->items[i]]; if (t <= n) iset->items[k++] = t; } iset->cnt = k; ta_sort(iset->items, k); } return n+1; } /*--------------------------------------------------------------------*/int is_filter (ITEMSET *iset, const char *marks){ return iset->cnt = ta_filter(iset->items, iset->cnt, marks);} int ta_unique (int *items, int n){ int *s, *d; assert(items && (n >= 0)); if (n <= 1) return n; for (d = s = items; --n > 0;) if (*++s != *d) *++d = *s; return (int)(++d -items); } /*--------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -