📄 cli.c
字号:
if (fwexp < 0) error(E_MFEXP, fwexp); if (moment < 0) error(E_MOMENT, moment); if (maxchg < 1) error(E_UPDPAR, maxchg); if (growth < 1) error(E_UPDPAR, growth); if (shrink < 0) error(E_UPDPAR, shrink); if (rates[0] < 0) error(E_LRATE, rates[0]); if (rates[1] < 0) error(E_LRATE, rates[1]); if (rates[2] < 0) error(E_LRATE, rates[2]); if (epochs < 0) error(E_EPOCHS, epochs); dseed(seed); /* init. the random number generator */ if (!(fn_cls && *fn_cls)) report = 0; if (report) { /* if to report intermediate results */ buf = malloc((strlen(fn_cls) +20) *sizeof(char)); if (!buf) error(E_NOMEM); /* create a buffer for the file names */ } /* that have to be constructed */ if (matinp) { /* if matrix version */ /* --- read cluster description --- */ if (k > 2) { /* if an input file is given */ t = clock(); /* start the timer */ scan = sc_create(fn_in); /* create a scanner */ if (!scan) error((!fn_in || !*fn_in) ? E_NOMEM : E_FOPEN, fn_in); fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if (sc_nexter(scan) < 0) error(E_PARSE, sc_fname(scan)); clset = cls_parse(scan); /* parse the cluster descriptions */ if (!clset || !sc_eof(scan)) error(E_PARSE, sc_fname(scan)); sc_delete(scan); scan = NULL; /* delete the scanner */ attcnt = cls_incnt(clset); /* get the number of attributes */ clscnt = cls_clscnt(clset); /* and the number of clusters */ fprintf(stderr, "[%d cluster(s)] ", clscnt); fprintf(stderr, "done [%.2fs].", SEC_SINCE(t)); } /* print a success message */ /* --- read data tuples --- */ t = clock(); /* start the timer */ tscan = ts_create(); /* create a table scanner and */ if (!tscan) error(E_NOMEM); /* set the separator characters */ if (blanks) ts_chars(tscan, TS_BLANK, blanks); if (fldseps) ts_chars(tscan, TS_FLDSEP, fldseps); if (recseps) ts_chars(tscan, TS_RECSEP, recseps); if (comment) ts_chars(tscan, TS_COMMENT, comment); ts_chars(tscan,TS_NULL,""); /* remove the null value characters */ if (fn_tab && *fn_tab) /* if a file name is given, */ in = fopen(fn_tab, "r"); /* open the file for reading */ else { /* if no file name is given, */ in = stdin; fn_tab = "<stdin>"; } /* use std. input */ fprintf(stderr, "\nreading %s ... ", fn_tab); if (!in) error(E_FOPEN, fn_tab); matrix = mat_readx(tscan, in, 0, attcnt); if (!matrix) { /* read the data tuples */ tse = ts_info(tscan); /* on error get the error info. */ error(tse->code, fn_tab, tse->rec, tse->s, tse->fld, tse->exp); } /* abort with an error message */ if (ts_delim(tscan) != TS_EOF) /* check for end of file */ error(E_VALUE, fn_tab, tplcnt+1, "\"\"", 1); if (in != stdin) { /* if not read from standard input, */ fclose(in); in = NULL; } /* close the input file */ tplcnt = mat_rowcnt(matrix);/* get the number of data points */ attcnt = mat_colcnt(matrix);/* and their dimensionality */ if (trgname) /* get the number of excluded columns */ exclude = (int)strtol(trgname, NULL, 0); if (exclude >= attcnt) /* check the number of columns */ error(E_FLDCNT, fn_tab, 1, attcnt, exclude +1); if (exclude > 0) { /* if not to use all columns */ submat = mat_create(tplcnt, attcnt -= exclude); if (!submat) error(E_NOMEM); /* create a new matrix */ mat_sub(submat, matrix, 0, 0); mat_delete(matrix); matrix = submat; } /* cut out columns to use */ if (tplcnt <= 0) error(E_TPLCNT); fprintf(stderr, "[%d pattern(s)] ", tplcnt); fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)); } else { /* if table version */ /* --- read attribute set --- */ t = clock(); /* start the timer */ scan = sc_create(fn_dom); /* create a scanner */ if (!scan) error((!fn_dom || !*fn_dom) ? E_NOMEM : E_FOPEN, fn_dom); attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM);/* create an attribute set */ fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if ((sc_nexter(scan) < 0)/* start scanning (get first token) */ || (as_parse(attset, scan, AT_ALL) != 0) || (as_attcnt(attset) <= 0)) /* parse attribute set */ error(E_PARSE, sc_fname(scan)); for (i = as_attcnt(attset); --i >= 0; ) { att = as_att(attset, i); /* traverse the attributes */ k = att_getdir(att); /* and get their directions */ att_setmark(att, ((k == DIR_IN) || (k == DIR_OUT)) ? -1 : +1); } /* mark input and output attributes */ as_attcut(NULL, attset, AS_MARKED); attcnt = as_attcnt(attset); /* cut all other attributes */ for (i = attcnt; --i >= 0; ) { att = as_att(attset, i); /* traverse the parsed attributes */ att_setmark(att, (att_getdir(att) == DIR_IN) ? +1 : -1); } /* use only the input attributes */ if (trgname) { /* if a target attribute is given */ for (i = attcnt; --i >= 0; ) /* remove attribute directions */ att_setdir(as_att(attset, i), DIR_IN); k = as_attid(attset, trgname); if (k < 0) error(E_TARGET, trgname); att = as_att(attset, k); /* get the target attribute */ att_setmark(att, -1); /* mark it as not to be used */ att_setdir(att, DIR_OUT); /* and as having out direction */ } /* --- read cluster set --- */ if ((sc_token(scan) == T_ID)/* if there is a cluster set */ && ((strcmp(sc_value(scan), "clset") == 0) || (strcmp(sc_value(scan), "clusters") == 0))) { attmap = am_create(attset, AM_MARKED, 1.0); if (!attmap) error(E_NOMEM); /* parse the cluster set */ clset = cls_parsex(scan, attmap, 1); if (!clset || !sc_eof(scan)) error(E_PARSE, sc_fname(scan)); fprintf(stderr, "[%d attribute(s), ", attcnt); fprintf(stderr, "%d cluster(s)] ", cls_clscnt(clset)); } else { /* if there is no cluster set */ if (!sc_eof(scan)) error(E_PARSE, sc_fname(scan)); fprintf(stderr, "[%d attribute(s)] ", attcnt); } /* print a success message */ fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)); sc_delete(scan); scan = NULL; /* delete the scanner */ /* --- read table --- */ as_chars(attset, recseps, fldseps, blanks, "", comment); table = io_tabin(attset, fn_hdr, fn_tab, flags, "table", 2); if (!table) error(1); /* read the table file */ } /* if (matinp) .. else .. */ /* --- create cluster set --- */ t = clock(); /* start the timer */ fprintf(stderr, "inducing clusters ... "); if (!clset) { /* if no cluster set was read */ if (matinp) { /* if matrix version */ clset = cls_create(mat_colcnt(matrix), clscnt); if (!clset) error(E_NOMEM); } /* create a new cluster set */ else { /* if table version */ attmap = am_create(attset, AM_MARKED, 1.0); if (!attmap) error(E_NOMEM); /* create an attribute map */ clset = cls_createx(attmap, clscnt); if (!clset) error(E_NOMEM); /* create a new cluster set */ } /* if (matinp) .. else .. */ if (matinp) { /* if matrix version */ for (i = tplcnt; --i >= 0; ) /* determine the ranges of values */ cls_reg(clset, mat_row(matrix, i), 1); if (irnorm) /* if to normalize the data, */ cls_reg(clset, NULL, 0); } /* compute the scaling factors */ else { /* if table version */ for (i = tab_tplcnt(table); --i >= 0; ) /* determine the */ cls_regx(clset, tab_tpl(table, i)); /* ranges of values */ if (irnorm) /* if to normalize the input ranges, */ cls_regx(clset, NULL); /* compute the scaling factors */ } /* if (matinp) .. else .. */ if ((inimode & CLS_MODE) != CLS_POINTS) cls_init(clset, inimode, range, drand, NULL); else { /* if to select random data points */ if (matinp) { /* if matrix version */ mat_shuffle(matrix, drand); /* shuffle the data points */ for (i = clscnt; --i >= 0; ) { cls_init(clset, inimode, range, drand, mat_row(matrix, i % tplcnt)); } } /* use the first clscnt tuples */ else { /* if table version */ tab_shuffle(table, 0, INT_MAX, drand); k = tab_tplcnt(table); /* shuffle the data tuples */ for (i = clscnt; --i >= 0; ) { cls_valuex(clset, tab_tpl(table, i % k)); cls_init(clset, inimode, range, drand, NULL); } /* use the first clscnt tuples */ } /* (i.e. their corresp. data vectors) */ } /* as the initial cluster centers */ cls_type (clset, type, radius *radius); owrite = -1; /* set the cluster type */ } /* and prepare overwriting */ if (owrite) { /* if to overwrite the parameters, */ cls_type(clset, type, -1); /* set the cluster parameters */ if (gauss) cls_radfn(clset, rf_gauss, rfd_gauss, rfnps); else cls_radfn(clset, rf_cauchy, rfd_cauchy, rfnps); cls_norm (clset, nrmmode, nrmps); cls_noise(clset, noise); /* (these parameters can be read */ cls_fwexp(clset, fwexp); /* from the cluster input file */ cls_msexp(clset, msexp); /* and may be replaced) */ } cls_setup(clset); /* finally set up the cluster set */ /* --- do clustering --- */ cls_method (clset, method); /* set the parameter update method */ cls_lrate (clset, rates, decay); /* set the learning rates, */ cls_moment (clset, moment); /* the momentum coefficient, */ cls_factors(clset, growth, shrink); /* the update factors and */ cls_limits (clset, 1, maxchg); /* the update limits */ cls_regular(clset, regps); /* set the regularization parameters */ if (update <= 0) shuffle = 0; /* suppress unnecessary shuffling */ k = update; /* initialize the tuple counter */ chg = trmchg +trmchg; /* and the change value */ for (n = 0; n < epochs; n++){ /* compute a maximum number of epochs */ if ((n & 0x00ff) == 0) /* print the current number of epochs */ fprintf(stderr, "%8d\b\b\b\b\b\b\b\b", n); if (report) { /* if to report intermediate steps */ sprintf(buf, "%s%d", fn_cls, n); out = fopen(buf, "w"); /* open the output file */ if (!out) error(E_FOPEN, buf); if (!matinp) { /* if table version */ if (as_desc(attset, out, AS_TITLE|AS_IVALS|AS_DIRS,maxlen) != 0) error(E_FWRITE, buf); /* describe attribute domains */ fprintf(out, "\n"); /* leave one line empty */ } if (cls_desc(clset, out, dmode, maxlen) != 0) error(E_FWRITE, buf); /* describe the cluster set */ i = fclose(out); out = NULL; /* close the output file */ if (i != 0) error(E_FWRITE, buf); } /* check for a write error */ chg = 0; /* clear the maximal change */ if (matinp) { /* if matrix version */ if (shuffle || (n == 0)) /* shuffle the data tuples */ mat_shuffle(matrix, drand); for (i = tplcnt; --i >= 0; ) { /* traverse the tuples */ cls_aggr(clset, mat_row(matrix, i), 1); if ((update > 0) && (--k <= 0)) { k = update; c = cls_update(clset, (n < conly)); if (c > chg) chg = c; /* aggregate the data vectors, */ } /* update in regular intervals, */ } } /* and determine the maximal change */ else { /* if table version */ if (shuffle || (n == 0)) /* shuffle the data tuples */ tab_shuffle(table, 0, INT_MAX, drand); for (i = tab_tplcnt(table); --i >= 0; ) { tpl = tab_tpl(table, i);/* traverse the tuples and */ cls_valuex(clset, tpl); /* set the corresponding data vector */ cls_aggr(clset, NULL, tpl_getwgt(tpl)); if ((update > 0) && (--k <= 0)) { k = update; c = cls_update(clset, (n < conly)); if (c > chg) chg = c; /* aggregate the data vectors, */ } /* update in regular intervals, */ } /* and determine the maximal change */ } /* if (matinp) .. else .. */ if (update <= 0) /* if no number of tuples is given, */ chg = cls_update(clset, (n < conly));/* update once per epoch */ if ((n > conly) && (chg <= trmchg)) { n++; break; } /* check the termination criterion */ } /* print a success message */ fprintf(stderr, "[%d epoch(s)] ", n); fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)); /* --- write cluster set --- */ t = clock(); /* start the timer */ if (report) { /* construct final file name */ sprintf(buf, "%s%d", fn_cls, n); fn_cls = buf; } if (fn_cls && *fn_cls) /* if an output file name is given, */ out = fopen(fn_cls, "w"); /* open the output file */ else { /* if no output file name is given, */ out = stdout; fn_cls = "<stdout>"; } /* write to std. output */ fprintf(stderr, "writing %s ... ", fn_cls); if (!out) error(E_FOPEN, fn_cls); if (!matinp) { /* if table version */ if (as_desc(attset, out, AS_TITLE|AS_IVALS|AS_DIRS, maxlen) != 0) error(E_FWRITE, fn_cls); /* describe attribute domains */ fprintf(out, "\n"); /* leave one line empty */ } cls_sort(clset); /* sort clusters by their centers */ if (cls_desc(clset, out, dmode, maxlen) != 0) error(E_FWRITE, fn_cls); /* describe the cluster set */ if (out != stdout) { /* if not written to standard output, */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_cls); } /* check for a write error and */ fprintf(stderr, "[%d cluster(s)] ", clscnt); fprintf(stderr, "done [%.2fs].\n", SEC_SINCE(t)); /* --- clean up --- */ #ifndef NDEBUG if (matinp) { /* if matrix version */ cls_delete(clset); /* delete the cluster set, */ mat_delete(matrix); /* the data tuples, */ ts_delete(tscan); } /* and the table scanner */ else { /* if table version */ tab_delete(table, 0); /* delete the data tuples */ cls_deletex(clset, 1); /* and the cluster set */ } if (buf) free(buf); /* delete the file name buffer */ #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -