getpat.c
来自「NIST Handwriting OCR Testbed」· C语言 代码 · 共 356 行
C
356 行
/* Contains:# proc: getpat - Reads an MLP patterns file.# proc: got_mmm - Tries to read just mpats, minps and mouts from a patterns# proc: file.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <sex.h>#include <mlp/lims.h>#include <mlp/defs.h>#include <mlp/rd_words.h>#include <mlp/parms.h>/********************************************************************//* getpat: Reads a patterns file. Stores the feature vectors (onlythe first ninps elts of each vector, as specified). If the targetsare represented in the file as classes, i.e. indices (pindex TRUE),then the indices are read into idpat. If the targets are representedin the file as target vectors, then these are either read into thetarget buffer (if purpose is FITTER, i.e. function-approximator), orthey are converted into class indices which are then stored (ifpurpose is CLASSIFIER; this saves memory). This routine also returnsthe long (full) names of the classes, and the full number of patternsin the patterns file.Input args: patterns_infile: File containing the patterns, i.e. feature-vector/ class or feature-vector/target-vector pairs. patsfile_ascii_or_binary: ASCII (BINARY) if patterns_infile is in ascii (fortran-style binary) format. npats: Number of (first) patterns that should be read. (Must be <= the full number of patterns stored in the patterns file.) ninps: Number of (first) elements to read of each feature vector. (Must be <= the full number of elts in each feature vector of the patterns file.) nouts: Number of outputs. (Must equal the number of outputs as specified in the patterns file.) purpose: CLASSIFIER or FITTER. Indicates whether the net is to be trained to classify, or to do function-fitting (approximation). trgoff: This controls how extremely the target values vary. If purpose is CLASSIFIER but the patterns file contains target vectors rather than classes, this routine needs to know trgoff so it can compute the "high" value presumed to occur at the target vector position corresponding to the class.Output args: long_classnames: Long names of the classes. This is an array of nouts strings, with the outer array (of pointers), and the strings, allocated by this routine. (Define a char-pointer-pointer and use an &.) vinp: Input activations, i.e. the feature vectors of the patterns, in an npats by ninps "matrix" allocated by this routine. target: If purpose is FITTER, then this will contain target vectors, in an npats by nouts "matrix" allocated by this routine; otherwise (CLASSIFIER), this will come back just (float *)NULL. idpat: If purpose is CLASSIFIER, then this will contain the actual classes of the patterns, in a buffer of npats shorts allocated by this routine; otherwise (FITTER), this will come back just (short *)NULL. mpats: Full number of patterns in the patterns file.*/voidgetpat(patterns_infile, patsfile_ascii_or_binary, npats, ninps, nouts, purpose, trgoff, long_classnames, vinp, target, idpat, mpats)char patterns_infile[], patsfile_ascii_or_binary, purpose;float trgoff, **vinp, **target;short **idpat;int npats, ninps, nouts, *mpats;char ***long_classnames;{ FILE *fp; char str[200], *cp, *cp_e, **a_name; static char first = TRUE; short *idpat_p; int nbytes, nbytes_expected, minps, mouts, idum1, idum2, idum3, j, p, pindex, tmpint; static int minps_maxsofar, nouts_maxsofar; float tmean, highval, *target_p, *vinp_p, *afp; static float *vtmp, *targvec; if((fp = fopen(patterns_infile, "rb")) == (FILE *)NULL) syserr("getpat", "fopen for reading", patterns_infile); fprintf(stderr, " Reading patterns...\n"); /* Read header info. */ if(patsfile_ascii_or_binary == ASCII) fscanf(fp, "%d %d %d %d %d %d", mpats, &minps, &mouts, &idum1, &idum2, &idum3); else { /* patsfile_ascii_or_binary == BINARY */ nbytes_expected = 6 * sizeof(int); fread(&nbytes, sizeof(int), 1, fp); nbytes = swap_word(nbytes); if(nbytes != nbytes_expected) { sprintf(str, "nbytes is %d, but nbytes_expected is %d", nbytes, nbytes_expected); fatalerr("getpat", str, NULL); } fread(mpats, sizeof(int), 1, fp); *mpats = swap_word(*mpats); fread(&minps, sizeof(int), 1, fp); minps = swap_word(minps); fread(&mouts, sizeof(int), 1, fp); mouts = swap_word(mouts); fread(&idum1, sizeof(int), 1, fp); idum1 = swap_word(idum1); fread(&idum2, sizeof(int), 1, fp); idum2 = swap_word(idum2); fread(&idum3, sizeof(int), 1, fp); idum3 = swap_word(idum3); fread(&nbytes, sizeof(int), 1, fp); nbytes = swap_word(nbytes); if(nbytes != nbytes_expected) { sprintf(str, "nbytes is %d, but nbytes_expected is %d", nbytes, nbytes_expected); fatalerr("getpat", str, NULL); } } /* Maintain buffers at sufficient sizes. */ if(first || (minps > minps_maxsofar)) { if(!first) free((char *)vtmp); minps_maxsofar = minps; if((vtmp = (float *)malloc(minps * sizeof(float))) == (float *)NULL) syserr("getpat", "malloc", "vtmp"); } if(first || (nouts > nouts_maxsofar)) { if(!first) free((char *)targvec); nouts_maxsofar = nouts; if((targvec = (float *)malloc(nouts * sizeof(float))) == (float *)NULL) syserr("getpat", "malloc", "targvec"); } first = FALSE; if(npats > *mpats) { sprintf(str, "no. of first patterns to be read (npats, %d) is\n\> no. of patterns in file (*mpats, %d)\n", npats, *mpats); fatalerr("getpat", str, NULL); } if(ninps > minps) { sprintf(str, "no. of first inputs to be read (ninps, %d) is\n\> no. of inputs in file (minps, %d)\n", ninps, minps); fatalerr("getpat", str, NULL); } if(nouts != mouts) { sprintf(str, "no. of outputs to use (nouts, %d) is different \than\nno. of outputs in file (mouts, %d)\n", nouts, mouts); fatalerr("getpat", str, NULL); } pindex = (idum1 == 1); if(pindex && (purpose == FITTER)) fatalerr("getpat", "Target pointers cannot be used if purpose \is FITTER:\nThis mode allows for varying target vectors.", NULL); /* Correspondence of class to output position. (Read the long (full) names of the classes.) */ if((*long_classnames = (char **)malloc(nouts * sizeof(char *))) == (char **)NULL) syserr("getpat", "malloc", "*long_classnames"); if(patsfile_ascii_or_binary == ASCII) for(j = 0, a_name = *long_classnames; j < nouts; j++, a_name++) { if((*a_name = malloc(LONG_CLASSNAME_MAXSTRLEN + 1)) == (char *)NULL) syserr("getpat", "malloc", "*a_name"); fscanf(fp, "%s", *a_name); } else { nbytes_expected = nouts * LONG_CLASSNAME_MAXSTRLEN; fread(&nbytes, sizeof(int), 1, fp); nbytes = swap_word(nbytes); if(nbytes != nbytes_expected) { sprintf(str, "nbytes is %d, but nbytes_expected is %d", nbytes, nbytes_expected); fatalerr("getpat", str, NULL); } for(j = 0, a_name = *long_classnames; j < nouts; j++, a_name++) { if((*a_name = malloc(LONG_CLASSNAME_MAXSTRLEN + 1)) == (char *)NULL) syserr("getpat", "malloc", "*a_name"); fread(*a_name, LONG_CLASSNAME_MAXSTRLEN, 1, fp); for(cp_e = (cp = *a_name) + LONG_CLASSNAME_MAXSTRLEN; cp < cp_e && !strchr(" \t\n", *cp); cp++); *cp = '\0'; } fread(&nbytes, sizeof(int), 1, fp); nbytes = swap_word(nbytes); if(nbytes != nbytes_expected) { sprintf(str, "nbytes is %d, but nbytes_expected is %d", nbytes, nbytes_expected); fatalerr("getpat", str, NULL); } } /* If trgoff is 0., then training is to 0. and 1.; if trgoff is 1., then all are trained to 1./nouts; trgoff values between 0. and 1. produce effects between these extremes. */ tmean = 1. / nouts; highval = tmean + (1. - tmean) * (1. - trgoff); /* lowval is not used in this routine, but leave its definition here: */ /* lowval = tmean + (-tmean) * (1. - trgoff); */ /* Read the patterns and store appropriate data into memory. (Only load first ninps (can be < minps) elts of each feature vector.) First allocate output buffers. */ if((*vinp = (float *)malloc(npats * ninps * sizeof(float))) == (float *)NULL) syserr("getpat", "malloc", "*vinp"); if(purpose == CLASSIFIER) { if((*idpat = (short *)malloc(npats * sizeof(short))) == (short *)NULL) syserr("getpat", "malloc", "*idpat"); idpat_p = *idpat; *target = (float *)NULL; } else { /* purpose == FITTER */ if((*target = (float *)malloc(npats * nouts * sizeof(float))) == (float *)NULL) syserr("getpat", "malloc", "*target"); target_p = *target; *idpat = (short *)NULL; } for(p = 0, vinp_p = *vinp; p < npats; p++, vinp_p += ninps) { /* Read next feature vector. Read whole vector (minps elts) to get through it, but then copy, as current vector, only its first ninp elts. */ rd_words(patsfile_ascii_or_binary, fp, minps, 1, FLOAT, vtmp); bcopy(vtmp, vinp_p, ninps * sizeof(float)); /* Read target index or target vector. */ if(pindex) { /* File contains target indices (classes); store the target index, but using only a short instead of an int. */ rd_words(patsfile_ascii_or_binary, fp, 1, 1, INT, &tmpint); *idpat_p = --tmpint; /* [The decrementing is done so that old patfiles, in which indexing starts at 1, will still work. Eventually, patfiles should be redefined to index starting at 0, so decrementing won't be necessary.] */ } else { /* File contains target vectors. */ if(purpose == FITTER) /* load the target vector */ rd_words(patsfile_ascii_or_binary, fp, nouts, 1, FLOAT, target_p); else { /* purpose == CLASSIFIER; save memory by converting target vector to a class and loading that into idpat. Assume the identifier is the only activation near highval. Assign valid id (class) in range 0, 1, ..., nouts-1, or error id -1 (no high activation), or error id -2 (more than one high activation). */ rd_words(patsfile_ascii_or_binary, fp, nouts, 1, FLOAT, targvec); *idpat_p = -1; for(j = 0, afp = targvec; j < nouts; j++, afp++) if(*afp >= highval - 0.001) *idpat_p = ((*idpat_p == -1) ? j : -2); if(purpose == CLASSIFIER && *idpat_p < 0) { sprintf(str, "purpose is CLASSIFIER, but pattern p = %d \has error id %d", p, *idpat_p); fatalerr("getpat", str, NULL); } } } if(purpose == CLASSIFIER) idpat_p++; else /* purpose == FITTER */ target_p += nouts; } /* loop over patterns */ fclose(fp); fprintf(stderr, " ...done\n\n");}/********************************************************************//* got_mmm: Tries to read just mpats, minps, and mouts (see theircomments below for explanation) from a patterns file.Input args: patterns_infile: Patterns file. patsfile_ascii_or_binary: Storage mode of pattern file: ASCII or BINARY.Output args: mpats: Number of patterns in pattern file. minps: Number of elements in each feature vector of pattern file. mouts: Number of elements in each target vector, or number of defined classes, of pattern file. errstr: If routine successfully reads mpats, etc. (i.e. return function value TRUE), then this comes back an empty string; otherwise (return function value FALSE) it comes back containing an error message suitable for sending to eb_cat_e. Buffer must be provided by caller already allocated to sufficient size; 200 bytes is very likely to be sufficient.Return value: TRUE (FALSE) if routine was (was not) able to read mpats, etc. from patterns file.*/chargot_mmm(patterns_infile, patsfile_ascii_or_binary, mpats, minps, mouts, errstr)char patterns_infile[], patsfile_ascii_or_binary, errstr[];int *mpats, *minps, *mouts;{ FILE *fp; char ok; int nbytes; if((fp = fopen(patterns_infile, "rb")) == (FILE *)NULL) { sprintf(errstr, "unable to fopen patterns file %s for reading", patterns_infile); return FALSE; } ok = (patsfile_ascii_or_binary == ASCII ? (fscanf(fp, "%d %d %d", mpats, minps, mouts) == 3) : (fread(&nbytes, sizeof(int), 1, fp) == 1 && fread(mpats, sizeof(int), 1, fp) == 1 && fread(minps, sizeof(int), 1, fp) == 1 && fread(mouts, sizeof(int), 1, fp) == 1)); fclose(fp); if(ok){ nbytes = swap_word(nbytes); if(nbytes == 6 * sizeof(int)){ *mpats = swap_word(*mpats); *minps = swap_word(*minps); *mouts = swap_word(*mouts); strcpy(errstr, ""); return TRUE; } else{ sprintf(errstr, "patterns file %s has improper format", patterns_infile); return FALSE; } } else{ sprintf(errstr, "patterns file %s has improper format", patterns_infile); return FALSE; }}/********************************************************************/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?