getpat.big

来自「NIST Handwriting OCR Testbed」· BIG 代码 · 共 332 行

BIG
332
字号
/* Contains:# proc: getpat - Reads an MLP patterns file.# proc: got_mmm - Tries to read just mpats, minps and mouts from a patterns# proc:           file.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <mlp/lims.h>#include <mlp/defs.h>#include <mlp/rd_words.h>#include <mlp/parms.h>/********************************************************************//* getpat: Reads a patterns file.  Stores the feature vectors (onlythe first ninps elts of each vector, as specified).  If the targetsare represented in the file as classes, i.e. indices (pindex TRUE),then the indices are read into idpat.  If the targets are representedin the file as target vectors, then these are either read into thetarget buffer (if purpose is FITTER, i.e. function-approximator), orthey are converted into class indices which are then stored (ifpurpose is CLASSIFIER; this saves memory).  This routine also returnsthe long (full) names of the classes, and the full number of patternsin the patterns file.Input args:  patterns_infile: File containing the patterns, i.e. feature-vector/    class or feature-vector/target-vector pairs.  patsfile_ascii_or_binary: ASCII (BINARY) if patterns_infile is in    ascii (fortran-style binary) format.  npats: Number of (first) patterns that should be read.  (Must be <=    the full number of patterns stored in the patterns file.)  ninps: Number of (first) elements to read of each feature vector.    (Must be <= the full number of elts in each feature vector of the    patterns file.)  nouts: Number of outputs.  (Must equal the number of outputs as    specified in the patterns file.)  purpose: CLASSIFIER or FITTER.  Indicates whether the net is to be    trained to classify, or to do function-fitting (approximation).  trgoff: This controls how extremely the target values vary.  If    purpose is CLASSIFIER but the patterns file contains target    vectors rather than classes, this routine needs to know trgoff so    it can compute the "high" value presumed to occur at the target    vector position corresponding to the class.Output args:  long_classnames: Long names of the classes.  This is an array of    nouts strings, with the outer array (of pointers), and the    strings, allocated by this routine.  (Define a    char-pointer-pointer and use an &.)  vinp: Input activations, i.e. the feature vectors of the patterns,    in an npats by ninps "matrix" allocated by this routine.  target: If purpose is FITTER, then this will contain target vectors,    in an npats by nouts "matrix" allocated by this routine; otherwise    (CLASSIFIER), this will come back just (float *)NULL.  idpat: If purpose is CLASSIFIER, then this will contain the actual    classes of the patterns, in a buffer of npats shorts allocated by    this routine; otherwise (FITTER), this will come back just    (short *)NULL.  mpats: Full number of patterns in the patterns file.*/voidgetpat(patterns_infile, patsfile_ascii_or_binary, npats, ninps, nouts,  purpose, trgoff, long_classnames, vinp, target, idpat, mpats)char patterns_infile[], patsfile_ascii_or_binary, purpose;float trgoff, **vinp, **target;short **idpat;int npats, ninps, nouts, *mpats;char ***long_classnames;{  FILE *fp;  char str[200], *cp, *cp_e, **a_name;  static char first = TRUE;  short *idpat_p;  int nbytes, nbytes_expected, minps, mouts, idum1, idum2, idum3,    j, p, pindex, tmpint;  static int minps_maxsofar, nouts_maxsofar;  float tmean, highval, *target_p, *vinp_p, *afp;  static float *vtmp, *targvec;  if((fp = fopen(patterns_infile, "rb")) == (FILE *)NULL)    syserr("getpat", "fopen for reading", patterns_infile);  fprintf(stderr, " Reading patterns...\n");  /* Read header info. */  if(patsfile_ascii_or_binary == ASCII)    fscanf(fp, "%d %d %d %d %d %d", mpats, &minps, &mouts, &idum1,      &idum2, &idum3);  else { /* patsfile_ascii_or_binary == BINARY */    nbytes_expected = 6 * sizeof(int);    fread(&nbytes, sizeof(int), 1, fp);    if(nbytes != nbytes_expected) {      sprintf(str, "nbytes is %d, but nbytes_expected is %d",        nbytes, nbytes_expected);      fatalerr("getpat", str, NULL);    }    fread(mpats, sizeof(int), 1, fp);    fread(&minps, sizeof(int), 1, fp);    fread(&mouts, sizeof(int), 1, fp);    fread(&idum1, sizeof(int), 1, fp);    fread(&idum2, sizeof(int), 1, fp);    fread(&idum3, sizeof(int), 1, fp);    fread(&nbytes, sizeof(int), 1, fp);    if(nbytes != nbytes_expected) {      sprintf(str, "nbytes is %d, but nbytes_expected is %d",        nbytes, nbytes_expected);      fatalerr("getpat", str, NULL);    }  }  /* Maintain buffers at sufficient sizes. */  if(first || (minps > minps_maxsofar)) {    if(!first)      free((char *)vtmp);    minps_maxsofar = minps;    if((vtmp = (float *)malloc(minps * sizeof(float))) ==      (float *)NULL)      syserr("getpat", "malloc", "vtmp");  }  if(first || (nouts > nouts_maxsofar)) {    if(!first)      free((char *)targvec);    nouts_maxsofar = nouts;    if((targvec = (float *)malloc(nouts * sizeof(float))) ==       (float *)NULL)      syserr("getpat", "malloc", "targvec");  }  first = FALSE;  if(npats > *mpats) {    sprintf(str, "no. of first patterns to be read (npats, %d) is\n\> no. of patterns in file (*mpats, %d)\n", npats, *mpats);    fatalerr("getpat", str, NULL);  }  if(ninps > minps) {    sprintf(str, "no. of first inputs to be read (ninps, %d) is\n\> no. of inputs in file (minps, %d)\n", ninps, minps);    fatalerr("getpat", str, NULL);  }  if(nouts != mouts) {    sprintf(str, "no. of outputs to use (nouts, %d) is different \than\nno. of outputs in file (mouts, %d)\n", nouts, mouts);    fatalerr("getpat", str, NULL);  }  pindex = (idum1 == 1);  if(pindex && (purpose == FITTER))    fatalerr("getpat", "Target pointers cannot be used if purpose \is FITTER:\nThis mode allows for varying target vectors.", NULL);  /* Correspondence of class to output position.  (Read the long  (full) names of the classes.) */  if((*long_classnames = (char **)malloc(nouts * sizeof(char *))) ==    (char **)NULL)    syserr("getpat", "malloc", "*long_classnames");  if(patsfile_ascii_or_binary == ASCII)    for(j = 0, a_name = *long_classnames; j < nouts; j++, a_name++) {      if((*a_name = malloc(LONG_CLASSNAME_MAXSTRLEN + 1)) ==        (char *)NULL)	syserr("getpat", "malloc", "*a_name");      fscanf(fp, "%s", *a_name);    }  else {    nbytes_expected = nouts * LONG_CLASSNAME_MAXSTRLEN;    fread(&nbytes, sizeof(int), 1, fp);    if(nbytes != nbytes_expected) {      sprintf(str, "nbytes is %d, but nbytes_expected is %d",        nbytes, nbytes_expected);      fatalerr("getpat", str, NULL);    }    for(j = 0, a_name = *long_classnames; j < nouts; j++, a_name++) {      if((*a_name = malloc(LONG_CLASSNAME_MAXSTRLEN + 1)) ==        (char *)NULL)	syserr("getpat", "malloc", "*a_name");      fread(*a_name, LONG_CLASSNAME_MAXSTRLEN, 1, fp);      for(cp_e = (cp = *a_name) + LONG_CLASSNAME_MAXSTRLEN; cp < cp_e        && !strchr(" \t\n", *cp); cp++);      *cp = '\0';    }    fread(&nbytes, sizeof(int), 1, fp);    if(nbytes != nbytes_expected) {      sprintf(str, "nbytes is %d, but nbytes_expected is %d",        nbytes, nbytes_expected);      fatalerr("getpat", str, NULL);    }  }  /* If trgoff is 0., then training is to 0. and 1.; if trgoff is 1.,  then all are trained to 1./nouts; trgoff values between 0. and 1.  produce effects between these extremes. */  tmean = 1. / nouts;  highval = tmean + (1. - tmean) * (1. - trgoff);  /* lowval is not used in this routine, but leave its definition  here: */  /* lowval = tmean + (-tmean) * (1. - trgoff); */  /* Read the patterns and store appropriate data into memory.  (Only  load first ninps (can be < minps) elts of each feature vector.)  First allocate output buffers. */  if((*vinp = (float *)malloc(npats * ninps * sizeof(float))) ==    (float *)NULL)    syserr("getpat", "malloc", "*vinp");  if(purpose == CLASSIFIER) {    if((*idpat = (short *)malloc(npats * sizeof(short))) ==      (short *)NULL)      syserr("getpat", "malloc", "*idpat");    idpat_p = *idpat;    *target = (float *)NULL;  }  else { /* purpose == FITTER */    if((*target = (float *)malloc(npats * nouts * sizeof(float))) ==      (float *)NULL)      syserr("getpat", "malloc", "*target");    target_p = *target;    *idpat = (short *)NULL;  }  for(p = 0, vinp_p = *vinp; p < npats; p++, vinp_p += ninps) {    /* Read next feature vector.  Read whole vector (minps elts) to get    through it, but then copy, as current vector, only its first ninp    elts. */    rd_words(patsfile_ascii_or_binary, fp, minps, 1, FLOAT, vtmp);    bcopy(vtmp, vinp_p, ninps * sizeof(float));    /* Read target index or target vector. */    if(pindex) { /* File contains target indices (classes); store the           target index, but using only a short instead of an int. */      rd_words(patsfile_ascii_or_binary, fp, 1, 1, INT, &tmpint);      *idpat_p = --tmpint; /* [The decrementing is done so that old        patfiles, in which indexing starts at 1, will still work.        Eventually, patfiles should be redefined to index starting        at 0, so decrementing won't be necessary.] */    }    else { /* File contains target vectors. */      if(purpose == FITTER) /* load the target vector */	rd_words(patsfile_ascii_or_binary, fp, nouts, 1, FLOAT,          target_p);      else { /* purpose == CLASSIFIER; save memory by converting        target vector to a class and loading that into idpat.  Assume        the identifier is the only activation near highval.  Assign        valid id (class) in range 0, 1, ..., nouts-1, or error id        -1 (no high activation), or error id -2 (more than one high        activation). */	rd_words(patsfile_ascii_or_binary, fp, nouts, 1, FLOAT,          targvec);	*idpat_p = -1;	for(j = 0, afp = targvec; j < nouts; j++, afp++)	  if(*afp >= highval - 0.001)	    *idpat_p = ((*idpat_p == -1) ? j : -2);	if(purpose == CLASSIFIER && *idpat_p < 0) {	  sprintf(str, "purpose is CLASSIFIER, but pattern p = %d \has error id %d", p, *idpat_p);	  fatalerr("getpat", str, NULL);	}      }    }    if(purpose == CLASSIFIER)      idpat_p++;    else /* purpose == FITTER */      target_p += nouts;  } /* loop over patterns */  fclose(fp);  fprintf(stderr, " ...done\n\n");}/********************************************************************//* got_mmm: Tries to read just mpats, minps, and mouts (see theircomments below for explanation) from a patterns file.Input args:  patterns_infile: Patterns file.  patsfile_ascii_or_binary: Storage mode of pattern file: ASCII or    BINARY.Output args:  mpats: Number of patterns in pattern file.  minps: Number of elements in each feature vector of pattern file.  mouts: Number of elements in each target vector, or number of    defined classes, of pattern file.  errstr: If routine successfully reads mpats, etc. (i.e. return    function value TRUE), then this comes back an empty string;    otherwise (return function value FALSE) it comes back containing    an error message suitable for sending to eb_cat_e.  Buffer must    be provided by caller already allocated to sufficient size; 200    bytes is very likely to be sufficient.Return value:  TRUE (FALSE) if routine was (was not) able to read mpats, etc. from    patterns file.*/chargot_mmm(patterns_infile, patsfile_ascii_or_binary, mpats, minps,  mouts, errstr)char patterns_infile[], patsfile_ascii_or_binary, errstr[];int *mpats, *minps, *mouts;{  FILE *fp;  char ok;  int nbytes;  if((fp = fopen(patterns_infile, "rb")) == (FILE *)NULL) {    sprintf(errstr, "unable to fopen patterns file %s for reading",      patterns_infile);    return FALSE;  }  ok = (patsfile_ascii_or_binary == ASCII ?    (fscanf(fp, "%d %d %d", mpats, minps, mouts) == 3) :    (fread(&nbytes, sizeof(int), 1, fp) == 1    && nbytes == 6 * sizeof(int)    && fread(mpats, sizeof(int), 1, fp) == 1    && fread(minps, sizeof(int), 1, fp) == 1    && fread(mouts, sizeof(int), 1, fp) == 1));  fclose(fp);  if(ok)    strcpy(errstr, "");  else    sprintf(errstr, "patterns file %s has improper format",      patterns_infile);  return ok;}/********************************************************************/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?