phrasmap.c

来自「NIST Handwriting OCR Testbed」· C语言 代码 · 共 741 行 · 第 1/2 页

C
741
字号
/*
# proc: build_phrase_map - generate an image map of the central mass of each line
# proc:                    of handprint.
# proc: phrases_from_map - sort blobs into phrase lines using the map.
# proc:
# proc: norm_small_tall_lists - sort blobs into categories of normal, small, and tall.
# proc:
# proc: pi_lines_from_map - generate lists of blob indices sorting the blobs
# proc:                     into lines.
# proc: should_split_tall - test to determine if a tall character should be split
# proc:                     across multiple lines of handprint.
# proc: tst_phrase_overlap - tests the amount of character overlap with a line in the
# proc:                      phrase map.
# proc: split_across_lines - horizontally splits a character across multiple lines.
# proc:
# proc: find_closest_line_in_map - finds the closest line in the phrase map to a
# proc:                      specified blob.
# proc: hist_blob_colors - generates a histogram from the black pixels in a blob
# proc:                    that overlap 0 or more lines in the phrase map.
# proc: draw_phrase_map - actually construct the phrase map image.
# proc:
# proc: short_phrases_to_problems - removes and appends too short phrases to the
# proc:                   problem list.
*/

/* NOTE(review): free() and memset() are called below, but <stdlib.h> and  */
/* <string.h> are not included here; presumably one of the project headers */
/* declares them -- confirm.  All functions in this file are old-style     */
/* (K&R) definitions with an implicit int return type.                     */
#include <stdio.h>
#include <blobls.h>
#include <ihead.h>
#include <mis.h>
#include <phrase.h>
#include <segchars.h>
#include <defs.h>

/***************************************************************************/
/*     Phase I - Build the phrase region map                               */
/***************************************************************************/
/* build_phrase_map                                                        */
/*   Selects the blobs that are neither dot-sized nor too tall, groups     */
/*   their centers into phrases with build_pi_lists_Rel2, hands too-short  */
/*   phrases to short_phrases_to_problems, and passes the surviving phrase */
/*   lists to draw_phrase_map.                                             */
/*                                                                         */
/*   mcdata   - forwarded unchanged to draw_phrase_map (presumably         */
/*              receives the map image allocated there -- confirm)         */
/*   w, h     - forwarded to draw_phrase_map (presumably the map           */
/*              dimensions -- confirm)                                     */
/*   nphrases - output: phrase count; written by build_pi_lists_Rel2 and   */
/*              short_phrases_to_problems.  It is NOT written when the     */
/*              function returns early because no normal blobs exist.      */
/*   blobls   - list of blobs to analyse                                   */
/*   esw      - forwarded to norm_small_tall_lists (dot-blob test)         */
/*   charh    - forwarded to norm_small_tall_lists (tall-blob limit)       */
/*                                                                         */
/*   Returns FALSE when there are no normal-sized blobs or when no phrases */
/*   remain after short phrases are removed; returns TRUE otherwise.       */
/***************************************************************************/
build_phrase_map(mcdata, w, h, nphrases, blobls, esw, charh)
unsigned char **mcdata;
int w, h, *nphrases;
BLOBLS *blobls;
int esw, charh;
{
   int **pi_lists, *pi_lens, sum;
   int *problems, nprob, aprob;
   int *nis, nnum, *sis, snum, *tis, tnum;
   int *mxs, *mys;
   int i,j;
   float hmean;

   /* extract "normal" sized blobs, pruning small and tall ones */
   norm_small_tall_lists(&nis, &nnum, &sis, &snum, &tis, &tnum, blobls, esw, charh);
   /* throw away too short and too tall blobs */
   free(sis);
   free(tis);

   /* if no normal sized blobs, then paragraph is empty */
   if(nnum == 0){
      free(nis);
      return(FALSE);
   }

   /* gather the center coordinates of the normal blobs and sum their */
   /* heights so the mean blob height can be computed                 */
   malloc_int(&mxs, nnum, "build_phrase_map : mxs");
   malloc_int(&mys, nnum, "build_phrase_map : mys");
   sum = 0;
   for(i = 0; i < nnum; i++){
      mxs[i] = blobls->blobs[nis[i]]->cx;
      mys[i] = blobls->blobs[nis[i]]->cy;
      sum += blobls->blobs[nis[i]]->h;
   }
   /* the cast forces floating point division */
   hmean = sum / (float)nnum;

   /* build phrase lists from "normal" size blobs */
   build_pi_lists_Rel2(&pi_lists, &pi_lens, nphrases, &problems, &nprob,
                   hmean, mxs, mys, nnum);
   free(mxs);
   free(mys);

   /* move points in too short phrases to problem list */
   aprob = nprob;
   short_phrases_to_problems(&problems, &nprob, &aprob,
                             pi_lists, pi_lens, nphrases);
   /* throw away problem blobs */
   free(problems);

   /* if no phrases remain, then paragraph is empty */
   /* NOTE(review): only the outer arrays are released on this path;   */
   /* presumably short_phrases_to_problems already released the lists  */
   /* of the phrases it removed -- confirm.                            */
   if(*nphrases == 0){
      free(nis);
      free(pi_lists);
      free(pi_lens);
      return(FALSE);
   }

   /* remove 1 level of indirection: the phrase lists hold positions  */
   /* within nis, so translate each entry to its index within blobls  */
   for(i = 0; i < *nphrases; i++){
      for(j = 0; j < pi_lens[i]; j++){
         (pi_lists[i])[j] = nis[(pi_lists[i])[j]];
      }
   }
   free(nis);

   /* draw phrase region map */
   draw_phrase_map(mcdata, w, h, blobls, pi_lists, pi_lens, *nphrases);

   /* the phrase lists were only needed to draw the map */
   for(i = 0; i < *nphrases; i++)
      free(pi_lists[i]);
   free(pi_lists);
   free(pi_lens);

   return(TRUE);
}

/***************************************************************************/
/*     Phase II - Construct phrases using the region map                   */
/***************************************************************************/
/* phrases_from_map                                                        */
/*   Allocates one blob-index list per phrase and fills the lists by       */
/*   calling pi_lines_from_map.                                            */
/*                                                                         */
/*   pi_lists - output: receives an array of nphrases lists, each          */
/*              initially allocated with PHRASE_LEN_CHUNKS entries         */
/*   pi_lens  - output: receives an array of nphrases list lengths,        */
/*              allocated with calloc_int                                  */
/*   nphrases - number of phrases (lists) to create                        */
/*   blobls, esw, charh, mcdata, w, h - forwarded to pi_lines_from_map     */
/*                                                                         */
/*   Neither *pi_lists nor *pi_lens is freed here, so they are left for    */
/*   the caller.  There is no return statement: the implicit int return    */
/*   value is undefined and must not be used.                              */
/***************************************************************************/
phrases_from_map(pi_lists, pi_lens, nphrases, blobls, esw, charh,
                    mcdata, w, h)
int ***pi_lists, **pi_lens, nphrases;
BLOBLS *blobls;
int esw, charh;
unsigned char *mcdata;
int w, h;
{
   int i, *pi_alens, alen;

   malloc_dbl_int_l1(pi_lists, nphrases, "phrases_from_map : pi_lists");
   /* pi_alens tracks the allocated size of each list */
   malloc_int(&pi_alens, nphrases, "phrases_from_map : pi_alens");
   alen = PHRASE_LEN_CHUNKS;
   for(i = 0; i < nphrases; i++){
      malloc_int(&((*pi_lists)[i]), alen, "phrases_from_map : pi_lists[i]");
      pi_alens[i] = alen;
   }
   calloc_int(pi_lens, nphrases,  "phrases_from_map : pi_lens");

   /* reconstruct lines from phrase region map */
   pi_lines_from_map(*pi_lists, *pi_lens, pi_alens, nphrases, blobls, esw, charh,
                      mcdata, w, h);

   /* allocated sizes are not reported back to the caller */
   free(pi_alens);
}

/***************************************************************************/
/* norm_small_tall_lists                                                   */
/*   Partitions the indices of the blobs in blobls into three lists.       */
/*                                                                         */
/*   nis, nnum - output: list and count of "normal" blob indices           */
/*   sis, snum - output: list and count of blobs for which is_dot_blob     */
/*               returns non-zero                                          */
/*   tis, tnum - output: list and count of the remaining blobs whose       */
/*               height is not below sround(charh * TALL_FACTOR)           */
/*   blobls    - list of blobs to partition                                */
/*   esw       - forwarded to is_dot_blob                                  */
/*   charh     - scaled by TALL_FACTOR to get the tall-blob limit          */
/*                                                                         */
/*   Each list is allocated with blobls->num entries and is left for the   */
/*   caller to free.  There is no return statement: the implicit int       */
/*   return value is undefined and must not be used.                       */
/***************************************************************************/
norm_small_tall_lists(nis, nnum, sis, snum, tis, tnum, blobls, esw, charh)
BLOBLS *blobls;
int **nis, *nnum, **sis, *snum, **tis, *tnum;
int esw, charh;
{
   int i, too_tall, head;

   malloc_int(nis, blobls->num, "norm_small_tall_lists : nis");
   malloc_int(sis, blobls->num, "norm_small_tall_lists : sis");
   malloc_int(tis, blobls->num, "norm_small_tall_lists : tis");

   /* prune out too small blobs */
   *nnum = 0;
   *snum = 0;
   for(i = 0; i < blobls->num; i++){
      /* store index of non-dot sized blob */
      if(!is_dot_blob(blobls->blobs[i], esw)){
         (*nis)[*nnum] = i;
         (*nnum)++;
      }
      /* otherwise skip too small blob */
      else
         (*sis)[(*snum)++] = i;
   }

   /* prune out too tall blobs by compacting the normal list in place; */
   /* head is the next slot to be written                              */
   too_tall = sround(charh * TALL_FACTOR);
   head = 0;
   *tnum = 0;
   for(i = 0; i < *nnum; i++){
      if(blobls->blobs[(*nis)[i]]->h < too_tall){
         /* if not copying itself */
         if(head != i)
            (*nis)[head] = (*nis)[i];
         head++;
      }
      /* otherwise skip too tall blob */
      else
         (*tis)[(*tnum)++] = (*nis)[i];
   }
   *nnum = head;
}

/***************************************************************************/
/* pi_lines_from_map                                                       */
/*   Assigns every blob to a phrase list using the phrase map, then sorts  */
/*   each list with sort_pi_list_on_x using the blobs' cx values.          */
/*                                                                         */
/*   pi_lists, pi_lens, pi_alens - per-phrase lists, lengths and allocated */
/*              sizes; updated through add_pi_list and split_across_lines  */
/*   nphrases - number of phrases; the histogram has nphrases + 1 bins     */
/*   blobls   - list of blobs to assign                                    */
/*   esw      - forwarded to split_across_lines                            */
/*   charh    - blobs taller than this are candidates for splitting        */
/*   mcdata, w, h - phrase map and the dimensions passed with it           */
/*                                                                         */
/*   Map value v corresponds to phrase index v - 1 (see pi = map_i - 1).   */
/*   There is no return statement: the implicit int return value is        */
/*   undefined and must not be used.                                       */
/***************************************************************************/
pi_lines_from_map(pi_lists, pi_lens, pi_alens, nphrases, blobls, esw, charh,
                  mcdata, w, h)
int **pi_lists, *pi_lens, *pi_alens, nphrases;
BLOBLS *blobls;
unsigned char *mcdata;
int w, h, esw, charh;
{
   int i, map_i, max_v, pi, *mxs;
   int *color_hist, hnum, hsize;
   int *xphrases, nx, lastblob;

   hnum = nphrases + 1;
   malloc_int(&color_hist, hnum, "pi_lines_from_map : color_hist");
   hsize = hnum * sizeof(int);
   malloc_int(&xphrases, nphrases, "pi_lines_from_map : xphrases");

   /* the blob count is sampled once before the loop; presumably       */
   /* split_across_lines can append blobs that must not be revisited   */
   /* -- confirm                                                       */
   lastblob = blobls->num;
   for(i = 0; i < lastblob; i++){
      /* clear the histogram before each blob */
      memset(color_hist, '\0', hsize);
      hist_blob_colors(color_hist, hnum, blobls->blobs[i], mcdata, w, h);
      /* if blob is too tall */
      /* (nx is only set when should_split_tall is actually called) */
      if((blobls->blobs[i]->h > charh) &&
         (should_split_tall(xphrases, &nx, blobls->blobs[i], color_hist, hnum,
                               mcdata, w, h))){
         split_across_lines(i, blobls, esw, pi_lists, pi_lens, pi_alens, nphrases,
                            mcdata, w, h, xphrases, nx);
      }
      /* if tall but not split, then treat as any other blob */
      else{
         /* find max phrase region overlap */
         /* NOTE(review): the search range starts at bin 0, whereas     */
         /* should_split_tall ignores bin 0.  If bin 0 could ever hold  */
         /* the maximum, map_i would be 0 and pi below would be -1 --   */
         /* confirm hist_blob_colors never counts into bin 0.           */
         find_first_max_forward(color_hist, 0, hnum, &map_i, &max_v);
         /* if no overlap with a phrase region ... */
         if(max_v == 0){
            /* then assign to closest */
            /* (presumably fatalerr does not return -- confirm) */
            if((map_i = find_closest_line_in_map(blobls->blobs[i],
                             mcdata, w, h)) == NOT_FOUND)
               fatalerr("pi_lines_from_map", "no line found in map", NULL);
         }
         /* convert map value to phrase index */
         pi = map_i - 1;
         /* add blob to appropriate pi_list */
         add_pi_list(&(pi_lists[pi]), &(pi_lens[pi]), &(pi_alens[pi]), i);
      }
   }
   free(color_hist);
   free(xphrases);

   /* sort the blobs in each line on cx's */
   /* (blobls->num is re-read here rather than reusing lastblob) */
   malloc_int(&mxs, blobls->num, "pi_lines_from_map : mxs");
   for(i = 0; i < blobls->num; i++)
      mxs[i] = blobls->blobs[i]->cx;
   for(i = 0; i < nphrases; i++){
      sort_pi_list_on_x(pi_lists[i], pi_lens[i], mxs, blobls->num);
   }
   free(mxs);
}

/***************************************************************************/
/* should_split_tall                                                       */
/*   Decides from a blob's phrase-map histogram whether the blob should be */
/*   split across phrases, and reports which phrases it overlaps.          */
/*                                                                         */
/*   xphrases   - output: phrase index (bin - 1) of every non-zero         */
/*                histogram bin from 1 up, in increasing order             */
/*   nx         - output: number of entries written to xphrases; reduced   */
/*                by one when the last phrase fails the overlap test       */
/*   blob       - blob under test; pixcnt normalizes the overlap count     */
/*   color_hist - histogram with hnum bins; bin 0 is ignored               */
/*   hnum       - number of bins in color_hist                             */
/*   mcdata, w, h - forwarded to tst_phrase_overlap                        */
/*                                                                         */
/*   Returns FALSE when fewer than two phrases are overlapped, or when the */
/*   second largest bin divided by blob->pixcnt is below MIN_2ND_OVER.     */
/*   Otherwise returns TRUE if the last overlapped phrase passes           */
/*   tst_phrase_overlap with PRCT_OVERLAP, or if at least two phrases      */
/*   remain once that last phrase is dropped; FALSE if fewer remain.       */
/***************************************************************************/
should_split_tall(xphrases, nx, blob, color_hist, hnum, mcdata, w, h)
BLOB *blob;
int *color_hist, hnum;
unsigned char *mcdata;
int w, h;
int *xphrases, *nx;
{
   /* max1 and max2 track the largest and second largest bin counts */
   int i, max1 = -1, max2 = -1;
   float pb;

   *nx = 0;
   for(i = 1; i < hnum; i++){
      if(color_hist[i] != 0){
         xphrases[*nx] = i-1;
         (*nx)++;
         if(color_hist[i] > max1){
            max2 = max1;
            max1 = color_hist[i];
         }
         else if(color_hist[i] > max2){
            max2 = color_hist[i];
         }
      }
   }

   /* a split needs at least two overlapped phrases */
   if(*nx < 2)
      return(FALSE);

   /* fraction of the blob's pixel count found in the second largest bin */
   pb = max2 / (float)blob->pixcnt;
   if(pb < MIN_2ND_OVER)
       return(FALSE);

   /* test the last overlapped phrase; +1 converts index to map value */
   if(tst_phrase_overlap(xphrases[(*nx-1)]+1, blob, mcdata, w, h, PRCT_OVERLAP))
      return(TRUE);
   else{
      /* remove last overlapping phrase */
      (*nx)--;
      if(*nx < 2)
         return(FALSE);
      else
         return(TRUE);
   }
}

/***************************************************************************/
/* tst_phrase_overlap                                                      */
/*   NOTE: only the first part of this function is visible in this         */
/*   excerpt; the notes below cover the visible statements only.           */
/*                                                                         */
/*   pi     - phrase map pixel value to test against (should_split_tall    */
/*            passes phrase index + 1).  Map pixels are unsigned char, so  */
/*            a value above 255 can never match.                           */
/*   blob   - blob under test; data is walked as h rows of w bytes         */
/*   mcdata - phrase map, addressed with a row stride of w                 */
/*   w, h   - map row stride and the row limit of the downward scan        */
/*   thresh - minimum ratio of overlap rows to phrase band rows            */
/*                                                                         */
/*   Returns FALSE when no set blob pixel coincides with a map pixel equal */
/*   to pi, or when the overlap ratio is below thresh.                     */
/***************************************************************************/
tst_phrase_overlap(pi, blob, mcdata, w, h, thresh)
int pi;
BLOB *blob;
unsigned char *mcdata;
int w, h;
float thresh;
{
   int x, y, found, sy, ty, by, pwidth, bover;
   unsigned char *sbptr, *bptr, *smptr, *mptr;
   int min_x = -1, min_y = -1, max_x = -1, max_y = -1;
   float pover;

   /* find range of blob overlap with phrase region */
   /* start by finding top of range */
   /* bptr is not reset per row: it advances once per pixel, and a row */
   /* is only left early when the search is finished                   */
   bptr = blob->data;
   smptr = mcdata + (blob->y1 * w) + blob->x1;
   for(y = 0, found = 0; (y < blob->h) && (!found); y++){
      mptr = smptr;
      for(x = 0; x < blob->w; x++){
         if(*bptr && (*mptr == pi)){
            min_y = y;
            min_x = x;
            found = 1;
            break;
         }
         bptr++;
         mptr++;
      }
      smptr += w;
   }
   if(!found)
      return(FALSE);

   /* now find bottom of overlap range */
   /* scan upward, pairing the last blob data row with map row blob->y2 */
   sbptr = blob->data + ((blob->h-1) * blob->w);
   smptr = mcdata + (blob->y2 * w) + blob->x1;
   for(y = blob->h-1, found = 0; (y >= 0) && (!found); y--){
      mptr = smptr;
      bptr = sbptr;
      for(x = 0; x < blob->w; x++){
         if(*bptr && (*mptr == pi)){
            max_y = y;
            max_x = x;
            found = 1;
            break;
         }
         bptr++;
         mptr++;
      }
      smptr -= w;
      sbptr -= blob->w;
   }
   if(!found)
      fatalerr("tst_phrase_overlap",
               "map pixel not found when locating bottom of overlap", NULL);

   /* number of blob rows between first and last overlap, inclusive */
   bover = max_y - min_y + 1;

   /* find phrase band width */
   /* measured as the vertical run of map pixels equal to pi through */
   /* the bottom-most overlap pixel                                  */
   sy = blob->y1 + max_y;
   smptr = mcdata + (sy * w) + blob->x1 + max_x;
   ty = sy;
   mptr = smptr;
   while((ty >= 0) && (*mptr == pi)){
      ty--;
      mptr-=w;
   }
   by = sy;
   mptr = smptr;
   while((by < h) && (*mptr == pi)){
      by++;
      mptr+=w;
   }
   /* NOTE(review): ty and by each stop one row outside the run (or at */
   /* -1 and h), so the run spans by - ty - 1 rows and this value is   */
   /* two larger -- confirm the +1 is intended.                        */
   pwidth = by - ty + 1;

   /* if percent overlap large enough */
   pover = bover / (float)pwidth;
   if(pover < thresh)
      return(FALSE);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?