⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adaptions.cpp

📁 一些OCR的相关资料,希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
                           CHAR_SAMPLES_LIST *char_clusters,                           CHAR_SAMPLE_LIST *chars_waiting) {  PBLOB_LIST *blobs = word->outword->blob_list ();  PBLOB_IT blob_it(blobs);  INT16 i;  CHAR_SAMPLE *sample;  CHAR_SAMPLES_IT c_it = char_clusters;  CHAR_SAMPLE_IT cw_it = chars_waiting;  float score;  float best_score;  char best_char;  CHAR_SAMPLES *best_cluster;  PIXROW_LIST *pixrow_list;  PIXROW_IT pixrow_it;  IMAGELINE *imlines;            // lines of the image  BOX pix_box;                   // box of imlines  // extent  WERD copy_outword;             // copy to denorm  BOX b_box;  PBLOB_IT copy_blob_it;  PIXROW *pixrow = NULL;  static INT32 word_number = 0;#ifndef GRAPHICS_DISABLED  WINDOW demo_win = NULL;#endif  INT32 resolution = page_image.get_res ();  word_number++;  if (tessedit_test_cluster_input)    return;  if (word->word->bounding_box ().height () > resolution / 3)    return;  if (char_clusters->length () == 0) {    #ifndef SECURE_NAMES    if (tessedit_cluster_debug)      tprintf ("No clusters to use for adaption\n");    #endif    return;  }  if (!cw_it.empty ()) {    complete_clustering(char_clusters, chars_waiting);    print_em_stats(char_clusters, chars_waiting);  }  if ((!word_adaptable (word, tessedit_cluster_adaption_mode)  && word->reject_map.reject_count () != 0) || tessedit_mm_use_rejmap) {    if (tessedit_cluster_debug) {      tprintf ("\nChecking: \"%s\"  MAP ",        word->best_choice->string ().string ());      word->reject_map.print (debug_fp);      tprintf ("\n");    }    copy_outword = *(word->outword);    copy_outword.baseline_denormalise (&word->denorm);    copy_blob_it.set_to_list (copy_outword.blob_list ());    char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box);    pixrow_it.set_to_list (pixrow_list);    pixrow_it.move_to_first ();                                 // For debugging only    b_box = copy_outword.bounding_box ();    pixrow = pixrow_it.data ();    blob_it.move_to_first ();    
copy_blob_it.move_to_first ();    for (i = 0;      word->best_choice->string ()[i] != '\0';      i++, pixrow_it.forward (), blob_it.forward (),    copy_blob_it.forward ()) {      if (word->reject_map[i].recoverable ()      || (tessedit_mm_all_rejects && word->reject_map[i].rejected ())) {        BOX copy_box = copy_blob_it.data ()->bounding_box ();        if (tessedit_cluster_debug)          tprintf ("Sample %c to check found in %s, index %d\n",            word->best_choice->string ()[i],            word->best_choice->string ().string (), i);        if (tessedit_demo_adaption)          tprintf ("Sample %c to check found in %s (%d), index %d\n",            word->best_choice->string ()[i],            word->best_choice->string ().string (),            word_number, i);        sample = clip_sample (pixrow_it.data (),          imlines,          pix_box,          copy_outword.flag (W_INVERSE),          word->best_choice->string ()[i]);        if (sample == NULL) {    //Clip failed          tprintf ("Unable to clip sample from %s, index %d\n",            word->best_choice->string ().string (), i);          #ifndef SECURE_NAMES          if (tessedit_cluster_debug)            tprintf ("Sample rejected (no sample)\n");          #endif          word->reject_map[i].setrej_mm_reject ();          continue;        }        best_score = MAX_INT32;        best_char = '\0';        best_cluster = NULL;        for (c_it.mark_cycle_pt ();        !c_it.cycled_list (); c_it.forward ()) {          if (c_it.data ()->character () != '\0') {            score = c_it.data ()->match_score (sample);            if (score < best_score) {              best_cluster = c_it.data ();              best_score = score;              best_char = c_it.data ()->character ();            }          }        }        if (best_score > tessedit_cluster_t1) {          #ifndef SECURE_NAMES          if (tessedit_cluster_debug)            tprintf ("Sample rejected (score %f)\n", best_score);          if 
(tessedit_demo_adaption)            tprintf ("Sample rejected (score %f)\n", best_score);          #endif          word->reject_map[i].setrej_mm_reject ();        }        else {          if (word->best_choice->string ()[i] == best_char) {            #ifndef SECURE_NAMES            if (tessedit_cluster_debug)              tprintf ("Sample accepted (score %f)\n", best_score);            if (tessedit_demo_adaption)              tprintf ("Sample accepted (score %f)\n", best_score);            #endif            if (tessedit_test_adaption)              word->reject_map[i].setrej_minimal_rej_accept ();            else              word->reject_map[i].setrej_mm_accept ();          }          else {            #ifndef SECURE_NAMES            if (tessedit_cluster_debug)              tprintf ("Sample rejected (char %c, score %f)\n",                best_char, best_score);            if (tessedit_demo_adaption)              tprintf ("Sample rejected (char %c, score %f)\n",                best_char, best_score);            #endif            word->reject_map[i].setrej_mm_reject ();          }        }        if (tessedit_demo_adaption) {          if (strcmp (imagebasename.string (),            tessedit_demo_file.string ()) != 0            || word_number == tessedit_demo_word1          || word_number == tessedit_demo_word2) {#ifndef GRAPHICS_DISABLED            demo_win =              display_clip_image(&copy_outword,                                 page_image,                                 pixrow_list,                                 pix_box);#endif            demo_word = word_number;            best_cluster->match_score (sample);            demo_word = 0;          }        }      }    }    delete[]imlines;             // Free array of imlines    delete pixrow_list;    if (tessedit_cluster_debug) {      tprintf ("\nFinal: \"%s\"  MAP ",        word->best_choice->string ().string ());      word->reject_map.print (debug_fp);      tprintf ("\n");    }  }}void 
print_em_stats(CHAR_SAMPLES_LIST *char_clusters,                    CHAR_SAMPLE_LIST *chars_waiting) {  CHAR_SAMPLES_IT c_it = char_clusters;  if (!tessedit_cluster_debug)    return;  #ifndef SECURE_NAMES  tprintf ("There are %d clusters and %d samples waiting\n",    char_clusters->length (), chars_waiting->length ());  for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ())    c_it.data ()->print (debug_fp);  #endif  tprintf ("\n");}CHAR_SAMPLE *clip_sample(              //lines of the image                         PIXROW *pixrow,                         IMAGELINE *imlines,                         BOX pix_box,  //box of imlines extent                         BOOL8 white_on_black,                         char c) {  BOX b_box = pixrow->bounding_box ();  float baseline_pos = 0;  INT32 resolution = page_image.get_res ();  if (!b_box.null_box ()) {    ASSERT_HOST (b_box.width () < page_image.get_xsize () &&      b_box.height () < page_image.get_ysize ());    if (b_box.width () > resolution || b_box.height () > resolution) {      tprintf ("clip sample: sample too big (%d x %d)\n",        b_box.width (), b_box.height ());      return NULL;    }    IMAGE *image = new (IMAGE);    if (image->create (b_box.width (), b_box.height (), 1) == -1) {      tprintf ("clip sample: create image failed (%d x %d)\n",        b_box.width (), b_box.height ());      delete image;      return NULL;    }    if (!white_on_black)      invert_image(image);  // Set background to white    pixrow->char_clip_image (imlines, pix_box, NULL, *image, baseline_pos);    if (white_on_black)      invert_image(image);  //invert white on black for scaling &NN    return new CHAR_SAMPLE (image, c);  }  else    return NULL;}#ifndef GRAPHICS_DISABLEDvoid display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters) {  INT16 proto_number = 0;  CHAR_SAMPLES_IT c_it = char_clusters;  char title[WINDOWNAMESIZE];  for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) {    proto_number++;    
#ifndef SECURE_NAMES    tprintf ("Displaying proto number %d\n", proto_number);    #endif    if (c_it.data ()->prototype () != NULL) {      sprintf (title, "Proto - %d", proto_number);      display_image (c_it.data ()->prototype ()->make_image (),        title, (proto_number - 1) * 400, 0, FALSE);    }  }}#endif// *********************************************************************// Simplistic routines to test the effect of rejecting ems and fullstops// *********************************************************************void reject_all_ems(WERD_RES *word) {  INT16 i;  for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {    if (word->best_choice->string ()[i] == 'm')                                 // reject all ems      word->reject_map[i].setrej_mm_reject ();  }}void reject_all_fullstops(WERD_RES *word) {  INT16 i;  for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {    if (word->best_choice->string ()[i] == '.')                                 // reject all fullstops      word->reject_map[i].setrej_mm_reject ();  }}void reject_suspect_ems(WERD_RES *word) {  INT16 i;  if (!word_adaptable (word, tessedit_cluster_adaption_mode))  for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {    if (word->best_choice->string ()[i] == 'm' && suspect_em (word, i))                                 // reject all ems      word->reject_map[i].setrej_mm_reject ();  }}void reject_suspect_fullstops(WERD_RES *word) {  INT16 i;  for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {    if (word->best_choice->string ()[i] == '.'      
&& suspect_fullstop (word, i))                                 // reject all commas      word->reject_map[i].setrej_mm_reject ();  }}BOOL8 suspect_em(WERD_RES *word, INT16 index) {  PBLOB_LIST *blobs = word->outword->blob_list ();  PBLOB_IT blob_it(blobs);  INT16 j;  for (j = 0; j < index; j++)    blob_it.forward ();  return (blob_it.data ()->out_list ()->length () != 1);}BOOL8 suspect_fullstop(WERD_RES *word, INT16 i) {  float aspect_ratio;  PBLOB_LIST *blobs = word->outword->blob_list ();  PBLOB_IT blob_it(blobs);  INT16 j;  BOX box;  INT16 width;  INT16 height;  for (j = 0; j < i; j++)    blob_it.forward ();  box = blob_it.data ()->bounding_box ();  width = box.width ();  height = box.height ();  aspect_ratio = ((width > height) ? ((float) width) / height :  ((float) height) / width);  return (aspect_ratio > tessed_fullstop_aspect_ratio);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -