⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adaptions.cpp

📁 一些OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
        #ifndef SECURE_NAMES        if (tessedit_cluster_debug)          tprintf ("Sample added to an existing cluster\n");        #endif      }      else {        #ifndef SECURE_NAMES        if (tessedit_cluster_debug)          tprintf            ("Sample dropped, good match to an existing cluster\n");        #endif      }    }    else if (best_score > tessedit_cluster_t2) {      c_it.add_to_end (new CHAR_SAMPLES (sample));      #ifndef SECURE_NAMES      if (tessedit_cluster_debug)        tprintf ("New cluster created for this sample\n");      #endif    }    else {      cw_it.add_to_end (sample);      if (tessedit_cluster_debug)        tprintf ("Sample added to the wait list\n");    }  }}void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,                     CHAR_SAMPLE *sample,                     CHAR_SAMPLES *best_cluster) {  CHAR_SAMPLE *wait_sample;  CHAR_SAMPLE *test_sample = sample;  CHAR_SAMPLE_IT cw_it = chars_waiting;  CHAR_SAMPLE_LIST add_list;     //Samples added to best cluster  CHAR_SAMPLE_IT add_it = &add_list;  float score;  add_list.clear ();  if (!cw_it.empty ()) {    do {      if (!add_list.empty ()) {        add_it.forward ();        test_sample = add_it.extract ();        best_cluster->add_sample (test_sample);      }      for (cw_it.mark_cycle_pt ();      !cw_it.cycled_list (); cw_it.forward ()) {        wait_sample = cw_it.data ();        if (tessedit_mm_use_prototypes)          score = best_cluster->match_score (wait_sample);        else          score = sample->match_sample (wait_sample, FALSE);        if (score < tessedit_cluster_t1) {          if (score > tessedit_cluster_t3          || tessedit_mm_use_prototypes) {            add_it.add_after_stay_put (cw_it.extract ());            #ifndef SECURE_NAMES            if (tessedit_cluster_debug)              tprintf                ("Wait sample added to an existing cluster\n");            #endif          }          else {            #ifndef SECURE_NAMES            if 
(tessedit_cluster_debug)              tprintf                ("Wait sample dropped, good match to an existing cluster\n");            #endif          }        }      }    }    while (!add_list.empty ());  }}void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,                         CHAR_SAMPLE_LIST *chars_waiting) {  CHAR_SAMPLES *best_cluster;  CHAR_SAMPLES_IT c_it = char_clusters;  CHAR_SAMPLE_IT cw_it = chars_waiting;  CHAR_SAMPLE *sample;  INT32 total_sample_count = 0;  while (!cw_it.empty ()) {    cw_it.move_to_first ();    sample = cw_it.extract ();    best_cluster = new CHAR_SAMPLES (sample);    c_it.add_to_end (best_cluster);    check_wait_list(chars_waiting, sample, best_cluster);  }  for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) {    c_it.data ()->assign_to_char ();    if (tessedit_use_best_sample)      c_it.data ()->find_best_sample ();    else if (tessedit_mm_adapt_using_prototypes)      c_it.data ()->build_prototype ();    if (tessedit_cluster_debug)      total_sample_count += c_it.data ()->n_samples ();  }  #ifndef SECURE_NAMES  if (tessedit_cluster_debug)    tprintf ("Clustering completed, %d samples in all\n", total_sample_count);  #endif#ifndef GRAPHICS_DISABLED  if (tessedit_demo_adaption)    display_cluster_prototypes(char_clusters);#endif}void adapt_to_good_ems(WERD_RES *word,                       CHAR_SAMPLES_LIST *char_clusters,                       CHAR_SAMPLE_LIST *chars_waiting) {  PBLOB_LIST *blobs = word->outword->blob_list ();  PBLOB_IT blob_it(blobs);  INT16 i;  CHAR_SAMPLE *sample;  CHAR_SAMPLES_IT c_it = char_clusters;  CHAR_SAMPLE_IT cw_it = chars_waiting;  float score;  float best_score;  char best_char;  CHAR_SAMPLES *best_cluster;  PIXROW_LIST *pixrow_list;  PIXROW_IT pixrow_it;  IMAGELINE *imlines;            // lines of the image  BOX pix_box;                   // box of imlines  // extent  WERD copy_outword;             // copy to denorm  BOX b_box;  PBLOB_IT copy_blob_it;  OUTLINE_IT 
copy_outline_it;  PIXROW *pixrow = NULL;  static INT32 word_number = 0;#ifndef GRAPHICS_DISABLED  WINDOW demo_win = NULL;#endif  INT32 resolution = page_image.get_res ();  if (word->word->bounding_box ().height () > resolution / 3)    return;  word_number++;  if (strchr (word->best_choice->string ().string (), 'm') == NULL    && (tessedit_process_rns    && strstr (word->best_choice->string ().string (), "rn") == NULL))    return;  if (tessedit_reject_ems)    reject_all_ems(word);  else if (tessedit_reject_suspect_ems)    reject_suspect_ems(word);  else {    if (char_clusters->length () == 0) {      #ifndef SECURE_NAMES      if (tessedit_cluster_debug)        tprintf ("No clusters to use for em adaption\n");      #endif      return;    }    if (!cw_it.empty ()) {      complete_clustering(char_clusters, chars_waiting);      print_em_stats(char_clusters, chars_waiting);    }    if ((!word_adaptable (word, tessedit_em_adaption_mode) ||      word->reject_map.reject_count () != 0)      && (strchr (word->best_choice->string ().string (), 'm') != NULL      || (tessedit_process_rns      && strstr (word->best_choice->string ().string (),    "rn") != NULL))) {      if (tessedit_process_rns        && strstr (word->best_choice->string ().string (),      "rn") != NULL) {        copy_outword = *(word->outword);        copy_blob_it.set_to_list (copy_outword.blob_list ());        i = 0;        while (word->best_choice->string ()[i] != '\0') {          if (word->best_choice->string ()[i] == 'r'          && word->best_choice->string ()[i + 1] == 'n') {            copy_outline_it.set_to_list (copy_blob_it.data ()->              out_list ());            copy_outline_it.add_list_after (copy_blob_it.              
data_relative (1)->              out_list ());            copy_blob_it.forward ();            delete (copy_blob_it.extract ());            i++;          }          copy_blob_it.forward ();          i++;        }      }      else        copy_outword = *(word->outword);      copy_outword.baseline_denormalise (&word->denorm);      copy_blob_it.set_to_list (copy_outword.blob_list ());      char_clip_word(&copy_outword, page_image, pixrow_list, imlines, pix_box);      pixrow_it.set_to_list (pixrow_list);      pixrow_it.move_to_first ();                                 // For debugging only      b_box = copy_outword.bounding_box ();      pixrow = pixrow_it.data ();      blob_it.move_to_first ();      copy_blob_it.move_to_first ();      for (i = 0;        word->best_choice->string ()[i] != '\0';        i++, pixrow_it.forward (), blob_it.forward (),      copy_blob_it.forward ()) {        if ((word->best_choice->string ()[i] == 'm'          || (word->best_choice->string ()[i] == 'r'          && word->best_choice->string ()[i + 1] == 'n'))        && !word->reject_map[i].perm_rejected ()) {          if (tessedit_cluster_debug)            tprintf ("Sample %c to check found in %s, index %d\n",              word->best_choice->string ()[i],              word->best_choice->string ().string (), i);          if (tessedit_demo_adaption)            tprintf              ("Sample %c to check found in %s (%d), index %d\n",              word->best_choice->string ()[i],              word->best_choice->string ().string (), word_number,              i);          if (tessedit_matrix_match) {            BOX copy_box = copy_blob_it.data ()->bounding_box ();            sample = clip_sample (pixrow_it.data (),              imlines,              pix_box,              copy_outword.flag (W_INVERSE),              word->best_choice->string ()[i]);                                 //Clip failed            if (sample == NULL) {              tprintf                ("Unable to clip sample from %s, index 
%d\n",                word->best_choice->string ().string (), i);              #ifndef SECURE_NAMES              if (tessedit_cluster_debug)                tprintf ("Sample rejected (no sample)\n");              #endif              word->reject_map[i].setrej_mm_reject ();              if (word->best_choice->string ()[i] == 'r') {                word->reject_map[i + 1].setrej_mm_reject ();                i++;              }              continue;            }          }          else            sample = new CHAR_SAMPLE (blob_it.data (),              &word->denorm,              word->best_choice->              string ()[i]);          best_score = MAX_INT32;          best_char = '\0';          best_cluster = NULL;          for (c_it.mark_cycle_pt ();          !c_it.cycled_list (); c_it.forward ()) {            if (c_it.data ()->character () != '\0') {              score = c_it.data ()->match_score (sample);              if (score < best_score) {                best_cluster = c_it.data ();                best_score = score;                best_char = c_it.data ()->character ();              }            }          }          if (best_score > tessedit_cluster_t1) {            #ifndef SECURE_NAMES            if (tessedit_cluster_debug)              tprintf ("Sample rejected (score %f)\n", best_score);            if (tessedit_demo_adaption)              tprintf ("Sample rejected (score %f)\n", best_score);            #endif            word->reject_map[i].setrej_mm_reject ();            if (word->best_choice->string ()[i] == 'r')              word->reject_map[i + 1].setrej_mm_reject ();          }          else {            if (word->best_choice->string ()[i] == best_char) {              #ifndef SECURE_NAMES              if (tessedit_cluster_debug)                tprintf ("Sample accepted (score %f)\n",                  best_score);              if (tessedit_demo_adaption)                tprintf ("Sample accepted (score %f)\n",                  best_score);              
#endif              word->reject_map[i].setrej_mm_accept ();              if (word->best_choice->string ()[i] == 'r')                word->reject_map[i + 1].setrej_mm_accept ();            }            else {              #ifndef SECURE_NAMES              if (tessedit_cluster_debug)                tprintf ("Sample rejected (char %c, score %f)\n",                  best_char, best_score);              if (tessedit_demo_adaption)                tprintf ("Sample rejected (char %c, score %f)\n",                  best_char, best_score);              #endif              word->reject_map[i].setrej_mm_reject ();              if (word->best_choice->string ()[i] == 'r')                word->reject_map[i + 1].setrej_mm_reject ();            }          }          if (tessedit_demo_adaption) {            if (strcmp (imagebasename.string (),              tessedit_demo_file.string ()) != 0              || word_number == tessedit_demo_word1            || word_number == tessedit_demo_word2) {#ifndef GRAPHICS_DISABLED              demo_win =                display_clip_image(&copy_outword,                                   page_image,                                   pixrow_list,                                   pix_box);#endif              demo_word = word_number;              best_cluster->match_score (sample);              demo_word = 0;            }          }          if (word->best_choice->string ()[i] == 'r')            i++;                 // Skip next character        }      }      delete[]imlines;           // Free array of imlines      delete pixrow_list;    }  }}void adapt_to_good_samples(WERD_RES *word,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -