⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 control.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
📖 第 1 页 / 共 4 页
字号:
  WERD *bln_word;                //baseline norm copy                                 //detailed results  BLOB_CHOICE_LIST_CLIST blob_choices;  set_global_subsubloc_code(SUBSUBLOC_OTHER);  if (matcher_fp != NULL) {    word_answer = (char *) word->word->text ();    if (word_answer != NULL && word_answer[0] == '\0')      word_answer = NULL;  }  matcher_pass = 0;  bln_word = make_bln_copy (word->word, row, x_height, &word->denorm);  set_global_subsubloc_code(SUBSUBLOC_TESS);  if (tessedit_training_tess)    word->best_choice = correct_segment_pass2 (bln_word,      &word->denorm,      tess_default_matcher,      tess_training_tester,      word->raw_choice,      &blob_choices, word->outword);  else if (tessedit_dump_choices)    word->best_choice = test_segment_pass2 (bln_word,        &word->denorm,        tess_default_matcher,        choice_dump_tester,        word->raw_choice,        &blob_choices, word->outword);  //      else if (tessedit_training_wiseowl)  //              best_choice=correct_segment_pass2( word, &denorm,  //                                                                                                        tess_default_matcher,wo_learn,  //                                                                                                        raw_choice,&blob_choices,outword);  //      else if (tessedit_matcher_is_wiseowl)  //              best_choice=tess_segment_pass2( word, &denorm, wo_classify,  //                                                                                                raw_choice, &blob_choices, outword);  else {    word->best_choice = tess_segment_pass2 (bln_word, &word->denorm,      tess_default_matcher,      word->raw_choice, &blob_choices,      word->outword);  }  set_global_subsubloc_code(SUBSUBLOC_OTHER);  /*     Test for TESS screw up on word. Recog_word has already ensured that the     choice list, outword blob lists and best_choice string are the same     length. A TESS screw up is indicated by a blank filled or 0 length string.   */  if ((word->best_choice->string ().length () == 0) ||    (strspn (word->best_choice->string ().string (), " ") ==  word->best_choice->string ().length ())) {    word->tess_failed = TRUE;    word->reject_map.initialise (word->best_choice->string ().length ());    word->reject_map.rej_word_tess_failure ();    //              tprintf("Empty word produced\n");  }  else {    if ((word->best_choice->string ().length () !=      word->outword->blob_list ()->length ()) ||    (word->best_choice->string ().length () != blob_choices.length ())) {      tprintf        ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",        word->best_choice->string ().string (),        word->best_choice->string ().length (),        word->outword->blob_list ()->length (), blob_choices.length ());    }    ASSERT_HOST (word->best_choice->string ().length () ==      word->outword->blob_list ()->length ());    ASSERT_HOST (word->best_choice->string ().length () ==      blob_choices.length ());    word->tess_failed = FALSE;    if (word->word->flag (W_REP_CHAR)) {      fix_rep_char(word);    }    else {      fix_quotes ((char *) word->best_choice->string ().string (),        word->outword, &blob_choices);      if (tessedit_fix_hyphens)        fix_hyphens ((char *) word->best_choice->string ().string (),          word->outword, &blob_choices);      /* Dont trust fix_quotes! - though I think I've fixed the bug */      if ((word->best_choice->string ().length () !=        word->outword->blob_list ()->length ()) ||        (word->best_choice->string ().length () !=      blob_choices.length ())) {        #ifndef SECURE_NAMES        tprintf          ("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",          word->best_choice->string ().string (),          word->best_choice->string ().length (),          word->outword->blob_list ()->length (),          blob_choices.length ());        #endif      }      ASSERT_HOST (word->best_choice->string ().length () ==        word->outword->blob_list ()->length ());      ASSERT_HOST (word->best_choice->string ().length () ==        blob_choices.length ());      word->tess_accepted = tess_acceptable_word (word->best_choice,        word->raw_choice);      make_reject_map (word, &blob_choices, row, 2);    }  }  blob_choices.deep_clear ();  delete bln_word;  assert (word->raw_choice != NULL);}/************************************************************************* * fix_rep_char() * The word is a repeated char. Find the repeated char character. Make a reject * string which rejects any char other than the voted char. Set the word to done * to stop rematching it. * *************************************************************************/void fix_rep_char(                //Repeated char word                  WERD_RES *word  //word to do                 ) {  struct REP_CH  {    char ch;    int count;  };  REP_CH *rep_ch;                //array of char counts  int word_len;  int rep_ch_count = 0;          //how many unique chs  const char *word_str;          //the repeated chs  int i, j;  int total = 0;  int max = 0;  char maxch = ' ';              //Most common char  word_str = word->best_choice->string ().string ();  word_len = strlen (word_str);  rep_ch = (REP_CH *) alloc_mem (word_len * sizeof (REP_CH));  for (i = 0; i < word_len; i++) {    for (j = 0; j < rep_ch_count && rep_ch[j].ch != word_str[i]; j++);    if (j < rep_ch_count)      rep_ch[j].count++;    else {      rep_ch[rep_ch_count].ch = word_str[i];      rep_ch[rep_ch_count].count = 1;      rep_ch_count++;    }  }  for (j = 0; j < rep_ch_count; j++) {    total += rep_ch[j].count;    if ((rep_ch[j].count > max) && (rep_ch[j].ch != ' ')) {      max = rep_ch[j].count;      maxch = rep_ch[j].ch;    }  }  //      tprintf( "REPEATED CHAR %s len=%d total=%d choice=%c\n",  //                        word_str, word_len, total, maxch );  free_mem(rep_ch);  word->reject_map.initialise (word_len);  for (i = 0; i < word_len; i++) {    if (word_str[i] != maxch)                                 //rej unrecognised blobs      word->reject_map[i].setrej_bad_repetition ();  }  word->done = TRUE;}/********************************************************************** * fix_quotes * * Change pairs of quotes to double quotes. **********************************************************************/void fix_quotes(               //make double quotes                char *string,  //string to fix                WERD *word,    //word to do //char choices                BLOB_CHOICE_LIST_CLIST *blob_choices) {  char *ptr;                     //string ptr                                 //blobs  PBLOB_IT blob_it = word->blob_list ();                                 //choices  BLOB_CHOICE_LIST_C_IT choice_it = blob_choices;  BLOB_CHOICE_IT it1;            //first choices  BLOB_CHOICE_IT it2;            //second choices  for (ptr = string;  *ptr != '\0'; ptr++, blob_it.forward (), choice_it.forward ()) {    if ((*ptr == '\'' || *ptr == '`')    && (*(ptr + 1) == '\'' || *(ptr + 1) == '`')) {      *ptr = '"';                //turn to double      strcpy (ptr + 1, ptr + 2); //shuffle up      merge_blobs (blob_it.data (), blob_it.data_relative (1));      blob_it.forward ();      delete blob_it.extract (); //get rid of spare      it1.set_to_list (choice_it.data ());      it2.set_to_list (choice_it.data_relative (1));      if (it1.data ()->certainty () < it2.data ()->certainty ()) {        choice_it.forward ();                                 //get rid of spare        delete choice_it.extract ();      }      else {                                 //get rid of spare        delete choice_it.extract ();        choice_it.forward ();      }    }  }}/********************************************************************** * fix_hyphens * * Change pairs of hyphens to a single hyphen if the bounding boxes touch * Typically a long dash which has been segmented. **********************************************************************/void fix_hyphens(               //crunch double hyphens                 char *string,  //string to fix                 WERD *word,    //word to do //char choices                 BLOB_CHOICE_LIST_CLIST *blob_choices) {  char *ptr;                     //string ptr                                 //blobs  PBLOB_IT blob_it = word->blob_list ();                                 //choices  BLOB_CHOICE_LIST_C_IT choice_it = blob_choices;  BLOB_CHOICE_IT it1;            //first choices  BLOB_CHOICE_IT it2;            //second choices  for (ptr = string;  *ptr != '\0'; ptr++, blob_it.forward (), choice_it.forward ()) {    if ((*ptr == '-' || *ptr == '~') &&      (*(ptr + 1) == '-' || *(ptr + 1) == '~') &&      (blob_it.data ()->bounding_box ().right () >=    blob_it.data_relative (1)->bounding_box ().left ())) {      *ptr = '-';                //turn to single hyphen      strcpy (ptr + 1, ptr + 2); //shuffle up      merge_blobs (blob_it.data (), blob_it.data_relative (1));      blob_it.forward ();      delete blob_it.extract (); //get rid of spare      it1.set_to_list (choice_it.data ());      it2.set_to_list (choice_it.data_relative (1));      if (it1.data ()->certainty () < it2.data ()->certainty ()) {        choice_it.forward ();                                 //get rid of spare        delete choice_it.extract ();      }      else {                                 //get rid of spare        delete choice_it.extract ();        choice_it.forward ();      }    }  }}/********************************************************************** * merge_blobs * * Add the outlines from blob2 to blob1. Blob2 is emptied but not deleted. **********************************************************************/void merge_blobs(               //combine 2 blobs                 PBLOB *blob1,  //dest blob                 PBLOB *blob2   //source blob                ) {  OUTLINE_IT outline_it = blob1->out_list ();  //iterator  outline_it.move_to_last ();    //go to end                                 //do it  outline_it.add_list_after (blob2->out_list ());}/********************************************************************** * choice_dump_tester * * Matcher tester function which generates .chc file entries. * Called via test_segment_pass2 for every blob tested by tess in a word. * (But only for words for which a correct segmentation could be found.) **********************************************************************/void choice_dump_tester(                           //dump chars in word                        PBLOB *,                   //blob                        DENORM *,                  //de-normaliser                        BOOL8 correct,             //ly segmented                        char *text,                //correct text                        INT32 count,               //chars in text                        BLOB_CHOICE_LIST *ratings  //list of results                       ) {  STRING choice_file_name;  BLOB_CHOICE *blob_choice;  BLOB_CHOICE_IT it;  char source_chars[20];  char correct_char[3];  if (choice_file == NULL) {    choice_file_name = imagebasename + ".chc";    if (!(choice_file = fopen (choice_file_name.string (), "w"))) {      CANTOPENFILE.error ("choice_dump_tester", EXIT, "%s %d",        choice_file_name.string (), errno);    }  }  if ((count == 0) || (text == NULL) || (text[0] == '\0')) {    strcpy (source_chars, "$$");    strcpy (correct_char, "$$");  }  else {    strncpy(source_chars, text, count);    source_chars[count] = '\0';    if (correct) {      correct_char[0] = text[0];      correct_char[1] = '\0';    }    else {      strcpy (correct_char, "$$");    }  }  fprintf (choice_file, "%s\t%s", source_chars, correct_char);  it.set_to_list (ratings);  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {    blob_choice = it.data ();    if ((blob_choice->char_class () >= '!') &&      (blob_choice->char_class () <= '~'))      fprintf (choice_file, "\t%c\t%f\t%f",        blob_choice->char_class (),        blob_choice->rating (), blob_choice->certainty ());  }  fprintf (choice_file, "\n");}/************************************************************************* * make_bln_copy() * * Generate a baseline normalised copy of the source word. The copy is done so * that whatever format the original word is in, a polygonal bln version is * generated as output. *************************************************************************/WERD *make_bln_copy(WERD *src_word, ROW *row, float x_height, DENORM *denorm) {  WERD *result;  //      if (wordit_linearc && !src_word->flag(W_POLYGON))  //      {  //              larc_word = src_word->larc_copy( row->x_height() );  //              result = larc_word->poly_copy( row->x_height() );  //              delete larc_word;  //      }  // else  result = src_word->poly_copy (row->x_height ());  //      if (tessedit_draw_words)  //      {  //              if ( la_win == NO_WINDOW )  //                      create_la_win();  //              result->plot( la_win );  //      }  result->baseline_normalise_x (row, x_height, denorm);  return result;}ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s) {  int i = 0;  int leading_punct_count;  int upper_count = 0;  int hyphen_pos = -1;  ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE;  if (strlen (s) > 20)    return word_type;  /* Single Leading punctuation char*/  if ((s[i] != '\0') && (STRING (chs_leading_punct).contains (s[i])))    i++;  leading_punct_count = i;  /* Initial cap */  while (isupper (s[i])) {    i++;    upper_count++;  }  if (upper_count > 1)    word_type = AC_UPPER_CASE;  else {    /* Lower case word, possibly with an initial cap */    while (islower (s[i])) {      i++;    }    if (i - leading_punct_count < quality_min_initial_alphas_reqd)      goto not_a_word;    /*    Allow a single hyphen in a lower case word    - dont trust upper case - I've seen several cases of "H" -> "I-I"    */    if (s[i] == '-') {      hyphen_pos = i++;      if (s[i] != '\0') {        while (islower (s[i])) {          i++;        }        if (i < hyphen_pos + 3)          goto not_a_word;      }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -