⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 control.cpp

📁 一份OCR的相关资料,希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
    else {      /* Allow "'s" in NON hyphenated lower case words */      if ((s[i] == '\'') && (s[i + 1] == 's'))        i += 2;    }    if (upper_count > 0)      word_type = AC_INITIAL_CAP;    else      word_type = AC_LOWER_CASE;  }  /* Up to two different, constrained trailing punctuation chars */  if ((s[i] != '\0') && (STRING (chs_trailing_punct1).contains (s[i])))    i++;  if ((s[i] != '\0') &&    (s[i - 1] != s[i]) && (STRING (chs_trailing_punct2).contains (s[i])))    i++;  if (s[i] != '\0')    word_type = AC_UNACCEPTABLE;  not_a_word:  if (word_type == AC_UNACCEPTABLE) {    /* Look for abbreviation string */    i = 0;    if (isupper (s[0])) {      word_type = AC_UC_ABBREV;      while ((s[i] != '\0') && isupper (s[i]) && (s[i + 1] == '.'))        i += 2;    }    else if (islower (s[0])) {      word_type = AC_LC_ABBREV;      while ((s[i] != '\0') && islower (s[i]) && (s[i + 1] == '.'))        i += 2;    }    if (s[i] != '\0')      word_type = AC_UNACCEPTABLE;  }  return word_type;}/* DEBUGGING ROUTINE */BOOL8 check_debug_pt(WERD_RES *word, int location) {  BOOL8 show_map_detail = FALSE;  INT16 i;  #ifndef SECURE_NAMES  if (!test_pt)    return FALSE;  tessedit_rejection_debug.set_value (FALSE);  debug_x_ht_level.set_value (0);  tessedit_cluster_debug.set_value (FALSE);  nn_debug.set_value (FALSE);  nn_reject_debug.set_value (FALSE);  if (word->word->bounding_box ().contains (FCOORD (test_pt_x, test_pt_y))) {    if (location < 0)      return TRUE;               //For breakpoint use    tessedit_rejection_debug.set_value (TRUE);    debug_x_ht_level.set_value (20);    tessedit_cluster_debug.set_value (TRUE);    nn_debug.set_value (TRUE);    nn_reject_debug.set_value (TRUE);    tprintf ("\n\nTESTWD::");    switch (location) {      case 0:        tprintf ("classify_word_pass1 start\n");        word->word->print (debug_fp);        break;      case 10:        tprintf ("make_reject_map: initial map");        break;      case 20:        tprintf ("make_reject_map: after 
NN");        break;      case 30:        tprintf ("classify_word_pass2 - START");        break;      case 40:        tprintf ("classify_word_pass2 - Pre Xht");        break;      case 50:        tprintf ("classify_word_pass2 - END");        show_map_detail = TRUE;        break;      case 60:        tprintf ("fixspace");        break;      case 70:        tprintf ("MM pass START");        break;      case 80:        tprintf ("MM pass END");        break;      case 90:        tprintf ("After Poor quality rejection");        break;      case 100:        tprintf ("unrej_good_quality_words - START");        break;      case 110:        tprintf ("unrej_good_quality_words - END");        break;      case 120:        tprintf ("Write results pass");        show_map_detail = TRUE;        break;    }    tprintf (" \"%s\" ", word->best_choice->string ().string ());    word->reject_map.print (debug_fp);    tprintf ("\n");    if (show_map_detail) {      tprintf ("\"%s\"\n", word->best_choice->string ().string ());      for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {        tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]);        word->reject_map[i].full_print (debug_fp);      }    }    tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");    tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");    return TRUE;  }  else  #endif    return FALSE;}/********************************************************************** * set_word_fonts * * Get the fonts for the word. 
**********************************************************************/
/*
 * For every element of blob_choices (walked in step with the characters
 * of word->best_choice), the first choice whose char_class() equals the
 * corresponding character is taken and its config() is inspected.
 * A config of -1 contributes nothing. Otherwise the low five bits of the
 * config index three lookup tables: italic_table and bold_table give the
 * +1/-1 votes added to word->italic and word->bold, and font_table gives
 * the font index counted in a histogram (entries of -1 are not counted).
 * word->font1/font1_count and word->font2/font2_count are then filled
 * from that histogram by two successive find_modal_font calls.
 *
 * word          word whose italic, bold, font1 and font2 fields are set
 * blob_choices  per-character choice lists for the word
 */
void set_word_fonts(                 //good chars in word
                    WERD_RES *word,  //word to adapt to
                                     //detailed results
                    BLOB_CHOICE_LIST_CLIST *blob_choices) {
  INT32 index;                   //char index
  char choice_char;              //char from word
  INT8 config;                   //font of char
                                 //character iterator
  BLOB_CHOICE_LIST_C_IT char_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //choice iterator
  STATS fonts (0, 32);           //font counters
  //Vote per config: -1 when bit 0 of the config is set, else +1.
  static const INT8 italic_table[32] = {
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1,
    1, -1, 1, -1
  };
  //Vote per config: -1 when bit 1 of the config is set, else +1.
  static const INT8 bold_table[32] = {
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1,
    1, 1, -1, -1
  };
  //Font index per config (selected by bits 2-4); -1 means "do not count".
  static const INT8 font_table[32] = {
    2, 2, 2, 2,
    -1, -1, -1, -1,
    0, 0, 0, 0,
    1, 1, 1, 1,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    2, 2, 2, 2
  };

  word->italic = 0;
  word->bold = 0;
  for (char_it.mark_cycle_pt (), index = 0;
       !char_it.cycled_list (); char_it.forward (), index++) {
    choice_char = word->best_choice->string ()[index];
    choice_it.set_to_list (char_it.data ());
    for (choice_it.mark_cycle_pt (); !choice_it.cycled_list ();
         choice_it.forward ()) {
      if (choice_it.data ()->char_class () == choice_char) {
        config = choice_it.data ()->config ();
        if (tessedit_debug_fonts)
          tprintf ("%c(%d=%d%c%c)",
            choice_char, config, (config & 31) >> 2,
            config & 2 ? 'N' : 'B', config & 1 ? 'N' : 'I');
        if (config != -1) {
          config &= 31;          //keep only the table index bits
          word->italic += italic_table[config];
          word->bold += bold_table[config];
          if (font_table[config] != -1)
            fonts.add (font_table[config], 1);
        }
        break;                   //only the first matching choice counts
      }
    }
  }
  //The first call takes the most frequent font out of the histogram,
  //so the second call yields the runner-up.
  find_modal_font (&fonts, &word->font1, &word->font1_count);
  find_modal_font (&fonts, &word->font2, &word->font2_count);
  if (tessedit_debug_fonts)
    tprintf ("\n");
  /*	if (word->font1_count>0)
    {
      for (char_it.mark_cycle_pt(),index=0;
      !char_it.cycled_list();char_it.forward(),index++)
      {
        choice_char=word->best_choice->string()[index];
        choice_it.set_to_list(char_it.data());
        for (choice_it.mark_cycle_pt();!choice_it.cycled_list();choice_it.forward())
        {
          if (choice_it.data()->char_class()==choice_char)
          {
            config=choice_it.data()->config();
            if (config!=-1 && font_table[config&31]==word->font1)
            {
              word->italic+=italic_table[config];
              word->bold+=bold_table[config];
            }
            break;
          }
        }
      }
    }*/
}


/**********************************************************************
 * font_recognition_pass
 *
 * Smooth the fonts for the document.
**********************************************************************/void font_recognition_pass(  //good chars in word                           PAGE_RES_IT &page_res_it) {  INT32 length;                  //of word  INT32 count;                   //of a feature  INT8 doc_font;                 //modal font  INT8 doc_font_count;           //modal font  INT32 doc_italic;              //total italics  INT32 doc_bold;                //total bolds  ROW_RES *row = NULL;           //current row  WERD_RES *word;                //current word  STATS fonts (0, 32);           //font counters  STATS doc_fonts (0, 32);       //font counters  doc_italic = 0;  doc_bold = 0;  page_res_it.restart_page ();  while (page_res_it.word () != NULL) {    if (row != page_res_it.row ()) {      if (row != NULL) {        find_modal_font (&fonts, &row->font1, &row->font1_count);        find_modal_font (&fonts, &row->font2, &row->font2_count);      }      row = page_res_it.row ();  //current row      fonts.clear ();            //clear counters      row->italic = 0;      row->bold = 0;    }    word = page_res_it.word ();    row->italic += word->italic;    row->bold += word->bold;    fonts.add (word->font1, word->font1_count);    fonts.add (word->font2, word->font2_count);    doc_italic += word->italic;    doc_bold += word->bold;    doc_fonts.add (word->font1, word->font1_count);    doc_fonts.add (word->font2, word->font2_count);    page_res_it.forward ();  }  if (row != NULL) {    find_modal_font (&fonts, &row->font1, &row->font1_count);    find_modal_font (&fonts, &row->font2, &row->font2_count);  }  find_modal_font(&doc_fonts, &doc_font, &doc_font_count);  /*    row=NULL;    page_res_it.restart_page();    while (page_res_it.word() != NULL)    {      if (row!=page_res_it.row())      {        row2=row;        row=page_res_it.row();        if (row->font1_count<MIN_FONT_ROW_COUNT)        {          fonts.clear();          italic=0;          bold=0;          
add_in_one_row(row,&fonts,&italic,&bold);          if (row2!=NULL)          {            hdiff=row->row->x_height()-row2->row->x_height();            if (hdiff<0)              hdiff=-hdiff;            if (hdiff<MAX_XHEIGHT_DIFF)              add_in_one_row(row2,&fonts,&italic,&bold);          }          do            page_res_it.forward();          while (page_res_it.row()==row);          row2=page_res_it.row();          if (row2!=NULL)          {            hdiff=row->row->x_height()-row2->row->x_height();            if (hdiff<0)              hdiff=-hdiff;            if (hdiff<MAX_XHEIGHT_DIFF)              add_in_one_row(row2,&fonts,&italic,&bold);          }          row->italic=italic;          row->bold=bold;          find_modal_font(&fonts,&row->font1,&row->font1_count);          find_modal_font(&fonts,&row->font2,&row->font2_count);        }        else          page_res_it.forward();      }      else        page_res_it.forward();    }*/  page_res_it.restart_page ();  while (page_res_it.word () != NULL) {    row = page_res_it.row ();    //current row    word = page_res_it.word ();    length = word->best_choice->string ().length ();    count = word->italic;    if (count < 0)      count = -count;    if (!(count == length || length > 3 && count >= length * 3 / 4))      word->italic = doc_italic > 0 ? 1 : -1;    count = word->bold;    if (count < 0)      count = -count;    if (!(count == length || length > 3 && count >= length * 3 / 4))      word->bold = doc_bold > 0 ? 1 : -1;    count = word->font1_count;    if (!(count == length || length > 3 && count >= length * 3 / 4)) {      word->font1 = doc_font;      word->font1_count = doc_font_count;    }    page_res_it.forward ();  }}/********************************************************************** * add_in_one_row * * Add into the stats for one row. 
**********************************************************************/void add_in_one_row(               //good chars in word                    ROW_RES *row,  //current row                    STATS *fonts,  //font stats                    INT8 *italic,  //output count                    INT8 *bold     //output count                   ) {  WERD_RES *word;                //current word  WERD_RES_IT word_it = &row->word_res_list;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();    *italic += word->italic;    *bold += word->bold;    if (word->font1_count > 0)      fonts->add (word->font1, word->font1_count);    if (word->font2_count > 0)      fonts->add (word->font2, word->font2_count);  }}/********************************************************************** * find_modal_font * * Find the modal font and remove from the stats. **********************************************************************/void find_modal_font(                  //good chars in word                     STATS *fonts,     //font stats                     INT8 *font_out,   //output font                     INT8 *font_count  //output count                    ) {  INT8 font;                     //font index  INT32 count;                   //pile couat  if (fonts->get_total () > 0) {    font = (INT8) fonts->mode ();    *font_out = font;    count = fonts->pile_count (font);    *font_count = count < MAX_INT8 ? count : MAX_INT8;    fonts->add (font, -*font_count);  }  else {    *font_out = -1;    *font_count = 0;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -