⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 output.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
        rating = text[index] == ' ' ? 100 : 0;      else        rating = word->reject_map[index].accepted ()? 0 : 100;      if (rating > 255)        rating = 255;      if (word->font1_count > 2)        font = word->font1;      else if (row->font1_count > 8)        font = row->font1;      else                                 //font index        font = word->word->flag (W_DONT_CHOP) ? 0 : 1;      lineend = word->word->flag (W_EOL) && index == length - 1;      if (word->word->flag (W_EOL) && tessedit_zero_rejection      && index < length - 1 && text[index + 1] == ' ') {        for (index2 = index + 1; index2 < length && text[index2] == ' ';          index2++);        if (index2 == length)          lineend = TRUE;      }      if (!tessedit_zero_rejection || text[index] != ' '      || tessedit_word_for_word) {                                 //confidence        ocr_append_char (text[index] == ' ' ? unrecognised : text[index], blob_box.left (), blob_box.right (), page_image.get_ysize () - 1 - blob_box.top (), page_image.get_ysize () - 1 - blob_box.bottom (), font, (UINT8) rating,          ptsize,                //point size          blanks, enhancement,   //enhancement          OCR_CDIR_LEFT_RIGHT,          OCR_LDIR_DOWN_RIGHT,          lineend ? OCR_NL_NEWLINE : OCR_NL_NONE);        blanks = 0;      }    }  }  else if (tessedit_word_for_word) {    blanks = word->word->space ();    if (blanks == 0 && !word->word->flag (W_BOL))      blanks = 1;    blob_box = word->word->bounding_box ();    enhancement = 0;    if (word->italic > 0)      enhancement |= EUC_ITALIC;    if (word->bold > 0)      enhancement |= EUC_BOLD;    rating = 100;    if (word->font1_count > 2)      font = word->font1;    else if (row->font1_count > 8)      font = row->font1;    else                                 //font index      font = word->word->flag (W_DONT_CHOP) ? 0 : 1;    lineend = word->word->flag (W_EOL);                                 //font index    ocr_append_char (unrecognised, blob_box.left (), blob_box.right (), page_image.get_ysize () - 1 - blob_box.top (), page_image.get_ysize () - 1 - blob_box.bottom (), font,      rating,                    //confidence      ptsize,                    //point size      blanks, enhancement,       //enhancement      OCR_CDIR_LEFT_RIGHT,      OCR_LDIR_DOWN_RIGHT,      lineend ? OCR_NL_NEWLINE : OCR_NL_NONE);  }}/********************************************************************** * write_map * * Write a map file of 0's and 1'a which associates characters from the .txt * file with those in the .etx file. 0 = .txt char was deleted. 1 = .txt char * is kept.  Note that there may be reject regions in the .etx file WITHOUT * .txt chars being rejected.  The map file should be the same length, and * the same number of lines as the .txt file * * The paramaterised input is because I thought I might be able to generate * multiple map files in a single run.  However, it didn't work because * newdiff needs etx files! **********************************************************************/void write_map(                //output a map file               FILE *mapfile,  //mapfile to write to               WERD_RES *word) {  INT16 index;  int status;  STRING mapstr = "";  if (word->best_choice->string ().length () > 0) {    for (index = 0; index < word->word->space (); index++) {      if (word->reject_spaces &&        (suspect_level >= suspect_space_level) &&        !tessedit_minimal_rejection && !tessedit_zero_rejection)        /* Write rejected spaces to .map file ONLY. Newdiff converts these back to        accepted spaces AFTER generating basic space stats but BEFORE using .etx */        status = fprintf (mapfile, "0");      else        status = fprintf (mapfile, "1");      if (status < 0)        WRITEFAILED.error ("write_map", EXIT, "Space Errno: %d", errno);    }    if ((word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes)) {      for (index = 0; index < 5; index++)        mapstr += '1';    }    else {      ASSERT_HOST (word->reject_map.length () ==        word->best_choice->string ().length ());      for (index = 0; index < word->reject_map.length (); index++) {        if (word->reject_map[index].accepted ())          mapstr += '1';        else          mapstr += '0';      }    }    status = fprintf (mapfile, "%s", mapstr.string ());    if (status < 0)      WRITEFAILED.error ("write_map", EXIT, "Map str Errno: %d", errno);  }  if (word->word->flag (W_EOL)) {    status = fprintf (mapfile, "\n");    if (status < 0)      WRITEFAILED.error ("write_map", EXIT, "Newline Errno: %d", errno);  }  status = fflush (mapfile);  if (status != 0)    WRITEFAILED.error ("write_map", EXIT, "fflush Errno: %d", errno);}/************************************************************************* * open_file() *************************************************************************/FILE *open_outfile(  //open .map & .unlv file                   const char *extension) {  STRING file_name;  FILE *outfile;  file_name = imagebasename + extension;  if (!(outfile = fopen (file_name.string (), "w"))) {    CANTOPENFILE.error ("open_outfile", EXIT, "%s %d",      file_name.string (), errno);  }  return outfile;}void write_unlv_text(WERD_RES *word) {   const char *wordstr;  char buff[512];                //string to output  int i = 0;  int j = 0;  char unrecognised = STRING (unrecognised_char)[0];  int status;  char space_str[3];  wordstr = word->best_choice->string ().string ();  /* DONT need to do anything special for repeated char words - at this stage  the repetition char has been identified and any other chars have been  rejected.  */  for (; wordstr[i] != '\0'; i++) {    if ((wordstr[i] == ' ') ||      (wordstr[i] == '~') || (wordstr[i] == '^') || (wordstr[i] == '|'))      buff[j++] = unrecognised;    else {      if (word->reject_map[i].rejected ())        buff[j++] = '^';         //Add suspect marker      buff[j++] = wordstr[i];    }  }  buff[j] = '\0';  if (strlen (wordstr) > 0) {    if (word->reject_spaces &&      (suspect_level >= suspect_space_level) &&      !tessedit_minimal_rejection && !tessedit_zero_rejection)      strcpy (space_str, "^ ");  //Suspect space    else      strcpy (space_str, " ");   //Certain space    for (i = 0; i < word->word->space (); i++) {      status = fprintf (unlv_file, "%s", space_str);      if (status < 0)        WRITEFAILED.error ("write_unlv_text", EXIT,          "Space Errno: %d", errno);    }    status = fprintf (unlv_file, "%s", buff);    if (status < 0)      WRITEFAILED.error ("write_unlv_text", EXIT, "Word Errno: %d", errno);  }  if (word->word->flag (W_EOL)) {    status = fprintf (unlv_file, "\n");    if (status < 0)      WRITEFAILED.error ("write_unlv_text", EXIT,        "Newline Errno: %d", errno);  }  status = fflush (unlv_file);  if (status != 0)    WRITEFAILED.error ("write_unlv_text", EXIT, "Fflush Errno: %d", errno);}/************************************************************************* * get_rep_char() * Return the first accepted character from the repetition string. This is the * character which is repeated - as determined earlier by fix_rep_char() *************************************************************************/char get_rep_char(  // what char is repeated?                  WERD_RES *word) {  int i;  for (i = 0;    ((i < word->reject_map.length ()) &&    (word->reject_map[i].rejected ())); i++);  if (i < word->reject_map.length ())    return word->best_choice->string ()[i];  else    return STRING (unrecognised_char)[0];}void ensure_rep_chars_are_consistent(WERD_RES *word) {   char rep_char = get_rep_char (word);  char *ptr;  ptr = (char *) word->best_choice->string ().string ();  for (; *ptr != '\0'; ptr++) {    if (*ptr != rep_char)      *ptr = rep_char;  }}/************************************************************************* * SUSPECT LEVELS * * 0 - dont reject ANYTHING * 1,2 - partial rejection * 3 - BEST * * NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and * tessedit_minimal_rejection. *************************************************************************/void set_unlv_suspects(WERD_RES *word) {   int len = word->reject_map.length ();  int i;  const char *ptr;  float rating_per_ch;  ptr = word->best_choice->string ().string ();  if (suspect_level == 0) {    for (i = 0; i < len; i++) {      if (word->reject_map[i].rejected ())        word->reject_map[i].setrej_minimal_rej_accept ();    }    return;  }  if (suspect_level >= 3)    return;                      //Use defaults  /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/  if (safe_dict_word (ptr) && (count_alphas (ptr) > suspect_short_words)) {    /* Unreject alphas in dictionary words */    for (i = 0; i < len; i++) {      if (word->reject_map[i].rejected () && isalpha (ptr[i]))        word->reject_map[i].setrej_minimal_rej_accept ();    }  }  rating_per_ch = word->best_choice->rating () / word->reject_map.length ();  if (rating_per_ch >= suspect_rating_per_ch)    return;                      //Dont touch bad ratings  if ((word->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {    /* Unreject any Tess Acceptable word - but NOT tess reject chs*/    for (i = 0; i < len; i++) {      if (word->reject_map[i].rejected () && (ptr[i] != ' '))        word->reject_map[i].setrej_minimal_rej_accept ();    }  }  for (i = 0; i < len; i++) {    if (word->reject_map[i].rejected ()) {      if (word->reject_map[i].flag (R_DOC_REJ))        word->reject_map[i].setrej_minimal_rej_accept ();      if (word->reject_map[i].flag (R_BLOCK_REJ))        word->reject_map[i].setrej_minimal_rej_accept ();      if (word->reject_map[i].flag (R_ROW_REJ))        word->reject_map[i].setrej_minimal_rej_accept ();    }  }  if (suspect_level == 2)    return;  if (!suspect_constrain_1Il ||  (word->reject_map.length () <= suspect_short_words)) {    for (i = 0; i < len; i++) {      if (word->reject_map[i].rejected ()) {        if ((word->reject_map[i].flag (R_1IL_CONFLICT) ||          word->reject_map[i].flag (R_POSTNN_1IL)))          word->reject_map[i].setrej_minimal_rej_accept ();        if (!suspect_constrain_1Il &&          word->reject_map[i].flag (R_MM_REJECT))          word->reject_map[i].setrej_minimal_rej_accept ();      }    }  }  if ((acceptable_word_string (word->best_choice->string ().string ())    != AC_UNACCEPTABLE) ||  acceptable_number_string (word->best_choice->string ().string ())) {    if (word->reject_map.length () > suspect_short_words) {      for (i = 0; i < len; i++) {        if (word->reject_map[i].rejected () &&          (!word->reject_map[i].perm_rejected () ||          word->reject_map[i].flag (R_1IL_CONFLICT) ||          word->reject_map[i].flag (R_POSTNN_1IL) ||        word->reject_map[i].flag (R_MM_REJECT))) {          word->reject_map[i].setrej_minimal_rej_accept ();        }      }    }  }}INT16 count_alphas(  //how many alphas                   const char *s) {  int count = 0;  for (; *s != '\0'; s++) {    if (isalpha (*s))      count++;  }  return count;}INT16 count_alphanums(  //how many alphanums                      const char *s) {  int count = 0;  for (; *s != '\0'; s++) {    if (isalnum (*s))      count++;  }  return count;}BOOL8 acceptable_number_string(const char *s) {   BOOL8 prev_digit = FALSE;  if (*s == '(')    s++;  if ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))    s++;  for (; *s != '\0'; s++) {    if (isdigit (*s))      prev_digit = TRUE;    else if (prev_digit && ((*s == '.') || (*s == ',') || (*s == '-')))      prev_digit = FALSE;    else if (prev_digit &&      (*(s + 1) == '\0') && ((*s == '%') || (*s == ')')))      return TRUE;    else if (prev_digit &&      (*s == '%') && (*(s + 1) == ')') && (*(s + 2) == '\0'))      return TRUE;    else      return FALSE;  }  return TRUE;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -