⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 output.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
  if (write_to_shm)    write_shm_text (word, page_res_it.block ()->block,      page_res_it.row (), *wordstr);  if (tessedit_write_output)    write_cooked_text (word->word, *wordstr, TRUE, FALSE, textfile);  if (tessedit_write_raw_output)    write_cooked_text (word->word, word->raw_choice->string (),      TRUE, FALSE, rawfile);  if (tessedit_write_txt_map)    write_map(txt_mapfile, word);   ep_choice = make_epaper_choice (word, newline_type);  word->ep_choice = ep_choice;  character_count += word->best_choice->string ().length ();  word_count++;}/********************************************************************** * make_epaper_choice * * Construct the epaper text string for a word, using the reject map to * determine whether each blob should be rejected. **********************************************************************/WERD_CHOICE *make_epaper_choice(                   //convert one word                                WERD_RES *word,    //word to do                                char newline_type  //type of newline                               ) {  INT16 index = 0;               //to string  INT16 blobindex;               //to word  INT16 prevright = 0;           //right of previous blob  INT16 nextleft;                //left of next blob  PBLOB *blob;  BOX inset_box;                 //bounding box  PBLOB_IT blob_it;              //blob iterator  char word_string[MAX_PATH];    //converted string  BOOL8 force_total_reject;  char unrecognised = STRING (unrecognised_char)[0];  blob_it.set_to_list (word->outword->blob_list ());  ASSERT_HOST (word->reject_map.length () ==    word->best_choice->string ().length ());  /*  tprintf( "\"%s\" -> length: %d;  blobcount: %d (%d)\n",      word->best_choice->string().string(),        word->best_choice->string().length(),      blob_it.length(),        blob_count( word->outword ) );  */  if (word->best_choice->string ().length () == 0)    force_total_reject = TRUE;  else {    force_total_reject = FALSE;    ASSERT_HOST 
(blob_it.length () ==      word->best_choice->string ().length ());  }  if (!blob_it.empty ()) {    for (index = 0; index < word->word->space (); index++)      word_string[index] = ' ';  //leading blanks  }  /* Why does this generate leading blanks regardless of whether the  word_choice string is empty, when write_cooked_text ony generates leading  blanks when the string is NOT empty???. */  if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) {    strcpy (word_string + index, "|^~R");    index += 4;    word_string[index++] = get_rep_char (word);  }  else {    if (!blob_it.empty ())      prevright = blob_it.data ()->bounding_box ().left ();    //actually first left    for (blobindex = 0, blob_it.mark_cycle_pt ();    !blob_it.cycled_list (); blobindex++, blob_it.forward ()) {      blob = blob_it.data ();      if (word->reject_map[blobindex].accepted ()) {        if (word->best_choice->string ()[blobindex] == ' ')                                 //but not rejected!!          word_string[index++] = unrecognised;        else          word_string[index++] =            word->best_choice->string ()[blobindex];      }      else {                     // start reject        inset_box = blob->bounding_box ();        /* Extend reject box to include rejected neighbours */        while (!blob_it.at_last () &&          (force_total_reject ||        (word->reject_map[blobindex + 1].rejected ()))) {          blobindex++;          blob = blob_it.forward ();                                 //get total box          inset_box += blob->bounding_box ();        }        if (blob_it.at_last ())          nextleft = inset_box.right ();        else          nextleft = blob_it.data_relative (1)->bounding_box ().left ();        //       tprintf("Making reject from (%d,%d)->(%d,%d)\n",        //          inset_box.left(),inset_box.bottom(),        //          inset_box.right(),inset_box.top());        index += make_reject (&inset_box, prevright, nextleft,          &word->denorm, 
&word_string[index]);      }      prevright = blob->bounding_box ().right ();    }  }  if (newline_type)                                 //end line    word_string[index++] = newline_type;  word_string[index] = '\0';     //terminate string  if (strlen (word_string) != index) {    tprintf ("ASSERT ABOUT TO FAIL: %s, index %d len %d\n",      word_string, index, strlen (word_string));  }                                 //don't pass any zeros  ASSERT_HOST (strlen (word_string) == index);  return new WERD_CHOICE (word_string, 0, 0, NO_PERM);}/********************************************************************** * make_reject * * Add the escape code to the string for the reject. **********************************************************************/INT16make_reject (                    //make reject codeBOX * inset_box,                 //bounding boxINT16 prevright,                 //previous charINT16 nextleft,                  //next charDENORM * denorm,                 //de-normalizerchar word_string[]               //output string) {  INT16 index;                   //to string  INT16 xpos;                    //start of inset  INT16 ypos;  INT16 width;                   //size of inset  INT16 height;  INT16 left_offset;             //shift form prev char  INT16 right_offset;            //shift to next char  INT16 baseline_offset;         //shift from baseline  INT16 inset_index = 0;         //number of inset  INT16 min_chars;               //min width estimate  INT16 max_chars;               //max width estimate  float x_centre;                //centre of box  index = 0;  x_centre = (inset_box->left () + inset_box->right ()) / 2.0;  left_offset =    (INT16) (denorm->x (inset_box->left ()) - denorm->x (prevright));  right_offset =    (INT16) (denorm->x (nextleft) - denorm->x (inset_box->right ()));  xpos = (INT16) floor (denorm->x (inset_box->left ()));  width = (INT16) ceil (denorm->x (inset_box->right ())) - xpos;  ypos = (INT16) floor (denorm->y (inset_box->bottom 
(), x_centre));  height = (INT16) ceil (denorm->y (inset_box->top (), x_centre)) - ypos;  baseline_offset = ypos - (INT16) denorm->y (bln_baseline_offset, x_centre);                                 //escape code  word_string[index++] = CTRL_INSET;  min_chars = (INT16) ceil (0.27 * width / denorm->row ()->x_height ());  max_chars = (INT16) floor (1.8 * width / denorm->row ()->x_height ());  /*  Ensure min_chars and max_chars are in the range 0..254. This ensures that  we can add 1 to them to avoid putting \0 in a string, and still not exceed  the max value in a byte.  */  if (min_chars < 0)    min_chars = 0;  if (min_chars > 254)    min_chars = 254;  if (max_chars < min_chars)    max_chars = min_chars;  if (max_chars > 254)    max_chars = 254;                                 //min chars  word_string[index++] = min_chars + 1;                                 //max chars  word_string[index++] = max_chars + 1;  word_string[index++] = 2;      //type?                                 //store index  word_string[index++] = inset_index / 255 + 1;  word_string[index++] = inset_index % 255 + 1;  return index;                  //size of string}/********************************************************************** * determine_newline_type * * Find whether we have a wrapping or hard newline. * Return FALSE if not at end of line. 
**********************************************************************/char determine_newline_type(                   //test line ends                            WERD *word,        //word to do                            BLOCK *block,      //current block                            WERD *next_word,   //next word                            BLOCK *next_block  //block of next word                           ) {  INT16 end_gap;                 //to right edge  INT16 width;                   //of next word  BOX word_box;                  //bounding  BOX next_box;                  //next word  BOX block_box;                 //block bounding  if (!word->flag (W_EOL))    return FALSE;                //not end of line  if (next_word == NULL || next_block == NULL || block != next_block)    return CTRL_NEWLINE;  if (next_word->space () > 0)    return CTRL_HARDLINE;        //it is tabbed  word_box = word->bounding_box ();  next_box = next_word->bounding_box ();  block_box = block->bounding_box ();                                 //gap to eol  end_gap = block_box.right () - word_box.right ();  end_gap -= (INT32) block->space ();  width = next_box.right () - next_box.left ();  //      tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",  //              block_box.right(),word_box.right(),end_gap,  //              next_box.right(),next_box.left(),width,  //              end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE);  return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;}/********************************************************************** * write_cooked_text * * Write the cooked text (with bold for pass2 and underline for reject) * to the given file. 
**********************************************************************/void write_cooked_text(                     //write output                       WERD *word,          //word to do                       const STRING &text,  //text to write                       BOOL8 acceptable,    //good stuff                       BOOL8 pass2,         //done on pass2                       FILE *fp             //file to write                      ) {  INT16 index;                   //blank counter  int status;  static int newaline = 1;  static int havespace = 0;  char buff[512];  const char *wordstr = text.string ();  int i = 0;  char unrecognised = STRING (unrecognised_char)[0];  static int old_segs = 0;  BOX mybox;  for (i = 0; wordstr[i] != '\0'; i++) {    if (wordstr[i] == ' ')      buff[i] = unrecognised;    else      buff[i] = wordstr[i];  }  buff[i] = '\0';  if (fp == stdout) {    tprintf ("Cooked=%s, %d segs, acceptable=%d",      buff, num_popped - old_segs, acceptable);    old_segs = num_popped;    return;  }  if (text.length () > 0) {    for (index = 0; index < word->space (); index++) {      status = fprintf (fp, " ");      havespace = 1;      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Space Errno: %d", errno);    }    if (pass2) {      status = fprintf (fp, BOLD_ON);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Bold Errno: %d", errno);    }    if (!acceptable) {      status = fprintf (fp, UNDERLINE_ON);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Underline Errno: %d", errno);    }                                 //xiaofan    if (NO_BLOCK && word && strlen (buff)) {      mybox = word->bounding_box ();      if (newaline || !havespace) {        fprintf (fp, " ");        newaline = 0;      }      fprintf (fp, "(%d," INT32FORMAT ",%d," INT32FORMAT ")",        XOFFSET + mybox.left (),        YOFFSET + page_image.get_ysize () - mybox.top (),        XOFFSET + 
mybox.right (),        YOFFSET + page_image.get_ysize () - mybox.bottom ());      havespace = 0;    }    status = fprintf (fp, "%s", buff);    if (status < 0)      WRITEFAILED.error ("write_cooked_text", EXIT,        "Word Errno: %d", errno);    if (pass2) {      status = fprintf (fp, BOLD_OFF);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Bold off Errno: %d", errno);    }    if (!acceptable) {      status = fprintf (fp, UNDERLINE_OFF);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Underline off Errno: %d", errno);    }  }  if (word->flag (W_EOL)) {    status = fprintf (fp, "\n");    newaline = 1;    if (status < 0)      WRITEFAILED.error ("write_cooked_text", EXIT,        "Newline Errno: %d", errno);  }  status = fflush (fp);  if (status != 0)    WRITEFAILED.error ("write_cooked_text", EXIT, "Fflush Errno: %d", errno);}/********************************************************************** * write_shm_text * * Write the cooked text to the shared memory for the api. 
**********************************************************************/void write_shm_text(                    //write output                    WERD_RES *word,     //word to do                    BLOCK *block,       //block it is from                    ROW_RES *row,       //row it is from                    const STRING &text  //text to write                   ) {  INT32 index;                   //char counter  INT32 index2;                  //char counter  INT32 length;                  //chars in word  INT32 ptsize;                  //font size  INT8 blanks;                   //blanks in word  UINT8 enhancement;             //bold etc  UINT8 font;                    //font index  char unrecognised = STRING (unrecognised_char)[0];  PBLOB *blob;  BOX blob_box;                  //bounding box  PBLOB_IT blob_it;              //blob iterator  WERD copy_outword;             // copy to denorm  UINT32 rating;                 //of char  BOOL8 lineend;                 //end of line                                 //point size  ptsize = pixels_to_pts ((INT32) (row->row->x_height () + row->row->ascenders () - row->row->descenders ()), 300);  if (word->word->flag (W_BOL) && ocr_char_space () < 128    && ocr_send_text (TRUE) != OKAY)    return;                      //release failed  copy_outword = *(word->outword);  copy_outword.baseline_denormalise (&word->denorm);  blob_it.set_to_list (copy_outword.blob_list ());  length = text.length ();  if (length > 0) {    blanks = word->word->space ();    if (blanks == 0 && tessedit_word_for_word && !word->word->flag (W_BOL))      blanks = 1;    for (index = 0; index < length; index++, blob_it.forward ()) {      blob = blob_it.data ();      blob_box = blob->bounding_box ();      enhancement = 0;      if (word->italic > 0 || word->italic == 0 && row->italic > 0)        enhancement |= EUC_ITALIC;      if (word->bold > 0 || word->bold == 0 && row->bold > 0)        enhancement |= EUC_BOLD;      if (tessedit_write_ratings)        rating = 
(UINT32) (-word->best_choice->certainty () / 0.035);      else if (tessedit_zero_rejection)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -