output.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,274 行 · 第 1/3 页

CPP
1,274
字号
      TRUE, FALSE, rawfile);  if (tessedit_write_txt_map)    write_map(txt_mapfile, word);  ep_choice = make_epaper_choice (word, newline_type);  word->ep_choice = ep_choice;#endif  character_count += word->best_choice->lengths ().length ();  word_count++;}/********************************************************************** * make_epaper_choice * * Construct the epaper text string for a word, using the reject map to * determine whether each blob should be rejected. **********************************************************************/#if 0WERD_CHOICE *make_epaper_choice(                   //convert one word                                WERD_RES *word,    //word to do                                char newline_type  //type of newline                               ) {  inT16 index = 0;               //to string  inT16 blobindex;               //to word  inT16 prevright = 0;           //right of previous blob  inT16 nextleft;                //left of next blob  PBLOB *blob;  TBOX inset_box;                 //bounding box  PBLOB_IT blob_it;              //blob iterator  char word_string[MAX_PATH];    //converted string  BOOL8 force_total_reject;  char unrecognised = STRING (unrecognised_char)[0];  blob_it.set_to_list (word->outword->blob_list ());  ASSERT_HOST (word->reject_map.length () ==    word->best_choice->string ().length ());  /*  tprintf( "\"%s\" -> length: %d;  blobcount: %d (%d)\n",      word->best_choice->string().string(),        word->best_choice->string().length(),      blob_it.length(),        blob_count( word->outword ) );  */  if (word->best_choice->string ().length () == 0)    force_total_reject = TRUE;  else {    force_total_reject = FALSE;    ASSERT_HOST (blob_it.length () ==      word->best_choice->string ().length ());  }  if (!blob_it.empty ()) {    for (index = 0; index < word->word->space (); index++)      word_string[index] = ' ';  //leading blanks  }  /* Why does this generate leading blanks regardless of whether the  word_choice string is empty, when write_cooked_text ony generates leading  blanks when the string is NOT empty???. */  if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) {    strcpy (word_string + index, "|^~R");    index += 4;    strcpy(word_string + index, unicharset.id_to_unichar(get_rep_char (word)));    index += strlen(unicharset.id_to_unichar(get_rep_char (word)));  }  else {    if (!blob_it.empty ())      prevright = blob_it.data ()->bounding_box ().left ();    //actually first left    for (blobindex = 0, blob_it.mark_cycle_pt ();    !blob_it.cycled_list (); blobindex++, blob_it.forward ()) {      blob = blob_it.data ();      if (word->reject_map[blobindex].accepted ()) {        if (word->best_choice->string ()[blobindex] == ' ')                                 //but not rejected!!          word_string[index++] = unrecognised;        else          word_string[index++] =            word->best_choice->string ()[blobindex];      }      else {                     // start reject        inset_box = blob->bounding_box ();        /* Extend reject box to include rejected neighbours */        while (!blob_it.at_last () &&          (force_total_reject ||        (word->reject_map[blobindex + 1].rejected ()))) {          blobindex++;          blob = blob_it.forward ();                                 //get total box          inset_box += blob->bounding_box ();        }        if (blob_it.at_last ())          nextleft = inset_box.right ();        else          nextleft = blob_it.data_relative (1)->bounding_box ().left ();        //       tprintf("Making reject from (%d,%d)->(%d,%d)\n",        //          inset_box.left(),inset_box.bottom(),        //          inset_box.right(),inset_box.top());        index += make_reject (&inset_box, prevright, nextleft,          &word->denorm, &word_string[index]);      }      prevright = blob->bounding_box ().right ();    }  }  if (newline_type)                                 //end line    word_string[index++] = newline_type;  word_string[index] = '\0';     //terminate string  if (strlen (word_string) != index) {    tprintf ("ASSERT ABOUT TO FAIL: %s, index %d len %d\n",      word_string, index, strlen (word_string));  }                                 //don't pass any zeros  ASSERT_HOST (strlen (word_string) == index);  return new WERD_CHOICE (word_string, 0, 0, NO_PERM);}#endif/********************************************************************** * make_reject * * Add the escape code to the string for the reject. **********************************************************************/inT16make_reject (                    //make reject codeTBOX * inset_box,                 //bounding boxinT16 prevright,                 //previous charinT16 nextleft,                  //next charDENORM * denorm,                 //de-normalizerchar word_string[]               //output string) {  inT16 index;                   //to string  inT16 xpos;                    //start of inset  inT16 ypos;  inT16 width;                   //size of inset  inT16 height;  inT16 left_offset;             //shift form prev char  inT16 right_offset;            //shift to next char  inT16 baseline_offset;         //shift from baseline  inT16 inset_index = 0;         //number of inset  inT16 min_chars;               //min width estimate  inT16 max_chars;               //max width estimate  float x_centre;                //centre of box  index = 0;  x_centre = (inset_box->left () + inset_box->right ()) / 2.0;  left_offset =    (inT16) (denorm->x (inset_box->left ()) - denorm->x (prevright));  right_offset =    (inT16) (denorm->x (nextleft) - denorm->x (inset_box->right ()));  xpos = (inT16) floor (denorm->x (inset_box->left ()));  width = (inT16) ceil (denorm->x (inset_box->right ())) - xpos;  ypos = (inT16) floor (denorm->y (inset_box->bottom (), x_centre));  height = (inT16) ceil (denorm->y (inset_box->top (), x_centre)) - ypos;  baseline_offset = ypos - (inT16) denorm->y (bln_baseline_offset, x_centre);                                 //escape code  word_string[index++] = CTRL_INSET;  min_chars = (inT16) ceil (0.27 * width / denorm->row ()->x_height ());  max_chars = (inT16) floor (1.8 * width / denorm->row ()->x_height ());  /*  Ensure min_chars and max_chars are in the range 0..254. This ensures that  we can add 1 to them to avoid putting \0 in a string, and still not exceed  the max value in a byte.  */  if (min_chars < 0)    min_chars = 0;  if (min_chars > 254)    min_chars = 254;  if (max_chars < min_chars)    max_chars = min_chars;  if (max_chars > 254)    max_chars = 254;                                 //min chars  word_string[index++] = min_chars + 1;                                 //max chars  word_string[index++] = max_chars + 1;  word_string[index++] = 2;      //type?                                 //store index  word_string[index++] = inset_index / 255 + 1;  word_string[index++] = inset_index % 255 + 1;  return index;                  //size of string}/********************************************************************** * determine_newline_type * * Find whether we have a wrapping or hard newline. * Return FALSE if not at end of line. **********************************************************************/char determine_newline_type(                   //test line ends                            WERD *word,        //word to do                            BLOCK *block,      //current block                            WERD *next_word,   //next word                            BLOCK *next_block  //block of next word                           ) {  inT16 end_gap;                 //to right edge  inT16 width;                   //of next word  TBOX word_box;                  //bounding  TBOX next_box;                  //next word  TBOX block_box;                 //block bounding  if (!word->flag (W_EOL))    return FALSE;                //not end of line  if (next_word == NULL || next_block == NULL || block != next_block)    return CTRL_NEWLINE;  if (next_word->space () > 0)    return CTRL_HARDLINE;        //it is tabbed  word_box = word->bounding_box ();  next_box = next_word->bounding_box ();  block_box = block->bounding_box ();                                 //gap to eol  end_gap = block_box.right () - word_box.right ();  end_gap -= (inT32) block->space ();  width = next_box.right () - next_box.left ();  //      tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",  //              block_box.right(),word_box.right(),end_gap,  //              next_box.right(),next_box.left(),width,  //              end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE);  return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;}/********************************************************************** * write_cooked_text * * Write the cooked text (with bold for pass2 and underline for reject) * to the given file. **********************************************************************/#if 0void write_cooked_text(                     //write output                       WERD *word,          //word to do                       const STRING &text,  //text to write                       BOOL8 acceptable,    //good stuff                       BOOL8 pass2,         //done on pass2                       FILE *fp             //file to write                      ) {  inT16 index;                   //blank counter  int status;  static int newaline = 1;  static int havespace = 0;  char buff[512];  const char *wordstr = text.string ();  int i = 0;  char unrecognised = STRING (unrecognised_char)[0];  static int old_segs = 0;  TBOX mybox;  for (i = 0; wordstr[i] != '\0'; i++) {    if (wordstr[i] == ' ')      buff[i] = unrecognised;    else      buff[i] = wordstr[i];  }  buff[i] = '\0';  if (fp == stdout) {    tprintf ("Cooked=%s, %d segs, acceptable=%d",      buff, num_popped - old_segs, acceptable);    old_segs = num_popped;    return;  }  if (text.length () > 0) {    for (index = 0; index < word->space (); index++) {      status = fprintf (fp, " ");      havespace = 1;      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Space Errno: %d", errno);    }    if (pass2) {      status = fprintf (fp, BOLD_ON);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Bold Errno: %d", errno);    }    if (!acceptable) {      status = fprintf (fp, UNDERLINE_ON);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Underline Errno: %d", errno);    }                                 //xiaofan    if (NO_BLOCK && word && strlen (buff)) {      mybox = word->bounding_box ();      if (newaline || !havespace) {        fprintf (fp, " ");        newaline = 0;      }      fprintf (fp, "(%d," INT32FORMAT ",%d," INT32FORMAT ")",        XOFFSET + mybox.left (),        YOFFSET + page_image.get_ysize () - mybox.top (),        XOFFSET + mybox.right (),        YOFFSET + page_image.get_ysize () - mybox.bottom ());      havespace = 0;    }    status = fprintf (fp, "%s", buff);    if (status < 0)      WRITEFAILED.error ("write_cooked_text", EXIT,        "Word Errno: %d", errno);    if (pass2) {      status = fprintf (fp, BOLD_OFF);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Bold off Errno: %d", errno);    }    if (!acceptable) {      status = fprintf (fp, UNDERLINE_OFF);      if (status < 0)        WRITEFAILED.error ("write_cooked_text", EXIT,          "Underline off Errno: %d", errno);    }  }  if (word->flag (W_EOL)) {    status = fprintf (fp, "\n");    newaline = 1;    if (status < 0)      WRITEFAILED.error ("write_cooked_text", EXIT,        "Newline Errno: %d", errno);  }  status = fflush (fp);  if (status != 0)    WRITEFAILED.error ("write_cooked_text", EXIT, "Fflush Errno: %d", errno);}#endif/********************************************************************** * write_shm_text * * Write the cooked text to the shared memory for the api. **********************************************************************/void write_shm_text(                    //write output                    WERD_RES *word,     //word to do                    BLOCK *block,       //block it is from                    ROW_RES *row,       //row it is from                    const STRING &text, //text to write                    const STRING &text_lengths                   ) {  inT32 index;                   //char counter  inT32 index2;                  //char counter  inT32 length;                  //chars in word  inT32 ptsize;                  //font size  inT8 blanks;                   //blanks in word  uinT8 enhancement;             //bold etc  uinT8 font;                    //font index  char unrecognised = STRING (unrecognised_char)[0];  PBLOB *blob;  TBOX blob_box;                  //bounding box  PBLOB_IT blob_it;              //blob iterator  WERD copy_outword;             // copy to denorm  uinT32 rating;                 //of char  BOOL8 lineend;                 //end of line  int offset;  int offset2;                                 //point size  ptsize = pixels_to_pts ((inT32) (row->row->x_height () + row->row->ascenders () - row->row->descenders ()), 300);  if (word->word->flag (W_BOL) && ocr_char_space () < 128    && ocr_send_text (TRUE) != OKAY)    return;                      //release failed  copy_outword = *(word->outword);  copy_outword.baseline_denormalise (&word->denorm);  blob_it.set_to_list (copy_outword.blob_list ());  length = text_lengths.length ();  if (length > 0) {    blanks = word->word->space ();    if (blanks == 0 && tessedit_word_for_word && !word->word->flag (W_BOL))      blanks = 1;    for (index = 0, offset = 0; index < length;         offset += text_lengths[index++], blob_it.forward ()) {      blob = blob_it.data ();      blob_box = blob->bounding_box ();      enhancement = 0;      if (word->italic > 0 || (word->italic == 0 && row->italic > 0))        enhancement |= EUC_ITALIC;      if (word->bold > 0 || (word->bold == 0 && row->bold > 0))        enhancement |= EUC_BOLD;      if (tessedit_write_ratings)        rating = (uinT32) (-word->best_choice->certainty () / 0.035);      else if (tessedit_zero_rejection)        rating = text[offset] == ' ' ? 100 : 0;      else        rating = word->reject_map[index].accepted ()? 0 : 100;      if (rating > 255)        rating = 255;      if (word->font1_count > 2)        font = word->font1;      else if (row->font1_count > 8)        font = row->font1;      else                                 //font index        font = word->word->flag (W_DONT_CHOP) ? 0 : 1;      lineend = word->word->flag (W_EOL) && index == length - 1;      if (word->word->flag (W_EOL) && tessedit_zero_rejection      && index < length - 1 && text[index + text_lengths[index]] == ' ') {        for (index2 = index + 1, offset2 = offset + text_lengths[index];             index2 < length && text[offset2] == ' ';             offset2 += text_lengths[index2++]);        if (index2 == length)          lineend = TRUE;      }      if (!tessedit_zero_rejection || text[offset] != ' '      || tessedit_word_for_word) {                                 //confidence        if (text[offset] == ' ') {        ocr_append_char (unrecognised,                         blob_box.left (), blob_box.right (),                         page_image.get_ysize () - 1 - blob_box.top (),

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?