📄 output.cpp
字号:
if (write_to_shm) write_shm_text (word, page_res_it.block ()->block, page_res_it.row (), *wordstr); if (tessedit_write_output) write_cooked_text (word->word, *wordstr, TRUE, FALSE, textfile); if (tessedit_write_raw_output) write_cooked_text (word->word, word->raw_choice->string (), TRUE, FALSE, rawfile); if (tessedit_write_txt_map) write_map(txt_mapfile, word); ep_choice = make_epaper_choice (word, newline_type); word->ep_choice = ep_choice; character_count += word->best_choice->string ().length (); word_count++;}/********************************************************************** * make_epaper_choice * * Construct the epaper text string for a word, using the reject map to * determine whether each blob should be rejected. **********************************************************************/WERD_CHOICE *make_epaper_choice( //convert one word WERD_RES *word, //word to do char newline_type //type of newline ) { INT16 index = 0; //to string INT16 blobindex; //to word INT16 prevright = 0; //right of previous blob INT16 nextleft; //left of next blob PBLOB *blob; BOX inset_box; //bounding box PBLOB_IT blob_it; //blob iterator char word_string[MAX_PATH]; //converted string BOOL8 force_total_reject; char unrecognised = STRING (unrecognised_char)[0]; blob_it.set_to_list (word->outword->blob_list ()); ASSERT_HOST (word->reject_map.length () == word->best_choice->string ().length ()); /* tprintf( "\"%s\" -> length: %d; blobcount: %d (%d)\n", word->best_choice->string().string(), word->best_choice->string().length(), blob_it.length(), blob_count( word->outword ) ); */ if (word->best_choice->string ().length () == 0) force_total_reject = TRUE; else { force_total_reject = FALSE; ASSERT_HOST (blob_it.length () == word->best_choice->string ().length ()); } if (!blob_it.empty ()) { for (index = 0; index < word->word->space (); index++) word_string[index] = ' '; //leading blanks } /* Why does this generate leading blanks regardless of whether the word_choice string is empty, when write_cooked_text ony generates leading blanks when the string is NOT empty???. */ if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) { strcpy (word_string + index, "|^~R"); index += 4; word_string[index++] = get_rep_char (word); } else { if (!blob_it.empty ()) prevright = blob_it.data ()->bounding_box ().left (); //actually first left for (blobindex = 0, blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blobindex++, blob_it.forward ()) { blob = blob_it.data (); if (word->reject_map[blobindex].accepted ()) { if (word->best_choice->string ()[blobindex] == ' ') //but not rejected!! word_string[index++] = unrecognised; else word_string[index++] = word->best_choice->string ()[blobindex]; } else { // start reject inset_box = blob->bounding_box (); /* Extend reject box to include rejected neighbours */ while (!blob_it.at_last () && (force_total_reject || (word->reject_map[blobindex + 1].rejected ()))) { blobindex++; blob = blob_it.forward (); //get total box inset_box += blob->bounding_box (); } if (blob_it.at_last ()) nextleft = inset_box.right (); else nextleft = blob_it.data_relative (1)->bounding_box ().left (); // tprintf("Making reject from (%d,%d)->(%d,%d)\n", // inset_box.left(),inset_box.bottom(), // inset_box.right(),inset_box.top()); index += make_reject (&inset_box, prevright, nextleft, &word->denorm, &word_string[index]); } prevright = blob->bounding_box ().right (); } } if (newline_type) //end line word_string[index++] = newline_type; word_string[index] = '\0'; //terminate string if (strlen (word_string) != index) { tprintf ("ASSERT ABOUT TO FAIL: %s, index %d len %d\n", word_string, index, strlen (word_string)); } //don't pass any zeros ASSERT_HOST (strlen (word_string) == index); return new WERD_CHOICE (word_string, 0, 0, NO_PERM);}/********************************************************************** * make_reject * * Add the escape code to the string for the reject. **********************************************************************/INT16make_reject ( //make reject codeBOX * inset_box, //bounding boxINT16 prevright, //previous charINT16 nextleft, //next charDENORM * denorm, //de-normalizerchar word_string[] //output string) { INT16 index; //to string INT16 xpos; //start of inset INT16 ypos; INT16 width; //size of inset INT16 height; INT16 left_offset; //shift form prev char INT16 right_offset; //shift to next char INT16 baseline_offset; //shift from baseline INT16 inset_index = 0; //number of inset INT16 min_chars; //min width estimate INT16 max_chars; //max width estimate float x_centre; //centre of box index = 0; x_centre = (inset_box->left () + inset_box->right ()) / 2.0; left_offset = (INT16) (denorm->x (inset_box->left ()) - denorm->x (prevright)); right_offset = (INT16) (denorm->x (nextleft) - denorm->x (inset_box->right ())); xpos = (INT16) floor (denorm->x (inset_box->left ())); width = (INT16) ceil (denorm->x (inset_box->right ())) - xpos; ypos = (INT16) floor (denorm->y (inset_box->bottom (), x_centre)); height = (INT16) ceil (denorm->y (inset_box->top (), x_centre)) - ypos; baseline_offset = ypos - (INT16) denorm->y (bln_baseline_offset, x_centre); //escape code word_string[index++] = CTRL_INSET; min_chars = (INT16) ceil (0.27 * width / denorm->row ()->x_height ()); max_chars = (INT16) floor (1.8 * width / denorm->row ()->x_height ()); /* Ensure min_chars and max_chars are in the range 0..254. This ensures that we can add 1 to them to avoid putting \0 in a string, and still not exceed the max value in a byte. */ if (min_chars < 0) min_chars = 0; if (min_chars > 254) min_chars = 254; if (max_chars < min_chars) max_chars = min_chars; if (max_chars > 254) max_chars = 254; //min chars word_string[index++] = min_chars + 1; //max chars word_string[index++] = max_chars + 1; word_string[index++] = 2; //type? //store index word_string[index++] = inset_index / 255 + 1; word_string[index++] = inset_index % 255 + 1; return index; //size of string}/********************************************************************** * determine_newline_type * * Find whether we have a wrapping or hard newline. * Return FALSE if not at end of line. **********************************************************************/char determine_newline_type( //test line ends WERD *word, //word to do BLOCK *block, //current block WERD *next_word, //next word BLOCK *next_block //block of next word ) { INT16 end_gap; //to right edge INT16 width; //of next word BOX word_box; //bounding BOX next_box; //next word BOX block_box; //block bounding if (!word->flag (W_EOL)) return FALSE; //not end of line if (next_word == NULL || next_block == NULL || block != next_block) return CTRL_NEWLINE; if (next_word->space () > 0) return CTRL_HARDLINE; //it is tabbed word_box = word->bounding_box (); next_box = next_word->bounding_box (); block_box = block->bounding_box (); //gap to eol end_gap = block_box.right () - word_box.right (); end_gap -= (INT32) block->space (); width = next_box.right () - next_box.left (); // tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n", // block_box.right(),word_box.right(),end_gap, // next_box.right(),next_box.left(),width, // end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE); return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;}/********************************************************************** * write_cooked_text * * Write the cooked text (with bold for pass2 and underline for reject) * to the given file. **********************************************************************/void write_cooked_text( //write output WERD *word, //word to do const STRING &text, //text to write BOOL8 acceptable, //good stuff BOOL8 pass2, //done on pass2 FILE *fp //file to write ) { INT16 index; //blank counter int status; static int newaline = 1; static int havespace = 0; char buff[512]; const char *wordstr = text.string (); int i = 0; char unrecognised = STRING (unrecognised_char)[0]; static int old_segs = 0; BOX mybox; for (i = 0; wordstr[i] != '\0'; i++) { if (wordstr[i] == ' ') buff[i] = unrecognised; else buff[i] = wordstr[i]; } buff[i] = '\0'; if (fp == stdout) { tprintf ("Cooked=%s, %d segs, acceptable=%d", buff, num_popped - old_segs, acceptable); old_segs = num_popped; return; } if (text.length () > 0) { for (index = 0; index < word->space (); index++) { status = fprintf (fp, " "); havespace = 1; if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Space Errno: %d", errno); } if (pass2) { status = fprintf (fp, BOLD_ON); if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Bold Errno: %d", errno); } if (!acceptable) { status = fprintf (fp, UNDERLINE_ON); if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Underline Errno: %d", errno); } //xiaofan if (NO_BLOCK && word && strlen (buff)) { mybox = word->bounding_box (); if (newaline || !havespace) { fprintf (fp, " "); newaline = 0; } fprintf (fp, "(%d," INT32FORMAT ",%d," INT32FORMAT ")", XOFFSET + mybox.left (), YOFFSET + page_image.get_ysize () - mybox.top (), XOFFSET + mybox.right (), YOFFSET + page_image.get_ysize () - mybox.bottom ()); havespace = 0; } status = fprintf (fp, "%s", buff); if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Word Errno: %d", errno); if (pass2) { status = fprintf (fp, BOLD_OFF); if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Bold off Errno: %d", errno); } if (!acceptable) { status = fprintf (fp, UNDERLINE_OFF); if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Underline off Errno: %d", errno); } } if (word->flag (W_EOL)) { status = fprintf (fp, "\n"); newaline = 1; if (status < 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Newline Errno: %d", errno); } status = fflush (fp); if (status != 0) WRITEFAILED.error ("write_cooked_text", EXIT, "Fflush Errno: %d", errno);}/********************************************************************** * write_shm_text * * Write the cooked text to the shared memory for the api. **********************************************************************/void write_shm_text( //write output WERD_RES *word, //word to do BLOCK *block, //block it is from ROW_RES *row, //row it is from const STRING &text //text to write ) { INT32 index; //char counter INT32 index2; //char counter INT32 length; //chars in word INT32 ptsize; //font size INT8 blanks; //blanks in word UINT8 enhancement; //bold etc UINT8 font; //font index char unrecognised = STRING (unrecognised_char)[0]; PBLOB *blob; BOX blob_box; //bounding box PBLOB_IT blob_it; //blob iterator WERD copy_outword; // copy to denorm UINT32 rating; //of char BOOL8 lineend; //end of line //point size ptsize = pixels_to_pts ((INT32) (row->row->x_height () + row->row->ascenders () - row->row->descenders ()), 300); if (word->word->flag (W_BOL) && ocr_char_space () < 128 && ocr_send_text (TRUE) != OKAY) return; //release failed copy_outword = *(word->outword); copy_outword.baseline_denormalise (&word->denorm); blob_it.set_to_list (copy_outword.blob_list ()); length = text.length (); if (length > 0) { blanks = word->word->space (); if (blanks == 0 && tessedit_word_for_word && !word->word->flag (W_BOL)) blanks = 1; for (index = 0; index < length; index++, blob_it.forward ()) { blob = blob_it.data (); blob_box = blob->bounding_box (); enhancement = 0; if (word->italic > 0 || word->italic == 0 && row->italic > 0) enhancement |= EUC_ITALIC; if (word->bold > 0 || word->bold == 0 && row->bold > 0) enhancement |= EUC_BOLD; if (tessedit_write_ratings) rating = (UINT32) (-word->best_choice->certainty () / 0.035); else if (tessedit_zero_rejection)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -