baseapi.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,123 行 · 第 1/3 页
CPP
1,123 行
  for (int t = 0; t < 255; ++t) {    omega_0 += histogram[t];    mu_t += t * static_cast<double>(histogram[t]);    if (omega_0 == 0)      continue;    omega_1 = H - omega_0;    mu_0 = mu_t / omega_0;    mu_1 = (mu_T - mu_t) / omega_1;    double sig_sq_B = mu_1 - mu_0;    sig_sq_B *= sig_sq_B * omega_0 * omega_1;    if (best_t < 0 || sig_sq_B > best_sig_sq_B) {      best_sig_sq_B = sig_sq_B;      best_t = t;      best_omega_0 = omega_0;    }  }  if (H_out != NULL) *H_out = H;  if (omega0_out != NULL) *omega0_out = best_omega_0;  return best_t;}// Threshold the given grey or color image into the tesseract global// image ready for recognition. Requires thresholds and hi_value// produced by OtsuThreshold above.void TessBaseAPI::ThresholdRect(const unsigned char* imagedata,                                int bytes_per_pixel,                                int bytes_per_line,                                int left, int top,                                int width, int height,                                const int* thresholds,                                const int* hi_values) {  IMAGELINE line;  page_image.create(width, height, 1);  line.init(width);  // For each line in the image, fill the IMAGELINE class and put it into the  // Tesseract global page_image. Note that Tesseract stores images with the  // bottom at y=0 and 0 is black, so we need 2 kinds of inversion.  const unsigned char* data = imagedata + top*bytes_per_line +                              left*bytes_per_pixel;  for (int y = height - 1 ; y >= 0; --y) {    const unsigned char* pix = data;    for (int x = 0; x < width; ++x, pix += bytes_per_pixel) {      line.pixels[x] = 1;      for (int ch = 0; ch < bytes_per_pixel; ++ch) {        if (hi_values[ch] >= 0 &&            (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {          line.pixels[x] = 0;          break;        }      }    }    page_image.put_line(0, y, width, &line, 0);    data += bytes_per_line;  }}// Cut out the requested rectangle of the binary image to the// tesseract global image ready for recognition.void TessBaseAPI::CopyBinaryRect(const unsigned char* imagedata,                                 int bytes_per_line,                                 int left, int top,                                 int width, int height) {  // Copy binary image, cutting out the required rectangle.  IMAGE image;  image.capture(const_cast<unsigned char*>(imagedata),                bytes_per_line*8, top + height, 1);  page_image.create(width, height, 1);  copy_sub_image(&image, left, 0, width, height, &page_image, 0, 0, false);}// Low-level function to recognize the current global image to a string.char* TessBaseAPI::RecognizeToString() {  BLOCK_LIST    block_list;  FindLines(&block_list);  // Now run the main recognition.  PAGE_RES* page_res = Recognize(&block_list, NULL);  return TesseractToText(page_res);}// Find lines from the image making the BLOCK_LIST.void TessBaseAPI::FindLines(BLOCK_LIST* block_list) {  // The following call creates a full-page block and then runs connected  // component analysis and text line creation.  pgeditor_read_file(input_file, block_list);}// Recognize the tesseract global image and return the result as Tesseract// internal structures.PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) {  if (tessedit_resegment_from_boxes)    apply_boxes(block_list);  PAGE_RES* page_res = new PAGE_RES(block_list);  if (interactive_mode) {    pgeditor_main(block_list);                  //pgeditor user I/F  } else if (tessedit_train_from_boxes) {    apply_box_training(block_list);  } else {    // Now run the main recognition.    recog_all_words(page_res, monitor);  }  return page_res;}// Return the maximum length that the output text string might occupy.int TessBaseAPI::TextLength(PAGE_RES* page_res) {  PAGE_RES_IT   page_res_it(page_res);  int total_length = 2;  // Iterate over the data structures to extract the recognition result.  for (page_res_it.restart_page(); page_res_it.word () != NULL;       page_res_it.forward()) {    WERD_RES *word = page_res_it.word();    WERD_CHOICE* choice = word->best_choice;    if (choice != NULL) {      total_length += choice->string().length() + 1;      for (int i = 0; i < word->reject_map.length(); ++i) {        if (word->reject_map[i].rejected())          ++total_length;      }    }  }  return total_length;}// Returns an array of all word confidences, terminated by -1.int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) {  if (!page_res) return NULL;  int n_word = 0;  PAGE_RES_IT res_it(page_res);  for (res_it.restart_page(); res_it.word () != NULL; res_it.forward())    n_word++;  int* conf = new int[n_word+1];  n_word = 0;  for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) {    WERD_RES *word = res_it.word();    WERD_CHOICE* choice = word->best_choice;    int w_conf = static_cast<int>(100 + 5 * choice->certainty());                 // This is the eq for converting Tesseract confidence to 1..100    if (w_conf < 0) w_conf = 0;    if (w_conf > 100) w_conf = 100;    conf[n_word++] = w_conf;  }  conf[n_word] = -1;  return conf;}// Returns the average word confidence for Tesseract page result.int TessBaseAPI::TextConf(PAGE_RES* page_res) {  int* conf = AllTextConfidences(page_res);  if (!conf) return 0;  int sum = 0;  int *pt = conf;  while (*pt >= 0) sum += *pt++;  if (pt != conf) sum /= pt - conf;  delete [] conf;  return sum;}// Make a text string from the internal data structures.// The input page_res is deleted.char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) {  if (page_res != NULL) {    int total_length = TextLength(page_res);    PAGE_RES_IT   page_res_it(page_res);    char* result = new char[total_length];    char* ptr = result;    for (page_res_it.restart_page(); page_res_it.word () != NULL;         page_res_it.forward()) {      WERD_RES *word = page_res_it.word();      WERD_CHOICE* choice = word->best_choice;      if (choice != NULL) {        strcpy(ptr, choice->string().string());        ptr += strlen(ptr);        if (word->word->flag(W_EOL))          *ptr++ = '\n';        else          *ptr++ = ' ';      }    }    *ptr++ = '\n';    *ptr = '\0';    delete page_res;    return result;  }  return NULL;}static int ConvertWordToBoxText(WERD_RES *word,                                ROW_RES* row,                                int left,                                int bottom,                                char* word_str) {  // Copy the output word and denormalize it back to image coords.  WERD copy_outword;  copy_outword = *(word->outword);  copy_outword.baseline_denormalise(&word->denorm);  PBLOB_IT blob_it;  blob_it.set_to_list(copy_outword.blob_list());  int length = copy_outword.blob_list()->length();  int output_size = 0;  if (length > 0) {    for (int index = 0, offset = 0; index < length;         offset += word->best_choice->lengths()[index++], blob_it.forward()) {      PBLOB* blob = blob_it.data();      TBOX blob_box = blob->bounding_box();      if (word->tess_failed ||          blob_box.left() < 0 ||          blob_box.right() > page_image.get_xsize() ||          blob_box.bottom() < 0 ||          blob_box.top() > page_image.get_ysize()) {        // Bounding boxes can be illegal when tess fails on a word.        blob_box = word->word->bounding_box();  // Use original word as backup.        tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n",                blob_box.left(), blob_box.bottom(),                blob_box.right(), blob_box.top());      }      // A single classification unit can be composed of several UTF-8      // characters. Append each of them to the result.      for (int sub = 0; sub < word->best_choice->lengths()[index]; ++sub) {        char ch = word->best_choice->string()[offset + sub];        // Tesseract uses space for recognition failure. Fix to a reject        // character, '~' so we don't create illegal box files.        if (ch == ' ')          ch = '~';        word_str[output_size++] = ch;      }      sprintf(word_str + output_size, " %d %d %d %d\n",              blob_box.left() + left, blob_box.bottom() + bottom,              blob_box.right() + left, blob_box.top() + bottom);      output_size += strlen(word_str + output_size);    }  }  return output_size;}// Multiplier for textlength assumes 4 numbers @ 5 digits and a space// plus the newline and the orginial character = 4*(5+1)+2const int kMaxCharsPerChar = 26;// Make a text string from the internal data structures.// The input page_res is deleted.// The text string takes the form of a box file as needed for training.char* TessBaseAPI::TesseractToBoxText(PAGE_RES* page_res,                                      int left, int bottom) {  if (page_res != NULL) {    int total_length = TextLength(page_res) * kMaxCharsPerChar;    PAGE_RES_IT   page_res_it(page_res);    char* result = new char[total_length];    char* ptr = result;    for (page_res_it.restart_page(); page_res_it.word () != NULL;         page_res_it.forward()) {      WERD_RES *word = page_res_it.word();      ptr += ConvertWordToBoxText(word,page_res_it.row(),left, bottom, ptr);    }    *ptr = '\0';    delete page_res;    return result;  }  return NULL;}// Make a text string from the internal data structures.// The input page_res is deleted. The text string is converted// to UNLV-format: Latin-1 with specific reject and suspect codes.const char kUnrecognized = '~';// Conversion table for non-latin characters.// Maps characters out of the latin set into the latin set.// TODO(rays) incorporate this translation into unicharset.const int kUniChs[] = {  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0};// Latin chars corresponding to the unicode chars above.const int kLatinChs[] = {  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0};char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) {  bool tilde_crunch_written = false;  bool last_char_was_newline = true;  bool last_char_was_tilde = false;  if (page_res != NULL) {    int total_length = TextLength(page_res);    PAGE_RES_IT   page_res_it(page_res);    char* result = new char[total_length];    char* ptr = result;    for (page_res_it.restart_page(); page_res_it.word () != NULL;         page_res_it.forward()) {      WERD_RES *word = page_res_it.word();      // Process the current word.      if (word->unlv_crunch_mode != CR_NONE) {        if (word->unlv_crunch_mode != CR_DELETE &&            (!tilde_crunch_written ||             (word->unlv_crunch_mode == CR_KEEP_SPACE &&              word->word->space () > 0 &&              !word->word->flag (W_FUZZY_NON) &&              !word->word->flag (W_FUZZY_SP)))) {          if (!word->word->flag (W_BOL) &&              word->word->space () > 0 &&              !word->word->flag (W_FUZZY_NON) &&              !word->word->flag (W_FUZZY_SP)) {            /* Write a space to separate from preceeding good text */            *ptr++ = ' ';            last_char_was_tilde = false;          }          if (!last_char_was_tilde) {            // Write a reject char.            last_char_was_tilde = true;            *ptr++ = kUnrecognized;            tilde_crunch_written = true;            last_char_was_newline = false;          }        }      } else {        // NORMAL PROCESSING of non tilde crunched words.        tilde_crunch_written = false;        if (last_char_was_tilde &&            word->word->space () == 0 &&            (word->best_choice->string ()[0] == ' ')) {          /* Prevent adjacent tilde across words - we know that adjacent tildes within             words have been removed */          char* p = (char *) word->best_choice->string().string ();          strcpy (p, p + 1);       //shuffle up          p = (char *) word->best_choice->lengths().string ();          strcpy (p, p + 1);       //shuffle up          word->reject_map.remove_pos (0);          PBLOB_IT blob_it = word->outword->blob_list ();          delete blob_it.extract ();   //get rid of reject blob        }        if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps)          ensure_rep_chars_are_consistent(word);        set_unlv_suspects(word);        const char* wordstr = word->best_choice->string().string();        if (wordstr[0] != 0) {          if (!last_char_was_newline)            *ptr++ = ' ';          else            last_char_was_newline = false;          int offset = 0;          const STRING& lengths = word->best_choice->lengths();          int length = lengths.length();          for (int i = 0; i < length; offset += lengths[i++]) {            if (wordstr[offset] == ' ' ||                wordstr[offset] == '~' ||                wordstr[offset] == '|') {              *ptr++ = kUnrecognized;              last_char_was_tilde = true;            } else {              if (word->reject_map[i].rejected())                *ptr++ = '^';              UNICHAR ch(wordstr + offset, lengths[i]);              int uni_ch = ch.first_uni();              for (int j = 0; kUniChs[j] != 0; ++j) {                if (kUniChs[j] == uni_ch) {                  uni_ch = kLatinChs[j];                  break;                }              }              if (uni_ch <= 0xff) {                *ptr++ = static_cast<char>(uni_ch);                last_char_was_tilde = false;              } else {                *ptr++ = kUnrecognized;                last_char_was_tilde = true;              }            }
baseapi.cpp - 源码说明

本页面展示了「一个google的OCR源码」中的 baseapi.cpp 源码文件，采用 C++ 编程语言编写，共 1,123 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与google相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?