⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tordmain.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
      if (blob_size >= textord_noise_sizelimit * row->x_height ()      && blob_size < row->x_height () * 2) {        trans_threshold = blob_size / textord_noise_sizefraction;        trans_count = blob->count_transitions (trans_threshold);        if (trans_count < textord_noise_translimit)          norm_count++;      }      else if (blob_box.height () > row->x_height () * 2        && (!word_it.at_first () || !blob_it.at_first ()))        dot_count += 2;      #ifndef SECURE_NAMES      if (testing_on) {        tprintf          ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",          blob_box.left (), blob_box.bottom (), blob_box.right (),          blob_box.top (), blob->out_list ()->length (), trans_count,          blob_box.bottom () - row->base_line (blob_box.left ()));      }      #endif    }  }  #ifndef SECURE_NAMES  if (textord_noise_debug) {    tprintf ("Row ending at (%d,%g):",      blob_box.right (), row->base_line (blob_box.right ()));    tprintf (" R=%g, dc=%d, nc=%d, %s\n",      norm_count > 0 ? (float) dot_count / norm_count : 9999,      dot_count, norm_count,      dot_count > norm_count * textord_noise_normratio      && dot_count > 2 ? "REJECTED" : "ACCEPTED");  }  #endif  return super_norm_count < textord_noise_sncount    && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;}/********************************************************************** * clean_noise_from_words * * Move blobs of words from rows of garbage into the reject blobs list. 
**********************************************************************/void clean_noise_from_words(          //remove empties                            ROW *row  //row to clean                           ) {  BOX blob_box;                  //bounding box  INT8 *word_dud;                //was it chucked  C_BLOB *blob;                  //current blob  C_OUTLINE *outline;            //current outline  WERD *word;                    //current word  INT32 blob_size;               //biggest size  INT32 trans_count;             //no of transitions  INT32 trans_threshold;         //noise tolerance  INT32 dot_count;               //small objects  INT32 norm_count;              //normal objects  INT32 dud_words;               //number discarded  INT32 ok_words;                //number remaining  INT32 word_index;              //current word                                 //words of row  WERD_IT word_it = row->word_list ();  C_BLOB_IT blob_it;             //blob iterator  C_OUTLINE_IT out_it;           //outline iterator  ok_words = word_it.length ();  if (ok_words == 0)    return;  word_dud = (INT8 *) alloc_mem (ok_words * sizeof (INT8));  dud_words = 0;  ok_words = 0;  word_index = 0;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();      //current word    dot_count = 0;    norm_count = 0;                                 //blobs in word    blob_it.set_to_list (word->cblob_list ());    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();    blob_it.forward ()) {      blob = blob_it.data ();      if (!word->flag (W_DONT_CHOP)) {                                 //get outlines        out_it.set_to_list (blob->out_list ());        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();        out_it.forward ()) {          outline = out_it.data ();          blob_box = outline->bounding_box ();          blob_size =            blob_box.width () >            blob_box.height ()? blob_box.width () : blob_box.            
height();          if (blob_size < textord_noise_sizelimit * row->x_height ())            dot_count++;         //count smal outlines          if (!outline->child ()->empty ()            && blob_box.height () <            (1 + textord_noise_syfract) * row->x_height ()            && blob_box.height () >            (1 - textord_noise_syfract) * row->x_height ()            && blob_box.width () <            (1 + textord_noise_sxfract) * row->x_height ()            && blob_box.width () >            (1 - textord_noise_sxfract) * row->x_height ())            norm_count++;        //count smal outlines        }      }      else        norm_count++;      blob_box = blob->bounding_box ();      blob_size =        blob_box.width () >        blob_box.height ()? blob_box.width () : blob_box.height ();      if (blob_size >= textord_noise_sizelimit * row->x_height ()      && blob_size < row->x_height () * 2) {        trans_threshold = blob_size / textord_noise_sizefraction;        trans_count = blob->count_transitions (trans_threshold);        if (trans_count < textord_noise_translimit)          norm_count++;      }      else if (blob_box.height () > row->x_height () * 2        && (!word_it.at_first () || !blob_it.at_first ()))        dot_count += 2;    }    if (dot_count > 2) {      if (dot_count > norm_count * textord_noise_normratio * 2)        word_dud[word_index] = 2;      else if (dot_count > norm_count * textord_noise_normratio)        word_dud[word_index] = 1;      else        word_dud[word_index] = 0;    }    else      word_dud[word_index] = 0;    if (word_dud[word_index] == 2)      dud_words++;    else      ok_words++;    word_index++;  }  word_index = 0;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    if (word_dud[word_index] == 2    || word_dud[word_index] == 1 && dud_words > ok_words) {      word = word_it.data ();    //current word                                 //rejected blobs      blob_it.set_to_list (word->rej_cblob_list ());      
                           //move from blobs      blob_it.add_list_after (word->cblob_list ());    }    word_index++;  }  free_mem(word_dud);}/********************************************************************** * tweak_row_baseline * * Shift baseline to fit the blobs more accurately where they are * close enough. **********************************************************************/void tweak_row_baseline(          //remove empties                        ROW *row  //row to clean                       ) {  BOX blob_box;                  //bounding box  C_BLOB *blob;                  //current blob  WERD *word;                    //current word  INT32 blob_count;              //no of blobs  INT32 src_index;               //source segment  INT32 dest_index;              //destination segment  INT32 *xstarts;                //spline segments  double *coeffs;                //spline coeffs  float ydiff;                   //baseline error  float x_centre;                //centre of blob                                 //words of row  WERD_IT word_it = row->word_list ();  C_BLOB_IT blob_it;             //blob iterator  blob_count = 0;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();      //current word                                 //get total blobs    blob_count += word->cblob_list ()->length ();  }  if (blob_count == 0)    return;  xstarts =    (INT32 *) alloc_mem ((blob_count + row->baseline.segments + 1) *    sizeof (INT32));  coeffs =    (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 *    sizeof (double));  src_index = 0;  dest_index = 0;  xstarts[0] = row->baseline.xcoords[0];  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();      //current word                                 //blobs in word    blob_it.set_to_list (word->cblob_list ());    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();    blob_it.forward ()) {      blob = 
blob_it.data ();      blob_box = blob->bounding_box ();      x_centre = (blob_box.left () + blob_box.right ()) / 2.0;      ydiff = blob_box.bottom () - row->base_line (x_centre);      if (ydiff < 0)        ydiff = -ydiff / row->x_height ();      else        ydiff = ydiff / row->x_height ();      if (ydiff < textord_blshift_maxshift        && blob_box.height () / row->x_height () >      textord_blshift_xfraction) {        if (xstarts[dest_index] >= x_centre)          xstarts[dest_index] = blob_box.left ();        coeffs[dest_index * 3] = 0;        coeffs[dest_index * 3 + 1] = 0;        coeffs[dest_index * 3 + 2] = blob_box.bottom ();        //shift it        dest_index++;        xstarts[dest_index] = blob_box.right () + 1;      }      else {        if (xstarts[dest_index] <= x_centre) {          while (row->baseline.xcoords[src_index + 1] <= x_centre          && src_index < row->baseline.segments - 1) {            if (row->baseline.xcoords[src_index + 1] >            xstarts[dest_index]) {              coeffs[dest_index * 3] =                row->baseline.quadratics[src_index].a;              coeffs[dest_index * 3 + 1] =                row->baseline.quadratics[src_index].b;              coeffs[dest_index * 3 + 2] =                row->baseline.quadratics[src_index].c;              dest_index++;              xstarts[dest_index] =                row->baseline.xcoords[src_index + 1];            }            src_index++;          }          coeffs[dest_index * 3] =            row->baseline.quadratics[src_index].a;          coeffs[dest_index * 3 + 1] =            row->baseline.quadratics[src_index].b;          coeffs[dest_index * 3 + 2] =            row->baseline.quadratics[src_index].c;          dest_index++;          xstarts[dest_index] = row->baseline.xcoords[src_index + 1];        }      }    }  }  while (src_index < row->baseline.segments    && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])    src_index++;  while (src_index < row->baseline.segments) {  
  coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;    coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;    coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;    dest_index++;    src_index++;    xstarts[dest_index] = row->baseline.xcoords[src_index];  }                                 //turn to spline  row->baseline = QSPLINE (dest_index, xstarts, coeffs);  free_mem(xstarts);  free_mem(coeffs);}/********************************************************************** * blob_y_order * * Sort function to sort blobs in y from page top. **********************************************************************/INT32 blob_y_order(              //sort function                   void *item1,  //items to compare                   void *item2) {                                 //converted ptr  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;                                 //converted ptr  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;  if (blob1->bounding_box ().bottom () > blob2->bounding_box ().bottom ())    return -1;  else if (blob1->bounding_box ().bottom () <    blob2->bounding_box ().bottom ())    return 1;  else {    if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())      return -1;    else if (blob1->bounding_box ().left () >      blob2->bounding_box ().left ())      return 1;    else      return 0;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -