⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 oldbasel.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/**********************************************************************
 * File:        oldbasel.cpp  (Formerly oldbl.c)
 * Description: A re-implementation of the old baseline algorithm.
 * Author:      Ray Smith
 * Created:     Wed Oct  6 09:41:48 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "statistc.h"
#include "quadlsq.h"
#include "lmedsq.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "tprintf.h"

/* EXTERN is defined with an empty replacement list, so every EXTERN
 * prefix below expands to nothing in this file. */
#define EXTERN

/* Tunable parameters; the quoted string is each parameter's description. */
EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
                 "Use original wiseowl xheight");
EXTERN BOOL_VAR (textord_oldbl_debug, FALSE,
                 "Debug old baseline generation");
EXTERN BOOL_VAR (textord_debug_baselines, FALSE,
                 "Debug baseline generation");
EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE,
                 "Use para default mechanism");
EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE,
                 "Split stepped splines");
EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE,
                 "Merge suspect partitions");
EXTERN BOOL_VAR (oldbl_corrfix, TRUE,
                 "Improve correlation of heights");
EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
                 "Fix bug in modes threshold for xheights");
EXTERN double_VAR (oldbl_xhfract, 0.4,
                   "Fraction of est allowed in calc");
EXTERN INT_VAR (oldbl_holed_losscount, 10,
                "Max lost before fallback line used");
EXTERN double_VAR (oldbl_dot_error_size, 1.26,
                   "Max aspect ratio of a dot");
EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
                   "X fraction for new partition");

#define TURNLIMIT          1     /*min size for turning point */
#define X_HEIGHT_FRACTION  0.7   /*x-height/caps height */
#define DESCENDER_FRACTION 0.5   /*descender/x-height */
#define MIN_ASC_FRACTION   0.20  /*min size of ascenders */
#define MIN_DESC_FRACTION  0.25  /*min size of descenders */
#define MINASCRISE         2.0   /*min ascender/desc step */
#define MAXHEIGHTVARIANCE  0.15  /*accepted variation in x-height */
#define MAXHEIGHT          300   /*max blob height */
#define MAXOVERLAP         0.1   /*max 10% missed overlap */
#define MAXBADRUN          2     /*max non best for failed */
#define HEIGHTBUCKETS      200   /* Num of buckets */
#define DELTAHEIGHT        5.0   /* Small amount of diff */
#define GOODHEIGHT         5
#define MAXLOOPS           10
#define MODENUM            10
#define MAXPARTS      6
#define SPLINESIZE      23
/* Absolute value; note that the argument is evaluated more than once. */
#define ABS(x) ((x)<0 ? (-(x)) : (x))

/**********************************************************************
 * make_old_baselines
 *
 * Top level function to make baselines the old way.
**********************************************************************/void make_old_baselines(                  //make splines                        TO_BLOCK *block,  //block to do                        BOOL8 testing_on  //correct orientation                       ) {  QSPLINE *prev_baseline;        //baseline of previous row  TO_ROW *row;                   //current row  TO_ROW_IT row_it = block->get_rows ();  BLOBNBOX_IT blob_it;  prev_baseline = NULL;          //nothing yet  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    find_textlines (block, row, 2, NULL);    if (row->xheight <= 0 && prev_baseline != NULL)      find_textlines (block, row, 2, prev_baseline);    if (row->xheight > 0)                                 //was a good one      prev_baseline = &row->baseline;    else {      prev_baseline = NULL;      blob_it.set_to_list (row->blob_list ());      if (textord_debug_baselines)        tprintf ("Row baseline generation failed on row at (%d,%d)\n",          blob_it.data ()->bounding_box ().left (),          blob_it.data ()->bounding_box ().bottom ());    }  }  correlate_lines(block);}/********************************************************************** * correlate_lines * * Correlate the x-heights and ascender heights of a block to fill-in * the ascender height and descender height for rows without one. * Also fix baselines of rows without a decent fit. 
**********************************************************************/void correlate_lines(                 //cleanup lines                     TO_BLOCK *block  //block to do                    ) {  TO_ROW **rows;                 //array of ptrs  int rowcount;                  /*no of rows to do */  register int rowindex;         /*no of row */                                 //iterator  TO_ROW_IT row_it = block->get_rows ();  rowcount = row_it.length ();  if (rowcount == 0) {                                 //default value    block->xheight = block->line_size;    return;                      /*none to do */  }  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));  rowindex = 0;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())                                 //make array    rows[rowindex++] = row_it.data ();                                 /*try to fix bad lines */  correlate_neighbours(block, rows, rowcount);  block->xheight = (float) correlate_with_stats (rows, rowcount);  /*use stats */  if (block->xheight <= 0)                                 //desperate    block->xheight = block->line_size * textord_merge_x;  if (block->xheight < textord_min_xheight)    block->xheight = (float) textord_min_xheight;  free_mem(rows);}/********************************************************************** * correlate_neighbours * * Try to fix rows that had a bad spline fit by using neighbours. 
**********************************************************************/void correlate_neighbours(                  //fix bad rows                          TO_BLOCK *block,  /*block rows are in */                          TO_ROW **rows,    /*rows of block */                          int rowcount      /*no of rows to do */                         ) {  TO_ROW *row;                   /*current row */  register int rowindex;         /*no of row */  register int otherrow;         /*second row */  int upperrow;                  /*row above to use */  int lowerrow;                  /*row below to use */  float biggest;  for (rowindex = 0; rowindex < rowcount; rowindex++) {    row = rows[rowindex];        /*current row */    if (row->xheight < 0) {                                 /*quadratic failed */      for (otherrow = rowindex - 2;        otherrow >= 0        && (rows[otherrow]->xheight < 0.0        || !row->baseline.overlap (&rows[otherrow]->baseline,        MAXOVERLAP)); otherrow--);      upperrow = otherrow;       /*decent row above */      for (otherrow = rowindex + 1;        otherrow < rowcount        && (rows[otherrow]->xheight < 0.0        || !row->baseline.overlap (&rows[otherrow]->baseline,        MAXOVERLAP)); otherrow++);      lowerrow = otherrow;       /*decent row below */      if (upperrow >= 0)        find_textlines (block, row, 2, &rows[upperrow]->baseline);      if (row->xheight < 0 && lowerrow < rowcount)        find_textlines (block, row, 2, &rows[lowerrow]->baseline);      if (row->xheight < 0) {        if (upperrow >= 0)          find_textlines (block, row, 1, &rows[upperrow]->baseline);        else if (lowerrow < rowcount)          find_textlines (block, row, 1, &rows[lowerrow]->baseline);      }    }  }  for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {    row = rows[rowindex];        /*current row */    if (row->xheight < 0)        /*linear failed */                                 /*make do */        row->xheight = 
-row->xheight;    biggest = MAX (biggest, row->xheight);  }}/********************************************************************** * correlate_with_stats * * correlate the x-heights and ascender heights of a block to fill-in * the ascender height and descender height for rows without one. **********************************************************************/int correlate_with_stats(                //fix xheights                         TO_ROW **rows,  /*rows of block */                         int rowcount    /*no of rows to do */                        ) {  TO_ROW *row;                   /*current row */  register int rowindex;         /*no of row */  float lineheight;              /*mean x-height */  float ascheight;               /*average ascenders */  float minascheight;            /*min allowed ascheight */  int xcount;                    /*no of samples for xheight */  float fullheight;              /*mean top height */  int fullcount;                 /*no of samples */  float descheight;              /*mean descender drop */  float mindescheight;           /*min allowed descheight */  int desccount;                 /*no of samples */  float xshift;                  /*shift in xheight */                                 /*no samples */  xcount = fullcount = desccount = 0;  lineheight = ascheight = fullheight = descheight = 0.0;  for (rowindex = 0; rowindex < rowcount; rowindex++) {    row = rows[rowindex];        /*current row */    if (row->ascrise > 0.0) {    /*got ascenders? */      lineheight += row->xheight;/*average x-heights */      ascheight += row->ascrise; /*average ascenders */      xcount++;    }    else {      fullheight += row->xheight;/*assume full height */      fullcount++;    }    if (row->descdrop < 0.0) {   /*got descenders? 
*/                                 /*average descenders */      descheight += row->descdrop;      desccount++;    }  }  if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {    lineheight /= xcount;        /*average x-height */                                 /*average caps height */    fullheight = lineheight + ascheight / xcount;                                 /*must be decent size */    if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))      fullheight = lineheight * (1 + MIN_ASC_FRACTION);  }  else {    fullheight /= fullcount;     /*average max height */                                 /*guess x-height */    lineheight = fullheight * X_HEIGHT_FRACTION;  }  if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))    descheight /= desccount;     /*average descenders */  else                                 /*guess descenders */    descheight = -lineheight * DESCENDER_FRACTION;  minascheight = lineheight * MIN_ASC_FRACTION;  mindescheight = -lineheight * MIN_DESC_FRACTION;  for (rowindex = 0; rowindex < rowcount; rowindex++) {    row = rows[rowindex];        /*do each row */    row->all_caps = FALSE;    if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {    /*no ascenders */      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)      && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {        row->ascrise = fullheight - lineheight;                                 /*shift in x */        xshift = lineheight - row->xheight;                                 /*set to average */        row->xheight = lineheight;      }      else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)      && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {        row->ascrise = row->xheight - lineheight;        xshift = -row->ascrise;  /*shift in x */                                 /*set to average */        row->xheight = lineheight;        row->all_caps = TRUE;      }      else {        row->ascrise = (fullheight - lineheight) * 
row->xheight          / fullheight;        xshift = -row->ascrise;  /*shift in x */                                 /*scale it */        row->xheight -= row->ascrise;        row->all_caps = TRUE;      }      if (row->ascrise < minascheight)        row->ascrise =          row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);    }    if (row->descdrop > mindescheight) {      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)        && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))                                 /*set to average */          row->descdrop = descheight;      else        row->descdrop = -row->xheight * DESCENDER_FRACTION;    }  }  return (int) lineheight;       //block xheight}/********************************************************************** * find_textlines * * Compute the baseline for the given row. **********************************************************************/void find_textlines(                  //get baseline                    TO_BLOCK *block,  //block row is in                    TO_ROW *row,      //row to do                    int degree,       //required approximation                    QSPLINE *spline   //starting spline                   ) {  int partcount;                 /*no of partitions of */  BOOL8 holed_line;              //lost too many blobs  int bestpart;                  /*biggest partition */  char *partids;                 /*partition no of each blob */  int partsizes[MAXPARTS];       /*no in each partition */  int lineheight;                /*guessed x-height */  float jumplimit;               /*allowed delta change */  int *xcoords;                  /*useful sample points */  int *ycoords;                  /*useful sample points */  BOX *blobcoords;               /*edges of blob rectangles */  int blobcount;                 /*no of blobs on line */  float *ydiffs;                 /*diffs from 1st approx */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -