📄 oldbasel.cpp
字号:
/**********************************************************************
 * File:        oldbasel.cpp  (Formerly oldbl.c)
 * Description: A re-implementation of the old baseline algorithm.
 * Author:      Ray Smith
 * Created:     Wed Oct 6 09:41:48 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "statistc.h"
#include "quadlsq.h"
#include "lmedsq.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "tprintf.h"

#define EXTERN

EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
"Use original wiseowl xheight");
EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
"Fix bug in modes threshold for xheights");
EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
EXTERN INT_VAR (oldbl_holed_losscount, 10,
"Max lost before fallback line used");
EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
"X fraction for new partition");

#define TURNLIMIT          1     /*min size for turning point */
#define X_HEIGHT_FRACTION  0.7   /*x-height/caps height */
#define DESCENDER_FRACTION 0.5   /*descender/x-height */
#define MIN_ASC_FRACTION   0.20  /*min size of ascenders */
#define MIN_DESC_FRACTION  0.25  /*min size of descenders */
#define MINASCRISE         2.0   /*min ascender/desc step */
#define MAXHEIGHTVARIANCE  0.15  /*accepted variation in x-height */
#define MAXHEIGHT          300   /*max blob height */
#define MAXOVERLAP         0.1   /*max 10% missed overlap */
#define MAXBADRUN          2     /*max non best for failed */
#define HEIGHTBUCKETS      200   /* Num of buckets */
#define DELTAHEIGHT        5.0   /* Small amount of diff */
#define GOODHEIGHT         5
#define MAXLOOPS           10
#define MODENUM            10
#define MAXPARTS           6
#define SPLINESIZE         23

#define ABS(x) ((x)<0 ? (-(x)) : (x))

/**********************************************************************
 * make_old_baselines
 *
 * Top level function to make baselines the old way.
 *
 * Every row of the block is passed to find_textlines with degree 2 and
 * no starting spline. If that leaves row->xheight <= 0 and the previous
 * row was good, the fit is retried using the previous row's baseline as
 * the starting spline. A row counts as good when its xheight is positive
 * afterwards. Once all rows are done, correlate_lines is run on the block.
 *
 * block       block whose rows get baselines
 * testing_on  not referenced by this function
 **********************************************************************/

void make_old_baselines(                  //make splines
                        TO_BLOCK *block,  //block to do
                        BOOL8 testing_on  //correct orientation
                       ) {
  QSPLINE *prev_baseline = NULL; //baseline of previous good row
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    find_textlines (block, row, 2, NULL);
    if (row->xheight <= 0 && prev_baseline != NULL)
      find_textlines (block, row, 2, prev_baseline);

    if (row->xheight > 0) {
                                 //was a good one
      prev_baseline = &row->baseline;
    }
    else {
                                 //a failed row must not seed the next one
      prev_baseline = NULL;
      if (textord_debug_baselines) {
        BLOBNBOX_IT blob_it;     //only needed for the trace

        blob_it.set_to_list (row->blob_list ());
        if (blob_it.empty ()) {
                                 //no blob to take coordinates from
          tprintf ("Row baseline generation failed on an empty row\n");
        }
        else {
          tprintf ("Row baseline generation failed on row at (%d,%d)\n",
            blob_it.data ()->bounding_box ().left (),
            blob_it.data ()->bounding_box ().bottom ());
        }
      }
    }
  }
  correlate_lines(block);
}


/**********************************************************************
 * correlate_lines
 *
 * Correlate the x-heights and ascender heights of a
block to fill-in * the ascender height and descender height for rows without one. * Also fix baselines of rows without a decent fit. **********************************************************************/void correlate_lines( //cleanup lines TO_BLOCK *block //block to do ) { TO_ROW **rows; //array of ptrs int rowcount; /*no of rows to do */ register int rowindex; /*no of row */ //iterator TO_ROW_IT row_it = block->get_rows (); rowcount = row_it.length (); if (rowcount == 0) { //default value block->xheight = block->line_size; return; /*none to do */ } rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); rowindex = 0; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) //make array rows[rowindex++] = row_it.data (); /*try to fix bad lines */ correlate_neighbours(block, rows, rowcount); block->xheight = (float) correlate_with_stats (rows, rowcount); /*use stats */ if (block->xheight <= 0) //desperate block->xheight = block->line_size * textord_merge_x; if (block->xheight < textord_min_xheight) block->xheight = (float) textord_min_xheight; free_mem(rows);}/********************************************************************** * correlate_neighbours * * Try to fix rows that had a bad spline fit by using neighbours. 
**********************************************************************/void correlate_neighbours( //fix bad rows TO_BLOCK *block, /*block rows are in */ TO_ROW **rows, /*rows of block */ int rowcount /*no of rows to do */ ) { TO_ROW *row; /*current row */ register int rowindex; /*no of row */ register int otherrow; /*second row */ int upperrow; /*row above to use */ int lowerrow; /*row below to use */ float biggest; for (rowindex = 0; rowindex < rowcount; rowindex++) { row = rows[rowindex]; /*current row */ if (row->xheight < 0) { /*quadratic failed */ for (otherrow = rowindex - 2; otherrow >= 0 && (rows[otherrow]->xheight < 0.0 || !row->baseline.overlap (&rows[otherrow]->baseline, MAXOVERLAP)); otherrow--); upperrow = otherrow; /*decent row above */ for (otherrow = rowindex + 1; otherrow < rowcount && (rows[otherrow]->xheight < 0.0 || !row->baseline.overlap (&rows[otherrow]->baseline, MAXOVERLAP)); otherrow++); lowerrow = otherrow; /*decent row below */ if (upperrow >= 0) find_textlines (block, row, 2, &rows[upperrow]->baseline); if (row->xheight < 0 && lowerrow < rowcount) find_textlines (block, row, 2, &rows[lowerrow]->baseline); if (row->xheight < 0) { if (upperrow >= 0) find_textlines (block, row, 1, &rows[upperrow]->baseline); else if (lowerrow < rowcount) find_textlines (block, row, 1, &rows[lowerrow]->baseline); } } } for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { row = rows[rowindex]; /*current row */ if (row->xheight < 0) /*linear failed */ /*make do */ row->xheight = -row->xheight; biggest = MAX (biggest, row->xheight); }}/********************************************************************** * correlate_with_stats * * correlate the x-heights and ascender heights of a block to fill-in * the ascender height and descender height for rows without one. 
**********************************************************************/int correlate_with_stats( //fix xheights TO_ROW **rows, /*rows of block */ int rowcount /*no of rows to do */ ) { TO_ROW *row; /*current row */ register int rowindex; /*no of row */ float lineheight; /*mean x-height */ float ascheight; /*average ascenders */ float minascheight; /*min allowed ascheight */ int xcount; /*no of samples for xheight */ float fullheight; /*mean top height */ int fullcount; /*no of samples */ float descheight; /*mean descender drop */ float mindescheight; /*min allowed descheight */ int desccount; /*no of samples */ float xshift; /*shift in xheight */ /*no samples */ xcount = fullcount = desccount = 0; lineheight = ascheight = fullheight = descheight = 0.0; for (rowindex = 0; rowindex < rowcount; rowindex++) { row = rows[rowindex]; /*current row */ if (row->ascrise > 0.0) { /*got ascenders? */ lineheight += row->xheight;/*average x-heights */ ascheight += row->ascrise; /*average ascenders */ xcount++; } else { fullheight += row->xheight;/*assume full height */ fullcount++; } if (row->descdrop < 0.0) { /*got descenders? 
*/ /*average descenders */ descheight += row->descdrop; desccount++; } } if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { lineheight /= xcount; /*average x-height */ /*average caps height */ fullheight = lineheight + ascheight / xcount; /*must be decent size */ if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) fullheight = lineheight * (1 + MIN_ASC_FRACTION); } else { fullheight /= fullcount; /*average max height */ /*guess x-height */ lineheight = fullheight * X_HEIGHT_FRACTION; } if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) descheight /= desccount; /*average descenders */ else /*guess descenders */ descheight = -lineheight * DESCENDER_FRACTION; minascheight = lineheight * MIN_ASC_FRACTION; mindescheight = -lineheight * MIN_DESC_FRACTION; for (rowindex = 0; rowindex < rowcount; rowindex++) { row = rows[rowindex]; /*do each row */ row->all_caps = FALSE; if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { /*no ascenders */ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { row->ascrise = fullheight - lineheight; /*shift in x */ xshift = lineheight - row->xheight; /*set to average */ row->xheight = lineheight; } else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { row->ascrise = row->xheight - lineheight; xshift = -row->ascrise; /*shift in x */ /*set to average */ row->xheight = lineheight; row->all_caps = TRUE; } else { row->ascrise = (fullheight - lineheight) * row->xheight / fullheight; xshift = -row->ascrise; /*shift in x */ /*scale it */ row->xheight -= row->ascrise; row->all_caps = TRUE; } if (row->ascrise < minascheight) row->ascrise = row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); } if (row->descdrop > mindescheight) { if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) /*set to average */ row->descdrop = 
descheight; else row->descdrop = -row->xheight * DESCENDER_FRACTION; } } return (int) lineheight; //block xheight}/********************************************************************** * find_textlines * * Compute the baseline for the given row. **********************************************************************/void find_textlines( //get baseline TO_BLOCK *block, //block row is in TO_ROW *row, //row to do int degree, //required approximation QSPLINE *spline //starting spline ) { int partcount; /*no of partitions of */ BOOL8 holed_line; //lost too many blobs int bestpart; /*biggest partition */ char *partids; /*partition no of each blob */ int partsizes[MAXPARTS]; /*no in each partition */ int lineheight; /*guessed x-height */ float jumplimit; /*allowed delta change */ int *xcoords; /*useful sample points */ int *ycoords; /*useful sample points */ BOX *blobcoords; /*edges of blob rectangles */ int blobcount; /*no of blobs on line */ float *ydiffs; /*diffs from 1st approx */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -