⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stopper.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/******************************************************************************
 **	Filename:    stopper.c
 **	Purpose:     Stopping criteria for word classifier.
 **	Author:      Dan Johnson
 **	History:     Mon Apr 29 14:56:49 1991, DSJ, Created.
 **
 **	(c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "stopper.h"
#include "emalloc.h"
#include "matchdefs.h"
#include "debug.h"
#include "callcpp.h"
#include "permute.h"
#include "context.h"
#include "permnum.h"
#include "danerror.h"
#include "const.h"
#include "freelist.h"
#include "efio.h"
#include "globals.h"
#include "scanutils.h"

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#ifdef __UNIX__
#include <assert.h>
#endif

/* these are kludges - add appropriate .h file later */
extern float CertaintyScale;     /* from subfeat.h */

#define MAX_WERD_SIZE   100
#define MAX_AMBIG_SIZE    3
#define DANGEROUS_AMBIGS  "tessdata/DangAmbigs"

typedef LIST AMBIG_TABLE;

/* Per-blob record stored inside a viable choice. */
typedef struct
{
  UINT8 Class;
  UINT16 NumChunks;
  float Certainty;
}
CHAR_CHOICE;

/* One candidate word.  Blob is declared with a single element; the code that
   allocates these records is not in this part of the file, so the real
   number of Blob entries is decided there.
   NOTE(review): presumably Length gives the Blob count - confirm. */
typedef struct
{
  float Rating;
  float Certainty;
  FLOAT32 AdjustFactor;
  int Length;
  CHAR_CHOICE Blob[1];
} VIABLE_CHOICE_STRUCT;
typedef VIABLE_CHOICE_STRUCT *VIABLE_CHOICE;

/* A viable choice together with per-chunk certainty and class arrays. */
typedef struct
{
  VIABLE_CHOICE Choice;
  float ChunkCertainty[MAX_NUM_CHUNKS];
  UINT8 ChunkClass[MAX_NUM_CHUNKS];
}
EXPANDED_CHOICE;

/**----------------------------------------------------------------------------
          Macros
----------------------------------------------------------------------------**/
/* Accessors for the fields of the first entry of a list of viable choices. */
#define BestCertainty(Choices)  (((VIABLE_CHOICE) first (Choices))->Certainty)
#define BestRating(Choices) (((VIABLE_CHOICE) first (Choices))->Rating)
#define BestFactor(Choices) (((VIABLE_CHOICE) first (Choices))->AdjustFactor)

/* (F2 - F1) scaled by AmbigThresholdGain, minus AmbigThresholdOffset. */
#define AmbigThreshold(F1,F2)	(((F2) - (F1)) * AmbigThresholdGain - \
				AmbigThresholdOffset)

/*---------------------------------------------------------------------------
          Private Function Prototypes
----------------------------------------------------------------------------*/
void AddNewChunk(VIABLE_CHOICE Choice, int Blob);

int AmbigsFound(char *Word,
                char *CurrentChar,
                const char *Tail,
                LIST Ambigs,
                DANGERR *fixpt);

int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice);

int CmpChoiceRatings(void *arg1,    /* VIABLE_CHOICE Choice1 */
                     void *arg2);   /* VIABLE_CHOICE Choice2 */

void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice);

AMBIG_TABLE *FillAmbigTable();

int FreeBadChoice(void *item1,      /* VIABLE_CHOICE Choice */
                  void *item2);     /* EXPANDED_CHOICE *BestChoice */

int LengthOfShortestAlphaRun(register char *Word);

VIABLE_CHOICE NewViableChoice (A_CHOICE * Choice,
                               FLOAT32 AdjustFactor,
                               float Certainties[]);

void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice);

void ReplaceDuplicateChoice (VIABLE_CHOICE OldChoice,
                             A_CHOICE * NewChoice,
                             FLOAT32 AdjustFactor,
                             float Certainties[]);

int StringSameAs(const char *String, VIABLE_CHOICE ViableChoice);

int UniformCertainties(CHOICES_LIST Choices, A_CHOICE *BestChoice);

/**----------------------------------------------------------------------------
        Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
/* Name of file containing potentially dangerous ambiguities */
static const char *DangerousAmbigs = DANGEROUS_AMBIGS;

/* Word for which stopper debug information should be printed to stdout */
static char *WordToDebug = NULL;

/* flag used to disable accumulation of word choices during compound word
  permutation */
BOOL8 KeepWordChoices = TRUE;

/* additional certainty padding allowed before a word is rejected */
static FLOAT32 RejectOffset = 0.0;

/* structures to keep track of viable word choices */
static VIABLE_CHOICE BestRawChoice = NULL;
static LIST BestChoices = NIL;
static PIECES_STATE CurrentSegmentation;

make_float_var (NonDictCertainty, -2.50, MakeNonDictCertainty,
17, 2, SetNonDictCertainty,
"Certainty threshold for non-dict words");

make_float_var (RejectCertaintyOffset, 1.0, MakeRejectCertaintyOffset,
17, 3, SetRejectCertaintyOffset, "Reject certainty offset");

make_int_var (SmallWordSize, 2, MakeSmallWordSize,
17, 4, SetSmallWordSize,
"Size of dict word to be treated as non-dict word");

make_float_var (CertaintyPerChar, -0.50, MakeCertaintyPerChar,
17, 5, SetCertaintyPerChar,
"Certainty to add for each dict char above SmallWordSize");

make_float_var (CertaintyVariation, 3.0, MakeCertaintyVariation,
17, 6, SetCertaintyVariation,
"Max certaintly variation allowed in a word (in sigma)");

make_int_var (StopperDebugLevel, 0, MakeStopperDebugLevel,
17, 7, SetStopperDebugLevel, "Stopper debug level");

make_float_var (AmbigThresholdGain, 8.0, MakeAmbigThresholdGain,
17, 8, SetAmbigThresholdGain,
"Gain factor for ambiguity threshold");

make_float_var (AmbigThresholdOffset, 1.5, MakeAmbigThresholdOffset,
17, 9, SetAmbigThresholdOffset,
"Certainty offset for ambiguity threshold");

//extern char *demodir;
extern int first_pass;
INT_VAR (tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");

/**----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
int AcceptableChoice(CHOICES_LIST Choices,
                     A_CHOICE *BestChoice,
                     A_CHOICE *RawChoice,
                     DANGERR *fixpt) {
/*
 **	Parameters:
 **		Choices		choices for current segmentation
 **		BestChoice	best choice for current segmentation
 **		RawChoice	best raw choice for current segmentation
 **				(not referenced by this routine)
 **		fixpt		if non-NULL, its index field is reset to -1
 **				before anything else is done; it is then
 **				handed on to NoDangerousAmbig
 **	Globals:
 **		NonDictCertainty	certainty for a non-dict word
 **		SmallWordSize		size of word to be treated as non-word
 **		CertaintyPerChar	certainty to add for each dict char
 **	Operation: Return TRUE if the results from this segmentation are
 **		good enough to stop.  Otherwise return FALSE.  The word is
 **		accepted only when it has no dangerous ambiguity, its
 **		certainty is strictly above the computed threshold and
 **		UniformCertainties approves it.
 **	Return: TRUE or FALSE.
 **	Exceptions: none
 **	History: Mon Apr 29 14:57:32 1991, DSJ, Created.
 */
  float CertaintyThreshold = NonDictCertainty;
  int WordSize;

  if (fixpt != NULL)
    fixpt->index = -1;
  if ((BestChoice == NULL) || (class_string (BestChoice) == NULL))
    return (FALSE);

  if (StopperDebugLevel >= 1)
    cprintf ("\nStopper:  %s (word=%c, case=%c, punct=%c)\n",
      class_string (BestChoice),
      (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
      (case_ok (class_string (BestChoice)) ? 'y' : 'n'),
      ((punctuation_ok (class_string (BestChoice)) != -1) ? 'y' : 'n'));

  if (valid_word (class_string (BestChoice)) &&
      case_ok (class_string (BestChoice)) &&
      punctuation_ok (class_string (BestChoice)) != -1) {
    /* Well-formed dictionary word: shift the threshold by CertaintyPerChar
       for every character of the shortest alpha run beyond SmallWordSize. */
    WordSize = LengthOfShortestAlphaRun (class_string (BestChoice));
    WordSize -= SmallWordSize;
    if (WordSize < 0)
      WordSize = 0;
    CertaintyThreshold += WordSize * CertaintyPerChar;
  }
  else if (stopper_numbers_on && valid_number (class_string (BestChoice))) {
    /* Valid number: stopper_numbers_on itself is used as the multiplier. */
    CertaintyThreshold += stopper_numbers_on * CertaintyPerChar;
  }

  if (StopperDebugLevel >= 1)
    cprintf ("Stopper:  Certainty = %4.1f, Threshold = %4.1f\n",
      class_certainty (BestChoice), CertaintyThreshold);

  if (NoDangerousAmbig (class_string (BestChoice), fixpt)
      && class_certainty (BestChoice) > CertaintyThreshold &&
      UniformCertainties (Choices, BestChoice))
    return (TRUE);
  else
    return (FALSE);
}                                /* AcceptableChoice */


/*---------------------------------------------------------------------------*/
int AcceptableResult(A_CHOICE *BestChoice, A_CHOICE *RawChoice) {
/*
 **	Parameters:
 **		BestChoice	best choice for current word
 **		RawChoice	best raw choice for current word
 **				(not referenced by this routine)
 **	Globals:
 **		NonDictCertainty	certainty for a non-dict word
 **		SmallWordSize		size of word to be treated as non-word
 **		CertaintyPerChar	certainty to add for each dict char
 **		BestChoices		list of all good choices found
 **		RejectOffset		allowed offset before a word is rejected
 **	Operation: Return FALSE if the best choice for the current word
 **		is questionable and should be tried again on the second
 **		pass or should be flagged to the user.  A NULL choice, a
 **		choice with a NULL string, or an ambiguous current word
 **		(see CurrentWordAmbig) is always rejected.
 **	Return: TRUE or FALSE.
 **	Exceptions: none
 **	History: Thu May  9 14:05:05 1991, DSJ, Created.
 */
  float CertaintyThreshold = NonDictCertainty - RejectOffset;
  int WordSize;

  /* Reject a missing choice before the debug trace below: the trace reads
     class_string (BestChoice) and would otherwise dereference NULL whenever
     StopperDebugLevel >= 1. */
  if ((BestChoice == NULL) || (class_string (BestChoice) == NULL))
    return (FALSE);

  if (StopperDebugLevel >= 1)
    cprintf ("\nRejecter: %s (word=%c, case=%c, punct=%c, unambig=%c)\n",
      class_string (BestChoice),
      (valid_word (class_string (BestChoice)) ? 'y' : 'n'),
      (case_ok (class_string (BestChoice)) ? 'y' : 'n'),
      ((punctuation_ok (class_string (BestChoice)) != -1) ? 'y' : 'n'),
      ((rest (BestChoices) != NIL) ? 'n' : 'y'));

  /* The ambiguity test stays after the trace so the debug output for
     non-NULL choices is exactly what it was before. */
  if (CurrentWordAmbig ())
    return (FALSE);

  if (valid_word (class_string (BestChoice)) &&
      case_ok (class_string (BestChoice)) &&
      punctuation_ok (class_string (BestChoice)) != -1) {
    /* Same length-based threshold adjustment as in AcceptableChoice. */
    WordSize = LengthOfShortestAlphaRun (class_string (BestChoice));
    WordSize -= SmallWordSize;
    if (WordSize < 0)
      WordSize = 0;
    CertaintyThreshold += WordSize * CertaintyPerChar;
  }

  if (StopperDebugLevel >= 1)
    cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f   ",
      class_certainty (BestChoice), CertaintyThreshold);

  if (class_certainty (BestChoice) > CertaintyThreshold) {
    if (StopperDebugLevel >= 1)
      cprintf ("ACCEPTED\n");
    return (TRUE);
  }
  else {
    if (StopperDebugLevel >= 1)
      cprintf ("REJECTED\n");
    return (FALSE);
  }
}                                /* AcceptableResult */


/*---------------------------------------------------------------------------*/
int AlternativeChoicesWorseThan(FLOAT32 Threshold) {
/*
 **	Parameters:
 **		Threshold	minimum adjust factor for alternative choices
 **	Globals:
 **		BestChoices	alternative choices for current word
 **	Operation: This routine returns TRUE if there are no alternative
 **		choices for the current word OR if all alternatives have
 **		an adjust factor worse than Threshold.  "Worse" means
 **		strictly greater: an alternative whose AdjustFactor is
 **		less than or equal to Threshold makes the result FALSE.
 **	Return: TRUE or FALSE.
 **	Exceptions: none
 **	History: Mon Jun  3 09:36:31 1991, DSJ, Created.
 */
  LIST Alternatives;
  VIABLE_CHOICE Choice;

  /* Skip the head of BestChoices; only the remaining entries are tested. */
  Alternatives = rest (BestChoices);
  iterate(Alternatives) {
    Choice = (VIABLE_CHOICE) first (Alternatives);
    if (Choice->AdjustFactor <= Threshold)
      return (FALSE);
  }

  return (TRUE);
}                                /* AlternativeChoicesWorseThan */


/*---------------------------------------------------------------------------*/
int CurrentBestChoiceIs(const char *Word) {
/*
 **	Parameters:
 **		Word	string to compare to current best choice
 **	Globals:
 **		BestChoices	set of best choices for current word
 **	Operation: Returns TRUE if Word is the same as the current best
 **		choice, FALSE otherwise (including when BestChoices is
 **		empty).
 **	Return: TRUE or FALSE
 **	Exceptions: none
 **	History: Thu May 30 14:44:22 1991, DSJ, Created.
 */
  return (BestChoices != NIL &&
    StringSameAs (Word, (VIABLE_CHOICE) first (BestChoices)));
}                                /* CurrentBestChoiceIs */


/*---------------------------------------------------------------------------*/
FLOAT32 CurrentBestChoiceAdjustFactor() {
/*
 **	Parameters: none
 **	Globals:
 **		BestChoices	set of best choices for current word
 **	Operation: Return the adjustment factor for the best choice for
 **		the current word.
 **	Return: Adjust factor for current best choice, or MAX_FLOAT32 when
 **		BestChoices is empty.
 **	Exceptions: none
 **	History: Thu May 30 14:48:24 1991, DSJ, Created.
 */
  VIABLE_CHOICE BestChoice;

  if (BestChoices == NIL)
    return (MAX_FLOAT32);

  BestChoice = (VIABLE_CHOICE) first (BestChoices);
  return (BestChoice->AdjustFactor);
}                                /* CurrentBestChoiceAdjustFactor */


/*---------------------------------------------------------------------------*/
int CurrentWordAmbig() {
/*
 **	Parameters: none
 **	Globals:
 **		BestChoices	set of best choices for current word
 **	Operation: This routine returns TRUE if there are multiple good
 **		choices for the current word and FALSE otherwise, i.e. it
 **		tests whether anything follows the head of BestChoices.
 **	Return: TRUE or FALSE
 **	Exceptions: none
 **	History: Wed May 22 15:38:38 1991, DSJ, Created.
 */
  return (rest (BestChoices) != NIL);
}                                /* CurrentWordAmbig */


/*---------------------------------------------------------------------------*/
void DebugWordChoices() {
/*
 **	Parameters: none
 **	Globals:
 **		BestRawChoice
 **		BestChoices
 **	Operation: Print the current choices for this word to stdout.
 **		Output is produced when StopperDebugLevel >= 1, or when
 **		WordToDebug is set and matches the first entry of
 **		BestChoices.
 **	Return: none
 **	Exceptions: none
 **	History: Wed May 15 13:52:08 1991, DSJ, Created.
 */
  LIST Choices;
  int i;
  char LabelString[80];

  /* Parentheses make the intended grouping of || and && explicit. */
  if (StopperDebugLevel >= 1 ||
      (WordToDebug && BestChoices &&
       StringSameAs (WordToDebug, (VIABLE_CHOICE) first (BestChoices)))) {
    if (BestRawChoice)
      PrintViableChoice (stdout, "\nBest Raw Choice:   ", BestRawChoice);

    i = 1;
    Choices = BestChoices;
    if (Choices)
      cprintf ("\nBest Cooked Choices:\n");
    iterate(Choices) {
      /* The label is a short fixed prefix plus one int, which stays well
         below the 80 bytes of LabelString. */
      sprintf (LabelString, "Cooked Choice #%d:  ", i);
      PrintViableChoice (stdout, LabelString,
        (VIABLE_CHOICE) first (Choices));
      i++;
    }
  }
}                                /* DebugWordChoices */


/*---------------------------------------------------------------------------*/
void FilterWordChoices() {
/*
 **	Parameters: none
 **	Globals:
 **		BestChoices	set of choices for current word
 **	Operation: This routine removes from BestChoices all choices which
 **		are not within a reasonable range of the best choice.
 **	Return: none
 **	Exceptions: none
 **	History: Wed May 15 13:08:24 1991, DSJ, Created.
 */
  EXPANDED_CHOICE BestChoice;

  if (BestChoices == NIL || second (BestChoices) == NIL)
    return;

  /* compute certainties and class for each chunk in best choice */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -