stopper.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,459 行 · 第 1/4 页
CPP
1,459 行
** Word string to compare to current best choice ** Word_lengths lengths of unichars in Word ** Globals: ** BestChoices set of best choices for current word ** Operation: Returns TRUE if Word is the same as the current best ** choice, FALSE otherwise. ** Return: TRUE or FALSE ** Exceptions: none ** History: Thu May 30 14:44:22 1991, DSJ, Created. */ return (BestChoices != NIL && StringSameAs (Word, Word_lengths, (VIABLE_CHOICE) first_node (BestChoices)));} /* CurrentBestChoiceIs *//*---------------------------------------------------------------------------*/FLOAT32 CurrentBestChoiceAdjustFactor() {/* ** Parameters: none ** Globals: ** BestChoices set of best choices for current word ** Operation: Return the adjustment factor for the best choice for ** the current word. ** Return: Adjust factor for current best choice. ** Exceptions: none ** History: Thu May 30 14:48:24 1991, DSJ, Created. */ VIABLE_CHOICE BestChoice; if (BestChoices == NIL) return (MAX_FLOAT32); BestChoice = (VIABLE_CHOICE) first_node (BestChoices); return (BestChoice->AdjustFactor);} /* CurrentBestChoiceAdjustFactor *//*---------------------------------------------------------------------------*/int CurrentWordAmbig() {/* ** Parameters: none ** Globals: ** BestChoices set of best choices for current word ** Operation: This routine returns TRUE if there are multiple good ** choices for the current word and FALSE otherwise. ** Return: TRUE or FALSE ** Exceptions: none ** History: Wed May 22 15:38:38 1991, DSJ, Created. */ return (rest (BestChoices) != NIL);} /* CurrentWordAmbig *//*---------------------------------------------------------------------------*/void DebugWordChoices() {/* ** Parameters: none ** Globals: ** BestRawChoice ** BestChoices ** Operation: Print the current choices for this word to stdout. ** Return: none ** Exceptions: none ** History: Wed May 15 13:52:08 1991, DSJ, Created. */ LIST Choices; int i; char LabelString[80]; if (StopperDebugLevel >= 1 || (WordToDebug && BestChoices && StringSameAs (WordToDebug, WordToDebug_lengths, (VIABLE_CHOICE) first_node (BestChoices)))) { if (BestRawChoice) PrintViableChoice (stderr, "\nBest Raw Choice: ", BestRawChoice); i = 1; Choices = BestChoices; if (Choices) cprintf ("\nBest Cooked Choices:\n"); iterate(Choices) { sprintf (LabelString, "Cooked Choice #%d: ", i); PrintViableChoice (stderr, LabelString, (VIABLE_CHOICE) first_node (Choices)); i++; } }} /* DebugWordChoices *//*---------------------------------------------------------------------------*/void FilterWordChoices() {/* ** Parameters: none ** Globals: ** BestChoices set of choices for current word ** Operation: This routine removes from BestChoices all choices which ** are not within a reasonable range of the best choice. ** Return: none ** Exceptions: none ** History: Wed May 15 13:08:24 1991, DSJ, Created. */ EXPANDED_CHOICE BestChoice; if (BestChoices == NIL || second_node (BestChoices) == NIL) return; /* compute certainties and class for each chunk in best choice */ ExpandChoice ((VIABLE_CHOICE_STRUCT *) first_node (BestChoices), &BestChoice); set_rest (BestChoices, delete_d (rest (BestChoices), &BestChoice, FreeBadChoice));} /* FilterWordChoices *//*---------------------------------------------------------------------------*/voidFindClassifierErrors (FLOAT32 MinRating,FLOAT32 MaxRating,FLOAT32 RatingMargin, FLOAT32 Thresholds[]) {/* ** Parameters: ** MinRating limits how tight to make a template ** MaxRating limits how loose to make a template ** RatingMargin amount of margin to put in template ** Thresholds[] place to put error thresholds ** Globals: none ** Operation: This routine compares the best choice for the current ** word to the best raw choice to determine which characters ** were classified incorrectly by the classifier. It then ** places a separate threshold into Thresholds for each ** character in the word. If the classifier was correct, ** MaxRating is placed into Thresholds. If the ** classifier was incorrect, the avg. match rating (error ** percentage) of the classifier's incorrect choice minus ** some margin is ** placed into thresholds. This can then be used by the ** caller to try to create a new template for the desired ** class that will classify the character with a rating better ** than the threshold value. The match rating placed into ** Thresholds is never allowed to be below MinRating in order ** to prevent trying to make overly tight templates. ** Return: none (results are placed in Thresholds) ** Exceptions: none ** History: Fri May 31 16:02:57 1991, DSJ, Created. */ EXPANDED_CHOICE BestRaw; VIABLE_CHOICE Choice; int i, j, Chunk; FLOAT32 AvgRating; int NumErrorChunks; assert (BestChoices != NIL); assert (BestRawChoice != NULL); ExpandChoice(BestRawChoice, &BestRaw); Choice = (VIABLE_CHOICE) first_node (BestChoices); for (i = 0, Chunk = 0; i < Choice->Length; i++, Thresholds++) { AvgRating = 0.0; NumErrorChunks = 0; for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) if (Choice->Blob[i].Class != BestRaw.ChunkClass[Chunk]) { AvgRating += BestRaw.ChunkCertainty[Chunk]; NumErrorChunks++; } if (NumErrorChunks > 0) { AvgRating /= NumErrorChunks; *Thresholds = (AvgRating / -CertaintyScale) * (1.0 - RatingMargin); } else *Thresholds = MaxRating; if (*Thresholds > MaxRating) *Thresholds = MaxRating; if (*Thresholds < MinRating) *Thresholds = MinRating; }} /* FindClassifierErrors *//*---------------------------------------------------------------------------*/void InitStopperVars() {/* ** Parameters: none ** Globals: none ** Operation: Initializes the control variables used by the stopper. ** Return: none ** Exceptions: none ** History: Thu May 9 10:06:04 1991, DSJ, Created. */ VALUE dummy; string_variable (DangerousAmbigs, "DangerousAmbigs", DANGEROUS_AMBIGS); string_variable (WordToDebug, "WordToDebug", ""); string_variable (WordToDebug_lengths, "WordToDebug_lengths", ""); MakeNonDictCertainty(); MakeRejectCertaintyOffset(); MakeSmallWordSize(); MakeCertaintyPerChar(); MakeCertaintyVariation(); MakeStopperDebugLevel(); MakeAmbigThresholdGain(); MakeAmbigThresholdOffset();} /* InitStopperVars *//*---------------------------------------------------------------------------*/void InitChoiceAccum() {/* ** Parameters: none ** Globals: none ** Operation: This routine initializes the data structures used to ** keep track the good word choices found for a word. ** Return: none ** Exceptions: none ** History: Fri May 17 07:59:00 1991, DSJ, Created. */ BLOB_WIDTH *BlobWidth, *End; if (BestRawChoice) memfree(BestRawChoice); if (BestChoices) destroy_nodes(BestChoices, memfree); BestRawChoice = NULL; BestChoices = NIL; EnableChoiceAccum(); for (BlobWidth = CurrentSegmentation, End = CurrentSegmentation + MAX_NUM_CHUNKS; BlobWidth < End; *BlobWidth++ = 1);} /* InitChoiceAccum *//*---------------------------------------------------------------------------*/voidLogNewRawChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** Choice new raw choice for current word ** AdjustFactor adjustment factor which was applied to choice ** Certainties certainties for each char in new choice ** Globals: ** BestRawChoice best raw choice so far for current word ** Operation: This routine compares Choice to the best raw (non-dict) ** choice so far and replaces it if the new choice is better. ** Return: none ** Exceptions: none ** History: Wed May 15 09:57:19 1991, DSJ, Created. */ if (!KeepWordChoices) return; if (!BestRawChoice) BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); else if (class_probability (Choice) < BestRawChoice->Rating) { if (ChoiceSameAs (Choice, BestRawChoice)) ReplaceDuplicateChoice(BestRawChoice, Choice, AdjustFactor, Certainties); else { memfree(BestRawChoice); BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); } }} /* LogNewRawChoice *//*---------------------------------------------------------------------------*/void LogNewSegmentation(PIECES_STATE BlobWidth) {/* ** Parameters: ** BlobWidth[] number of chunks in each blob in segmentation ** Globals: ** CurrentSegmentation blob widths for current segmentation ** Operation: This routine updates the blob widths in CurrentSegmentation ** to be the same as provided in BlobWidth. ** Return: none ** Exceptions: none ** History: Mon May 20 11:52:26 1991, DSJ, Created. */ BLOB_WIDTH *Segmentation; for (Segmentation = CurrentSegmentation; *BlobWidth != 0; BlobWidth++, Segmentation++) *Segmentation = *BlobWidth; *Segmentation = 0;} /* LogNewSegmentation *//*---------------------------------------------------------------------------*/void LogNewSplit(int Blob) {/* ** Parameters: ** Blob index of blob that was split ** Globals: ** BestRawChoice current best raw choice ** BestChoices list of best choices found so far ** Operation: This routine adds 1 chunk to the specified blob for each ** choice in BestChoices and for the BestRawChoice. ** Return: none ** Exceptions: none ** History: Mon May 20 11:38:56 1991, DSJ, Created. */ LIST Choices; if (BestRawChoice) { AddNewChunk(BestRawChoice, Blob); } Choices = BestChoices; iterate(Choices) { AddNewChunk ((VIABLE_CHOICE) first_node (Choices), Blob); }} /* LogNewSplit *//*---------------------------------------------------------------------------*/voidLogNewWordChoice (A_CHOICE * Choice,FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** Choice new choice for current word ** AdjustFactor adjustment factor which was applied to choice ** Certainties certainties for each char in new choice ** Globals: ** BestChoices best choices so far for current word ** Operation: This routine adds Choice to BestChoices if the ** adjusted certainty for Choice is within a reasonable range ** of the best choice in BestChoices. The BestChoices ** list is kept in sorted order by rating. Duplicates are ** removed. ** Return: none ** Exceptions: none ** History: Wed May 15 09:57:19 1991, DSJ, Created. */ VIABLE_CHOICE NewChoice; LIST Choices; FLOAT32 Threshold; if (!KeepWordChoices) return; /* throw out obviously bad choices to save some work */ if (BestChoices != NIL) { Threshold = AmbigThreshold (BestFactor (BestChoices), AdjustFactor); if (Threshold > -AmbigThresholdOffset) Threshold = -AmbigThresholdOffset; if (class_certainty (Choice) - BestCertainty (BestChoices) < Threshold) return; } /* see if a choice with the same text string has already been found */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?