📄 stopper.cpp
字号:
ExpandChoice ((VIABLE_CHOICE_STRUCT *) first (BestChoices), &BestChoice); set_rest (BestChoices, delete_d (rest (BestChoices), &BestChoice, FreeBadChoice));} /* FilterWordChoices *//*---------------------------------------------------------------------------*/voidFindClassifierErrors (FLOAT32 MinRating,FLOAT32 MaxRating,FLOAT32 RatingMargin, FLOAT32 Thresholds[]) {/* ** Parameters: ** MinRating limits how tight to make a template ** MaxRating limits how loose to make a template ** RatingMargin amount of margin to put in template ** Thresholds[] place to put error thresholds ** Globals: none ** Operation: This routine compares the best choice for the current ** word to the best raw choice to determine which characters ** were classified incorrectly by the classifier. It then ** places a separate threshold into Thresholds for each ** character in the word. If the classifier was correct, ** MaxRating is placed into Thresholds. If the ** classifier was incorrect, the avg. match rating (error ** percentage) of the classifier's incorrect choice minus ** some margin is ** placed into thresholds. This can then be used by the ** caller to try to create a new template for the desired ** class that will classify the character with a rating better ** than the threshold value. The match rating placed into ** Thresholds is never allowed to be below MinRating in order ** to prevent trying to make overly tight templates. ** Return: none (results are placed in Thresholds) ** Exceptions: none ** History: Fri May 31 16:02:57 1991, DSJ, Created. */ EXPANDED_CHOICE BestRaw; VIABLE_CHOICE Choice; int i, j, Chunk; FLOAT32 AvgRating; int NumErrorChunks; assert (BestChoices != NIL); assert (BestRawChoice != NULL); ExpandChoice(BestRawChoice, &BestRaw); Choice = (VIABLE_CHOICE) first (BestChoices); for (i = 0, Chunk = 0; i < Choice->Length; i++, Thresholds++) { AvgRating = 0.0; NumErrorChunks = 0; for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) if (Choice->Blob[i].Class != BestRaw.ChunkClass[Chunk]) { AvgRating += BestRaw.ChunkCertainty[Chunk]; NumErrorChunks++; } if (NumErrorChunks > 0) { AvgRating /= NumErrorChunks; *Thresholds = (AvgRating / -CertaintyScale) * (1.0 - RatingMargin); } else *Thresholds = MaxRating; if (*Thresholds > MaxRating) *Thresholds = MaxRating; if (*Thresholds < MinRating) *Thresholds = MinRating; }} /* FindClassifierErrors *//*---------------------------------------------------------------------------*/void InitStopperVars() {/* ** Parameters: none ** Globals: none ** Operation: Initializes the control variables used by the stopper. ** Return: none ** Exceptions: none ** History: Thu May 9 10:06:04 1991, DSJ, Created. */ VALUE dummy; string_variable (DangerousAmbigs, "DangerousAmbigs", DANGEROUS_AMBIGS); string_variable (WordToDebug, "WordToDebug", ""); MakeNonDictCertainty(); MakeRejectCertaintyOffset(); MakeSmallWordSize(); MakeCertaintyPerChar(); MakeCertaintyVariation(); MakeStopperDebugLevel(); MakeAmbigThresholdGain(); MakeAmbigThresholdOffset();} /* InitStopperVars *//*---------------------------------------------------------------------------*/void InitChoiceAccum() {/* ** Parameters: none ** Globals: none ** Operation: This routine initializes the data structures used to ** keep track the good word choices found for a word. ** Return: none ** Exceptions: none ** History: Fri May 17 07:59:00 1991, DSJ, Created. */ BLOB_WIDTH *BlobWidth, *End; if (BestRawChoice) memfree(BestRawChoice); if (BestChoices) destroy_nodes(BestChoices, memfree); BestRawChoice = NULL; BestChoices = NIL; EnableChoiceAccum(); for (BlobWidth = CurrentSegmentation, End = CurrentSegmentation + MAX_NUM_CHUNKS; BlobWidth < End; *BlobWidth++ = 1);} /* InitChoiceAccum *//*---------------------------------------------------------------------------*/voidLogNewRawChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** Choice new raw choice for current word ** AdjustFactor adjustment factor which was applied to choice ** Certainties certainties for each char in new choice ** Globals: ** BestRawChoice best raw choice so far for current word ** Operation: This routine compares Choice to the best raw (non-dict) ** choice so far and replaces it if the new choice is better. ** Return: none ** Exceptions: none ** History: Wed May 15 09:57:19 1991, DSJ, Created. */ if (!KeepWordChoices) return; if (!BestRawChoice) BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); else if (class_probability (Choice) < BestRawChoice->Rating) { if (ChoiceSameAs (Choice, BestRawChoice)) ReplaceDuplicateChoice(BestRawChoice, Choice, AdjustFactor, Certainties); else { memfree(BestRawChoice); BestRawChoice = NewViableChoice (Choice, AdjustFactor, Certainties); } }} /* LogNewRawChoice *//*---------------------------------------------------------------------------*/void LogNewSegmentation(PIECES_STATE BlobWidth) {/* ** Parameters: ** BlobWidth[] number of chunks in each blob in segmentation ** Globals: ** CurrentSegmentation blob widths for current segmentation ** Operation: This routine updates the blob widths in CurrentSegmentation ** to be the same as provided in BlobWidth. ** Return: none ** Exceptions: none ** History: Mon May 20 11:52:26 1991, DSJ, Created. */ BLOB_WIDTH *Segmentation; for (Segmentation = CurrentSegmentation; *BlobWidth != 0; BlobWidth++, Segmentation++) *Segmentation = *BlobWidth; *Segmentation = 0;} /* LogNewSegmentation *//*---------------------------------------------------------------------------*/void LogNewSplit(int Blob) {/* ** Parameters: ** Blob index of blob that was split ** Globals: ** BestRawChoice current best raw choice ** BestChoices list of best choices found so far ** Operation: This routine adds 1 chunk to the specified blob for each ** choice in BestChoices and for the BestRawChoice. ** Return: none ** Exceptions: none ** History: Mon May 20 11:38:56 1991, DSJ, Created. */ LIST Choices; if (BestRawChoice) { AddNewChunk(BestRawChoice, Blob); } Choices = BestChoices; iterate(Choices) { AddNewChunk ((VIABLE_CHOICE) first (Choices), Blob); }} /* LogNewSplit *//*---------------------------------------------------------------------------*/voidLogNewWordChoice (A_CHOICE * Choice,FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** Choice new choice for current word ** AdjustFactor adjustment factor which was applied to choice ** Certainties certainties for each char in new choice ** Globals: ** BestChoices best choices so far for current word ** Operation: This routine adds Choice to BestChoices if the ** adjusted certainty for Choice is within a reasonable range ** of the best choice in BestChoices. The BestChoices ** list is kept in sorted order by rating. Duplicates are ** removed. ** Return: none ** Exceptions: none ** History: Wed May 15 09:57:19 1991, DSJ, Created. */ VIABLE_CHOICE NewChoice; LIST Choices; FLOAT32 Threshold; if (!KeepWordChoices) return; /* throw out obviously bad choices to save some work */ if (BestChoices != NIL) { Threshold = AmbigThreshold (BestFactor (BestChoices), AdjustFactor); if (Threshold > -AmbigThresholdOffset) Threshold = -AmbigThresholdOffset; if (class_certainty (Choice) - BestCertainty (BestChoices) < Threshold) return; } /* see if a choice with the same text string has already been found */ NewChoice = NULL; Choices = BestChoices; iterate(Choices) { if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first (Choices))) if (class_probability (Choice) < BestRating (Choices)) NewChoice = (VIABLE_CHOICE) first (Choices); else return; } if (NewChoice) { ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties); BestChoices = delete_d (BestChoices, NewChoice, is_same_node); } else { NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties); } BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings); if (StopperDebugLevel >= 2) PrintViableChoice (stdout, "New Word Choice: ", NewChoice); if (count (BestChoices) > tessedit_truncate_wordchoice_log) { Choices = (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log); destroy_nodes (rest (Choices), Efree); set_rest(Choices, NIL); }} /* LogNewWordChoice *//*---------------------------------------------------------------------------*/static AMBIG_TABLE *AmbigFor = NULL;int NoDangerousAmbig(const char *Word, DANGERR *fixpt) {/* ** Parameters: ** Word word to check for dangerous ambiguities ** Globals: none ** Operation: This word checks each letter in word against a list ** of potentially ambiguous characters. If a match is found ** that letter is replaced with its ambiguity and tested in ** the dictionary. If the ambiguous word is found in the ** dictionary, FALSE is returned. Otherwise, the search ** continues for other ambiguities. If no ambiguities that ** match in the dictionary are found, TRUE is returned. ** Return: TRUE if Word contains no dangerous ambiguities. ** Exceptions: none ** History: Mon May 6 16:28:56 1991, DSJ, Created. */ char NewWord[MAX_WERD_SIZE]; char *NextNewChar; int bad_index = 0; if (!AmbigFor) AmbigFor = FillAmbigTable (); NextNewChar = NewWord; while (*Word) if (AmbigsFound (NewWord, NextNewChar, Word + 1, AmbigFor[*Word], fixpt)) { if (fixpt != NULL) fixpt->index = bad_index; return (FALSE); } else { *NextNewChar++ = *Word++; bad_index++; } return (TRUE);} /* NoDangerousAmbig */void EndDangerousAmbigs() { if (AmbigFor != NULL) { for (int i = 0; i <= MAX_CLASS_ID; ++i) { destroy_nodes(AmbigFor[i], Efree); } Efree(AmbigFor); AmbigFor = NULL; }}/*---------------------------------------------------------------------------*/void SettupStopperPass1() {/* ** Parameters: none ** Globals: ** RejectOffset offset allowed before word is rejected ** Operation: This routine performs any settup of stopper variables ** that is needed in preparation for the first pass. ** Return: none ** Exceptions: none ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. */ RejectOffset = 0.0;} /* SettupStopperPass1 *//*---------------------------------------------------------------------------*/void SettupStopperPass2() {/* ** Parameters: none ** Globals: ** RejectOffset offset allowed before word is rejected ** Operation: This routine performs any settup of stopper variables ** that is needed in preparation for the second pass. ** Return: none ** Exceptions: none ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. */ RejectOffset = RejectCertaintyOffset;} /* SettupStopperPass2 *//**---------------------------------------------------------------------------- Private Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/void AddNewChunk(VIABLE_CHOICE Choice, int Blob) {/* ** Parameters: ** Choice choice to add a new chunk to ** Blob index of blob being split ** Globals: none ** Operation: This routine increments the chunk count of the character ** in Choice which corresponds to Blob. ** Return: none ** Exceptions: none ** History: Mon May 20 11:43:27 1991, DSJ, Created. */ int i, LastChunk; for (i = 0, LastChunk = 0; i < Choice->Length; i++) { LastChunk += Choice->Blob[i].NumChunks; if (Blob < LastChunk) { (Choice->Blob[i].NumChunks)++; return; } } mem_tidy (1); cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n", Choice->Length, LastChunk, Blob); assert(FALSE); /* this should never get executed */} /* AddNewChunk *//*---------------------------------------------------------------------------*/int AmbigsFound(char *Word, char *CurrentChar, const char *Tail, LIST Ambigs, DANGERR *fixpt) {/* ** Parameters: ** Word word being tested for ambiguities ** CurrentChar position in Word to put ambig replacement ** Tail end of word to place after ambiguity ** Ambigs list of ambiguities to test at this position ** Globals: none ** Operation: For each ambiguity in Ambigs, see if the remainder of ** the test string matches the start of Tail. If it does, ** construct a word consisting of the contents of Word up to, ** but not including, CurrentChar followed by the replacement ** string for the ambiguity followed by the unmatched ** contents of Tail. Then test this word to see if it ** is a dictionary word. If it is return TRUE. If none of ** the ambiguities result in a dictionary word, return FALSE. ** Return: TRUE if the Word is ambiguous at the specified position ** Exceptions: none ** History: Thu May 9 10:10:28 1991, DSJ, Created. */ char *AmbigSpec; const char *UnmatchedTail; int Matches; int bad_length; iterate(Ambigs) { AmbigSpec = (char *) first (Ambigs); bad_length = 1; UnmatchedTail = Tail; Matches = TRUE; while (*AmbigSpec != ' ' && Matches) if (*AmbigSpec == *UnmatchedTail) { AmbigSpec++; UnmatchedTail++; bad_length++; } else Matches = FALSE; if (Matches) { AmbigSpec++; /* skip over the space */ /* insert replacement string */ strcpy(CurrentChar, AmbigSpec); /* add tail */ strcat(Word, UnmatchedTail); if (valid_word (Word)) { if (StopperDebugLevel >= 1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -