stopper.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,459 行 · 第 1/4 页
CPP
1,459 行
NewChoice = NULL; Choices = BestChoices; iterate(Choices) { if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first_node (Choices))) { if (class_probability (Choice) < BestRating (Choices)) NewChoice = (VIABLE_CHOICE) first_node (Choices); else return; } } if (NewChoice) { ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties); BestChoices = delete_d (BestChoices, NewChoice, is_same_node); } else { NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties); } BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings); if (StopperDebugLevel >= 2) PrintViableChoice (stderr, "New Word Choice: ", NewChoice); if (count (BestChoices) > tessedit_truncate_wordchoice_log) { Choices = (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log); destroy_nodes (rest (Choices), Efree); set_rest(Choices, NIL); }} /* LogNewWordChoice *//*---------------------------------------------------------------------------*/static AMBIG_TABLE *AmbigFor = NULL;int NoDangerousAmbig(const char *Word, const char *Word_lengths, DANGERR *fixpt) {/* ** Parameters: ** Word word to check for dangerous ambiguities ** Word_lengths lengths of unichars in Word ** Globals: none ** Operation: This word checks each letter in word against a list ** of potentially ambiguous characters. If a match is found ** that letter is replaced with its ambiguity and tested in ** the dictionary. If the ambiguous word is found in the ** dictionary, FALSE is returned. Otherwise, the search ** continues for other ambiguities. If no ambiguities that ** match in the dictionary are found, TRUE is returned. ** Return: TRUE if Word contains no dangerous ambiguities. ** Exceptions: none ** History: Mon May 6 16:28:56 1991, DSJ, Created. */ char NewWord[MAX_WERD_SIZE * UNICHAR_LEN + 1]; char *NextNewChar; int bad_index = 0; if (!AmbigFor) AmbigFor = FillAmbigTable (); NextNewChar = NewWord; while (*Word) if (AmbigsFound (NewWord, NextNewChar, Word + *Word_lengths, Word_lengths + 1, AmbigFor[unicharset.unichar_to_id(Word, *Word_lengths)], fixpt)) { if (fixpt != NULL) fixpt->index = bad_index; return (FALSE); } else { strncpy(NextNewChar, Word, *Word_lengths); NextNewChar += *Word_lengths; Word += *Word_lengths; Word_lengths++; bad_index++; } return (TRUE);} /* NoDangerousAmbig */void EndDangerousAmbigs() { if (AmbigFor != NULL) { for (int i = 0; i <= MAX_CLASS_ID; ++i) { destroy_nodes(AmbigFor[i], Efree); } Efree(AmbigFor); AmbigFor = NULL; }}/*---------------------------------------------------------------------------*/void SettupStopperPass1() {/* ** Parameters: none ** Globals: ** RejectOffset offset allowed before word is rejected ** Operation: This routine performs any settup of stopper variables ** that is needed in preparation for the first pass. ** Return: none ** Exceptions: none ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. */ RejectOffset = 0.0;} /* SettupStopperPass1 *//*---------------------------------------------------------------------------*/void SettupStopperPass2() {/* ** Parameters: none ** Globals: ** RejectOffset offset allowed before word is rejected ** Operation: This routine performs any settup of stopper variables ** that is needed in preparation for the second pass. ** Return: none ** Exceptions: none ** History: Mon Jun 3 12:32:00 1991, DSJ, Created. */ RejectOffset = RejectCertaintyOffset;} /* SettupStopperPass2 *//**---------------------------------------------------------------------------- Private Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/void AddNewChunk(VIABLE_CHOICE Choice, int Blob) {/* ** Parameters: ** Choice choice to add a new chunk to ** Blob index of blob being split ** Globals: none ** Operation: This routine increments the chunk count of the character ** in Choice which corresponds to Blob. ** Return: none ** Exceptions: none ** History: Mon May 20 11:43:27 1991, DSJ, Created. */ int i, LastChunk; for (i = 0, LastChunk = 0; i < Choice->Length; i++) { LastChunk += Choice->Blob[i].NumChunks; if (Blob < LastChunk) { (Choice->Blob[i].NumChunks)++; return; } } mem_tidy (1); cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n", Choice->Length, LastChunk, Blob); assert(FALSE); /* this should never get executed */} /* AddNewChunk *//*---------------------------------------------------------------------------*/int AmbigsFound(char *Word, char *CurrentChar, const char *Tail, const char *Tail_lengths, LIST Ambigs, DANGERR *fixpt) {/* ** Parameters: ** Word word being tested for ambiguities ** CurrentChar position in Word to put ambig replacement ** Tail end of word to place after ambiguity ** Tail_lengths lengths of the unichars in Tail ** Ambigs list of ambiguities to test at this position ** Globals: none ** Operation: For each ambiguity in Ambigs, see if the remainder of ** the test string matches the start of Tail. If it does, ** construct a word consisting of the contents of Word up to, ** but not including, CurrentChar followed by the replacement ** string for the ambiguity followed by the unmatched ** contents of Tail. Then test this word to see if it ** is a dictionary word. If it is return TRUE. If none of ** the ambiguities result in a dictionary word, return FALSE. ** Return: TRUE if the Word is ambiguous at the specified position ** Exceptions: none ** History: Thu May 9 10:10:28 1991, DSJ, Created. */ AMBIG_SPEC *AmbigSpec; char *ambig; char *ambig_lengths; const char *UnmatchedTail; const char *UnmatchedTail_lengths; int Matches; int bad_length; iterate(Ambigs) { AmbigSpec = (AMBIG_SPEC *) first_node (Ambigs); ambig = AmbigSpec->ambig; ambig_lengths = AmbigSpec->lengths; bad_length = 1; UnmatchedTail = Tail; UnmatchedTail_lengths = Tail_lengths; Matches = TRUE; while (*ambig != ' ' && Matches) if (*UnmatchedTail_lengths == *ambig_lengths && strncmp(ambig, UnmatchedTail, *ambig_lengths) == 0) { ambig += *(ambig_lengths++); UnmatchedTail += *(UnmatchedTail_lengths++); bad_length++; } else Matches = FALSE; if (Matches) { ambig += *(ambig_lengths++); /* skip over the space */ /* insert replacement string */ strcpy(CurrentChar, ambig); /* add tail */ strcat(Word, UnmatchedTail); if (valid_word (Word)) { if (StopperDebugLevel >= 1) cprintf ("Stopper: Possible ambiguous word = %s\n", Word); if (fixpt != NULL) { fixpt->good_length = strlen (ambig_lengths); fixpt->bad_length = bad_length; } return (TRUE); } } } return (FALSE);} /* AmbigsFound *//*---------------------------------------------------------------------------*/int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice) {/* ** Parameters: ** Choice choice to compare to ViableChoice ** ViableChoice viable choice to compare to Choice ** Globals: none ** Operation: This routine compares the corresponding strings of ** Choice and ViableChoice and returns TRUE if they are the ** same, FALSE otherwise. ** Return: TRUE or FALSE. ** Exceptions: none ** History: Fri May 17 08:48:04 1991, DSJ, Created. */ return (StringSameAs (class_string (Choice), class_lengths (Choice), ViableChoice));} /* ChoiceSameAs *//*---------------------------------------------------------------------------*/int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1, void *arg2) { //VIABLE_CHOICE Choice2)/* ** Parameters: ** Choice1, Choice2 choices to compare ratings for ** Globals: none ** Operation: Return -1 if the rating for Choice1 is less than the ** rating for Choice2, otherwise return (1). ** Return: -1 or 1 ** Exceptions: none ** History: Wed May 15 13:02:37 1991, DSJ, Created. */ float R1, R2; VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1; VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2; R1 = Choice1->Rating; R2 = Choice2->Rating; if (R1 < R2) return (-1); else return (1);} /* CmpChoiceRatings *//*---------------------------------------------------------------------------*/void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice) {/* ** Parameters: ** Choice choice to be expanded ** ExpandedChoice place to put resulting expanded choice ** Globals: none ** Operation: This routine expands Choice and places the results ** in ExpandedChoice. The primary function of expansion ** is to create an two arrays, one which holds the corresponding ** certainty for each chunk in Choice, and one which holds ** the class for each chunk. ** Return: none (results are placed in ExpandedChoice) ** Exceptions: none ** History: Fri May 31 15:21:57 1991, DSJ, Created. */ int i, j, Chunk; ExpandedChoice->Choice = Choice; for (i = 0, Chunk = 0; i < Choice->Length; i++) for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) { ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty; ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class; }} /* ExpandChoice *//*---------------------------------------------------------------------------*/AMBIG_TABLE *FillAmbigTable() {/* ** Parameters: none ** Globals: ** DangerousAmbigs filename of dangerous ambig info ** Operation: This routine allocates a new ambiguity table and fills ** it in from the file specified by DangerousAmbigs. An ** ambiguity table is an array of lists. The array is indexed ** by a class id. Therefore, each entry in the table provides ** a list of potential ambiguities which can start with the ** corresponding character. Each potential ambiguity is ** described by a string which contains the remainder of the ** test string followed by a space followed by the replacement ** string. For example the ambiguity "rn -> m", would be ** located in the table at index 'r'. The string corresponding ** to this ambiguity would be "n m". ** Return: Pointer to new ambiguity table. ** Exceptions: none ** History: Thu May 9 09:20:57 1991, DSJ, Created. */ FILE *AmbigFile; AMBIG_TABLE *NewTable; int i; int AmbigPartSize; char buffer[256 * UNICHAR_LEN]; char TestString[256 * UNICHAR_LEN]; char TestString_lengths[256]; char ReplacementString[256 * UNICHAR_LEN]; char ReplacementString_lengths[256]; STRING name; char lengths[2]; AMBIG_SPEC *AmbigSpec; UNICHAR_ID unichar_id; lengths[1] = 0; name = language_data_path_prefix; name += DangerousAmbigs; AmbigFile = Efopen (name.string(), "r"); NewTable = (AMBIG_TABLE *) Emalloc (sizeof (LIST) * (MAX_CLASS_ID + 1)); for (i = 0; i <= MAX_CLASS_ID; i++) NewTable[i] = NIL; while (fscanf (AmbigFile, "%d", &AmbigPartSize) == 1) { TestString[0] = '\0'; TestString_lengths[0] = 0; ReplacementString[0] = '\0'; ReplacementString_lengths[0] = 0; bool illegal_char = false; for (i = 0; i < AmbigPartSize; ++i) { fscanf (AmbigFile, "%s", buffer); strcat(TestString, buffer); lengths[0] = strlen(buffer);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?