📄 stopper.cpp
字号:
cprintf ("Stopper: Possible ambiguous word = %s\n", Word); if (fixpt != NULL) { fixpt->good_length = strlen (AmbigSpec); fixpt->bad_length = bad_length; } return (TRUE); } } } return (FALSE);} /* AmbigsFound *//*---------------------------------------------------------------------------*/int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice) {/* ** Parameters: ** Choice choice to compare to ViableChoice ** ViableChoice viable choice to compare to Choice ** Globals: none ** Operation: This routine compares the corresponding strings of ** Choice and ViableChoice and returns TRUE if they are the ** same, FALSE otherwise. ** Return: TRUE or FALSE. ** Exceptions: none ** History: Fri May 17 08:48:04 1991, DSJ, Created. */ return (StringSameAs (class_string (Choice), ViableChoice));} /* ChoiceSameAs *//*---------------------------------------------------------------------------*/int CmpChoiceRatings(void *arg1, //VIABLE_CHOICE Choice1, void *arg2) { //VIABLE_CHOICE Choice2)/* ** Parameters: ** Choice1, Choice2 choices to compare ratings for ** Globals: none ** Operation: Return -1 if the rating for Choice1 is less than the ** rating for Choice2, otherwise return (1). ** Return: -1 or 1 ** Exceptions: none ** History: Wed May 15 13:02:37 1991, DSJ, Created. */ float R1, R2; VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1; VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2; R1 = Choice1->Rating; R2 = Choice2->Rating; if (R1 < R2) return (-1); else return (1);} /* CmpChoiceRatings *//*---------------------------------------------------------------------------*/void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice) {/* ** Parameters: ** Choice choice to be expanded ** ExpandedChoice place to put resulting expanded choice ** Globals: none ** Operation: This routine expands Choice and places the results ** in ExpandedChoice. The primary function of expansion ** is to create an two arrays, one which holds the corresponding ** certainty for each chunk in Choice, and one which holds ** the class for each chunk. ** Return: none (results are placed in ExpandedChoice) ** Exceptions: none ** History: Fri May 31 15:21:57 1991, DSJ, Created. */ int i, j, Chunk; ExpandedChoice->Choice = Choice; for (i = 0, Chunk = 0; i < Choice->Length; i++) for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) { ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty; ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class; }} /* ExpandChoice *//*---------------------------------------------------------------------------*/AMBIG_TABLE *FillAmbigTable() {/* ** Parameters: none ** Globals: ** DangerousAmbigs filename of dangerous ambig info ** Operation: This routine allocates a new ambiguity table and fills ** it in from the file specified by DangerousAmbigs. An ** ambiguity table is an array of lists. The array is indexed ** by a class id. Therefore, each entry in the table provides ** a list of potential ambiguities which can start with the ** corresponding character. Each potential ambiguity is ** described by a string which contains the remainder of the ** test string followed by a space followed by the replacement ** string. For example the ambiguity "rn -> m", would be ** located in the table at index 'r'. The string corresponding ** to this ambiguity would be "n m". ** Return: Pointer to new ambiguity table. ** Exceptions: none ** History: Thu May 9 09:20:57 1991, DSJ, Created. */ FILE *AmbigFile; AMBIG_TABLE *NewTable; int i; char TestString[256]; char ReplacementString[256]; char name[1024]; char *AmbigSpec; int AmbigSize; strcpy(name, demodir); strcat(name, DangerousAmbigs); AmbigFile = Efopen (name, "r"); NewTable = (AMBIG_TABLE *) Emalloc (sizeof (LIST) * (MAX_CLASS_ID + 1)); for (i = 0; i <= MAX_CLASS_ID; i++) NewTable[i] = NIL; while (fscanf (AmbigFile, "%s", TestString) == 1 && fscanf (AmbigFile, "%s", ReplacementString) == 1) { if (strlen (TestString) > MAX_AMBIG_SIZE || strlen (ReplacementString) > MAX_AMBIG_SIZE) DoError (0, "Illegal ambiguity specification!"); AmbigSize = strlen (TestString) + strlen (ReplacementString) + 1; AmbigSpec = (char *) Emalloc (sizeof (char) * AmbigSize); strcpy (AmbigSpec, &(TestString[1])); strcat (AmbigSpec, " "); strcat(AmbigSpec, ReplacementString); NewTable[TestString[0]] = push_last (NewTable[TestString[0]], AmbigSpec); } fclose(AmbigFile); return (NewTable);} /* FillAmbigTable *//*---------------------------------------------------------------------------*/int FreeBadChoice(void *item1, //VIABLE_CHOICE Choice, void *item2) { //EXPANDED_CHOICE *BestChoice)/* ** Parameters: ** Choice choice to be tested ** BestChoice best choice found ** Globals: ** AmbigThresholdGain ** AmbigThresholdOffset ** Operation: If the certainty of any chunk in Choice is not ambiguous ** with the corresponding chunk in the best choice, free ** Choice and return TRUE. Otherwise, return FALSE. ** Return: TRUE or FALSE. ** Exceptions: none ** History: Wed May 15 13:20:26 1991, DSJ, Created. */ int i, j, Chunk; FLOAT32 Threshold; VIABLE_CHOICE Choice; EXPANDED_CHOICE *BestChoice; Choice = (VIABLE_CHOICE) item1; BestChoice = (EXPANDED_CHOICE *) item2; Threshold = AmbigThreshold (BestChoice->Choice->AdjustFactor, Choice->AdjustFactor); for (i = 0, Chunk = 0; i < Choice->Length; i++) for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] && Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] < Threshold) { memfree(Choice); return (TRUE); } return (FALSE);} /* FreeBadChoice *//*---------------------------------------------------------------------------*/int LengthOfShortestAlphaRun(register char *Word) {/* ** Parameters: ** Word word to be tested ** Globals: none ** Operation: Return the length of the shortest alpha run in Word. ** Return: Return the length of the shortest alpha run in Word. ** Exceptions: none ** History: Tue May 14 07:50:45 1991, DSJ, Created. */ register int Shortest = MAXINT; register int Length; for (; *Word; Word++) if (isalpha (*Word)) { for (Length = 1, Word++; isalpha (*Word); Word++, Length++); if (Length < Shortest) Shortest = Length; if (*Word == 0) break; } if (Shortest == MAXINT) Shortest = 0; return (Shortest);} /* LengthOfShortestAlphaRun *//*---------------------------------------------------------------------------*/VIABLE_CHOICENewViableChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** Choice choice to be converted to a viable choice ** AdjustFactor factor used to adjust ratings for Choice ** Certainties certainty for each character in Choice ** Globals: ** CurrentSegmentation segmentation corresponding to Choice ** Operation: Allocate a new viable choice data structure, copy ** Choice, Certainties, and CurrentSegmentation into it, ** and return a pointer to it. ** Return: Ptr to new viable choice. ** Exceptions: none ** History: Thu May 16 15:28:29 1991, DSJ, Created. */ VIABLE_CHOICE NewChoice; int Length; char *Word; CHAR_CHOICE *NewChar; BLOB_WIDTH *BlobWidth; Length = strlen (class_string (Choice)); assert (Length <= MAX_NUM_CHUNKS && Length > 0); NewChoice = (VIABLE_CHOICE) Emalloc (sizeof (VIABLE_CHOICE_STRUCT) + (Length - 1) * sizeof (CHAR_CHOICE)); NewChoice->Rating = class_probability (Choice); NewChoice->Certainty = class_certainty (Choice); NewChoice->AdjustFactor = AdjustFactor; NewChoice->Length = Length; for (Word = class_string (Choice), NewChar = &(NewChoice->Blob[0]), BlobWidth = CurrentSegmentation; *Word; Word++, NewChar++, Certainties++, BlobWidth++) { NewChar->Class = *Word; NewChar->NumChunks = *BlobWidth; NewChar->Certainty = *Certainties; } return (NewChoice);} /* NewViableChoice *//*---------------------------------------------------------------------------*/void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice) {/* ** Parameters: ** File open text file to print Choice to ** Label text label to be printed with Choice ** Choice choice to be printed ** Globals: none ** Operation: This routine dumps a text representation of the ** specified Choice to File. ** Return: none ** Exceptions: none ** History: Mon May 20 11:16:44 1991, DSJ, Created. */ int i, j; fprintf (File, "%s", Label); fprintf (File, "(R=%5.1f, C=%4.1f, F=%4.2f) ", Choice->Rating, Choice->Certainty, Choice->AdjustFactor); for (i = 0; i < Choice->Length; i++) fprintf (File, "%c", Choice->Blob[i].Class); fprintf (File, "\n"); for (i = 0; i < Choice->Length; i++) { fprintf (File, " %c", Choice->Blob[i].Class); for (j = 0; j < Choice->Blob[i].NumChunks - 1; j++) fprintf (File, " "); } fprintf (File, "\n"); for (i = 0; i < Choice->Length; i++) { for (j = 0; j < Choice->Blob[i].NumChunks; j++) fprintf (File, "%3d", (int) (Choice->Blob[i].Certainty * -10.0)); } fprintf (File, "\n");} /* PrintViableChoice *//*---------------------------------------------------------------------------*/voidReplaceDuplicateChoice (VIABLE_CHOICE OldChoice,A_CHOICE * NewChoice,FLOAT32 AdjustFactor, float Certainties[]) {/* ** Parameters: ** OldChoice existing viable choice to be replaced ** NewChoice choice to replace OldChoice with ** AdjustFactor factor used to adjust ratings for OldChoice ** Certainties certainty for each character in OldChoice ** Globals: ** CurrentSegmentation segmentation for NewChoice ** Operation: This routine is used whenever a better segmentation (or ** contextual interpretation) is found for a word which already ** exists. The OldChoice is updated with the relevant ** information from the new choice. The text string itself ** does not need to be copied since, by definition, has not ** changed. ** Return: none ** Exceptions: none ** History: Fri May 17 13:35:58 1991, DSJ, Created. */ char *Word; CHAR_CHOICE *NewChar; BLOB_WIDTH *BlobWidth; OldChoice->Rating = class_probability (NewChoice); OldChoice->Certainty = class_certainty (NewChoice); OldChoice->AdjustFactor = AdjustFactor; for (Word = class_string (NewChoice), NewChar = &(OldChoice->Blob[0]), BlobWidth = CurrentSegmentation; *Word; Word++, NewChar++, Certainties++, BlobWidth++) { NewChar->NumChunks = *BlobWidth; NewChar->Certainty = *Certainties; }} /* ReplaceDuplicateChoice *//*---------------------------------------------------------------------------*/int StringSameAs(const char *String, VIABLE_CHOICE ViableChoice) {/* ** Parameters: ** String string to compare to ViableChoice ** ViableChoice viable choice to compare to String ** Globals: none ** Operation: This routine compares String to ViableChoice and ** returns TRUE if they are the same, FALSE otherwise. ** Return: TRUE or FALSE. ** Exceptions: none ** History: Fri May 17 08:48:04 1991, DSJ, Created. */ CHAR_CHOICE *Char; int i; for (Char = &(ViableChoice->Blob[0]), i = 0; i < ViableChoice->Length; String++, Char++, i++) if (*String != Char->Class) return (FALSE); if (*String == 0) return (TRUE); else return (FALSE);} /* StringSameAs *//*---------------------------------------------------------------------------*/int UniformCertainties(CHOICES_LIST Choices, A_CHOICE *BestChoice) {/* ** Parameters: ** Choices choices for current segmentation ** BestChoice best choice for current segmentation ** Globals: ** CertaintyVariation max allowed certainty variation ** Operation: This routine returns TRUE if the certainty of the ** BestChoice word is within a reasonable range of the average ** certainties for the best choices for each character in ** the segmentation. This test is used to catch words in which ** one character is much worse than the other characters in ** the word (i.e. FALSE will be returned in that case). ** The algorithm computes the mean and std deviation of the ** certainties in the word with the worst certainty thrown out. ** Return: TRUE or FALSE. ** Exceptions: none ** History: Tue May 14 08:23:21 1991, DSJ, Created. */ int i; CHOICES CharChoices; float Certainty; float WorstCertainty = MAX_FLOAT32; float CertaintyThreshold; FLOAT64 TotalCertainty; FLOAT64 TotalCertaintySquared; FLOAT64 Variance; FLOAT32 Mean, StdDev; int WordLength; WordLength = array_count (Choices); if (WordLength < 3) return (TRUE); TotalCertainty = TotalCertaintySquared = 0.0; for_each_choice(Choices, i) { CharChoices = (CHOICES) array_index (Choices, i); Certainty = best_certainty (CharChoices); TotalCertainty += Certainty; TotalCertaintySquared += Certainty * Certainty; if (Certainty < WorstCertainty) WorstCertainty = Certainty; } /* subtract off worst certainty from statistics */ WordLength--; TotalCertainty -= WorstCertainty; TotalCertaintySquared -= WorstCertainty * WorstCertainty; Mean = TotalCertainty / WordLength; Variance = ((WordLength * TotalCertaintySquared - TotalCertainty * TotalCertainty) / (WordLength * (WordLength - 1))); if (Variance < 0.0) Variance = 0.0; StdDev = sqrt (Variance); CertaintyThreshold = Mean - CertaintyVariation * StdDev; if (CertaintyThreshold > NonDictCertainty) CertaintyThreshold = NonDictCertainty; if (class_certainty (BestChoice) < CertaintyThreshold) { if (StopperDebugLevel >= 1) cprintf ("Stopper: Non-uniform certainty = %4.1f (m=%4.1f, s=%4.1f, t=%4.1f)\n", class_certainty (BestChoice), Mean, StdDev, CertaintyThreshold); return (FALSE); } else return (TRUE);} /* UniformCertainties */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -