stopper.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,459 行 · 第 1/4 页

CPP
1,459
字号
      strcat(TestString_lengths, lengths);      if (!unicharset.contains_unichar(buffer))        illegal_char = true;    }    fscanf (AmbigFile, "%d", &AmbigPartSize);    for (i = 0; i < AmbigPartSize; ++i) {      fscanf (AmbigFile, "%s", buffer);      strcat(ReplacementString, buffer);      lengths[0] = strlen(buffer);      strcat(ReplacementString_lengths, lengths);      if (!unicharset.contains_unichar(buffer))        illegal_char = true;    }    if (strlen (TestString_lengths) > MAX_AMBIG_SIZE ||        strlen (ReplacementString_lengths) > MAX_AMBIG_SIZE)      DoError (0, "Illegal ambiguity specification!");    if (illegal_char) {      continue;    }    AmbigSpec = (AMBIG_SPEC *) Emalloc (sizeof (AMBIG_SPEC));    strcpy(AmbigSpec->ambig, TestString + TestString_lengths[0]);    strcat(AmbigSpec->ambig, " ");    strcat(AmbigSpec->ambig, ReplacementString);    strcpy(AmbigSpec->lengths, TestString_lengths + 1);    lengths[0] = 1;    strcat(AmbigSpec->lengths, lengths);    strcat(AmbigSpec->lengths, ReplacementString_lengths);    unichar_id = unicharset.unichar_to_id(TestString, TestString_lengths[0]);    NewTable[unichar_id] = push_last (NewTable[unichar_id], AmbigSpec);  }  fclose(AmbigFile);  return (NewTable);}                                /* FillAmbigTable *//*---------------------------------------------------------------------------*/int FreeBadChoice(void *item1,    //VIABLE_CHOICE                 Choice,                  void *item2) {  //EXPANDED_CHOICE                       *BestChoice)/* **	Parameters: **		Choice			choice to be tested **		BestChoice		best choice found **	Globals: **		AmbigThresholdGain **		AmbigThresholdOffset **	Operation: If the certainty of any chunk in Choice is not ambiguous **		with the corresponding chunk in the best choice, free **		Choice and return TRUE.  Otherwise, return FALSE. **	Return: TRUE or FALSE. **	Exceptions: none **	History: Wed May 15 13:20:26 1991, DSJ, Created. */  int i, j, Chunk;  FLOAT32 Threshold;  VIABLE_CHOICE Choice;  EXPANDED_CHOICE *BestChoice;  Choice = (VIABLE_CHOICE) item1;  BestChoice = (EXPANDED_CHOICE *) item2;  Threshold = AmbigThreshold (BestChoice->Choice->AdjustFactor,    Choice->AdjustFactor);  for (i = 0, Chunk = 0; i < Choice->Length; i++)    for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++)      if (Choice->Blob[i].Class != BestChoice->ChunkClass[Chunk] &&    Choice->Blob[i].Certainty - BestChoice->ChunkCertainty[Chunk] <      Threshold) {        memfree(Choice);    return (TRUE);  }  return (FALSE);}                                /* FreeBadChoice *//*---------------------------------------------------------------------------*/int LengthOfShortestAlphaRun(register char *Word, const char *Word_lengths) {/* **	Parameters: **		Word            word to be tested **		Word_lengths    lengths of the unichars in Word **	Globals: none **	Operation: Return the length of the shortest alpha run in Word. **	Return:  Return the length of the shortest alpha run in Word. **	Exceptions: none **	History: Tue May 14 07:50:45 1991, DSJ, Created. */  register int Shortest = MAXINT;  register int Length;  for (; *Word; Word += *(Word_lengths++))  if (unicharset.get_isalpha(Word, *Word_lengths)) {    for (Length = 1, Word += *(Word_lengths++);         *Word && unicharset.get_isalpha(Word, *Word_lengths);         Word += *(Word_lengths++), Length++);    if (Length < Shortest)      Shortest = Length;    if (*Word == 0)      break;  }  if (Shortest == MAXINT)    Shortest = 0;  return (Shortest);}                                /* LengthOfShortestAlphaRun *//*---------------------------------------------------------------------------*/VIABLE_CHOICENewViableChoice (A_CHOICE * Choice, FLOAT32 AdjustFactor, float Certainties[]) {/* **	Parameters: **		Choice		choice to be converted to a viable choice **		AdjustFactor	factor used to adjust ratings for Choice **		Certainties	certainty for each character in Choice **	Globals: **		CurrentSegmentation	segmentation corresponding to Choice **	Operation: Allocate a new viable choice data structure, copy **		Choice, Certainties, and CurrentSegmentation into it, **		and return a pointer to it. **	Return: Ptr to new viable choice. **	Exceptions: none **	History: Thu May 16 15:28:29 1991, DSJ, Created. */  VIABLE_CHOICE NewChoice;  int Length;  char *Word;  char *Word_lengths;  CHAR_CHOICE *NewChar;  BLOB_WIDTH *BlobWidth;  Length = strlen (class_lengths (Choice));  assert (Length <= MAX_NUM_CHUNKS && Length > 0);  NewChoice = (VIABLE_CHOICE) Emalloc (sizeof (VIABLE_CHOICE_STRUCT) +    (Length - 1) * sizeof (CHAR_CHOICE));  NewChoice->Rating = class_probability (Choice);  NewChoice->Certainty = class_certainty (Choice);  NewChoice->AdjustFactor = AdjustFactor;  NewChoice->Length = Length;  for (Word = class_string (Choice),           Word_lengths = class_lengths (Choice),           NewChar = &(NewChoice->Blob[0]),           BlobWidth = CurrentSegmentation;       *Word;       Word += *(Word_lengths++), NewChar++, Certainties++, BlobWidth++) {    NewChar->Class = unicharset.unichar_to_id(Word, *Word_lengths);    NewChar->NumChunks = *BlobWidth;    NewChar->Certainty = *Certainties;  }  return (NewChoice);}                                /* NewViableChoice *//*---------------------------------------------------------------------------*/void PrintViableChoice(FILE *File, const char *Label, VIABLE_CHOICE Choice) {/* **	Parameters: **		File	open text file to print Choice to **		Label	text label to be printed with Choice **		Choice	choice to be printed **	Globals: none **	Operation: This routine dumps a text representation of the **		specified Choice to File. **	Return: none **	Exceptions: none **	History: Mon May 20 11:16:44 1991, DSJ, Created. */  int i, j;  fprintf (File, "%s", Label);  fprintf (File, "(R=%5.1f, C=%4.1f, F=%4.2f)  ",    Choice->Rating, Choice->Certainty, Choice->AdjustFactor);  for (i = 0; i < Choice->Length; i++)    fprintf (File, "%s", unicharset.id_to_unichar(Choice->Blob[i].Class));  fprintf (File, "\n");  for (i = 0; i < Choice->Length; i++) {    fprintf (File, "  %s", unicharset.id_to_unichar(Choice->Blob[i].Class));    for (j = 0; j < Choice->Blob[i].NumChunks - 1; j++)      fprintf (File, "   ");  }  fprintf (File, "\n");  for (i = 0; i < Choice->Length; i++) {    for (j = 0; j < Choice->Blob[i].NumChunks; j++)      fprintf (File, "%3d", (int) (Choice->Blob[i].Certainty * -10.0));  }  fprintf (File, "\n");}                                /* PrintViableChoice *//*---------------------------------------------------------------------------*/voidReplaceDuplicateChoice (VIABLE_CHOICE OldChoice,A_CHOICE * NewChoice,FLOAT32 AdjustFactor, float Certainties[]) {/* **	Parameters: **		OldChoice	existing viable choice to be replaced **		NewChoice	choice to replace OldChoice with **		AdjustFactor	factor used to adjust ratings for OldChoice **		Certainties	certainty for each character in OldChoice **	Globals: **		CurrentSegmentation	segmentation for NewChoice **	Operation: This routine is used whenever a better segmentation (or **		contextual interpretation) is found for a word which already **		exists.  The OldChoice is updated with the relevant **		information from the new choice.  The text string itself **		does not need to be copied since, by definition, has not **		changed. **	Return: none **	Exceptions: none **	History: Fri May 17 13:35:58 1991, DSJ, Created. */  char *Word;  char *Word_lengths;  CHAR_CHOICE *NewChar;  BLOB_WIDTH *BlobWidth;  OldChoice->Rating = class_probability (NewChoice);  OldChoice->Certainty = class_certainty (NewChoice);  OldChoice->AdjustFactor = AdjustFactor;  for (Word = class_string (NewChoice),           Word_lengths = class_lengths (NewChoice),           NewChar = &(OldChoice->Blob[0]),           BlobWidth = CurrentSegmentation;       *Word;       Word += *(Word_lengths++), NewChar++, Certainties++, BlobWidth++) {    NewChar->NumChunks = *BlobWidth;    NewChar->Certainty = *Certainties;  }}                                /* ReplaceDuplicateChoice *//*---------------------------------------------------------------------------*/int StringSameAs(const char *String,                 const char *String_lengths,                 VIABLE_CHOICE ViableChoice) {/* **	Parameters: **		String		string to compare to ViableChoice **		String_lengths	lengths of unichars in String **		ViableChoice	viable choice to compare to String **	Globals: none **	Operation: This routine compares String to ViableChoice and **		returns TRUE if they are the same, FALSE otherwise. **	Return: TRUE or FALSE. **	Exceptions: none **	History: Fri May 17 08:48:04 1991, DSJ, Created. */  CHAR_CHOICE *Char;  int i;  int current_unichar_length;  for (Char = &(ViableChoice->Blob[0]), i = 0;    i < ViableChoice->Length;       String += *(String_lengths++), Char++, i++) {    current_unichar_length = strlen(unicharset.id_to_unichar(Char->Class));  if (current_unichar_length != *String_lengths ||      strncmp(String, unicharset.id_to_unichar(Char->Class),              current_unichar_length) != 0)    return (FALSE);  }  if (*String == 0)    return (TRUE);  else    return (FALSE);}                                /* StringSameAs *//*---------------------------------------------------------------------------*/int UniformCertainties(CHOICES_LIST Choices, A_CHOICE *BestChoice) {/* **	Parameters: **		Choices		choices for current segmentation **		BestChoice	best choice for current segmentation **	Globals: **		CertaintyVariation	max allowed certainty variation **	Operation: This routine returns TRUE if the certainty of the **		BestChoice word is within a reasonable range of the average **		certainties for the best choices for each character in **		the segmentation.  This test is used to catch words in which **		one character is much worse than the other characters in **		the word (i.e. FALSE will be returned in that case). **		The algorithm computes the mean and std deviation of the **		certainties in the word with the worst certainty thrown out. **	Return: TRUE or FALSE. **	Exceptions: none **	History: Tue May 14 08:23:21 1991, DSJ, Created. */  int i;  CHOICES CharChoices;  float Certainty;  float WorstCertainty = MAX_FLOAT32;  float CertaintyThreshold;  FLOAT64 TotalCertainty;  FLOAT64 TotalCertaintySquared;  FLOAT64 Variance;  FLOAT32 Mean, StdDev;  int WordLength;  WordLength = array_count (Choices);  if (WordLength < 3)    return (TRUE);  TotalCertainty = TotalCertaintySquared = 0.0;  for_each_choice(Choices, i) {    CharChoices = (CHOICES) array_index (Choices, i);    Certainty = best_certainty (CharChoices);    TotalCertainty += Certainty;    TotalCertaintySquared += Certainty * Certainty;    if (Certainty < WorstCertainty)      WorstCertainty = Certainty;  }  /* subtract off worst certainty from statistics */  WordLength--;  TotalCertainty -= WorstCertainty;  TotalCertaintySquared -= WorstCertainty * WorstCertainty;  Mean = TotalCertainty / WordLength;  Variance = ((WordLength * TotalCertaintySquared -    TotalCertainty * TotalCertainty) /    (WordLength * (WordLength - 1)));  if (Variance < 0.0)    Variance = 0.0;  StdDev = sqrt (Variance);  CertaintyThreshold = Mean - CertaintyVariation * StdDev;  if (CertaintyThreshold > NonDictCertainty)    CertaintyThreshold = NonDictCertainty;  if (class_certainty (BestChoice) < CertaintyThreshold) {    if (StopperDebugLevel >= 1)      cprintf        ("Stopper:  Non-uniform certainty = %4.1f (m=%4.1f, s=%4.1f, t=%4.1f)\n",        class_certainty (BestChoice), Mean, StdDev, CertaintyThreshold);    return (FALSE);  }  else    return (TRUE);}                                /* UniformCertainties */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?