stopper.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,459 行 · 第 1/4 页

CPP
1,459
字号
  NewChoice = NULL;  Choices = BestChoices;  iterate(Choices) {    if (ChoiceSameAs (Choice, (VIABLE_CHOICE) first_node (Choices))) {      if (class_probability (Choice) < BestRating (Choices))        NewChoice = (VIABLE_CHOICE) first_node (Choices);      else        return;    }  }  if (NewChoice) {    ReplaceDuplicateChoice(NewChoice, Choice, AdjustFactor, Certainties);    BestChoices = delete_d (BestChoices, NewChoice, is_same_node);  }  else {    NewChoice = NewViableChoice (Choice, AdjustFactor, Certainties);  }  BestChoices = s_adjoin (BestChoices, NewChoice, CmpChoiceRatings);  if (StopperDebugLevel >= 2)    PrintViableChoice (stderr, "New Word Choice:  ", NewChoice);  if (count (BestChoices) > tessedit_truncate_wordchoice_log) {    Choices =      (LIST) nth_cell (BestChoices, tessedit_truncate_wordchoice_log);    destroy_nodes (rest (Choices), Efree);    set_rest(Choices, NIL);  }}                                /* LogNewWordChoice *//*---------------------------------------------------------------------------*/static AMBIG_TABLE *AmbigFor = NULL;int NoDangerousAmbig(const char *Word,                     const char *Word_lengths,                     DANGERR *fixpt) {/* **	Parameters: **		Word	word to check for dangerous ambiguities **		Word_lengths	lengths of unichars in Word **	Globals: none **	Operation: This word checks each letter in word against a list **		of potentially ambiguous characters.  If a match is found **		that letter is replaced with its ambiguity and tested in **		the dictionary.  If the ambiguous word is found in the **		dictionary, FALSE is returned.  Otherwise, the search **		continues for other ambiguities.  If no ambiguities that **		match in the dictionary are found, TRUE is returned. **	Return: TRUE if Word contains no dangerous ambiguities. **	Exceptions: none **	History: Mon May  6 16:28:56 1991, DSJ, Created. */  char NewWord[MAX_WERD_SIZE * UNICHAR_LEN + 1];  char *NextNewChar;  int bad_index = 0;  if (!AmbigFor)    AmbigFor = FillAmbigTable ();  NextNewChar = NewWord;  while (*Word)  if (AmbigsFound (NewWord, NextNewChar,                   Word + *Word_lengths, Word_lengths + 1,                   AmbigFor[unicharset.unichar_to_id(Word, *Word_lengths)],                   fixpt)) {    if (fixpt != NULL)      fixpt->index = bad_index;    return (FALSE);  }  else {    strncpy(NextNewChar, Word, *Word_lengths);    NextNewChar += *Word_lengths;    Word += *Word_lengths;    Word_lengths++;    bad_index++;  }  return (TRUE);}                                /* NoDangerousAmbig */void EndDangerousAmbigs() {  if (AmbigFor != NULL) {    for (int i = 0; i <= MAX_CLASS_ID; ++i) {      destroy_nodes(AmbigFor[i], Efree);    }    Efree(AmbigFor);    AmbigFor = NULL;  }}/*---------------------------------------------------------------------------*/void SettupStopperPass1() {/* **	Parameters: none **	Globals: **		RejectOffset	offset allowed before word is rejected **	Operation: This routine performs any settup of stopper variables **		that is needed in preparation for the first pass. **	Return: none **	Exceptions: none **	History: Mon Jun  3 12:32:00 1991, DSJ, Created. */  RejectOffset = 0.0;}                                /* SettupStopperPass1 *//*---------------------------------------------------------------------------*/void SettupStopperPass2() {/* **	Parameters: none **	Globals: **		RejectOffset	offset allowed before word is rejected **	Operation: This routine performs any settup of stopper variables **		that is needed in preparation for the second pass. **	Return: none **	Exceptions: none **	History: Mon Jun  3 12:32:00 1991, DSJ, Created. */  RejectOffset = RejectCertaintyOffset;}                                /* SettupStopperPass2 *//**----------------------------------------------------------------------------              Private Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/void AddNewChunk(VIABLE_CHOICE Choice, int Blob) {/* **	Parameters: **		Choice	choice to add a new chunk to **		Blob	index of blob being split **	Globals: none **	Operation: This routine increments the chunk count of the character **		in Choice which corresponds to Blob. **	Return: none **	Exceptions: none **	History: Mon May 20 11:43:27 1991, DSJ, Created. */  int i, LastChunk;  for (i = 0, LastChunk = 0; i < Choice->Length; i++) {    LastChunk += Choice->Blob[i].NumChunks;    if (Blob < LastChunk) {      (Choice->Blob[i].NumChunks)++;      return;    }  }  mem_tidy (1);  cprintf ("AddNewChunk failed:Choice->Length=%d, LastChunk=%d, Blob=%d\n",    Choice->Length, LastChunk, Blob);  assert(FALSE);  /* this should never get executed */}                                /* AddNewChunk *//*---------------------------------------------------------------------------*/int AmbigsFound(char *Word,                char *CurrentChar,                const char *Tail,                const char *Tail_lengths,                LIST Ambigs,                DANGERR *fixpt) {/* **	Parameters: **		Word		word being tested for ambiguities **		CurrentChar	position in Word to put ambig replacement **		Tail		end of word to place after ambiguity **		Tail_lengths    lengths of the unichars in Tail **		Ambigs		list of ambiguities to test at this position **	Globals: none **	Operation: For each ambiguity in Ambigs, see if the remainder of **		the test string matches the start of Tail.  If it does, **		construct a word consisting of the contents of Word up to, **		but not including, CurrentChar followed by the replacement **		string for the ambiguity followed by the unmatched **		contents of Tail.  Then test this word to see if it **		is a dictionary word.  If it is return TRUE.  If none of **		the ambiguities result in a dictionary word, return FALSE. **	Return: TRUE if the Word is ambiguous at the specified position **	Exceptions: none **	History: Thu May  9 10:10:28 1991, DSJ, Created. */  AMBIG_SPEC *AmbigSpec;  char *ambig;  char *ambig_lengths;  const char *UnmatchedTail;  const char *UnmatchedTail_lengths;  int Matches;  int bad_length;  iterate(Ambigs) {    AmbigSpec = (AMBIG_SPEC *) first_node (Ambigs);    ambig = AmbigSpec->ambig;    ambig_lengths = AmbigSpec->lengths;    bad_length = 1;    UnmatchedTail = Tail;    UnmatchedTail_lengths = Tail_lengths;    Matches = TRUE;    while (*ambig != ' ' && Matches)      if (*UnmatchedTail_lengths == *ambig_lengths &&          strncmp(ambig, UnmatchedTail, *ambig_lengths) == 0) {        ambig += *(ambig_lengths++);        UnmatchedTail += *(UnmatchedTail_lengths++);        bad_length++;      }      else        Matches = FALSE;    if (Matches) {      ambig += *(ambig_lengths++); /* skip over the space */                                   /* insert replacement string */      strcpy(CurrentChar, ambig);                                   /* add tail */      strcat(Word, UnmatchedTail);      if (valid_word (Word)) {        if (StopperDebugLevel >= 1)          cprintf ("Stopper:  Possible ambiguous word = %s\n", Word);        if (fixpt != NULL) {          fixpt->good_length = strlen (ambig_lengths);          fixpt->bad_length = bad_length;        }        return (TRUE);      }    }  }  return (FALSE);}                                /* AmbigsFound *//*---------------------------------------------------------------------------*/int ChoiceSameAs(A_CHOICE *Choice, VIABLE_CHOICE ViableChoice) {/* **	Parameters: **		Choice		choice to compare to ViableChoice **		ViableChoice	viable choice to compare to Choice **	Globals: none **	Operation: This routine compares the corresponding strings of **		Choice and ViableChoice and returns TRUE if they are the **		same, FALSE otherwise. **	Return: TRUE or FALSE. **	Exceptions: none **	History: Fri May 17 08:48:04 1991, DSJ, Created. */  return (StringSameAs (class_string (Choice), class_lengths (Choice),                        ViableChoice));}                                /* ChoiceSameAs *//*---------------------------------------------------------------------------*/int CmpChoiceRatings(void *arg1,    //VIABLE_CHOICE                 Choice1,                     void *arg2) {  //VIABLE_CHOICE                 Choice2)/* **	Parameters: **		Choice1, Choice2	choices to compare ratings for **	Globals: none **	Operation: Return -1 if the rating for Choice1 is less than the **		rating for Choice2, otherwise return (1). **	Return: -1 or 1 **	Exceptions: none **	History: Wed May 15 13:02:37 1991, DSJ, Created. */  float R1, R2;  VIABLE_CHOICE Choice1 = (VIABLE_CHOICE) arg1;  VIABLE_CHOICE Choice2 = (VIABLE_CHOICE) arg2;  R1 = Choice1->Rating;  R2 = Choice2->Rating;  if (R1 < R2)    return (-1);  else    return (1);}                                /* CmpChoiceRatings *//*---------------------------------------------------------------------------*/void ExpandChoice(VIABLE_CHOICE Choice, EXPANDED_CHOICE *ExpandedChoice) {/* **	Parameters: **		Choice		choice to be expanded **		ExpandedChoice	place to put resulting expanded choice **	Globals: none **	Operation: This routine expands Choice and places the results **		in ExpandedChoice.  The primary function of expansion **		is to create an two arrays, one which holds the corresponding **		certainty for each chunk in Choice, and one which holds **		the class for each chunk. **	Return: none (results are placed in ExpandedChoice) **	Exceptions: none **	History: Fri May 31 15:21:57 1991, DSJ, Created. */  int i, j, Chunk;  ExpandedChoice->Choice = Choice;  for (i = 0, Chunk = 0; i < Choice->Length; i++)  for (j = 0; j < Choice->Blob[i].NumChunks; j++, Chunk++) {    ExpandedChoice->ChunkCertainty[Chunk] = Choice->Blob[i].Certainty;    ExpandedChoice->ChunkClass[Chunk] = Choice->Blob[i].Class;  }}                                /* ExpandChoice *//*---------------------------------------------------------------------------*/AMBIG_TABLE *FillAmbigTable() {/* **	Parameters: none **	Globals: **		DangerousAmbigs		filename of dangerous ambig info **	Operation: This routine allocates a new ambiguity table and fills **		it in from the file specified by DangerousAmbigs.  An **		ambiguity table is an array of lists.  The array is indexed **		by a class id.  Therefore, each entry in the table provides **		a list of potential ambiguities which can start with the **		corresponding character.  Each potential ambiguity is **		described by a string which contains the remainder of the **		test string followed by a space followed by the replacement **		string.  For example the ambiguity "rn -> m", would be **		located in the table at index 'r'.  The string corresponding **		to this ambiguity would be "n m". **	Return: Pointer to new ambiguity table. **	Exceptions: none **	History: Thu May  9 09:20:57 1991, DSJ, Created. */  FILE *AmbigFile;  AMBIG_TABLE *NewTable;  int i;  int AmbigPartSize;  char buffer[256 * UNICHAR_LEN];  char TestString[256 * UNICHAR_LEN];  char TestString_lengths[256];  char ReplacementString[256 * UNICHAR_LEN];  char ReplacementString_lengths[256];  STRING name;  char lengths[2];  AMBIG_SPEC *AmbigSpec;  UNICHAR_ID unichar_id;  lengths[1] = 0;  name = language_data_path_prefix;  name += DangerousAmbigs;  AmbigFile = Efopen (name.string(), "r");  NewTable = (AMBIG_TABLE *) Emalloc (sizeof (LIST) * (MAX_CLASS_ID + 1));  for (i = 0; i <= MAX_CLASS_ID; i++)    NewTable[i] = NIL;  while (fscanf (AmbigFile, "%d", &AmbigPartSize) == 1) {    TestString[0] = '\0';    TestString_lengths[0] = 0;    ReplacementString[0] = '\0';    ReplacementString_lengths[0] = 0;    bool illegal_char = false;    for (i = 0; i < AmbigPartSize; ++i) {      fscanf (AmbigFile, "%s", buffer);      strcat(TestString, buffer);      lengths[0] = strlen(buffer);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?