adaptmatch.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,846 行 · 第 1/5 页
CPP
1,846 行
*/ FEATURE_SET Features; int Fid, Pid; FEATURE Feature; int NumFeatures; TEMP_PROTO TempProto; PROTO Proto; ADAPT_CLASS Class; INT_CLASS IClass; CLASS_INDEX ClassIndex; TEMP_CONFIG Config; NormMethod = baseline; Features = ExtractOutlineFeatures (Blob, LineStats); NumFeatures = NumFeaturesIn (Features); if (NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); return; } Class = NewAdaptedClass (); ClassIndex = AddAdaptedClass (Templates, Class, ClassId); Config = NewTempConfig (NumFeatures - 1); TempConfigFor (Class, 0) = Config; /* this is a kludge to construct cutoffs for adapted templates */ if (Templates == AdaptedTemplates) BaselineCutoffs[ClassIndex] = CharNormCutoffs[IndexForClassId (PreTrainedTemplates, ClassId)]; IClass = ClassForClassId (Templates->Templates, ClassId); for (Fid = 0; Fid < NumFeaturesIn (Features); Fid++) { Pid = AddIntProto (IClass); assert (Pid != NO_PROTO); Feature = FeatureIn (Features, Fid); TempProto = NewTempProto (); Proto = &(TempProto->Proto); /* compute proto params - NOTE that Y_DIM_OFFSET must be used because ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 instead of the -0.25 to 0.75 used in baseline normalization */ ProtoAngle (Proto) = ParamOf (Feature, OutlineFeatDir); ProtoX (Proto) = ParamOf (Feature, OutlineFeatX); ProtoY (Proto) = ParamOf (Feature, OutlineFeatY) - Y_DIM_OFFSET; ProtoLength (Proto) = ParamOf (Feature, OutlineFeatLength); FillABC(Proto); TempProto->ProtoId = Pid; SET_BIT (Config->Protos, Pid); ConvertProto(Proto, Pid, IClass); AddProtoToProtoPruner(Proto, Pid, IClass); Class->TempProtos = push (Class->TempProtos, TempProto); } FreeFeatureSet(Features); AddIntConfig(IClass); ConvertConfig (AllProtosOn, 0, IClass); if (LearningDebugLevel >= 1) { cprintf ("Added new class '%s' with index %d and %d protos.\n", unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures); }} /* MakeNewAdaptedClass *//*---------------------------------------------------------------------------*/int GetAdaptiveFeatures(TBLOB *Blob, LINE_STATS *LineStats, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) {/* ** Parameters: ** Blob blob to extract features from** LineStats statistics about text row blob is in** IntFeatures array to fill with integer features** FloatFeatures place to return actual floating-pt features** Globals: none** Operation: This routine sets up the feature extractor to extract** baseline normalized pico-features.** The extracted pico-features are converted** to integer form and placed in IntFeatures. The original** floating-pt. features are returned in FloatFeatures.** Return: Number of pico-features returned (0 if an error occurred)** Exceptions: none** History: Tue Mar 12 17:55:18 1991, DSJ, Created.*/ FEATURE_SET Features; int NumFeatures; NormMethod = baseline; Features = ExtractPicoFeatures (Blob, LineStats); NumFeatures = NumFeaturesIn (Features); if (NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); return (0); } ComputeIntFeatures(Features, IntFeatures); *FloatFeatures = Features; return (NumFeatures);} /* GetAdaptiveFeatures *//**---------------------------------------------------------------------------- Private Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/int AdaptableWord(TWERD *Word, const char *BestChoice, const char *BestChoice_lengths, const char *BestRawChoice, const char *BestRawChoice_lengths) {/* ** Parameters: ** Word current word** BestChoice best overall choice for word with context** BestRawChoice best choice for word without context** Globals: none** Operation: Return TRUE if the specified word is acceptable for** adaptation.** Return: TRUE or FALSE** Exceptions: none** History: Thu May 30 14:25:06 1991, DSJ, Created.*/ int BestChoiceLength; return ( /* rules that apply in general - simplest to compute first */ /* EnableLearning && */ /* new rules */ BestChoice != NULL && BestRawChoice != NULL && Word != NULL && (BestChoiceLength = strlen (BestChoice_lengths)) > 0 && BestChoiceLength == NumBlobsIn (Word) && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && ( (EnableNewAdaptRules && CurrentBestChoiceAdjustFactor () <= ADAPTABLE_WERD && AlternativeChoicesWorseThan (ADAPTABLE_WERD) && CurrentBestChoiceIs (BestChoice, BestChoice_lengths)) || /* old rules */ (!EnableNewAdaptRules && BestChoiceLength == strlen (BestRawChoice_lengths) && ((valid_word (BestChoice) && case_ok (BestChoice, BestChoice_lengths)) || (valid_number (BestChoice, BestChoice_lengths) && pure_number (BestChoice, BestChoice_lengths))) && punctuation_ok (BestChoice, BestChoice_lengths) != -1 && punctuation_ok (BestChoice, BestChoice_lengths) <= 1)));} /* AdaptableWord *//*---------------------------------------------------------------------------*/void AdaptToChar(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold) {/* ** Parameters: ** Blob blob to add to templates for ClassId** LineStats statistics about text line blob is in** ClassId class to add blob to** Threshold minimum match rating to existing template** Globals:** AdaptedTemplates current set of adapted templates** AllProtosOn dummy mask to match against all protos** AllConfigsOn dummy mask to match against all configs** Operation:** Return: none** Exceptions: none** History: Thu Mar 14 09:36:03 1991, DSJ, Created.*/ int NumFeatures; INT_FEATURE_ARRAY IntFeatures; INT_RESULT_STRUCT IntResult; CLASS_INDEX ClassIndex; INT_CLASS IClass; ADAPT_CLASS Class; TEMP_CONFIG TempConfig; FEATURE_SET FloatFeatures; int NewTempConfigId; NumCharsAdaptedTo++; if (!LegalClassId (ClassId)) return; if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) { MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates); } else { IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId); ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId); Class = AdaptedTemplates->Class[ClassIndex]; NumFeatures = GetAdaptiveFeatures (Blob, LineStats, IntFeatures, &FloatFeatures); if (NumFeatures <= 0) return; SetBaseLineMatch(); IntegerMatcher (IClass, AllProtosOn, AllConfigsOn, NumFeatures, NumFeatures, IntFeatures, 0, &IntResult, NO_DEBUG); SetAdaptiveThreshold(Threshold); if (IntResult.Rating <= Threshold) { if (ConfigIsPermanent (Class, IntResult.Config)) { if (LearningDebugLevel >= 1) cprintf ("Found good match to perm config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); FreeFeatureSet(FloatFeatures); return; } TempConfig = TempConfigFor (Class, IntResult.Config); IncreaseConfidence(TempConfig); if (LearningDebugLevel >= 1) cprintf ("Increasing reliability of temp config %d to %d.\n", IntResult.Config, TempConfig->NumTimesSeen); if (TempConfigReliable (TempConfig)) MakePermanent (AdaptedTemplates, ClassId, IntResult.Config, Blob, LineStats); } else { if (LearningDebugLevel >= 1) cprintf ("Found poor match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates, ClassId, NumFeatures, IntFeatures, FloatFeatures); if (NewTempConfigId >= 0 && TempConfigReliable (TempConfigFor (Class, NewTempConfigId))) MakePermanent (AdaptedTemplates, ClassId, NewTempConfigId, Blob, LineStats); if (LearningDebugLevel >= 1) { IntegerMatcher (IClass, AllProtosOn, AllConfigsOn, NumFeatures, NumFeatures, IntFeatures, 0, &IntResult, NO_DEBUG); cprintf ("Best match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); if (LearningDebugLevel >= 2) { uinT32 ConfigMask; ConfigMask = 1 << IntResult.Config; ShowMatchDisplay(); IntegerMatcher (IClass, AllProtosOn, (BIT_VECTOR)&ConfigMask, NumFeatures, NumFeatures, IntFeatures, 0, &IntResult, 6 | 0x19); UpdateMatchDisplay(); GetClassToDebug ("Adapting"); } } } FreeFeatureSet(FloatFeatures); }} /* AdaptToChar *//*---------------------------------------------------------------------------*/void AdaptToPunc(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId, FLOAT32 Threshold) {/* ** Parameters: ** Blob blob to add to templates for ClassId** LineStats statistics about text line blob is in** ClassId class to add blob to** Threshold minimum match rating to existing template** Globals:** PreTrainedTemplates current set of built-in templates** Operation:** Return: none** Exceptions: none** History: Thu Mar 14 09:36:03 1991, DSJ, Created.*/ ADAPT_RESULTS Results; int i; Results.BlobLength = MAX_INT32; Results.NumMatches = 0; Results.BestRating = WORST_POSSIBLE_RATING; Results.BestClass = NO_CLASS; Results.BestConfig = 0; InitMatcherRatings (Results.Ratings); CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results); RemoveBadMatches(&Results); if (Results.NumMatches != 1) { if (LearningDebugLevel >= 1) { cprintf ("Rejecting punc = %s (Alternatives = ", unicharset.id_to_unichar(ClassId)); for (i = 0; i < Results.NumMatches; i++) cprintf ("%s", unicharset.id_to_unichar(Results.Classes[i])); cprintf (")\n"); } return; } #ifndef SECURE_NAMES if (LearningDebugLevel >= 1) cprintf ("Adapting to punc = %s, thr= %g\n", unicharset.id_to_unichar(ClassId), Threshold); #endif AdaptToChar(Blob, LineStats, ClassId, Threshold);} /* AdaptToPunc *//*---------------------------------------------------------------------------*/void AddNewResult(ADAPT_RESULTS *Results, CLASS_ID ClassId, FLOAT32 Rating, int ConfigId) {/* ** Parameters: ** Results results to add new result to** ClassId class of new result** Rating rating of new result** ConfigId config id of new result** Globals:** BadMatchPad defines limits of an acceptable match** Operation: This routine adds the result of a classification into** Results. If the new rating is much worse than the current** best rating, it is not entered into results because it** would end up being stripped later anyway. If the new rating
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?