intmatcher.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,532 行 · 第 1/4 页

CPP
1,532
字号
/******************************************************************************
 **      Filename:    intmatcher.c
 **      Purpose:     Generic high level classification routines.
 **      Author:      Robert Moss
 **      History:     Wed Feb 13 17:35:28 MST 1991, RWM, Created.
 **                   Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
 **                        support for adaptive matching.
 **      (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
/**----------------------------------------------------------------------------
                          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "intmatcher.h"
#include "tordvars.h"
#include "callcpp.h"
#include "scrollview.h"
#include "globals.h"
#include <math.h>

#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \
		+BITS_PER_WERD-1)/BITS_PER_WERD)

/**----------------------------------------------------------------------------
                    Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
#define  SE_TABLE_BITS    9
#define  SE_TABLE_SIZE  512
#define TEMPLATE_CACHE 2
static uinT8 SimilarityEvidenceTable[SE_TABLE_SIZE];

/* Byte lookup table: entry i holds the bit position (0-7) of the lowest set
 * bit of i.  Entry 0 (no bit set) holds the sentinel 255. */
static uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

/* Byte lookup table: entry i holds i with its lowest set bit cleared
 * (i.e. i & (i - 1)); entry 0 is 0. */
static uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
  0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
  0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
  0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
  0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
  0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
  0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
  0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
  0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
  0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
  0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
  0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
  0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
  0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
  0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
  0xf8, 0xfc, 0xfc, 0xfe
};

static uinT32 EvidenceTableMask;
static uinT32 MultTruncShiftBits;
static uinT32 TableTruncShiftBits;
uinT32 EvidenceMultMask;
static inT16 LocalMatcherMultiplier;

/* Tunable matcher variables.  Each entry supplies a variable name, its
 * default value, Make/Set routine names, two numeric arguments and a prompt
 * string to the project's make_int_var / make_float_var macros. */
make_int_var (ClassPrunerThreshold, 229, MakeClassPrunerThreshold,
              16, 20, SetClassPrunerThreshold,
              "Class Pruner Threshold 0-255:        ");
make_int_var (ClassPrunerMultiplier, 30, MakeClassPrunerMultiplier,
              16, 21, SetClassPrunerMultiplier,
              "Class Pruner Multiplier 0-255:       ");
make_int_var (IntegerMatcherMultiplier, 14, MakeIntegerMatcherMultiplier,
              16, 22, SetIntegerMatcherMultiplier,
              "Integer Matcher Multiplier  0-255:   ");
make_int_var (IntThetaFudge, 128, MakeIntThetaFudge,
              16, 23, SetIntThetaFudge,
              "Integer Matcher Theta Fudge 0-255:   ");
make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength,
              16, 24, SetCPCutoffStrength,
              "Class Pruner CutoffStrength:         ");
make_int_var (EvidenceTableBits, 9, MakeEvidenceTableBits,
              16, 25, SetEvidenceTableBits,
              "Bits in Similarity to Evidence Lookup  8-9:   ");
make_int_var (IntEvidenceTruncBits, 14, MakeIntEvidenceTruncBits,
              16, 26, SetIntEvidenceTruncBits,
              "Integer Evidence Truncation Bits (Distance) 8-14:   ");
make_float_var (SEExponentialMultiplier, 0, MakeSEExponentialMultiplier,
                16, 27, SetSEExponentialMultiplier,
                "Similarity to Evidence Table Exponential Multiplier: ");
make_float_var (SimilarityCenter, 0.0075, MakeSimilarityCenter,
                16, 28, SetSimilarityCenter, "Center of Similarity Curve: ");
make_int_var (AdaptProtoThresh, 230, MakeAdaptProtoThresh,
              16, 29, SetAdaptProtoThresh,
              "Threshold for good protos during adaptive 0-255:   ");
make_int_var (AdaptFeatureThresh, 230, MakeAdaptFeatureThresh,
              16, 30, SetAdaptFeatureThresh,
              "Threshold for good features during adaptive 0-255:   ");

//extern int display_ratings;
//extern inT32                                  cp_maps[4];

int protoword_lookups;
int zero_protowords;
int proto_shifts;
int set_proto_bits;
int config_shifts;
int set_config_bits;

/**----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
/* Return the offset, counted in uinT32 words from the start of one class
 * pruner table, of the pruner vector selected by a feature's X, Y and Theta
 * values.  Each coordinate is scaled by NUM_CP_BUCKETS and shifted right by
 * 8 before the three are combined. */
static uinT32 CPFeatureWordOffset(INT_FEATURE feature) {
  return (((feature->X * NUM_CP_BUCKETS >> 8) * NUM_CP_BUCKETS +
           (feature->Y * NUM_CP_BUCKETS >> 8)) * NUM_CP_BUCKETS +
          (feature->Theta * NUM_CP_BUCKETS >> 8)) << 1;
}

/**----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
/*
 **      Parameters:
 **              IntTemplates           Class pruner tables
 **              NumFeatures            Number of features in blob
 **              Features               Array of features
 **              NormalizationFactors   Array of fudge factors from blob
 **                                     normalization process
 **                                     (by CLASS_INDEX)
 **              ExpectedNumFeatures    Array of expected number of features
 **                                     for each class
 **                                     (by CLASS_INDEX)
 **              Results                Output: sorted array of pruned classes
 **                                     (by CLASS_ID), each with a rating of
 **                                     1 - score / (cp_maps[3] * NumFeatures)
 **              Debug                  Not read by this routine; diagnostic
 **                                     output is gated by display_ratings
 **      Globals:
 **              ClassPrunerThreshold   Cutoff threshold (fraction of the best
 **                                     normalized score, in 1/256 units)
 **              ClassPrunerMultiplier  Normalization factor multiplier
 **              CPCutoffStrength       Weight used when penalising blobs with
 **                                     fewer features than a class expects
 **              cp_maps                Score contributed by each 2-bit pruner
 **                                     table value
 **              display_ratings        >1 prints the surviving classes,
 **                                     >2 also prints per-feature detail
 **              unicharset             Used to skip disabled characters and
 **                                     to print class names
 **      Operation:
 **              Prune the classes using a modified fast match table.
 **              Return a sorted list of classes along with the number
 **              of pruned classes in that list.
 **              The routine keeps its scratch arrays in function-static
 **              storage, so calls must not overlap.
 **      Return: Number of pruned classes.
 **      Exceptions: none
 **      History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
int ClassPruner(INT_TEMPLATES IntTemplates,
                inT16 NumFeatures,
                INT_FEATURE_ARRAY Features,
                CLASS_NORMALIZATION_ARRAY NormalizationFactors,
                CLASS_CUTOFF_ARRAY ExpectedNumFeatures,
                CLASS_PRUNER_RESULTS Results,
                int Debug) {
  /* Each 32-bit pruner word packs sixteen 2-bit class entries. */
  const int ClassesPerWord = 16;

  static int ClassCount[MAX_NUM_CLASSES];
  static int NormCount[MAX_NUM_CLASSES];
  /* SortKey/SortIndex are filled from index 1 upward for HeapSort. */
  static int SortKey[MAX_NUM_CLASSES + 1];
  static int SortIndex[MAX_NUM_CLASSES + 1];

  uinT32 PrunerWord;
  inT32 class_index;             //index to class
  int Word;
  int Slot;                      //2-bit entry within a pruner word
  uinT32 *BasePrunerAddress;
  uinT32 feature_address;        //word offset of current feature's vector
  INT_FEATURE feature;           //current feature
  CLASS_PRUNER *Pruner;
  int PrunerSet;
  int NumPruners;
  inT32 feature_index;           //current feature index
  CLASS_INDEX Class;
  int out_class;
  int MaxNumClasses;
  int MaxCount;
  int NumClasses;
  CLASS_ID classch;

  MaxNumClasses = NumClassesIn (IntTemplates);

  /* Clear Class Counts */
  for (Class = 0; Class < MaxNumClasses; Class++)
    ClassCount[Class] = 0;

  /* Update Class Counts */
  NumPruners = NumClassPrunersIn (IntTemplates);
  for (feature_index = 0; feature_index < NumFeatures; feature_index++) {
    feature = &Features[feature_index];
    feature_address = CPFeatureWordOffset (feature);
    Pruner = ClassPrunersFor (IntTemplates);
    class_index = 0;
    for (PrunerSet = 0; PrunerSet < NumPruners; PrunerSet++, Pruner++) {
      BasePrunerAddress = (uinT32 *) (*Pruner) + feature_address;
      for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) {
        PrunerWord = *BasePrunerAddress++;
        for (Slot = 0; Slot < ClassesPerWord; Slot++) {
          ClassCount[class_index++] += cp_maps[PrunerWord & 3];
          PrunerWord >>= 2;
        }
      }
    }
  }

  /* Adjust Class Counts for Number of Expected Features */
  for (Class = 0; Class < MaxNumClasses; Class++) {
    if (NumFeatures < ExpectedNumFeatures[Class]) {
      int deficit = ExpectedNumFeatures[Class] - NumFeatures;
      ClassCount[Class] -= ClassCount[Class] * deficit /
                           (NumFeatures*CPCutoffStrength + deficit);
    }
    if (!unicharset.get_enabled(ClassIdForIndex(IntTemplates, Class)))
      ClassCount[Class] = 0;  // This char is disabled!
  }

  /* Adjust Class Counts for Normalization Factors */
  MaxCount = 0;
  for (Class = 0; Class < MaxNumClasses; Class++) {
    NormCount[Class] = ClassCount[Class]
      - ((ClassPrunerMultiplier * NormalizationFactors[Class]) >> 8)
      * cp_maps[3] / 3;
    if (NormCount[Class] > MaxCount)
      MaxCount = NormCount[Class];
  }

  /* Prune Classes: threshold is ClassPrunerThreshold/256 of the best score,
   * but never below 1 so that zero-score classes are always dropped. */
  MaxCount *= ClassPrunerThreshold;
  MaxCount >>= 8;
  if (MaxCount < 1)
    MaxCount = 1;

  /* Select Classes */
  NumClasses = 0;
  for (Class = 0; Class < MaxNumClasses; Class++) {
    if (NormCount[Class] >= MaxCount) {
      NumClasses++;
      SortIndex[NumClasses] = Class;
      SortKey[NumClasses] = NormCount[Class];
    }
  }

  /* Sort Classes using Heapsort Algorithm */
  if (NumClasses > 1)
    HeapSort(NumClasses, SortKey, SortIndex);

  if (display_ratings > 1) {
    cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures);
    for (Class = 0; Class < NumClasses; Class++) {
      classch = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
      cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n",
               unicharset.id_to_unichar(classch),
               ClassCount[SortIndex[NumClasses - Class]],
               ExpectedNumFeatures[SortIndex[NumClasses - Class]],
               SortKey[NumClasses - Class],
               1010 - 1000 * SortKey[NumClasses - Class] /
                 (cp_maps[3] * NumFeatures));
    }
    if (display_ratings > 2) {
      for (feature_index = 0; feature_index < NumFeatures; feature_index++) {
        cprintf ("F=%3d,", feature_index);
        feature = &Features[feature_index];
        feature_address = CPFeatureWordOffset (feature);
        Pruner = ClassPrunersFor (IntTemplates);
        class_index = 0;
        for (PrunerSet = 0; PrunerSet < NumPruners; PrunerSet++, Pruner++) {
          BasePrunerAddress = (uinT32 *) (*Pruner) + feature_address;
          for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) {
            PrunerWord = *BasePrunerAddress++;
            for (Slot = 0; Slot < ClassesPerWord; Slot++, class_index++) {
              /* Slots at or beyond MaxNumClasses have no NormCount written
               * by this call (the array is static, so they would hold stale
               * values) and no valid class id; skip them. */
              if (class_index < MaxNumClasses &&
                  NormCount[class_index] >= MaxCount)
                cprintf (" %s=%d,",
                  unicharset.id_to_unichar(ClassIdForIndex (IntTemplates,
                                                            class_index)),
                  PrunerWord & 3);
              PrunerWord >>= 2;
            }
          }
        }
        cprintf ("\n");
      }
      cprintf ("Adjustments:");
      for (Class = 0; Class < MaxNumClasses; Class++) {
        /* NOTE(review): strict '>' here, whereas class selection above uses
         * '>='; kept as in the original. */
        if (NormCount[Class] > MaxCount)
          cprintf (" %s=%d,",
            unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, Class)),
            -((ClassPrunerMultiplier *
            NormalizationFactors[Class]) >> 8) * cp_maps[3] /
            3);
      }
      cprintf ("\n");
    }
  }

  /* Set Up Results: walk the sorted arrays from the last entry to the first
   * (presumably best score first -- confirm against HeapSort's ordering). */
  for (out_class = 0; out_class < NumClasses; out_class++) {
    Results[out_class].Class =
      ClassIdForIndex (IntTemplates, SortIndex[NumClasses - out_class]);
    Results[out_class].Rating =
      1.0 - SortKey[NumClasses -
      out_class] / ((float) cp_maps[3] * NumFeatures);
  }
  return NumClasses;
}
/*---------------------------------------------------------------------------*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?