intmatcher.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,532 行 · 第 1/4 页
CPP
1,532 行
/******************************************************************************
 **      Filename:    intmatcher.c
 **      Purpose:     Generic high level classification routines.
 **      Author:      Robert Moss
 **      History:     Wed Feb 13 17:35:28 MST 1991, RWM, Created.
 **                   Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
 **                        support for adaptive matching.
 **      (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "intmatcher.h"
#include "tordvars.h"
#include "callcpp.h"
#include "scrollview.h"
#include "globals.h"
#include <math.h>

/* Number of BITS_PER_WERD-bit words needed to hold NUM_BITS_PER_CLASS bits
   for each of MAX_NUM_CLASSES classes (rounded up). */
#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \
                          +BITS_PER_WERD-1)/BITS_PER_WERD)

/**----------------------------------------------------------------------------
        Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
#define SE_TABLE_BITS    9
/* Table size is derived from the bit count so the two cannot drift apart
   (evaluates to 512, the value previously hard-coded). */
#define SE_TABLE_SIZE    (1 << SE_TABLE_BITS)
#define TEMPLATE_CACHE   2

/* Lookup table with one entry per SE_TABLE_BITS-bit index. It is only
   declared here; it is filled and read elsewhere in this file. */
static uinT8 SimilarityEvidenceTable[SE_TABLE_SIZE];

/* offset_table[b] is the index (0-7) of the least significant set bit of the
   byte value b. Entry 0 holds 255 because a zero byte has no set bit. */
static uinT8 offset_table[256] = {
  255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

/* next_table[b] is the byte value b with its least significant set bit
   cleared, i.e. b & (b - 1). Together with offset_table this lets a caller
   visit the set bits of a byte one at a time. */
static uinT8 next_table[256] = {
  0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
  0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16,
  0x10, 0x18, 0x18, 0x1a, 0x18, 0x1c, 0x1c, 0x1e,
  0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
  0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e,
  0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36,
  0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
  0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46,
  0x40, 0x48, 0x48, 0x4a, 0x48, 0x4c, 0x4c, 0x4e,
  0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
  0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e,
  0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66,
  0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
  0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76,
  0x70, 0x78, 0x78, 0x7a, 0x78, 0x7c, 0x7c, 0x7e,
  0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
  0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e,
  0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96,
  0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
  0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6,
  0xa0, 0xa8, 0xa8, 0xaa, 0xa8, 0xac, 0xac, 0xae,
  0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
  0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe,
  0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6,
  0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
  0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6,
  0xd0, 0xd8, 0xd8, 0xda, 0xd8, 0xdc, 0xdc, 0xde,
  0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
  0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee,
  0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6,
  0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
};

/* Masks and shift counts for the integer matcher's evidence arithmetic.
   They are only declared in this part of the file; their values are set
   elsewhere. */
static uinT32 EvidenceTableMask;
static uinT32 MultTruncShiftBits;
static uinT32 TableTruncShiftBits;
/* NOTE(review): not static, unlike its neighbours - presumably referenced
   from another translation unit; confirm before changing linkage. */
uinT32 EvidenceMultMask;
static inT16 LocalMatcherMultiplier;

/* Tunable parameters. Each invocation names the variable, its default value
   and the prompt text shown for it; the remaining arguments are consumed by
   the make_int_var / make_float_var macros (defined outside this file). */
make_int_var (ClassPrunerThreshold, 229, MakeClassPrunerThreshold,
              16, 20, SetClassPrunerThreshold,
              "Class Pruner Threshold 0-255: ");

make_int_var (ClassPrunerMultiplier, 30, MakeClassPrunerMultiplier,
              16, 21, SetClassPrunerMultiplier,
              "Class Pruner Multiplier 0-255: ");

make_int_var (IntegerMatcherMultiplier, 14, MakeIntegerMatcherMultiplier,
              16, 22, SetIntegerMatcherMultiplier,
              "Integer Matcher Multiplier 0-255: ");

make_int_var (IntThetaFudge, 128, MakeIntThetaFudge,
              16, 23, SetIntThetaFudge,
              "Integer Matcher Theta Fudge 0-255: ");

make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength,
              16, 24, SetCPCutoffStrength,
              "Class Pruner CutoffStrength: ");

make_int_var (EvidenceTableBits, 9, MakeEvidenceTableBits,
              16, 25, SetEvidenceTableBits,
              "Bits in Similarity to Evidence Lookup 8-9: ");

make_int_var (IntEvidenceTruncBits, 14, MakeIntEvidenceTruncBits,
              16, 26, SetIntEvidenceTruncBits,
              "Integer Evidence Truncation Bits (Distance) 8-14: ");

make_float_var (SEExponentialMultiplier, 0, MakeSEExponentialMultiplier,
                16, 27, SetSEExponentialMultiplier,
                "Similarity to Evidence Table Exponential Multiplier: ");

make_float_var (SimilarityCenter, 0.0075, MakeSimilarityCenter,
                16, 28, SetSimilarityCenter,
                "Center of Similarity Curve: ");

make_int_var (AdaptProtoThresh, 230, MakeAdaptProtoThresh,
              16, 29, SetAdaptProtoThresh,
              "Threshold for good protos during adaptive 0-255: ");

make_int_var (AdaptFeatureThresh, 230, MakeAdaptFeatureThresh,
              16, 30, SetAdaptFeatureThresh,
              "Threshold for good features during adaptive 0-255: ");

//extern int display_ratings;
//extern inT32 cp_maps[4];

/* Global counters; they are defined here but not touched in this part of the
   file. NOTE(review): presumably matcher statistics - confirm against the
   code that increments them. */
int protoword_lookups;
int zero_protowords;
int proto_shifts;
int set_proto_bits;
int config_shifts;
int set_config_bits;

/**----------------------------------------------------------------------------
Public Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/int ClassPruner(INT_TEMPLATES IntTemplates, inT16 NumFeatures, INT_FEATURE_ARRAY Features, CLASS_NORMALIZATION_ARRAY NormalizationFactors, CLASS_CUTOFF_ARRAY ExpectedNumFeatures, CLASS_PRUNER_RESULTS Results, int Debug) {/* ** Parameters: ** IntTemplates Class pruner tables ** NumFeatures Number of features in blob ** Features Array of features ** NormalizationFactors Array of fudge factors from blob ** normalization process ** (by CLASS_INDEX) ** ExpectedNumFeatures Array of expected number of features ** for each class ** (by CLASS_INDEX) ** Results Sorted Array of pruned classes ** (by CLASS_ID) ** Debug Debugger flag: 1=debugger on ** Globals: ** ClassPrunerThreshold Cutoff threshold ** ClassPrunerMultiplier Normalization factor multiplier ** Operation: ** Prune the classes using a modified fast match table. ** Return a sorted list of classes along with the number ** of pruned classes in that list. ** Return: Number of pruned classes. ** Exceptions: none ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created. 
*/ uinT32 PrunerWord; inT32 class_index; //index to class int Word; uinT32 *BasePrunerAddress; uinT32 feature_address; //current feature index INT_FEATURE feature; //current feature CLASS_PRUNER *ClassPruner; int PrunerSet; int NumPruners; inT32 feature_index; //current feature static int ClassCount[MAX_NUM_CLASSES]; static int NormCount[MAX_NUM_CLASSES]; static int SortKey[MAX_NUM_CLASSES + 1]; static int SortIndex[MAX_NUM_CLASSES + 1]; CLASS_INDEX Class; int out_class; int MaxNumClasses; int MaxCount; int NumClasses; FLOAT32 max_rating; //max allowed rating int *ClassCountPtr; CLASS_ID classch; MaxNumClasses = NumClassesIn (IntTemplates); /* Clear Class Counts */ ClassCountPtr = &(ClassCount[0]); for (Class = 0; Class < MaxNumClasses; Class++) { *ClassCountPtr++ = 0; } /* Update Class Counts */ NumPruners = NumClassPrunersIn (IntTemplates); for (feature_index = 0; feature_index < NumFeatures; feature_index++) { feature = &Features[feature_index]; feature_address = (((feature->X * NUM_CP_BUCKETS >> 8) * NUM_CP_BUCKETS + (feature->Y * NUM_CP_BUCKETS >> 8)) * NUM_CP_BUCKETS + (feature->Theta * NUM_CP_BUCKETS >> 8)) << 1; ClassPruner = ClassPrunersFor (IntTemplates); class_index = 0; for (PrunerSet = 0; PrunerSet < NumPruners; PrunerSet++, ClassPruner++) { BasePrunerAddress = (uinT32 *) (*ClassPruner) + feature_address; for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) { PrunerWord = *BasePrunerAddress++; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord 
>>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; } } } /* Adjust Class Counts for Number of Expected Features */ for (Class = 0; Class < MaxNumClasses; Class++) { if (NumFeatures < ExpectedNumFeatures[Class]) { int deficit = ExpectedNumFeatures[Class] - NumFeatures; ClassCount[Class] -= ClassCount[Class] * deficit / (NumFeatures*CPCutoffStrength + deficit); } if (!unicharset.get_enabled(ClassIdForIndex(IntTemplates, Class))) ClassCount[Class] = 0; // This char is disabled! } /* Adjust Class Counts for Normalization Factors */ MaxCount = 0; for (Class = 0; Class < MaxNumClasses; Class++) { NormCount[Class] = ClassCount[Class] - ((ClassPrunerMultiplier * NormalizationFactors[Class]) >> 8) * cp_maps[3] / 3; if (NormCount[Class] > MaxCount) MaxCount = NormCount[Class]; } /* Prune Classes */ MaxCount *= ClassPrunerThreshold; MaxCount >>= 8; /* Select Classes */ if (MaxCount < 1) MaxCount = 1; NumClasses = 0; for (Class = 0; Class < MaxNumClasses; Class++) if (NormCount[Class] >= MaxCount) { NumClasses++; SortIndex[NumClasses] = Class; SortKey[NumClasses] = NormCount[Class]; } /* Sort Classes using Heapsort Algorithm */ if (NumClasses > 1) HeapSort(NumClasses, SortKey, SortIndex); if (display_ratings > 1) { cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures); for (Class = 0; Class < NumClasses; Class++) { classch = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]); cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n", 
unicharset.id_to_unichar(classch), ClassCount[SortIndex[NumClasses - Class]], ExpectedNumFeatures[SortIndex[NumClasses - Class]], SortKey[NumClasses - Class], 1010 - 1000 * SortKey[NumClasses - Class] / (cp_maps[3] * NumFeatures)); } if (display_ratings > 2) { NumPruners = NumClassPrunersIn (IntTemplates); for (feature_index = 0; feature_index < NumFeatures; feature_index++) { cprintf ("F=%3d,", feature_index); feature = &Features[feature_index]; feature_address = (((feature->X * NUM_CP_BUCKETS >> 8) * NUM_CP_BUCKETS + (feature->Y * NUM_CP_BUCKETS >> 8)) * NUM_CP_BUCKETS + (feature->Theta * NUM_CP_BUCKETS >> 8)) << 1; ClassPruner = ClassPrunersFor (IntTemplates); class_index = 0; for (PrunerSet = 0; PrunerSet < NumPruners; PrunerSet++, ClassPruner++) { BasePrunerAddress = (uinT32 *) (*ClassPruner) + feature_address; for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) { PrunerWord = *BasePrunerAddress++; for (Class = 0; Class < 16; Class++, class_index++) { if (NormCount[class_index] >= MaxCount) cprintf (" %s=%d,", unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, class_index)), PrunerWord & 3); PrunerWord >>= 2; } } } cprintf ("\n"); } cprintf ("Adjustments:"); for (Class = 0; Class < MaxNumClasses; Class++) { if (NormCount[Class] > MaxCount) cprintf (" %s=%d,", unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, Class)), -((ClassPrunerMultiplier * NormalizationFactors[Class]) >> 8) * cp_maps[3] / 3); } cprintf ("\n"); } } /* Set Up Results */ max_rating = 0.0f; for (Class = 0, out_class = 0; Class < NumClasses; Class++) { Results[out_class].Class = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]); Results[out_class].Rating = 1.0 - SortKey[NumClasses - Class] / ((float) cp_maps[3] * NumFeatures); out_class++; } NumClasses = out_class; return NumClasses;}/*---------------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?