📄 charsample.cpp
字号:
/**********************************************************************
 * File:        charsample.cpp  (Formerly charsample.c)
 * Description: Class to contain character samples and match scores
 *              to be used for adaption
 * Author:      Chris Newton
 * Created:     Thu Oct  7 13:40:37 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include <stdio.h>
#include <ctype.h>
#include <math.h>
#ifdef __UNIX__
#include <assert.h>
#include <unistd.h>
#endif
#include "memry.h"
#include "tessvars.h"
#include "statistc.h"
#include "charsample.h"
#include "paircmp.h"
#include "matmatch.h"
#include "adaptions.h"
#include "secname.h"
#include "notdll.h"

extern INT32 demo_word;  // Hack for demos

ELISTIZE (CHAR_SAMPLE)
ELISTIZE (CHAR_SAMPLES)

/**
 * Default constructor: no blob, denorm or image attached, character '\0',
 * and all match statistics zeroed.
 */
CHAR_SAMPLE::CHAR_SAMPLE () {
  sample_blob = NULL;
  sample_denorm = NULL;
  sample_image = NULL;
  ch = '\0';
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Construct a blob-based sample.
 *
 * @param blob    blob pointer stored in the sample (stored as given)
 * @param denorm  denorm pointer stored alongside the blob
 * @param c       character label recorded for this sample
 */
CHAR_SAMPLE::CHAR_SAMPLE(PBLOB *blob, DENORM *denorm, char c) {
  sample_blob = blob;
  sample_denorm = denorm;
  sample_image = NULL;
  ch = c;
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Construct an image-based sample.
 *
 * @param image  image pointer stored in the sample (stored as given)
 * @param c      character label recorded for this sample
 */
CHAR_SAMPLE::CHAR_SAMPLE(IMAGE *image, char c) {
  sample_blob = NULL;
  sample_denorm = NULL;
  sample_image = image;
  ch = c;
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Match this sample against another one and optionally update the
 * running match statistics of this sample.
 *
 * If both samples carry a blob, compare_bln_blobs is evaluated in both
 * directions and the larger of the two scores is used.  Otherwise, if
 * both carry an image, matrix_match is used.  If neither pairing is
 * available BAD_SCORE is returned and no statistics are touched.
 *
 * @param test_sample  sample to compare against
 * @param updating     when true (and tessedit_use_best_sample or
 *                     tessedit_cluster_debug is set) the score is added
 *                     to this sample's match statistics
 * @return the match score, or BAD_SCORE when the samples are not comparable
 */
float CHAR_SAMPLE::match_sample(  // Update match scores
                                CHAR_SAMPLE *test_sample,
                                BOOL8 updating) {
  float score1;
  float score2;
  IMAGE *image = test_sample->image ();

  if (sample_blob != NULL && test_sample->blob () != NULL) {
    PBLOB *blob = test_sample->blob ();
    DENORM *denorm = test_sample->denorm ();

    // The comparison is evaluated in both directions; keep the larger score.
    score1 = compare_bln_blobs (sample_blob, sample_denorm, blob, denorm);
    score2 = compare_bln_blobs (blob, denorm, sample_blob, sample_denorm);

    score1 = (score1 > score2) ? score1 : score2;
  }
  else if (sample_image != NULL && image != NULL) {
    // NOTE(review): this CHAR_PROTO is never passed to matrix_match; it is
    // kept in case its constructor/destructor has side effects - confirm
    // before removing.
    CHAR_PROTO *sample = new CHAR_PROTO (this);

    score1 = matrix_match (sample_image, image);
    delete sample;
  }
  else
    return BAD_SCORE;

  if ((tessedit_use_best_sample || tessedit_cluster_debug) && updating) {
    n_samples_matched++;
    total_match_scores += score1;
    sumsq_match_scores += score1 * score1;
  }
  return score1;
}

/**
 * @return the mean of the accumulated match scores, or BAD_SCORE when no
 *         matches have been recorded.
 */
double CHAR_SAMPLE::mean_score() {
  if (n_samples_matched > 0)
    return (total_match_scores / n_samples_matched);
  else
    return BAD_SCORE;
}

/**
 * @return the variance of the accumulated match scores, computed as
 *         (sum of squares / n) - mean^2, or BAD_SCORE when no matches
 *         have been recorded.
 */
double CHAR_SAMPLE::variance() {
  double mean = mean_score ();

  if (n_samples_matched > 0) {
    return (sumsq_match_scores / n_samples_matched) - mean * mean;
  }
  else
    return BAD_SCORE;
}

/**
 * Print the match statistics of this sample.  Does nothing unless
 * tessedit_cluster_debug is set.
 *
 * @param f  stream to print to
 */
void CHAR_SAMPLE::print(FILE *f) {
  if (!tessedit_cluster_debug)
    return;

  if (n_samples_matched > 0)
    fprintf (f,
      "%c - sample matched against " INT32FORMAT
      " blobs, mean: %f, var: %f\n", ch, n_samples_matched,
      mean_score (), variance ());
  else
    fprintf (f, "No matches for this sample (%c)\n", ch);
}

/**
 * Zero the match count and the accumulated score sums.
 */
void CHAR_SAMPLE::reset_match_statistics() {
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Default constructor: empty cluster of UNKNOWN type with character '\0',
 * no best sample and no prototype.
 */
CHAR_SAMPLES::CHAR_SAMPLES() {
  type = UNKNOWN;
  samples.clear ();
  ch = '\0';
  best_sample = NULL;
  proto = NULL;
}

/**
 * Construct a cluster seeded with a single sample.
 *
 * The cluster type is IMAGE_CLUSTER when the sample has an image,
 * otherwise BLOB_CLUSTER when it has a blob.  The cluster character is
 * taken from the sample only when tessedit_mm_only_match_same_char is
 * set; otherwise it is '\0'.
 *
 * @param sample  first sample of the cluster; must have an image or a blob
 */
CHAR_SAMPLES::CHAR_SAMPLES(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;

  ASSERT_HOST (sample->image () != NULL || sample->blob () != NULL);

  if (sample->image () != NULL)
    type = IMAGE_CLUSTER;
  else if (sample->blob () != NULL)
    type = BLOB_CLUSTER;

  samples.clear ();
  sample_it.add_to_end (sample);
  if (tessedit_mm_only_match_same_char)
    ch = sample->character ();
  else
    ch = '\0';
  best_sample = NULL;
  proto = NULL;
}

/**
 * Add a sample to the cluster.
 *
 * When tessedit_use_best_sample or tessedit_cluster_debug is set, the new
 * sample is first matched against every existing sample in both
 * directions with statistics updating enabled.  After the sample is
 * appended, image clusters using prototypes either build the prototype
 * (when the cluster has just reached tessedit_mm_prototype_min_size) or
 * fold the new sample into the existing prototype (when already larger).
 *
 * @param sample  sample to append to the cluster
 */
void CHAR_SAMPLES::add_sample(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;

  if (tessedit_use_best_sample || tessedit_cluster_debug)
    for (sample_it.mark_cycle_pt ();
         !sample_it.cycled_list (); sample_it.forward ()) {
      sample_it.data ()->match_sample (sample, TRUE);
      sample->match_sample (sample_it.data (), TRUE);
    }

  sample_it.add_to_end (sample);

  // Braces make the else-binding explicit; it belongs to the size test,
  // exactly as the unbraced original parsed.
  if (tessedit_mm_use_prototypes && type == IMAGE_CLUSTER) {
    if (samples.length () == tessedit_mm_prototype_min_size)
      this->build_prototype ();
    else if (samples.length () > tessedit_mm_prototype_min_size)
      this->add_sample_to_prototype (sample);
  }
}

/**
 * Fold one sample into the existing prototype, enlarging the prototype
 * first if the sample's image is wider or taller than it.
 *
 * Reads proto and sample->image () without checking them for NULL.
 *
 * @param sample  image sample to add to the prototype
 */
void CHAR_SAMPLES::add_sample_to_prototype(CHAR_SAMPLE *sample) {
  BOOL8 rebuild = FALSE;
  INT32 new_xsize = proto->x_size ();
  INT32 new_ysize = proto->y_size ();
  INT32 sample_xsize = sample->image ()->get_xsize ();
  INT32 sample_ysize = sample->image ()->get_ysize ();

  if (sample_xsize > new_xsize) {
    new_xsize = sample_xsize;
    rebuild = TRUE;
  }
  if (sample_ysize > new_ysize) {
    new_ysize = sample_ysize;
    rebuild = TRUE;
  }

  if (rebuild)
    proto->enlarge_prototype (new_xsize, new_ysize);

  proto->add_sample (sample);
}

/**
 * Build the prototype for an image cluster from all current samples.
 *
 * Does nothing unless the cluster is an IMAGE_CLUSTER holding at least
 * tessedit_mm_prototype_min_size samples.  The prototype is sized to the
 * largest image width and height found among the samples, and every
 * sample is then added to it.
 */
void CHAR_SAMPLES::build_prototype() {
  CHAR_SAMPLE_IT sample_it = &samples;
  CHAR_SAMPLE *sample;
  INT32 proto_xsize = 0;
  INT32 proto_ysize = 0;

  if (type != IMAGE_CLUSTER
    || samples.length () < tessedit_mm_prototype_min_size)
    return;

  // First pass: find the bounding size over all sample images.
  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    sample = sample_it.data ();
    if (sample->image ()->get_xsize () > proto_xsize)
      proto_xsize = sample->image ()->get_xsize ();
    if (sample->image ()->get_ysize () > proto_ysize)
      proto_ysize = sample->image ()->get_ysize ();
  }

  // NOTE(review): any prototype already held in proto is overwritten here
  // without being deleted - confirm callers never rebuild an existing one.
  proto = new CHAR_PROTO (proto_xsize, proto_ysize, 0, 0, '\0');

  // Second pass: accumulate every sample into the new prototype.
  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ())
    this->add_sample_to_prototype (sample_it.data ());
}

/**
 * Select the sample with the lowest mean match score as best_sample.
 *
 * Does nothing when the cluster has no assigned character or holds fewer
 * than tessedit_mm_prototype_min_size samples.  With
 * tessedit_cluster_debug set (and SECURE_NAMES undefined) the chosen
 * sample is printed to debug_fp.
 */
void CHAR_SAMPLES::find_best_sample() {
  CHAR_SAMPLE_IT sample_it = &samples;
  double score;
  double best_score = MAX_INT32;

  if (ch == '\0' || samples.length () < tessedit_mm_prototype_min_size)
    return;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    score = sample_it.data ()->mean_score ();
    if (score < best_score) {
      best_score = score;
      best_sample = sample_it.data ();
    }
  }
  #ifndef SECURE_NAMES
  // best_sample stays NULL if the loop selected nothing (e.g. an empty
  // list); guard the dereference instead of crashing in debug output.
  if (tessedit_cluster_debug && best_sample != NULL) {
    tprintf ("Best sample for this %c cluster:\n", ch);
    best_sample->print (debug_fp);
  }
  #endif
}

/**
 * Score a sample against this cluster.
 *
 * Returns BAD_SCORE when tessedit_mm_only_match_same_char is set and the
 * sample's character differs from the cluster's.  Otherwise the score
 * comes from, in order of preference: the best sample (if enabled and
 * available), the prototype (if enabled and available), or the nearest
 * neighbour among all samples.
 *
 * @param sample  sample to score
 * @return the match score
 */
float CHAR_SAMPLES::match_score(CHAR_SAMPLE *sample) {
  if (tessedit_mm_only_match_same_char && sample->character () != ch)
    return BAD_SCORE;

  if (tessedit_use_best_sample && best_sample != NULL)
    return best_sample->match_sample (sample, FALSE);
  else if ((tessedit_mm_use_prototypes
    || tessedit_mm_adapt_using_prototypes) && proto != NULL)
    return proto->match_sample (sample);
  else
    return this->nn_match_score (sample);
}

/**
 * Nearest-neighbour score: the minimum match score of the given sample
 * against every sample in the cluster (statistics are not updated).
 *
 * @param sample  sample to score
 * @return the smallest score found, or MAX_INT32 (as float) when the
 *         cluster is empty
 */
float CHAR_SAMPLES::nn_match_score(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;
  float score;
  float min_score = MAX_INT32;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    score = sample_it.data ()->match_sample (sample, FALSE);
    if (score < min_score)
      min_score = score;
  }

  return min_score;
}

/**
 * Assign the cluster to its most frequent sample character.
 *
 * Does nothing for an empty cluster or when
 * tessedit_mm_only_match_same_char is set.  Otherwise the sample
 * characters are histogrammed over FIRST_CHAR..LAST_CHAR and the most
 * frequent one becomes ch, provided the cluster holds at least
 * tessedit_cluster_min_size samples and that character's count exceeds
 * samples.length () * tessedit_cluster_accept_fraction.
 */
void CHAR_SAMPLES::assign_to_char() {
  STATS char_frequency(FIRST_CHAR, LAST_CHAR);
  CHAR_SAMPLE_IT sample_it = &samples;
  INT32 i;
  INT32 max_index = 0;
  INT32 max_freq = 0;

  if (samples.length () == 0 || tessedit_mm_only_match_same_char)
    return;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ())
    char_frequency.add ((INT32) sample_it.data ()->character (), 1);

  // Strict '>' keeps the lowest character code on ties.
  for (i = FIRST_CHAR; i <= LAST_CHAR; i++)
    if (char_frequency.pile_count (i) > max_freq) {
      max_index = i;
      max_freq = char_frequency.pile_count (i);
    }

  if (samples.length () >= tessedit_cluster_min_size
    && max_freq > samples.length () * tessedit_cluster_accept_fraction)
    ch = (char) max_index;
}

/**
 * Print a summary of the cluster: the sample count and, unless
 * SECURE_NAMES is defined, each sample's statistics (when
 * tessedit_cluster_debug is set) followed by whether the cluster is used
 * for adaption.
 *
 * @param f  stream to print to
 */
void CHAR_SAMPLES::print(FILE *f) {
  CHAR_SAMPLE_IT sample_it = &samples;

  fprintf (f, "Collected " INT32FORMAT " samples\n", samples.length ());

  #ifndef SECURE_NAMES
  if (tessedit_cluster_debug)
    for (sample_it.mark_cycle_pt ();
         !sample_it.cycled_list (); sample_it.forward ())
      sample_it.data ()->print (f);

  if (ch == '\0')
    fprintf (f, "\nCluster not used for adaption\n");
  else
    fprintf (f, "\nCluster used to adapt to '%c's\n", ch);
  #endif
}

// CHAR_PROTO default constructor; the rest of this definition lies beyond
// the visible portion of the source.
CHAR_PROTO::CHAR_PROTO() {
  xsize = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -