⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charsample.cpp

📁 一份OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/**********************************************************************
 * File:        charsample.cpp  (Formerly charsample.c)
 * Description: Class to contain character samples and match scores
 *					to be used for adaption
 * Author:      Chris Newton
 * Created:     Thu Oct  7 13:40:37 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include <stdio.h>
#include          <ctype.h>
#include          <math.h>
#ifdef __UNIX__
#include <assert.h>
#include          <unistd.h>
#endif
#include "memry.h"
#include          "tessvars.h"
#include "statistc.h"
#include          "charsample.h"
#include "paircmp.h"
#include "matmatch.h"
#include          "adaptions.h"
#include          "secname.h"
#include          "notdll.h"

extern INT32 demo_word;          // Hack for demos

// NOTE(review): ELISTIZE is a project macro defined outside this file;
// presumably it emits the list support code for each class -- confirm.
ELISTIZE (CHAR_SAMPLE)
ELISTIZE (CHAR_SAMPLES)

/**
 * Default constructor: no blob, denorm or image attached, character '\0',
 * and all match statistics zeroed.
 */
CHAR_SAMPLE::CHAR_SAMPLE () {
  sample_blob = NULL;
  sample_denorm = NULL;
  sample_image = NULL;
  ch = '\0';
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Construct a blob-based sample.
 *
 * @param blob    stored as-is in sample_blob (pointer is not copied deeply here)
 * @param denorm  stored as-is in sample_denorm
 * @param c       character label for this sample
 *
 * The image pointer is left NULL and the match statistics start at zero.
 */
CHAR_SAMPLE::CHAR_SAMPLE(PBLOB *blob, DENORM *denorm, char c) {
  sample_blob = blob;
  sample_denorm = denorm;
  sample_image = NULL;
  ch = c;
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Construct an image-based sample.
 *
 * @param image  stored as-is in sample_image
 * @param c      character label for this sample
 *
 * The blob and denorm pointers are left NULL and the match statistics
 * start at zero.
 */
CHAR_SAMPLE::CHAR_SAMPLE(IMAGE *image, char c) {
  sample_blob = NULL;
  sample_denorm = NULL;
  sample_image = image;
  ch = c;
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Score test_sample against this sample and optionally record the result.
 *
 * - If both samples carry a blob, compare_bln_blobs is run in both
 *   directions and the larger of the two scores is kept.
 * - Otherwise, if both carry an image, matrix_match is used.
 * - Otherwise BAD_SCORE is returned and no statistics are touched.
 *
 * @param test_sample  sample to compare against; dereferenced unconditionally,
 *                     so it must not be NULL
 * @param updating     when true, and tessedit_use_best_sample or
 *                     tessedit_cluster_debug is set, the score is added to
 *                     this sample's running count, sum and sum of squares
 * @return the match score, or BAD_SCORE if the samples share no
 *         comparable representation
 */
float CHAR_SAMPLE::match_sample(  // Update match scores
                                CHAR_SAMPLE *test_sample,
                                BOOL8 updating) {
  float score1;
  float score2;
  IMAGE *image = test_sample->image ();

  if (sample_blob != NULL && test_sample->blob () != NULL) {
    PBLOB *blob = test_sample->blob ();
    DENORM *denorm = test_sample->denorm ();

    // The comparison is run in both directions; keep the larger score.
    score1 = compare_bln_blobs (sample_blob, sample_denorm, blob, denorm);
    score2 = compare_bln_blobs (blob, denorm, sample_blob, sample_denorm);
    score1 = (score1 > score2) ? score1 : score2;
  }
  else if (sample_image != NULL && image != NULL) {
    // NOTE(review): this prototype is built and destroyed without being
    // used by matrix_match. Kept because the CHAR_PROTO(CHAR_SAMPLE *)
    // constructor is not visible here -- confirm it has no required side
    // effects before removing the allocation.
    CHAR_PROTO *sample = new CHAR_PROTO (this);

    score1 = matrix_match (sample_image, image);
    delete sample;
  }
  else
    return BAD_SCORE;

  if ((tessedit_use_best_sample || tessedit_cluster_debug) && updating) {
    n_samples_matched++;
    total_match_scores += score1;
    sumsq_match_scores += score1 * score1;
  }
  return score1;
}

/**
 * @return the mean of all recorded match scores, or BAD_SCORE when no
 *         matches have been recorded.
 */
double CHAR_SAMPLE::mean_score() {
  if (n_samples_matched > 0)
    return (total_match_scores / n_samples_matched);
  else
    return BAD_SCORE;
}

/**
 * @return the variance of the recorded match scores, computed as
 *         (sum of squares / n) - mean^2, or BAD_SCORE when no matches
 *         have been recorded.
 */
double CHAR_SAMPLE::variance() {
  double mean = mean_score ();

  if (n_samples_matched > 0) {
    return (sumsq_match_scores / n_samples_matched) - mean * mean;
  }
  else
    return BAD_SCORE;
}

/**
 * Write this sample's match statistics to f.
 *
 * Does nothing unless tessedit_cluster_debug is set. Prints the character,
 * match count, mean and variance when at least one match was recorded;
 * otherwise prints a "no matches" line.
 */
void CHAR_SAMPLE::print(FILE *f) {
  if (!tessedit_cluster_debug)
    return;

  if (n_samples_matched > 0)
    fprintf (f,
      "%c - sample matched against " INT32FORMAT
      " blobs, mean: %f, var: %f\n", ch, n_samples_matched,
      mean_score (), variance ());
  else
    fprintf (f, "No matches for this sample (%c)\n", ch);
}

/**
 * Zero the match count, score sum and score sum-of-squares.
 */
void CHAR_SAMPLE::reset_match_statistics() {
  n_samples_matched = 0;
  total_match_scores = 0.0;
  sumsq_match_scores = 0.0;
}

/**
 * Default constructor: an empty cluster of UNKNOWN type with character
 * '\0', no best sample and no prototype.
 */
CHAR_SAMPLES::CHAR_SAMPLES() {
  type = UNKNOWN;
  samples.clear ();
  ch = '\0';
  best_sample = NULL;
  proto = NULL;
}

/**
 * Construct a cluster seeded with one sample.
 *
 * The cluster type is IMAGE_CLUSTER if the sample has an image, otherwise
 * BLOB_CLUSTER; ASSERT_HOST requires that the sample has at least one of
 * the two. The cluster character is taken from the sample only when
 * tessedit_mm_only_match_same_char is set; otherwise it starts as '\0'.
 *
 * @param sample  first member of the cluster; appended to the sample list
 */
CHAR_SAMPLES::CHAR_SAMPLES(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;

  ASSERT_HOST (sample->image () != NULL || sample->blob () != NULL);

  if (sample->image () != NULL)
    type = IMAGE_CLUSTER;
  else if (sample->blob () != NULL)
    type = BLOB_CLUSTER;

  samples.clear ();
  sample_it.add_to_end (sample);

  if (tessedit_mm_only_match_same_char)
    ch = sample->character ();
  else
    ch = '\0';

  best_sample = NULL;
  proto = NULL;
}

/**
 * Append a sample to the cluster and keep derived data up to date.
 *
 * When tessedit_use_best_sample or tessedit_cluster_debug is set, the new
 * sample is first matched against every existing member in both
 * directions with updating enabled, so both sides record the score.
 *
 * For image clusters with tessedit_mm_use_prototypes set, the prototype is
 * built when the cluster size reaches exactly tessedit_mm_prototype_min_size
 * and the new sample is folded into it when the size is already larger.
 *
 * @param sample  sample to add
 */
void CHAR_SAMPLES::add_sample(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;

  if (tessedit_use_best_sample || tessedit_cluster_debug) {
    for (sample_it.mark_cycle_pt ();
         !sample_it.cycled_list (); sample_it.forward ()) {
      sample_it.data ()->match_sample (sample, TRUE);
      sample->match_sample (sample_it.data (), TRUE);
    }
  }

  sample_it.add_to_end (sample);

  // Braces make explicit that the else belongs to the size test, which is
  // how the language already bound it (an else pairs with the nearest if).
  if (tessedit_mm_use_prototypes && type == IMAGE_CLUSTER) {
    if (samples.length () == tessedit_mm_prototype_min_size)
      this->build_prototype ();
    else if (samples.length () > tessedit_mm_prototype_min_size)
      this->add_sample_to_prototype (sample);
  }
}

/**
 * Fold one sample into the existing prototype, enlarging the prototype
 * first if the sample's image is wider or taller than it.
 *
 * Precondition: proto and sample->image() are dereferenced without a
 * check, so both must be non-NULL.
 *
 * @param sample  image sample to add to the prototype
 */
void CHAR_SAMPLES::add_sample_to_prototype(CHAR_SAMPLE *sample) {
  BOOL8 rebuild = FALSE;
  INT32 new_xsize = proto->x_size ();
  INT32 new_ysize = proto->y_size ();
  INT32 sample_xsize = sample->image ()->get_xsize ();
  INT32 sample_ysize = sample->image ()->get_ysize ();

  if (sample_xsize > new_xsize) {
    new_xsize = sample_xsize;
    rebuild = TRUE;
  }
  if (sample_ysize > new_ysize) {
    new_ysize = sample_ysize;
    rebuild = TRUE;
  }

  if (rebuild)
    proto->enlarge_prototype (new_xsize, new_ysize);

  proto->add_sample (sample);
}

/**
 * Build the cluster prototype from all current samples.
 *
 * Does nothing unless this is an image cluster holding at least
 * tessedit_mm_prototype_min_size samples. The prototype is allocated at
 * the largest width and height found among the sample images, then every
 * sample is added to it.
 *
 * NOTE(review): any prototype already held in proto is overwritten
 * without being deleted here -- confirm callers never rebuild.
 */
void CHAR_SAMPLES::build_prototype() {
  CHAR_SAMPLE_IT sample_it = &samples;
  CHAR_SAMPLE *sample;
  INT32 proto_xsize = 0;
  INT32 proto_ysize = 0;

  if (type != IMAGE_CLUSTER
    || samples.length () < tessedit_mm_prototype_min_size)
    return;

  // First pass: find the bounding width/height over all sample images.
  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    sample = sample_it.data ();
    if (sample->image ()->get_xsize () > proto_xsize)
      proto_xsize = sample->image ()->get_xsize ();
    if (sample->image ()->get_ysize () > proto_ysize)
      proto_ysize = sample->image ()->get_ysize ();
  }

  proto = new CHAR_PROTO (proto_xsize, proto_ysize, 0, 0, '\0');

  // Second pass: accumulate every sample into the new prototype.
  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ())
    this->add_sample_to_prototype (sample_it.data ());
}

/**
 * Select the member with the lowest mean match score as best_sample.
 *
 * Does nothing if the cluster has no assigned character (ch == '\0') or
 * holds fewer than tessedit_mm_prototype_min_size samples. With
 * tessedit_cluster_debug set (and SECURE_NAMES undefined) the chosen
 * sample is reported.
 */
void CHAR_SAMPLES::find_best_sample() {
  CHAR_SAMPLE_IT sample_it = &samples;
  double score;
  double best_score = MAX_INT32;

  if (ch == '\0' || samples.length () < tessedit_mm_prototype_min_size)
    return;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    score = sample_it.data ()->mean_score ();
    if (score < best_score) {
      best_score = score;
      best_sample = sample_it.data ();
    }
  }

  #ifndef SECURE_NAMES
  // best_sample stays NULL if the loop selected nothing (e.g. an empty
  // list); guard so the debug print cannot dereference NULL.
  if (tessedit_cluster_debug && best_sample != NULL) {
    tprintf ("Best sample for this %c cluster:\n", ch);
    best_sample->print (debug_fp);
  }
  #endif
}

/**
 * Score a sample against this cluster.
 *
 * Strategy, in priority order:
 *  1. BAD_SCORE if tessedit_mm_only_match_same_char is set and the
 *     sample's character differs from the cluster's.
 *  2. Match against best_sample (without updating statistics) if
 *     tessedit_use_best_sample is set and a best sample exists.
 *  3. Match against the prototype if either prototype flag is set and a
 *     prototype exists.
 *  4. Otherwise the nearest-neighbour score over all members.
 *
 * @param sample  sample to score
 * @return the match score from the selected strategy
 */
float CHAR_SAMPLES::match_score(CHAR_SAMPLE *sample) {
  if (tessedit_mm_only_match_same_char && sample->character () != ch)
    return BAD_SCORE;

  if (tessedit_use_best_sample && best_sample != NULL)
    return best_sample->match_sample (sample, FALSE);
  else if ((tessedit_mm_use_prototypes
    || tessedit_mm_adapt_using_prototypes) && proto != NULL)
    return proto->match_sample (sample);
  else
    return this->nn_match_score (sample);
}

/**
 * Nearest-neighbour score: the minimum match score between the given
 * sample and any member of the cluster. Statistics are not updated.
 *
 * @param sample  sample to score
 * @return the smallest score found; MAX_INT32 (as float) if the cluster
 *         is empty
 */
float CHAR_SAMPLES::nn_match_score(CHAR_SAMPLE *sample) {
  CHAR_SAMPLE_IT sample_it = &samples;
  float score;
  float min_score = MAX_INT32;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ()) {
    score = sample_it.data ()->match_sample (sample, FALSE);
    if (score < min_score)
      min_score = score;
  }
  return min_score;
}

/**
 * Assign the cluster's character by majority vote over its members.
 *
 * Does nothing if the cluster is empty or tessedit_mm_only_match_same_char
 * is set. The most frequent character code in [FIRST_CHAR, LAST_CHAR] is
 * adopted only if the cluster has at least tessedit_cluster_min_size
 * samples and that character's count exceeds
 * samples.length() * tessedit_cluster_accept_fraction.
 *
 * NOTE(review): character() is cast from char to INT32 before being added
 * to the histogram; how STATS treats values outside
 * [FIRST_CHAR, LAST_CHAR] (e.g. negative codes where char is signed) is
 * not visible here -- confirm.
 */
void CHAR_SAMPLES::assign_to_char() {
  STATS char_frequency(FIRST_CHAR, LAST_CHAR);
  CHAR_SAMPLE_IT sample_it = &samples;
  INT32 i;
  INT32 max_index = 0;
  INT32 max_freq = 0;

  if (samples.length () == 0 || tessedit_mm_only_match_same_char)
    return;

  for (sample_it.mark_cycle_pt ();
       !sample_it.cycled_list (); sample_it.forward ())
    char_frequency.add ((INT32) sample_it.data ()->character (), 1);

  // Ties keep the lowest character code because the comparison is strict.
  for (i = FIRST_CHAR; i <= LAST_CHAR; i++)
    if (char_frequency.pile_count (i) > max_freq) {
      max_index = i;
      max_freq = char_frequency.pile_count (i);
    }

  if (samples.length () >= tessedit_cluster_min_size
    && max_freq > samples.length () * tessedit_cluster_accept_fraction)
    ch = (char) max_index;
}

/**
 * Write a summary of the cluster to f.
 *
 * Always prints the sample count. Unless SECURE_NAMES is defined, it also
 * prints each member (when tessedit_cluster_debug is set) and whether the
 * cluster is used for adaption, based on whether ch is '\0'.
 */
void CHAR_SAMPLES::print(FILE *f) {
  CHAR_SAMPLE_IT sample_it = &samples;

  fprintf (f, "Collected " INT32FORMAT " samples\n", samples.length ());

  #ifndef SECURE_NAMES
  if (tessedit_cluster_debug)
    for (sample_it.mark_cycle_pt ();
         !sample_it.cycled_list (); sample_it.forward ())
      sample_it.data ()->print (f);

  if (ch == '\0')
    fprintf (f, "\nCluster not used for adaption\n");
  else
    fprintf (f, "\nCluster used to adapt to '%c's\n", ch);
  #endif
}

CHAR_PROTO::CHAR_PROTO() {
  xsize = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -