⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 statistc.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/********************************************************************** * File:        statistc.c  (Formerly stats.c) * Description: Simple statistical package for integer values. * Author:					Ray Smith * Created:					Mon Feb 04 16:56:05 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/#include          "mfcpch.h"     //precompiled headers#include          <string.h>#include          <math.h>#include          <stdlib.h>#include          "memry.h"//#include                                      "ipeerr.h"#include          "tprintf.h"#include          "statistc.h"#define SEED1       0x1234       //default seeds#define SEED2       0x5678#define SEED3       0x9abc/********************************************************************** * STATS::STATS * * Construct a new stats element by allocating and zeroing the memory. 
**********************************************************************/STATS::STATS(            //constructor             INT32 min,  //min of range             INT32 max   //max of range            ) {  if (max <= min) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Illegal range for stats, Min=%d, Max=%d",min,max);*/    min = 0;    max = 1;  }  rangemin = min;                //setup  rangemax = max;  buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));  if (buckets != NULL)    this->clear ();              //zero it  /*   else     err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,     ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,     "No memory for stats, Min=%d, Max=%d",min,max); */}STATS::STATS() {  //constructor  rangemax = 0;                  //empty  rangemin = 0;  buckets = NULL;}/********************************************************************** * STATS::set_range * * Alter the range on an existing stats element. **********************************************************************/bool STATS::set_range(            //constructor                      INT32 min,  //min of range                      INT32 max   //max of range                     ) {  if (max <= min) {    return false;  }  rangemin = min;                //setup  rangemax = max;  if (buckets != NULL)    free_mem(buckets);  //no longer want it  buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32));  /*	if (buckets==NULL)      return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,          ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,          "No memory for stats, Min=%d, Max=%d",min,max);*/  this->clear ();                //zero it  return true;}/********************************************************************** * STATS::clear * * Clear out the STATS class by zeroing all the buckets. 
**********************************************************************/void STATS::clear() {  //clear out buckets  total_count = 0;  if (buckets != NULL)    memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32));  //zero it}/********************************************************************** * STATS::~STATS * * Destructor for a stats class. **********************************************************************/STATS::~STATS (                  //destructor) {  if (buckets != NULL) {    free_mem(buckets);     buckets = NULL;  }}/********************************************************************** * STATS::add * * Add a set of samples to (or delete from) a pile. **********************************************************************/void STATS::add(              //add sample                INT32 value,  //bucket                INT32 count   //no to add               ) {  if (buckets == NULL) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Empty stats");*/    return;  }  if (value <= rangemin)    buckets[0] += count;         //silently clip to range  else if (value >= rangemax)    buckets[rangemax - rangemin - 1] += count;  else                                 //add count to cell    buckets[value - rangemin] += count;  total_count += count;          //keep count of total}/********************************************************************** * STATS::mode * * Find the mode of a stats class. 
**********************************************************************/INT32 STATS::mode() {  //get mode of samples  INT32 index;                   //current index  INT32 max;                     //max cell count  INT32 maxindex;                //index of max  if (buckets == NULL) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Empty stats");*/    return rangemin;  }  for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0;  index--) {    if (buckets[index] > max) {      max = buckets[index];      //find biggest      maxindex = index;    }  }  return maxindex + rangemin;    //index of biggest}/********************************************************************** * STATS::mean * * Find the mean of a stats class. **********************************************************************/float STATS::mean() {  //get mean of samples  INT32 index;                   //current index  INT32 sum;                     //sum of cells  if (buckets == NULL) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Empty stats");*/    return (float) rangemin;  }  for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) {                                 //sum all buckets    sum += index * buckets[index];  }  if (total_count > 0)                                 //mean value    return (float) sum / total_count + rangemin;  else    return (float) rangemin;     //no mean}/********************************************************************** * STATS::sd * * Find the standard deviation of a stats class. 
**********************************************************************/float STATS::sd() {  //standard deviation  INT32 index;                   //current index  INT32 sum;                     //sum of cells  INT32 sqsum;                   //sum of squares  float variance;  if (buckets == NULL) {    /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,       ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,       "Empty stats"); */    return (float) 0.0;  }  for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0;  index--) {                                 //sum all buckets    sum += index * buckets[index];                                 //and squares    sqsum += index * index * buckets[index];  }  if (total_count > 0) {    variance = sum / ((float) total_count);    variance = sqsum / ((float) total_count) - variance * variance;    return (float) sqrt (variance);  }  else    return (float) 0.0;}/********************************************************************** * STATS::ile * * Find an arbitrary %ile of a stats class. 
**********************************************************************/float STATS::ile(            //percentile                 float frac  //fraction to find                ) {  INT32 index;                   //current index  INT32 sum;                     //sum of cells  float target;                  //target value  if (buckets == NULL) {    /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,       ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,       "Empty stats"); */    return (float) rangemin;  }  target = frac * total_count;  if (target <= 0)    target = (float) 1;  if (target > total_count)    target = (float) total_count;  for (sum = 0, index = 0; index < rangemax - rangemin    && sum < target; sum += buckets[index], index++);  if (index > 0)    return rangemin + index - (sum - target) / buckets[index - 1];  //better than just ints  else    return (float) rangemin;}/********************************************************************** * STATS::median * * Finds a more usefule estimate of median than ile(0.5). 
* * Overcomes a problem with ile() - if the samples are, for example, * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway * between 6 and 13 = 9.5 **********************************************************************/float STATS::median() {  //get median  float median;  INT32 min_pile;  INT32 median_pile;  INT32 max_pile;  if (buckets == NULL) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Empty stats");*/    return (float) rangemin;  }  median = (float) ile ((float) 0.5);  median_pile = (INT32) floor (median);  if ((total_count > 1) && (pile_count (median_pile) == 0)) {    /* Find preceeding non zero pile */    for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--);    /* Find following non zero pile */    for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++);    median = (float) ((min_pile + max_pile) / 2.0);  }  return median;}/********************************************************************** * STATS::smooth * * Apply a triangular smoothing filter to the stats. * This makes the modes a bit more useful. * The factor gives the height of the triangle, i.e. the weight of the * centre. 
**********************************************************************/void STATS::smooth(              //smooth samples                   INT32 factor  //size of triangle                  ) {  INT32 entry;                   //bucket index  INT32 offset;                  //from entry  INT32 entrycount;              //no of entries  INT32 bucket;                  //new smoothed pile                                 //output stats  STATS result(rangemin, rangemax);   if (buckets == NULL) {    /*     err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,       ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,       "Empty stats"); */    return;  }  if (factor < 2)    return;                      //is a no-op  entrycount = rangemax - rangemin;  for (entry = 0; entry < entrycount; entry++) {                                 //centre weight    bucket = buckets[entry] * factor;    for (offset = 1; offset < factor; offset++) {      if (entry - offset >= 0)        bucket += buckets[entry - offset] * (factor - offset);      if (entry + offset < entrycount)        bucket += buckets[entry + offset] * (factor - offset);    }    result.add (entry + rangemin, bucket);  }  total_count = result.total_count;  memcpy (buckets, result.buckets, entrycount * sizeof (INT32));}/********************************************************************** * STATS::cluster * * Cluster the samples into max_cluster clusters. * Each call runs one iteration. The array of clusters must be * max_clusters+1 in size as cluster 0 is used to indicate which samples * have been used. * The return value is the current number of clusters. 
**********************************************************************/INT32 STATS::cluster(                     //cluster samples                     float lower,         //thresholds                     float upper,                     float multiple,      //distance threshold                     INT32 max_clusters,  //max no to make                     STATS *clusters      //array of clusters                    ) {  BOOL8 new_cluster;             //added one  float *centres;                //cluster centres  INT32 entry;                   //bucket index  INT32 cluster;                 //cluster index  INT32 best_cluster;            //one to assign to  INT32 new_centre = 0;          //residual mode  INT32 new_mode;                //pile count of new_centre  INT32 count;                   //pile to place  float dist;                    //from cluster  float min_dist;                //from best_cluster  INT32 cluster_count;           //no of clusters  if (max_clusters < 1)    return 0;  if (buckets == NULL) {    /*		err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES,            ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,            "Empty stats");*/    return 0;  }  centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float));  if (centres == NULL) {    /*     err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES,       ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR,       "No memory for centres"); */    return 0;  }  for (cluster_count = 1; cluster_count <= max_clusters    && clusters[cluster_count].buckets != NULL  && clusters[cluster_count].total_count > 0; cluster_count++) {    centres[cluster_count] =      (float) clusters[cluster_count].ile ((float) 0.5);    new_centre = clusters[cluster_count].mode ();    for (entry = new_centre - 1; centres[cluster_count] - entry < lower      && entry >= rangemin    && pile_count (entry) <= pile_count (entry + 1); entry--) {      count = pile_count (entry) - clusters[0].pile_count (entry);      if (count > 0) {        
clusters[cluster_count].add (entry, count);        clusters[0].add (entry, count);      }    }    for (entry = new_centre + 1; entry - centres[cluster_count] < lower      && entry < rangemax    && pile_count (entry) <= pile_count (entry - 1); entry++) {      count = pile_count (entry) - clusters[0].pile_count (entry);      if (count > 0) {        clusters[cluster_count].add (entry, count);        clusters[0].add (entry, count);      }    }  }  cluster_count--;  if (cluster_count == 0) {    clusters[0].set_range (rangemin, rangemax);  }  do {    new_cluster = FALSE;    new_mode = 0;    for (entry = 0; entry < rangemax - rangemin; entry++) {      count = buckets[entry] - clusters[0].buckets[entry];      //remaining pile      if (count > 0) {           //any to handle        min_dist = (float) MAX_INT32;        best_cluster = 0;        for (cluster = 1; cluster <= cluster_count; cluster++) {          dist = entry + rangemin - centres[cluster];          //find distance          if (dist < 0)            dist = -dist;          if (dist < min_dist) {            min_dist = dist;     //find least            best_cluster = cluster;          }        }        if (min_dist > upper     //far enough for new          && (best_cluster == 0          || entry + rangemin > centres[best_cluster] * multiple        || entry + rangemin < centres[best_cluster] / multiple)) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -