📄 statistc.cpp
字号:
/**********************************************************************
 * File:        statistc.c  (Formerly stats.c)
 * Description: Simple statistical package for integer values.
 * Author:      Ray Smith
 * Created:     Mon Feb 04 16:56:05 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"     //precompiled headers
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include "memry.h"
//#include "ipeerr.h"
#include "tprintf.h"
#include "statistc.h"

#define SEED1 0x1234    //default seeds
#define SEED2 0x5678
#define SEED3 0x9abc

/**********************************************************************
 * STATS::STATS
 *
 * Construct a new stats element by allocating and zeroing the memory.
**********************************************************************/STATS::STATS( //constructor INT32 min, //min of range INT32 max //max of range ) { if (max <= min) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Illegal range for stats, Min=%d, Max=%d",min,max);*/ min = 0; max = 1; } rangemin = min; //setup rangemax = max; buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32)); if (buckets != NULL) this->clear (); //zero it /* else err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "No memory for stats, Min=%d, Max=%d",min,max); */}STATS::STATS() { //constructor rangemax = 0; //empty rangemin = 0; buckets = NULL;}/********************************************************************** * STATS::set_range * * Alter the range on an existing stats element. **********************************************************************/bool STATS::set_range( //constructor INT32 min, //min of range INT32 max //max of range ) { if (max <= min) { return false; } rangemin = min; //setup rangemax = max; if (buckets != NULL) free_mem(buckets); //no longer want it buckets = (INT32 *) alloc_mem ((max - min) * sizeof (INT32)); /* if (buckets==NULL) return err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "No memory for stats, Min=%d, Max=%d",min,max);*/ this->clear (); //zero it return true;}/********************************************************************** * STATS::clear * * Clear out the STATS class by zeroing all the buckets. **********************************************************************/void STATS::clear() { //clear out buckets total_count = 0; if (buckets != NULL) memset (buckets, 0, (rangemax - rangemin) * sizeof (INT32)); //zero it}/********************************************************************** * STATS::~STATS * * Destructor for a stats class. 
**********************************************************************/STATS::~STATS ( //destructor) { if (buckets != NULL) { free_mem(buckets); buckets = NULL; }}/********************************************************************** * STATS::add * * Add a set of samples to (or delete from) a pile. **********************************************************************/void STATS::add( //add sample INT32 value, //bucket INT32 count //no to add ) { if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats");*/ return; } if (value <= rangemin) buckets[0] += count; //silently clip to range else if (value >= rangemax) buckets[rangemax - rangemin - 1] += count; else //add count to cell buckets[value - rangemin] += count; total_count += count; //keep count of total}/********************************************************************** * STATS::mode * * Find the mode of a stats class. **********************************************************************/INT32 STATS::mode() { //get mode of samples INT32 index; //current index INT32 max; //max cell count INT32 maxindex; //index of max if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats");*/ return rangemin; } for (max = 0, maxindex = 0, index = rangemax - rangemin - 1; index >= 0; index--) { if (buckets[index] > max) { max = buckets[index]; //find biggest maxindex = index; } } return maxindex + rangemin; //index of biggest}/********************************************************************** * STATS::mean * * Find the mean of a stats class. 
**********************************************************************/float STATS::mean() { //get mean of samples INT32 index; //current index INT32 sum; //sum of cells if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats");*/ return (float) rangemin; } for (sum = 0, index = rangemax - rangemin - 1; index >= 0; index--) { //sum all buckets sum += index * buckets[index]; } if (total_count > 0) //mean value return (float) sum / total_count + rangemin; else return (float) rangemin; //no mean}/********************************************************************** * STATS::sd * * Find the standard deviation of a stats class. **********************************************************************/float STATS::sd() { //standard deviation INT32 index; //current index INT32 sum; //sum of cells INT32 sqsum; //sum of squares float variance; if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats"); */ return (float) 0.0; } for (sum = 0, sqsum = 0, index = rangemax - rangemin - 1; index >= 0; index--) { //sum all buckets sum += index * buckets[index]; //and squares sqsum += index * index * buckets[index]; } if (total_count > 0) { variance = sum / ((float) total_count); variance = sqsum / ((float) total_count) - variance * variance; return (float) sqrt (variance); } else return (float) 0.0;}/********************************************************************** * STATS::ile * * Find an arbitrary %ile of a stats class. 
**********************************************************************/float STATS::ile( //percentile float frac //fraction to find ) { INT32 index; //current index INT32 sum; //sum of cells float target; //target value if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats"); */ return (float) rangemin; } target = frac * total_count; if (target <= 0) target = (float) 1; if (target > total_count) target = (float) total_count; for (sum = 0, index = 0; index < rangemax - rangemin && sum < target; sum += buckets[index], index++); if (index > 0) return rangemin + index - (sum - target) / buckets[index - 1]; //better than just ints else return (float) rangemin;}/********************************************************************** * STATS::median * * Finds a more usefule estimate of median than ile(0.5). * * Overcomes a problem with ile() - if the samples are, for example, * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway * between 6 and 13 = 9.5 **********************************************************************/float STATS::median() { //get median float median; INT32 min_pile; INT32 median_pile; INT32 max_pile; if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats");*/ return (float) rangemin; } median = (float) ile ((float) 0.5); median_pile = (INT32) floor (median); if ((total_count > 1) && (pile_count (median_pile) == 0)) { /* Find preceeding non zero pile */ for (min_pile = median_pile; pile_count (min_pile) == 0; min_pile--); /* Find following non zero pile */ for (max_pile = median_pile; pile_count (max_pile) == 0; max_pile++); median = (float) ((min_pile + max_pile) / 2.0); } return median;}/********************************************************************** * STATS::smooth * * Apply a triangular smoothing filter to the stats. * This makes the modes a bit more useful. 
* The factor gives the height of the triangle, i.e. the weight of the * centre. **********************************************************************/void STATS::smooth( //smooth samples INT32 factor //size of triangle ) { INT32 entry; //bucket index INT32 offset; //from entry INT32 entrycount; //no of entries INT32 bucket; //new smoothed pile //output stats STATS result(rangemin, rangemax); if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats"); */ return; } if (factor < 2) return; //is a no-op entrycount = rangemax - rangemin; for (entry = 0; entry < entrycount; entry++) { //centre weight bucket = buckets[entry] * factor; for (offset = 1; offset < factor; offset++) { if (entry - offset >= 0) bucket += buckets[entry - offset] * (factor - offset); if (entry + offset < entrycount) bucket += buckets[entry + offset] * (factor - offset); } result.add (entry + rangemin, bucket); } total_count = result.total_count; memcpy (buckets, result.buckets, entrycount * sizeof (INT32));}/********************************************************************** * STATS::cluster * * Cluster the samples into max_cluster clusters. * Each call runs one iteration. The array of clusters must be * max_clusters+1 in size as cluster 0 is used to indicate which samples * have been used. * The return value is the current number of clusters. 
**********************************************************************/INT32 STATS::cluster( //cluster samples float lower, //thresholds float upper, float multiple, //distance threshold INT32 max_clusters, //max no to make STATS *clusters //array of clusters ) { BOOL8 new_cluster; //added one float *centres; //cluster centres INT32 entry; //bucket index INT32 cluster; //cluster index INT32 best_cluster; //one to assign to INT32 new_centre = 0; //residual mode INT32 new_mode; //pile count of new_centre INT32 count; //pile to place float dist; //from cluster float min_dist; //from best_cluster INT32 cluster_count; //no of clusters if (max_clusters < 1) return 0; if (buckets == NULL) { /* err.log(RESULT_LOGICAL_ERROR,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "Empty stats");*/ return 0; } centres = (float *) alloc_mem ((max_clusters + 1) * sizeof (float)); if (centres == NULL) { /* err.log(RESULT_NO_MEMORY,E_LOC,ERR_PRIMITIVES, ERR_SCROLLING,ERR_CONTINUE,ERR_ERROR, "No memory for centres"); */ return 0; } for (cluster_count = 1; cluster_count <= max_clusters && clusters[cluster_count].buckets != NULL && clusters[cluster_count].total_count > 0; cluster_count++) { centres[cluster_count] = (float) clusters[cluster_count].ile ((float) 0.5); new_centre = clusters[cluster_count].mode (); for (entry = new_centre - 1; centres[cluster_count] - entry < lower && entry >= rangemin && pile_count (entry) <= pile_count (entry + 1); entry--) { count = pile_count (entry) - clusters[0].pile_count (entry); if (count > 0) { clusters[cluster_count].add (entry, count); clusters[0].add (entry, count); } } for (entry = new_centre + 1; entry - centres[cluster_count] < lower && entry < rangemax && pile_count (entry) <= pile_count (entry - 1); entry++) { count = pile_count (entry) - clusters[0].pile_count (entry); if (count > 0) { clusters[cluster_count].add (entry, count); clusters[0].add (entry, count); } } } cluster_count--; if (cluster_count == 0) { clusters[0].set_range 
(rangemin, rangemax); } do { new_cluster = FALSE; new_mode = 0; for (entry = 0; entry < rangemax - rangemin; entry++) { count = buckets[entry] - clusters[0].buckets[entry]; //remaining pile if (count > 0) { //any to handle min_dist = (float) MAX_INT32; best_cluster = 0; for (cluster = 1; cluster <= cluster_count; cluster++) { dist = entry + rangemin - centres[cluster]; //find distance if (dist < 0) dist = -dist; if (dist < min_dist) { min_dist = dist; //find least best_cluster = cluster; } } if (min_dist > upper //far enough for new && (best_cluster == 0 || entry + rangemin > centres[best_cluster] * multiple || entry + rangemin < centres[best_cluster] / multiple)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -