📄 hcat.cpp
字号:
// hcat: Histogram concatenation program
//
// Reads one or more histogram files whose data lines have the form
// "<value>, <count>" (blank lines and lines starting with '#' are skipped),
// merges all of them into a single self-scaling histogram and then prints
// either the merged histogram with min/max/percentile summary, or (with the
// "percent" option) the percentage of the total count that lies in a range.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <new>
#include <vector>

const unsigned int MAX_LINE = 1024;

#ifndef MIN
// Arguments are parenthesized so that expression arguments expand safely.
#define MIN(X,Y) ((X)<(Y)?(X):(Y))
#define MAX(X,Y) ((X)>(Y)?(X):(Y))
#endif // MIN/MAX

// Prints the command-line synopsis to stderr.
void usage()
{
    fprintf(stderr, "Usage: hcat [normalize] [percent [<rangeMin>:]<rangeMax>] file1 [file2 file3 ...]\n");
}

// Buffered reader layered on top of a FILE*.  Data is fetched from the file
// in BUFSIZE chunks; whatever the caller did not consume is kept in
// "savebuf" and handed out on subsequent calls.
class FastReader
{
    public:
        FastReader();
        // Reads up to *len bytes into "buffer".  On return *len holds the
        // number of bytes delivered.  Returns false only when nothing could
        // be read (end-of-file).
        bool Read(FILE* filePtr, char* buffer, unsigned int* len);
        // Reads one text line (terminated by '\n' or '\r') into "buffer",
        // replacing the terminator with '\0'.  See the definition for the
        // meaning of *len and the return value.
        bool Readline(FILE* filePtr, char* buffer, unsigned int* len);

    private:
        enum {BUFSIZE = 1024};
        char         savebuf[BUFSIZE];
        char*        saveptr;     // next unread byte within savebuf
        unsigned int savecount;   // number of unread bytes at saveptr
}; // end class FastReader

// Simple self-scaling linear/non-linear histogram (one-sided).
//
// A value v is stored in bin  ceil((num_bins-1) * ((v-min_val)/(max_val-min_val))^q).
// When a value falls outside [min_val, max_val] the range is widened and the
// existing bins are redistributed using each bin's mean value.
class Histogram
{
    public:
        Histogram();
        // True until the first value has been tallied (or after Init()).
        bool IsEmpty() const {return bin.empty();}
        // Discards any tallied data and sets the bin count and the exponent
        // "q" used to map values to bins.  At least two bins are required
        // (the index math divides by num_bins-1), so smaller requests are
        // raised to 2.
        void Init(unsigned long numBins, double linearity)
        {
            num_bins = (numBins < 2) ? 2 : numBins;
            q = linearity;
            bin.clear();
        }
        // Adds "count" occurrences of "value".  Returns false only if the
        // bin storage could not be allocated.
        bool Tally(double value, unsigned long count = 1);
        // Writes one "<mean value>, <count>" line per non-empty bin.
        void Print(FILE* file);
        // Total of all bin counts.
        unsigned long Count();
        // Percentage (0..100) of the total count held by bins whose mean
        // value lies within [rangeMin, rangeMax].
        double PercentageInRange(double rangeMin, double rangeMax);
        // Smallest / largest value tallied so far.
        double Min() {return min_val;}
        double Max() {return peak_val;}
        // Value at the upper edge of the bin in which the cumulative count
        // first reaches fraction "p" (0..1) of the total count.
        double Percentile(double p);

    private:
        struct Bin
        {
            double        total;   // sum of (value * count) tallied into the bin
            unsigned long count;   // number of occurrences tallied into the bin
        };

        // Converts a scaled position (nominally 0 .. num_bins-1) to a valid
        // bin index.
        unsigned long ClampIndex(double scaled) const;

        double           q;          // exponent of the value-to-bin mapping
        unsigned long    num_bins;
        double           min_val;    // lower bound of the binned range
        double           max_val;    // upper bound of the binned range (includes headroom)
        double           peak_val;   // largest value actually tallied
        std::vector<Bin> bin;        // empty until the first Tally()
}; // end class Histogram

Histogram::Histogram()
 : q(1.0), num_bins(1000), min_val(0.0), max_val(0.0), peak_val(0.0)
{
}

unsigned long Histogram::ClampIndex(double scaled) const
{
    const unsigned long last = num_bins - 1;
    // "!(x > 0)" also catches NaN, which arises from 0/0 while the range is
    // still degenerate (min_val == max_val); converting NaN or an
    // out-of-range double to an integer is undefined, so decide in floating
    // point before casting.
    if (!(scaled > 0.0)) return 0;
    if (scaled >= (double)last) return last;
    return (unsigned long)ceil(scaled);
}

bool Histogram::Tally(double value, unsigned long count)
{
    const double weight = value * (double)count;
    const double span = (double)(num_bins - 1);

    if (bin.empty())
    {
        // First data point: it defines a degenerate range and lands in bin 0.
        try
        {
            bin.assign(num_bins, Bin());   // Bin() is zero-initialized
        }
        catch (const std::bad_alloc&)
        {
            fprintf(stderr, "trpr: Histogram::Tally() Error allocating histogram\n");
            return false;
        }
        min_val = max_val = peak_val = value;
        bin[0].count = count;
        bin[0].total = weight;
        return true;
    }

    if (value > peak_val) peak_val = value;

    if ((value > max_val) || (value < min_val))
    {
        // The value is outside the current range: build a re-scaled set of
        // bins, move the existing content into it, then add the new value.
        std::vector<Bin> rebinned;
        try
        {
            rebinned.assign(num_bins, Bin());
        }
        catch (const std::bad_alloc&)
        {
            fprintf(stderr, "trpr: Histogram::Tally() Error reallocating histogram\n");
            return false;
        }

        const bool extendDown = (value < min_val);
        double newScale, newMin, newMax;
        if (extendDown)
        {
            newMin = value;
            newMax = max_val;
            newScale = span / pow(newMax - newMin, q);
            // Bin 0 holds the values at the old minimum; place it by that
            // minimum rather than by its mean.
            const unsigned long index = ClampIndex(newScale * pow(min_val - newMin, q));
            rebinned[index].total += bin[0].total;
            rebinned[index].count += bin[0].count;
        }
        else
        {
            // Grow the upper bound past the new value (x2 for non-negative
            // values, x0.5 for negative ones) to leave headroom and so
            // avoid re-binning for every slightly larger value.
            const double s = (value < 0.0) ? 0.5 : 2.0;
            newMin = min_val;
            newMax = s * value;
            newScale = span / pow(newMax - newMin, q);
            rebinned[0].total = bin[0].total;
            rebinned[0].count = bin[0].count;
        }

        // Redistribute the remaining bins by their mean value.
        for (unsigned long i = 1; i < num_bins; i++)
        {
            if (bin[i].count)
            {
                const double x = bin[i].total / ((double)bin[i].count);
                const unsigned long index = ClampIndex(newScale * pow(x - newMin, q));
                rebinned[index].count += bin[i].count;
                rebinned[index].total += bin[i].total;
            }
        }

        min_val = newMin;
        max_val = newMax;

        // Add the new data point to the NEW bins (the old set is discarded
        // by the swap below).
        if (extendDown)
        {
            rebinned[0].count += count;
            rebinned[0].total += weight;
        }
        else
        {
            const unsigned long index =
                ClampIndex(span * pow((value - min_val) / (max_val - min_val), q));
            rebinned[index].count += count;
            rebinned[index].total += weight;
        }
        bin.swap(rebinned);
    }
    else
    {
        // Value lies within the current range.
        const unsigned long index =
            ClampIndex(span * pow((value - min_val) / (max_val - min_val), q));
        bin[index].count += count;
        bin[index].total += weight;
    }
    return true;
} // end Histogram::Tally()

void Histogram::Print(FILE* file)
{
    for (unsigned long i = 0; i < bin.size(); i++)
    {
        if (bin[i].count)
        {
            const double x = bin[i].total / ((double)bin[i].count);
            fprintf(file, "%f, %lu\n", x, bin[i].count);
        }
    }
} // end Histogram::Print()

unsigned long Histogram::Count()
{
    unsigned long total = 0;
    for (unsigned long i = 0; i < bin.size(); i++)
        total += bin[i].count;
    return total;
} // end Histogram::Count()

double Histogram::PercentageInRange(double rangeMin, double rangeMax)
{
    unsigned long countTotal = 0;
    unsigned long rangeTotal = 0;
    for (unsigned long i = 0; i < bin.size(); i++)
    {
        if (0 == bin[i].count) continue;   // empty bin: no mean value to test
        countTotal += bin[i].count;
        const double value = bin[i].total / ((double)bin[i].count);
        if ((value >= rangeMin) && (value <= rangeMax))
            rangeTotal += bin[i].count;
    }
    if (0 == countTotal) return 0.0;       // nothing tallied: avoid 0/0
    return (100.0 * ((double)rangeTotal) / ((double)countTotal));
} // end Histogram::PercentageInRange()

double Histogram::Percentile(double p)
{
    // Number of occurrences that must be accumulated (rounded to nearest).
    const unsigned long goal = (unsigned long)(((double)Count()) * p + 0.5);
    unsigned long count = 0;
    for (unsigned long i = 0; i < bin.size(); i++)
    {
        count += bin[i].count;
        if (count >= goal)
        {
            // Invert the value-to-bin mapping for bin index i.
            double x = pow(((double)i) / ((double)num_bins - 1), 1.0/q);
            x *= (max_val - min_val);
            x += min_val;
            return x;
        }
    }
    return max_val;
} // end Histogram::Percentile()

int main(int argc, char* argv[])
{
    bool normalize = false;
    bool firstBin = true;
    double minimum = 0.0;

    bool compute_percent = false;
    double percentMin = 0.0;
    double percentMax = 0.0;

    if (argc < 2)
    {
        fprintf(stderr, "hcat: Insufficient arguments!\n");
        usage();
        exit(-1);
    }

    // Check for command options; they must precede the file names.
    int i = 1;
    while (i < argc)
    {
        if (!strcmp(argv[i], "normalize"))
        {
            i++;
            normalize = true;
            if (argc < 3)
            {
                fprintf(stderr, "hcat: Insufficient arguments!\n");
                usage();
                exit(-1);
            }
        }
        else if (!strcmp(argv[i], "percent"))
        {
            i++;
            if (i >= argc)
            {
                fprintf(stderr, "hcat: Insufficient \"percent\" args!\n");
                usage();
                exit(-1);
            }
            if (strchr(argv[i], ':'))
            {
                // "<rangeMin>:<rangeMax>"
                if (2 != sscanf(argv[i], "%lf:%lf", &percentMin, &percentMax))
                {
                    fprintf(stderr, "hcat: Bad \"percent\" arg!\n");
                    usage();
                    exit(-1);
                }
            }
            else
            {
                // "<rangeMax>" only; the range starts at zero.
                percentMin = 0.0;
                if (1 != sscanf(argv[i], "%lf", &percentMax))
                {
                    fprintf(stderr, "hcat: Bad \"percent\" arg!\n");
                    usage();
                    exit(-1);
                }
            }
            i++;
            compute_percent = true;
        }
        else
        {
            // Must be first of file names
            break;
        }
    }

    Histogram h;
    h.Init(1000, 0.5);  // 1000 point, non-linear histogram (low-value precision)

    for (; i < argc; i++)
    {
        FILE* file = fopen(argv[i], "r");
        if (!file)
        {
            perror("hcat: Error opening input file");
            usage();
            exit(-1);
        }
        FastReader reader;
        char buffer[MAX_LINE];
        unsigned int len = MAX_LINE;
        while (reader.Readline(file, buffer, &len))
        {
            // Skip blank and commented (leading `#` lines)
            const bool skip = (0 == len) || ('#' == buffer[0]);
            len = MAX_LINE;
            if (skip) continue;

            double value;
            unsigned long count;   // must match the "%lu" conversion below
            if (2 != sscanf(buffer, "%lf, %lu", &value, &count))
            {
                fprintf(stderr, "hcat: Warning! Bad histogram line in file: %s\n", argv[i]);
                continue;
            }
            if (normalize)
            {
                // Shift each file so that its first value becomes zero.
                if (firstBin)
                {
                    minimum = value;
                    firstBin = false;
                    value = 0.0;
                }
                else
                {
                    value -= minimum;
                }
            }
            if (!h.Tally(value, count))
            {
                fprintf(stderr, "hcat: Error adding tallying data point!\n");
                exit(-1);
            }
        }  // end while(reader.Readline())
        // Readline() leaves "len" untouched only when a line did not fit
        // into the buffer; the remainder of the file has not been read.
        if (MAX_LINE == len)
            fprintf(stderr, "hcat: Warning! Line too long in file: %s\n", argv[i]);
        fclose(file);
        firstBin = true;
    }  // end for(i=1..argc)

    if (h.IsEmpty())
    {
        fprintf(stderr, "hcat: Warning! Empty histogram.\n");
        exit(0);  // nothing to output
    }

    if (compute_percent)
    {
        const double percent = h.PercentageInRange(percentMin, percentMax);
        fprintf(stdout, "%f\n", percent);
    }
    else
    {
        // Default output
        // Output new combined histogram w/ percentile info
        const double p[6] = {0.99, 0.95, 0.9, 0.8, 0.75, 0.5};
        fprintf(stdout, "#histogram: ");
        fprintf(stdout, "min:%f max:%f percentiles: ", h.Min(), h.Max());
        for (int j = 0; j < 6; j++)
        {
            const double percentile = h.Percentile(p[j]);
            fprintf(stdout, "%2d>%f ", (int)(p[j]*100.0+0.5), percentile);
        }
        fprintf(stdout, "\n");
        h.Print(stdout);
    }
    return 0;
}  // end main()

FastReader::FastReader()
 : saveptr(savebuf), savecount(0)
{
}

bool FastReader::Read(FILE* filePtr, char* buffer, unsigned int* len)
{
    unsigned int want = *len;
    // First hand out whatever is left over from the previous fread().
    if (savecount)
    {
        const unsigned int ncopy = MIN(want, savecount);
        memcpy(buffer, saveptr, ncopy);
        savecount -= ncopy;
        saveptr += ncopy;
        buffer += ncopy;
        want -= ncopy;
    }
    while (want)
    {
        const unsigned int result =
            (unsigned int)fread(savebuf, sizeof(char), BUFSIZE, filePtr);
        if (result)
        {
            const unsigned int ncopy = MIN(want, result);
            memcpy(buffer, savebuf, ncopy);
            savecount = result - ncopy;
            saveptr = savebuf + ncopy;
            buffer += ncopy;
            want -= ncopy;
        }
        else  // end-of-file
        {
            *len -= want;
            return (0 != *len);  // true if we read something, false if nothing
        }
    }
    return true;
}  // end FastReader::Read()

// An OK text readline() routine (reads what will fit into buffer incl. NULL termination).
// Returns true with the line NUL-terminated in "buffer" and *len set to its
// length (terminator excluded).  A final line that is not newline-terminated
// is returned the same way.  Returns false at end-of-file (*len set to 0) or
// when the line is bigger than the buffer; in the latter case *len is
// unchanged on return and the line requires multiple reads.
bool FastReader::Readline(FILE* filePtr, char* buffer, unsigned int* len)
{
    unsigned int count = 0;
    const unsigned int length = *len;
    char* ptr = buffer;
    unsigned int one = 1;
    while ((count < length) && Read(filePtr, ptr, &one))
    {
        if (('\n' == *ptr) || ('\r' == *ptr))
        {
            *ptr = '\0';
            *len = count;
            return true;
        }
        count++;
        ptr++;
    }
    // Either we've filled the buffer or hit end-of-file
    if (count < length)
    {
        *len = count;
        if (count > 0)
        {
            // End-of-file in the middle of a line: deliver the partial
            // line instead of dropping it (ptr is still inside the buffer).
            *ptr = '\0';
            return true;
        }
    }
    return false;
}  // end FastReader::Readline()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -