📄 metric.cc
字号:
const char *help = "\progname: metric.cc\n\code2html: This program computes a distance (using a metric) between 2 patterns.\n\version: Torch3 vision2.1, 2004-2006\n\(c) Sebastien Marcel (marcel@idiap.ch)\n";// system#include <sys/stat.h>// core#include "string_utils.h"// datasets#include "FileBinDataSet.h"// machines#include "MyMLP.h"// normalisation#include "MyMeanVarNorm.h"// metrics#include "Pearson.h"#include "Canberra.h"#include "NormalizeCorrelation.h"#include "StandardCorrelation.h"#include "StandardCovariance.h"#include "ChiSquare.h"#include "TangentDistance.h"#include "Mahanalobis.h"#include "DotProduct.h"// eigen#include "PCAMachine.h"// misc#include "CmdLine.h"#include "FileListCmdOption.h"using namespace Torch;// check data filesbool checkFile(char *file_name);int checkFiles(int n_files, char **file_names);// metric based on a MLPreal mMlpMetric(int n_inputs, real *x, real *y, MyMLP *mlp, MyMeanVarNorm *mv_norm, Sequence *seq, bool diff, bool delta);//bool verbose;int main(int argc, char **argv){ // char *model_filename; char *output_filename; // bool mahalanobis; bool canberra; bool pearson; bool nc; bool stdcor; bool stdcov; bool chisquare; bool td; bool mlpmetric; bool dotproduct; char *metric_filename; // bool usemean; bool diff; bool delta; // bool model_is_an_index; int dim; int width; int height; // Allocator *allocator = new Allocator; DiskXFile::setLittleEndianMode(); //=================== The command-line ========================== FileListCmdOption filelist("file name", "the list files or one data file"); filelist.isArgument(true); // Construct the command line CmdLine cmd; cmd.setBOption("write log", false); // Put the help line at the beginning cmd.info(help); // Train mode cmd.addText("\nArguments:"); cmd.addSCmdArg("model", &model_filename, "the model file to compare with"); cmd.addCmdOption(&filelist); cmd.addSCmdArg("output", &output_filename, "the output file of distance"); cmd.addText("\nOptions:"); cmd.addBCmdOption("-mi", &model_is_an_index, false, "the model is an index on the data but not the data itself", true); cmd.addBCmdOption("-verbose", &verbose, false, "verbose", true); cmd.addBCmdOption("-usemean", &usemean, false, "use the mean model", true); cmd.addText("\nFeatures:"); cmd.addBCmdOption("-diff", &diff, false, "diff input features", true); cmd.addBCmdOption("-delta", &delta, false, "delta input features", true); cmd.addICmdOption("-dim", &dim, -1, "number of dimensions to use", true); cmd.addText("\nMetrics:"); cmd.addBCmdOption("-mahalanobis", &mahalanobis, false, "Mahalanobis metric", true); cmd.addBCmdOption("-canberra", &canberra, false, "Canberra metric", true); cmd.addBCmdOption("-pearson", &pearson, false, "one minus Pearson correlation", true); cmd.addBCmdOption("-nc", &nc, false, "Normalized correlation", true); cmd.addBCmdOption("-stdcor", &stdcor, false, "Standard Correlation", true); cmd.addBCmdOption("-stdcov", &stdcov, false, "Standard Covariance", true); cmd.addBCmdOption("-chisquare", &chisquare, false, "Chi Square", true); cmd.addBCmdOption("-dotproduct", &dotproduct, false, "Dot product", true); cmd.addText("\nImage Metric:"); cmd.addBCmdOption("-td", &td, false, "tangent distance", true); cmd.addICmdOption("-width", &width, -1, "width of the image for tangent distance", true); cmd.addICmdOption("-height", &height, -1, "height of the image for tangent distance", true); cmd.addText("\nTrained Metric:"); cmd.addBCmdOption("-mlpmetric", &mlpmetric, false, "mlpmetric distance"); cmd.addSCmdOption("-metricmodel", &metric_filename, "", "metric filename"); // Read the command line cmd.read(argc, argv); // int n_reminding_files = filelist.n_files; n_reminding_files = checkFiles(filelist.n_files, filelist.file_names); if(verbose) { if(mahalanobis) print("Using Mahalanobis-cosine metric with PCA model %s\n", metric_filename); else if(td) print("Using Tangent distance on %dx%d images\n", width, height); else if(canberra) print("Using Canberra metric\n"); else if(pearson) print("Using one minus Pearson correlation\n"); else if(nc) print("Using Normalized correlation\n"); else if(stdcor) print("Using Standard Correlation\n"); else if(stdcov) print("Using Standard Covariance\n"); else if(chisquare) print("Using Chi Square\n"); else if(dotproduct) print("Using the dot product\n"); else if(mlpmetric) print("Using MLP metric with model %s\n", metric_filename); else print("No metric chosen, setting to Euclidean by default\n"); print(" + n_filenames = %d\n", n_reminding_files); for(int i = 0 ; i < n_reminding_files ; i++) print(" filename[%d] = %s\n", i, filelist.file_names[i]); } // // load the model int model_n_files = 0; char **model_file_names = NULL; int n_inputs_model = 0; int n_patterns_model = 0; real **ref_model = NULL; real *mean_model = NULL; real *inputs = NULL; DiskXFile *model = new(allocator) DiskXFile(model_filename, "r"); if(model_is_an_index) { char *str_ = (char *)allocator->alloc(1024); model->read(str_, 1, 1024); str_[1023] = '\0'; model->rewind(); char *endp_; strtol(str_, &endp_, 10); if( (*endp_ != '\0') && (*endp_ != '\n') ) { do { model->gets(str_, 1024); model_n_files++; } while (!model->eof()); model_n_files--; model->rewind(); } else model->scanf("%d", &model_n_files); if(verbose) print("model %s is an index: %d files indexed\n", model_filename, model_n_files); model_file_names = (char **)allocator->alloc(sizeof(char *)*model_n_files); bool first_time_ = true; for(int i = 0; i < model_n_files; i++) { model->scanf("%s", str_); model_file_names[i] = (char *)allocator->alloc(strlen(str_)+1); strcpy(model_file_names[i], str_); if(verbose) print(" > %s\n", model_file_names[i]); if(checkFile(model_file_names[i])) { DiskXFile *model_ = new(allocator) DiskXFile(model_file_names[i], "r"); int n_inputs_model_ = 0; int n_patterns_model_ = 0; model_->read(&n_patterns_model_, sizeof(int), 1); model_->read(&n_inputs_model_, sizeof(int), 1); if(first_time_) { n_inputs_model = n_inputs_model_; first_time_ = false; } else if(n_inputs_model != n_inputs_model_) error("Incorrect number of inputs %d != %d", n_inputs_model, n_inputs_model_); n_patterns_model += n_patterns_model_; allocator->free(model_); } } if(n_patterns_model == 0) { error("The model file contains only non-existing files of features."); delete allocator; } allocator->free(str_); // // memory allocations ref_model = (real **)allocator->alloc(sizeof(real*)*n_patterns_model); mean_model = (real *)allocator->alloc(sizeof(real)*n_inputs_model); inputs = (real *)allocator->alloc(sizeof(real)*n_inputs_model); for(int j=0; j< n_inputs_model; j++) { mean_model[j] = 0.0; inputs[j] = 0.0; } int p = 0; for(int i = 0; i < model_n_files; i++) { if(checkFile(model_file_names[i])) { // DiskXFile *model_ = new(allocator) DiskXFile(model_file_names[i], "r"); int n_inputs_model_ = 0; int n_patterns_model_ = 0; model_->read(&n_patterns_model_, sizeof(int), 1); model_->read(&n_inputs_model_, sizeof(int), 1); // for(int k = 0 ; k < n_patterns_model_ ; k++) { ref_model[p] = (real *)allocator->alloc(sizeof(real)*n_inputs_model); model_->read(ref_model[p], sizeof(real), n_inputs_model); for(int j=0; j< n_inputs_model; j++) mean_model[j] += ref_model[p][j]; p++; } // allocator->free(model_); } } for(int j=0; j< n_inputs_model; j++) mean_model[j] /= (real) n_patterns_model; } else { model->read(&n_patterns_model, sizeof(int), 1); model->read(&n_inputs_model, sizeof(int), 1); // // memory allocations ref_model = (real **)allocator->alloc(sizeof(real*)*n_patterns_model); mean_model = (real *)allocator->alloc(sizeof(real)*n_inputs_model); inputs = (real *)allocator->alloc(sizeof(real)*n_inputs_model); for(int j=0; j< n_inputs_model; j++) { mean_model[j] = 0.0; inputs[j] = 0.0; } for(int p = 0 ; p < n_patterns_model ; p++) { ref_model[p] = (real *)allocator->alloc(sizeof(real)*n_inputs_model); model->read(ref_model[p], sizeof(real), n_inputs_model); for(int j=0; j< n_inputs_model; j++) mean_model[j] += ref_model[p][j]; } for(int j=0; j< n_inputs_model; j++) mean_model[j] /= (real) n_patterns_model; } if(verbose) { print(" Number of inputs = %d\n", n_inputs_model); print(" Number of reference patterns = %d\n", n_patterns_model); } // // DiskXFile *output_xfile = NULL; output_xfile = new(allocator) DiskXFile(output_filename, "w"); // // create the metric Metric *metric = NULL; // for the MLP metric MyMLP *mlp = NULL; MyMeanVarNorm *mv_norm = NULL; Sequence *seq; // for the Mahalanobis metric PCAMachine *pca_machine = NULL; // restricting the number of dimensions to use int dim_ = dim; if((dim_ == -1) || (dim_ > n_inputs_model)) dim_ = n_inputs_model; if(canberra) metric = new(allocator) mCanberra(dim_); else if(pearson) metric = new(allocator) mPearson(dim_); else if(nc) metric = new(allocator) mNC(dim_); else if(stdcor) metric = new(allocator) mStdCorrelation(dim_); else if(stdcov) metric = new(allocator) mStdCovariance(dim_); else if(chisquare) metric = new(allocator) mChiSquare(dim_); else if(dotproduct) metric = new(allocator) mDotProduct(dim_); else if(td) { if(width != -1 && height != -1 && width * height == n_inputs_model) metric = new(allocator) mTangentDistance(width, height); else error("width(%d) or height (%d) incorrect for Tangent Distance", width, height); } else if(mahalanobis) { if(strcmp(metric_filename, "")) { pca_machine = new(allocator) PCAMachine(n_inputs_model); DiskXFile *file = NULL; file = new(allocator) DiskXFile(metric_filename, "r"); pca_machine->loadXFile(file); allocator->free(file); pca_machine->setIOption("verbose_level", 1); pca_machine->setROption("variance", -1.0); pca_machine->init(); if(dim > 0) pca_machine->n_outputs = dim; metric = new(allocator) mMahanalobisCosine(n_inputs_model, pca_machine); } else error("No PCA model available for Mahalanobis"); } else if(mlpmetric) { if(strcmp(metric_filename, "")) { int n_inputs_; if(verbose) print("Loading MLP metric\n"); if(diff) { if(verbose) print("Using diff features.\n"); n_inputs_ = n_inputs_model; //if(n_inputs_ != n_inputs_model) error("Number of inputs incorrect."); } else { if(verbose) if(delta) print("Using delta features.\n"); n_inputs_ = 2*n_inputs_model; //if(n_inputs_ != 2*n_inputs_model) error("Number of inputs incorrect."); } //if(n_outputs != 2) error("Number of outputs incorrect."); // mv_norm = new(allocator) MyMeanVarNorm(n_inputs_, 1); mlp = new(allocator) MyMLP; mlp->load(metric_filename, mv_norm); if(verbose) mlp->info(); seq = new(allocator) Sequence(1, n_inputs_); } else error("No model available"); } else metric = new(allocator) mEuclidean(dim_); for(int i = 0 ; i < n_reminding_files ; i++) { if(verbose) print(" + filename[%d] = %s\n", i, filelist.file_names[i]); int n_inputs; int n_patterns; // Test the file DiskXFile *file = new(allocator) DiskXFile(filelist.file_names[i], "r"); file->read(&n_patterns, sizeof(int), 1); file->read(&n_inputs, sizeof(int), 1); if(verbose) { print("Reading bindata file (%s)\n", filelist.file_names[i]); print(" n_inputs = %d\n", n_inputs); print(" n_patterns = %d\n", n_patterns); } if(n_inputs != n_inputs_model) error("Incorrect number of inputs (%d <> %d) !", n_inputs, n_inputs_model); real min_ = INF; real max_ = -INF; real sum_ = 0.0; real avg; for(int j=0; j< n_patterns; j++) { file->read(inputs, sizeof(real), n_inputs); real d = 0.0; if(usemean) { if(mlpmetric) d = -mMlpMetric(n_inputs, inputs, mean_model, mlp, mv_norm, seq, diff, delta); else d = metric->measure(inputs, mean_model); } else { for(int p = 0 ; p < n_patterns_model ; p++) { if(mlpmetric) d += -mMlpMetric(n_inputs, inputs, ref_model[p], mlp, mv_norm, seq, diff, delta); else d += metric->measure(inputs, ref_model[p]); } d /= (real) n_patterns_model; } if(verbose) print(" > %g\n", d); sum_ += d; // if(d < min_) min_ = d; if(d > max_) max_ = d; } avg = sum_/(real)n_patterns; // if(verbose) { print("Outputs:\n"); print(" min = %g\n", min_); print(" max = %g\n", max_); print(" sum = %g\n", sum_); print(" avg = %g\n", avg); } char *temp = strBaseName(filelist.file_names[i]); char *basename = strRemoveSuffix(temp); allocator->retain(basename); output_xfile->printf("%s %g\n", basename, -avg); // allocator->free(file); } // delete allocator; return(0);}bool checkFile(char *file_name){ struct stat st; if(stat(file_name, &st) == -1) { warning("Couldn't stat file %s.", file_name); return false; } else { if(!S_ISREG (st.st_mode)) { warning("not regular file %s.", file_name); return false; } else return true; }}int checkFiles(int n_files, char **file_names){ int reminding_files = n_files; //struct stat st; if(verbose) print("Checking files:\n"); int i = 0; while(i < reminding_files) { if(verbose) print("Checking %s\n", file_names[i]); if(checkFile(file_names[i]) == false) { for(int j = i ; j < reminding_files-1 ; j++) file_names[j] = file_names[j+1]; file_names[reminding_files-1] = NULL; reminding_files--; } else i++;/* if(stat(file_names[i], &st) == -1) { warning("Couldn't stat file %s.", file_names[i]); for(int j = i ; j < reminding_files-1 ; j++) file_names[j] = file_names[j+1]; file_names[reminding_files-1] = NULL; reminding_files--; } else { if(!S_ISREG (st.st_mode)) { warning("not regular file %s.", file_names[i]); for(int j = i ; j < reminding_files-1 ; j++) file_names[j] = file_names[j+1]; file_names[reminding_files-1] = NULL; reminding_files--; } else i++; }*/ } if(verbose) { print("Checked files (%d):\n", reminding_files); for(int i = 0 ; i < reminding_files ; i++) print("-> %s\n", file_names[i]); } return reminding_files;}real mMlpMetric(int n_inputs, real *x, real *y, MyMLP *mlp, MyMeanVarNorm *mv_norm, Sequence *seq, bool diff, bool delta){ if(diff) for(int i = 0 ; i < n_inputs ; i++) seq->frames[0][i] = x[i] - y[i]; else if(delta) { int j = 0; for(int i = 0 ; i < n_inputs ; i++, j++) seq->frames[0][j] = x[i]; for(int i = 0 ; i < n_inputs ; i++, j++) seq->frames[0][j] = x[i] - y[i]; } else { int j = 0; for(int i = 0 ; i < n_inputs ; i++, j++) seq->frames[0][j] = y[i]; for(int i = 0 ; i < n_inputs ; i++, j++) seq->frames[0][j] = x[i]; } mv_norm->preProcessInputs(seq); mlp->forward(seq); return mlp->outputs->frames[0][0];}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -