📄 tools.cc
字号:
/* Tools.cc Implementation of the Tools class Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/ #include <fstream>#include <iostream>#include <iomanip>#include <cmath>#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <string.h>#include <time.h>#include "TimerUtility.h"#include "Constants.h"#include "Tools.h"using namespace std;void printUsage(){ printf("\n"); printf("USAGE\n"); printf("\t%s-'uname' [SWITCHES]\n", EXEC_PROGRAM);}void printDescription(){ printf("DESCRIPTION\n"); printf("\tImplements three co-clustering algorithms (in C++):\n"); printf("\tinformation-theoretic co-clustering and two types of\n"); printf("\tminimum sum-squared residue co-clustering algorithms\n");}void printAuthor(){ printf("AUTHORS\n"); printf("\tHyuk Cho\n"); printf("\tCopyright (c) 2005, 2006\n"); printf("\tHyuk Cho, Yuqiang Guan, and Suvrit Sra\n"); printf("\t{hyukcho, yguan, suvrit}@cs.utexas.edu\n"); printf("\tCopyright (c) 2003, 2004\n");}void printAlgorithmType(){ printf("REQUIRED SWITCHES\n"); printf("\tAt least, the following switches are required: -A -C -I -R\n"); printf("DESCRIPTION OF SWITCHES\n"); printf("\t-A algorithmType (REQUIRED)\n"); printf("\t specifies type of coclustering algorithm\n"); printf("\t e -- euclidean coclustering algorithm (DEFAULT)\n"); printf("\t i -- information theoretic coclustering algorithm\n"); printf("\t r -- minimim squared residue coclustering algorithm\n");}void printColClusterNum(){ printf("\t-C colClusterNum (REQUIRED)\n"); printf("\t specifies number of column clusters (DEFAULT = %d)\n", DEFAULT_numColCluster);}void printDumpLevel(){ printf("\t-D dumpLevel [dumpAccessMode dumpFilename]\n"); printf("\t specifies level of intermediate information dump\n"); printf("\t dumpLevel can be one of\n"); printf("\t 0 -- minimal information (DEFAULT)\n"); printf("\t 1 -- objective function value in Batch Update and minimal statistics\n"); printf("\t 2 -- objective function value in local search and maximal statistics\n"); printf("\t 3 -- maximal information in a specified file and minimal on stdout\n"); printf("\t dumpAccessMode is required after '-D 3' and one of\n"); printf("\t a -- append mode\n"); printf("\t o -- output mode\n"); printf("\t dumpFilename is required after '-D 3 a/o'\n");}void printShowingEachCluster(){ printf("\t-E outputEachClsuter\n"); printf("\t specifies detail ouputs in external cluster validattion section\n"); printf("\t outputEachCluster can be one of\n"); printf("\t 0 -- don't output a confusion matrix and each cluster's statistics (DEFAULT)\n"); printf("\t 1 -- output them in external cluster validation section\n");}void printInputMatrixType(){ printf("\t-I inputMatrixType inputFormatType inputFilename (REQUIRED)\n"); printf("\t specifies details of input matrix file\n"); printf("\t inputMatrixType can be one of\n"); printf("\t d -- dense (i.e., rectangle/square) matrix\n"); printf("\t inputFormatType after '-I d' can be one of\n"); printf("\t s -- dimension and matrix stored separately\n"); printf("\t t -- both dimension and matrix stored together\n"); printf("\t s -- sparse matrix in CCS\n"); printf("\t inputFormatType after '-I s' can be one of\n"); printf("\t f -- tfn scaling\n"); printf("\t x -- txx scaling\n");}void printComputingOneWayObject(){ printf("\t-J oneWay\n"); printf("\t specifies to get one-way row/column clustering objective function values\n"); printf("\t oneWay can be one of\n"); printf("\t 0 -- don't compute one-way objective function value(s) (DEFAULT)\n"); printf("\t 1 -- compute one-way objective function value(s)\n");}void printClassLabelSelection(){ printf("\t-K classLabelSelection classOffsetType classLabelFilename\n"); printf("\t specifies details of class label file\n"); printf("\t classLabelSelection can be one of\n"); printf("\t b -- both row and column class labels\n"); printf("\t c -- column class labels\n"); printf("\t r -- row class labels\n"); printf("\t classOffsetType can be one of\n"); printf("\t 0 -- class label index starts from 0\n"); printf("\t 1 -- class label index starts from 1\n");}void printSmoothingType(){ printf("\t-M smoothingType [rowSmoothingMagnitude colSmoothingMagnitude]\n"); printf("\t specifies details of smoothing techniques\n"); printf("\t smoothingType can be one of\n"); printf("\t a -- annealing of uniform smoothing (DEFAULT = %g)\n", DEFAULT_rowAnnealingFactor); printf("\t h -- maximum entropy smoothing\n"); printf("\t n -- no smoothing (DEFAULT)\n"); printf("\t u -- uniform smoothing (DEFAULT = %g)\n", DEFAULT_rowSmoothingFactor); printf("\t smoothingMagnitude is required for '-M a/h/u'\n");}void printRunNum(){ printf("\t-N runNum\n"); printf("\t specifies number of runs to get averaged statistics (DEFAULT = %d)\n", DEFAULT_numRun);}void printOutputLabelType(){ printf("\t-O outputFileType [outputLabelType outputOffsetType] outputAccessMode outputFilename\n"); printf("\t specifies details of cocluster/objective/statistics file information\n"); printf("\t outputFileType can be one of\n"); printf("\t c -- output co-clusters\n"); printf("\t outputLabelType is required after '-O c' and can be one of\n"); printf("\t b -- each co-cluster block consisting of three rows like\n"); printf("\t #rows #columns (in 1st row)\n"); printf("\t list of row #s in (1,1)-th co-cluster (in 2nd row)\n"); printf("\t list of column #s in (1,2)-th column co-cluster (in 3rd row)\n"); printf("\t and so on\n"); printf("\t s -- all co-clusters represented in a simple format of two rows like (DEFAULT)\n"); printf("\t list of row cluster labels of all rows (in 1st row)\n"); printf("\t list of column cluster labels of all columns (in 2nd row)\n"); printf("\t outputOffsetType is required after '-O c b' and outputLabelType and can be one of\n"); printf("\t 0 -- cluster label index starts from 0\n"); printf("\t 1 -- cluster label index starts from 1\n"); printf("\t o -- output objective function value(s)\n"); printf("\t s -- output satistical information\n"); printf("\t outputAccessMode can be one of\n"); printf("\t a -- append mode, useful with '-N randomRunNum'\n"); printf("\t o -- output mode\n");}void printRowClusterNum(){ printf("\t-R rowClusterNum (REQUIRED)\n"); printf("\t specifies number of row clusters (DEFAULT = %d)\n", DEFAULT_numRowCluster);}void printSeedingType(){ printf("\t-S seedingType seedingLabelSelection [seedingPertValue\n"); printf("\t | (seedingOffsetType numSeedingSet seedingFilename)\n"); printf("\t | (numRowPermutation | numColPermutation)]\n"); printf("\t specifies details of initial cluster assignment\n"); printf("\t seedingType can be one of\n"); printf("\t f -- farthest apart assignment\n"); printf("\t m -- permute an initial random cluster vector\n"); printf("\t numRowPermutation or/and numColPermutation is/are required after '-S m b/c/r'\n"); printf("\t numRowPermutation numColPermutation -- after '-S m b'\n"); printf("\t numColPermutation -- after '-S m c'\n"); printf("\t numRowPermutation -- after '-S m r'\n"); printf("\t p -- perturbates cluster centroids\n"); printf("\t seedingPertValue is required after '-S p'\n"); printf("\t r -- random assignment (DEFAULT)\n"); printf("\t s -- read cluster labels from a seeding file\n"); printf("\t seedingOffsetType is required after '-S s b/c/r' and one of\n"); printf("\t 0 -- seeding label index starts from 0\n"); printf("\t 1 -- seeding label index starts from 1\n"); printf("\t numSeedingSet is required after '-S s b/c/r 0/1'\n"); printf("\t n -- positive integer (>=1)\n"); printf("\t seedingFilename is required after '-S s b/c/r/ 0/1 n'\n"); printf("\t seedingLabelSelection can be one of\n"); printf("\t b -- both row and column cluster labels\n"); printf("\t c -- column cluster labels\n"); printf("\t r -- row cluster labels\n");}void printThresholdType(){ printf("\t-T thresholdType rowThreshold colThreshold\n"); printf("\t specifies details of threshold for either batch update or local search\n"); printf("\t thresholdType can be one of\n"); printf("\t b -- batch update (default = +%g)\n", DEFAULT_rowBatchUpdateThreshold); printf("\t l -- local search (default = %g)\n", DEFAULT_rowLocalSearchThreshold); printf("\t Both rowThreshold and colThreshold are required.\n");}void printUpdateType(){ printf("\t-U updateType [updateSelection | (rowLocalSearchLength colLocalSearchLength)]\n"); printf("\t specifies details of cluster-centroid-updating order in batch update or local search\n"); printf("\t updateType can be one of\n"); printf("\t b -- batch update\n"); printf("\t updateSelection is required after '-U b' and can be one of\n"); printf("\t 0 -- single row and single column, respectively (DEFAULT)\n"); printf("\t 1 -- single row and single column, in batch mode\n"); printf("\t 2 -- single row or single column, flipping a pair coin\n"); printf("\t 3 -- multiple run of either row or column, flipping a pair coin\n"); printf("\t l -- local search\n"); printf("\t rowLocalSearchLength (DEFAULT = %d) is required after 'l'\n", DEFAULT_rowLocalSearchLength); printf("\t colLocalSearchLength (DEFAULT = %d) is required after 'l' and rowLocalSearchLength\n", DEFAULT_colLocalSearchLength); printf("\t To avoid empty row/column cluster(s), use -1 for row/colLocalSearchLength.\n");}void printTakingReverse(){ printf("\t-X anticorrelation\n"); printf("\t specifies to capture anti-correlated rows (by taking reverse of rows)\n"); printf("\t anticorrelation can be one of\n"); printf("\t 0 -- don't capture anti-correlated rows (DEFAULT)\n"); printf("\t 1 -- capture anti-correlated rows\n");}// show command-line parameters...void printHelp(){ printUsage(); printDescription(); printAuthor(); printAlgorithmType(); printColClusterNum(); printDumpLevel(); printShowingEachCluster(); printInputMatrixType(); printComputingOneWayObject(); printClassLabelSelection(); printSmoothingType(); printRunNum(); printOutputLabelType(); printRowClusterNum(); printSeedingType(); printThresholdType(); printUpdateType(); printTakingReverse(); printf("\n"); exit(EXIT_SUCCESS);}// set default parameters...void setCommandLine(commandLineArgument &myCLA){ // default parameters myCLA.numInvalidCLA = 0; myCLA.algorithmType = DEFAULT_algorithmType; myCLA.numColCluster = DEFAULT_numColCluster; myCLA.dumpLevel = DEFAULT_dumpLevel; myCLA.showingEachCluster = DEFAULT_showingEachCluster; myCLA.inputMatrixType = DEFAULT_inputMatrixType; myCLA.inputFormatType = DEFAULT_inputFormatType; myCLA.computingOneWayObjective = DEFAULT_computingOneWayObjective; myCLA.externalValidityType = DEFAULT_externalValidityType; myCLA.classOffsetType = DEFAULT_classOffsetType; myCLA.numRowClass = DEFAULT_numRowClass; // used in validation myCLA.numColClass = DEFAULT_numColClass; // used in validation myCLA.smoothingType = DEFAULT_smoothingType; myCLA.rowSmoothingFactor = DEFAULT_rowSmoothingFactor; myCLA.colSmoothingFactor = DEFAULT_colSmoothingFactor; myCLA.numRun = DEFAULT_numRun; myCLA.coclusterOffsetType = DEFAULT_coclusterOffsetType; myCLA.coclusterLabelType = DEFAULT_coclusterLabelType; myCLA.coclusterAccessMode = DEFAULT_coclusterAccessMode; myCLA.numRowCluster = DEFAULT_numRowCluster; myCLA.colInitializationMethod = DEFAULT_colInitializationMethod; myCLA.rowInitializationMethod = DEFAULT_rowInitializationMethod; myCLA.rowSeedingOffsetType = DEFAULT_rowSeedingOffsetType; myCLA.colSeedingOffsetType = DEFAULT_colSeedingOffsetType; myCLA.numRowSeedingSet = DEFAULT_numRowSeedingSet; myCLA.numColSeedingSet = DEFAULT_numColSeedingSet; myCLA.rowSeedingAccessMode = DEFAULT_rowSeedingAccessMode; myCLA.colSeedingAccessMode = DEFAULT_colSeedingAccessMode; myCLA.perturbationMagnitude = DEFAULT_perturbationMagnitude; myCLA.numRowPermutation = DEFAULT_numRowPermutation; myCLA.numColPermutation = DEFAULT_numColPermutation; myCLA.rowBatchUpdateThreshold = DEFAULT_rowBatchUpdateThreshold; myCLA.colBatchUpdateThreshold = DEFAULT_colBatchUpdateThreshold; myCLA.rowLocalSearchThreshold = DEFAULT_rowLocalSearchThreshold; myCLA.colLocalSearchThreshold = DEFAULT_colLocalSearchThreshold; myCLA.batchUpdateType = DEFAULT_batchUpdateType; myCLA.localSearchType = DEFAULT_localSearchType; // not used... myCLA.rowLocalSearchLength = DEFAULT_rowLocalSearchLength; myCLA.colLocalSearchLength = DEFAULT_colLocalSearchLength; myCLA.takingReverse = DEFAULT_takingReverse; myCLA.havingArgument = DEFAULT_havingArgument; myCLA.emptyRowId = NULL; // not used... myCLA.emptyColId = NULL; myCLA.numEmptyRow = 0; myCLA.numEmptyCol = 0; myCLA.rowClassLabel = NULL; // used in validation myCLA.colClassLabel = NULL; // used in validation // input and output files strncpy(myCLA.dumpFilename, EMPTY_STRING, FILENAME_LENGTH ); strncpy(myCLA.inputFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.bothClassFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.rowClassFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.colClassFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.objectiveFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.coclusterFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.bothSeedingFilename, EMPTY_STRING, FILENAME_LENGTH); // not used strncpy(myCLA.rowSeedingFilename, EMPTY_STRING, FILENAME_LENGTH); strncpy(myCLA.colSeedingFilename, EMPTY_STRING, FILENAME_LENGTH); strcpy(myCLA.scalingType, TXX_SCALING); // used only for the matrix in CCS}// get type of co-clustering algorithm from command-line...char **getAlgorithmType(int argc, char **argv, commandLineArgument &myCLA){ bool validCLA = true; switch (toupper((*(++argv))[0])){ case MSSRCC_I_ALGORITHM: myCLA.algorithmType = MINIMUM_SUM_SQUARE_RESIDUE_I_CC; break; case ITCC_ALGORITHM: myCLA.algorithmType = INFORMATION_THEORETIC_CC; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -