⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tools.cc

📁 一种聚类算法,名字是cocluster
💻 CC
📖 第 1 页 / 共 5 页
字号:
/*  Tools.cc    Implementation of the Tools class    Copyright (c) 2005, 2006              by Hyuk Cho    Copyright (c) 2003, 2004    	      by Hyuk Cho, Yuqiang Guan, and Suvrit Sra                {hyukcho, yguan, suvrit}@cs.utexas.edu*/  #include <fstream>#include <iostream>#include <iomanip>#include <cmath>#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <string.h>#include <time.h>#include "TimerUtility.h"#include "Constants.h"#include "Tools.h"using namespace std;void printUsage(){  printf("\n");  printf("USAGE\n");  printf("\t%s-'uname' [SWITCHES]\n", EXEC_PROGRAM);}void printDescription(){  printf("DESCRIPTION\n");  printf("\tImplements three co-clustering algorithms (in C++):\n");  printf("\tinformation-theoretic co-clustering and two types of\n");  printf("\tminimum sum-squared residue co-clustering algorithms\n");}void printAuthor(){  printf("AUTHORS\n");  printf("\tHyuk Cho\n");  printf("\tCopyright (c) 2005, 2006\n");  printf("\tHyuk Cho, Yuqiang Guan, and Suvrit Sra\n");  printf("\t{hyukcho, yguan, suvrit}@cs.utexas.edu\n");  printf("\tCopyright (c) 2003, 2004\n");}void printAlgorithmType(){  printf("REQUIRED SWITCHES\n");  printf("\tAt least, the following switches are required: -A -C -I -R\n");  printf("DESCRIPTION OF SWITCHES\n");  printf("\t-A  algorithmType  (REQUIRED)\n");  printf("\t    specifies type of coclustering algorithm\n");  printf("\t      e -- euclidean coclustering algorithm (DEFAULT)\n");  printf("\t      i -- information theoretic coclustering algorithm\n");  printf("\t      r -- minimim squared residue coclustering algorithm\n");}void printColClusterNum(){  printf("\t-C  colClusterNum  (REQUIRED)\n");  printf("\t    specifies number of column clusters (DEFAULT = %d)\n", DEFAULT_numColCluster);}void printDumpLevel(){  printf("\t-D  dumpLevel  [dumpAccessMode  dumpFilename]\n");  printf("\t    specifies level of intermediate information dump\n");   printf("\t    dumpLevel can be one of\n");  printf("\t      0 -- minimal information (DEFAULT)\n");  printf("\t      1 -- objective function value in Batch Update and minimal statistics\n");  printf("\t      2 -- objective function value in local search and maximal statistics\n");  printf("\t      3 -- maximal information in a specified file and minimal on stdout\n");  printf("\t           dumpAccessMode is required after '-D 3' and one of\n");  printf("\t             a -- append mode\n");  printf("\t             o -- output mode\n");  printf("\t           dumpFilename is required after '-D 3 a/o'\n");}void printShowingEachCluster(){  printf("\t-E  outputEachClsuter\n");  printf("\t    specifies detail ouputs in external cluster validattion section\n");  printf("\t    outputEachCluster can be one of\n");  printf("\t      0 -- don't output a confusion matrix and each cluster's statistics (DEFAULT)\n");  printf("\t      1 -- output them in external cluster validation section\n");}void printInputMatrixType(){  printf("\t-I  inputMatrixType  inputFormatType  inputFilename  (REQUIRED)\n");  printf("\t    specifies details of input matrix file\n");  printf("\t    inputMatrixType can be one of\n");  printf("\t      d -- dense (i.e., rectangle/square) matrix\n");  printf("\t           inputFormatType after '-I d' can be one of\n");  printf("\t             s -- dimension and matrix stored separately\n");  printf("\t             t -- both dimension and matrix stored together\n");  printf("\t      s -- sparse matrix in CCS\n");  printf("\t           inputFormatType after '-I s' can be one of\n");  printf("\t             f -- tfn scaling\n");  printf("\t             x -- txx scaling\n");}void printComputingOneWayObject(){  printf("\t-J  oneWay\n");  printf("\t    specifies to get one-way row/column clustering objective function values\n");  printf("\t    oneWay can be one of\n");  printf("\t      0 -- don't compute one-way objective function value(s) (DEFAULT)\n");  printf("\t      1 -- compute one-way objective function value(s)\n");}void printClassLabelSelection(){  printf("\t-K  classLabelSelection  classOffsetType  classLabelFilename\n");  printf("\t    specifies details of class label file\n");  printf("\t    classLabelSelection can be one of\n");  printf("\t      b -- both row and column class labels\n");  printf("\t      c -- column class labels\n");  printf("\t      r -- row class labels\n");  printf("\t    classOffsetType can be one of\n");  printf("\t             0 -- class label index starts from 0\n");  printf("\t             1 -- class label index starts from 1\n");}void printSmoothingType(){  printf("\t-M  smoothingType  [rowSmoothingMagnitude  colSmoothingMagnitude]\n");  printf("\t    specifies details of smoothing techniques\n");  printf("\t    smoothingType can be one of\n");  printf("\t      a -- annealing of uniform smoothing (DEFAULT = %g)\n", DEFAULT_rowAnnealingFactor);  printf("\t      h -- maximum entropy smoothing\n");  printf("\t      n -- no smoothing (DEFAULT)\n");  printf("\t      u -- uniform smoothing (DEFAULT = %g)\n", DEFAULT_rowSmoothingFactor);  printf("\t           smoothingMagnitude is required for '-M a/h/u'\n");}void printRunNum(){  printf("\t-N  runNum\n");  printf("\t    specifies number of runs to get averaged statistics (DEFAULT = %d)\n", DEFAULT_numRun);}void printOutputLabelType(){  printf("\t-O  outputFileType  [outputLabelType  outputOffsetType]  outputAccessMode  outputFilename\n");  printf("\t    specifies details of cocluster/objective/statistics file information\n");  printf("\t    outputFileType can be one of\n");  printf("\t      c -- output co-clusters\n");  printf("\t           outputLabelType is required after '-O c' and can be one of\n");  printf("\t             b -- each co-cluster block consisting of three rows like\n");  printf("\t                    #rows #columns (in 1st row)\n");  printf("\t                    list of row #s in (1,1)-th co-cluster (in 2nd row)\n");  printf("\t                    list of column #s in (1,2)-th column co-cluster (in 3rd row)\n");  printf("\t                    and so on\n");  printf("\t             s -- all co-clusters represented in a simple format of two rows like (DEFAULT)\n");  printf("\t                    list of row cluster labels of all rows (in 1st row)\n");  printf("\t                    list of column cluster labels of all columns (in 2nd row)\n");  printf("\t           outputOffsetType is required after '-O c b' and outputLabelType and can be one of\n");  printf("\t                    0 -- cluster label index starts from 0\n");  printf("\t                    1 -- cluster label index starts from 1\n");  printf("\t      o -- output objective function value(s)\n");  printf("\t      s -- output satistical information\n");  printf("\t    outputAccessMode can be one of\n");  printf("\t                           a -- append mode, useful with '-N randomRunNum'\n");  printf("\t                           o -- output mode\n");}void printRowClusterNum(){  printf("\t-R  rowClusterNum  (REQUIRED)\n");  printf("\t    specifies number of row clusters (DEFAULT = %d)\n", DEFAULT_numRowCluster);}void printSeedingType(){  printf("\t-S  seedingType  seedingLabelSelection  [seedingPertValue\n");   printf("\t                                         | (seedingOffsetType  numSeedingSet seedingFilename)\n");   printf("\t                                         | (numRowPermutation | numColPermutation)]\n");  printf("\t    specifies details of initial cluster assignment\n");  printf("\t    seedingType can be one of\n");  printf("\t      f -- farthest apart assignment\n");  printf("\t      m -- permute an initial random cluster vector\n");  printf("\t           numRowPermutation or/and numColPermutation is/are required after '-S m b/c/r'\n");  printf("\t                    numRowPermutation  numColPermutation -- after '-S m b'\n");   printf("\t                    numColPermutation -- after '-S m c'\n");   printf("\t                    numRowPermutation -- after '-S m r'\n");  printf("\t      p -- perturbates cluster centroids\n");  printf("\t           seedingPertValue is required after '-S p'\n");  printf("\t      r -- random assignment (DEFAULT)\n");  printf("\t      s -- read cluster labels from a seeding file\n");          printf("\t           seedingOffsetType is required after '-S s b/c/r' and one of\n");  printf("\t                    0 -- seeding label index starts from 0\n");  printf("\t                    1 -- seeding label index starts from 1\n");  printf("\t           numSeedingSet is required after '-S s b/c/r 0/1'\n");  printf("\t                           n -- positive integer (>=1)\n");  printf("\t           seedingFilename is required after '-S s b/c/r/ 0/1 n'\n");  printf("\t    seedingLabelSelection can be one of\n");  printf("\t             b -- both row and column cluster labels\n");  printf("\t             c -- column cluster labels\n");  printf("\t             r -- row cluster labels\n");}void printThresholdType(){  printf("\t-T  thresholdType  rowThreshold  colThreshold\n");  printf("\t    specifies details of threshold for either batch update or local search\n");  printf("\t    thresholdType can be one of\n");  printf("\t      b -- batch update (default = +%g)\n", DEFAULT_rowBatchUpdateThreshold);  printf("\t      l -- local search (default = %g)\n", DEFAULT_rowLocalSearchThreshold);  printf("\t    Both rowThreshold and colThreshold are required.\n");}void printUpdateType(){  printf("\t-U  updateType  [updateSelection  |  (rowLocalSearchLength  colLocalSearchLength)]\n");  printf("\t    specifies details of cluster-centroid-updating order in batch update or local search\n");  printf("\t    updateType can be one of\n");  printf("\t      b -- batch update\n");  printf("\t           updateSelection is required after '-U b' and can be one of\n");  printf("\t             0 -- single row and single column, respectively (DEFAULT)\n");  printf("\t             1 -- single row and single column, in batch mode\n");  printf("\t             2 -- single row or single column, flipping a pair coin\n");  printf("\t             3 -- multiple run of either row or column, flipping a pair coin\n");  printf("\t      l -- local search\n");  printf("\t           rowLocalSearchLength (DEFAULT = %d) is required after 'l'\n", DEFAULT_rowLocalSearchLength);  printf("\t           colLocalSearchLength (DEFAULT = %d) is required after 'l' and rowLocalSearchLength\n", DEFAULT_colLocalSearchLength);  printf("\t           To avoid empty row/column cluster(s), use -1 for row/colLocalSearchLength.\n");}void printTakingReverse(){  printf("\t-X  anticorrelation\n");  printf("\t    specifies to capture anti-correlated rows (by taking reverse of rows)\n");  printf("\t    anticorrelation can be one of\n");  printf("\t      0 -- don't capture anti-correlated rows (DEFAULT)\n");  printf("\t      1 -- capture anti-correlated rows\n");}// show command-line parameters...void printHelp(){  printUsage();  printDescription();  printAuthor();    printAlgorithmType();    printColClusterNum();  printDumpLevel();  printShowingEachCluster();  printInputMatrixType();    printComputingOneWayObject();  printClassLabelSelection();  printSmoothingType();  printRunNum();  printOutputLabelType();  printRowClusterNum();            printSeedingType();  printThresholdType();  printUpdateType();  printTakingReverse();  printf("\n");  exit(EXIT_SUCCESS);}// set default parameters...void setCommandLine(commandLineArgument &myCLA){ // default parameters   myCLA.numInvalidCLA			= 0;  myCLA.algorithmType 			= DEFAULT_algorithmType;  myCLA.numColCluster 			= DEFAULT_numColCluster;  myCLA.dumpLevel 			= DEFAULT_dumpLevel;  myCLA.showingEachCluster		= DEFAULT_showingEachCluster;  myCLA.inputMatrixType 		= DEFAULT_inputMatrixType;  myCLA.inputFormatType			= DEFAULT_inputFormatType;  myCLA.computingOneWayObjective 	= DEFAULT_computingOneWayObjective;  myCLA.externalValidityType		= DEFAULT_externalValidityType;  myCLA.classOffsetType			= DEFAULT_classOffsetType;  myCLA.numRowClass 			= DEFAULT_numRowClass;	// used in validation  myCLA.numColClass 			= DEFAULT_numColClass;	// used in validation  myCLA.smoothingType 			= DEFAULT_smoothingType;  myCLA.rowSmoothingFactor 		= DEFAULT_rowSmoothingFactor;  myCLA.colSmoothingFactor 		= DEFAULT_colSmoothingFactor;  myCLA.numRun 				= DEFAULT_numRun;  myCLA.coclusterOffsetType		= DEFAULT_coclusterOffsetType;  myCLA.coclusterLabelType 		= DEFAULT_coclusterLabelType;  myCLA.coclusterAccessMode 		= DEFAULT_coclusterAccessMode;  myCLA.numRowCluster 			= DEFAULT_numRowCluster;  myCLA.colInitializationMethod 	= DEFAULT_colInitializationMethod;  myCLA.rowInitializationMethod		= DEFAULT_rowInitializationMethod;  myCLA.rowSeedingOffsetType		= DEFAULT_rowSeedingOffsetType;  myCLA.colSeedingOffsetType		= DEFAULT_colSeedingOffsetType;  myCLA.numRowSeedingSet		= DEFAULT_numRowSeedingSet;  myCLA.numColSeedingSet		= DEFAULT_numColSeedingSet;  myCLA.rowSeedingAccessMode		= DEFAULT_rowSeedingAccessMode;  myCLA.colSeedingAccessMode		= DEFAULT_colSeedingAccessMode;  myCLA.perturbationMagnitude		= DEFAULT_perturbationMagnitude;  myCLA.numRowPermutation		= DEFAULT_numRowPermutation;  myCLA.numColPermutation		= DEFAULT_numColPermutation;  myCLA.rowBatchUpdateThreshold 	= DEFAULT_rowBatchUpdateThreshold;  myCLA.colBatchUpdateThreshold 	= DEFAULT_colBatchUpdateThreshold;  myCLA.rowLocalSearchThreshold 	= DEFAULT_rowLocalSearchThreshold;  myCLA.colLocalSearchThreshold 	= DEFAULT_colLocalSearchThreshold;  myCLA.batchUpdateType 		= DEFAULT_batchUpdateType;  myCLA.localSearchType			= DEFAULT_localSearchType;	// not used...  myCLA.rowLocalSearchLength 		= DEFAULT_rowLocalSearchLength;  myCLA.colLocalSearchLength 		= DEFAULT_colLocalSearchLength;      myCLA.takingReverse			= DEFAULT_takingReverse;  myCLA.havingArgument			= DEFAULT_havingArgument;  myCLA.emptyRowId			= NULL;	// not used...  myCLA.emptyColId 			= NULL;  myCLA.numEmptyRow			= 0;  myCLA.numEmptyCol 			= 0;  myCLA.rowClassLabel 			= NULL;		// used in validation  myCLA.colClassLabel 			= NULL;		// used in validation  // input and output files  strncpy(myCLA.dumpFilename, EMPTY_STRING, FILENAME_LENGTH );  strncpy(myCLA.inputFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.bothClassFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.rowClassFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.colClassFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.objectiveFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.coclusterFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.bothSeedingFilename, EMPTY_STRING, FILENAME_LENGTH);		// not used  strncpy(myCLA.rowSeedingFilename, EMPTY_STRING, FILENAME_LENGTH);  strncpy(myCLA.colSeedingFilename, EMPTY_STRING, FILENAME_LENGTH);  strcpy(myCLA.scalingType, TXX_SCALING);		// used only for the matrix in CCS}// get type of co-clustering algorithm from command-line...char **getAlgorithmType(int argc, char **argv, commandLineArgument &myCLA){  bool validCLA = true;  switch (toupper((*(++argv))[0])){    case MSSRCC_I_ALGORITHM:      myCLA.algorithmType = MINIMUM_SUM_SQUARE_RESIDUE_I_CC;      break;    case ITCC_ALGORITHM:      myCLA.algorithmType = INFORMATION_THEORETIC_CC;      break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -