bvdecomposesegcvsub.java
来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 1,109 行 · 第 1/3 页
JAVA
1,109 行
} } else { setClassIndex(0); } String classifyIterations = Utils.getOption('l', options); if (classifyIterations.length() != 0) { setClassifyIterations(Integer.parseInt(classifyIterations)); } else { setClassifyIterations(10); } String prob = Utils.getOption('p', options); if (prob.length() != 0) { setP( Double.parseDouble(prob)); } else { setP(-1); } //throw new Exception("A proportion must be specified" + " with a -p option."); String seedString = Utils.getOption('s', options); if (seedString.length() != 0) { setSeed(Integer.parseInt(seedString)); } else { setSeed(1); } String dataFile = Utils.getOption('t', options); if (dataFile.length() != 0) { setDataFileName(dataFile); } else { throw new Exception("An arff file must be specified" + " with the -t option."); } String trainSize = Utils.getOption('T', options); if (trainSize.length() != 0) { setTrainSize(Integer.parseInt(trainSize)); } else { setTrainSize(-1); } //throw new Exception("A training set size must be specified" + " with a -T option."); String classifierName = Utils.getOption('W', options); if (classifierName.length() != 0) { setClassifier(Classifier.forName(classifierName, Utils.partitionOptions(options))); } else { throw new Exception("A learner must be specified with the -W option."); } } /** * Gets the current settings of the CheckClassifier. 
*
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {

    // Options of the wrapped classifier, if it exposes any. instanceof is
    // false for null, so no separate null check is required.
    String[] classifierOptions = new String[0];
    if (m_Classifier instanceof OptionHandler) {
      classifierOptions = ((OptionHandler) m_Classifier).getOptions();
    }

    // Worst case for this object's own entries:
    //   "-D"                                   ->  1
    //   -c, -l, -p, -s, -t, -T, -W with values -> 14
    //   "--"                                   ->  1
    // i.e. 16 slots. The previous size of 14 overflowed the array as soon as
    // both a data file name and a classifier were set.
    String[] options = new String[classifierOptions.length + 16];
    int current = 0;

    if (getDebug()) {
      options[current++] = "-D";
    }
    options[current++] = "-c";
    options[current++] = "" + getClassIndex();
    options[current++] = "-l";
    options[current++] = "" + getClassifyIterations();
    options[current++] = "-p";
    options[current++] = "" + getP();
    options[current++] = "-s";
    options[current++] = "" + getSeed();
    if (getDataFileName() != null) {
      options[current++] = "-t";
      options[current++] = "" + getDataFileName();
    }
    options[current++] = "-T";
    options[current++] = "" + getTrainSize();
    if (getClassifier() != null) {
      options[current++] = "-W";
      options[current++] = getClassifier().getClass().getName();
    }

    // Everything after "--" belongs to the wrapped classifier.
    options[current++] = "--";
    System.arraycopy(classifierOptions, 0, options, current,
                     classifierOptions.length);
    current += classifierOptions.length;

    // Pad the unused tail with empty strings so no slot is left null.
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Set the classifier being analysed
   *
   * @param newClassifier the Classifier to use.
   */
  public void setClassifier(Classifier newClassifier) {
    m_Classifier = newClassifier;
  }

  /**
   * Gets the classifier being analysed
   *
   * @return the classifier being analysed.
*/
  public Classifier getClassifier() {
    return this.m_Classifier;
  }

  /**
   * Turns debugging mode on or off.
   *
   * @param debug true if debug output should be printed
   */
  public void setDebug(boolean debug) {
    this.m_Debug = debug;
  }

  /**
   * Reports whether debugging mode is enabled.
   *
   * @return true if debugging output is on
   */
  public boolean getDebug() {
    return this.m_Debug;
  }

  /**
   * Stores the seed for the random number generator.
   *
   * @param seed the random number seed
   */
  public void setSeed(int seed) {
    this.m_Seed = seed;
  }

  /**
   * Returns the seed for the random number generator.
   *
   * @return the random number seed
   */
  public int getSeed() {
    return this.m_Seed;
  }

  /**
   * Stores how many times an instance is classified.
   *
   * @param classifyIterations number of times an instance is classified
   */
  public void setClassifyIterations(int classifyIterations) {
    this.m_ClassifyIterations = classifyIterations;
  }

  /**
   * Returns how many times an instance is classified.
   *
   * @return the maximum number of times an instance is classified
   */
  public int getClassifyIterations() {
    return this.m_ClassifyIterations;
  }

  /**
   * Stores the name of the dataset file.
   *
   * @param dataFileName name of dataset file.
   */
  public void setDataFileName(String dataFileName) {
    this.m_DataFileName = dataFileName;
  }

  /**
   * Returns the name of the data file used for the decomposition.
   *
   * @return the name of the data file
   */
  public String getDataFileName() {
    return this.m_DataFileName;
  }

  /**
   * Get the index (starting from 1) of the attribute used as the class.
* * @return the index of the class attribute */ public int getClassIndex() { return m_ClassIndex + 1; } /** * Sets index of attribute to discretize on * * @param classIndex the index (starting from 1) of the class attribute */ public void setClassIndex(int classIndex) { m_ClassIndex = classIndex - 1; } /** * Get the calculated bias squared according to the Kohavi and Wolpert definition * * @return the bias squared */ public double getKWBias() { return m_KWBias; } /** * Get the calculated bias according to the Webb definition * * @return the bias * */ public double getWBias() { return m_WBias; } /** * Get the calculated variance according to the Kohavi and Wolpert definition * * @return the variance */ public double getKWVariance() { return m_KWVariance; } /** * Get the calculated variance according to the Webb definition * * @return the variance according to Webb * */ public double getWVariance() { return m_WVariance; } /** * Get the calculated sigma according to the Kohavi and Wolpert definition * * @return the sigma * */ public double getKWSigma() { return m_KWSigma; } /** * Set the training size. * * @param size the size of the training set * */ public void setTrainSize(int size) { m_TrainSize = size; } /** * Get the training size * * @return the size of the training set * */ public int getTrainSize() { return m_TrainSize; } /** * Set the proportion of instances that are common between two training sets * used to train a classifier. * * @param proportion the proportion of instances that are common between training * sets. * */ public void setP(double proportion) { m_P = proportion; } /** * Get the proportion of instances that are common between two training sets. * * @return the proportion * */ public double getP() { return m_P; } /** * Get the calculated error rate * * @return the error rate */ public double getError() { return m_Error; } /** * Carry out the bias-variance decomposition using the sub-sampled cross-validation method. 
* * @throws Exception if the decomposition couldn't be carried out */ public void decompose() throws Exception { Reader dataReader; Instances data; int tps; // training pool size, size of segment E. int k; // number of folds in segment E. int q; // number of segments of size tps. dataReader = new BufferedReader(new FileReader(m_DataFileName)); //open file data = new Instances(dataReader); // encapsulate in wrapper class called weka.Instances() if (m_ClassIndex < 0) { data.setClassIndex(data.numAttributes() - 1); } else { data.setClassIndex(m_ClassIndex); } if (data.classAttribute().type() != Attribute.NOMINAL) { throw new Exception("Class attribute must be nominal"); } int numClasses = data.numClasses(); data.deleteWithMissingClass(); if ( data.checkForStringAttributes() ) { throw new Exception("Can't handle string attributes!"); } // Dataset size must be greater than 2 if ( data.numInstances() <= 2 ){ throw new Exception("Dataset size must be greater than 2."); } if ( m_TrainSize == -1 ){ // default value m_TrainSize = (int) Math.floor( (double) data.numInstances() / 2.0 ); }else if ( m_TrainSize < 0 || m_TrainSize >= data.numInstances() - 1 ) { // Check if 0 < training Size < D - 1 throw new Exception("Training set size of "+m_TrainSize+" is invalid."); } if ( m_P == -1 ){ // default value m_P = (double) m_TrainSize / ( (double)data.numInstances() - 1 ); }else if ( m_P < ( m_TrainSize / ( (double)data.numInstances() - 1 ) ) || m_P >= 1.0 ) { //Check if p is in range: m/(|D|-1) <= p < 1.0 throw new Exception("Proportion is not in range: "+ (m_TrainSize / ((double) data.numInstances() - 1 )) +" <= p < 1.0 ");
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?