📄 ensembleselectionlibrary.java
字号:
} // Now we have all the data chopped up appropriately, and we can train all models

// Train every model in the library on m_trainingData and cache its
// predictions on the validation ("hillclimb") set.
Iterator it = m_Models.iterator();
int model_index = 0;
// predictions indexed as [model][validation instance][class]
m_predictions = new double[m_Models.size()][validationSet.numInstances()][data.numClasses()];
// We'll keep a set of all the models which fail so that we can remove them from
// our library.
Set invalidModels = new HashSet();
while (it.hasNext()) {
    // For each model,
    EnsembleSelectionLibraryModel model = (EnsembleSelectionLibraryModel)it.next();
    // set the appropriate options
    model.setDebug(m_Debug);
    model.setFolds(m_folds);
    model.setSeed(m_seed);
    model.setValidationRatio(m_validationRatio);
    // The checksum ties any cached on-disk model to this exact data set.
    model.setChecksum(getInstancesChecksum(data));
    try {
        // Create the model. This will attempt to load the model, if it
        // already exists. If it does not, it will train the model using
        // m_trainingData and cache the model's predictions for
        // m_hillclimbData.
        model.createModel(m_trainingData, m_hillclimbData, dataDirectory.getPath(), algorithm);
    } catch (Exception e) {
        // If the model failed, print a message and add it to our set of
        // invalid models.
        System.out.println("**Couldn't create model "+model.getStringRepresentation()
            +" because of following exception: "+e.getMessage());
        invalidModels.add(model);
        continue;
    }
    if (!invalidModels.contains(model)) {
        // If the model succeeded, add its predictions to our array
        // of predictions. Note that the successful models' predictions
        // are packed in to the front of m_predictions.
        m_predictions[model_index] = model.getValidationPredictions();
        ++model_index;
        // We no longer need it in memory, so release it.
        model.releaseModel();
    }
}
// Remove all invalidModels from m_Models.
it = invalidModels.iterator();
while (it.hasNext()) {
    EnsembleSelectionLibraryModel model = (EnsembleSelectionLibraryModel)it.next();
    if (m_Debug)
        System.out.println("removing invalid library model: "+model.getStringRepresentation());
    m_Models.remove(model);
}
if (m_Debug)
    System.out.println("model index: "+model_index+" tree set size: "+m_Models.size());
if (invalidModels.size() > 0) {
    // If we had any invalid models, we have some bad predictions in the back
    // of m_predictions, so we'll shrink it to the right size.
    double tmpPredictions[][][] = new double[m_Models.size()][][];
    for (int i = 0; i < m_Models.size(); i++) {
        tmpPredictions[i] = m_predictions[i];
    }
    m_predictions = tmpPredictions;
}
if (m_Debug)
    System.out.println("Finished remapping models");
// Give the appropriate "hillclimb" set back to ensemble selection.
return validationSet;
}

/**
 * Creates the working directory associated with this library.
 *
 * @param dirName the new directory
 */
public void createWorkingDirectory(String dirName) {
    File directory = new File(dirName);
    // mkdirs also creates any missing parent directories
    if (!directory.exists())
        directory.mkdirs();
}

/**
 * This will remove the model associated with the given String
 * from the model library.
 *
 * NOTE(review): m_Models appears to contain model objects (see
 * getModelNames below), so removing by the String key may be a
 * no-op — confirm the element type of m_Models.
 *
 * @param modelKey the key of the model
 */
public void removeModel(String modelKey) {
    m_Models.remove(modelKey);
    //TODO - is this really all there is to it??
}

/**
 * This method will return a Set object containing all the
 * String representations of the models. The iterator across
 * this Set object will return the model names in alphabetical
 * order (a TreeSet keeps its elements sorted).
 *
 * @return all model representations
 */
public Set getModelNames() {
    Set names = new TreeSet();
    Iterator it = m_Models.iterator();
    while (it.hasNext()) {
        names.add(((EnsembleLibraryModel)it.next()).getStringRepresentation());
    }
    return names;
}

/**
 * This method will get the predictions for all the models in the
 * ensemble library. If cross validation is used, then predictions
 * will be returned for the entire training set.
 * If cross validation is not used, then predictions will only be
 * returned for the ratio of the training set reserved for validation.
 *
 * @return the predictions, indexed as [model][instance][class]
 */
public double[][][] getHillclimbPredictions() {
    return m_predictions;
}

/**
 * Gets the working directory of the ensemble library.
 *
 * @return the working directory.
 */
public File getWorkingDirectory() {
    return m_workingDirectory;
}

/**
 * Sets the working directory of the ensemble library and notifies
 * any registered listeners of the change.
 *
 * @param workingDirectory the working directory to use.
 */
public void setWorkingDirectory(File workingDirectory) {
    m_workingDirectory = workingDirectory;
    if (m_workingDirectoryPropertySupport != null) {
        // null/null/null fires an unconditional "something changed" event
        m_workingDirectoryPropertySupport.firePropertyChange(null, null, null);
    }
}

/**
 * Gets the model list file that holds the list of models
 * in the ensemble library.
 *
 * @return the model list file.
 */
public String getModelListFile() {
    return m_modelListFile;
}

/**
 * Sets the model list file that holds the list of models
 * in the ensemble library.
 *
 * @param modelListFile the model list file to use
 */
public void setModelListFile(String modelListFile) {
    m_modelListFile = modelListFile;
}

/**
 * Creates a LibraryModel from a classifier.
 *
 * @param classifier the classifier to use
 * @return the generated library model
 */
public EnsembleLibraryModel createModel(Classifier classifier) {
    EnsembleSelectionLibraryModel model = new EnsembleSelectionLibraryModel(classifier);
    model.setDebug(m_Debug);
    return model;
}

/**
 * This method takes a String argument defining a classifier and
 * uses it to create a base Classifier.
 *
 * WARNING! This method is only called when trying to create models
 * from flat files (.mlf). This method is highly untested and
 * foreseeably will cause problems when trying to nest arguments
 * within multiple meta classifiers. To avoid any problems we
 * recommend using only XML serialization, via saving to
 * .model.xml and using only the createModel(Classifier) method
 * above.
* * @param modelString the classifier definition * @return the generated library model */ public EnsembleLibraryModel createModel(String modelString) { String[] splitString = modelString.split("\\s+"); String className = splitString[0]; String argString = modelString.replaceAll(splitString[0], ""); String[] optionStrings = argString.split("\\s+"); EnsembleSelectionLibraryModel model = null; try { model = new EnsembleSelectionLibraryModel(Classifier.forName(className, optionStrings)); model.setDebug(m_Debug); } catch (Exception e) { e.printStackTrace(); } return model; } /** * This method takes an Instances object and returns a checksum of its * toString method - that is the checksum of the .arff file that would * be created if the Instances object were transformed into an arff file * in the file system. * * @param instances the data to get the checksum for * @return the checksum */ public static String getInstancesChecksum(Instances instances) { String checksumString = null; try { Adler32 checkSummer = new Adler32(); byte[] utf8 = instances.toString().getBytes("UTF8");; checkSummer.update(utf8); checksumString = Long.toHexString(checkSummer.getValue()); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } return checksumString; } /** * Returns the unique name for the set of instances supplied. This is * used to create a directory for all of the models corresponding to that * set of instances. This was intended as a way to keep Working Directories * "organized" * * @param instances the data to get the directory for * @return the directory */ public static String getDataDirectoryName(Instances instances) { String directory = null; directory = new String(instances.numInstances()+ "_instances_"+getInstancesChecksum(instances)); //System.out.println("generated directory name: "+directory); return directory; } /** * Adds an object to the list of those that wish to be informed when the * eotking directory changes. 
* * @param listener a new listener to add to the list */ public void addWorkingDirectoryListener(PropertyChangeListener listener) { if (m_workingDirectoryPropertySupport != null) { m_workingDirectoryPropertySupport.addPropertyChangeListener(listener); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -