📄 matlabnmf.java
字号:
* Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String evaluatorTipText() { return "Set the attribute evaluator to use. This evaluator is used " +"during the attribute selection phase before the classifier is " +"invoked."; } /** * Sets the attribute evaluator * * @param evaluator the evaluator with all options set. */ public void setEvaluator(ASEvaluation evaluator) { m_eval = evaluator; } /** * Gets the attribute evaluator used * * @return the attribute evaluator */ public ASEvaluation getEvaluator() { return m_eval; } /** * Gets the evaluator specification string, which contains the class * name of the attribute evaluator and any options to it * * @return the evaluator string. */ protected String getEvaluatorSpec() { ASEvaluation e = getEvaluator(); if (e instanceof OptionHandler) { return e.getClass().getName() + " " + Utils.joinOptions(((OptionHandler)e).getOptions()); } return e.getClass().getName(); } /** * Gets the current settings of MatlabNMF * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions () { String[] options = new String[9]; int current = 0; if (!getNormalize()) { options[current++] = "-D"; } options[current++] = "-R"; options[current++] = ""+getRank(); options[current++] = "-n"; options[current++] = ""+getIterations(); options[current++] = "-O"; options[current++] = ""+getObjectiveFunction(); options[current++] = "-E"; options[current++] = ""+getEvaluatorSpec(); while (current < options.length) { options[current++] = ""; } return options; } /** * Initializes NMF. 
* @param data the instances to analyze * @exception Exception if analysis fails */ public void buildEvaluator(Instances data) throws Exception { buildAttributeConstructor(data); } private void buildAttributeConstructor (Instances data) throws Exception { m_basis = null; m_attributeFilter = null; if (data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Can't handle string attributes!"); } m_trainInstances = data; // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainCopy = new Instances(m_trainInstances); System.out.println("Copied instances"); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); System.out.println("Replaced missing values"); if (m_normalize) { m_normalizeFilter = new Normalize(); m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter); System.out.println("Normalized"); } // delete any attributes with only one distinct value or are all missing Vector deleteCols = new Vector(); for (int i=0;i<m_trainInstances.numAttributes();i++) { if (m_trainInstances.numDistinctValues(i) <=1) { deleteCols.addElement(new Integer(i)); } } System.out.println("Deleted single-value attributes"); if (m_trainInstances.classIndex() >=0) { // get rid of the class column m_hasClass = true; deleteCols.addElement(new Integer(m_trainInstances.classIndex())); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int [] todelete = new int [deleteCols.size()]; for (int i=0;i<deleteCols.size();i++) { todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); 
m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } System.out.println("Removed attributes filtered above"); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); // w/o class index // Reduce output rank when # attributes falls below that if (getRank() > m_numAttribs) { System.out.println("Reduce rank to number of attributes"); setRank(m_numAttribs); } System.out.println("About to run NMF in matlab with " + m_numAttribs + " attributes"); dumpScripts(); dumpInstances(m_dataFilename); int[] params = new int[3]; params[0] = getRank(); params[1] = getIterations(); params[2] = getObjectiveFunction(); dumpVector(m_paramFilename, params, 3); runMatlab(m_mFile); System.out.println("Done training"); m_basis = readVectors(m_basisFilename, -1); System.out.println("Successfully parsed matlab output files"); m_transformedFormat = setOutputFormat(); // Build the attribute evaluator if (!(m_eval instanceof AttributeEvaluator)) { throw new Exception("Invalid attribute evaluator!"); } m_eval.buildEvaluator(transformedData()); // Save the basis vectors in decreasing order of ranking double[] merit = new double[m_rank]; for (int i = 0; i < m_rank; ++i) merit[i] = ((AttributeEvaluator) m_eval).evaluateAttribute(i); int[] pos = Utils.sort(merit); int[] bestToWorst = new int[m_rank]; for (int i = 0; i < m_rank; ++i) bestToWorst[m_rank-pos[i]-1] = i; try { // Save attribute names first m_timestamp = MatlabPCA.getLogTimestamp(); MatlabPCA.dumpAttributeNames (m_trainInstances, m_nmfAttributeFilenameBase+m_timestamp+".txt"); // Then save the basis vectors in order PrintWriter writer = new PrintWriter (new BufferedOutputStream (new FileOutputStream(m_rankedBasisFilenameBase+m_timestamp+".txt"))); for (int i = 0; i < m_numAttribs; ++i) { for (int j = 0; j < m_rank; ++j) writer.print(m_basis[i][bestToWorst[j]] + " "); writer.println(); } writer.close(); } catch (Exception e) { System.err.println("Could not create a temporary 
file for dumping basis vectors: " + e); } } /** Read column vectors from a text file * @param name file name * @param maxVectors max number of vectors to read, -1 to read all * @return double[][] array corresponding to vectors */ public double[][] readVectors(String name, int maxVectors) throws Exception { BufferedReader r = new BufferedReader(new FileReader(name)); int numAttributes=-1, numVectors=-1; // number of attributes String s = r.readLine(); try { numAttributes = (int) Double.parseDouble(s); } catch (Exception e) { System.err.println("Couldn't parse " + s + " as int"); } // number of vectors s = r.readLine(); try { numVectors = (int) Double.parseDouble(s); } catch (Exception e) { System.err.println("Couldn't parse " + s + " as int"); } double[][] vectors = new double[numAttributes][numVectors]; int i = 0; while ((s = r.readLine()) != null) { StringTokenizer tokenizer = new StringTokenizer(s); int j = 0; while (tokenizer.hasMoreTokens()) { String value = tokenizer.nextToken(); try { vectors[i][j] = Double.parseDouble(value); } catch (Exception e) { System.err.println("Couldn't parse " + value + " as double"); } j++; if (j > numVectors) { System.err.println("Too many vectors in line: " + s); } } if (j != numVectors) { System.err.println("Too few vectors in line: " + s); } i++; if (i > numAttributes) { System.err.println("Too many attributes: " + i + " expecting " + numAttributes + " attributes"); } } if (i != numAttributes) { System.err.println("Too few attributes: " + i + " expecting " + numAttributes + " attributes"); } return vectors; } /** Read a column vector from a text file * @param vector array into which the column vector is stored * @param name file name * @returns double[] array corresponding to a vector */ public void readVector(double[] vector, String name) throws Exception { // Determine the dimensionality from the first line BufferedReader r = new BufferedReader(new FileReader(name)); int numAttributes = -1; // Read the number of attributes 
String s = r.readLine(); try { numAttributes = (int) Double.parseDouble(s); } catch (Exception e) { System.err.println("Couldn't parse " + s + " as int"); } // Assume vector has enough space int i = 0; while ((s = r.readLine()) != null) { try { vector[i] = Double.parseDouble(s); } catch (Exception e) { System.err.println("Couldn't parse " + s + " as double"); } i++; if (i > numAttributes) { System.err.println("Too many attributes: " + i + " expecting " + numAttributes + " attributes"); } } if (i != numAttributes) { System.err.println("Too few attributes: " + i + " expecting " + numAttributes + " attributes"); } } /** * Returns just the header for the transformed data (ie. an empty * set of instances. This is so that AttributeSelection can * determine the structure of the transformed data without actually * having to get all the transformed data through getTransformedData(). * @return the header of the transformed data. * @exception Exception if the header of the transformed data can't * be determined. */ public Instances transformedHeader() throws Exception { if (m_basis == null) { throw new Exception("Basis hasn't been formed yet"); } return m_transformedFormat; } /** * Gets the transformed training data. * @return the transformed training data * @exception Exception if transformed data can't be returned */ public Instances transformedData() throws Exception { if (m_basis == null) { throw new Exception("Basis hasn't been formed yet"); } Instances output; output = new Instances(m_transformedFormat); double[][] encoding = readVectors(m_encodingFilename, -1); for (int i = 0; i < m_trainCopy.numInstances(); ++i) { Instance inst = m_trainCopy.instance(i); double[] h = null; if (m_hasClass) { h = new double[m_rank+1]; h[m_rank] = inst.value(inst.classIndex()); } else h = new double[m_rank]; for (int j = 0; j < m_rank; ++j) h[j] = encoding[j][i]; output.add(new Instance(inst.weight(), h)); } return output; } /** * Evaluates the merit of a transformed attribute. 
* @param att the attribute to be evaluated
   * @return the merit of a transformed attribute
   * @exception Exception if attribute can't be evaluated
   */
  public double evaluateAttribute(int att) throws Exception {
    // The basis must exist before any merit can be reported.
    final boolean basisMissing = (m_basis == null);
    if (basisMissing) {
      throw new Exception("Basis hasn't been formed yet!");
    }

    // Delegate to the wrapped evaluator when it scores single attributes.
    if (m_eval instanceof AttributeEvaluator) {
      final AttributeEvaluator delegate = (AttributeEvaluator) m_eval;
      return delegate.evaluateAttribute(att);
    }
    throw new Exception("Invalid attribute evaluator!");
  }

  /**
   * Dump scripts into temporary files
   */
  private void dumpScripts() {
    try {
      PrintWriter nmf = new PrintWriter(new BufferedOutputStream(new FileOutputStream(m_mFile)));
      nmf.print ("V=load('" + m_dataFilename + "');\n"+
                 "param=load('" + m_paramFilename + "');\n"+
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -