
📄 mismo.java

📁 Assorted data mining algorithms implemented in Java, including clustering, classification, preprocessing, and more
💻 JAVA
📖 Page 1 of 5
    // Filter instances
    if (m_Missing != null)
      insts = Filter.useFilter(insts, m_Missing);
    if (m_Filter != null)
      insts = Filter.useFilter(insts, m_Filter);

    // convert the single-instance format to multi-instance format
    convertToMI.setInputFormat(insts);
    insts = Filter.useFilter(insts, convertToMI);
    inst = insts.instance(0);

    if (!m_fitLogisticModels) {
      double[] result = new double[inst.numClasses()];
      for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = i + 1; j < inst.numClasses(); j++) {
          if ((m_classifiers[i][j].m_alpha != null) ||
              (m_classifiers[i][j].m_sparseWeights != null)) {
            double output = m_classifiers[i][j].SVMOutput(-1, inst);
            if (output > 0) {
              result[j] += 1;
            } else {
              result[i] += 1;
            }
          }
        }
      }
      Utils.normalize(result);
      return result;
    } else {
      // We only need to do pairwise coupling if there are more
      // than two classes.
      if (inst.numClasses() == 2) {
        double[] newInst = new double[2];
        newInst[0] = m_classifiers[0][1].SVMOutput(-1, inst);
        newInst[1] = Instance.missingValue();
        return m_classifiers[0][1].m_logistic.
          distributionForInstance(new Instance(1, newInst));
      }
      double[][] r = new double[inst.numClasses()][inst.numClasses()];
      double[][] n = new double[inst.numClasses()][inst.numClasses()];
      for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = i + 1; j < inst.numClasses(); j++) {
          if ((m_classifiers[i][j].m_alpha != null) ||
              (m_classifiers[i][j].m_sparseWeights != null)) {
            double[] newInst = new double[2];
            newInst[0] = m_classifiers[i][j].SVMOutput(-1, inst);
            newInst[1] = Instance.missingValue();
            r[i][j] = m_classifiers[i][j].m_logistic.
              distributionForInstance(new Instance(1, newInst))[0];
            n[i][j] = m_classifiers[i][j].m_sumOfWeights;
          }
        }
      }
      return pairwiseCoupling(n, r);
    }
  }
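  // Note on the method above: without fitted logistic models, each pairwise
  // SMO casts a single vote (result[i] or result[j]) and the normalized vote
  // counts serve as the class distribution. With logistic models (option -M),
  // each pairwise SVM output is instead mapped to a probability estimate
  // r[i][j], weighted by the sum of training weights n[i][j], and the
  // per-class estimates are reconciled by pairwiseCoupling() below.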
  /**
   * Implements pairwise coupling.
   *
   * @param n the sum of weights used to train each model
   * @param r the probability estimate from each model
   * @return the coupled estimates
   */
  public double[] pairwiseCoupling(double[][] n, double[][] r) {

    // Initialize p and u array
    double[] p = new double[r.length];
    for (int i = 0; i < p.length; i++) {
      p[i] = 1.0 / (double) p.length;
    }
    double[][] u = new double[r.length][r.length];
    for (int i = 0; i < r.length; i++) {
      for (int j = i + 1; j < r.length; j++) {
        u[i][j] = 0.5;
      }
    }

    // firstSum doesn't change
    double[] firstSum = new double[p.length];
    for (int i = 0; i < p.length; i++) {
      for (int j = i + 1; j < p.length; j++) {
        firstSum[i] += n[i][j] * r[i][j];
        firstSum[j] += n[i][j] * (1 - r[i][j]);
      }
    }

    // Iterate until convergence
    boolean changed;
    do {
      changed = false;
      double[] secondSum = new double[p.length];
      for (int i = 0; i < p.length; i++) {
        for (int j = i + 1; j < p.length; j++) {
          secondSum[i] += n[i][j] * u[i][j];
          secondSum[j] += n[i][j] * (1 - u[i][j]);
        }
      }
      for (int i = 0; i < p.length; i++) {
        if ((firstSum[i] == 0) || (secondSum[i] == 0)) {
          if (p[i] > 0) {
            changed = true;
          }
          p[i] = 0;
        } else {
          double factor = firstSum[i] / secondSum[i];
          double pOld = p[i];
          p[i] *= factor;
          if (Math.abs(pOld - p[i]) > 1.0e-3) {
            changed = true;
          }
        }
      }
      Utils.normalize(p);
      for (int i = 0; i < r.length; i++) {
        for (int j = i + 1; j < r.length; j++) {
          u[i][j] = p[i] / (p[i] + p[j]);
        }
      }
    } while (changed);

    return p;
  }
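  // The loop above is a fixed-point iteration in the style of Hastie and
  // Tibshirani's pairwise coupling: starting from a uniform p and
  // u[i][j] = 0.5, each pass rescales p[i] *= firstSum[i] / secondSum[i],
  // where firstSum[i] accumulates n[i][j] * r[i][j] over every pair
  // involving class i (with r oriented toward i) and secondSum[i]
  // accumulates n[i][j] * u[i][j] likewise. It then renormalizes p and
  // recomputes u[i][j] = p[i] / (p[i] + p[j]), stopping once no p[i]
  // changes by more than 1.0e-3.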
  /**
   * Returns the weights in sparse format.
   *
   * @return the weights in sparse format
   */
  public double[][][] sparseWeights() {
    int numValues = m_classAttribute.numValues();
    double[][][] sparseWeights = new double[numValues][numValues][];
    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        sparseWeights[i][j] = m_classifiers[i][j].m_sparseWeights;
      }
    }
    return sparseWeights;
  }

  /**
   * Returns the indices in sparse format.
   *
   * @return the indices in sparse format
   */
  public int[][][] sparseIndices() {
    int numValues = m_classAttribute.numValues();
    int[][][] sparseIndices = new int[numValues][numValues][];
    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        sparseIndices[i][j] = m_classifiers[i][j].m_sparseIndices;
      }
    }
    return sparseIndices;
  }

  /**
   * Returns the bias of each binary SMO.
   *
   * @return the bias of each binary SMO
   */
  public double[][] bias() {
    int numValues = m_classAttribute.numValues();
    double[][] bias = new double[numValues][numValues];
    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        bias[i][j] = m_classifiers[i][j].m_b;
      }
    }
    return bias;
  }

  /**
   * Returns the number of values of the class attribute.
   *
   * @return the number of values of the class attribute
   */
  public int numClassAttributeValues() {
    return m_classAttribute.numValues();
  }

  /**
   * Returns the names of the class attributes.
   *
   * @return the names of the class attributes
   */
  public String[] classAttributeNames() {
    int numValues = m_classAttribute.numValues();
    String[] classAttributeNames = new String[numValues];
    for (int i = 0; i < numValues; i++) {
      classAttributeNames[i] = m_classAttribute.value(i);
    }
    return classAttributeNames;
  }

  /**
   * Returns the attribute names.
   *
   * @return the attribute names
   */
  public String[][][] attributeNames() {
    int numValues = m_classAttribute.numValues();
    String[][][] attributeNames = new String[numValues][numValues][];
    for (int i = 0; i < numValues; i++) {
      for (int j = i + 1; j < numValues; j++) {
        int numAttributes = m_classifiers[i][j].m_data.numAttributes();
        String[] attrNames = new String[numAttributes];
        for (int k = 0; k < numAttributes; k++) {
          attrNames[k] = m_classifiers[i][j].m_data.attribute(k).name();
        }
        attributeNames[i][j] = attrNames;
      }
    }
    return attributeNames;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    Enumeration enm = super.listOptions();
    while (enm.hasMoreElements())
      result.addElement(enm.nextElement());

    result.addElement(new Option(
        "\tTurns off all checks - use with caution!\n"
        + "\tTurning them off assumes that data is purely numeric, doesn't\n"
        + "\tcontain any missing values, and has a nominal class. Turning them\n"
        + "\toff also means that no header information will be stored if the\n"
        + "\tmachine is linear. Finally, it also assumes that no instance has\n"
        + "\ta weight equal to 0.\n"
        + "\t(default: checks on)",
        "no-checks", 0, "-no-checks"));

    result.addElement(new Option(
        "\tThe complexity constant C. (default 1)",
        "C", 1, "-C <double>"));

    result.addElement(new Option(
        "\tWhether to 0=normalize/1=standardize/2=neither.\n"
        + "\t(default 0=normalize)",
        "N", 1, "-N"));

    result.addElement(new Option(
        "\tUse MIminimax feature space.",
        "I", 0, "-I"));

    result.addElement(new Option(
        "\tThe tolerance parameter. (default 1.0e-3)",
        "L", 1, "-L <double>"));

    result.addElement(new Option(
        "\tThe epsilon for round-off error. (default 1.0e-12)",
        "P", 1, "-P <double>"));

    result.addElement(new Option(
        "\tFit logistic models to SVM outputs.",
        "M", 0, "-M"));

    result.addElement(new Option(
        "\tThe number of folds for the internal cross-validation.\n"
        + "\t(default -1, use training data)",
        "V", 1, "-V <double>"));

    result.addElement(new Option(
        "\tThe random number seed. (default 1)",
        "W", 1, "-W <double>"));

    result.addElement(new Option(
        "\tThe Kernel to use.\n"
        + "\t(default: weka.classifiers.functions.supportVector.PolyKernel)",
        "K", 1, "-K <classname and parameters>"));

    result.addElement(new Option(
        "",
        "", 0, "\nOptions specific to kernel "
        + getKernel().getClass().getName() + ":"));

    enm = ((OptionHandler) getKernel()).listOptions();
    while (enm.hasMoreElements())
      result.addElement(enm.nextElement());

    return result.elements();
  }
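  // Illustrative example (this exact string is an assumption, not taken from
  // the source): an option string matching the listOptions() entries above
  // and accepted by setOptions() below could look like
  //
  //   -C 1.0 -N 0 -L 1.0e-3 -P 1.0e-12 -M -V -1 -W 1
  //   -K "weka.classifiers.mi.supportVector.MIPolyKernel -E 1.0"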
  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -no-checks
   *  Turns off all checks - use with caution!
   *  Turning them off assumes that data is purely numeric, doesn't
   *  contain any missing values, and has a nominal class. Turning them
   *  off also means that no header information will be stored if the
   *  machine is linear. Finally, it also assumes that no instance has
   *  a weight equal to 0.
   *  (default: checks on)</pre>
   *
   * <pre> -C &lt;double&gt;
   *  The complexity constant C. (default 1)</pre>
   *
   * <pre> -N
   *  Whether to 0=normalize/1=standardize/2=neither.
   *  (default 0=normalize)</pre>
   *
   * <pre> -I
   *  Use MIminimax feature space.</pre>
   *
   * <pre> -L &lt;double&gt;
   *  The tolerance parameter. (default 1.0e-3)</pre>
   *
   * <pre> -P &lt;double&gt;
   *  The epsilon for round-off error. (default 1.0e-12)</pre>
   *
   * <pre> -M
   *  Fit logistic models to SVM outputs.</pre>
   *
   * <pre> -V &lt;double&gt;
   *  The number of folds for the internal cross-validation.
   *  (default -1, use training data)</pre>
   *
   * <pre> -W &lt;double&gt;
   *  The random number seed. (default 1)</pre>
   *
   * <pre> -K &lt;classname and parameters&gt;
   *  The Kernel to use.
   *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
   *
   * <pre>
   * Options specific to kernel weka.classifiers.mi.supportVector.MIPolyKernel:
   * </pre>
   *
   * <pre> -D
   *  Enables debugging output (if available) to be printed.
   *  (default: off)</pre>
   *
   * <pre> -no-checks
   *  Turns off all checks - use with caution!
   *  (default: checks on)</pre>
   *
   * <pre> -C &lt;num&gt;
   *  The size of the cache (a prime number).
   *  (default: 250007)</pre>
   *
   * <pre> -E &lt;num&gt;
   *  The Exponent to use.
   *  (default: 1.0)</pre>
   *
   * <pre> -L
   *  Use lower-order terms.
   *  (default: no)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String   tmpStr;
    String[] tmpOptions;

    setChecksTurnedOff(Utils.getFlag("no-checks", options));

    tmpStr = Utils.getOption('C', options);
    if (tmpStr.length() != 0)
      setC(Double.parseDouble(tmpStr));
    else
      setC(1.0);

    tmpStr = Utils.getOption('L', options);
    if (tmpStr.length() != 0)
      setToleranceParameter(Double.parseDouble(tmpStr));
    else
      setToleranceParameter(1.0e-3);

    tmpStr = Utils.getOption('P', options);
    if (tmpStr.length() != 0)
      setEpsilon(Double.parseDouble(tmpStr));
    else
      setEpsilon(1.0e-12);

    setMinimax(Utils.getFlag('I', options));

    tmpStr = Utils.getOption('N', options);
    if (tmpStr.length() != 0)
      setFilterType(new SelectedTag(Integer.parseInt(tmpStr), TAGS_FILTER));
    else
      setFilterType(new SelectedTag(FILTER_NORMALIZE, TAGS_FILTER));

    setBuildLogisticModels(Utils.getFlag('M', options));

    tmpStr = Utils.getOption('V', options);
    if (tmpStr.length() != 0)
      m_numFolds = Integer.parseInt(tmpStr);
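The option defaults above (MIminimax feature space, MIPolyKernel) suggest this listing is Weka's multi-instance SMO classifier, weka.classifiers.mi.MISMO. Under that assumption, a minimal usage sketch follows; the wrapper class MISMOExample and the dataset path musk1.arff are illustrative placeholders, not taken from the listing.

// Minimal usage sketch, assuming this listing is weka.classifiers.mi.MISMO.
import weka.classifiers.mi.MISMO;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class MISMOExample {
  public static void main(String[] args) throws Exception {
    // Load a multi-instance dataset (bag id, relational attribute, class);
    // the path is hypothetical.
    Instances data = DataSource.read("musk1.arff");
    data.setClassIndex(data.numAttributes() - 1);

    MISMO classifier = new MISMO();
    // Fit logistic models (the -M option) so distributionForInstance()
    // returns probabilities reconciled by pairwiseCoupling() rather than
    // raw 1-vs-1 vote counts.
    classifier.setBuildLogisticModels(true);
    classifier.buildClassifier(data);

    // Class-probability estimates for the first bag.
    double[] dist = classifier.distributionForInstance(data.instance(0));
    for (int i = 0; i < dist.length; i++)
      System.out.printf("P(class=%s) = %.3f%n",
          data.classAttribute().value(i), dist[i]);
  }
}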
