⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svmlightinterface.java

📁 SVM_light是一种非常流行的支持向量机的java接口
💻 JAVA
字号:
/*
 * JNI_SVM-light - A Java Native Interface for SVM-light
 * 
 * Copyright (C) 2005 
 * Tom Crecelius & Martin Theobald 
 * Max-Planck Institute for Computer Science
 * 
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

package jnisvmlight;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.StringTokenizer;

/**
 * The main interface class that transfers the training data to the SVM-light
 * library by a native call. Optionally takes as input an individually modified
 * set of training parameters or an array of string parameters that exactly
 * simulate the command line input parameters used by the SVM-light binaries.
 * This class can also be used for native classification calls.
 *
 * @author Tom Crecelius & Martin Theobald
 */
public class SVMLightInterface {

  /**
   * Apply an in-place quicksort prior to each native training call to
   * SVM-light. SVM-light requires each input feature vector to be sorted in
   * ascending order of dimensions. Disable this option if you are sure to
   * provide sorted vectors already.
   */
  public static boolean SORT_INPUT_VECTORS = true;

  /**
   * Separator between the tokens of one training line: a single whitespace
   * character or a square bracket. Compiled once instead of once per line.
   */
  private static final java.util.regex.Pattern TOKEN_SEPARATOR =
      java.util.regex.Pattern.compile("[ \\t\\n\\x0B\\f\\r\\[\\]]");

  static {
    System.loadLibrary("svmlight");
  }

  /** The parameters used by the most recent {@code trainModel} call. */
  protected TrainingParameters m_tp;

  /**
   * Reads a set of labeled training vectors from a URL.
   *
   * <p>Every line that is not skipped must consist of at least three tokens:
   * the label, a factor token whose final character is dropped before it is
   * parsed, and one or more {@code dimension:value} pairs. A token starting
   * with {@code #} ends the feature list of that line. A line whose feature
   * list is empty is silently ignored.
   *
   * @param file the location to read the training data from
   * @param numOfLinesToSkip number of leading lines that are not parsed
   * @return the parsed vectors in file order, or {@code null} if an I/O error
   *         occurred while reading (the error is printed to standard error)
   * @throws ParseException if a line is malformed, a number cannot be parsed,
   *         or the input contains no labeled feature vector at all
   */
  public static LabeledFeatureVector[] getLabeledFeatureVectorsFromURL(
      URL file, int numOfLinesToSkip) throws ParseException {

    ArrayList data = new ArrayList();
    LabeledFeatureVector[] traindata = null;
    BufferedReader bi = null;

    try {
      bi = new BufferedReader(new InputStreamReader(file.openStream()));

      String line;
      int cnt = 0;
      while ((line = bi.readLine()) != null) {
        cnt++;
        if (cnt <= numOfLinesToSkip) {
          continue;
        }
        LabeledFeatureVector lfv = parseLabeledFeatureVector(line, file, cnt);
        if (lfv != null) {
          data.add(lfv);
        }
      }

      if (data.isEmpty()) {
        throw new ParseException("No labeled features found within " + cnt
            + " lines of file '" + file.toString() + "'.", 0);
      }
      traindata = (LabeledFeatureVector[]) data
          .toArray(new LabeledFeatureVector[data.size()]);
    } catch (IOException ioe) {
      // Kept for backward compatibility: the signature cannot declare
      // IOException without breaking callers, so an I/O failure is reported
      // on standard error and signalled by the null return value.
      ioe.printStackTrace();
    } finally {
      if (bi != null) {
        try {
          bi.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
    return traindata;
  }

  /**
   * Parses a single training line into a labeled feature vector.
   *
   * @param line the raw line as read from the input
   * @param file the source of the line, used for error messages only
   * @param lineNumber 1-based number of the line, used for error messages only
   * @return the parsed vector, or {@code null} if the line has no feature
   *         before its comment
   * @throws ParseException if the line is malformed or contains a value that
   *         is not a valid number
   */
  private static LabeledFeatureVector parseLabeledFeatureVector(String line,
      URL file, int lineNumber) throws ParseException {

    String[] tokens = TOKEN_SEPARATOR.split(line.trim());
    if (tokens.length <= 2) {
      throw new ParseException("Parse error in FeatureVector of file '"
          + file.toString() + "' at line: " + lineNumber + ". "
          + " Wrong format of the labeled feature vector?", 0);
    }

    String label = tokens[0];
    String rawFactor = tokens[1];
    if (rawFactor.length() == 0) {
      // Two consecutive separators after the label yield an empty token;
      // report it instead of failing inside substring().
      throw new ParseException("Parse error in FeatureVector of file '"
          + file.toString() + "' at line: " + lineNumber
          + ". Empty factor token.", 0);
    }
    // The last character of the factor token is not part of the number.
    // NOTE(review): presumably a trailing delimiter - confirm against the
    // data format in use.
    String factor = rawFactor.substring(0, rawFactor.length() - 1);

    ArrayList dimlist = new ArrayList();
    ArrayList vallist = new ArrayList();
    for (int tokencnt = 2; tokencnt < tokens.length; tokencnt++) {
      String dimval = tokens[tokencnt];
      if (dimval.trim().startsWith("#")) {
        break; // the rest of the line is a comment
      }

      int idx = dimval.indexOf(':');
      if (idx < 0) {
        throw new ParseException("Parse error in FeatureVector of file '"
            + file.toString() + "' at line: " + lineNumber + ", token: "
            + tokencnt + ". Could not estimate a \"int:double\" pair ?! "
            + file.toString()
            + " contains a wrongly defined feature vector!", 0);
      }
      dimlist.add(dimval.substring(0, idx));
      vallist.add(dimval.substring(idx + 1));
    }

    if (dimlist.isEmpty()) {
      // Label and factor are intentionally not validated for such lines.
      return null;
    }

    double labelValue;
    double factorValue;
    int[] dimarray = new int[dimlist.size()];
    double[] valarray = new double[vallist.size()];
    try {
      labelValue = Double.parseDouble(label);
      factorValue = Double.parseDouble(factor);
      for (int i = 0; i < dimarray.length; i++) {
        dimarray[i] = Integer.parseInt((String) dimlist.get(i));
      }
      for (int i = 0; i < valarray.length; i++) {
        valarray[i] = Double.parseDouble((String) vallist.get(i));
      }
    } catch (NumberFormatException nfe) {
      // Surface malformed numbers through the declared exception type.
      ParseException pe = new ParseException(
          "Parse error in FeatureVector of file '" + file.toString()
              + "' at line: " + lineNumber
              + ". Could not parse a numeric value: " + nfe.getMessage(), 0);
      pe.initCause(nfe);
      throw pe;
    }

    LabeledFeatureVector lfv =
        new LabeledFeatureVector(labelValue, dimarray, valarray);
    lfv.setFactor(factorValue);
    return lfv;
  }

  /**
   * Performs a classification step as a native call to SVM-light. If this
   * method is used exclusively, no additional SVMLightModel object has to be
   * kept in the Java runtime process.
   */
  public native double classifyNative(FeatureVector doc);

  /**
   * Returns the parameters of the most recent training call.
   *
   * @return the current training parameters, or {@code null} if no
   *         {@code trainModel} method has been called on this instance yet
   */
  public TrainingParameters getTrainingParameters() {
    return m_tp;
  }

  /**
   * Partitions {@code dims[low..high]} around the value of its middle element
   * and applies every swap to {@code vals} as well, so that both arrays stay
   * aligned.
   *
   * @return an index {@code p} with {@code low <= p < high} such that no
   *         element of {@code dims[low..p]} is greater than any element of
   *         {@code dims[p+1..high]}
   */
  private static int partition(int[] dims, double[] vals, int low, int high) {
    // Written as low + (high - low) / 2 so the sum cannot overflow.
    int pivot = dims[low + (high - low) / 2];
    int i = low - 1;
    int j = high + 1;
    while (i < j) {
      i++;
      while (dims[i] < pivot) {
        i++;
      }
      j--;
      while (dims[j] > pivot) {
        j--;
      }
      if (i < j) {
        int tmp = dims[i];
        dims[i] = dims[j];
        dims[j] = tmp;
        double tmpd = vals[i];
        vals[i] = vals[j];
        vals[j] = tmpd;
      }
    }
    return j;
  }

  /**
   * Sorts {@code dims[low..high]} in ascending order, applying the same
   * permutation to {@code vals}.
   */
  private static void quicksort(int[] dims, double[] vals, int low, int high) {
    if (low >= high) {
      return;
    }
    int p = partition(dims, vals, low, high);
    quicksort(dims, vals, low, p);
    quicksort(dims, vals, p + 1, high);
  }

  /**
   * Sorts every non-null feature vector of the given array in place by
   * ascending dimension.
   */
  private static void sort(FeatureVector[] trainingData) {
    for (int i = 0; i < trainingData.length; i++) {
      if (trainingData[i] != null) {
        quicksort(trainingData[i].m_dims, trainingData[i].m_vals, 0,
            trainingData[i].size() - 1);
      }
    }
  }

  /** Native training entry point; the name is bound by the JNI library. */
  private native SVMLightModel trainmodel(LabeledFeatureVector[] traindata,
      TrainingParameters p);

  /**
   * Trains a model with default training parameters.
   *
   * @param trainingData the labeled vectors to train on; sorted in place if
   *        {@link #SORT_INPUT_VECTORS} is set
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData) {
    return sortAndTrain(trainingData, new TrainingParameters());
  }

  /**
   * Trains a model with parameters given as an argument array.
   *
   * @param trainingData the labeled vectors to train on; sorted in place if
   *        {@link #SORT_INPUT_VECTORS} is set
   * @param argv the arguments handed to the {@code TrainingParameters}
   *        constructor
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData,
      String[] argv) {
    return sortAndTrain(trainingData, new TrainingParameters(argv));
  }

  /**
   * Trains a model with the given training parameters.
   *
   * @param trainingData the labeled vectors to train on; sorted in place if
   *        {@link #SORT_INPUT_VECTORS} is set
   * @param tp the training parameters to use
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData,
      TrainingParameters tp) {
    return sortAndTrain(trainingData, tp);
  }

  /**
   * Shared body of all {@code trainModel} overloads: records the parameters,
   * optionally sorts the input vectors and runs the native training. Private
   * so that overriding one public overload does not affect the others.
   */
  private SVMLightModel sortAndTrain(LabeledFeatureVector[] trainingData,
      TrainingParameters tp) {
    this.m_tp = tp;
    if (SORT_INPUT_VECTORS) {
      sort(trainingData);
    }
    return trainmodel(trainingData, m_tp);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -