📄 mutikdf.java
字号:
package core;
import java.text.NumberFormat;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import util.MyMath;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.matrix.EigenvalueDecomposition;
import weka.core.matrix.Matrix;
import weka.classifiers.Classifier;
import util.MyClassifier;
/**
 * Multi-class kernel discriminant feature extractor.
 *
 * <p>Usage: construct with a labelled training set, call {@link #excute()} once to learn
 * the projection, then call {@link #deInstances(Instances)} to map any dataset into the
 * learned feature space.
 *
 * <p>What the code does: it builds, for every class, the vector of mean RBF-kernel
 * responses of all training instances against the members of that class, forms the
 * scatter matrix of those class-mean vectors around their overall mean (the
 * "between-class" kernel matrix M), adds a small ridge to the diagonal, and keeps the
 * leading {@code rankKM} eigenvectors of M as the projection {@code alpha}.
 * No within-class matrix is computed or used by this implementation.
 *
 * <p>The pairwise measure comes from {@code MyMath.CalSim}; it is used here as the
 * exponent of the RBF kernel {@code exp(-CalSim(a, b) / omit)}, so it is presumably a
 * (squared) distance - NOTE(review): confirm against {@code util.MyMath}.
 */
public class MutiKDF {

    /** Ridge added to the diagonal of M so that it is numerically full rank. */
    private static final double DIAGONAL_RIDGE = 0.000000001;

    /** Training set: a copy of the data passed to the constructor (after sorting). */
    private Instances srcData;

    /** Kernel matrix M: scatter of the per-class mean kernel vectors (RBF kernel). */
    private Matrix kernelM;

    /** Number of training instances in each class, indexed by class value. */
    private double num[];

    /** Eigenvectors of {@link #kernelM}, one per column. */
    private Matrix eigVec;

    /** Eigenvalues of {@link #kernelM} (block-diagonal matrix from the decomposition). */
    private Matrix eigVal;

    /** Number of leading eigenvectors kept for the projection. */
    private int rankKM;

    /** Projection matrix: the first {@link #rankKM} columns of {@link #eigVec}. */
    private Matrix alpha;

    /**
     * RBF kernel width. Overwritten in {@link #buildKM()} with the largest
     * {@code CalSim} value found between two training instances of different classes.
     */
    private double omit;

    /**
     * Creates an extractor for the given training data.
     *
     * <p>Side effect: {@code data} itself is sorted in place on its last attribute
     * before being copied. {@link #buildKM()} relies on instances of the same class
     * being contiguous and ordered by class value, so the last attribute is expected
     * to be the class attribute.
     *
     * @param data        labelled training set; its class index must be set
     * @param inputKMRank number of leading eigenvectors (output features) to keep
     * @param getomit     initial kernel width; note that {@link #excute()} replaces it
     *                    with a value derived from the data, so it currently has no
     *                    effect on the learned projection
     */
    public MutiKDF(Instances data, int inputKMRank, double getomit) {
        data.sort(data.numAttributes() - 1);
        srcData = new Instances(data);
        num = new double[data.numClasses()];
        kernelM = new Matrix(data.numInstances(), data.numInstances());
        rankKM = inputKMRank;
        omit = getomit;
    }

    /** Counts the training instances of every class into {@link #num}. */
    private void calSpNum() {
        for (int c = 0; c < srcData.numClasses(); c++) {
            num[c] = 0;
        }
        for (int i = 0; i < srcData.numInstances(); i++) {
            num[(int) srcData.instance(i).classValue()]++;
        }
    }

    /**
     * Builds the kernel matrix M.
     *
     * <p>Steps: (1) pick the kernel width {@link #omit}; (2) compute, for each class,
     * the vector of mean kernel values between every training instance and the members
     * of that class; (3) average those vectors into the overall mean; (4) accumulate the
     * outer products of the centred class vectors; (5) add a ridge to the diagonal.
     *
     * <p>Classes that have no training instances are skipped; the averages in steps
     * (3) and (4) are taken over the classes that are actually present.
     */
    private void buildKM() {
        final int n = srcData.numInstances();
        final int classCount = num.length;
        MyMath mh = new MyMath();

        // Start from a clean matrix so that calling excute() again does not pile a
        // second scatter matrix (and a second ridge) on top of the first one.
        kernelM = new Matrix(n, n);

        // Heuristic kernel width: the largest CalSim value over all pairs of
        // instances that belong to different classes.
        omit = Double.MIN_VALUE;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                double v = mh.CalSim(srcData.instance(i), srcData.instance(j));
                if (srcData.instance(i).classValue() != srcData.instance(j).classValue()) {
                    omit = Math.max(v, omit);
                }
            }
        }

        // Column c of ALLMi holds the mean kernel vector of class c. Because the
        // training data is sorted by class, class c occupies the index range
        // [numcont, numcont + num[c]).
        Matrix ALLMi = new Matrix(n, classCount);
        int classesPresent = 0;
        int numcont = 0;
        for (int c = 0; c < classCount; c++) {
            if (num[c] > 0) {
                classesPresent++;
                for (int i = 0; i < n; i++) {
                    // The sum must restart from zero for every (instance, class) pair.
                    double value = 0;
                    for (int j = 0; j < num[c]; j++) {
                        value = value + Math.exp(
                                -mh.CalSim(srcData.instance(i), srcData.instance(j + numcont)) / omit);
                    }
                    ALLMi.set(i, c, ((1.0) / num[c]) * value);
                }
            }
            numcont += (int) num[c];
        }

        // Overall mean vector Mc = (1 / classesPresent) * sum of the class vectors.
        Matrix Mc = new Matrix(n, 1);
        for (int c = 0; c < classCount; c++) {
            if (num[c] > 0) {
                Mc.plusEquals(ALLMi.getMatrix(0, n - 1, c, c));
            }
        }
        System.out.println(Mc.toString());
        Mc.timesEquals(1.0 / (double) classesPresent);

        // M = (1 / classesPresent) * sum over classes of (Mi - Mc)(Mi - Mc)^T.
        for (int c = 0; c < classCount; c++) {
            if (num[c] > 0) {
                Matrix centred = ALLMi.getMatrix(0, n - 1, c, c).minus(Mc);
                kernelM.plusEquals(centred.times(centred.transpose()));
            }
        }
        kernelM.timesEquals(1.0 / (double) classesPresent);

        // Regularise: add a small ridge to the diagonal so M is full rank.
        for (int i = 0; i < kernelM.getColumnDimension(); i++) {
            kernelM.set(i, i, kernelM.get(i, i) + DIAGONAL_RIDGE);
        }
        System.out.println("*********** the rank of MutiKDF is " + kernelM.rank());
    }

    /**
     * Eigendecomposes {@link #kernelM} and reorders eigenvalues and eigenvectors
     * together via {@code MyMath.quickSort}. The number of components kept stays at
     * the {@code rankKM} given to the constructor; no eigenvalue thresholding is done.
     * NOTE(review): {@link #calAlpha()} takes the first columns, so the sort is
     * presumably descending - confirm against {@code util.MyMath}.
     */
    private void calEig() {
        EigenvalueDecomposition ED = kernelM.eig();
        eigVal = ED.getD();
        eigVec = ED.getV();
        new MyMath().quickSort(eigVal, eigVec);
        System.out.println("rankKM for MutiKDF is " + rankKM);
    }

    /** Builds {@link #alpha} from the first {@link #rankKM} eigenvector columns. */
    private void calAlpha() {
        alpha = eigVec.getMatrix(0, eigVec.getRowDimension() - 1, 0, rankKM - 1);
    }

    /**
     * Maps a dataset into the learned feature space.
     *
     * <p>For every instance the RBF kernel against each training instance is
     * evaluated, the resulting row is centred by subtracting its own mean, and the row
     * is multiplied by {@link #alpha}. The result has one numeric attribute
     * {@code F0..F(rankKM-1)} per kept eigenvector plus a nominal {@code class}
     * attribute whose labels are {@code "0".."numClasses-1"}; each output instance
     * carries the class index of the corresponding input instance.
     *
     * @param newData instances to project; must have a class index set
     * @return a new dataset named {@code "RD_" + <training relation name>} with the
     *         class attribute last
     * @throws IllegalStateException if {@link #excute()} has not been run yet
     */
    public Instances deInstances(Instances newData) {
        if (alpha == null) {
            throw new IllegalStateException("excute() must be called before deInstances()");
        }
        MyMath mh = new MyMath();
        final int trainCount = srcData.numInstances();

        // Kernel matrix between the new instances (rows) and the training set (columns).
        Matrix newKernel = new Matrix(newData.numInstances(), trainCount);
        for (int i = 0; i < newData.numInstances(); i++) {
            double sum = 0.0;
            for (int j = 0; j < trainCount; j++) {
                double val = Math.exp(-mh.CalSim(newData.instance(i), srcData.instance(j)) / omit);
                newKernel.set(i, j, val);
                sum += val;
            }
            // Centre the row so that its mean is zero.
            double rowMean = sum / trainCount;
            for (int j = 0; j < trainCount; j++) {
                newKernel.set(i, j, newKernel.get(i, j) - rowMean);
            }
        }

        // Project onto the kept eigenvectors.
        newKernel = newKernel.times(alpha);
        final int featureCount = newKernel.getColumnDimension();

        // Numeric feature attributes followed by the nominal class attribute.
        FastVector fv = new FastVector();
        for (int i = 0; i < featureCount; i++) {
            fv.addElement(new Attribute("F" + i));
        }
        FastVector classAttribute = new FastVector();
        for (int i = 0; i < newData.numClasses(); i++) {
            classAttribute.addElement("" + i);
        }
        fv.addElement(new Attribute("class", classAttribute));

        // One output instance per projected row.
        Instances disData = new Instances("RD_" + srcData.relationName(), fv, newData.numInstances());
        for (int i = 0; i < newKernel.getRowDimension(); i++) {
            Instance f = new Instance(featureCount + 1);
            for (int j = 0; j < featureCount; j++) {
                f.setValue(j, newKernel.get(i, j));
            }
            f.setValue(featureCount, (int) newData.instance(i).classValue());
            disData.add(f);
        }
        disData.setClassIndex(featureCount);
        return disData;
    }

    /**
     * Learns the projection: counts class sizes, builds M, eigendecomposes it and
     * extracts {@link #alpha}. Must be called before {@link #deInstances(Instances)}.
     */
    public void excute() {
        calSpNum();
        buildKM();
        calEig();
        calAlpha();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -