📄 randomprojection.java
字号:
/**
 * Returns the tip text for the distribution property.
 *
 * @return a description of the selectable distributions, suitable for
 *         displaying in the explorer/experimenter gui
 */
public String distributionTipText() {
  StringBuffer tip = new StringBuffer();
  tip.append("The distribution to use for calculating the random matrix.\n");
  // Sparse1: three-valued distribution scaled by sqrt(3).
  tip.append("Sparse1 is:\n");
  tip.append(" sqrt(3) * { -1 with prob(1/6), \n");
  tip.append(" 0 with prob(2/3), \n");
  tip.append(" +1 with prob(1/6) } \n");
  // Sparse2: two-valued distribution.
  tip.append("Sparse2 is:\n");
  tip.append(" { -1 with prob(1/2), \n");
  tip.append(" +1 with prob(1/2) } ");
  return tip.toString();
}
/**
 * Sets the distribution to use for calculating the random matrix.
 * The request is silently ignored unless the tag set of the given
 * SelectedTag is the very same array as TAGS_DSTRS_TYPE.
 *
 * @param newDstr the tag identifying the new distribution
 */
public void setDistribution(SelectedTag newDstr) {
  // Identity comparison on purpose: only tags built from our own tag array are accepted.
  if (newDstr.getTags() != TAGS_DSTRS_TYPE) {
    return;
  }
  m_distribution = newDstr.getSelectedTag().getID();
}
/**
 * Returns the distribution that will be used for calculating the
 * random matrix.
 *
 * @return the current distribution, wrapped as a SelectedTag over
 *         TAGS_DSTRS_TYPE
 */
public SelectedTag getDistribution() {
  final SelectedTag current = new SelectedTag(m_distribution, TAGS_DSTRS_TYPE);
  return current;
}
/**
 * Returns the tip text for the replaceMissingValues property.
 *
 * @return a description of the property, suitable for displaying in
 *         the explorer/experimenter gui
 */
public String replaceMissingValuesTipText() {
  final String filterName = "weka.filters.unsupervised.attribute.ReplaceMissingValues";
  return "If set the filter uses " + filterName + " to replace the missing values";
}
/**
 * Switches the use of the ReplaceMissingValues filter on or off.
 *
 * @param t true if the ReplaceMissingValues filter should be used
 */
public void setReplaceMissingValues(boolean t) {
  this.m_replaceMissing = t;
}
/**
 * Gets the current setting for using the ReplaceMissingValues filter.
 *
 * @return the stored flag value
 */
public boolean getReplaceMissingValues() {
  return this.m_replaceMissing;
}
/**
 * Sets the format of the input instances and sets up the optional
 * pre-processing filters (ReplaceMissingValues and NominalToBinary).
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 * @exception Exception if the input format can't be set
 * successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // A NominalToBinary filter is only needed when some non-class attribute
  // is nominal; the supervised variant is used when a class is assigned.
  final int classIdx = instanceInfo.classIndex();
  for (int i = 0; i < instanceInfo.numAttributes(); i++) {
    if (i != classIdx && instanceInfo.attribute(i).isNominal()) {
      if (classIdx >= 0) {
        ntob = new weka.filters.supervised.attribute.NominalToBinary();
      } else {
        ntob = new weka.filters.unsupervised.attribute.NominalToBinary();
      }
      break;
    }
  }

  // Honour the user-visible flag written by setReplaceMissingValues().
  // Testing the filter object for null (as was done before) ignored the
  // flag, so the filter could never be enabled through the option.
  boolean missingReady = true;
  if (m_replaceMissing) {
    replaceMissing = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
    missingReady = replaceMissing.setInputFormat(instanceInfo);
  } else {
    replaceMissing = null;
  }

  // The output format can only be derived once the nominal-to-binary
  // filter (if any) knows its own output format.
  if (ntob != null && !ntob.setInputFormat(instanceInfo)) {
    return false;
  }
  setOutputFormat();
  return missingReady;
}
/**
 * Input an instance for filtering. The instance is passed through the
 * ReplaceMissingValues filter and then the NominalToBinary filter
 * (each only when present); the result is converted with
 * convertInstance() and handed to push().
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be
 * collected with output().
 * @exception IllegalStateException if no input format has been set
 * @exception Exception if one of the pre-processing filters fails
 */
public boolean input(Instance instance) throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  Instance current = instance;

  // Stage 1: missing-value replacement; stop if it has no output yet.
  if (replaceMissing != null) {
    if (!replaceMissing.input(instance)) {
      return false;
    }
    if (!m_OutputFormatDefined) {
      setOutputFormat();
    }
    current = replaceMissing.output();
  }

  // Stage 2: nominal-to-binary conversion; stop if it has no output yet.
  if (ntob != null) {
    if (!ntob.input(current)) {
      return false;
    }
    if (!m_OutputFormatDefined) {
      setOutputFormat();
    }
    current = ntob.output();
  }

  // Stage 3: project and queue the result.
  push(convertInstance(current));
  return true;
}
/**
 * Signify that this batch of input to the filter is finished. Any
 * instances still held by the ReplaceMissingValues and NominalToBinary
 * filters are drained, converted and pushed; a filter that has been
 * drained is then set to null.
 *
 * @return true if there are instances pending output
 * @exception NullPointerException if no input structure has been defined,
 * @exception Exception if there was a problem finishing the batch.
 */
public boolean batchFinished() throws Exception {
  if (getInputFormat() == null) {
    throw new NullPointerException("No input instance format defined");
  }

  boolean drainedViaReplaceMissing = false;
  if (replaceMissing != null && replaceMissing.batchFinished()) {
    Instance pending;
    while ((pending = replaceMissing.output()) != null) {
      if (!m_OutputFormatDefined) {
        setOutputFormat();
      }
      if (ntob == null) {
        push(convertInstance(pending));
      } else {
        // Still has to go through the nominal-to-binary stage.
        ntob.input(pending);
      }
    }
    drainNominalToBinaryRP();
    replaceMissing = null;
    drainedViaReplaceMissing = true;
  }

  if (!drainedViaReplaceMissing) {
    drainNominalToBinaryRP();
  }

  m_OutputFormatDefined = false;
  return super.batchFinished();
}

/**
 * Finishes the batch on the NominalToBinary filter (if there is one),
 * converts and pushes everything it outputs, then sets the filter to null.
 * Does nothing further when the filter reports no finished batch.
 */
private void drainNominalToBinaryRP() throws Exception {
  if (ntob == null || !ntob.batchFinished()) {
    return;
  }
  Instance pending;
  while ((pending = ntob.output()) != null) {
    if (!m_OutputFormatDefined) {
      setOutputFormat();
    }
    push(convertInstance(pending));
  }
  ntob = null;
}
/**
 * Sets the output format: m_k numeric attributes named K1..Kk, followed
 * by the class attribute when one is set. Also re-seeds the random
 * number generator and regenerates the random matrix.
 */
private void setOutputFormat() {
  // The projection works on the nominal-to-binary output when that filter is in use.
  final Instances sourceFormat = (ntob != null) ? ntob.getOutputFormat() : getInputFormat();

  if (m_percent > 0) {
    m_k = (int) ((getInputFormat().numAttributes() - 1) * m_percent);
  }

  FastVector projectedAtts = new FastVector();
  for (int n = 1; n <= m_k; n++) {
    projectedAtts.addElement(new Attribute("K" + n));
  }

  int classPos = -1;
  if (sourceFormat.classIndex() != -1) {
    // Carry the class attribute over unchanged as the last attribute.
    projectedAtts.addElement(sourceFormat.attribute(sourceFormat.classIndex()));
    classPos = projectedAtts.size() - 1;
  }

  Instances header = new Instances(sourceFormat.relationName(), projectedAtts, 0);
  if (classPos != -1) {
    header.setClassIndex(classPos);
  }
  m_OutputFormatDefined = true;

  // Same seed on every call, so the generated matrix is reproducible.
  r = new Random(m_rndmSeed);

  final int rows = m_k;
  final int cols = sourceFormat.numAttributes();
  rmatrix = new double[rows][cols];

  final boolean gaussian = (m_distribution == GAUSSIAN);
  final boolean sparseWithZero = (m_distribution == SPARSE1);
  for (int row = 0; row < rows; row++) {
    for (int col = 0; col < cols; col++) {
      rmatrix[row][col] = gaussian ? r.nextGaussian() : rndmNum(sparseWithZero);
    }
  }

  setOutputFormat(header);
}
/**
 * Converts a single instance to the output format by multiplying its
 * non-class values with the random matrix. Missing values contribute
 * nothing to the projection. The class value, if a class is set, is
 * copied unchanged into the last position.
 *
 * @param currentInstance the (already pre-processed) instance to project
 * @return the projected instance, attached to the output format; sparse
 *         if the given instance was sparse
 */
private Instance convertInstance(Instance currentInstance) {
  double[] projected = new double[getOutputFormat().numAttributes()];
  final int classIndex = (ntob == null)
      ? getInputFormat().classIndex()
      : ntob.getOutputFormat().classIndex();

  // Iterate over the stored values and map each position to its real
  // attribute index. For a dense instance position == attribute index;
  // for a sparse instance they differ, so using the position directly as
  // the attribute index would project the wrong columns.
  final int stored = currentInstance.numValues();
  for (int i = 0; i < m_k; i++) {
    for (int pos = 0; pos < stored; pos++) {
      final int attIndex = currentInstance.index(pos);
      if (attIndex == classIndex) { // the class value is copied separately below
        continue;
      }
      if (!currentInstance.isMissingSparse(pos)) {
        projected[i] += rmatrix[i][attIndex] * currentInstance.valueSparse(pos);
      }
    }
  }

  if (classIndex != -1) {
    projected[m_k] = currentInstance.value(classIndex);
  }

  final Instance newInstance;
  if (currentInstance instanceof SparseInstance) {
    newInstance = new SparseInstance(currentInstance.weight(), projected);
  } else {
    newInstance = new Instance(currentInstance.weight(), projected);
  }
  newInstance.setDataset(getOutputFormat());
  return newInstance;
}
private static final int weights[] = {1, 1, 4};
private static final int vals[] = {-1, 1, 0};
private static final int weights2[] = {1, 1};
private static final int vals2[] = {-1, 1};
private static final double sqrt3 = Math.sqrt(3);
/**
 * Draws one random matrix entry.
 *
 * @param useDstrWithZero if true, returns
 *        sqrt(3) * { -1 with prob. 1/6, 0 with prob. 2/3, 1 with prob. 1/6 };
 *        otherwise returns { -1 with prob. 1/2, 1 with prob. 1/2 }
 * @return the drawn value
 */
private double rndmNum(boolean useDstrWithZero) {
  if (!useDstrWithZero) {
    return vals2[weightedDistribution(weights2)];
  }
  return sqrt3 * vals[weightedDistribution(weights)];
}
/**
 * Picks an index at random, with index i weighted by weights[i].
 * Consumes exactly one nextDouble() from the random number generator r.
 *
 * @param weights the integer weights of the indices
 * @return the chosen index, or -1 if no index was selected
 */
private int weightedDistribution(int [] weights) {
  int total = 0;
  int idx = 0;
  while (idx < weights.length) {
    total += weights[idx];
    idx++;
  }

  // Scale the draw by the total weight, then walk the weights until the
  // remaining amount goes negative.
  int remaining = (int) Math.floor(r.nextDouble() * total);
  for (int slot = 0; slot < weights.length; slot++) {
    remaining -= weights[slot];
    if (remaining < 0) {
      return slot;
    }
  }
  return -1;
}
/**
 * Main method for testing this class. Runs the filter in batch mode
 * when the -b flag is given and in normal mode otherwise; the message
 * of any exception is printed to standard output.
 *
 * @param argv should contain arguments to the filter:
 * use -h for help
 */
public static void main(String [] argv) {
  try {
    final boolean batchMode = Utils.getFlag('b', argv);
    final RandomProjection filter = new RandomProjection();
    if (!batchMode) {
      Filter.filterFile(filter, argv);
    } else {
      Filter.batchFilterFile(filter, argv);
    }
  } catch (Exception ex) {
    System.out.println(ex.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -