📄 sun.java
字号:
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.StringTokenizer;
/**
 * Loads the ALL/AML training data into {@code DecisionTree.primal}, converts every
 * attribute column to 0/1 using an information-gain threshold, and exports the
 * matrix as nested lists in {@link #external}.
 *
 * <p>{@code DecisionTree.primal} is indexed {@code [sample][column]}: columns
 * {@code 0..7128} hold attribute values and column {@code 7129} holds the target
 * attribute (0 = ALL, 1 = AML).
 */
public class ReadTrain {
    /** Number of training samples (rows of the matrix). */
    private static final int SAMPLE_COUNT = 38;
    /** Number of samples labelled ALL (target value 0); they occupy the first rows. */
    private static final int ALL_COUNT = 27;
    /** Number of samples labelled AML (target value 1). */
    private static final int AML_COUNT = 11;
    /** Number of attribute columns. */
    private static final int ATTRIBUTE_COUNT = 7129;
    /** Column index that stores the target attribute. */
    private static final int TARGET_COLUMN = ATTRIBUTE_COUNT;

    /** Path of the training file. */
    private String filePath = "E:/data_set_ALL_AML_train.txt";

    /** Entropy of the target attribute over the whole training set. */
    public double last_entropy;
    /** AML is encoded as 1 in the matrix. */
    public double AML = 11.0;
    /** ALL is encoded as 0 in the matrix. */
    public double ALL = 27.0;
    /** Largest information gain seen over all attributes so far. */
    public double max_max_value = 0;
    /** Column index of the attribute that produced {@link #max_max_value}. */
    public int root;
    /** List-of-lists view of the matrix, filled by {@link #Make_List()}. */
    public static ArrayList external = new ArrayList();

    ReadTrain() {
    }

    /**
     * Computes the two-class entropy, in bits, of a set of {@code num} elements.
     *
     * @param p   number of elements in the first class
     * @param m   number of elements in the second class
     * @param num total number of elements used as the denominator of both proportions
     * @return {@code 0.0} when either count is zero, otherwise
     *         {@code -(p/num)*log2(p/num) - (m/num)*log2(m/num)}
     */
    public double comput_entropy(int p, int m, double num) {
        // A pure set has zero entropy; returning early also avoids log(0).
        if (p == 0 || m == 0) {
            return 0.0;
        }
        double num1 = -(p / num) * (Math.log(p / num) / Math.log(2.0));
        double num2 = -(m / num) * (Math.log(m / num) / Math.log(2.0));
        return num1 + num2;
    }

    /**
     * Reads the training file into {@code DecisionTree.primal}, then fills the target
     * column and computes {@link #last_entropy}.
     *
     * <p>The first line of the file and the first two space-separated tokens of every
     * following line are skipped. The k-th remaining token of the n-th data line is
     * stored at {@code primal[k][n]}. Blank lines are ignored. Any I/O or parse failure
     * is printed and loading stops at that point; the target column and entropy are
     * still initialised afterwards.
     */
    public void Read_in_train() {
        // try-with-resources guarantees the file handle is released on every path.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath)))) {
            // Data starts on the second line.
            br.readLine();
            int j = 0;
            String line;
            while ((line = br.readLine()) != null) {
                // A blank (e.g. trailing) line carries no data and must not abort the load.
                if (line.trim().isEmpty()) {
                    continue;
                }
                StringTokenizer column = new StringTokenizer(line, " ");
                // Values start at the third column.
                column.nextToken();
                column.nextToken();
                int i = 0;
                while (column.hasMoreTokens()) {
                    DecisionTree.primal[i][j] = Integer.parseInt(column.nextToken());
                    i++;
                }
                j++;
            }
        } catch (IOException | RuntimeException e) {
            System.out.println(e);
        }
        // Build the target attribute: 0 stands for ALL, 1 stands for AML.
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            DecisionTree.primal[i][TARGET_COLUMN] = (i < ALL_COUNT) ? 0 : 1;
        }
        last_entropy = comput_entropy(ALL_COUNT, AML_COUNT, (double) SAMPLE_COUNT);
    }

    /**
     * Replaces every attribute value in {@code DecisionTree.primal} with 0 or 1.
     *
     * <p>For each attribute column the samples are sorted by value, every cut between
     * two distinct adjacent values is scored by information gain, and the midpoint of
     * the best cut becomes the threshold. The threshold is stored in
     * {@code DecisionTree.middle[column]}; values below it become 0, all others 1.
     * When no cut has positive gain the threshold stays 0.0. {@link #max_max_value}
     * and {@link #root} track the best gain over all columns and the column that
     * achieved it.
     */
    public void Make_boolean() {
        // Scratch space: row 0 holds the attribute values, row 1 the matching labels.
        int[][] sorted = new int[2][SAMPLE_COUNT];
        for (int j = 0; j < ATTRIBUTE_COUNT; j++) {
            for (int i = 0; i < SAMPLE_COUNT; i++) {
                sorted[0][i] = DecisionTree.primal[i][j];
                sorted[1][i] = DecisionTree.primal[i][TARGET_COLUMN];
            }
            sortByValue(sorted);

            double threshold = bestThreshold(sorted, j);
            DecisionTree.middle[j] = threshold;
            for (int i = 0; i < SAMPLE_COUNT; i++) {
                DecisionTree.primal[i][j] = (DecisionTree.primal[i][j] < threshold) ? 0 : 1;
            }
        }
    }

    /** Stable insertion sort of the (value, label) pairs by ascending value. */
    private static void sortByValue(int[][] pairs) {
        for (int i = 1; i < pairs[0].length; i++) {
            int value = pairs[0][i];
            int label = pairs[1][i];
            int k = i - 1;
            // Strict comparison keeps equal values in their original order.
            while (k >= 0 && pairs[0][k] > value) {
                pairs[0][k + 1] = pairs[0][k];
                pairs[1][k + 1] = pairs[1][k];
                k--;
            }
            pairs[0][k + 1] = value;
            pairs[1][k + 1] = label;
        }
    }

    /**
     * Returns the threshold with the highest information gain for one sorted column
     * and updates {@link #max_max_value} / {@link #root} when that gain is a new best.
     */
    private double bestThreshold(int[][] sorted, int column) {
        double bestGain = 0.0;
        double threshold = 0.0;
        int positive = 0; // samples with target 1 at or before the cut
        int minus = 0;    // samples with target 0 at or before the cut
        for (int i = 0; i < SAMPLE_COUNT - 1; i++) {
            if (sorted[1][i] == 1) {
                positive++;
            } else {
                minus++;
            }
            // A threshold can only separate samples whose values differ; a cut between
            // equal values would score a partition the binarisation cannot produce.
            if (sorted[0][i] == sorted[0][i + 1]) {
                continue;
            }
            int below = i + 1;
            int above = SAMPLE_COUNT - below;
            double entropyBelow = comput_entropy(positive, minus, (double) below);
            double entropyAbove =
                    comput_entropy(AML_COUNT - positive, ALL_COUNT - minus, (double) above);
            // Gain(S, A) for this cut.
            double gain = last_entropy
                    - entropyBelow * below / SAMPLE_COUNT
                    - entropyAbove * above / SAMPLE_COUNT;
            if (gain > bestGain) {
                bestGain = gain;
                // Floating-point midpoint: integer division would truncate x.5 down to
                // the lower value and put both neighbours on the same side.
                threshold = (sorted[0][i] + (double) sorted[0][i + 1]) / 2;
                if (bestGain > max_max_value) {
                    // Only the position of the overall best attribute is remembered.
                    max_max_value = bestGain;
                    root = column;
                }
            }
        }
        return threshold;
    }

    /**
     * Appends the matrix to {@link #external} as one {@code LinkedList} of
     * {@code Integer} per sample (7130 entries each: attributes plus target),
     * followed by one label list holding the attribute indices {@code 0..7128}.
     */
    // Raw collection types are kept because the public field 'external' is declared raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void Make_List() {
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            LinkedList inner = new LinkedList();
            for (int j = 0; j <= TARGET_COLUMN; j++) {
                inner.add(Integer.valueOf(DecisionTree.primal[i][j]));
            }
            external.add(inner);
        }
        // The last entry is the label row.
        LinkedList label = new LinkedList();
        for (int i = 0; i < ATTRIBUTE_COUNT; i++) {
            label.add(Integer.valueOf(i));
        }
        external.add(label);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -