📄 form1.cs
字号:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Collections;
using DecisionTree;
namespace DecisionTreeAlgorithm
{
public partial class Form1 : Form
{
// Measure used to choose the splitting attribute when the tree is built.
enum attributeSelectionMethodType { InformationGain, GainRate, Gini };
// Age bucket: <=20, 21-30, 31-40, 41-50, 51-60, >=61.
enum ageType { year0_20, year21_30, year31_40, year41_50, year51_60, yearover60 };
// Education level: the original 1-16 education number grouped into six ranges.
enum educationType { edu1_3, edu4_6, edu7_9, edu10_12, edu13_14, edu15_16 };
// Occupation category (14 values).
enum occupationType {Tech_support, Craft_repair, Other_service, Sales, Exec_managerial, Prof_specialty,
Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving,
Priv_house_serv, Protective_serv, Armed_Forces};
// Sex: Male or Female.
enum sexType { Male, Female };
// Native country grouped into five classes; the country-to-class mapping is listed in the
// block comment that follows these enums.
enum native_countryType { developNO1, developNO2, developNO3, developNO4, developNO5};
// Class label produced at the leaves of the tree (the "makeover50k" column).
// NOTE: member names of the data enums above are matched against grid cell text by
// Enum.Parse in databasetoArray(), so renaming a member changes which input values parse.
enum makeover50kType { yes, no };
/* 1. ageType has 6 classes: <=20 is year0_20, 21-30 is year21_30, 31-40 is year31_40, 41-50 is year41_50, 51-60 is year51_60, >=61 is yearover60
2. The original education_numType (1-16) is grouped into 6 levels: edu1_3, edu4_6, edu7_9, edu10_12, edu13_14, edu15_16
3. occupationType has 14 classes: Tech_support, Craft_repair, Other_service, Sales, Exec_managerial, Prof_specialty,
Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving,
Priv_house_serv, Protective_serv, Armed_Forces
4. The original native-countryType is grouped into 5 classes (developNO1, developNO2, developNO3, developNO4, developNO5):
developNO1: (<0.1) Outlying-US(Guam-USVI-etc), Vietnam, Mexico, Dominican-Republic, Laos, Haiti, Hungary,
Guatemala, Nicaragua, Scotland, El-Salvador, Trinadad&Tobago, Holand-Netherlands
developNO2: (>=0.1 <0.2) Puerto-Rico, South, China, Cuba, Poland, Jamaica, Portugal, Ireland, Ecuador, Peru, ?
developNO3: (>=0.2 <0.3) Honduras, France, Columbia, United-States, England, Germany, Greece, Philippines, Thailand,
Yugoslavia
developNO4: (>=0.3 <0.4) India, Japan
developNO5: (>=0.4) Cambodia, Canada, Iran, Italy, Taiwan, Hong
5. sexType has two classes: Male and Female
*/
// One record of the data set: five categorical attributes plus the class label.
struct Item
{
public int ID; // sequence number; databasetoArray() assigns 1, 2, 3, ... in row order
public ageType age; // age bucket
public educationType education; // education-level bucket
public occupationType occupation; // occupation category
public sexType sex; // Male / Female
public native_countryType native_country; // native-country group
public makeover50kType makeover50k; // class label compared by FindSameClass / FindMoreClass
}
private attributeSelectionMethodType attributeSelectionMethod; // currently selected attribute-selection measure
public ArrayList al; // Item records read from dataGridView1 by databasetoArray(); passed to GenerateDecisionTree as the data set (original note: "test data")
private ArrayList attributeList; // names of the candidate splitting attributes; refilled by CreateTree()
// Creates the form. All work is delegated to InitializeComponent(), which is defined
// outside this part of the partial class; the working fields are set up later in Form1_Load.
public Form1()
{
InitializeComponent();
}
// Load handler of the form: fills the four tables of adult_dataDataSet through their
// table adapters, then selects the default measure and creates the working lists.
private void Form1_Load(object sender, EventArgs e)
{
    // Designer-generated data loads (traindata2, traindata1, testdata2, testdata1).
    // They may be moved or removed as needed.
    traindata2TableAdapter.Fill(adult_dataDataSet.traindata2);
    traindata1TableAdapter.Fill(adult_dataDataSet.traindata1);
    testdata2TableAdapter.Fill(adult_dataDataSet.testdata2);
    testdata1TableAdapter.Fill(adult_dataDataSet.testdata1);

    // Information gain is the measure in effect until the user picks another one.
    attributeSelectionMethod = attributeSelectionMethodType.InformationGain;

    // Empty containers for the records and for the candidate attribute names.
    al = new ArrayList();
    attributeList = new ArrayList();
}
// Rebuilds the record list `al` from the rows currently shown in dataGridView1.
// Columns 0..5 are read as age, education, occupation, sex, native_country and
// makeover50k; each cell's text must be the name of a member of the matching enum,
// otherwise Enum.Parse throws. IDs are assigned 1, 2, 3, ... in row order.
private void databasetoArray()
{
    al.Clear();
    foreach (DataGridViewRow row in dataGridView1.Rows)
    {
        // Skip only the blank "new record" placeholder row. The previous code always
        // dropped the last row (RowCount - 1), which loses a real record whenever the
        // grid has no placeholder row (AllowUserToAddRows disabled).
        if (row.IsNewRow)
        {
            continue;
        }

        Item record = new Item();
        record.ID = al.Count + 1;
        record.age = (ageType)Enum.Parse(typeof(ageType), row.Cells[0].Value.ToString());
        record.education = (educationType)Enum.Parse(typeof(educationType), row.Cells[1].Value.ToString());
        record.occupation = (occupationType)Enum.Parse(typeof(occupationType), row.Cells[2].Value.ToString());
        record.sex = (sexType)Enum.Parse(typeof(sexType), row.Cells[3].Value.ToString());
        record.native_country = (native_countryType)Enum.Parse(typeof(native_countryType), row.Cells[4].Value.ToString());
        record.makeover50k = (makeover50kType)Enum.Parse(typeof(makeover50kType), row.Cells[5].Value.ToString());
        al.Add(record);
    }
}
// Menu handler: switches the attribute-selection measure to information gain
// and rebuilds the tree.
private void infomationGainToolStripMenuItem_Click(object sender, EventArgs e)
{
    // Leave only the "information gain" menu entry checked.
    infomationGainToolStripMenuItem.Checked = true;
    gainRateToolStripMenuItem.Checked = false;
    giniToolStripMenuItem.Checked = false;

    // Record the choice, then regenerate the tree with it.
    attributeSelectionMethod = attributeSelectionMethodType.InformationGain;
    CreateTree();
}
// Menu handler: switches the attribute-selection measure to gain rate
// and rebuilds the tree.
private void gainRateToolStripMenuItem_Click(object sender, EventArgs e)
{
    // Leave only the "gain rate" menu entry checked.
    infomationGainToolStripMenuItem.Checked = false;
    gainRateToolStripMenuItem.Checked = true;
    giniToolStripMenuItem.Checked = false;

    // Record the choice, then regenerate the tree with it.
    attributeSelectionMethod = attributeSelectionMethodType.GainRate;
    CreateTree();
}
// Menu handler: switches the attribute-selection measure to Gini
// and rebuilds the tree.
private void giniToolStripMenuItem_Click(object sender, EventArgs e)
{
    // Leave only the "Gini" menu entry checked.
    infomationGainToolStripMenuItem.Checked = false;
    gainRateToolStripMenuItem.Checked = false;
    giniToolStripMenuItem.Checked = true;

    // Record the choice, then regenerate the tree with it.
    attributeSelectionMethod = attributeSelectionMethodType.Gini;
    CreateTree();
}
// Rebuilds the tree shown in the DecisionTree control from the records in `al`,
// using the currently selected measure. Does nothing except warn the user when
// the cleaned training data set is not the one selected (istraindata2 is false).
private void CreateTree()
{
    // Guard: the cleaned training data set must be selected first.
    if (istraindata2 == false)
    {
        MessageBox.Show("请先输入清理后的训练数据集,并重新选择决策树度量属性");
        testtextBox.Text = "";
        return;
    }

    // The three known measures need no preparation here; any other value only
    // triggers a warning, after which the tree is still rebuilt.
    switch (attributeSelectionMethod)
    {
        case attributeSelectionMethodType.InformationGain:
        case attributeSelectionMethodType.GainRate:
        case attributeSelectionMethodType.Gini:
            break;
        default:
            MessageBox.Show("请选择正确的决策树度量属性");
            break;
    }

    // Start from an empty view and a clean output box.
    this.DecisionTree.Nodes.Clear();
    testtextBox.Text = "";

    // Every build starts with the full set of candidate attributes, because
    // GenerateDecisionTree removes entries from the list it is given.
    attributeList.Clear();
    attributeList.AddRange(new string[] { "age", "education", "occupation", "sex", "native_country" });

    TreeNode root = GenerateDecisionTree(al, attributeList);
    this.DecisionTree.Nodes.Add(root);
}
// Algorithm Generate_decision_tree: builds a decision tree from the training tuples
// of data partition D.
//   d              - ArrayList of Item records forming the partition D.
//   attribute_list - names of the candidate attributes; the chosen splitting attribute
//                    is removed from this list in place.
// Returns the root TreeNode of the (sub)tree. A leaf's text is the class label; an inner
// node's text is the splitting attribute. A child built by recursion is prefixed with
// "<branch description>&&"; a child created for an empty partition carries the bare
// majority label of D.
private TreeNode GenerateDecisionTree(ArrayList d, ArrayList attribute_list)
{
    // (1) Create node N.
    TreeNode node = new TreeNode();

    // (2)-(3) All tuples of D share one class C: N is a leaf labelled C.
    string sharedClass = FindSameClass(d);
    if (sharedClass != null)
    {
        node.Text = sharedClass;
        return node;
    }

    // (4)-(5) No attributes left: N is a leaf labelled with the majority class of D.
    if (attribute_list.Count == 0)
    {
        node.Text = FindMoreClass(d);
        return node;
    }

    // (6)-(7) Pick the "best" splitting criterion and label N with its attribute.
    SplitCriterion criterion = AttributeSelectionMethod(d, attribute_list);
    node.Text = criterion.attribute;

    // (8)-(9) Attributes are always treated as discrete with multiway splits, so the
    // splitting attribute is unconditionally dropped from the candidates.
    attribute_list.Remove(criterion.attribute);

    // (10) One child per outcome j of the splitting criterion.
    for (int j = 0; j < criterion.splitList.Count; j++)
    {
        // (11) Dj: the tuples of D that satisfy outcome j.
        ArrayList partition = (ArrayList)criterion.splitList[j];
        TreeNode child;
        if (partition.Count != 0)
        {
            // (14) Non-empty partition: recurse on a copy of the remaining attributes
            // and prefix the subtree root with the description of this branch.
            child = GenerateDecisionTree(partition, new ArrayList(attribute_list));
            child.Text = criterion.descriptionList[j].ToString() + "&&" + child.Text;
        }
        else
        {
            // (12)-(13) Empty partition: a leaf labelled with the majority class of D.
            child = new TreeNode();
            child.Text = FindMoreClass(d);
        }
        node.Nodes.Add(child);
    }

    // (15) Return N.
    return node;
}
// Checks whether every record in D carries the same class label.
// Returns " => makeover50k=<label>" when they all agree, or null as soon as two
// records differ. For an empty D the label reported is the enum's default value.
private string FindSameClass(ArrayList d)
{
    // Reference label: the first record's class, or the default when D is empty.
    makeover50kType reference = default(makeover50kType);
    if (d.Count > 0)
    {
        reference = ((Item)d[0]).makeover50k;
    }

    // Any later record with a different label means D is mixed.
    for (int k = 1; k < d.Count; k++)
    {
        if (((Item)d[k]).makeover50k != reference)
        {
            return null;
        }
    }

    return " => makeover50k=" + reference.ToString();
}
// Finds the majority class in D.
// Returns " => makeover50k=yes" when at least half of the records are labelled yes
// (ties and an empty D therefore give yes), otherwise " => makeover50k=no".
private string FindMoreClass(ArrayList d)
{
    int yesVotes = 0;
    foreach (Item record in d)
    {
        if (record.makeover50k == makeover50kType.yes)
        {
            yesVotes += 1;
        }
    }

    string winner = (yesVotes * 2 >= d.Count) ? "yes" : "no";
    return " => makeover50k=" + winner;
}
//找出“最好”的splitting_criterion
private SplitCriterion AttributeSelectionMethod(ArrayList d, ArrayList attribute_list)
{
SplitCriterion splitting_criterion = new SplitCriterion();
switch (attributeSelectionMethod)
{
case attributeSelectionMethodType.InformationGain:
//double info; //按类划分的期望信息
double infoA; //按A划分的期望信息
//double gainA; //信息增益
//计算info
/*
info = Info(d);
*/
//计算最小的InfoA和选择的属性
infoA = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -