⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 基于决策树和贝叶斯的预测分析器
💻 CS
📖 第 1 页 / 共 5 页
字号:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Collections;
using DecisionTree;

namespace DecisionTreeAlgorithm
{
    public partial class Form1 : Form
    {
        // Attribute selection measure used when choosing the splitting attribute.
        enum attributeSelectionMethodType { InformationGain, GainRate, Gini };
        // Age, discretised into six buckets (see note 1 below).
        enum ageType { year0_20, year21_30, year31_40, year41_50, year51_60, yearover60 };
        // Education level, discretised into six buckets (see note 2 below).
        enum educationType { edu1_3, edu4_6, edu7_9, edu10_12, edu13_14, edu15_16 };
        // Occupation, 14 categories (see note 3 below).
        enum occupationType {Tech_support, Craft_repair, Other_service, Sales, Exec_managerial, Prof_specialty,
            Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving,
            Priv_house_serv, Protective_serv, Armed_Forces};
        // Sex, two categories.
        enum sexType { Male, Female };
        // Native country, grouped into five classes (see note 4 below).
        enum native_countryType { developNO1, developNO2, developNO3, developNO4, developNO5};
        // Class label predicted by the tree (presumably "income over 50K" - confirm against the data set).
        enum makeover50kType { yes, no };

        /* How the raw attributes map onto the enums above:
           1. ageType has 6 buckets: <=20 is year0_20, 21-30 is year21_30, 31-40 is year31_40,
              41-50 is year41_50, 51-60 is year51_60, >=61 is yearover60.
           2. The original education_num values 1-16 are grouped into 6 levels:
              edu1_3, edu4_6, edu7_9, edu10_12, edu13_14, edu15_16.
           3. occupationType has 14 categories: Tech_support, Craft_repair, Other_service, Sales,
              Exec_managerial, Prof_specialty, Handlers_cleaners, Machine_op_inspct, Adm_clerical,
              Farming_fishing, Transport_moving, Priv_house_serv, Protective_serv, Armed_Forces.
           4. The original native-country values are grouped into 5 classes developNO1..developNO5.
              The bracketed thresholds are copied from the original note; the quantity they
              measure is not stated there (TODO confirm).
              developNO1 (<0.1):        Outlying-US(Guam-USVI-etc), Vietnam, Mexico, Dominican-Republic, Laos,
                                        Haiti, Hungary, Guatemala, Nicaragua, Scotland, El-Salvador,
                                        Trinadad&Tobago, Holand-Netherlands
              developNO2 (>=0.1 <0.2):  Puerto-Rico, South, China, Cuba, Poland, Jamaica, Portugal, Ireland,
                                        Ecuador, Peru, ?
              developNO3 (>=0.2 <0.3):  Honduras, France, Columbia, United-States, England, Germany, Greece,
                                        Philippines, Thailand, Yugoslavia
              developNO4 (>=0.3 <0.4):  India, Japan
              developNO5 (>=0.4):       Cambodia, Canada, Iran, Italy, Taiwan, Hong
           5. sexType has two categories: Male and Female.
         */

        // One data tuple: five discretised attributes plus the class label.
        // databasetoArray fills the fields from grid columns 0-5 in the order declared below.
        struct Item
        {
            public int ID;                              // 1-based sequence number assigned by databasetoArray
            public ageType age;                         // grid column 0
            public educationType education;             // grid column 1
            public occupationType occupation;           // grid column 2
            public sexType sex;                         // grid column 3
            public native_countryType native_country;   // grid column 4
            public makeover50kType makeover50k;         // grid column 5; the class label used by the tree
        }

        private attributeSelectionMethodType attributeSelectionMethod;      // attribute selection measure currently in effect
        public ArrayList al;                                                // Items parsed from dataGridView1 by databasetoArray (original note: "test data"); CreateTree passes it to GenerateDecisionTree - NOTE(review): confirm which data set it holds
        private ArrayList attributeList;                                    // attribute names handed to GenerateDecisionTree; rebuilt on every CreateTree call
        

        // Constructs the form. The only work done here is the call to InitializeComponent,
        // which is defined in another part of this partial class.
        public Form1()
        {
            this.InitializeComponent();
        }

        // Load handler: fills the four adult_dataDataSet tables through their table adapters,
        // then resets the selection measure to InformationGain and creates the working lists.
        private void Form1_Load(object sender, EventArgs e)
        {
            // Designer-generated loads (they may be moved or removed as needed).
            // The original order is kept: traindata2, traindata1, testdata2, testdata1.
            traindata2TableAdapter.Fill(adult_dataDataSet.traindata2);
            traindata1TableAdapter.Fill(adult_dataDataSet.traindata1);
            testdata2TableAdapter.Fill(adult_dataDataSet.testdata2);
            testdata1TableAdapter.Fill(adult_dataDataSet.testdata1);

            // Initial state, set only after all tables have loaded.
            this.attributeSelectionMethod = attributeSelectionMethodType.InformationGain;
            this.al = new ArrayList();
            this.attributeList = new ArrayList();
        }



        // Rebuilds al from the rows currently held by dataGridView1.
        // Columns 0-5 are read, in order, as age, education, occupation, sex, native_country and
        // makeover50k; each cell's text must be the name of a member of the matching enum.
        // IDs are assigned 1, 2, 3, ... in row order.
        //
        // The grid's "new row" placeholder is skipped by testing IsNewRow on each row. The
        // previous code dropped the last row unconditionally, which lost the final data row
        // whenever the grid had no placeholder row.
        //
        // Enum.Parse throws ArgumentException when a cell's text is not a member name, and a
        // cell whose Value is null still raises NullReferenceException, as before.
        private void databasetoArray()
        {
            al.Clear();

            int rowCount = dataGridView1.RowCount;
            for (int i = 0; i < rowCount; i++)
            {
                DataGridViewRow row = dataGridView1.Rows[i];

                // The placeholder row for adding records carries no data.
                if (row.IsNewRow)
                {
                    continue;
                }

                Item item = new Item();
                item.ID = al.Count + 1;
                item.age = (ageType)Enum.Parse(typeof(ageType), row.Cells[0].Value.ToString());
                item.education = (educationType)Enum.Parse(typeof(educationType), row.Cells[1].Value.ToString());
                item.occupation = (occupationType)Enum.Parse(typeof(occupationType), row.Cells[2].Value.ToString());
                item.sex = (sexType)Enum.Parse(typeof(sexType), row.Cells[3].Value.ToString());
                item.native_country = (native_countryType)Enum.Parse(typeof(native_countryType), row.Cells[4].Value.ToString());
                item.makeover50k = (makeover50kType)Enum.Parse(typeof(makeover50kType), row.Cells[5].Value.ToString());
                al.Add(item);
            }
        }



        // Menu handler: marks "information gain" as the only checked measure, selects it,
        // and rebuilds the tree.
        private void infomationGainToolStripMenuItem_Click(object sender, EventArgs e)
        {
            infomationGainToolStripMenuItem.Checked = true;
            gainRateToolStripMenuItem.Checked = false;
            giniToolStripMenuItem.Checked = false;

            this.attributeSelectionMethod = attributeSelectionMethodType.InformationGain;
            this.CreateTree();
        }

        // Menu handler: marks "gain rate" as the only checked measure, selects it,
        // and rebuilds the tree.
        private void gainRateToolStripMenuItem_Click(object sender, EventArgs e)
        {
            infomationGainToolStripMenuItem.Checked = false;
            gainRateToolStripMenuItem.Checked = true;
            giniToolStripMenuItem.Checked = false;

            this.attributeSelectionMethod = attributeSelectionMethodType.GainRate;
            this.CreateTree();
        }

        // Menu handler: marks "Gini" as the only checked measure, selects it,
        // and rebuilds the tree.
        private void giniToolStripMenuItem_Click(object sender, EventArgs e)
        {
            infomationGainToolStripMenuItem.Checked = false;
            gainRateToolStripMenuItem.Checked = false;
            giniToolStripMenuItem.Checked = true;

            this.attributeSelectionMethod = attributeSelectionMethodType.Gini;
            this.CreateTree();
        }

        // Rebuilds the tree shown in the DecisionTree control from al, using the five
        // attribute names and the currently selected attribute selection measure.
        private void CreateTree()
        {
            // The cleaned training data set must be the one selected; otherwise tell the
            // user, clear the text box and do nothing else.
            if (!istraindata2)
            {
                MessageBox.Show("请先输入清理后的训练数据集,并重新选择决策树度量属性");
                testtextBox.Text = "";
                return;
            }

            // The three known measures need no preparation here. Any other value only
            // raises a warning; the tree is still rebuilt below.
            switch (attributeSelectionMethod)
            {
                case attributeSelectionMethodType.InformationGain:
                case attributeSelectionMethodType.GainRate:
                case attributeSelectionMethodType.Gini:
                    break;
                default:
                    MessageBox.Show("请选择正确的决策树度量属性");
                    break;
            }

            // Discard the previous tree and any earlier text output.
            this.DecisionTree.Nodes.Clear();
            testtextBox.Text = "";

            // Start again from the full attribute set.
            attributeList.Clear();
            attributeList.AddRange(new string[] { "age", "education", "occupation", "sex", "native_country" });

            this.DecisionTree.Nodes.Add(GenerateDecisionTree(al, attributeList));
        }

        
        // Algorithm Generate_decision_tree: builds a decision tree from the training tuples
        // of data partition d.
        //   d              - Item tuples of the current partition.
        //   attribute_list - names of the candidate attributes; the chosen splitting attribute
        //                    is removed from this list in place, and each recursive call
        //                    receives its own copy.
        // Returns the root node of the (sub)tree. A leaf's text is the class label; an inner
        // node's text is the splitting attribute. A child built by recursion is prefixed with
        // its branch description and "&&"; a leaf added for an empty partition has no prefix.
        private TreeNode GenerateDecisionTree(ArrayList d, ArrayList attribute_list)
        {
            // (1) create node N
            TreeNode node = new TreeNode();

            // (2)-(3) every tuple in D belongs to the same class C: N is a leaf labelled C
            string sharedClass = FindSameClass(d);
            if (sharedClass != null)
            {
                node.Text = sharedClass;
                return node;
            }

            // (4)-(5) no attributes left: N is a leaf labelled with the majority class of D
            if (attribute_list.Count == 0)
            {
                node.Text = FindMoreClass(d);
                return node;
            }

            // (6)-(7) find the "best" splitting criterion and label N with its attribute
            SplitCriterion best = AttributeSelectionMethod(d, attribute_list);
            node.Text = best.attribute;

            // (8)-(9) attributes are always treated as discrete with multiway splits allowed,
            // so the splitting attribute is removed unconditionally
            attribute_list.Remove(best.attribute);

            // (10) one child per outcome j of the splitting criterion
            for (int index = 0; index < best.splitList.Count; index++)
            {
                // (11) Dj: the tuples of D that satisfy outcome j
                ArrayList partition = (ArrayList)best.splitList[index];
                TreeNode child;

                if (partition.Count != 0)
                {
                    // (14) non-empty partition: attach the subtree grown from it
                    child = GenerateDecisionTree(partition, new ArrayList(attribute_list));
                    child.Text = best.descriptionList[index].ToString() + "&&" + child.Text;
                }
                else
                {
                    // (12)-(13) empty partition: attach a leaf labelled with the majority class of D
                    child = new TreeNode();
                    child.Text = FindMoreClass(d);
                }

                node.Nodes.Add(child);
            }

            // (15) return N
            return node;
        }

        // Checks whether every tuple in d carries the same makeover50k label.
        // Returns " => makeover50k=<label>" when they all agree, or null as soon as two
        // tuples differ. An empty d counts as agreeing and yields the default label (yes).
        private string FindSameClass(ArrayList d)
        {
            makeover50kType reference = default(makeover50kType);
            bool haveReference = false;

            foreach (Item tuple in d)
            {
                if (!haveReference)
                {
                    // The first tuple fixes the label all others are compared against.
                    reference = tuple.makeover50k;
                    haveReference = true;
                    continue;
                }

                if (tuple.makeover50k != reference)
                {
                    return null;
                }
            }

            return " => makeover50k=" + reference.ToString();
        }

        // Returns the majority makeover50k label of d as " => makeover50k=yes" or
        // " => makeover50k=no". A tie, and therefore also an empty d, resolves to "yes".
        private string FindMoreClass(ArrayList d)
        {
            int yesVotes = 0;
            foreach (Item tuple in d)
            {
                if (tuple.makeover50k == makeover50kType.yes)
                {
                    yesVotes++;
                }
            }

            bool yesWins = yesVotes * 2 >= d.Count;
            return " => makeover50k=" + (yesWins ? "yes" : "no");
        }

        //找出“最好”的splitting_criterion
        private SplitCriterion AttributeSelectionMethod(ArrayList d, ArrayList attribute_list)
        {
            SplitCriterion splitting_criterion = new SplitCriterion();
            switch (attributeSelectionMethod)
            {
                case attributeSelectionMethodType.InformationGain:
                    //double info;           //按类划分的期望信息
                    double infoA;          //按A划分的期望信息
                    //double gainA;           //信息增益

                    //计算info
                    /*
                    info = Info(d);
                    */

                    //计算最小的InfoA和选择的属性
                    infoA = 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -