⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 基于决策树和贝叶斯的预测分析器
💻 CS
📖 第 1 页 / 共 5 页
字号:
                        developNO3++;
                        break;
                    case native_countryType.developNO4:
                        developNO4++;
                        break;
                    case native_countryType.developNO5:
                        developNO5++;
                        break;
                    default:
                        MessageBox.Show("无此国家类别");
                        break;
                }
            }
            return ( - developNO1 * Math.Log(developNO1 / d.Count, 2)
                     - developNO2 * Math.Log(developNO2 / d.Count, 2) 
                     - developNO3 * Math.Log(developNO3 / d.Count, 2) 
                     - developNO4 * Math.Log(developNO4 / d.Count, 2) 
                     - developNO5 * Math.Log(developNO5 / d.Count, 2) ) / d.Count;
        }

        // Computes SplitInfo for attribute a by forwarding d to the routine
        // dedicated to that attribute.
        //   a : attribute name; the recognised values are "age", "education",
        //       "occupation", "sex" and "native_country".
        //   d : collection passed through unchanged to the selected routine.
        // Returns whatever the selected routine returns. For any other name
        // (including null) a message box is shown and 0 is returned.
        private double SplitInfoA(string a, ArrayList d)
        {
            if (a == "age")
            {
                return SplitInfoage(d);
            }

            if (a == "education")
            {
                return SplitInfoeducation(d);
            }

            if (a == "occupation")
            {
                return SplitInfooccupation(d);
            }

            if (a == "sex")
            {
                return SplitInfosex(d);
            }

            if (a == "native_country")
            {
                return SplitInfonative_country(d);
            }

            // Unknown attribute: tell the user, then fall back to 0.
            MessageBox.Show("无此属性");
            return 0;
        }

        // Intended to compute the average-gain SplitInfo.
        // Currently a placeholder: the argument d is never examined and the
        // result is always 0.
        private double SplitInfo(ArrayList d)
        {
            const double placeholderResult = 0;
            return placeholderResult;
        }

        // Computes the Gini split for the "age" attribute.
        //ageType { year0_20, year21_30, year31_40, year41_50, year51_60, yearover60 }
        //
        // d is first divided into six sub-collections, one per age group, via
        // CopySmallCollection (defined elsewhere; presumably it returns the
        // records of d that belong to the given group - confirm there).
        // Each group is then tried as a binary split "this group" versus
        // "all other groups", scored as
        //     |group| / |d| * Gini(group) + |others| / |d| * Gini(others)
        // and the candidate with the smallest score strictly below 1 is kept.
        //
        // Returns a GiniSplit whose gini field holds the best score and whose
        // giniSplitCriterion holds the attribute name "age", the two textual
        // descriptions and the two record lists of the winning split. When no
        // candidate scores below 1 the result keeps gini == 1 and the
        // criterion is left as GiniSplit's constructor created it.
        private GiniSplit Giniage(ArrayList d)
        {
            // Display names, in the same order as the partitions built below.
            string[] groupNames =
            {
                "year0_20", "year21_30", "year31_40",
                "year41_50", "year51_60", "yearover60"
            };

            GiniSplit giniSplit = new GiniSplit();
            giniSplit.gini = 1;

            // All six partitions are built up front, before any scoring.
            ArrayList[] partitions =
            {
                CopySmallCollection(d, "age", ageType.year0_20),
                CopySmallCollection(d, "age", ageType.year21_30),
                CopySmallCollection(d, "age", ageType.year31_40),
                CopySmallCollection(d, "age", ageType.year41_50),
                CopySmallCollection(d, "age", ageType.year51_60),
                CopySmallCollection(d, "age", ageType.yearover60)
            };

            for (int chosen = 0; chosen < partitions.Length; chosen++)
            {
                // Fresh lists per candidate: the winning pair is stored in the
                // result, so it must not be reused by later candidates.
                ArrayList chosenRecords = new ArrayList();
                ArrayList otherRecords = new ArrayList();
                string otherNames = "";

                chosenRecords.AddRange(partitions[chosen]);
                for (int other = 0; other < partitions.Length; other++)
                {
                    if (other == chosen)
                    {
                        continue;
                    }

                    // Remaining groups are appended in their natural order.
                    otherRecords.AddRange(partitions[other]);
                    if (otherNames.Length > 0)
                    {
                        otherNames += ", ";
                    }
                    otherNames += groupNames[other];
                }

                // Size-weighted Gini of the two halves of this candidate split.
                double gini = (double)chosenRecords.Count / d.Count * Gini(chosenRecords)
                            + (double)otherRecords.Count / d.Count * Gini(otherRecords);

                // Strict comparison: on ties the earlier candidate is kept.
                if (gini < giniSplit.gini)
                {
                    giniSplit.giniSplitCriterion.descriptionList.Clear();
                    giniSplit.giniSplitCriterion.splitList.Clear();
                    giniSplit.gini = gini;
                    giniSplit.giniSplitCriterion.attribute = "age";
                    giniSplit.giniSplitCriterion.descriptionList.Add(groupNames[chosen]);
                    giniSplit.giniSplitCriterion.descriptionList.Add(otherNames);
                    giniSplit.giniSplitCriterion.splitList.Add(chosenRecords);
                    giniSplit.giniSplitCriterion.splitList.Add(otherRecords);
                }
            }

            return giniSplit;
        }
        //计算"education"属性的Gini
        //educationType { edu1_3, edu4_6, edu7_9, edu10_12, edu13_14, edu15_16 }
        private GiniSplit Ginieducation(ArrayList d)
        {
            double gini;
            ArrayList d1, d2, d3, d4, d5, d6;
            ArrayList D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12;
            GiniSplit giniSplit = new GiniSplit();
            giniSplit.gini = 1;
            D1 = new ArrayList();
            D2 = new ArrayList();
            D3 = new ArrayList();
            D4 = new ArrayList();
            D5 = new ArrayList();
            D6 = new ArrayList();
            D7 = new ArrayList();
            D8 = new ArrayList();
            D9 = new ArrayList();
            D10 = new ArrayList();
            D11 = new ArrayList();
            D12 = new ArrayList();

            d1 = CopySmallCollection(d, "education", educationType.edu1_3);
            d2 = CopySmallCollection(d, "education", educationType.edu4_6);
            d3 = CopySmallCollection(d, "education", educationType.edu7_9);
            d4 = CopySmallCollection(d, "education", educationType.edu10_12);
            d5 = CopySmallCollection(d, "education", educationType.edu13_14);
            d6 = CopySmallCollection(d, "education", educationType.edu15_16);

            //计算"edu1_3"+"edu4_6, edu7_9, edu10_12, edu13_14, edu15_16"的gini
            D1.Clear();
            D2.Clear();
            D1.AddRange(d1);
            D2.AddRange(d2);
            D2.AddRange(d3);
            D2.AddRange(d4);
            D2.AddRange(d5);
            D2.AddRange(d6);

            gini = (double)D1.Count / d.Count * Gini(D1) + (double)D2.Count / d.Count * Gini(D2);
            if (gini < giniSplit.gini)
            {
                giniSplit.giniSplitCriterion.descriptionList.Clear();
                giniSplit.giniSplitCriterion.splitList.Clear();
                giniSplit.gini = gini;
                giniSplit.giniSplitCriterion.attribute = "education";
                giniSplit.giniSplitCriterion.descriptionList.Add("edu1_3");
                giniSplit.giniSplitCriterion.descriptionList.Add("edu4_6, edu7_9, edu10_12, edu13_14, edu15_16");
                giniSplit.giniSplitCriterion.splitList.Add(D1);
                giniSplit.giniSplitCriterion.splitList.Add(D2);
            }
            else
            {
                D1.Clear();
                D2.Clear();
            }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -