⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 program.cs

📁 应用FCM(模糊c均值聚类)算法到文本聚类 采用两种方法计算文本相似度 采用ShootSeg分词 采用sogou互联网词库简化特征值计算
💻 CS
📖 第 1 页 / 共 2 页
字号:
            {
                return this._sogou1;
            }
            set
            {
                this._sogou1 = this.sogou1;
            }
        }
    }

    /// <summary>
    /// A single term together with the statistics tracked for it:
    /// its text, an occurrence count and a numeric feature value.
    /// </summary>
    public class Word
    {
        // Backing storage for the public properties below.
        private string keyText;
        private int frequency;
        private double featureValue;

        /// <summary>
        /// Creates a word entry for the given term; the frequency and
        /// feature value keep their default value of zero.
        /// </summary>
        /// <param name="key">Text of the term.</param>
        public Word(string key)
        {
            this.keyText = key;
        }

        /// <summary>Text of the term.</summary>
        public string Key
        {
            get
            {
                return this.keyText;
            }
            set
            {
                this.keyText = value;
            }
        }

        /// <summary>Occurrence count stored for the term.</summary>
        public int WordFrequency
        {
            get
            {
                return this.frequency;
            }
            set
            {
                this.frequency = value;
            }
        }

        /// <summary>Numeric feature value stored for the term.</summary>
        public double CharacterValue
        {
            get
            {
                return this.featureValue;
            }
            set
            {
                this.featureValue = value;
            }
        }
    }

    /// <summary>
    /// Fuzzy c-means (FCM) clustering of text documents. Cluster centres are
    /// always actual documents (indices into the input array), and distances
    /// come from a pairwise dissimilarity matrix built via <c>Document</c>.
    /// </summary>
    public class FCM
    {
        public FCM()
        { }

        /// <summary>
        /// Clusters the given documents and returns a textual report containing
        /// the dissimilarity matrix, the membership matrix and centres of every
        /// iteration, and the final class assignment.
        /// </summary>
        /// <param name="fName">Paths of the documents to cluster.</param>
        /// <param name="c">Number of clusters; must satisfy 1 &lt;= c &lt;= fName.Length.</param>
        /// <param name="m">Fuzziness exponent; must be greater than 1.</param>
        /// <param name="s">Maximum number of iterations; must not be negative.</param>
        /// <param name="sim">
        /// <c>true</c> to use the cosine based dissimilarity, <c>false</c> to use
        /// the generalized Jaccard coefficient based dissimilarity.
        /// </param>
        /// <returns>The multi-line report described above.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fName"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fName"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="c"/>, <paramref name="m"/> or <paramref name="s"/> is outside its valid range.
        /// </exception>
        public string FCMf(string[] fName, int c, int m, int s, bool sim)
        {
            if (fName == null)
            {
                throw new ArgumentNullException("fName");
            }
            int n = fName.Length; // number of samples
            if (n == 0)
            {
                throw new ArgumentException("At least one document is required.", "fName");
            }
            if (c < 1 || c > n)
            {
                // c distinct centres must be drawn from n documents.
                throw new ArgumentOutOfRangeException("c", "The cluster count must be between 1 and the number of documents.");
            }
            if (m <= 1)
            {
                // The membership exponent is 2 / (m - 1).
                throw new ArgumentOutOfRangeException("m", "The fuzziness exponent must be greater than 1.");
            }
            if (s < 0)
            {
                throw new ArgumentOutOfRangeException("s", "The iteration limit must not be negative.");
            }

            // The report lists the dissimilarity, centre and membership matrices.
            StringBuilder strBuild = new StringBuilder("");
            double[,] rmatrix = new double[n, n];  // dissimilarity matrix r
            double[,] umatrix = new double[c, n];  // membership matrix u
            int[,] cmatrix = new int[s + 1, c];    // centres, one row per iteration (row 0 = initial)
            int[,] fcmatrix = new int[c, n];       // final result: members of every class

            // Build the pairwise dissimilarity matrix.
            Document Doc = new Document(fName);
            strBuild.Append("文档对应编号:\n");
            for (int j = 0; j < n; j++)
            {
                strBuild.Append("\t" + fName[j].Substring(fName[j].LastIndexOf('\\') + 1) + "(" + j + ")" + "\t\n");
            }
            strBuild.Append("不相似度矩阵rmatrix:\n");
            // NOTE(review): assumes Doc.Words.Keys yields exactly one Document per
            // entry of fName, in the same order - confirm against Document.
            int ir = 0, jr = 0;
            foreach (Document Doc1 in Doc.Words.Keys)
            {
                strBuild.Append(ir + "\t");
                foreach (Document Doc2 in Doc.Words.Keys)
                {
                    // Was "sim = true" (an assignment), which made the Jaccard branch unreachable.
                    if (sim)
                    {
                        rmatrix[ir, jr] = Doc.UnSimilitudeValueToDocumentUsingCos(Doc1, Doc2);
                    }
                    else
                    {
                        rmatrix[ir, jr] = Doc.UnSimilitudeValueToDocumentUsingGeneralizedJaccardCoefficient(Doc1, Doc2);
                    }
                    strBuild.Append(rmatrix[ir, jr].ToString() + "\t");
                    jr += 1;
                }
                strBuild.Append("\n");
                ir += 1;
                jr = 0;
            }

            // Rescale every row of the dissimilarity matrix to [0, 1].
            NormalizeRows(rmatrix);

            // Pick c distinct random documents as the initial centres.
            Random ra = new Random();
            strBuild.Append("随机产生" + c + "个中心:\n");
            for (int i = 0; i < c; i++)
            {
                int candidate = ra.Next(n); // upper bound is exclusive, so every index is reachable
                while (ContainsCenter(cmatrix, 0, i, candidate))
                {
                    candidate = ra.Next(n);
                }
                cmatrix[0, i] = candidate;
                strBuild.Append(cmatrix[0, i].ToString() + "\t");
            }
            strBuild.Append("\n");

            // Iterate until the centres stop moving or the iteration limit is hit.
            bool centersChanged = true;
            int times = 1;
            double exponent = 2.0 / (m - 1);
            strBuild.Append("求解过程:\n");
            while (times <= s && centersChanged)
            {
                // Membership matrix u for the centres of the previous iteration.
                strBuild.Append("第" + times + "次运行结果:\n");
                strBuild.Append("隶属度矩阵umatrix:\n");
                UpdateMemberships(rmatrix, umatrix, cmatrix, times - 1, exponent);
                for (int i = 0; i < c; i++)
                {
                    strBuild.Append(i + "\t");
                    for (int j = 0; j < n; j++)
                    {
                        strBuild.Append(umatrix[i, j].ToString() + "\t");
                    }
                    strBuild.Append("\n");
                }

                // New centres: for every cluster the document with the smallest
                // membership-weighted dissimilarity to all samples.
                strBuild.Append("新的中心矩阵rmatrix:\n");
                for (int i = 0; i < c; i++)
                {
                    int q = 0;
                    double tmin = double.MaxValue;
                    for (int k = 0; k < n; k++)
                    {
                        double cost = 0;
                        for (int j = 0; j < n; j++)
                        {
                            cost += Math.Pow(umatrix[i, j], m) * rmatrix[k, j];
                        }
                        if (tmin > cost)
                        {
                            tmin = cost;
                            q = k;
                        }
                    }
                    // Keep centres distinct: probe forward until an unused document
                    // is found. Terminates because fewer than n centres are taken.
                    while (ContainsCenter(cmatrix, times, i, q))
                    {
                        q = (q + 1) % n;
                    }
                    // Assigned for every cluster, including the first one.
                    cmatrix[times, i] = q;
                    strBuild.Append(cmatrix[times, i].ToString() + "\t");
                }
                strBuild.Append("\n");
                times++;
                strBuild.Append("\n");

                // Stop as soon as no centre differs from the previous iteration.
                centersChanged = false;
                for (int i = 0; i < c; i++)
                {
                    if (cmatrix[times - 1, i] != cmatrix[times - 2, i])
                    {
                        centersChanged = true;
                        break;
                    }
                }
                if (!centersChanged)
                {
                    strBuild.Append("中心矩阵不再变化,退出循环!聚类结果为:\n");
                }
            }

            // Assign every sample to the class with the highest membership.
            int[] list = new int[c]; // number of members collected per class
            if (centersChanged)
            {
                strBuild.Append("循环次数达到" + s + "次,聚类结果为:\n");
            }
            for (int j = 0; j < n; j++)
            {
                // Start from class 0 for every sample so that a previous sample's
                // winner can never leak into this one.
                int best = 0;
                double bestMembership = umatrix[0, j];
                for (int i = 1; i < c; i++)
                {
                    if (bestMembership < umatrix[i, j])
                    {
                        bestMembership = umatrix[i, j];
                        best = i;
                    }
                }
                fcmatrix[best, list[best]] = j;
                list[best]++;
            }
            for (int i = 1; i <= c; i++)
            {
                strBuild.Append("第" + i + "类:\t");
                // Bound by this class's own member count.
                for (int j = 0; j < list[i - 1]; j++)
                {
                    strBuild.Append("(" + fcmatrix[i - 1, j] + "," + umatrix[i - 1, fcmatrix[i - 1, j]] + ")" + "\t");
                }
                strBuild.Append("\n");
            }
            strBuild.Append("结束!");
            return (strBuild.ToString());
        }

        // Min-max rescales every row of the square matrix in place; a constant
        // row becomes all zeros instead of NaN.
        private static void NormalizeRows(double[,] rmatrix)
        {
            int n = rmatrix.GetLength(0);
            for (int i = 0; i < n; i++)
            {
                double rowMin = rmatrix[i, 0];
                double rowMax = rmatrix[i, 0];
                for (int j = 1; j < n; j++)
                {
                    if (rowMin > rmatrix[i, j])
                    {
                        rowMin = rmatrix[i, j];
                    }
                    if (rowMax < rmatrix[i, j])
                    {
                        rowMax = rmatrix[i, j];
                    }
                }
                double range = rowMax - rowMin;
                for (int j = 0; j < n; j++)
                {
                    rmatrix[i, j] = (range == 0) ? 0 : (rmatrix[i, j] - rowMin) / range;
                }
            }
        }

        // Returns true when value already occurs among the first count centres
        // stored in the given row of cmatrix.
        private static bool ContainsCenter(int[,] cmatrix, int row, int count, int value)
        {
            for (int e = 0; e < count; e++)
            {
                if (cmatrix[row, e] == value)
                {
                    return true;
                }
            }
            return false;
        }

        // Fills umatrix with the FCM memberships u[i, j] = 1 / sum_k (d_ij / d_kj)^exponent,
        // where d_xj is the dissimilarity between centre x (taken from row centerRow of
        // cmatrix) and sample j. Smaller dissimilarity yields larger membership.
        private static void UpdateMemberships(double[,] rmatrix, double[,] umatrix, int[,] cmatrix, int centerRow, double exponent)
        {
            int c = umatrix.GetLength(0);
            int n = umatrix.GetLength(1);
            for (int j = 0; j < n; j++)
            {
                // Samples at exactly zero dissimilarity from one or more centres
                // would divide by zero below; they belong fully (shared equally)
                // to those centres.
                int zeroCount = 0;
                for (int k = 0; k < c; k++)
                {
                    if (rmatrix[cmatrix[centerRow, k], j] == 0)
                    {
                        zeroCount++;
                    }
                }

                for (int i = 0; i < c; i++)
                {
                    double dij = rmatrix[cmatrix[centerRow, i], j];
                    if (zeroCount > 0)
                    {
                        umatrix[i, j] = (dij == 0) ? 1.0 / zeroCount : 0.0;
                        continue;
                    }

                    double sum = 0;
                    for (int k = 0; k < c; k++)
                    {
                        sum += Math.Pow(dij / rmatrix[cmatrix[centerRow, k], j], exponent);
                    }
                    umatrix[i, j] = 1 / sum;
                }
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -