📄 program.cs
字号:
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;
namespace StandardApriori
{
class Program
{
/// <summary>
/// Entry point: mines the built-in sample transactions for frequent itemsets,
/// prints them with their support counts, then prints the association rules
/// derived from them with their confidence as a whole-number percentage.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Sample transactions and the single-item candidates the search starts from.
    ArrayList transactions = InitTranSet();
    ArrayList candidates = InitItemSet();

    // Minimum support (fraction of the transaction count) and minimum confidence.
    double minSupport = 0.2;
    double minConfidence = 0.3;

    // Frequent itemsets of every size, each printed as "items-supportCount".
    List<ItemSet> frequentSets = Apriori(transactions, candidates, minSupport);
    Console.WriteLine("关联规则的频繁项集为");
    foreach (ItemSet frequentSet in frequentSets)
    {
        Console.WriteLine(frequentSet.Items + "-" + frequentSet.Sup);
    }

    // Rules that reach the minimum confidence, each printed as "rule-NN%".
    List<AssoRule> rules = AssociationRules(frequentSets, minConfidence);
    Console.WriteLine("关联规则的可信规则为");
    foreach (AssoRule rule in rules)
    {
        Console.WriteLine(rule.Items + "-" + (rule.Conf * 100).ToString("#0") + "%");
    }

    // Keep the console window open until the user presses a key.
    Console.Read();
}
#region Apriori方法调用
/// <summary>
/// Finds frequent itemsets level by level: InitLK keeps the current candidates that
/// reach the support threshold, AprioriGen derives the next level's candidates from
/// them, and the search stops at the first level that yields no frequent itemset.
/// </summary>
/// <param name="D">Transactions, each a comma-separated string of items.</param>
/// <param name="C">Candidate itemsets for the first level. The list is not modified.</param>
/// <param name="sup">Minimum support, as a fraction of the number of transactions.</param>
/// <returns>The frequent itemsets of all levels, in the order the levels were processed.</returns>
static List<ItemSet> Apriori(ArrayList D, ArrayList C, double sup)
{
    List<ItemSet> L = new List<ItemSet>();

    // Work on a local reference so the caller's candidate list is never cleared or replaced.
    ArrayList candidates = C;
    while (true)
    {
        List<ItemSet> level = InitLK(D, candidates, sup);
        if (level.Count == 0)
        {
            // No candidate reached the threshold: everything found so far is the answer.
            return L;
        }
        L.AddRange(level);

        // AprioriGen works on the item strings only, so strip off the support counts.
        ArrayList Lk = new ArrayList();
        for (int i = 0; i < level.Count; i++)
        {
            Lk.Add(level[i].Items.ToString());
        }
        candidates = AprioriGen(Lk);
    }
}
#endregion
#region AprioriGen: candidate generation
/// <summary>
/// Builds the candidate itemsets one item larger than the itemsets in <paramref name="L"/>.
/// Every pair of itemsets whose union has exactly one more item than the first of the
/// pair produces a candidate; a candidate is kept only if it is not already listed and
/// every subset obtained by dropping one of its items is itself present in
/// <paramref name="L"/> (the prune step).
/// </summary>
/// <param name="L">Itemsets of one common size, each a comma-separated string of items.</param>
/// <returns>
/// The surviving candidates as comma-separated strings: the text of the later itemset of
/// the pair followed by the item it was missing.
/// </returns>
static ArrayList AprioriGen(ArrayList L)
{
    ArrayList C = new ArrayList();

    // Split every itemset once up front instead of once per pair.
    string[] texts = new string[L.Count];
    string[][] frequent = new string[L.Count][];
    for (int i = 0; i < L.Count; i++)
    {
        texts[i] = L[i].ToString();
        frequent[i] = texts[i].Split(',');
    }

    // Item arrays of the candidates accepted so far, kept parallel to C.
    List<string[]> accepted = new List<string[]>();

    for (int i = 0; i < frequent.Length; i++)
    {
        for (int j = i + 1; j < frequent.Length; j++)
        {
            // Union of the pair: the text of L[j] followed by the items of L[i] it lacks.
            string temp = texts[j];
            foreach (string item in frequent[i])
            {
                if (Array.IndexOf(frequent[j], item) < 0)
                {
                    temp = temp + "," + item;
                }
            }

            // Only unions that grew by exactly one item are candidates for the next level.
            string[] union = temp.Split(',');
            if (union.Length != frequent[i].Length + 1)
            {
                continue;
            }

            // Compare whole items, not substrings: "I1" must not match inside "I12".
            if (IsAlreadyListed(accepted, union))
            {
                continue;
            }

            // Prune: a candidate with a missing one-item-smaller subset cannot be frequent.
            if (!HasAllSmallerSubsets(union, frequent))
            {
                continue;
            }

            C.Add(temp);
            accepted.Add(union);
        }
    }
    return C;
}

// True when some accepted candidate contains every item of the given candidate.
static bool IsAlreadyListed(List<string[]> accepted, string[] candidate)
{
    foreach (string[] existing in accepted)
    {
        bool containsAll = true;
        foreach (string item in candidate)
        {
            if (Array.IndexOf(existing, item) < 0)
            {
                containsAll = false;
                break;
            }
        }
        if (containsAll)
        {
            return true;
        }
    }
    return false;
}

// True when, for each item of the candidate, the candidate without that item appears in 'frequent'.
static bool HasAllSmallerSubsets(string[] candidate, string[][] frequent)
{
    for (int skip = 0; skip < candidate.Length; skip++)
    {
        bool found = false;
        for (int f = 0; f < frequent.Length && !found; f++)
        {
            string[] known = frequent[f];
            if (known.Length != candidate.Length - 1)
            {
                continue;
            }

            // Same size and every remaining item present means the two sets are equal.
            bool match = true;
            for (int t = 0; t < candidate.Length && match; t++)
            {
                if (t != skip && Array.IndexOf(known, candidate[t]) < 0)
                {
                    match = false;
                }
            }
            found = match;
        }
        if (!found)
        {
            return false;
        }
    }
    return true;
}
#endregion
#region InitLK: support counting
/// <summary>
/// Counts, for every candidate itemset in <paramref name="I"/>, how many transactions in
/// <paramref name="D"/> contain all of its items, and returns the candidates whose count
/// is at least <paramref name="sup"/> times the number of transactions.
/// </summary>
/// <param name="D">Transactions, each a comma-separated string of items.</param>
/// <param name="I">Candidate itemsets, each a comma-separated string of items.</param>
/// <param name="sup">Minimum support, as a fraction of the number of transactions.</param>
/// <returns>The qualifying candidates with their counts, in candidate order.</returns>
static List<ItemSet> InitLK(ArrayList D, ArrayList I, double sup)
{
    List<ItemSet> frequent = new List<ItemSet>();
    if (I.Count == 0)
    {
        // Nothing to count.
        return frequent;
    }

    Regex comma = new Regex(",");
    int[] hits = new int[I.Count];

    // One pass over the transactions, testing every candidate against each of them.
    foreach (object transaction in D)
    {
        string[] transactionItems = comma.Split(transaction.ToString());
        for (int idx = 0; idx < I.Count; idx++)
        {
            string[] candidateItems = comma.Split(I[idx].ToString());
            if (IsSubSet(candidateItems, transactionItems))
            {
                hits[idx]++;
            }
        }
    }

    // Keep the candidates that reach the threshold, recording their raw counts.
    for (int idx = 0; idx < hits.Length; idx++)
    {
        if (hits[idx] >= sup * D.Count)
        {
            ItemSet entry = new ItemSet();
            entry.Items = I[idx].ToString();
            entry.Sup = hits[idx];
            frequent.Add(entry);
        }
    }
    return frequent;
}
#endregion
#region 根据频繁项目集得到规则
/// <summary>
/// Derives rules of the form "X--&gt;Y" from the frequent itemsets that have as many items
/// as the last entry of <paramref name="L"/>. For each such itemset, X and Y are entries of
/// <paramref name="L"/> that are both contained in it, share no item, and together have as
/// many items as it does. The confidence of a rule is the support of the itemset divided
/// by the support of X.
/// </summary>
/// <param name="L">Frequent itemsets with their support counts.</param>
/// <param name="c">Minimum confidence a rule must reach to be returned.</param>
/// <returns>The rules whose confidence is at least <paramref name="c"/>; empty when <paramref name="L"/> is null or empty.</returns>
static List<AssoRule> AssociationRules(List<ItemSet> L, double c)
{
    List<AssoRule> R = new List<AssoRule>();

    // Without any frequent itemset there is no "last entry" to take the size from.
    if (L == null || L.Count == 0)
    {
        return R;
    }

    // Split every itemset once instead of inside the nested loops.
    string[][] items = new string[L.Count][];
    for (int n = 0; n < L.Count; n++)
    {
        items[n] = L[n].Items.ToString().Split(',');
    }

    int Lk = items[L.Count - 1].Length;
    for (int cnt = L.Count - 1; cnt >= 0; cnt--)
    {
        string[] whole = items[cnt];
        if (whole.Length != Lk)
        {
            continue;
        }

        // i is the number of items on the left-hand side of the rule.
        for (int i = 1; i < Lk; i++)
        {
            for (int j = 0; j < L.Count; j++)
            {
                string[] antecedent = items[j];
                if (antecedent.Length != i || !IsSubSet(antecedent, whole))
                {
                    continue;
                }

                for (int k = 0; k < L.Count; k++)
                {
                    string[] consequent = items[k];
                    if (consequent.Length != Lk - i || !IsSubSet(consequent, whole))
                    {
                        continue;
                    }

                    // The two sides must split the itemset; overlapping sides are not a rule.
                    if (HaveCommonItem(antecedent, consequent))
                    {
                        continue;
                    }

                    // Use the support of the itemset being split, not of the last entry in L.
                    double conf = (double)L[cnt].Sup / L[j].Sup;
                    if (conf >= c)
                    {
                        AssoRule aRule = new AssoRule();
                        aRule.Items = L[j].Items + "-->" + L[k].Items;
                        aRule.Conf = conf;
                        R.Add(aRule);
                    }
                }
            }
        }
    }
    return R;
}

// True when the two item arrays have at least one item in common.
static bool HaveCommonItem(string[] first, string[] second)
{
    foreach (string item in first)
    {
        if (Array.IndexOf(second, item) >= 0)
        {
            return true;
        }
    }
    return false;
}
#endregion
#region 判断一个项集是否是另外一个的子集
/// <summary>
/// Tells whether every element of <paramref name="sub1"/> also occurs in
/// <paramref name="sub2"/>. An empty <paramref name="sub1"/> is a subset of anything.
/// </summary>
/// <param name="sub1">Items that must all be present.</param>
/// <param name="sub2">Items to search in.</param>
/// <returns>True when no element of <paramref name="sub1"/> is missing from <paramref name="sub2"/>.</returns>
static bool IsSubSet(string[] sub1, string[] sub2)
{
    foreach (string wanted in sub1)
    {
        bool present = false;
        foreach (string candidate in sub2)
        {
            if (wanted == candidate)
            {
                present = true;
                break;
            }
        }

        // A single missing element settles the answer.
        if (!present)
        {
            return false;
        }
    }
    return true;
}
#endregion
#region 初始化事务集和项目集
/// <summary>
/// Builds the list of single items used as the first-level candidates.
/// </summary>
/// <returns>A new list holding "I1" through "I5", in that order.</returns>
static ArrayList InitItemSet()
{
    string[] items = { "I1", "I2", "I3", "I4", "I5" };

    ArrayList iSet = new ArrayList();
    foreach (string item in items)
    {
        iSet.Add(item);
    }
    return iSet;
}
/// <summary>
/// Builds the sample transaction set; each transaction is a comma-separated string of items.
/// </summary>
/// <returns>A new list holding the nine sample transactions, in a fixed order.</returns>
static ArrayList InitTranSet()
{
    string[] transactions =
    {
        "I1,I2,I5",
        "I2,I4",
        "I2,I3",
        "I1,I2,I4",
        "I1,I3",
        "I2,I3",
        "I1,I3",
        "I1,I2,I3,I5",
        "I1,I2,I3"
    };

    ArrayList tSet = new ArrayList();
    foreach (string transaction in transactions)
    {
        tSet.Add(transaction);
    }
    return tSet;
}
#endregion
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -