⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_main.cpp

📁 数据挖掘中的经典算法Apriori实现
💻 CPP
字号:
//#include "list.h"
//#include "items.h"
#include <stdio.h>
#include <stdlib.h>
//#include <iostream>
#include <string.h>
//#include "itemSet.h"
//#include "itemSet.cpp"  //自动包含了
//#include "hashTree.h"
#include "apriori.h"
/**
 * Load a set of transactions from a comma-separated text file.
 *
 * Every input line that contains at least one ',' is parsed as one
 * transaction: each comma-separated token is converted with atoi() and added
 * to a fresh itemSet. A transaction holding more than one item gets a support
 * of 1 and is appended to the returned list; shorter transactions are
 * discarded. Lines without a comma are ignored.
 *
 * @param datafile    path of the file to read
 * @param max_pageno  out: largest item number encountered, or -1 when no item
 *                    was read (including when the file cannot be opened)
 * @param keeporder   forwarded to itemSet::keeporder() for every transaction
 * @return a newly allocated list (owned by the caller), or NULL when the
 *         file cannot be opened
 */
list *loadItemSets(char *datafile, int& max_pageno, bool keeporder)
{
    // Line terminators are delimiters as well, so a trailing comma
    // ("1,2,\n") does not produce a bare "\n" token that atoi() would turn
    // into a phantom item 0.
    static const char delims[] = ",\r\n";
    char line[20*4096];

    // Give the out-parameter a defined value on every path, including the
    // early return below.
    max_pageno = -1;

    FILE *fp = fopen(datafile, "rt");
    if (fp == NULL)
    {
        printf("Cannot open the data file %s !\n", datafile);
        return (list *)NULL;
    }

    printf("\n\nLoading data from file %s  ...  ", datafile);
    fflush(stdout);

    // Container for all transactions read from the file.
    list *m_data = new list();

    while (fgets(line, sizeof line, fp) != NULL)
    {
        // Must be tested before strtok(), which overwrites the delimiters.
        if (strchr(line, ',') == NULL)
            continue;

        // The line buffer is not needed after tokenising, so it is split in
        // place instead of going through a heap copy.
        char *tok = strtok(line, delims);
        if (tok == NULL)
            continue;               // nothing but separators on this line

        itemSet *pitemset = new itemSet();
        pitemset->keeporder(keeporder);

        for ( ; tok != NULL; tok = strtok((char *)NULL, delims))
        {
            int pageno = atoi(tok);
            if (max_pageno < pageno)
                max_pageno = pageno;
            pitemset->add(pageno);
        }

        // Only transactions with more than one item are kept.
        if (pitemset->size() > 1)
        {
            pitemset->support(1);
            m_data->add(pitemset);
        }
        else
        {
            delete pitemset;
        }
    }

    fclose(fp);

    printf("Finish Loading!\n\n");
    fflush(stdout);

    return m_data;
}

/**
 * Write every item set in a list to a text file.
 *
 * The output starts with a "Large Item Sets ..." header. Each item set is
 * then written on its own line as its items separated by spaces, followed by
 * a tab, the support and the weight.
 *
 * @param resultfile  path of the file to create or overwrite
 * @param ls          list whose elements are itemSet pointers
 *
 * If the file cannot be opened a message is printed and nothing is written.
 */
void save(char *resultfile,list *ls)
{
	FILE *fp = fopen(resultfile, "w");
	if (fp == NULL)
	{
		// Without this check the fprintf calls below would dereference NULL.
		printf("Cannot open the result file %s !\n", resultfile);
		return;
	}

	fprintf(fp, "Large Item Sets ...\n\n\n");
	for (int i = 0; i < ls->size(); i++)
	{
		itemSet *pitem = (itemSet *)ls->get(i);
		for (int j = 0; j < pitem->size(); j++)
			fprintf(fp, "%d ", pitem->get(j));
		fprintf(fp, "\t%ld %f", pitem->support(), pitem->weight());
		fprintf(fp, "\n");
	}
	fprintf(fp, "\n\n\n");

	fclose(fp);
}

/**
 * Program entry point.
 *
 * Reads all transactions from "data.txt", runs
 * CApriori::FindLargeItemSets() on them with a minimum support of 0.1,
 * dumps every resulting item set and writes the item sets to "result.txt".
 *
 * @return 0 on success, 1 when the input file cannot be loaded
 */
int main()
{
	// loadItemSets() and save() take non-const char *, so the file names are
	// kept in writable local arrays rather than string literals.
	char inputfile[]  = "data.txt";
	char outputfile[] = "result.txt";
	const double support = 0.1;

	CApriori *m_apriori = new CApriori();
	m_apriori->setsupport(support);

	int pagenum = -1;
	list *ls = loadItemSets(inputfile, pagenum, true);
	if (ls == NULL)
		return 1;	// loadItemSets() has already reported the failure

	m_apriori->pagenum = pagenum + 1;
	m_apriori->FindLargeItemSets(ls);

	// m_Ls holds pointers to existing item sets; nothing is allocated here.
	for (int i = 0; i < m_apriori->m_Ls->size(); i++)
	{
		itemSet *is = (itemSet *)m_apriori->m_Ls->get(i);
		is->dump();
	}

	save(outputfile, m_apriori->m_Ls);

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -