test_main.cpp

来自「数据挖掘中的经典算法Apriori实现」· C++ 代码 · 共 170 行

CPP

170 行

//#include "list.h"
//#include "items.h"
#include <stdio.h>
#include <stdlib.h>
//#include <iostream>
#include <string.h>
//#include "itemSet.h"
//#include "itemSet.cpp"  //自动包含了
//#include "hashTree.h"
#include "apriori.h"
// Load a transaction set from a comma-separated text file.
//
// Every line that contains at least one ',' is split on commas; each token is
// converted with atoi() and added to a freshly created itemSet. Item sets
// that end up with more than one item get support(1) and are appended to the
// returned list; the others are discarded. Lines without a comma are skipped.
//
// datafile   - path of the file to read.
// max_pageno - out: largest item number seen, or -1 if no item was read.
//              Left untouched when the file cannot be opened.
// keeporder  - forwarded to itemSet::keeporder() for every item set created.
//
// Returns a newly allocated list, or NULL when the file cannot be opened.
list *loadItemSets(char *datafile, int& max_pageno, bool keeporder)
{
    char line[20*4096];   // one input line; longer lines are read in pieces by fgets
    FILE *fp;

    // try to open the data file
    if ((fp = fopen(datafile, "rt")) == NULL)
    {
        printf("Cannot open the data file %s !\n", datafile);
        return (list *)NULL;
    }

    printf("\n\nLoading data from file %s  ...  ", datafile);
    fflush(stdout);

    // this is the data structure that stores the input data
    list *m_data = new list();
    max_pageno = -1;

    while (fgets(line, sizeof line, fp) != NULL)
    {
        // only lines holding at least one separator are treated as transactions
        if (strchr(line, ',') == NULL)
            continue;

        // Tokenise the line buffer in place: it is not needed again after
        // parsing, so no per-line heap copy (and no new[]/delete pairing)
        // is required.
        char *t = strtok(line, ",");
        if (t == NULL)
            break;   // line made only of separators: stop reading (original behaviour)

        itemSet *pitemset = new itemSet();
        pitemset->keeporder(keeporder);

        for ( ; t != NULL; t = strtok((char *)NULL, ","))
        {
            int pageno = atoi(t);
            if (max_pageno < pageno)
                max_pageno = pageno;

            pitemset->add(pageno);
        }

        // if the length of the session is greater than 1, then insert it into the session list
        if (pitemset->size() > 1)
        {
            pitemset->support(1);
            m_data->add(pitemset);
        }
        else
        {
            delete pitemset;
        }
    }

    fclose(fp);

    printf("Finish Loading!\n\n");
    fflush(stdout);

    return m_data;
}

// Write the item sets held in ls to the text file resultfile.
//
// The file starts with a "Large Item Sets ..." header. Each item set is then
// written on its own line: its items separated by spaces, followed by a tab,
// its support() and its weight().
//
// resultfile - path of the file to create or overwrite.
// ls         - list whose elements are read as itemSet pointers.
//
// If the file cannot be opened a message is printed and nothing is written.
void save(char *resultfile,list *ls)
{
	FILE *fp = fopen(resultfile, "w");
	if (fp == NULL)
	{
		// Without this check every fprintf below would receive a NULL stream.
		printf("Cannot open the result file %s !\n", resultfile);
		return;
	}

	fprintf(fp, "Large Item Sets ...\n\n\n");
	for (int i = 0; i < ls->size(); i++)
	{
		itemSet *pitem = (itemSet *)ls->get(i);
		for (int j = 0; j < pitem->size(); j++)
			fprintf(fp, "%d ", pitem->get(j));
		fprintf(fp,"\t%ld %f",pitem->support(),pitem->weight());
		fprintf(fp, "\n");
	}
	fprintf(fp, "\n\n\n");

	fclose(fp);
}

// Entry point: reads all transactions from "data.txt", runs
// CApriori::FindLargeItemSets() on them with a minimum support of 0.1,
// calls dump() on every resulting item set and writes them to "result.txt".
//
// Returns 0 on success, 1 when the input file could not be loaded.
int main()
{
	// loadItemSets() and save() take non-const char *, so the names live in
	// writable local arrays instead of heap buffers.
	char inputfile[] = "data.txt";
	char outputfile[] = "result.txt";
	double support = 0.1;
	int pagenum = -1;

	CApriori *m_apriori = new CApriori();
	m_apriori->setsupport(support);

	list *ls = loadItemSets(inputfile, pagenum, true);
	if (ls == NULL)
		return 1;   // loadItemSets() has already reported the failure

	m_apriori->pagenum = pagenum + 1;
	m_apriori->FindLargeItemSets(ls);

	for (int i = 0; i < m_apriori->m_Ls->size(); i++)
	{
		// get() hands back a pointer to an existing element; nothing is allocated here
		itemSet *is = (itemSet *)m_apriori->m_Ls->get(i);
		is->dump();
	}
	save(outputfile, m_apriori->m_Ls);

	return 0;
}

test_main.cpp - 源码说明

本页面展示了「数据挖掘中的经典算法Apriori实现」中的 test_main.cpp 源码文件，采用 C++ 编程语言编写，共 170 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与Apriori相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码：Ctrl + C

搜索代码：Ctrl + F

全屏模式：F11

增大字号：Ctrl + =

减小字号：Ctrl + -

显示快捷键：?