// File: naivedis.cpp
// (Web code-viewer residue: "Font size:")
// Naivedis.cpp: implementation of the CNaivedis class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "../RSet.h"
#include "fstream.h"
#include "Naivedis.h"
#include "math.h"
#include "stdlib.h"
#include "string.h"
#include "stdio.h"
//#define MAX 30
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Constructor: puts every owned pointer and every counter into a known
// "empty" state so that the destructor is safe to run on an object that
// never loaded or discretized any data.
CNaivedis::CNaivedis()
{
    // Owned tables and buffers; NULL means "not allocated yet".
    Cut=NULL;
    NewTable=NULL;
    pAttName=NULL;
    pDataType=NULL;
    pStringTable=NULL;
    pStrResult=NULL;
    pNonStringTable=NULL;
    pStringTableResult=NULL;
    strCuts=NULL;

    // Counters are integers, so initialise them with 0 rather than the
    // pointer constant NULL.
    iAttNum=0;
    iStrAttNum=0;
    iNonStrAttNum=0;
    // iRecordNum bounds most of the destructor's loops; it was previously
    // left uninitialised until a file had been read.
    iRecordNum=0;
}
// Destructor: releases every table owned by the object.
// The row counts used here mirror the allocation sites in this file:
//   Cut                  -> iAttNum rows          (InitTable)
//   pAttName / pDataType -> iAttNum+1 rows        (readAttrInfo)
//   all other tables     -> iRecordNum rows
CNaivedis::~CNaivedis()
{
    int i,j;

    if(Cut!=NULL)
    {
        // InitTable allocates one row per condition attribute (iAttNum),
        // not just per non-string attribute, so free all of them.
        for(i=0;i<iAttNum;i++)
            delete []Cut[i];
        delete []Cut;
    }

    if(NewTable!=NULL)
    {
        for(i=0;i<iRecordNum;i++)
            delete []NewTable[i];
        delete []NewTable;
    }

    if(pAttName!=NULL)
    {
        // iAttNum condition attributes plus the decision attribute.
        for(i=0;i<iAttNum+1;i++)
            delete []pAttName[i];
        delete[] pAttName;
    }

    if(pDataType!=NULL)
    {
        for(i=0;i<iAttNum+1;i++)
            delete []pDataType[i];
        delete[] pDataType;
    }

    if(pStringTable!=NULL)
    {
        for(i=0;i<iRecordNum;i++)
        {
            for(j=0;j<iStrAttNum;j++)
                delete[] pStringTable[i][j];
            delete[] pStringTable[i];
        }
        delete[] pStringTable;
    }

    if(pStringTableResult!=NULL)
    {
        for(i=0;i<iRecordNum;i++)
            delete[] pStringTableResult[i];
        delete[] pStringTableResult;
    }

    if(pStrResult!=NULL)
    {
        for(i=0;i<iRecordNum;i++)
        {
            // delete[] on a null pointer is a no-op, so no per-cell check.
            for(j=0;j<iStrAttNum;j++)
                delete[] pStrResult[i][j];
            delete[] pStrResult[i];
        }
        delete[] pStrResult;
    }

    if(pNonStringTable!=NULL)
    {
        for(i=0;i<iRecordNum;i++)
            delete[] pNonStringTable[i];
        delete[] pNonStringTable;
    }

    delete[] strCuts;
}
void CNaivedis::GetNewTable()////根据所产生的断点集得到离散化后的决策表并存入NewTable中
{
int i,j,k;
int m;
for(i=0;i<iNonStrAttNum;i++)
GetCut(i);
NewTable=new int *[iRecordNum];
for(i=0;i<iRecordNum;i++)
{
try
{
NewTable[i]=new int[iNonStrAttNum+1];
}
catch(CMemoryException * e)
{
::MessageBeep(MB_ICONHAND);
AfxMessageBox("Out of the memory!",MB_OK|MB_ICONSTOP);
e->Delete();
}
}
for(i=0;i<iRecordNum;i++)
{
for(j=0;j<iNonStrAttNum;j++)
{
m=0;
for(k=0;k<(int)Cut[j][0];k++)
{
if(pNonStringTable[i][j]<Cut[j][k+1])
{
NewTable[i][j]=m;
break;
}
m++;
}
if(pNonStringTable[i][j]>=Cut[j][(int)Cut[j][0]])
NewTable[i][j]=(int)Cut[j][0];
}
NewTable[i][iNonStrAttNum]=(int)pNonStringTable[i][iNonStrAttNum];
}
}
// Entry point of the discretization: allocates the cut storage, discretizes
// the non-string attributes and, when string attributes exist, maps their
// values to integer codes.
// Returns TRUE on success and FALSE when the cut storage or the result table
// could not be set up (previously TRUE was returned unconditionally).
BOOL CNaivedis::OnNaivedis(){
    // InitTable() returns 1 on success; any other value means Cut is unusable.
    if(InitTable()!=1)
        return FALSE;

    GetNewTable();
    if(NewTable==NULL)
        return FALSE;

    if(iStrAttNum!=0)
        doString();
    return TRUE;
}
// Saves the discretization result to lpszPathName: a header, the attribute
// names and types, the discretized decision table, and a "[Cuts]" section
// describing, per condition attribute, how original values map to codes.
// Returns true on success and false when the file cannot be opened
// (previously the whole process was terminated with exit(0)).
bool CNaivedis::OnNaivedisSave(LPCTSTR lpszPathName)
{
    int i,j;
    fstream fpw;
    fpw.open(lpszPathName,ios::out);
    if(!fpw)
    {
        ::MessageBeep(MB_ICONHAND);
        AfxMessageBox("some error happen, file can't be opend!",
            MB_OK|MB_ICONSTOP);
        // Report the failure to the caller instead of killing the application.
        return false;
    }

    // Header.
    fpw<<"Style:"<<"train"<<endl;
    fpw<<"Stage:2"<<endl;
    fpw<<"Condition attributes number:"<<iAttNum<<endl;
    fpw<<"Records number:"<<iRecordNum<<endl;

    // Attribute names and data types (condition attributes + decision).
    for(i = 0;i < iAttNum+1;i++)
        fpw<<pAttName[i]<<" ";
    fpw<<endl;
    for(i = 0;i < iAttNum+1;i++)
        fpw<<pDataType[i]<<" ";
    fpw<<endl;

    // Discretized records. String and non-string attributes live in separate
    // tables, so each keeps its own column cursor, restarted for every record.
    int strIndex,nonStrIndex;
    for(i=0;i<iRecordNum;i++){
        strIndex=0;
        nonStrIndex=0;
        for(j=0;j< iAttNum;j++){
            if(!strcmp(pDataType[j],"String"))
                fpw<<pStringTableResult[i][strIndex++]<<" ";
            else
                fpw<<NewTable[i][nonStrIndex++]<<" ";
        }
        fpw<<NewTable[i][nonStrIndex];  // decision attribute
        fpw<<endl;
    }

    // Cut description, one entry per condition attribute.
    fpw<<"[Cuts]"<<endl;
    strIndex=0;
    nonStrIndex=0;
    for(i=0;i<iAttNum;i++){
        fpw<<i<<endl;
        if(!strcmp(pDataType[i],"String")){
            fpw<<strCuts[strIndex]<<endl;   // number of distinct string values
            for(j=0;j<strCuts[strIndex];j++)
                fpw<<pStrResult[j][strIndex]<<" "<<j<<endl;  // value -> code
            strIndex++;
        }
        else{
            const int cutCount=(int)Cut[nonStrIndex][0];
            // Number of intervals (cuts + 1), written as an integer so that
            // large counts are not formatted as a floating-point value.
            fpw<<cutCount+1<<endl;
            fpw<<"["<<"*"<<",";
            for(j=0;j<cutCount;j++){
                fpw<<Cut[nonStrIndex][j+1]<<")"<<" "<<j<<endl;
                fpw<<"["<<Cut[nonStrIndex][j+1]<<",";
            }
            fpw<<"*"<<"]"<<" "<<j;
            fpw<<endl;
            nonStrIndex++;
        }
    }
    fpw.close();
    return true;
}
//给cut数组(保存断点的数组)分配内存
int CNaivedis::InitTable()
{
int i=0,j=0;
Cut=new float*[iAttNum];
for(i=0;i<iAttNum;i++)
{
try
{
Cut[i]=new float[iRecordNum];
}
catch(CMemoryException * e)
{
::MessageBeep(MB_ICONHAND);
AfxMessageBox("Out of the memory!",MB_OK|MB_ICONSTOP);
e->Delete();
return -2;
}
}
return 1;
}
// Computes the cut points of non-string attribute i and stores them in Cut[i].
//
// The (attribute value, decision value) pairs of all records are copied into
// a scratch table, sorted by attribute value with SelectSort(), and a cut is
// placed at the midpoint of every pair of adjacent records whose attribute
// values AND decision values both differ. Cut[i][0] receives the number of
// cuts, Cut[i][1..] the cut values.
//
// If the scratch table cannot be allocated, the function reports the error,
// releases what it allocated and returns with Cut[i][0] == 0.
void CNaivedis::GetCut(int i)
{
    int j,k=1;
    float **Mid;
    Cut[i][0]=0;

    Mid=new float*[iRecordNum];
    for(j=0;j<iRecordNum;j++)
    {
        try
        {
            Mid[j]=new float[2];
        }
        catch(CMemoryException * e)
        {
            ::MessageBeep(MB_ICONHAND);
            AfxMessageBox("Out of the memory!",MB_OK|MB_ICONSTOP);
            e->Delete();
            // Do not go on with an unallocated row: release and give up.
            while(j>0)
                delete []Mid[--j];
            delete []Mid;
            return;
        }
    }

    for(j=0;j<iRecordNum;j++)
    {
        Mid[j][0]=pNonStringTable[j][i];             // attribute value
        Mid[j][1]=pNonStringTable[j][iNonStrAttNum]; // decision value
    }

    SelectSort(Mid);

    for(j=0;j<iRecordNum-1;j++)
        if((Mid[j][0]!=Mid[j+1][0])&&(Mid[j][1]!=Mid[j+1][1]))
        {
            Cut[i][k++]=(Mid[j][0]+Mid[j+1][0])/2;
            Cut[i][0]++;
        }

    for(j=0;j<iRecordNum;j++)
        delete []Mid[j];
    delete []Mid;
}
// Sorts the iRecordNum rows of Mid in ascending order of column 0 (the
// attribute value); column 1 (the decision value) travels with its row.
// The sort works in place by exchanging row contents. The exchange order is
// significant: it decides how rows with equal attribute values end up
// ordered, which GetCut() depends on.
void CNaivedis::SelectSort(float **Mid)
{
    int lower,upper;
    for(lower=0;lower<iRecordNum-1;lower++)
    {
        for(upper=lower+1;upper<iRecordNum;upper++)
        {
            float *lowRow=Mid[lower];
            float *highRow=Mid[upper];
            if(lowRow[0]>highRow[0])
            {
                // Exchange both columns of the two rows.
                const float savedValue=lowRow[0];
                const float savedDecision=lowRow[1];
                lowRow[0]=highRow[0];
                lowRow[1]=highRow[1];
                highRow[0]=savedValue;
                highRow[1]=savedDecision;
            }
        }
    }
}
// Reads a decision table from the text file `filename`.
//
// The header lines "Style:", "Stage:" and "Condition attributes number:" are
// parsed first. Only when the style is "train" (case-insensitive) and the
// stage is at most 1 are the record count, the attribute information and the
// table itself read; for any other header the function returns TRUE without
// loading data, as before.
//
// Returns FALSE when the file cannot be opened, when a header field is
// missing or negative, or when readAttrInfo()/readTable() fail. The file is
// closed on every path (it was previously never closed).
//
// NOTE(review): the "%s" read into cStyle is unbounded because the size of
// cStyle is not visible in this file - confirm and add a field width.
BOOL CNaivedis::ReadDataFromFile(char *filename){
    FILE *fp;
    BOOL bOk=TRUE;

    if((fp = fopen(filename,"r")) == NULL){
        ::MessageBeep(MB_ICONHAND);
        AfxMessageBox("Couldn't open the file",MB_OK|MB_ICONSTOP);
        return FALSE;
    }

    // Each fscanf must convert exactly one field; otherwise the counts used
    // for the allocations below would be garbage.
    if(fscanf(fp,"Style:%s\n",cStyle)!=1
        || fscanf(fp,"Stage:%d\n",&iStage)!=1
        || fscanf(fp,"Condition attributes number:%d\n",&iAttNum)!=1
        || iAttNum<0){
        bOk=FALSE;
    }
    else if(_stricmp(cStyle,"train") == 0 && iStage<=1){
        if(fscanf(fp,"Records number:%d\n",&iRecordNum)!=1 || iRecordNum<0)
            bOk=FALSE;
        else if(!readAttrInfo(fp))
            bOk=FALSE;
        else if(!readTable(fp))
            bOk=FALSE;
    }

    fclose(fp);
    return bOk;
}
// Reads the attribute names into pAttName and the attribute data types into
// pDataType (iAttNum condition attributes followed by the decision
// attribute), and counts the string / non-string condition attributes in
// iStrAttNum / iNonStrAttNum.
//
// The name and type buffers are allocated on the first call only (while
// pAttName is still NULL). Each token is read with a field width of MAX-1 so
// that it cannot overrun its MAX-byte buffer.
//
// Returns FALSE when memory runs out or when the file ends before all names
// and types were read; TRUE otherwise.
BOOL CNaivedis::readAttrInfo(FILE* fp){
    int i;

    if(pAttName == NULL){
        try{
            // Rows start out as NULL so that a failure part-way through
            // leaves only NULL or valid row pointers behind.
            pAttName = new char*[iAttNum+1];
            for(i=0;i <= iAttNum;i++)
                pAttName[i]=NULL;
            pDataType= new char*[iAttNum+1];
            for(i=0;i <= iAttNum;i++)
                pDataType[i]=NULL;
            for(i=0;i <= iAttNum;i++){
                pAttName[i]=new char[MAX];
                pDataType[i]=new char[MAX];
            }
        }
        catch(CMemoryException* e){
            ::MessageBeep(MB_ICONHAND);
            AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
            e->Delete();
            return FALSE;
        }
    }//end if

    // Build "%<MAX-1>s": a bounded token read matching the buffer size.
    char tokenFmt[32];
    sprintf(tokenFmt,"%%%ds",(int)(MAX)-1);

    // Attribute names.
    for(i=0;i <= iAttNum;i++)
        if(fscanf(fp,tokenFmt,pAttName[i])!=1)
            return FALSE;
    fscanf(fp,"\n");

    // Data types of the condition attributes.
    iStrAttNum = 0;
    iNonStrAttNum = 0;
    for(i = 0;i < iAttNum;i++){
        if(fscanf(fp,tokenFmt,pDataType[i])!=1)
            return FALSE;
        if(!strcmp(pDataType[i],"String"))
            iStrAttNum++;       // number of string attributes
        else
            iNonStrAttNum++;    // number of non-string attributes
    }

    // Data type of the decision attribute.
    if(fscanf(fp,tokenFmt,pDataType[iAttNum])!=1)
        return FALSE;
    fscanf(fp,"\n");
    return TRUE;
}
// Reads the attribute values of all records.
//
// String attributes go to pStringTable, all other condition attributes to
// pNonStringTable (converted with atof). The decision value is stored,
// truncated to an integer, in the last column of pNonStringTable. The tables
// are allocated on the first call only (while both are still NULL).
//
// Every token is read with a field width of MAX-1 so that it cannot overrun
// its MAX-byte buffer. Returns FALSE when memory runs out or when the file
// ends before all values were read; TRUE otherwise.
//
// NOTE(review): a failed allocation part-way through still leaves the tables
// partially initialised, as in the original code.
BOOL CNaivedis::readTable(FILE* fp){
    int i,j;

    if((pStringTable == NULL)&&(pNonStringTable == NULL)){
        try
        {
            if(iStrAttNum!=0)
                pStringTable = new char**[iRecordNum];
            // The decision attribute is always kept in pNonStringTable, so
            // this table is allocated unconditionally.
            pNonStringTable = new float*[iRecordNum];
        }
        catch(CMemoryException* e){
            ::MessageBeep(MB_ICONHAND);
            AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
            e->Delete();
            return FALSE;
        }
        for(i = 0;i < iRecordNum;i++){
            try{
                if(iStrAttNum!=0)
                    pStringTable[i] = new char*[iStrAttNum];
                pNonStringTable[i] = new float[iNonStrAttNum+1];  // includes the decision attribute
            }
            catch(CMemoryException* e){
                ::MessageBeep(MB_ICONHAND);
                AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
                e->Delete();
                return FALSE;
            }
            for(j = 0;j < iStrAttNum;j++){
                try{
                    pStringTable[i][j] = new char[MAX];
                }
                catch(CMemoryException* e){
                    ::MessageBeep(MB_ICONHAND);
                    AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
                    e->Delete();
                    return FALSE;
                }
            }//end for
            for(j=0;j<=iNonStrAttNum;j++)
                pNonStringTable[i][j]=0.0;
        }//end for
    }//end if

    // Build "%<MAX-1>s": a bounded token read matching the buffer size.
    char tokenFmt[32];
    sprintf(tokenFmt,"%%%ds",(int)(MAX)-1);

    int iStrIndex,iNonStrIndex;
    char* str=new char[MAX];    // scratch buffer for numeric tokens
    BOOL bOk=TRUE;

    for(i = 0;bOk && i < iRecordNum;i++){
        // String and non-string values are stored in separate tables, each
        // with its own column cursor.
        iStrIndex=0;
        iNonStrIndex=0;
        for(j = 0;bOk && j < iAttNum;j++){
            if(!strcmp(pDataType[j],"String")){
                if(fscanf(fp,tokenFmt,pStringTable[i][iStrIndex++])!=1)
                    bOk=FALSE;
            }
            else{
                if(fscanf(fp,tokenFmt,str)!=1)
                    bOk=FALSE;
                else
                    pNonStringTable[i][iNonStrIndex++]=(float)atof(str);
            }
        }
        if(!bOk)
            break;

        // Decision attribute value.
        if(fscanf(fp,tokenFmt,str)!=1){
            bOk=FALSE;
            break;
        }
        fscanf(fp,"\n");
        pNonStringTable[i][iNonStrIndex]=(int)atof(str);
    }

    delete[] str;
    return bOk;
}
void CNaivedis::doString(){
//先建立一个pStrResult,通过对pStringTable的一次扫描,把每一个属性的所有可能值
//找出来,建立字符串值与离散值的映射关系
//再把pStringTable中的数据与pStrResult中的数据进行比较,修改pStringTable中的
//值,以便打印结果
//strCuts[]存放断点个数
int i=0,j=0,k=0;
try{
pStrResult = new char**[iRecordNum];
pStringTableResult = new int*[iRecordNum];
//存放离散化以后的结果
strCuts = new int[iStrAttNum];
//存放字符串属性的断点个数
}
catch(CMemoryException* e){
::MessageBeep(MB_ICONHAND);
AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
e->Delete();
}
for (i=0;i<iRecordNum;i++){
try{
pStrResult[i]= new char*[iStrAttNum];
pStringTableResult[i]= new int[iStrAttNum];
}
catch(CMemoryException* e){
::MessageBeep(MB_ICONHAND);
AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
e->Delete();
}
try{
for(j=0;j<iStrAttNum;j++){
pStrResult[i][j] = new char[20];
// pStrResult[i][j] =NULL;//不用直接赋值方式,因为这样后面就不能拷贝再赋值
strcpy(pStrResult[i][j],"");//用拷贝方式给字符串赋值
}
}
catch(CMemoryException* e){
::MessageBeep(MB_ICONHAND);
AfxMessageBox("Out of memory!",MB_OK|MB_ICONSTOP);
e->Delete();
}
}
for(i=0;i<iStrAttNum;i++){
for(j=0;j<iRecordNum;j++)
pStringTableResult[j][i]=0;
strCuts[i]=0;
}
for(i=0;i<iStrAttNum;i++){ //i列
for(j=0;j<iRecordNum;j++){//对每个属性值
for(k=0;k<=j; k++){
if(!strcmp(pStringTable[j][i],pStrResult[k][i]))
break;
else if(!strcmp(pStrResult[k][i],"")){
//如果pStrResult[k][i]不等于pStringTable[j][i]而且pStrResult[k][i]为空
//则应该把pStringTable[j][i]赋值给pStrResult[k][i]
strcpy(pStrResult[k][i],pStringTable[j][i]); //j样例i属性
strCuts[i]++;//记录断点个数
break; //退出k循环
} //end else if
} //end for(k)
pStringTableResult[j][i]=k;
//把pStringTableResult[j][i]的值改为离散化以后的值
} //end for(j)
} //end for(i)
}
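// ---- Residue from the web code viewer (not part of the original source) ----
// Keyboard shortcuts
// Copy code: Ctrl + C
// Search code: Ctrl + F
// Full-screen mode: F11
// Toggle theme: Ctrl + Shift + D
// Show shortcuts: ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -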