📄 proto.cpp
字号:
// PROTO.cpp: implementation of the PROTO class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <string.h>
#include <conio.h>
#include <time.h>
#include "dt.h"
#include "PROTO.h"
#include <ole2.h>
#include <comutil.h>
#include <comdef.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
CProgressCtrl*progress;
CProgressCtrl*progress2;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
PROTO::PROTO()
{
}
PROTO::~PROTO()
{
}
MATRIX*PROTO::build_matrix (UINT width, UINT height)
{
MATRIX *_matrix;
UINT i;
_matrix = (MATRIX*) malloc (sizeof (MATRIX));
if (!_matrix)
AfxMessageBox("asdf"); //err_exit (__FILE__, __LINE__);
_matrix->width = width;
_matrix->height = height;
_matrix->data = (REAL**) malloc (height * sizeof (REAL*));
if (_matrix->data == NULL)
AfxMessageBox("asdf");//err_exit(__FILE__, __LINE__);
for (i=0; i<height; i++)
{
_matrix->data[i] = (REAL*) malloc (width * sizeof(REAL));
if (_matrix->data[i] == NULL)
AfxMessageBox("asdf");//err_exit(__FILE__, __LINE__);
}
return _matrix;
}
void PROTO::err_exit (CHAR* file, UINT line)
{
printf("\n Fatal error in file %s, line %u", file, line);
exit(0);
}
void PROTO::file_size (CHAR *file_name, UINT *width, UINT *height)
/*
* Given the name of a file of numeric data, this routine counts
* the numbers of rows and columns. It's assumed that the number
* of entries is the same in each row, and an error is flagged if this
* is not the case.
*
*/
{
FILE *f;
UINT buf_size = 0xFFFF, _width = 0;
CHAR *buffer, *ptr;
*width = *height = 0;
buffer = (CHAR*) malloc (buf_size * sizeof (CHAR));
if (buffer == NULL)
err_exit (__FILE__, __LINE__);
/* Open price file - abort if filename invalid */
f = fopen(file_name, "r");
if (f == NULL)
{
printf("\n File not found : %s\n", file_name);
err_exit (__FILE__, __LINE__);
}
/* Get number of entries in first row */
if (fgets(buffer, buf_size, f) != NULL)
{
++*height;
ptr = strtok (buffer, " ,");
while (ptr != NULL)
{
++*width;
ptr = strtok (NULL, " ,");
}
}
/* Count numbers of subsequent rows */
while (!feof(f))
{
if (fgets(buffer, buf_size, f) != NULL)
{
if (strlen(buffer) > strlen("\n")) /* if line is more than a NL char */
{
++*height;
_width = 0;
ptr = strtok (buffer, " ,");
while (ptr != NULL)
{
++_width;
ptr = strtok (NULL, " ,");
}
if (*width != _width)
{
AfxMessageBox("*width != _width");
// printf("\n Number of entries in file %s did not agree", file_name);
// err_exit (__FILE__, __LINE__);
}
}
}
}
free (buffer);
}
/*-------------------------------------------------------------------*/
NODE* PROTO::ID3 ( MATRIX *matrix, NODE* parent, UINT target, UINT state)
/* Routine to build a decision tree, based on Quinlan's ID3 algorithm. */
{
NEGENTROPY negentropy_struct;
///////////////////////////////
/////////////////////////////
NODE *node;
UINT n_vars = matrix->width, n_samples = matrix->height, i, j, split;
REAL **data = matrix->data;
REAL best_threshold, min_negentropy, _negentropy;
Mysort*sort= (Mysort*) malloc (n_vars * sizeof(Mysort));
REAL test;
/* Allocate memory for this node */
node = (NODE*) malloc (sizeof(NODE));
if (!node)
err_exit (__FILE__, __LINE__);
/* Set up links in decision tree */
node->parent = parent; /* Set address of parent node */
if (parent != NULL) /* parent to child; not relevant for root node */
{
/* Pass address of this node to the parent node */
if (state == ON)
parent->on = node;
else
if (state == OFF)
parent->off = node;
}
/*
* Select attribute with lowest negentropy for splitting. Scan through
* ALL attributes (except the target) and ALL data samples. This is
* pretty inefficient for data sets with repeated values, but will do
* for illustrative purposes
*/
min_negentropy = 1.0;
progress2->SetRange(0,n_vars*n_samples);
for (i=0; i<n_vars; i++)//属性号
{
min_negentropy=10;
for (j=0; j<n_samples; j++) //样本遍历
{
if (i != target)
{
progress2->StepIt();
/* Set trial values for this node... */
node->idx = i;//基因号码
node->threshold = data[i][j];//设置本身
/* ...and calculate the negentropy of this partition */
negentropy_struct = negentropy (data, n_samples, node, target);//计算信息商
_negentropy = negentropy_struct.ne;//信息熵
/* If this negentropy is lower than any other, retain the
index and threshold for future use */
if (_negentropy <min_negentropy) //取信息最小负熵
{
min_negentropy = _negentropy;
if(i==3)
i=3;
sort[i].genedata=min_negentropy;
sort[i].index=i;
split = i;
best_threshold = data[i][j];
}
}
}
sort[i].index=i;
sort[i].genedata=min_negentropy;
} /*for (i=0; i<n_vars; i++)*/
bubble(sort,n_vars);
node->idx = split;//基因号
node->threshold = best_threshold;//基因表达域值
return node;
}
NEGENTROPY PROTO::negentropy ( REAL **data,
UINT n_samples,
NODE *local,
UINT target)
{
NEGENTROPY ret_val;
NODE *_node, *_parent;
UINT on_ctr, off_ctr, p1, p2, i, _match;
REAL p_on, p_off, negentropy_on, negentropy_off;
on_ctr = off_ctr = p1 = p2 = 0;
for (i=0; i<n_samples; i++)
{
_match = 1;
_node = local;
while (_node->parent != NULL)
{
_parent = _node->parent;
if (_node == _parent->on)
{
/* if parent node is ON */
if (data[_parent->idx][i] < _parent->threshold)//不表达
_match = 0;
}
else
if (_node == _parent->off)
{
/* if parent node is OFF */
if (data[_parent->idx][i] >= _parent->threshold)//表示表达
_match = 0;
}
_node = _parent;
}
if (_match)
{
if (data[local->idx][i] >= local->threshold)
{
on_ctr++;
if (data[target][i] >= 0.5)
p1++;
}
else
{
off_ctr++;
if (data[target][i] >= 0.5)
p2++;
}
}
}
if (on_ctr > 0)
{
p_on = (REAL) p1 / (REAL) on_ctr;
p_off = 1 - p_on;
negentropy_on = -entropy (p_on) - entropy (p_off);
}
else
negentropy_on = 0.0;
/* 2: Entropy of subtable with activation OFF */
if (off_ctr > 0)
{
p_on = (REAL) p2 / (REAL) off_ctr;
p_off = 1 - p_on;
negentropy_off = -entropy (p_on) - entropy (p_off);
}
else
negentropy_off = 0.0;
ret_val.ne = (negentropy_on * on_ctr + negentropy_off * off_ctr);
ret_val.ne /= (on_ctr + off_ctr);
/*
* If all values examined were the same, set 'ret_val.status' to
* the target value since this will be an end-of-branch node
*/
if ((p1 == on_ctr) && (p2 == off_ctr))
ret_val.status = ON;
else if ((p1 == 0) && (p2 == 0))
ret_val.status = OFF;
else
ret_val.status = INACTIVE;
return ret_val;
}
void PROTO::print_tree (NODE *node, CHAR** tag_names)
{
/*
* Displays algebraic equivalent of the decision tree
*/
CString tree;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -