⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 proto.cpp

📁 信息增益C++程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// PROTO.cpp: implementation of the PROTO class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <string.h>
#include <conio.h>
#include <time.h>
#include "dt.h"
#include "PROTO.h"
#include <ole2.h>
#include <comutil.h>
#include <comdef.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
CProgressCtrl*progress;
CProgressCtrl*progress2;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

PROTO::PROTO()
{

}

PROTO::~PROTO()
{

}
MATRIX*PROTO::build_matrix (UINT width, UINT height)
{
   MATRIX *_matrix;
    UINT i;

    _matrix = (MATRIX*) malloc (sizeof (MATRIX));
    if (!_matrix)
       AfxMessageBox("asdf"); //err_exit (__FILE__, __LINE__);

    _matrix->width  = width;
    _matrix->height = height;

    _matrix->data = (REAL**) malloc (height * sizeof (REAL*));
    if (_matrix->data == NULL)
        AfxMessageBox("asdf");//err_exit(__FILE__, __LINE__);

    for (i=0; i<height; i++)
    {
        _matrix->data[i] = (REAL*) malloc (width * sizeof(REAL));
        if (_matrix->data[i] == NULL)
            AfxMessageBox("asdf");//err_exit(__FILE__, __LINE__);
    }
    return _matrix;
}
void PROTO::err_exit (CHAR* file, UINT line)
{
    printf("\n Fatal error in file %s, line %u", file, line);
    exit(0);
}

void PROTO::file_size (CHAR *file_name, UINT *width, UINT *height)
/*
* Given the name of a file of numeric data, this routine counts
* the numbers of rows and columns. It's assumed that the number
* of entries is the same in each row, and an error is flagged if this
* is not the case.
*
*/
{
    FILE *f;
    UINT buf_size = 0xFFFF, _width = 0;
    CHAR *buffer, *ptr;

    *width = *height = 0;

    buffer = (CHAR*) malloc (buf_size * sizeof (CHAR));
    if (buffer == NULL)
        err_exit (__FILE__, __LINE__);

    /* Open price file - abort if filename invalid */
    f = fopen(file_name, "r");
    if (f == NULL)
    {
        printf("\n File not found : %s\n", file_name);
        err_exit (__FILE__, __LINE__);
    }

    /* Get number of entries in first row */
    if (fgets(buffer, buf_size, f) != NULL)
    {
        ++*height;
        ptr = strtok (buffer, "	,");
        while (ptr != NULL)
        {
            ++*width;
            ptr = strtok (NULL, "	,");
        }
    }

    /* Count numbers of subsequent rows */
    while (!feof(f))
    {
        if (fgets(buffer, buf_size, f) != NULL)
        {
            if (strlen(buffer) > strlen("\n"))  /* if line is more than a NL char */
            {
                ++*height;
                _width = 0;
                ptr = strtok (buffer, " ,");
                while (ptr != NULL)
                {
                    ++_width;
                    ptr = strtok (NULL, " ,");
                }

                if (*width != _width)
                {
					AfxMessageBox("*width != _width");
                   // printf("\n Number of entries in file %s did not agree", file_name);
                   // err_exit (__FILE__, __LINE__);
                }
            }
        }
    }
    free (buffer);
}

/*-------------------------------------------------------------------*/

NODE* PROTO::ID3 ( MATRIX *matrix, NODE* parent, UINT target, UINT state)

/* Routine to build a decision tree, based on Quinlan's ID3 algorithm. */
{
    NEGENTROPY negentropy_struct;
	///////////////////////////////
	
	/////////////////////////////
    NODE *node;
    UINT n_vars = matrix->width, n_samples = matrix->height, i, j, split;
    REAL **data = matrix->data;
    REAL best_threshold, min_negentropy, _negentropy;
    Mysort*sort= (Mysort*) malloc (n_vars * sizeof(Mysort));
	REAL test;
    /* Allocate memory for this node */
    node = (NODE*) malloc (sizeof(NODE));
    if (!node)
        err_exit (__FILE__, __LINE__);

    /* Set up links in decision tree */
    node->parent = parent;  /* Set address of parent node */

    if (parent != NULL) /* parent to child; not relevant for root node */
    {
        /* Pass address of this node to the parent node */
        if (state == ON)
            parent->on = node;
        else
            if (state == OFF)
                parent->off = node;
    }

    /*
     * Select attribute with lowest negentropy for splitting. Scan through
     * ALL attributes (except the target) and ALL data samples. This is
     * pretty inefficient for data sets with repeated values, but will do
     * for illustrative purposes
     */

    min_negentropy = 1.0;
	progress2->SetRange(0,n_vars*n_samples);

    for (i=0; i<n_vars; i++)//属性号
    {
		min_negentropy=10;

        for (j=0; j<n_samples; j++) //样本遍历
        {

           
             if (i != target) 
			 {
				 progress2->StepIt();

                /* Set trial values for this node... */
                node->idx = i;//基因号码
                node->threshold = data[i][j];//设置本身

                /* ...and calculate the negentropy of this partition */
                negentropy_struct = negentropy (data, n_samples, node, target);//计算信息商
                _negentropy = negentropy_struct.ne;//信息熵

                /* If this negentropy is lower than any other, retain the
                       index and threshold for future use */
                if (_negentropy <min_negentropy) //取信息最小负熵
                {
                    min_negentropy = _negentropy;
					if(i==3)
						i=3;
					
					sort[i].genedata=min_negentropy;
					sort[i].index=i;
                    split = i;
                    best_threshold = data[i][j];
                }

           

        } 
		}
		sort[i].index=i;
		sort[i].genedata=min_negentropy;
		

    } /*for (i=0; i<n_vars; i++)*/
     
    bubble(sort,n_vars);
    node->idx = split;//基因号
    node->threshold = best_threshold;//基因表达域值

   

 

    return node;
}

NEGENTROPY PROTO::negentropy ( REAL **data,
    UINT   n_samples,
    NODE   *local,
    UINT   target)
{
   
    NEGENTROPY ret_val;
    NODE *_node, *_parent;
    UINT on_ctr, off_ctr, p1, p2, i, _match;
    REAL p_on, p_off, negentropy_on, negentropy_off;

    on_ctr = off_ctr = p1 = p2 = 0;

    for (i=0; i<n_samples; i++) 
    {

        _match = 1;
        _node = local;
        while (_node->parent != NULL) 
        { 
			
            _parent = _node->parent;
            if (_node == _parent->on) 
            { 
				/* if parent node is ON */
                if (data[_parent->idx][i] < _parent->threshold)//不表达
                    _match = 0;
            }
            else
                if (_node == _parent->off) 
                { 
					/* if parent node is OFF */
                    if (data[_parent->idx][i] >= _parent->threshold)//表示表达
                        _match = 0;
                }
            _node = _parent;
        }

        if (_match) 
        {
            if (data[local->idx][i] >= local->threshold) 
            {
                on_ctr++;
                if (data[target][i] >= 0.5)
                    p1++;
            }
            else 
            {
                off_ctr++;
                if (data[target][i] >= 0.5)
                    p2++;
            }
        }
    } 

    if (on_ctr > 0)
    {
        p_on  = (REAL) p1 / (REAL) on_ctr;
        p_off = 1 - p_on;
        negentropy_on = -entropy (p_on) - entropy (p_off);
    }
    else
        negentropy_on = 0.0;

    /* 2: Entropy of subtable with activation OFF */

    if (off_ctr > 0)
    {
        p_on  = (REAL) p2 / (REAL) off_ctr;
        p_off = 1 - p_on;
        negentropy_off = -entropy (p_on) - entropy (p_off);
    }
    else
        negentropy_off = 0.0;

    ret_val.ne = (negentropy_on * on_ctr + negentropy_off * off_ctr);
    ret_val.ne /= (on_ctr + off_ctr);

    /*
     * If all values examined were the same, set 'ret_val.status' to
     * the target value since this will be an end-of-branch node
     */

    if ((p1 == on_ctr) && (p2 == off_ctr))
        ret_val.status = ON;
    else if ((p1 == 0) && (p2 == 0))
        ret_val.status = OFF;
    else
        ret_val.status = INACTIVE;

    return ret_val;

}

void PROTO::print_tree (NODE *node, CHAR** tag_names)
{
    /*
     * Displays algebraic equivalent of the decision tree
     */
	CString tree;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -