⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 id3algorithm(c).txt

📁 [综合]ID3算法源程序(C语言) 网上资源, 软件技术, 电脑与网络, 科学研究
💻 TXT
📖 第 1 页 / 共 2 页
字号:
     * found.
     */

    if  (negentropy_struct.status != INACTIVE) 
    {
        node->on = node->off = NULL;
        node->idx = negentropy_struct.status;
    }
    else
    {
        node->on  = ID3 (matrix, node, target, ON);
        node->off = ID3 (matrix, node, target, OFF);
    }

    return node;
}

/*-------------------------------------------------------------------*/

void main (int argv, char *argc[])
{

    MATRIX *matrix;
    NODE *node;
    UINT target, n_vars, n_samples;
    CHAR data_file[13], tag_file[13];  /* Longest file name in DOS */
    CHAR **tag_names;

    /* Set up file names */
    if (argv != 2)
    {
        printf("\nUsage: id3 [datafile]");
        exit(0);
    }
    else
    {
        printf("\nWelcome to ID3");
        printf("\nLast compiled on %s, %s", __DATE__, __TIME__);
        printf("\n");
        strcpy(data_file, argc[1]);
        strcpy(tag_file,  argc[1]);
        strcat(data_file, ".dat");
        strcat(tag_file,  ".tag");
    }

    /* Read dimensions of data file */
    file_size (data_file, &n_vars, &n_samples);

    /* Read labels for columns of data */
    tag_names = read_tags (tag_file, n_vars);

    /* Allocate storage for data... */
    matrix = build_matrix (n_vars, n_samples);

    /* ...and read it from disk */
    read_matrix (data_file, matrix);

    /* Classification target is last column */
    target = n_vars - 1;

    /* Return root of decision tree - ID3 continues to call itself
       recursively */

    node = ID3 ( matrix, NULL, target, 0 );

    print_tree(node, tag_names);

    printf("\n");

    free_tags (tag_names, n_vars);
    free_matrix(matrix);
    free_tree (node);
}

/*-------------------------------------------------------------------*/

NEGENTROPY negentropy ( REAL **data,
    UINT   n_samples,
    NODE   *local,
    UINT   target)
{
    /*
     * Calculates the entropy of classification of an attribute, given
     * a table of attributes already used, the attribute on which splitting
     * is to be taken, and the target attribute. Entropy is calculated in
     * bits, so logs are taken to base 2 by dividing by LN_2.
     *
     * The returned value always lies in the (closed) range [0, 1].
     */

    NEGENTROPY ret_val;
    NODE *_node, *_parent;
    UINT on_ctr, off_ctr, p1, p2, i, _match;
    REAL p_on, p_off, negentropy_on, negentropy_off;

    on_ctr = off_ctr = p1 = p2 = 0;

    /* Scan through all supplied data samples */
    for (i=0; i<n_samples; i++) 
    {

        /*
         * If pattern matches the current position in the decision tree,
         * then use this vector. The match is determined by passing up
         * the decision tree and checking whether 'data[idx] >= threshold'
         * matches at each step, where idx and threshold are taken from
         * each node in turn.
         */

        _match = 1;
        _node = local;
        while (_node->parent != NULL) 
        { /* If at the root node, all entries match*/
            _parent = _node->parent;
            if (_node == _parent->on) 
            { /* if parent node is ON */
                if (data[i][_parent->idx] < _parent->threshold)
                    _match = 0;
            }
            else
                if (_node == _parent->off) 
                { /* if parent node is OFF */
                    if (data[i][_parent->idx] >= _parent->threshold)
                        _match = 0;
                }
            _node = _parent;
        }

        if (_match) 
        {
            if (data[i][local->idx] >= local->threshold) 
            {
                on_ctr++;
                if (data[i][target] >= 0.5)
                    p1++;
            }
            else 
            {
                off_ctr++;
                if (data[i][target] >= 0.5)
                    p2++;
            }
        }
    }   /* for (i=0; i<n_samples; i++) */

    /* 1: Entropy of subtable with activation ON */

    /*
     * We now have the numbers of samples that match this part of the
     * decision tree, and the number of samples for which the supplied
     * condition are true. From these quantities we can find the negentropy of
     * this partition.
     */

    if (on_ctr > 0)
    {
        p_on  = (REAL) p1 / (REAL) on_ctr;
        p_off = 1 - p_on;
        negentropy_on = -entropy (p_on) - entropy (p_off);
    }
    else
        negentropy_on = 0.0;

    /* 2: Entropy of subtable with activation OFF */

    if (off_ctr > 0)
    {
        p_on  = (REAL) p2 / (REAL) off_ctr;
        p_off = 1 - p_on;
        negentropy_off = -entropy (p_on) - entropy (p_off);
    }
    else
        negentropy_off = 0.0;

    ret_val.ne = (negentropy_on * on_ctr + negentropy_off * off_ctr);
    ret_val.ne /= (on_ctr + off_ctr);

    /*
     * If all values examined were the same, set 'ret_val.status' to
     * the target value since this will be an end-of-branch node
     */

    if ((p1 == on_ctr) && (p2 == off_ctr))
        ret_val.status = ON;
    else if ((p1 == 0) && (p2 == 0))
        ret_val.status = OFF;
    else
        ret_val.status = INACTIVE;

    return ret_val;

}

/*-------------------------------------------------------------------*/

void print_tree (NODE *node, CHAR** tag_names)
{
    /*
     * Displays algebraic equivalent of the decision tree
     */

    if (node->on == NULL) 
    {
        if (node->idx == ON)
            printf("ON");
        else if (node->idx == OFF)
            printf("OFF");
        return;
    }

    else
        printf("if { %s >= %1.2f then ", tag_names[node->idx], node->threshold);

    print_tree ( node->on, tag_names );
    printf(" else ");
    print_tree ( node->off, tag_names );
    printf( " } " );
}

/*-------------------------------------------------------------------*/

void read_matrix (CHAR filename[], MATRIX *matrix)

{

    UINT i, j;
    UINT height = matrix->height;
    UINT width  = matrix->width;
    REAL **data = matrix->data;
    FILE *f;

    /* Open price file */
    if ((f = fopen(filename, "r")) == NULL) 
    {
        printf("\n File not found : %s\n", filename);
        err_exit (__FILE__, __LINE__);
    }

    for (i=0; i<height; i++)
        for (j=0; j<width; j++)
        {
            fscanf(f, "%lf\n", &data[i][j] );
        }

    fclose(f);

}

/*-------------------------------------------------------------------*/

CHAR **read_tags (CHAR *tag_file, UINT width)
{
    FILE *f;
    CHAR **_varname;
    UINT i;
    CHAR buffer[0xFF];

    f = fopen(tag_file, "r");
    if (f == NULL)
    {
        printf("\n File not found : %s\n", tag_file);
        err_exit (__FILE__, __LINE__);
    }

    _varname = (CHAR**) malloc (width * sizeof (CHAR*));
    for (i=0; i<width; i++)
        _varname[i] = (CHAR*) malloc (0xFF * sizeof (CHAR));

    i = 0;
    while (!feof(f))
    {
        if (fgets(buffer, 0xFF, f) != NULL)
        {
            if (strlen(buffer) > strlen("\n"))
            {
                if (i>width-1)
                {
                    printf("\nMore variable names were detected than data items.");
                    printf("\nPlease correct this problem before proceeding");
                    exit(0);
                }
                sscanf (buffer, "%[a-zA-Z0-9-_;:!@#$%^&*(){}[]]", _varname[i]);
                i++;
            }
        }
    }

    if (i<width) 
    {
        printf("\nFewer variable names than data items were detected.");
        printf("\nPlease correct this problem before proceeding");
        exit(0);
    }

    fclose (f);

    return _varname;

}
 
 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -