⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 subset.c

📁 一个简洁好用的SVM代码
💻 C
字号:
/*************************************************************************/

/*									 */

/*      Evaluation of the subsetting of a discrete attribute		 */

/*      ----------------------------------------------------		 */

/*									 */

/*************************************************************************/





#include "buildex.i"





ItemCount

	*Slice1,	/* Slice1[c]    = saved values of Freq[x][c] in subset.c */

	*Slice2;	/* Slice2[c]    = saved values of Freq[y][c] */



Set

	**Subset;	/* Subset[a][s] = subset s for att a */



short

	*Subsets;	/* Subsets[a] = no. subsets for att a */







/*************************************************************************/

/*									 */

/*  Evaluate subsetting a discrete attribute and form the chosen	 */

/*  subsets Subset[Att][], setting Subsets[Att] to the number of	 */

/*  subsets, and the Info[] and Gain[] of a test on the attribute	 */

/*									 */

/*************************************************************************/





    EvalSubset(Att, Fp, Lp, Items)

/*  ----------  */ 

    Attribute Att;

    ItemNo Fp, Lp; 

    ItemCount Items;

{ 

    DiscrValue V1, V2, BestV1, BestV2, Barred;

    ItemCount KnownItems;

    ClassNo c;

    float BaseInfo, MinGain, ThisGain, ThisInfo,

	Val, BestVal, BestGain, BestInfo,

	PrevVal, PrevGain, PrevInfo,

	DiscrKnownBaseInfo(), Worth(), ComputeGain(), TotalInfo();

    short Blocks=0, MissingValues=0, ReasonableSubsets, Bytes, b;

    Boolean MergedSubsets = false;

    int SaveMINOBJS;



    SaveMINOBJS = MINOBJS;

    MINOBJS = 1;



    /*  First compute Freq[][], ValFreq[], base info, and the gain

	and total info of a split on discrete attribute Att  */



    ComputeFrequencies(Att, Fp, Lp);



    KnownItems = Items - ValFreq[0];

    if ( KnownItems < Epsilon )

    {

	Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]);



	Gain[Att] = -Epsilon;

	Info[Att] = 0;

	return;

    }



    BaseInfo = DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]);



    PrevGain = ComputeGain(BaseInfo, UnknownRate[Att], MaxAttVal[Att],KnownItems);

    PrevInfo = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items;

    PrevVal = Worth(PrevInfo, PrevGain, Epsilon);



    Verbosity(2)

    {

	printf("\tAtt %s", AttName[Att]);



	Verbosity(3) PrintDistribution(Att, MaxAttVal[Att], true);



	printf("\tinf %.3f, gain %.3f, val=%.3f\n",

		PrevInfo, PrevGain, PrevVal);

    }



    /*  Eliminate unrepresented attribute values from Freq[] and ValFreq[]

	and form a separate subset for each represented attribute value  */



    Bytes = (MaxAttVal[Att]>>3) + 1;

    ClearBits(Bytes, Subset[Att][0]);



    ForEach(V1, 1, MaxAttVal[Att])

    {

	if ( ValFreq[V1] > 0.5 )

	{

	    if ( ++Blocks < V1 )

	    {

		ValFreq[Blocks] = ValFreq[V1];

		ForEach(c, 0, MaxClass)

		{

		    Freq[Blocks][c] = Freq[V1][c];

		}

	    }

	    ClearBits(Bytes, Subset[Att][Blocks]);

	    SetBit(V1, Subset[Att][Blocks]);

	}

	else

	{

	    SetBit(V1, Subset[Att][0]);

	    MissingValues++;

	}

    }



    /*  Merge any single-class subsets with others of the same class  */

    /*  Note: have ValFreq[V] > 0 for all V  */



    ForEach(V1, 1, Blocks-1)

    {

	for ( c = 0 ; Freq[V1][c] < 0.1 ; c++ )

	    ;



	if ( Freq[V1][c] < ValFreq[V1] - 0.1 ) continue;



	/*  Now have a single class -- look for others  */



	for ( V2 = V1+1 ; V2 <= Blocks ; )

	{

	    if ( Freq[V2][c] < ValFreq[V2] - 0.1 )

	    {

		V2++;

	    }

	    else

	    {

		/*  Merge these subsets  */



		Combine(V1, V2, Blocks);



		ForEach(b, 0, Bytes-1)

		{

		    Subset[Att][V1][b] |= Subset[Att][V2][b];

		    Subset[Att][V2][b] = Subset[Att][Blocks][b];

		}



		Blocks--;

		MergedSubsets = true;

	    }

	}

    }



    if ( MergedSubsets )

    {

	PrevGain = ComputeGain(BaseInfo, UnknownRate[Att], Blocks, KnownItems);

	PrevInfo = TotalInfo(ValFreq, 0, Blocks) / Items;

	PrevVal = Worth(PrevInfo, PrevGain, Epsilon);



	Verbosity(2)

	{

	    printf("\tAfter merging single-class subsets:");



	    Verbosity(3) PrintDistribution(Att, Blocks, false);



	    printf("\tinf %.3f, gain %.3f, val=%.3f\n",

		    PrevInfo, PrevGain, PrevVal);

	}

    }



    /*  Examine possible pair mergers and hill-climb  */



    MinGain = PrevGain / 2;



    while ( Blocks > 2 )

    {

	BestVal = BestV1 = 0;

	BestGain = -Epsilon;



	/*  Check reasonable subsets; if less than 3, bar mergers

	    involving the largest block  */



	ReasonableSubsets = 0;

	Barred = 1;



	ForEach(V1, 1, Blocks)

	{

	    if ( ValFreq[V1] >= SaveMINOBJS ) ReasonableSubsets++;



	    if ( ValFreq[V1] > ValFreq[Barred] ) Barred = V1;

	}



	if ( ReasonableSubsets >= 3 ) Barred = 0;



	/*  For each possible pair of values, calculate the gain and

	    total info of a split in which they are treated as one.

	    Keep track of the pair with the best gain.  */



	ForEach(V1, 1, Blocks-1)

	{

	    ForEach(V2, V1+1, Blocks)

	    {

		if ( V1 == Barred || V2 == Barred ) continue;



		Combine(V1, V2, Blocks);



		ThisGain = ComputeGain(BaseInfo, UnknownRate[Att],

					Blocks-1, KnownItems);

		ThisInfo = TotalInfo(ValFreq, 0, Blocks-1) / Items;

		Val      = Worth(ThisInfo, ThisGain, Epsilon);



		Verbosity(4)

		{

		    printf("\tcombine %d %d info %.3f gain %.3f val %.3f",

		           V1, V2, ThisInfo, ThisGain, Val);

		    PrintDistribution(Att, Blocks-1, false);

		}



		/*  Force a split if

			less than two reasonable subsets, or

			using GAIN criterion

		    Prefer this split to the previous one if

			gain >= MinGain (and previous < MinGain), or

			val >= previous best val  */



		if ( ThisGain >= MinGain && BestGain < MinGain ||

		     Val >= BestVal ||

		     ! BestV1 && ( ! GAINRATIO || ReasonableSubsets < 2 ) )

		{

		    BestVal  = Val;

		    BestGain = ThisGain;

		    BestInfo = ThisInfo;

		    BestV1   = V1;

		    BestV2   = V2;

		}



		Uncombine(V1, V2);

	    }

	}



	if ( GAINRATIO &&

	     ReasonableSubsets >= 2 &&

	     ( ! BestV1 ||

	       BestVal < PrevVal + 1E-5 ||

	       BestVal == PrevVal && BestGain < PrevGain ) ) break;



	PrevGain = BestGain;

	PrevInfo = BestInfo;

        PrevVal = BestVal;



	Combine(BestV1, BestV2, Blocks);



	ForEach(b, 0, Bytes-1)

	{

	    Subset[Att][BestV1][b] |= Subset[Att][BestV2][b];

	    Subset[Att][BestV2][b] = Subset[Att][Blocks][b];

	}



	Blocks--;



	Verbosity(2)

	{

	    printf("\t\tform subset ");

	    PrintSubset(Att, Subset[Att][BestV1]);

	    printf(": %d subsets, inf %.3f, gain %.3f, val %.3f\n",

		   Blocks, BestInfo, BestGain, BestVal);

	    Verbosity(3)

	    {

		printf("\t\tcombine %d, %d", BestV1, BestV2);

		PrintDistribution(Att, Blocks, false);

	    }

	}

    }



    MINOBJS = SaveMINOBJS;



    if ( PrevVal <= 0 )

    {

	Gain[Att] = -Epsilon;

	Info[Att] = 0;

    }

    else

    {

	Gain[Att] = ComputeGain(BaseInfo, UnknownRate[Att], Blocks, KnownItems);

	Info[Att] = PrevInfo;



	if ( MissingValues )

	{

	    Blocks++;

	    CopyBits(Bytes, Subset[Att][0], Subset[Att][Blocks]);

	}



	Subsets[Att] = Blocks;



	Verbosity(2) printf("\tFinal subsets:");

	Verbosity(3) PrintDistribution(Att, Blocks, false);

	Verbosity(2)

	    printf("\tinf %.3f gain %.3f val %.3f\n", 

		   Info[Att], Gain[Att], Worth(Info[Att], Gain[Att], Epsilon));

    }

}







/*************************************************************************/

/*									 */

/*  Combine the distribution figures of discrete attribute values	 */

/*  x and y, putting the combined figures in Freq[x][] and		 */

/*  ValFreq[x][], and saving old values in Slice1 and Slice2		 */

/*									 */

/*************************************************************************/





    Combine(x, y, Last)

/*  -------  */

    DiscrValue x, y, Last;

{

    ClassNo c;



    ForEach(c, 0, MaxClass)

    {

	Slice1[c] = Freq[x][c];

	Slice2[c] = Freq[y][c];



	Freq[x][c] += Freq[y][c];

	Freq[y][c]  = Freq[Last][c];

    }



    Slice1[MaxClass+1] = ValFreq[x];

    Slice2[MaxClass+1] = ValFreq[y];



    ValFreq[x] += ValFreq[y];

    ValFreq[y]  = ValFreq[Last];

}







/*************************************************************************/

/*									 */

/*  Restore old class distribution figures of discrete attribute	 */

/*  values x and y from Slice1 and Slice2				 */

/*									 */

/*************************************************************************/





    Uncombine(x, y)

/*  ---------  */

    DiscrValue x, y;

{

    ClassNo c;



    ForEach(c, 0, MaxClass)

    {

	Freq[x][c] = Slice1[c];

	Freq[y][c] = Slice2[c];

    }



    ValFreq[x] = Slice1[MaxClass+1];

    ValFreq[y] = Slice2[MaxClass+1];

}







/*************************************************************************/

/*									 */

/*  Print the values of attribute Att which are in the subset Ss	 */

/*									 */

/*************************************************************************/





    PrintSubset(Att, Ss)

/*  -----------  */

    Attribute Att;

    Set Ss;

{

    DiscrValue V1;

    Boolean First=true;



    ForEach(V1, 1, MaxAttVal[Att])

    {

	if ( In(V1, Ss) )

	{

	    if ( First )

	    {

		First = false;

	    }

	    else

	    {

		printf(", ");

	    }



	    printf("%s", AttValName[Att][V1]);

	}

    }

}







/*************************************************************************/

/*									 */

/*  Construct and return a node for a test on a subset of values	 */

/*									 */

/*************************************************************************/





    SubsetTest(Node, Att)

/*  -----------  */

    Tree Node;

    Attribute Att;

{ 

    ItemCount CountItems();

    short S, Bytes;



    Sprout(Node, Subsets[Att]);



    Node->NodeType	= BrSubset;

    Node->Tested	= Att;

    Node->Errors	= 0;

    

    Bytes = (MaxAttVal[Att]>>3) + 1;

    Node->Subset = (Set *) calloc(Subsets[Att] + 1, sizeof(Set));

    ForEach(S, 1, Node->Forks)

    {

	Node->Subset[S] = (Set) malloc(Bytes);

	CopyBits(Bytes, Subset[Att][S], Node->Subset[S]);

    }

} 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -