⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 garff.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	{		pRow = GetVector(n);		pRow[nAttribute] -= dInputMin;		pRow[nAttribute] *= dScale;		pRow[nAttribute] += dOutputMin;	}}/*static*/ double GArffData::Normalize(double dVal, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange){	GAssert(dInputRange > 0, "divide by zero");	dVal -= dInputMin;	dVal /= dInputRange;	dVal *= dOutputRange;	dVal += dOutputMin;	return dVal;}double* GArffData::MakeSetOfMostCommonOutputs(GArffRelation* pRelation){	int nOutputs = pRelation->GetOutputCount();	double* pOutputs = new double[nOutputs];	double* pRow;	int nVal;	int nIndex;	int n;	for(n = 0; n < nOutputs; n++)	{		nIndex = pRelation->GetOutputIndex(n);		GArffAttribute* pAttr = pRelation->GetAttribute(nIndex);		if(pAttr->IsContinuous())		{			// Find the mean output value			int i;			int nCount = GetSize();			double dSum = 0;			for(i = 0; i < nCount; i++)			{				pRow = GetVector(i);				dSum += pRow[n];			}			pOutputs[n] = dSum / nCount;		}		else		{			// Init the counts to zero			int nCount = pAttr->GetValueCount();			Holder<int*> hCounts(new int[nCount]);			int* pCounts = hCounts.Get();			memset(pCounts, '\0', sizeof(int) * nCount);			// Count occurrences of each output value			int i;			nCount = GetSize();			for(i = 0; i < nCount; i++)			{				pRow = GetVector(i);				nVal = (int)pRow[nIndex];				if(nVal < 0)				{					GAssert(nVal == -1, "out of range");					continue;				}				pCounts[nVal]++;			}			// Find the most common output value			nCount = pAttr->GetValueCount();						int nMaxCount = pCounts[0];			int nBestValue = 0;			for(i = 1; i < nCount; i++)			{				if(pCounts[i] > nMaxCount)				{					nBestValue = i;					nMaxCount = pCounts[i];				}			}			// Set the value			pOutputs[n] = (double)nBestValue;		}	}	return pOutputs;}bool GArffData::IsOutputHomogenous(GArffRelation* pRelation){	int nRowCount = GetSize();	if(nRowCount <= 0)		return true;	int nOutputs = pRelation->GetOutputCount();	int n, i, nIndex, nVal, nTmp;	double* pRow;	double dVal;	for(i = 0; i < nOutputs; i++)	{		nIndex = pRelation->GetOutputIndex(i);		GArffAttribute* pAttr = pRelation->GetAttribute(nIndex);		if(pAttr->IsContinuous())		{			pRow = GetVector(0);			dVal = pRow[nIndex];			for(n = 1; n < nRowCount; n++)			{				pRow = GetVector(n);				if(pRow[nIndex] != dVal)					return false;			}		}		else		{			for(n = 0; n < nRowCount; n++)			{				pRow = GetVector(n);				nVal = (int)pRow[nIndex];				if(nVal >= 0)				{					n++;					break;				}			}			for( ; n < nRowCount; n++)			{				pRow = GetVector(n);				nTmp = (int)pRow[nIndex];				if(nTmp != nVal && nTmp >= 0)					return false;			}		}	}	return true;}void GArffData::RandomlyReplaceMissingData(GArffRelation* pRelation){	int n, i, j;	int nRowCount = GetSize();	int nAttrCount = pRelation->GetAttributeCount();	int nMaxValues = 0;	int nValues;	int nVal;	int nSum;	int nRand;	int* pCounts = NULL;	double* pRow;	GArffAttribute* pAttr;	for(i = 0; i < nAttrCount; i++)	{		// Make a buffer to hold the counts		pAttr = pRelation->GetAttribute(i);		if(pAttr->IsContinuous())			continue;		nValues = pAttr->GetValueCount();		if(nValues > nMaxValues)		{			delete(pCounts);			nMaxValues = pAttr->GetValueCount() + 3;			pCounts = new int[nMaxValues];		}		// Count the number of each value		memset(pCounts, '\0', sizeof(int) * nValues);		for(n = 0; n < nRowCount; n++)		{			nVal = (int)GetVector(n)[i];			if(nVal >= 0)			{				GAssert(nVal < nValues, "out of range");				pCounts[nVal]++;			}			else			{				GAssert(nVal == -1, "out of range");			}		}		// Sum the value counts		nSum = 0;		for(n = 0; n < nValues; n++)			nSum += pCounts[n];		// Replace the missing values		for(n = 0; n < nRowCount; n++)		{			pRow = GetVector(n);			nVal = (int)pRow[i];			if(nVal < 0)			{				nRand = (int)(GBits::GetRandomUint() % nSum);				for(j = 0; ; j++)				{					GAssert(j < nValues, "internal inconsistency");					nRand -= pCounts[j];					if(nRand < 0)					{						pRow[i] = (double)j;						break;					}				}			}		}	}}void GArffData::ReplaceMissingAttributeWithMostCommonValue(GArffRelation* pRelation, int nAttribute){	GArffAttribute* pAttr = pRelation->GetAttribute(nAttribute);	if(pAttr->IsContinuous())		return; // missing values are currently only supported for discreet values	int nValues = pAttr->GetValueCount();	GTEMPBUF(int, pCounts, nValues);	memset(pCounts, '\0', sizeof(int) * nValues);	double* pRow;	int nRowCount = GetSize();	int n, nVal;	for(n = 0; n < nRowCount; n++)	{		pRow = GetVector(n);		nVal = (int)pRow[nAttribute];		if(nVal < 0)			continue;		GAssert(nVal < nValues, "out of range");		pCounts[nVal]++;	}	int nBest = 0;	for(n = 1; n < nValues; n++)	{		if(pCounts[n] > pCounts[nBest])			nBest = n;	}	for(n = 0; n < nRowCount; n++)	{		pRow = GetVector(n);		nVal = (int)pRow[nAttribute];		if(nVal < 0)		{			pRow[nAttribute] = (double)nBest;		}	}}void GArffData::Print(int nAttributes){	int nRows = GetSize();	double* pRow;	int n, i;	for(n = 0; n < nRows; n++)	{		pRow = GetVector(n);		printf("%f", pRow[0]);		for(i = 1; i < nAttributes; i++)			printf("\t%f", pRow[i]);		printf("\n");	}}int ComputeMinimumVariancePivotComparer(void* pThis, void* pA, void* pB){	int nAttr = (int)pThis;	double* pdA = (double*)pA;	double* pdB = (double*)pB;	if(pdA[nAttr] > pdB[nAttr])		return 1;	else		return -1;}double GArffData::ComputeMinimumVariancePivot(int nAttr){	int nRows = GetSize();	GPointerArray arr(nRows);	int n;	for(n = 0; n < nRows; n++)		arr.AddPointer(GetVector(n));	arr.Sort(ComputeMinimumVariancePivotComparer, (void*)nAttr);	double dBestPivotScore = 1e100;	double dBestPivot = 0;	double dPivot, d;	double* pRow1;	double* pRow2;	double dMean1, dMean2, dVar1, dVar2;	int nCount1, nCount2, i;	for(n = nRows - 2; n >= 0; n--)	{		// Try a pivot		pRow1 = (double*)arr.GetPointer(n);		pRow2 = (double*)arr.GetPointer(n + 1);		dPivot = (pRow1[nAttr] + pRow2[nAttr]) / 2;		// Compute the mean of each half		dMean1 = 0;		dMean2 = 0;		nCount1 = 0;		nCount2 = 0;		for(i = 0; i < nRows; i++)		{			pRow1 = GetVector(i);			if(pRow1[nAttr] < dPivot)			{				nCount1++;				dMean1 += pRow1[nAttr];			}			else			{				nCount2++;				dMean2 += pRow1[nAttr];			}		}		dMean1 /= nCount1;		dMean2 /= nCount2;		// Compute the variance of each half		dVar1 = 0;		dVar2 = 0;		for(i = 0; i < nRows; i++)		{			pRow1 = GetVector(i);			if(pRow1[nAttr] < dPivot)			{				d = pRow1[nAttr] - dMean1;				dVar1 += (d * d);			}			else			{				d = pRow2[nAttr] - dMean2;				dVar2 += (d * d);			}		}		dVar1 /= nCount1;		dVar2 /= nCount2;		d = dVar1 + dVar2;				// See if we've got a new best score		if(d < dBestPivotScore)		{			dBestPivotScore = d;			dBestPivot = dPivot;		}	}	return dBestPivot;}double GArffData::ComputeMinimumInfoPivot(GArffRelation* pRelation, int nAttr, double* pOutputInfo){	int nRows = GetSize();	GPointerArray arr(nRows);	int n;	for(n = 0; n < nRows; n++)		arr.AddPointer(GetVector(n));	arr.Sort(ComputeMinimumVariancePivotComparer, (void*)nAttr);	double dBestPivotScore = 1e100;	double dBestPivot = 0;	double dPivot, d;	double* pRow1;	double* pRow2;	for(n = nRows - 2; n >= 0; n--)	{		// Try a pivot		pRow1 = (double*)arr.GetPointer(n);		pRow2 = (double*)arr.GetPointer(n + 1);		dPivot = (pRow1[nAttr] + pRow2[nAttr]) / 2;		// Split at the pivot and measure the sum info		GArffData* pData2 = SplitByPivot(nAttr, dPivot);		d = pRelation->MeasureTotalOutputInfo(this) + pRelation->MeasureTotalOutputInfo(pData2);		Merge(pData2);		delete(pData2);		// See if we've got a new best score		if(d < dBestPivotScore)		{			dBestPivotScore = d;			dBestPivot = dPivot;		}	}	*pOutputInfo = dBestPivotScore;	return dBestPivot;}void GArffData::ComputeCovarianceMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation){	// Resize the matrix	int nInputs = pRelation->GetInputCount();	pOutMatrix->Resize(nInputs, nInputs);	// Compute the deviations	Holder<double*> hMeans(new double[nInputs]);	double* pMeans = hMeans.Get();	int nRowCount = GetSize();	double* pRow;	int n, i, j, nIndex;	for(i = 0; i < nInputs; i++)	{		nIndex = pRelation->GetInputIndex(i);		// Compute the mean		double dSum = 0;		for(n = 0; n < nRowCount; n++)		{			pRow = GetVector(n);			dSum += pRow[nIndex];		}		pMeans[i] = dSum / nRowCount;	}	// Compute the covariances for half the matrix	for(i = 0; i < nInputs; i++)	{		for(n = i; n < nInputs; n++)		{			double dSum = 0;			for(j = 0; j < nRowCount; j++)			{				pRow = GetVector(j);				dSum += ((pRow[i] - pMeans[i]) * (pRow[n] - pMeans[n]));			}			pOutMatrix->Set(i, n, dSum / (nRowCount - 1));		}	}	// Fill out the other half of the matrix	for(i = 1; i < nInputs; i++)	{		for(n = 0; n < i; n++)			pOutMatrix->Set(i, n, pOutMatrix->Get(n, i));	}}void GArffData::ComputeCoprobabilityMatrix(GMatrix* pOutMatrix, GArffRelation* pRelation, int nAttr, double noDataValue){	// Resize the matrix	GArffAttribute* pAttr = pRelation->GetAttribute(nAttr);	int nRows = pAttr->GetValueCount();	int nAttributes = pRelation->GetAttributeCount();	int nCols = 0;	int i;	for(i = 0; i < nAttributes; i++)	{		GArffAttribute* pAttrCol = pRelation->GetAttribute(i);		nCols += pAttrCol->GetValueCount();	}	pOutMatrix->Resize(nRows, nCols);	// Compute the coprobabilities	int nRowCount = GetSize();	int row, col, nMatch, nTotal, nAttrCol, nVal;	double* pRow;	for(row = 0; row < nRows; row++)	{		col = 0;		for(nAttrCol = 0; nAttrCol < nAttributes; nAttrCol++)		{			GArffAttribute* pAttrCol = pRelation->GetAttribute(nAttrCol);			for(nVal = 0; nVal < pAttrCol->GetValueCount(); nVal++)			{				nMatch = 0;				nTotal = 0;				for(i = 0; i < nRowCount; i++)				{					pRow = GetVector(i);					if((int)pRow[nAttrCol] == nVal)					{						nTotal++;						if((int)pRow[nAttr] == row)							nMatch++;					}				}				if(nTotal == 0)					pOutMatrix->Set(row, col, noDataValue);				else					pOutMatrix->Set(row, col, (double)nMatch / nTotal);				col++;			}		}		GAssert(col == nCols, "problem with columns");	}}int DimensionComparer(void* pThis, void* pA, void* pB){	int nDim = *(int*)pThis;	if(((double*)pA)[nDim] < ((double*)pB)[nDim])		return -1;	else if(((double*)pA)[nDim] > ((double*)pB)[nDim])		return 1;	else		return 0;}void GArffData::Sort(int nDimension){	GPointerArray::Sort(DimensionComparer, &nDimension);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -