⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 garff.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
		{			if(pRow2[nIndex] != pRow1[nIndex])				dSum += pInputScales[n];		}	}	return dSum;}// ------------------------------------------------------------------GArffAttribute::GArffAttribute(){	m_szName = NULL;	m_nValues = 0;	m_szValues = NULL;	m_bIsInput = true;}GArffAttribute::GArffAttribute(bool bIsInput, int nValues, const char** szValues){	m_szName = NULL;	m_nValues = nValues;	if(nValues == 0)		m_szValues = NULL;	else	{		if(szValues)		{			m_szValues = new char*[nValues];			int n;			for(n = 0; n < nValues; n++)			{				m_szValues[n] = new char[strlen(szValues[n]) + 1];				strcpy(m_szValues[n], szValues[n]);			}		}		else			m_szValues = NULL;	}	m_bIsInput = bIsInput;}GArffAttribute::~GArffAttribute(){	delete(m_szName);	if(m_szValues)	{		int n;		for(n = 0; n < m_nValues; n++)			delete(m_szValues[n]);		delete(m_szValues);	}}GArffAttribute* GArffAttribute::NewCopy(){	return new GArffAttribute(m_bIsInput, m_nValues, (const char**)m_szValues);}/*static*/ GArffAttribute* GArffAttribute::Parse(const char* szFile, int nLen){	// Eat whitespace	while(nLen > 0 && *szFile <= ' ')	{		if(*szFile == '\n')			return NULL;		szFile++;		nLen--;	}	if(nLen < 1)		return NULL;	// Parse the name	Holder<GArffAttribute*> hAttr(new GArffAttribute());	GArffAttribute* pAttr = hAttr.Get();	int nQuotes = 0;	if(szFile[0] == '\'' || szFile[0] == '"')		nQuotes = 1;	int nPos = 1;	for( ; nPos < nLen && (szFile[nPos] > ' ' || nQuotes > 0); nPos++)	{		if(szFile[nPos] == '\'' || szFile[nPos] == '"')			nQuotes--;	}	pAttr->m_szName = new char[nPos + 1];	memcpy(pAttr->m_szName, szFile, nPos);	pAttr->m_szName[nPos] = '\0';	// Eat whitespace	while(nPos < nLen && szFile[nPos] <= ' ')	{		if(szFile[nPos] == '\n')			return NULL;		nPos++;	}	if(nPos >= nLen)		return NULL;	// Check for CONTINUOUS	if(nLen - nPos >= 10 && strnicmp(&szFile[nPos], "CONTINUOUS", 10) == 0)		return hAttr.Drop();	if(nLen - nPos >= 7 && strnicmp(&szFile[nPos], "NUMERIC", 7) == 0)		return hAttr.Drop();	if(nLen - nPos >= 4 && strnicmp(&szFile[nPos], "REAL", 4) == 0)		return hAttr.Drop();	// Parse the values	if(szFile[nPos] != '{')		return NULL;	nPos++;	// Count the values	int nCount = 1;	int n;	for(n = nPos; szFile[n] != '{' && szFile[n] != '\n' && n < nLen; n++)	{		if(szFile[n] == ',')			nCount++;	}	// Parse the values	pAttr->m_szValues = new char*[nCount];	pAttr->m_nValues = nCount;	int nValue = 0;	for(n = nPos; szFile[n] != '}' && szFile[n] != '\n' && n < nLen; n++)	{		if(szFile[n] == ',')		{			int nStart = nPos;			int nEnd = n;			while(nStart < nEnd && szFile[nStart] <= ' ')				nStart++;			while(nStart < nEnd && szFile[nEnd - 1] <= ' ')				nEnd--;			pAttr->m_szValues[nValue] = new char[nEnd - nStart + 1];			memcpy(pAttr->m_szValues[nValue], &szFile[nStart], nEnd - nStart);			(pAttr->m_szValues[nValue])[nEnd - nStart] = '\0';			nPos = n + 1;			nValue++;		}	}	int nStart = nPos;	int nEnd = n;	while(nStart < nEnd && szFile[nStart] <= ' ')		nStart++;	while(nStart < nEnd && szFile[nEnd - 1] <= ' ')		nEnd--;	pAttr->m_szValues[nValue] = new char[nEnd - nStart + 1];	memcpy(pAttr->m_szValues[nValue], &szFile[nStart], nEnd - nStart);	(pAttr->m_szValues[nValue])[nEnd - nStart] = '\0';	if(szFile[n] != '}')		return NULL;	return hAttr.Drop();}void GArffAttribute::SetContinuous(){	if(m_szValues)	{		int n;		for(n = 0; n < m_nValues; n++)			delete(m_szValues[n]);		delete(m_szValues);	}	m_szValues = NULL;	m_nValues = 0;}int GArffAttribute::GetValueCount(){	return m_nValues;}const char* GArffAttribute::GetValue(int n){	if(n < 0)	{		GAssert(n == -1, "out of range");		return "<?>";	}	GAssert(n < m_nValues, "out of range");	return m_szValues[n];}int GArffAttribute::FindEnumeratedValue(const char* szValue){	GAssert(!IsContinuous(), "Not an enumerated attribute");	int n;	for(n = 0; n < m_nValues; n++)	{		if(strcmp(m_szValues[n], szValue) == 0)			return n;	}	return -1;}// ------------------------------------------------------------------GArffData::GArffData(int nGrowSize): GPointerArray(nGrowSize){}GArffData::~GArffData(){	int nCount = GetSize();	int n;	for(n = 0; n < nCount; n++)		delete[] (double*)GetPointer(n);}void GArffData::CopyVector(double* pVector, int nAttributeCount){	double* pNewVector = new double[nAttributeCount];	memcpy(pNewVector, pVector, sizeof(double) * nAttributeCount);	AddVector(pNewVector);}double* GArffData::DropVector(int nIndex){	int nCount = GetSize();	double* pVector = GetVector(nIndex);	SetPointer(nIndex, GetPointer(nCount - 1));	DeleteCell(nCount - 1);	return pVector;}void GArffData::DropAllVectors(){	Clear();}void GArffData::Shuffle(){	// Swap every row with a randomely selected row	int nCount = GetSize();	int n, r;	void* pTemp;	for(n = nCount - 1; n > 0; n--)	{		r = rand() % n;		pTemp = GetPointer(r);		SetPointer(r, GetPointer(n));		SetPointer(n, pTemp);	}}double GArffData::MeasureEntropy(GArffRelation* pRelation, int nColumn){	// Count the number of occurrences of each value	GArffAttribute* pAttr = pRelation->GetAttribute(nColumn);	GAssert(!pAttr->IsInput(), "Expected an output");	GAssert(!pAttr->IsContinuous(), "MeasureEntropy doesn't work with continuous attributes");	int nPossibleValues = pAttr->GetValueCount();	GTEMPBUF(int, pnCounts, nPossibleValues);	int nTotalCount = 0;	memset(pnCounts, '\0', pAttr->GetValueCount() * sizeof(int));	int n;	int nRows = GetSize();	for(n = 0; n < nRows; n++)	{		int nValue = (int)GetVector(n)[nColumn];		if(nValue < 0)		{			GAssert(nValue == -1, "out of range");			continue;		}		GAssert(nValue < nPossibleValues, "value out of range");		pnCounts[nValue]++;		nTotalCount++;	}	if(nTotalCount == 0)		return 0;	// Total up the entropy	double dLog2 = log((double)2);	double dEntropy = 0;	double dRatio;	for(n = 0; n < nPossibleValues; n++)	{		if(pnCounts[n] > 0)		{			dRatio = (double)pnCounts[n] / nTotalCount;			dEntropy -= (dRatio * log(dRatio) / dLog2);		}	}	return dEntropy;}GArffData* GArffData::SplitByPivot(int nColumn, double dPivot){	GArffData* pNewSet = new GArffData(MAX(8, GetSize()));	double* pRow;	int n;	for(n = 0; n < GetSize(); n++)	{		pRow = GetVector(n);		if(pRow[nColumn] <= dPivot)		{			pNewSet->AddVector(DropVector(n));			n--;		}	}	return pNewSet;}int DoubleRefComparer(void* pThis, void* pA, void* pB){	if(*(double*)pA > *(double*)pB)		return 1;	if(*(double*)pA < *(double*)pB)		return -1;	return 0;}GArffData** GArffData::SplitByAttribute(GArffRelation* pRelation, int nAttribute){	GArffAttribute* pAttr = pRelation->GetAttribute(nAttribute);	GAssert(pAttr->IsInput(), "Expected an input");	int nCount = pAttr->GetValueCount();	GAssert(nCount > 0, "Only discreet values are supported");	GArffData** ppParts = new GArffData*[nCount];	int n;	for(n = 0; n < nCount; n++)		ppParts[n] = SplitByPivot(nAttribute, (double)n);	GAssert(GetSize() == 0, "some data out of range");	return ppParts;}GArffData* GArffData::SplitBySize(int nRows){	GAssert(nRows >= 0 && nRows <= GetSize(), "out of range");	GArffData* pNewSet = new GArffData(MAX(8, GetSize() - nRows));	while(GetSize() > nRows)		pNewSet->AddVector(DropVector(nRows));	return pNewSet;}void GArffData::Merge(GArffData* pData){	while(pData->GetSize() > 0)		AddVector(pData->DropVector(0));}void GArffData::DiscretizeNonContinuousOutputs(GArffRelation* pRelation){	int nOutputs = pRelation->GetOutputCount();	int n, nIndex, i, nValueCount, nVal;	int nRowCount = GetSize();	double* pRow;	for(n = 0; n < nOutputs; n++)	{		nIndex = pRelation->GetOutputIndex(n);		GArffAttribute* pAttr = pRelation->GetAttribute(nIndex);		if(pAttr->IsContinuous())			continue;		nValueCount = pAttr->GetValueCount();		for(i = 0; i < nRowCount; i++)		{			pRow = GetVector(i);			nVal = (int)(pRow[nIndex] - .5);			if(nVal < 0)				nVal = 0;			else if(nVal >= nValueCount)				nVal = nValueCount - 1;			pRow[nIndex] = (double)nVal;		}	}}double GArffData::ComputeMean(int nAttribute){	double dMean = 0;	int nRowCount = GetSize();	double* pRow;	int i;	for(i = 0; i < nRowCount; i++)	{		pRow = GetVector(i);		dMean += pRow[nAttribute];	}	return dMean / nRowCount;}void GArffData::GetMeans(double* pOutMeans, int nAttributes){	int n;	for(n = 0; n < nAttributes; n++)		pOutMeans[n] = 0;	int nRowCount = GetSize();	double* pRow;	int i;	for(i = 0; i < nRowCount; i++)	{		pRow = GetVector(i);		for(n = 0; n < nAttributes; n++)			pOutMeans[n] += pRow[n];	}	for(n = 0; n < nAttributes; n++)		pOutMeans[n] /= nRowCount;}double GArffData::ComputeVariance(double dMean, int nAttribute){	double dVariance = 0;	double* pRow;	double d;	int i;	int nRowCount = GetSize();	for(i = 0; i < nRowCount; i++)	{		pRow = GetVector(i);		d = pRow[nAttribute] - dMean;		dVariance += (d * d);	}	return dVariance / nRowCount;}void GArffData::GetVariance(double* pOutVariance, double* pMeans, int nAttributes){	int n;	for(n = 0; n < nAttributes; n++)		pOutVariance[n] = 0;	int nRowCount = GetSize();	double* pRow;	int i;	for(i = 0; i < nRowCount; i++)	{		pRow = GetVector(i);		for(n = 0; n < nAttributes; n++)			pOutVariance[n] += ((pRow[n] - pMeans[n]) * (pRow[n] - pMeans[n]));	}	for(n = 0; n < nAttributes; n++)		pOutVariance[n] /= nRowCount;}int GArffData::RemoveOutlyers(double dStandardDeviations, int nAttributes){	int nOutlyers = 0;	GTEMPBUF(double, pMeans, nAttributes);	GTEMPBUF(double, pVariance, nAttributes);	GetMeans(pMeans, nAttributes);	GetVariance(pVariance, pMeans, nAttributes);	int n, i;	for(n = 0; n < nAttributes; n++)		pVariance[n] = sqrt(pVariance[n]);	double* pRow;	int nRowCount = GetSize();	for(i = nRowCount - 1; i >= 0; i--)	{		pRow = GetVector(i);		for(n = 0; n < nAttributes; n++)		{			if(ABS(pRow[n] - pMeans[n]) > dStandardDeviations * pVariance[n])			{				delete(DropVector(i));				nOutlyers++;				break;			}		}	}	return nOutlyers;}void GArffData::GetMinAndRange(int nAttribute, double* pMin, double* pRange){	int nCount = GetSize();	GAssert(nCount > 0, "No data");	double* pRow = GetVector(0);	double dMin = pRow[nAttribute];	double dMax = dMin;	int n;	for(n = 1; n < nCount; n++)	{		pRow = GetVector(n);		if(pRow[nAttribute] < dMin)			dMin = pRow[nAttribute];		if(pRow[nAttribute] > dMax)			dMax = pRow[nAttribute];	}	*pMin = dMin;	*pRange = dMax - dMin;}void GArffData::Normalize(int nAttribute, double dInputMin, double dInputRange, double dOutputMin, double dOutputRange){	GAssert(dInputRange > 0, "divide by zero");	int nCount = GetSize();	double* pRow;	double dScale = dOutputRange / dInputRange;	int n;	for(n = 0; n < nCount; n++)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -