⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 garff.cpp

📁 一个非常有用的开源代码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
	Copyright (C) 2006, Mike Gashler

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	see http://www.gnu.org/copyleft/lesser.html
*/

#include "GArff.h"
#include "../GClasses/GArray.h"
#include "../GClasses/GMacros.h"
#include "../GClasses/GMath.h"
#include <math.h>
#include "GBits.h"
#include "GMatrix.h"

// Creates an empty relation: no name, an empty attribute list, and
// input/output index tables that are built lazily (a count of -1 means
// "not computed yet"; see GetInputCount/GetOutputCount).
GArffRelation::GArffRelation()
{
	m_szName = NULL;
	m_pAttributes = new GPointerArray(32);
	m_nInputCount = -1;
	m_pInputIndexes = NULL;
	m_nOutputCount = -1;
	m_pOutputIndexes = NULL;
}

// Deletes every attribute held in the attribute list, then the list
// itself, the name buffer and both index tables.
GArffRelation::~GArffRelation()
{
	int n;
	int nCount;
	nCount = m_pAttributes->GetSize();
	for(n = 0; n < nCount; n++)
		delete((GArffAttribute*)m_pAttributes->GetPointer(n));
	delete(m_pAttributes);
	delete[] m_szName;
	delete[] m_pInputIndexes;
	delete[] m_pOutputIndexes;
}

// Appends pAttr to the attribute list. The destructor deletes every
// attribute in the list, so the relation owns pAttr from here on.
void GArffRelation::AddAttribute(GArffAttribute* pAttr)
{
	m_pAttributes->AddPointer(pAttr);
}

// Reports a parse failure through GAssert and returns NULL so that callers
// can write "return ParseError(...)". nLine is accepted but not used here.
GArffRelation* ParseError(int nLine, const char* szProblem)
{
	GAssert(false, szProblem);
	return NULL;
}

// Returns how many attributes of this relation report IsContinuous().
int GArffRelation::CountContinuousAttributes()
{
	int n;
	int nAttributes = GetAttributeCount();
	int nCount = 0;
	for(n = 0; n < nAttributes; n++)
	{
		GArffAttribute* pAttr = GetAttribute(n);
		if(pAttr->IsContinuous())
			nCount++;
	}
	return nCount;
}

// Writes this relation and the rows of pData to szFilename in ARFF form:
// an @RELATION line, one @ATTRIBUTE line per attribute, then @DATA
// followed by one comma-separated line per row.
//
// pData      - rows to write; each row is indexed by attribute number.
// szFilename - path of the file to create or overwrite.
//
// Calls ThrowError if the file cannot be opened.
void GArffRelation::SaveArffFile(GArffData* pData, const char* szFilename)
{
	// Open the file for writing
	FILE* pFile = fopen(szFilename, "w");
	FileHolder hFile(pFile); // presumably closes pFile on scope exit -- confirm in FileHolder
	if(!pFile)
	{
		// NOTE(review): the format string is wide while szFilename is narrow;
		// confirm how ThrowError interprets %s.
		ThrowError(L"Failed to open file: %s", szFilename);
	}

	// Write the relation title
	fputs("@RELATION ", pFile);
	const char* szName = GetName();
	if(!szName)
		szName = "Untitled";
	fputs(szName, pFile);
	fputs("\n\n", pFile);

	// Write the attributes
	char szTmp[64];
	int i, j;
	for(i = 0; i < GetAttributeCount(); i++)
	{
		GArffAttribute* pAttr = GetAttribute(i);
		fputs("@ATTRIBUTE ", pFile);
		szName = pAttr->GetName();
		if(!szName)
		{
			// Unnamed attribute: synthesize "a<index>". The number is written
			// after the prefix so that it does not overwrite it.
			strcpy(szTmp, "a");
			itoa(i, szTmp + 1, 10);
			szName = szTmp;
		}
		fputs(szName, pFile);
		fputs("\t", pFile);
		if(pAttr->IsContinuous())
			fputs("CONTINUOUS", pFile);
		else
		{
			fputs("{", pFile);
			for(j = 0; j < pAttr->GetValueCount(); j++)
			{
				// Commas go between values only, never after the last one
				if(j > 0)
					fputs(",", pFile);
				szName = pAttr->GetValue(j);
				if(!szName)
				{
					// Unnamed value: synthesize "v<value index>"
					strcpy(szTmp, "v");
					itoa(j, szTmp + 1, 10);
					szName = szTmp;
				}
				fputs(szName, pFile);
			}
			fputs("}", pFile);
		}
		fputs("\n", pFile);
	}

	// Write the data
	fputs("\n@DATA\n", pFile);
	for(i = 0; i < pData->GetSize(); i++)
	{
		double* pVector = pData->GetVector(i);
		for(j = 0; j < GetAttributeCount(); j++)
		{
			if(j > 0)
				fputs(",", pFile);
			GArffAttribute* pAttr = GetAttribute(j);
			if(pAttr->IsContinuous())
			{
				GBits::DoubleToString(szTmp, pVector[j]);
				fputs(szTmp, pFile);
			}
			else
			{
				int nVal = (int)pVector[j];
				if(nVal < 0)
				{
					// ParseDataRow stores "?" as -1, so write it back as "?"
					// instead of looking up a negative value index.
					fputs("?", pFile);
				}
				else
				{
					szName = pAttr->GetValue(nVal);
					if(!szName)
					{
						// Same "v<value index>" fallback as in the header above
						strcpy(szTmp, "v");
						itoa(nVal, szTmp + 1, 10);
						szName = szTmp;
					}
					fputs(szName, pFile);
				}
			}
		}
		fputs("\n", pFile);
	}
}

// Parses ARFF text held in memory.
//
// ppOutData - receives the parsed rows when parsing succeeds; it is not
//             written when parsing fails.
// szFile    - buffer holding the ARFF text; only the first nLen bytes are read.
// nLen      - number of bytes in szFile.
//
// Returns the parsed relation, or NULL (via ParseError, which asserts) when
// the text is malformed. Lines whose first non-blank character is '%' are
// skipped as comments.
/*static*/ GArffRelation* GArffRelation::ParseFile(GArffData** ppOutData, const char* szFile, int nLen)
{
	// Parse the relation name
	int nPos = 0;
	int nLine = 1;
	Holder<GArffRelation*> hRelation(new GArffRelation());
	GArffRelation* pRelation = hRelation.Get();
	while(true)
	{
		// Skip Whitespace
		while(nPos < nLen && szFile[nPos] <= ' ')
		{
			if(szFile[nPos] == '\n')
				nLine++;
			nPos++;
		}
		if(nPos >= nLen)
			return ParseError(nLine, "Expected @RELATION");

		// Check for comments
		if(szFile[nPos] == '%')
		{
			// The bound is tested before the character so that szFile[nLen] is never read
			for(nPos++; nPos < nLen && szFile[nPos] != '\n'; nPos++)
			{
			}
			continue;
		}

		// Parse Relation
		if(nLen - nPos < 9 || strnicmp(&szFile[nPos], "@RELATION", 9) != 0) // 9 = strlen("@RELATION")
			return ParseError(nLine, "Expected @RELATION");
		nPos += 9;

		// Skip Whitespace
		while(nPos < nLen && szFile[nPos] <= ' ')
		{
			if(szFile[nPos] == '\n')
				nLine++;
			nPos++;
		}
		if(nPos >= nLen)
			return ParseError(nLine, "Expected relation name");

		// Parse Name (everything up to the next whitespace character)
		int nNameStart = nPos;
		while(nPos < nLen && szFile[nPos] > ' ')
			nPos++;
		pRelation->m_szName = new char[nPos - nNameStart + 1];
		memcpy(pRelation->m_szName, &szFile[nNameStart], nPos - nNameStart);
		pRelation->m_szName[nPos - nNameStart] = '\0';
		break;
	}

	// Parse the attribute section
	while(true)
	{
		// Skip Whitespace
		while(nPos < nLen && szFile[nPos] <= ' ')
		{
			if(szFile[nPos] == '\n')
				nLine++;
			nPos++;
		}
		if(nPos >= nLen)
			return ParseError(nLine, "Expected @ATTRIBUTE or @DATA");

		// Check for comments
		if(szFile[nPos] == '%')
		{
			for(nPos++; nPos < nLen && szFile[nPos] != '\n'; nPos++)
			{
			}
			continue;
		}

		// Check for @DATA
		if(nLen - nPos < 5) // 5 = strlen("@DATA")
			return ParseError(nLine, "Expected @DATA");
		if(strnicmp(&szFile[nPos], "@DATA", 5) == 0)
		{
			nPos += 5;
			break;
		}

		// Parse @ATTRIBUTE
		if(nLen - nPos < 10) // 10 = strlen("@ATTRIBUTE")
			return ParseError(nLine, "Expected @ATTRIBUTE");
		if(strnicmp(&szFile[nPos], "@ATTRIBUTE", 10) != 0)
			return ParseError(nLine, "Expected @ATTRIBUTE or @DATA");
		nPos += 10;
		GArffAttribute* pAttr = GArffAttribute::Parse(&szFile[nPos], nLen - nPos);
		if(!pAttr)
			return ParseError(nLine, "Problem with attribute");
		pRelation->m_pAttributes->AddPointer(pAttr);

		// Move to next line (the newline itself is counted by the
		// whitespace skip at the top of the loop)
		for(nPos++; nPos < nLen && szFile[nPos] != '\n'; nPos++)
		{
		}
	}

	// Parse the data section
	Holder<GArffData*> hData(new GArffData(256));
	GArffData* pData = hData.Get();
	while(true)
	{
		// Skip Whitespace
		while(nPos < nLen && szFile[nPos] <= ' ')
		{
			if(szFile[nPos] == '\n')
				nLine++;
			nPos++;
		}
		if(nPos >= nLen)
			break;

		// Check for comments
		if(szFile[nPos] == '%')
		{
			for(nPos++; nPos < nLen && szFile[nPos] != '\n'; nPos++)
			{
			}
			continue;
		}

		// Parse the data line
		double* pRow = pRelation->ParseDataRow(&szFile[nPos], nLen - nPos);
		if(!pRow)
			return ParseError(nLine, "Problem with data line");
		pData->AddVector(pRow);

		// Move to next line
		for(nPos++; nPos < nLen && szFile[nPos] != '\n'; nPos++)
		{
		}
	}

	// Hand both results back to the caller
	*ppOutData = hData.Drop();
	return hRelation.Drop();
}

// Parses one data row, reading at most nLen bytes starting at szFile.
//
// Values are separated by commas or by whitespace. "?" is stored as -1,
// a continuous value is converted with atof, and an enumerated value is
// stored as the index returned by FindEnumeratedValue.
//
// Returns a new array holding one double per attribute, or NULL when the
// line ends before every attribute has a value, a value does not fit the
// token buffer, a continuous value does not start with '.', '-' or a digit,
// or an enumerated value is not recognised by its attribute.
double* GArffRelation::ParseDataRow(const char* szFile, int nLen)
{
	char szBuf[512];
	int nAttributeCount = GetAttributeCount();
	Holder<double*> hData(new double[nAttributeCount]);
	double* pData = hData.Get();
	GArffAttribute* pAttr;
	int n;
	for(n = 0; n < nAttributeCount; n++)
	{
		// Eat whitespace; reaching the end of the line here means the row
		// has too few values
		while(nLen > 0 && *szFile <= ' ')
		{
			if(*szFile == '\n')
				return NULL;
			szFile++;
			nLen--;
		}
		if(nLen < 1)
			return NULL;

		// Find the end of the next value
		pAttr = GetAttribute(n);
		int nPos;
		for(nPos = 0; nPos < nLen; nPos++)
		{
			if(szFile[nPos] == ',')
				break;
			if(szFile[nPos] == '\n')
				break;
			if(nPos > 0 && szFile[nPos] > ' ' && szFile[nPos - 1] <= ' ')
			{
				// A new token starts after whitespace, so the whitespace was
				// the separator; step back onto it
				nPos--;
				break;
			}
		}

		// Trim trailing whitespace from the value
		int nEnd;
		for(nEnd = nPos; nEnd > 0 && szFile[nEnd - 1] <= ' '; nEnd--)
		{
		}

		// Reject values that would not fit in szBuf with their terminator
		if(nEnd >= (int)sizeof(szBuf))
			return NULL;
		memcpy(szBuf, szFile, nEnd);
		szBuf[nEnd] = '\0';
		if(strcmp(szBuf, "?") == 0)
			pData[n] = -1;
		else if(pAttr->IsContinuous())
		{
			// Parse a continuous value
			if(szBuf[0] == '.' || szBuf[0] == '-' || (szBuf[0] >= '0' && szBuf[0] <= '9'))
				pData[n] = atof(szBuf);
			else
				return NULL;
		}
		else
		{
			// Parse an enumerated value
			int nVal = pAttr->FindEnumeratedValue(szBuf);
			if(nVal < 0)
				return NULL;
			pData[n] = nVal;
		}

		// Advance past the value and its separator. A newline is left in
		// place so that the whitespace scan for the next attribute sees it
		// and rejects a short row instead of reading into the next line.
		if(nPos < nLen && szFile[nPos] != '\n')
			nPos++;
		szFile += nPos;
		nLen -= nPos;
	}
	return hData.Drop();
}

// Returns the number of attributes in this relation.
int GArffRelation::GetAttributeCount()
{
	return m_pAttributes->GetSize();
}

// Returns the attribute stored at position n of the attribute list.
GArffAttribute* GArffRelation::GetAttribute(int n)
{
	return (GArffAttribute*)m_pAttributes->GetPointer(n);
}

// Rebuilds the input/output counts and index tables. An attribute whose
// IsInput() is true is an input; every other attribute is an output.
// Asserts that there is at least one input.
void GArffRelation::CountInputs()
{
	m_nInputCount = 0;
	m_nOutputCount = 0;
	int n;
	int nCount = GetAttributeCount();
	GArffAttribute* pAttr;

	// First pass: count, so that the tables can be sized exactly
	for(n = 0; n < nCount; n++)
	{
		pAttr = GetAttribute(n);
		if(pAttr->IsInput())
			m_nInputCount++;
		else
			m_nOutputCount++;
	}
	GAssert(m_nInputCount > 0, "no inputs");
	//GAssert(m_nOutputCount > 0, "no outputs");
	delete[] m_pInputIndexes;
	delete[] m_pOutputIndexes;
	m_pInputIndexes = new int[m_nInputCount];
	m_pOutputIndexes = new int[m_nOutputCount];

	// Second pass: record the attribute index of each input and output
	int nIn = 0;
	int nOut = 0;
	for(n = 0; n < nCount; n++)
	{
		pAttr = GetAttribute(n);
		if(pAttr->IsInput())
			m_pInputIndexes[nIn++] = n;
		else
			m_pOutputIndexes[nOut++] = n;
	}
}

// Returns the number of input attributes, counting them first if needed.
int GArffRelation::GetInputCount()
{
	if(m_nInputCount < 0)
		CountInputs();
	return m_nInputCount;
}

// Returns the number of output attributes, counting them first if needed.
int GArffRelation::GetOutputCount()
{
	if(m_nOutputCount < 0)
		CountInputs();
	return m_nOutputCount;
}

// Returns the attribute index of the n-th input. Asserts that n is in
// [0, input count).
int GArffRelation::GetInputIndex(int n)
{
	if(!m_pInputIndexes)
		CountInputs();
	GAssert(n >= 0 && n < m_nInputCount, "out of range");
	return m_pInputIndexes[n];
}

// Returns the attribute index of the n-th output. Asserts that n is in
// [0, output count).
int GArffRelation::GetOutputIndex(int n)
{
	if(!m_pOutputIndexes)
		CountInputs();
	GAssert(n >= 0 && n < m_nOutputCount, "out of range");
	return m_pOutputIndexes[n];
}

// Sums, over every output attribute, the value of pData->ComputeVariance
// (about the column mean) for continuous outputs and of
// pData->MeasureEntropy for all other outputs.
double GArffRelation::MeasureTotalOutputInfo(GArffData* pData)
{
	double dInfo = 0;
	int nOutputs = GetOutputCount();
	int n, nIndex;
	GArffAttribute* pAttr;
	for(n = 0; n < nOutputs; n++)
	{
		nIndex = GetOutputIndex(n);
		pAttr = GetAttribute(nIndex);
		if(pAttr->IsContinuous())
			dInfo += pData->ComputeVariance(pData->ComputeMean(nIndex), nIndex);
		else
			dInfo += pData->MeasureEntropy(this, nIndex);
	}
	return dInfo;
}

// Returns the squared distance between two rows over the input attributes
// only: a continuous input contributes the squared difference, any other
// input contributes 1 when the two values differ and 0 when they match.
double GArffRelation::ComputeInputDistanceSquared(double* pRow1, double* pRow2)
{
	double dSum = 0;
	double d;
	int n, nIndex;

	// GetInputCount() builds the input table on first use; reading
	// m_nInputCount directly would see -1 and skip the loop entirely.
	int nInputs = GetInputCount();
	for(n = 0; n < nInputs; n++)
	{
		nIndex = GetInputIndex(n);
		if(GetAttribute(nIndex)->IsContinuous())
		{
			d = pRow2[nIndex] - pRow1[nIndex];
			dSum += (d * d);
		}
		else
		{
			if(pRow2[nIndex] != pRow1[nIndex])
				dSum += 1;
		}
	}
	return dSum;
}

double GArffRelation::ComputeScaledInputDistanceSquared(double* pRow1, double* pRow2, double* pInputScales)
{
	double dSum = 0;
	double d;
	int n, nIndex;
	for(n = 0; n < m_nInputCount; n++)
	{
		nIndex = GetInputIndex(n);
		if(GetAttribute(nIndex)->IsContinuous())
		{
			d = pRow2[nIndex] * pInputScales[n] - pRow1[nIndex] * pInputScales[n];
			dSum += (d * d);
		}
		else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -