gagents.cpp

来自「Mike Gashler 的机器学习库(包含神经网络)」· C++ 代码 · 共 1,066 行 · 第 1/2 页

CPP
1,066
字号
} // end of preceding function (its opening lines are outside this excerpt)

// virtual
// Frees everything the agent owns: the population of sense constructs, the
// cached dependency ordering, and the short-term-memory ring buffer.
AgentAl::~AgentAl()
{
	// Delete the sense constructs
	int i;
	for(i = 0; i < m_nSenseConstructs; i++)
		delete(m_pConstructNodes[i]);
	delete[] m_pConstructNodes;

	// Delete the dependency order
	delete[] m_pSenseConstructDependencyOrder;

	// Delete short term memory
	delete[] m_pShortTermMemory;
}

#ifdef LOGSPEW
// Debug helper: prints the most recently observed sense vector (memory slot 0).
void AgentAl::LogSpew_PrintObservations()
{
	printf("Observed sense vector:\t");
	double* pVector = GetMemoryVector(0);
	int i;
	for(i = 0; i < m_nSenseCount; i++)
		printf("%f\t", pVector[i]);
	printf("\n");
}

// Debug helper: prints the current output value of every sense construct,
// one population row per line, one sense per column.
void AgentAl::LogSpew_PrintSenseConstructs()
{
	printf("Sense Constructs:\n");
	int i, j;
	for(j = 0; j < SENSE_POPULATION_SIZE; j++)
	{
		for(i = 0; i < m_nSenseCount; i++)
		{
			// Constructs are stored sense-major: SENSE_POPULATION_SIZE nodes per sense
			ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * i + j];
			printf("%f\t", pConstruct->GetOutputValue());
		}
		printf("\n");
	}
	printf("\n");
}

// Debug helper: evaluates one action model against pSenseVector and prints its
// outputs (intermediate outputs bracketed, final prediction last).
// nSense is currently unused here; the model and construct are passed in directly.
void AgentAl::LogSpew_PrintSingleActionConstruct(int nSense, IncrementalModel* pModel, ConstructNode* pConstruct, double* pSenseVector)
{
	// Allocate space for the vector
	int nSize = pModel->GetRelation()->GetAttributeCount();
	GTEMPBUF(double, pVector, nSize);

	// Set the input values from the sense vector
	GAssert(pModel->GetRelation()->GetInputCount() == m_nSenseCount, "Number of inputs don't match");
	memcpy(pVector, pSenseVector, sizeof(double) * m_nSenseCount);

	// Evaluate
	pModel->Eval(pVector);

	// Print the results
	int nOutputs = pModel->GetRelation()->GetOutputCount();
	if(nOutputs > 1)
	{
		printf("[");
		int i;
		for(i = 0; i < nOutputs - 1; i++)
			printf("%f,", pVector[m_nSenseCount + i]);
		printf("]");
	}
	printf("%f\t", pVector[m_nSenseCount + nOutputs - 1]);
}

// Debug helper: for every action, prints the prediction of every sense
// construct's model for that action.
void AgentAl::LogSpew_PrintActionConstructs()
{
	printf("\n");
	int j;
	for(j = 0; j < m_nActionCount; j++)
	{
		printf("\tAction %d\t{", j);
		int i;
		for(i = 0; i < m_nSenseConstructs; i++)
		{
			ConstructNode* pConstruct = m_pConstructNodes[i];
			IncrementalModel* pModel = pConstruct->GetActionModel(j);
			double* pSenseVector = GetMemoryVector(0);
			LogSpew_PrintSingleActionConstruct(i, pModel, pConstruct, pSenseVector);
		}
		printf("}\n");
	}
}

// Debug helper: dumps observations, sense constructs, and action constructs.
void AgentAl::LogSpew_PrintConstructs()
{
	printf("---------------------------------\n");
	LogSpew_PrintObservations();
	LogSpew_PrintSenseConstructs();
	LogSpew_PrintActionConstructs();
}
#endif // LOGSPEW

// Returns a pointer to the short-term-memory slot i steps in the past
// (i == 0 is the current slot). Memory is a ring buffer of
// m_nShortTermMemoryCapacity slots, each holding m_nSenseCount sense values
// plus one trailing action value.
double* AgentAl::GetMemoryVector(int i)
{
	GAssert(i >= 0 && i < m_nShortTermMemoryCapacity, "out of range");
	// Adding the capacity before subtracting keeps the modulus non-negative
	int nSlot = (m_nShortTermMemoryPos + m_nShortTermMemoryCapacity - i) % m_nShortTermMemoryCapacity;
	return &m_pShortTermMemory[(m_nSenseCount + 1) * nSlot];
}

// Copies the world's current sense values into the current memory slot.
void AgentAl::Observe()
{
	double* pSenseVector = GetMemoryVector(0);
	int i;
	for(i = 0; i < m_nSenseCount; i++)
		pSenseVector[i] = m_pWorld->GetSenseValue(i);
}

// Meta-learning step: on trial iterations, scores every sense construct by the
// squared error between its predicted output and the observed value, then
// occasionally triggers an evolution pass over the population.
void AgentAl::EvaluatePredictions()
{
	if(!m_bTrialIteration)
		return;

	// Critique each construct
	double dError;
	int i;
	double* pSenseVector = GetMemoryVector(0);
	for(i = 0; i < m_nSenseConstructs; i++)
	{
		ConstructNode* pConstruct = m_pConstructNodes[i];
		// NOTE(review): a memory slot holds only m_nSenseCount + 1 doubles, but i
		// ranges over all m_nSenseConstructs constructs. Elsewhere (LearnToModelTheWorld)
		// the sense value for construct j is pSenseVector[j / SENSE_POPULATION_SIZE];
		// pSenseVector[i] here looks like it reads past the slot whenever
		// SENSE_POPULATION_SIZE > 1 — confirm intended indexing.
		dError = pSenseVector[i] - pConstruct->GetOutputValue(); // todo: this makes the prediction using predicted inputs. It should probably use actual inputs instead.
		pConstruct->Criticize(dError * dError);
	}

	// Evolve the constructs
	if((rand() % EVALUATIONS_PER_EVOLUTION) == 0)
		EvolveConstructs();
}

// One evolutionary step over the sense-construct populations:
// (1) for each sense, axe the worst-scoring mature construct (and everything
//     that transitively depends on it);
// (2) refill the resulting holes with mutations of a surviving model
//     (prune one input, or add a new non-cyclic input);
// (3) recompute the dependency-respecting evaluation order.
void AgentAl::EvolveConstructs()
{
	// Kill off the weak parts of the populations
	GIntQueue q;
	int i, j, k;
	for(j = 0; j < m_nSenseCount; j++)
	{
		// Count the number of nodes that are candidates for the axe, and find the
		// one with the worst error score
		int nCandidates = 0;
		double dWorst = 0;
		int nWorstIndex = 0;
		int nNulls = 0;
		for(i = 0; i < SENSE_POPULATION_SIZE; i++)
		{
			ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i];
			if(!pConstruct)
			{
				nNulls++;
				break;
			}
			// Only constructs that have been critiqued enough times are eligible
			if(pConstruct->GetCritiqueCount() >= GRACE_CRITIQUES)
			{
				nCandidates++;
				if(pConstruct->GetError() > dWorst)
				{
					dWorst = pConstruct->GetError();
					nWorstIndex = i;
				}
			}
		}
		if(nNulls > 0)
			continue; // a node for this sense was already axed due to dependence on some other node, so let's be content with that so we don't axe too much of the population

		// Axe the worst candidate
		if(nCandidates > 1)
		{
			// Axe the condemned candidate and all nodes that depend (transitively
			// or directly) on the condemned candidate
			q.Push(SENSE_POPULATION_SIZE * j + nWorstIndex);
			while(q.GetSize() > 0)
			{
				int index = q.Pop();
				int sense = index / SENSE_POPULATION_SIZE;
				ConstructNode* pCondemned = m_pConstructNodes[index];
				m_pConstructNodes[index] = NULL;
				delete(pCondemned);
				// Enqueue every surviving node (in other senses) that depended on it
				for(k = 0; k < m_nSenseCount; k++)
				{
					if(k == sense)
						continue;
					for(i = 0; i < SENSE_POPULATION_SIZE; i++)
					{
						ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * k + i];
						if(!pConstruct)
							continue;
						if(pConstruct->IsDependency(pCondemned))
							q.Push(SENSE_POPULATION_SIZE * k + i);
					}
				}
			}
		}
	}

	// Fill openings in the population
	for(j = 0; j < m_nSenseCount; j++)
	{
		// Select a model: a random survivor, else the best mature survivor,
		// else the best survivor of any maturity (pModel may still end up NULL)
		ConstructNode* pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + (rand() % SENSE_POPULATION_SIZE)];
		if(!pModel)
		{
			// Find the best candidate
			double dBest = 1e200;
			int nBestIndex = -1;
			for(i = 0; i < SENSE_POPULATION_SIZE; i++)
			{
				ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i];
				if(!pConstruct)
					continue;
				if(pConstruct->GetCritiqueCount() < GRACE_CRITIQUES)
					continue;
				if(pConstruct->GetError() >= dBest)
					continue;
				dBest = pConstruct->GetError();
				nBestIndex = i;
			}
			if(nBestIndex >= 0)
				pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + nBestIndex];
			else
			{
				// No mature survivor; settle for the lowest-error one regardless of maturity
				double dBest = 1e200;
				int nBestIndex = -1;
				for(i = 0; i < SENSE_POPULATION_SIZE; i++)
				{
					ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i];
					if(!pConstruct)
						continue;
					if(pConstruct->GetError() >= dBest)
						continue;
					dBest = pConstruct->GetError();
					nBestIndex = i;
				}
				if(nBestIndex >= 0)
					pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + nBestIndex];
			}
		}

		// Fill openings
		for(i = 0; i < SENSE_POPULATION_SIZE; i++)
		{
			ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i];
			if(pConstruct)
				continue;

			// Try to find an acceptable mutation (up to 9 attempts when a model exists;
			// k starts at -1 when pModel is NULL so the loop is skipped entirely)
			for(k = (pModel ? 8 : -1); k >= 0; k--)
			{
				// 40% chance: prune an input; 60% chance: add a new input
				if((rand() % 5) < 2)
				{
					// prune an input
					int nInputs = pModel->GetInputCount();
					if(nInputs < 1)
						continue;
					ConstructNode** pNewInputs;
					if(nInputs > 1)
					{
						// Copy the model's inputs, skipping one chosen at random
						int nPrunedInput = rand() % nInputs;
						pNewInputs = new ConstructNode*[nInputs - 1];
						ConstructNode** pOldInputs = pModel->GetInputs();
						memcpy(pNewInputs, pOldInputs, sizeof(ConstructNode*) * nPrunedInput);
						memcpy(&pNewInputs[nPrunedInput], &pOldInputs[nPrunedInput + 1], sizeof(ConstructNode*) * (nInputs - nPrunedInput - 1));
					}
					else
						pNewInputs = NULL;
					m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, nInputs - 1, pNewInputs, m_nSenseCount, m_nActionCount, false);
					break;
				}
				else
				{
					// add a new input
					int nNewInput = rand() % m_nSenseConstructs;
					ConstructNode* pNewInput = m_pConstructNodes[nNewInput];
					if(!pNewInput)
						continue;
					// Reject inputs that would create a dependency cycle through sense j
					if(pNewInput->DoesDependOnSense(j))
						continue;
					int nInputs = pModel->GetInputCount();
					ConstructNode** pOldInputs = pModel->GetInputs();
					ConstructNode** pNewInputs = new ConstructNode*[nInputs + 1];
					memcpy(pNewInputs, pOldInputs, sizeof(ConstructNode*) * nInputs);
					pNewInputs[nInputs] = pNewInput;
					m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, nInputs + 1, pNewInputs, m_nSenseCount, m_nActionCount, false);
					break;
				}
			}

			// Default mutation--no inputs
			if(k < 0)
			{
				m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, 0, NULL, m_nSenseCount, m_nActionCount, false);
			}
		}
	}

	// Recompute dependency ordering
	RecomputeDependencyOrder();
}

// Rebuilds m_pSenseConstructDependencyOrder so constructs appear after the
// constructs they depend on. Repeatedly sweeps the population, appending each
// node that reports ready.
// NOTE(review): if no node ever becomes ready in a sweep, the outer loop spins
// forever — presumably IsReady() flips the flag reset above and eventually
// drains; confirm against ConstructNode.
void AgentAl::RecomputeDependencyOrder()
{
	int i, j;
	for(i = 0; i < m_nSenseConstructs; i++)
		m_pConstructNodes[i]->ResetFlag();
	for(i = 0; i < m_nSenseConstructs; )
	{
		for(j = 0; j < m_nSenseConstructs; j++)
		{
			if(m_pConstructNodes[j]->IsReady())
				m_pSenseConstructDependencyOrder[i++] = j;
		}
	}
}

// Scores the previous predictions, then trains every sense construct (in
// dependency order) on the value just observed for its sense.
void AgentAl::LearnToModelTheWorld()
{
	// Evaluate predictions (This is the Meta-learning step for sense constructs)
	EvaluatePredictions();

	// Train with observed (sensed) values
	int i, j;
	double* pSenseVector = GetMemoryVector(0);
	for(i = 0; i < m_nSenseConstructs; i++)
	{
		j = m_pSenseConstructDependencyOrder[i];
		// Construct j models sense j / SENSE_POPULATION_SIZE
		m_pConstructNodes[j]->TrainModel(pSenseVector[j / SENSE_POPULATION_SIZE]);
	}
}

// Trains the action model for sense nSense on one remembered transition:
// inputs are the senses at action time (pActionVector), the final output is
// the change in this sense scaled into [0, 1], and the leaf values of the
// construct fill the remaining outputs.
void AgentAl::TrainActionModel(int nSense, double* pActionVector, double* pCurrentVector)
{
	// Allocate space for the vector
	int nAction = (int)pActionVector[m_nSenseCount]; // action is stored after the senses
	IncrementalModel* pActionModel = m_pConstructNodes[nSense]->GetActionModel(nAction);
	int nSize = pActionModel->GetRelation()->GetAttributeCount();
	GTEMPBUF(double, pVector, nSize);

	// Set the input values from the sense vector at the time the action was performed
	GAssert(pActionModel->GetRelation()->GetInputCount() == m_nSenseCount, "Number of inputs don't match");
	memcpy(pVector, pActionVector, sizeof(double) * m_nSenseCount);

	// Set the output values
	int nOutputs = pActionModel->GetRelation()->GetOutputCount();
	int nValues = m_pConstructNodes[nSense]->SetLeafValues(pVector + m_nSenseCount, 0);
	if(nValues != nOutputs - 1)
		GAssert(false, "mismatching number of leaf values");
	// Map the delta (current - at-action-time) from [-1, 1] into [0, 1]
	pVector[m_nSenseCount + nOutputs - 1] = (pCurrentVector[nSense] - pActionVector[nSense]) / 2 + .5;

	// Train the model
	pActionModel->TrainIncremental(pVector);
}

// For each sense construct, scans backward through short-term memory for the
// most recent step whose construct inputs match the current ones, then trains
// the action models on every step from that match back to the present.
void AgentAl::LearnToPredictHowActionsAffectTheWorld()
{
	double* pCurrentVector = GetMemoryVector(0);
	double* pActionVector;
	int i, nSense, j, nAction;
	bool bGotOne; // NOTE(review): set but never read below — appears vestigial
	for(i = 0; i < m_nSenseConstructs; i++)
	{
		nSense = m_pSenseConstructDependencyOrder[i];

		// Search for the most recent time when the inputs to this sense matched
		bGotOne = false;
		for(j = 1; j < m_nShortTermMemoryCapacity; j++)
		{
			pActionVector = GetMemoryVector(j);
			if(pActionVector[nSense] == MEM_BLANK)
				break; // reached the unwritten part of the ring buffer
			if(m_pConstructNodes[nSense]->AreInputsConstant(pActionVector, pCurrentVector, ACTION_CONSTRUCT_INPUT_TOLERANCE))
			{
				// Train on every step from the match back to the present
				while(j >= 1)
				{
					pActionVector = GetMemoryVector(j);
					nAction = (int)pActionVector[m_nSenseCount]; // NOTE(review): unused; TrainActionModel re-derives it
					TrainActionModel(nSense, pActionVector, pCurrentVector);
					j--;
				}
				break;
			}
		}
	}
}

/*
// This class makes a heuristic (aka target function) based on how
// interesting a state is to the agent. Interestingness is defined
// as a combination of how well explored the state is, and how much
// output variance there is in that region of state space.
class GInterestingnessCritic : public GRealVectorCritic
{
protected:

public:
	// nVectorSize is the number of dimensions in the search space
	GInterestingnessCritic(int nVectorSize) : GRealVectorCritic(nVectorSize)
	{
	}

	virtual ~GInterestingnessCritic()
	{
	}

protected:
	// Computes the error of the given vector with respect to the search space
	virtual double ComputeError(double* pVector)
	{
		// todo: write me
	}
};

void AgentAl::PickExplorationGoal()
{
	GInterestingnessCritic critic(m_pConstructNodes->GetSize());
	GMomentumGreedySearch search(&critic);
	//search.SetState();
	search.SetAllStepSizes(.2);
	int i;
	for(i = 0; i < 500; i++) // todo: don't use magic numbers
		search.Iterate();
}
*/

// Chooses the agent's next action (currently always a uniformly random
// exploration; the exploit branch is a stub), records it in the current memory
// slot, and kicks off consequence prediction.
void AgentAl::SelectNextAction()
{
	int nAction = 0;
	if(false)
	{
		// Exploit--todo: search for the best action
	}
	else
	{
		// Explore--Just pick a random action
		nAction = rand() % m_nActionCount;
	}

	// Store the action in short term memory
	double* pCurrentVector = GetMemoryVector(0);
	pCurrentVector[m_nSenseCount] = nAction;

	// Predict what will happen when the action is performed, so we can evaluate
	// how well we've learned
	PredictConsequences();
}

// Evaluates one action model against the current senses and feeds its outputs
// to the sense construct so the construct reflects the predicted consequence.
// nAction is currently unused here; the model is passed in directly.
void AgentAl::SimulateAction(int nAction, IncrementalModel* pModel, ConstructNode* pSenseConstruct)
{
	// Allocate space for the vector
	int nInputs = pModel->GetRelation()->GetInputCount();
	int nOutputs = pModel->GetRelation()->GetOutputCount();
	GTEMPBUF(double, pVector, nInputs + nOutputs);

	// Set the input values from the sense vector
	double* pSenseVector = GetMemoryVector(0);
	GAssert(nInputs == m_nSenseCount, "Number of inputs don't match");
	memcpy(pVector, pSenseVector, sizeof(double) * nInputs);

	// Evaluate the action model
	pModel->Eval(pVector);

	// Train the sense construct
	pSenseConstruct->TrainFromActionModelOutput(nOutputs, pVector + nInputs);
}

// With probability 1/AVE_ITERATIONS_PER_TRIAL, marks this iteration as a trial
// and simulates the chosen action's effect on every sense construct (walking
// the dependency order in reverse) so EvaluatePredictions can score it later.
void AgentAl::PredictConsequences()
{
	// Decide if we need to put the agent on trial
	if(rand() % AVE_ITERATIONS_PER_TRIAL != 0)
	{
		m_bTrialIteration = false;
		return;
	}
	m_bTrialIteration = true;

	// Read the next action from short term memory
	double* pCurrentVector = GetMemoryVector(0);
	int nAction = (int)pCurrentVector[m_nSenseCount];

	// Apply action models to all the sense constructs
	int i, j;
	for(i = m_nSenseConstructs - 1; i >= 0; i--)
	{
		j = m_pSenseConstructDependencyOrder[i];
		ConstructNode* pConstruct = m_pConstructNodes[j];
		IncrementalModel* pActionModel = pConstruct->GetActionModel(nAction);
		SimulateAction(nAction, pActionModel, pConstruct);
	}
}

// Performs the action recorded in the current memory slot and advances the
// short-term-memory ring-buffer position.
void AgentAl::Act()
{
	// Read the next action from short term memory
	double* pCurrentVector = GetMemoryVector(0);
	int nAction = (int)pCurrentVector[m_nSenseCount];

	// Do it
	m_pWorld->DoAction(nAction, 0, NULL); // todo: support action params

	// Advance the short term memory pointer
	m_nShortTermMemoryPos = (m_nShortTermMemoryPos + 1) % m_nShortTermMemoryCapacity;
}

// One cognitive step: model the world, learn action consequences, pick the
// next action.
void AgentAl::Think()
{
	// Learn how to model the world
	LearnToModelTheWorld();

#ifdef LOGSPEW
	// Spew info for debugging purposes
	//LogSpew_PrintConstructs();
#endif // LOGSPEW

	// Learn how to predict the consequences of each action
	LearnToPredictHowActionsAffectTheWorld();

	// Decide what to do next
	SelectNextAction();
}

// The agent's top-level loop body: observe the world, think, act.
void AgentAl::Iterate()
{
	Observe();
	Think();
	Act();
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?