// gagents.cpp
// Agent implementation from a machine-learning (neural) library by Mike Gashler.
// NOTE: this file was extracted from a web code viewer (page 1 of 2, 1,066 lines
// total); the opening of the first function lies on a previous page.
}// virtualAgentAl::~AgentAl(){ // Delete the sense constructs int i; for(i = 0; i < m_nSenseConstructs; i++) delete(m_pConstructNodes[i]); delete[] m_pConstructNodes; // Delete the dependency order delete[] m_pSenseConstructDependencyOrder; // Delete short term memory delete[] m_pShortTermMemory;}#ifdef LOGSPEWvoid AgentAl::LogSpew_PrintObservations(){ printf("Observed sense vector:\t"); double* pVector = GetMemoryVector(0); int i; for(i = 0; i < m_nSenseCount; i++) printf("%f\t", pVector[i]); printf("\n");}void AgentAl::LogSpew_PrintSenseConstructs(){ printf("Sense Constructs:\n"); int i, j; for(j = 0; j < SENSE_POPULATION_SIZE; j++) { for(i = 0; i < m_nSenseCount; i++) { ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * i + j]; printf("%f\t", pConstruct->GetOutputValue()); } printf("\n"); } printf("\n");}void AgentAl::LogSpew_PrintSingleActionConstruct(int nSense, IncrementalModel* pModel, ConstructNode* pConstruct, double* pSenseVector){ // Allocate space for the vector int nSize = pModel->GetRelation()->GetAttributeCount(); GTEMPBUF(double, pVector, nSize); // Set the input values from the sense vector GAssert(pModel->GetRelation()->GetInputCount() == m_nSenseCount, "Number of inputs don't match"); memcpy(pVector, pSenseVector, sizeof(double) * m_nSenseCount); // Evaluate pModel->Eval(pVector); // Print the results int nOutputs = pModel->GetRelation()->GetOutputCount(); if(nOutputs > 1) { printf("["); int i; for(i = 0; i < nOutputs - 1; i++) printf("%f,", pVector[m_nSenseCount + i]); printf("]"); } printf("%f\t", pVector[m_nSenseCount + nOutputs - 1]);}void AgentAl::LogSpew_PrintActionConstructs(){ printf("\n"); int j; for(j = 0; j < m_nActionCount; j++) { printf("\tAction %d\t{", j); int i; for(i = 0; i < m_nSenseConstructs; i++) { ConstructNode* pConstruct = m_pConstructNodes[i]; IncrementalModel* pModel = pConstruct->GetActionModel(j); double* pSenseVector = GetMemoryVector(0); LogSpew_PrintSingleActionConstruct(i, pModel, pConstruct, 
pSenseVector); } printf("}\n"); }}void AgentAl::LogSpew_PrintConstructs(){ printf("---------------------------------\n"); LogSpew_PrintObservations(); LogSpew_PrintSenseConstructs(); LogSpew_PrintActionConstructs();}#endif // LOGSPEWdouble* AgentAl::GetMemoryVector(int i){ GAssert(i >= 0 && i < m_nShortTermMemoryCapacity, "out of range"); int nSlot = (m_nShortTermMemoryPos + m_nShortTermMemoryCapacity - i) % m_nShortTermMemoryCapacity; return &m_pShortTermMemory[(m_nSenseCount + 1) * nSlot];}void AgentAl::Observe(){ double* pSenseVector = GetMemoryVector(0); int i; for(i = 0; i < m_nSenseCount; i++) pSenseVector[i] = m_pWorld->GetSenseValue(i);}void AgentAl::EvaluatePredictions(){ if(!m_bTrialIteration) return; // Critique each construct double dError; int i; double* pSenseVector = GetMemoryVector(0); for(i = 0; i < m_nSenseConstructs; i++) { ConstructNode* pConstruct = m_pConstructNodes[i]; dError = pSenseVector[i] - pConstruct->GetOutputValue(); // todo: this makes the prediction using predicted inputs. It should probably use actual inputs instead. 
pConstruct->Criticize(dError * dError); } // Evolve the constructs if((rand() % EVALUATIONS_PER_EVOLUTION) == 0) EvolveConstructs();}void AgentAl::EvolveConstructs(){ // Kill off the weak parts of the populations GIntQueue q; int i, j, k; for(j = 0; j < m_nSenseCount; j++) { // Count the number of nodes that are candidates for the axe, and find the // one with the worst error score int nCandidates = 0; double dWorst = 0; int nWorstIndex = 0; int nNulls = 0; for(i = 0; i < SENSE_POPULATION_SIZE; i++) { ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i]; if(!pConstruct) { nNulls++; break; } if(pConstruct->GetCritiqueCount() >= GRACE_CRITIQUES) { nCandidates++; if(pConstruct->GetError() > dWorst) { dWorst = pConstruct->GetError(); nWorstIndex = i; } } } if(nNulls > 0) continue; // a node for this sense was already axed due to dependence on some other node, so let's be content with that so we don't axe too much of the population // Axe the worst candidate if(nCandidates > 1) { // Axe the condemned candidate and all nodes that depend (transitively // or directly) on the condemned candidate q.Push(SENSE_POPULATION_SIZE * j + nWorstIndex); while(q.GetSize() > 0) { int index = q.Pop(); int sense = index / SENSE_POPULATION_SIZE; ConstructNode* pCondemned = m_pConstructNodes[index]; m_pConstructNodes[index] = NULL; delete(pCondemned); for(k = 0; k < m_nSenseCount; k++) { if(k == sense) continue; for(i = 0; i < SENSE_POPULATION_SIZE; i++) { ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * k + i]; if(!pConstruct) continue; if(pConstruct->IsDependency(pCondemned)) q.Push(SENSE_POPULATION_SIZE * k + i); } } } } } // Fill openings in the population for(j = 0; j < m_nSenseCount; j++) { // Select a model ConstructNode* pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + (rand() % SENSE_POPULATION_SIZE)]; if(!pModel) { // Find the best candidate double dBest = 1e200; int nBestIndex = -1; for(i = 0; i < SENSE_POPULATION_SIZE; i++) { 
ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i]; if(!pConstruct) continue; if(pConstruct->GetCritiqueCount() < GRACE_CRITIQUES) continue; if(pConstruct->GetError() >= dBest) continue; dBest = pConstruct->GetError(); nBestIndex = i; } if(nBestIndex >= 0) pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + nBestIndex]; else { double dBest = 1e200; int nBestIndex = -1; for(i = 0; i < SENSE_POPULATION_SIZE; i++) { ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i]; if(!pConstruct) continue; if(pConstruct->GetError() >= dBest) continue; dBest = pConstruct->GetError(); nBestIndex = i; } if(nBestIndex >= 0) pModel = m_pConstructNodes[SENSE_POPULATION_SIZE * j + nBestIndex]; } } // Fill openings for(i = 0; i < SENSE_POPULATION_SIZE; i++) { ConstructNode* pConstruct = m_pConstructNodes[SENSE_POPULATION_SIZE * j + i]; if(pConstruct) continue; // Try to find an acceptable mutation for(k = (pModel ? 8 : -1); k >= 0; k--) { if((rand() % 5) < 2) { // prune an input int nInputs = pModel->GetInputCount(); if(nInputs < 1) continue; ConstructNode** pNewInputs; if(nInputs > 1) { int nPrunedInput = rand() % nInputs; pNewInputs = new ConstructNode*[nInputs - 1]; ConstructNode** pOldInputs = pModel->GetInputs(); memcpy(pNewInputs, pOldInputs, sizeof(ConstructNode*) * nPrunedInput); memcpy(&pNewInputs[nPrunedInput], &pOldInputs[nPrunedInput + 1], sizeof(ConstructNode*) * (nInputs - nPrunedInput - 1)); } else pNewInputs = NULL; m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, nInputs - 1, pNewInputs, m_nSenseCount, m_nActionCount, false); break; } else { // add a new input int nNewInput = rand() % m_nSenseConstructs; ConstructNode* pNewInput = m_pConstructNodes[nNewInput]; if(!pNewInput) continue; if(pNewInput->DoesDependOnSense(j)) continue; int nInputs = pModel->GetInputCount(); ConstructNode** pOldInputs = pModel->GetInputs(); ConstructNode** pNewInputs = new ConstructNode*[nInputs + 1]; memcpy(pNewInputs, 
pOldInputs, sizeof(ConstructNode*) * nInputs); pNewInputs[nInputs] = pNewInput; m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, nInputs + 1, pNewInputs, m_nSenseCount, m_nActionCount, false); break; } } // Default mutation--no inputs if(k < 0) { m_pConstructNodes[SENSE_POPULATION_SIZE * j + i] = new ConstructNode(j, 0, NULL, m_nSenseCount, m_nActionCount, false); } } } // Recompute dependency ordering RecomputeDependencyOrder();}void AgentAl::RecomputeDependencyOrder(){ int i, j; for(i = 0; i < m_nSenseConstructs; i++) m_pConstructNodes[i]->ResetFlag(); for(i = 0; i < m_nSenseConstructs; ) { for(j = 0; j < m_nSenseConstructs; j++) { if(m_pConstructNodes[j]->IsReady()) m_pSenseConstructDependencyOrder[i++] = j; } }}void AgentAl::LearnToModelTheWorld(){ // Evaluate predictions (This is the Meta-learning step for sense constructs) EvaluatePredictions(); // Train with observed (sensed) values int i, j; double* pSenseVector = GetMemoryVector(0); for(i = 0; i < m_nSenseConstructs; i++) { j = m_pSenseConstructDependencyOrder[i]; m_pConstructNodes[j]->TrainModel(pSenseVector[j / SENSE_POPULATION_SIZE]); }}void AgentAl::TrainActionModel(int nSense, double* pActionVector, double* pCurrentVector){ // Allocate space for the vector int nAction = (int)pActionVector[m_nSenseCount]; IncrementalModel* pActionModel = m_pConstructNodes[nSense]->GetActionModel(nAction); int nSize = pActionModel->GetRelation()->GetAttributeCount(); GTEMPBUF(double, pVector, nSize); // Set the input values from the sense vector at the time the action was performed GAssert(pActionModel->GetRelation()->GetInputCount() == m_nSenseCount, "Number of inputs don't match"); memcpy(pVector, pActionVector, sizeof(double) * m_nSenseCount); // Set the output values int nOutputs = pActionModel->GetRelation()->GetOutputCount(); int nValues = m_pConstructNodes[nSense]->SetLeafValues(pVector + m_nSenseCount, 0); if(nValues != nOutputs - 1) GAssert(false, "mismatching number of leaf values"); 
pVector[m_nSenseCount + nOutputs - 1] = (pCurrentVector[nSense] - pActionVector[nSense]) / 2 + .5; // Train the model pActionModel->TrainIncremental(pVector);}void AgentAl::LearnToPredictHowActionsAffectTheWorld(){ double* pCurrentVector = GetMemoryVector(0); double* pActionVector; int i, nSense, j, nAction; bool bGotOne; for(i = 0; i < m_nSenseConstructs; i++) { nSense = m_pSenseConstructDependencyOrder[i]; // Search for the most recent time when the inputs to this sense matched bGotOne = false; for(j = 1; j < m_nShortTermMemoryCapacity; j++) { pActionVector = GetMemoryVector(j); if(pActionVector[nSense] == MEM_BLANK) break; if(m_pConstructNodes[nSense]->AreInputsConstant(pActionVector, pCurrentVector, ACTION_CONSTRUCT_INPUT_TOLERANCE)) { while(j >= 1) { pActionVector = GetMemoryVector(j); nAction = (int)pActionVector[m_nSenseCount]; TrainActionModel(nSense, pActionVector, pCurrentVector); j--; } break; } } }}/*// This class makes a heuristic (aka target function) based on how// interesting a state is to the agent. 
Interestingness is defined// as a combination of how well explored the state is, and how much// output variance there is in that region of state space.class GInterestingnessCritic : public GRealVectorCritic{protected:public: // nVectorSize is the number of dimensions in the search space GInterestingnessCritic(int nVectorSize) : GRealVectorCritic(nVectorSize) { } virtual ~GInterestingnessCritic() { }protected: // Computes the error of the given vector with respect to the search space virtual double ComputeError(double* pVector) { // todo: write me }};void AgentAl::PickExplorationGoal(){ GInterestingnessCritic critic(m_pConstructNodes->GetSize()); GMomentumGreedySearch search(&critic); //search.SetState(); search.SetAllStepSizes(.2); int i; for(i = 0; i < 500; i++) // todo: don't use magic numbers search.Iterate(); }*/void AgentAl::SelectNextAction(){ int nAction = 0; if(false) { // Exploit--todo: search for the best action } else { // Explore--Just pick a random action nAction = rand() % m_nActionCount; } // Store the action in short term memory double* pCurrentVector = GetMemoryVector(0); pCurrentVector[m_nSenseCount] = nAction; // Predict what will happen when the action is performed, so we can evaluate // how well we've learned PredictConsequences();}void AgentAl::SimulateAction(int nAction, IncrementalModel* pModel, ConstructNode* pSenseConstruct){ // Allocate space for the vector int nInputs = pModel->GetRelation()->GetInputCount(); int nOutputs = pModel->GetRelation()->GetOutputCount(); GTEMPBUF(double, pVector, nInputs + nOutputs); // Set the input values from the sense vector double* pSenseVector = GetMemoryVector(0); GAssert(nInputs == m_nSenseCount, "Number of inputs don't match"); memcpy(pVector, pSenseVector, sizeof(double) * nInputs); // Evaluate the action model pModel->Eval(pVector); // Train the sense construct pSenseConstruct->TrainFromActionModelOutput(nOutputs, pVector + nInputs);}void AgentAl::PredictConsequences(){ // Decide if we need to put 
the agent on trial if(rand() % AVE_ITERATIONS_PER_TRIAL != 0) { m_bTrialIteration = false; return; } m_bTrialIteration = true; // Read the next action from short term memory double* pCurrentVector = GetMemoryVector(0); int nAction = (int)pCurrentVector[m_nSenseCount]; // Apply action models to all the sense constructs int i, j; for(i = m_nSenseConstructs - 1; i >= 0; i--) { j = m_pSenseConstructDependencyOrder[i]; ConstructNode* pConstruct = m_pConstructNodes[j]; IncrementalModel* pActionModel = pConstruct->GetActionModel(nAction); SimulateAction(nAction, pActionModel, pConstruct); }}void AgentAl::Act(){ // Read the next action from short term memory double* pCurrentVector = GetMemoryVector(0); int nAction = (int)pCurrentVector[m_nSenseCount]; // Do it m_pWorld->DoAction(nAction, 0, NULL); // todo: support action params // Advance the short term memory pointer m_nShortTermMemoryPos = (m_nShortTermMemoryPos + 1) % m_nShortTermMemoryCapacity;}void AgentAl::Think(){ // Learn how to model the world LearnToModelTheWorld();#ifdef LOGSPEW // Spew info for debugging purposes //LogSpew_PrintConstructs();#endif // LOGSPEW // Learn how to predict the consequences of each action LearnToPredictHowActionsAffectTheWorld(); // Decide what to do next SelectNextAction();}void AgentAl::Iterate(){ Observe(); Think(); Act();}
// (End of page 1/2 of the extracted source; the remainder of the file is on
// the next page. Viewer shortcut-key help text removed from the code body.)