📄 bayesian.cc
字号:
// ################################################################################//// name: bayesian.cc //// author: Martin Pelikan//// purpose: functions for construction and use of bayesian networks // (not including the metric related functions and some of more// specific functions defined elsewhere)//// last modified: February 1999//// #################################################################################include <string.h>#include "bayesian.h"#include "boa.h"#include "population.h"#include "graph.h"#include "memalloc.h"#include "recomputeGains.h"#include "checkCycles.h"#include "utils.h"#include "computeCounts.h"#include "binary.h"#include "random.h"// ================================================================================//// name: constructTheNetwork//// function: constructs the network for a given population of strings (data// set), and stores it into the graph structure, satisfying the// maximal number of incoming edges constraint defined in the// parameters passed to the BOA; uses a simple greedy algorithm// with only edge additions allowed adding edges which increase// the network most (positively) and do not violate constraints;// starts with an empty network//// parameters: population...the population of strings to model (a data set)// G............where the resulting network is to be stored// boaParams....the parameters sent to the BOA//// returns: (int) 0//// ================================================================================int constructTheNetwork(Population *population, AcyclicOrientedGraph *G, BoaParams *boaParams){ int k,l; int maxNumberOfEdges,numberOfNodes,maxNumberOfIncomingEdges; int numAdded; char finito; float **gain; char *full; int maxFrom, maxTo; float maxGain; // empty the graph G->removeAllEdges(); // if no incoming edges allowed, the empty network is the solution if (boaParams->maxIncoming==0) return 0; // set some variables numberOfNodes = G->size(); maxNumberOfIncomingEdges = boaParams->maxIncoming; // maximal number of edges is determined by the number of nodes and the maximal number of incoming edges // into each node maxNumberOfEdges = numberOfNodes*maxNumberOfIncomingEdges; // allocate the array for the gains with respect to the addition of each of the edges gain = (float**) Calloc(numberOfNodes,sizeof(float*)); for (k=0; k<numberOfNodes; k++) gain[k] = (float*) Calloc(numberOfNodes,sizeof(float)); // allocate memory for the array containing information about the full nodes (no more edge can go in those) full = (char*) Malloc(numberOfNodes); // a greedy algorithm for the network constrution ----------------------------------------------- finito = 0; // recompute the gains for edges into all nodes and set each node as not full yet for (k=0; k<numberOfNodes; k++) { full[k] = 0; recomputeGains(k,gain,full,G,population); } for (numAdded=0; (numAdded<maxNumberOfEdges)&&(!finito); numAdded++) { maxGain = -1; maxFrom = 0; maxTo = 0; // find the best edge addition (from maxFrom to maxTo with the increase of a metric maxGain) for (k=0; k<numberOfNodes; k++) for (l=0; l<numberOfNodes; l++) if (k!=l) { // what about an edge from k to l? if (gain[k][l]>maxGain) { maxFrom = k; maxTo = l; maxGain = gain[k][l]; }; } // add an edge with the best gain over all edge additions if its gain is positive, in the other case // the greedy algorithm stops if (maxGain>0) { // add the new edge G->addEdge(maxFrom,maxTo); // the edge can't be added anymore gain[maxFrom][maxTo] = gain[maxTo][maxFrom] = -1; // edges that would create cycles with the new edge have to be disabled checkCycles(maxFrom,maxTo,gain,G); // is the ending node of the new edge full yet? full[maxTo]=(G->getNumIn(maxTo)==boaParams->maxIncoming); // recompute the gains that are needed recomputeGains(maxTo,gain,full,G,population); } else finito = 1; }; // ---------------------------------------------------------------------------------------------- // free the memory used by the gain array for (k=0; k<numberOfNodes; k++) Free(gain[k]); Free(gain); // free memory used by an auxilary array "full" Free(full); // get back return 0;};// ================================================================================//// name: generateNewInstances//// function: generates new instances of the variables (strings) according to // the joint distribution encoded by the constructed network and the// conditional probabilities in a modeled data set//// parameters: parents......the modeled population// offspring....where the new instances should be stored// G............the network used as a model// boaParams....the parameters sent to the BOA//// returns: (int) 0//// ================================================================================int generateNewInstances(Population *parents, Population *offspring, AcyclicOrientedGraph *G, BoaParams *boaParams){ long i; long N,M; int k,l; int numberOfNodes; int **group, *groupSize; long *numGroupInstances,*numGroupInstances0,maxNumGroupInstances; float **marginalAll, **marginalAllButOne; long *count; char *added; int numAdded; char canAdd; char *x; int where; int position; float prob1; // set some variables numberOfNodes = boaParams->n; N = parents->N; M = offspring->N; // allocate the memory for the matrix of groups, their sizes and marginal frequencies group = (int**) Calloc(numberOfNodes,sizeof(int*)); groupSize = (int*) Calloc(numberOfNodes,sizeof(int)); numGroupInstances = (long*) Calloc(numberOfNodes,sizeof(long)); numGroupInstances0 = (long*) Calloc(numberOfNodes,sizeof(long)); marginalAll = (float**) Calloc(numberOfNodes,sizeof(float*)); marginalAllButOne = (float**) Calloc(numberOfNodes,sizeof(float*)); for (k=0; k<numberOfNodes; k++) group[k]=(int*) Calloc(numberOfNodes,sizeof(int)); // allocate the memory for some auxilary array used for ordering vertices topologically added = (char*) Malloc(numberOfNodes); // create the array of incoming edges numbers for all vartices (with the node // on the first position) and the number of them. // + allocate the memory for marginal frequencies maxNumGroupInstances = 0; for (k=0; k<numberOfNodes; k++) { // create k-th group (corresponding to the k-th variable) group[k][0]=k; if (G->getNumIn(k)>0) memcpy(&(group[k][1]),G->getParentList(k),G->getNumIn(k)*sizeof(int)); groupSize[k]=G->getNumIn(k)+1; // compute the number of its instances numGroupInstances[k] = 1<<groupSize[k]; numGroupInstances0[k] = numGroupInstances[k]>>1; // allocate the memory for marginals and counts marginalAll[k] = (float*) Calloc(numGroupInstances[k],sizeof(float)); marginalAllButOne[k] = (float*) Calloc(numGroupInstances0[k],sizeof(float)); // update maxNumGroupInstances if (numGroupInstances[k]>maxNumGroupInstances) maxNumGroupInstances=numGroupInstances[k]; } count = (long*) Calloc(maxNumGroupInstances,sizeof(long)); // topologically order nodes (and the corresponding groups of the nodes and their parents) for (k=0; k<numberOfNodes; k++) added[k]=0; numAdded=0; while (numAdded<numberOfNodes) { for (k=numAdded; k<numberOfNodes; k++) { canAdd=1; for (l=1; l<groupSize[k]; l++) if (!added[group[k][l]]) canAdd=0; if (canAdd) { added[group[k][0]]=1; if (k!=numAdded) { swapPointers((void**) &(group[k]),(void**) &(group[numAdded])); swapInt(&(groupSize[k]), &(groupSize[numAdded])); swapLong(&(numGroupInstances[k]),&(numGroupInstances[numAdded])); swapLong(&(numGroupInstances0[k]),&(numGroupInstances0[numAdded])); swapPointers((void**) &(marginalAll[k]), (void**) &(marginalAll[numAdded])); swapPointers((void**) &(marginalAllButOne[k]), (void**) &(marginalAllButOne[numAdded])); } numAdded++; } } } // calculate the marginal frequencies for created groups of positions for (k=0; k<numberOfNodes; k++) { computeCounts(group[k],groupSize[k],parents,count); for (l=0; l<numGroupInstances[k]; l++) marginalAll[k][l] = (float) count[l]/N; for (l=0; l<numGroupInstances0[k]; l++) marginalAllButOne[k][l] = marginalAll[k][l]+marginalAll[k][l+numGroupInstances0[k]]; }; // generate the ith coordinate (chromosome) for all dhildren for (i=0; i<M; i++) { x = offspring->x[i]; for (k=0; k<numberOfNodes; k++) { position = group[k][0]; if (groupSize[k]==1) prob1 = marginalAll[k][1]; else { x[position] = 0; where = indexedBinaryToInt(x,group[k],groupSize[k]); prob1 = 1-marginalAll[k][where]/marginalAllButOne[k][where]; } if (drand()<prob1) x[position]=1; else x[position]=0; } } // free the memory used by marginal frequencies (for groups of bits) and the "count" array for (k=0; k<numberOfNodes; k++) { Free(marginalAll[k]); Free(marginalAllButOne[k]); }; Free(count); // free the memory used by the matrix of groups, their sizes and marginal frequencies for (k=0; k<numberOfNodes; k++) Free(group[k]); Free(group); Free(groupSize); Free(numGroupInstances); Free(numGroupInstances0); Free(marginalAll); Free(marginalAllButOne); // get back return 0;};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -