⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bayesnet.cpp

📁 gibbs
💻 CPP
字号:
#include <string>#include <iostream>#include <sstream>#include <expat.h>#include <list>#include "BayesNet.h"#include "DecisionTree.h"// HACK -- leaving this as module data prevents us from// parsing more than one BayesNet at a time.string elementCdata = "";DecisionTree* dtree = NULL;int currVar = -1;bool inTypeVariables = false;int maxVarState = -1;int getVarIndex(const char* colname){    int ret = 0;    int pow = 1;    for (int i = strlen(colname) - 1; isdigit(colname[i]); i--) {        ret += pow * (colname[i] - '0');        pow *= 10;    }    return ret - 1;}const XML_Char* getAtt(const XML_Char* attName, const XML_Char** attList){    for (int i = 0; attList[i]; i += 2) {        if (!strcmp(attList[i], attName)) {            return attList[i+1];        }    }    return NULL;}void characterDataHandler(void* userData, const XML_Char *cdata, int len){    elementCdata.append(cdata, len);}void startElementHandler(void* userData, const XML_Char *name,        const XML_Char **atts){    // DEBUG    //cout << "Found start element: " << name << endl;    BayesNet* model = (BayesNet*)userData;    if (!strcmp(name, "TypeVariables")) {        inTypeVariables = true;    } else if (!strcmp(name, "Variable")) {        // HACK: this only works if every single variable        // has a typeVariable with a name of the form:        // "TypeK n-state" OR "TypeK continuous"        if (!inTypeVariables) {            const char* typeVarName = getAtt("typeVariable", atts);            int numStates = -1;            // DEBUG / HACK -- Really ought to handle general case,            // when typeVariable might not be specified.            if (typeVarName != NULL) {                if (strstr(typeVarName, "ontinuous")) {                    model->addVar(-1);                } else {                    // We expect the number of states to appear after                    // a space, in a name of the form: "TypeK n-state"                    for (unsigned int i = 0; i < strlen(typeVarName); i++) {                        if (isspace(typeVarName[i])) {                            numStates = atoi(typeVarName + i + 1);                            break;                        }                    }                    model->addVar(numStates);                }            } else {                const char* type = getAtt("type", atts);                if (type == NULL) {                    cout << "ERROR: no type variable or type specified!\n";                    return;                }                // Add continuous vars now.  Add categorical vars once we've                // counted their states.                if (!strcmp(type, "continuous")) {                    model->addVar(-1);                }            }        }    } else if (!strcmp(name, "State")) {        if (!inTypeVariables) {            maxVarState++;        }    } else if (!strcmp(name, "LocalModel")) {        const char* varName = getAtt("variable", atts);        if (!varName) {            cout << "ERROR: no variable specified in LocalModel!\n";        }        currVar = getVarIndex(varName);        // DEBUG        //cout << "Reading model for variable " << currVar << endl;    } else if (!strcmp(name, "Input")) {        const char* varName = getAtt("variable", atts);        if (!varName) {            cout << "ERROR: no variable specified in Input!\n";        }        model->addChild(getVarIndex(varName), currVar);    } else if (!strcmp(name, "DecisionTree")) {        // dtree = new DecisionTree(currVar, model->schema.getRange(currVar));        dtree = model->getDecisionTree(currVar);    } else if (!strcmp(name, "Vertex")) {        const char* splitName = getAtt("split", atts);        if (!splitName) {            cout << "ERROR: no split variable specified at Vertex!\n";        }        dtree->beginVertex(getVarIndex(splitName));    } else if (!strcmp(name, "Branch")) {        dtree->beginBranch();    } else if (!strcmp(name, "Multinomial")) {        dtree->beginMultinomial();    }    elementCdata.clear();}void endElementHandler(void* userData, const XML_Char *name){    // DEBUG    //cout << "Found end element: " << name << endl;    BayesNet* model = (BayesNet*)userData;    if (!strcmp(name, "TypeVariables")) {        inTypeVariables = false;    } else if (!strcmp(name, "Variable")) {        if (!inTypeVariables) {            if (maxVarState >= 0) {                model->addVar(maxVarState + 1);            }            maxVarState = -1;        }    } else if (!strcmp(name, "DecisionTree")) {        /*        model->setDecisionTree(currVar, dtree);        dtree = NULL;        */    } else if (!strcmp(name, "Vertex")) {        dtree->endVertex();    } else if (!strcmp(name, "Branch")) {        dtree->endBranch();    } else if (!strcmp(name, "Values")) {        list<Range> values;        istringstream in(elementCdata);        while (in) {            Range r;            in >> r;            if (in) {                values.push_back(r);            }        }        dtree->endValues(values);    } else if (!strcmp(name, "Probs")) {        unsigned int base = 0;        int index = 0;        double *probs = new double[dtree->getRange()];        // DEBUG        //cout << "Probs: " << elementCdata << " (";        for (unsigned int i = 0; i < elementCdata.length(); i++) {            if (isspace(elementCdata[i])) {                if (i > base) {                    probs[index++]                         = atof(elementCdata.substr(base, i-base).c_str());                    // DEBUG                    //cout << probs[index-1] << " ";                }                base = i + 1;            }        }        if (elementCdata.length() > base) {            probs[index++] = atof(elementCdata.substr(base).c_str());            // DEBUG            //cout << probs[index-1] << " ";        }        // DEBUG        //cout << ")\n";        dtree->endProbs(probs);    } else if (!strcmp(name, "Mean")) {        dtree->endMean(atof(elementCdata.c_str()));    } else if (!strcmp(name, "SD")) {        dtree->endSD(atof(elementCdata.c_str()));    } else if (!strcmp(name, "ProbMissing")) {        dtree->endProbMissing(atof(elementCdata.c_str()));    } else if (!strcmp(name, "BinGaussian")) {        dtree->endBinGaussian();    }    elementCdata.clear();}// DEBUG#include <stdio.h>void loadModel(FILE* in, BayesNet& model){    XML_Parser parser = XML_ParserCreate(NULL);    if (!parser) {        cout << "ERROR: could not allocate memory for parser!\n";        return;    }    XML_SetUserData(parser, &model);    XML_SetElementHandler(parser, startElementHandler, endElementHandler);    XML_SetCharacterDataHandler(parser, characterDataHandler);    char buf[10240];    while (!feof(in)) {        int bytesRead = fread(buf, 1, 1000, in);        /* DEBUG        ((char*)buf)[bytesRead] = '\0';        printf((char*)buf);        cout << "in.eof()? " << feof(in) << endl;        */        if ( !XML_Parse(parser, buf, bytesRead, feof(in)) ) {            cout << "Parse error at line " << XML_GetCurrentLineNumber(parser)                << ":\n" << XML_ErrorString(XML_GetErrorCode(parser)) << endl;            return;        }    }    XML_ParserFree(parser);// We have no reason at present to get the splits of a continuous variable.// This was debugging code for functionality that's no longer used.#if 0    for (int i = 0; i < model.getNumVars(); i++) {        if (model.getRange(i) >= 0) {            continue;        }        // Find all splits        list<double> allsplits;        for (int v = 0; v < model.getNumVars(); v++) {            DecisionTree* currTree = model.getDecisionTree(v);            list<double> currsplits = currTree->getSplits(i);            allsplits.splice(allsplits.end(), currsplits);        }        allsplits.sort();        // Print out values        cout << i << ":";        list<double>::iterator iter;        for (iter = allsplits.begin(); iter != allsplits.end(); iter++) {            cout << " " << *iter;        }        cout << endl;    }#endif    return;}vector<double> BayesNet::MBdist(int var, VarSet& allVars) const{    const Leaf* l = decisionTrees[var]->getLeaf(allVars.getArray());    int numVals = decisionTrees[var]->getRange();    vector<double> distrib(numVals);    double totalProb = -1.0/0.0000000000001;    // Compute probability distribution across all values,     // using Markov-blanket inference.    double origVal = allVars[var];    for (int i = 0; i < numVals; i++) {        // Compute p(x | parents(x))        distrib[i] = l->getLogProb(i);        // DEBUG        if (isnan(distrib[i])) {            cout << "distrib[" << i << "] = 0.0!\n";        }        // Scale by p(children(x) | parents(children(x)))        list<int>::const_iterator c;        for (c = children[var].begin(); c != children[var].end(); c++) {            allVars[var] = i;            distrib[i] += getLogProb(*c, allVars.getArray());            // DEBUG            if (isnan(distrib[i])) {                cout << "distrib[" << i << "] = 0.0 after child " << *c << "\n";            }        }        // Keep track of total probability, so we can normalize        if (totalProb - distrib[i] < -10) {            // If distrib[i] is much larger, just use it.            totalProb = distrib[i];        } else if (totalProb - distrib[i] < 10) {            // If they're within the same range, add them            totalProb = log(exp(totalProb - distrib[i]) + 1) + distrib[i];        }    }    allVars[var] = origVal;    if (isnan(totalProb)) {        cout << "totalProb = 0!\n";    }    // Normalize    for (int i = 0; i < numVals; i++) {        distrib[i] = exp(distrib[i] - totalProb);    }    return distrib;}double BayesNet::MBsample(int var, VarSet& allVars) const{    const Leaf* l = decisionTrees[var]->getLeaf(allVars.getArray());    int numVals;    vector<double> actualVals;    if (decisionTrees[var]->getRange() < 0) {        DT::BinGaussian* g = (DT::BinGaussian*)l;        double minVal = g->mean - 4 * g->SD;        double maxVal = g->mean + 4 * g->SD;        int numIncs = 1000;        double dx = (maxVal - minVal)/numIncs;        numVals = numIncs;        for (double currX = minVal; currX < maxVal; currX += dx) {            actualVals.push_back(currX);        }        actualVals.push_back(VarSet::UNKNOWN);    } else {        numVals = decisionTrees[var]->getRange();        for (int i = 0; i < numVals; i++) {            actualVals.push_back(i);        }    }    vector<double> distrib(numVals);    //double totalProb = 0.0;    double totalProb = -1.0/0.0000000000001;    // Compute probability distribution across all values,     // using Markov-blanket inference.    double origVal = allVars[var];    for (int i = 0; i < numVals; i++) {        allVars[var] = actualVals[i];        // Compute p(x | parents(x))        distrib[i] = l->getLogProb(actualVals[i]);        // Scale by p(children(x) | parents(children(x)))        list<int>::const_iterator c;        for (c = children[var].begin(); c != children[var].end(); c++) {            distrib[i] += getLogProb(*c, allVars.getArray());        }        // Keep track of total probability, so we can normalize        if (totalProb - distrib[i] < -10) {            // If distrib[i] is much larger, just use it.            totalProb = distrib[i];        } else if (totalProb - distrib[i] < 10) {            // If they're within the same range, add them            totalProb = log(exp(totalProb - distrib[i]) + 1) + distrib[i];        }    }    allVars[var] = origVal;    // Normalize    for (int i = 0; i < numVals; i++) {        distrib[i] = exp(distrib[i] - totalProb);    }    // Sample    double p = (double)rand() / RAND_MAX;    for (int i = 0; i < numVals; i++) {        p -= distrib[i];        if (p < 0) {            return actualVals[i];        }    }    // DEBUG    cout << "Error: defaulting to last value!\n";    cout << "p = " << p << endl;    cout << "var = " << var << endl;    cout << "numVals = " << numVals <<  " ("         << decisionTrees[var]->getRange() << ")\n";    for (int i = 0; i < numVals; i++) {        cout << distrib[i] << " ";    }    cout << endl;    return actualVals.back();}// Generate a complete samplevoid BayesNet::wholeSample(VarSet& allvars) const{    /* HACK --     * If you need a variable to be sampled, it must be     * marked as UNTESTED in allvars before calling this function.     * UNKNOWN is simply not sufficient, because UNKNOWN is a legal     * value for continuous variables.  (For discrete variables,     * the unknown value is a separate value taken care of when building     * the dataset.  It must be handled specially for continuous variables.)     */    int knownVars = 0;    for (int i = 0; i < numVars; i++) {        /*        if (allvars.isObserved(i) && allvars.isTested(i)) {            knownVars++;        }        */        if (allvars.isTested(i)) {            knownVars++;        }    }    // Sample each unknown variable whose parents have been specified.    // Keep going until all variables have been sampled.    while (knownVars < numVars) {        for (int i = 0; i < numVars; i++) {            if (/*!allvars.isObserved(i) || */ !allvars.isTested(i)) {                bool unspecifiedParent = false;                list<int>::const_iterator p;                for (p = parents[i].begin(); p != parents[i].end(); p++) {                    if (/* !allvars.isObserved(*p) || */ !allvars.isTested(*p)) {                        unspecifiedParent = true;                        break;                    }                }                if (!unspecifiedParent) {                    allvars[i] = decisionTrees[i]->sample(allvars.getArray());                    knownVars++;                }            }        }    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -