⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dist_methods.cpp

📁 ncbi源码
💻 CPP
字号:
/* * =========================================================================== * PRODUCTION $Log: dist_methods.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 18:09:24  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8 * PRODUCTION * =========================================================================== *//*  $Id: dist_methods.cpp,v 1000.1 2004/06/01 18:09:24 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors:  Josh Cherry * * File Description:  Distance methods for phylogenic tree reconstruction * */#include <ncbi_pch.hpp>#include <algo/phy_tree/dist_methods.hpp>#include <math.h>#include "fastme/graph.h"BEGIN_NCBI_SCOPEUSING_SCOPE(objects);/// d = -3/4 ln(1 - (4/3)p).void CDistMethods::JukesCantorDist(const TMatrix& frac_diff,                                   TMatrix& result){    result.Resize(frac_diff.GetRows(), frac_diff.GetCols());    for (size_t i = 0;  i < frac_diff.GetRows();  ++i) {        for (size_t j = 0;  j < frac_diff.GetCols();  ++j) {            result(i, j) = -0.75 * log(1 - (4.0 / 3.0) * frac_diff(i, j));        }    }}/// d = -ln(1 - p)void CDistMethods::PoissonDist(const TMatrix& frac_diff,                               TMatrix& result){    result.Resize(frac_diff.GetRows(), frac_diff.GetCols());    for (size_t i = 0;  i < frac_diff.GetRows();  ++i) {        for (size_t j = 0;  j < frac_diff.GetCols();  ++j) {            result(i, j) = -log(1 - frac_diff(i, j));        }    }}/// d = -ln(1 - p - 0.2p^2)void CDistMethods::KimuraDist(const TMatrix& frac_diff,                                     TMatrix& result){    result.Resize(frac_diff.GetRows(), frac_diff.GetCols());    for (size_t i = 0;  i < frac_diff.GetRows();  ++i) {        for (size_t j = 0;  j < frac_diff.GetCols();  ++j) {            result(i, j) = -log(1 - frac_diff(i, j)                                - 0.2 * frac_diff(i, j) * frac_diff(i, j));        }    }}/// As per Hillis et al. (Ed.), Molecular Systematics, pg. 488-489CDistMethods::TTree *CDistMethods::NjTree(const TMatrix& dist_mat,                                          const vector<string>& labels){    // prepare initial tree (a star phylogeny)    TTree *tree = new TTree;    for (unsigned int i = 0;  i < dist_mat.GetRows();  ++i) {        TTree *new_node = tree->AddNode();        new_node->GetValue().SetId(i);        if (labels.empty()) {            new_node->GetValue().SetLabel() = 'N' + NStr::IntToString(i);        } else {            new_node->GetValue().SetLabel() = labels[i];        }    }    // now the real work; do N - 2 neighbor joinings    int next_id = dist_mat.GetRows();    TMatrix dmat = dist_mat;    dmat.Resize(2 * dist_mat.GetRows() - 2, 2 * dist_mat.GetRows() - 2);    vector<double> r(dmat.GetRows());    for (unsigned int n = dist_mat.GetRows();  n > 2;  --n) {        int i, j;        double m;        TTree::TNodeList_I neighbor1, neighbor2;        // first compute r_i        for (TTree::TNodeList_I it1 = tree->SubNodeBegin();             it1 != tree->SubNodeEnd();  ++it1) {            i = (*it1)->GetValue().GetId();            double sum = 0;            for (TTree::TNodeList_I it2 = tree->SubNodeBegin();                 it2 != tree->SubNodeEnd();  ++it2) {                if (it1 == it2) {                    continue;                }                j = (*it2)->GetValue().GetId();                sum += dmat(i, j);            }            r[i] = sum;        }        // find where M_{i, j} is minimal        double min_m = 0;        for (TTree::TNodeList_I it1 = tree->SubNodeBegin();             it1 != tree->SubNodeEnd();  ++it1) {            TTree::TNodeList_I it2 = it1;            ++it2;            for (;  it2 != tree->SubNodeEnd();  ++it2) {                i = (*it1)->GetValue().GetId();                j = (*it2)->GetValue().GetId();                m = dmat(i, j) - (r[i] + r[j]) / (n - 2);                if (m < min_m) {                    neighbor1 = it1;                    neighbor2 = it2;                    min_m = m;                }            }        }        // join the neighbors        TTree *new_node = new TTree;        i = (*neighbor1)->GetValue().GetId();        j = (*neighbor2)->GetValue().GetId();        int u = next_id++;        new_node->GetValue().SetId(u);        double viu = dmat(i, j) / 2 + (r[i] - r[j]) / (2 * (n - 2));        double vju = dmat(i, j) - viu;        (*neighbor1)->GetValue().SetDist(viu);        (*neighbor2)->GetValue().SetDist(vju);        new_node->AddNode(tree->DetachNode(neighbor1));        new_node->AddNode(tree->DetachNode(neighbor2));        tree->AddNode(new_node);        // add new distances to dmat        for (TTree::TNodeList_CI it1 = tree->SubNodeBegin();             it1 != tree->SubNodeEnd();  ++it1) {            int k = (*it1)->GetValue().GetId();            if (k == u) {                continue;            }            dmat(k, u) = dmat(u, k)                = (dmat(i, k) + dmat(j, k) - dmat(i, j)) / 2;        }    }    // Now the root has just two children, whose distances    // have not been set.  Could do different things here.    // Let's make a trifurcation.    TTree::TNodeList_I it1 = tree->SubNodeBegin();    TTree::TNodeList_I it2 = it1;    ++it2;    if ((*it1)->IsLeaf()) {        swap(*it1, *it2);    }    double d = dmat((*it1)->GetValue().GetId(), (*it2)->GetValue().GetId());    (*it2)->GetValue().SetDist(d);    (*it1)->AddNode(tree->DetachNode(it2));    TTree *rv = tree->DetachNode(it1);    delete tree;  // just the original root node    return rv;}// The following is a wrapper for Richard Desper's fast minimume evolution// code.  The user passes in a CDistMethods::TMatrix, and gets back// a CDistMethods::TTree.  Behind the scenes, the code converts the// TTMatrix to the representation used by fastme, runs the fastme// algorithm using fastme_run, and converts the resulting tree// to the CDistMethods::TTree representation.BEGIN_SCOPE(fastme)typedef char boolean;boolean leaf(meNode *v);meTree* fastme_run(double** D_in, int numSpecies_in, char **labels,                   int btype_in, int wtype_in, int ntype_in);double **initDoubleMatrix(int d);void freeMatrix(double **D, int size);void freeTree(meTree *T);END_SCOPE(fastme)// Functions to convert from tree representation used by fastme codestatic void s_AddFastMeSubtree(fastme::meNode *me_node,                               CDistMethods::TTree* node,                               const vector<string>& labels){    if (fastme::leaf(me_node)) {        int id = NStr::StringToInt(me_node->label);        node->GetValue().SetId(id);        if (!labels.empty()) {            node->GetValue().SetLabel(labels[id]);        } else {            node->GetValue().SetLabel(me_node->label);        }        return;    }    // not a leaf; add two subtrees    // first left    CDistMethods::TTree* child_node = node->AddNode();    child_node->GetValue().SetDist(me_node->leftEdge->distance);    s_AddFastMeSubtree(me_node->leftEdge->head, child_node, labels);    // then right    child_node = node->AddNode();    child_node->GetValue().SetDist(me_node->rightEdge->distance);    s_AddFastMeSubtree(me_node->rightEdge->head, child_node, labels);}static CDistMethods::TTree* s_ConvertFastMeTree(fastme::meTree *me_tree,                                                const vector<string>& labels) {    CDistMethods::TTree *tree = new CDistMethods::TTree;    fastme::meEdge *edge;    edge = me_tree->root->leftEdge;    CDistMethods::TTree *node = tree->AddNode();    node->GetValue().SetDist(edge->distance);    int id = NStr::StringToInt(me_tree->root->label);    node->GetValue().SetId(id);    if (!labels.empty()) {        node->GetValue().SetLabel(labels[id]);    } else {        node->GetValue().SetLabel(me_tree->root->label);    }    s_AddFastMeSubtree(edge->head, tree, labels);    return tree;}// The publically visible interfaceCDistMethods::TTree *CDistMethods::FastMeTree(const TMatrix& dist_mat,                                              const vector<string>& labels,                                              EFastMePar btype,                                              EFastMePar wtype,                                              EFastMePar ntype){    double **dfme = fastme::initDoubleMatrix(dist_mat.GetRows());    for (unsigned int i = 0;  i < dist_mat.GetRows();  ++i) {        for (unsigned int j = 0;  j < dist_mat.GetRows();  ++j) {            dfme[i][j] = dist_mat(i, j);        }    }    // leaf labels for fastme code; just pass in    // "0", "1", etc., and fill in real labels later    char **clabels = new char*[dist_mat.GetRows()];    vector<string> dummy_labels;    dummy_labels.resize(dist_mat.GetRows());    for (unsigned int i = 0;  i < dist_mat.GetRows();  ++i) {        dummy_labels[i] = NStr::IntToString(i);        clabels[i] = const_cast<char *>(dummy_labels[i].c_str());    }    fastme::meTree *me_out =        fastme::fastme_run(dfme, dist_mat.GetRows(), clabels,                           btype, wtype, ntype);    fastme::freeMatrix(dfme, dist_mat.GetRows());    delete [] clabels;    TTree *me_tree = s_ConvertFastMeTree(me_out, labels);    fastme::freeTree(me_out);    return me_tree;}// Calculate pairwise fractional differencesdouble CDistMethods::Divergence(const string& seq1, const string& seq2){    int diff_count = 0;    int pos_count = 0;    for (unsigned int pos = 0;  pos < seq1.size();  ++pos) {        if (seq1[pos] == '-' || seq2[pos] == '-') {            continue;  // ignore this position if a seq has a gap        }        pos_count++;        if (seq1[pos] != seq2[pos]) {            diff_count++;        }    }    return diff_count / (double) pos_count;}void CDistMethods::Divergence(const CAlnVec& avec_in, TMatrix& result){    // want to change gap char of CAlnVec, but no copy constructor,    // so effectively copy in a round-about way    CAlnVec avec(avec_in.GetDenseg(), avec_in.GetScope());    avec.SetGapChar('-');    int nseqs = avec.GetNumRows();    result.Resize(nseqs, nseqs);    string seqi, seqj;    for (int i = 0;  i < nseqs;  ++i) {        result(i, i) = 0;  // 0 difference from itself        avec.GetWholeAlnSeqString(i, seqi);        for (int j = i + 1;  j < nseqs;  ++j) {            avec.GetWholeAlnSeqString(j, seqj);            result(i, j) = result(j, i) = CDistMethods::Divergence(seqi, seqj);        }    }}#if 0void CDistMethods::Divergence(const CAlignment& aln, TMatrix& result){    int nseqs = aln.GetSeqs().size();    result.Resize(nseqs, nseqs);    for (int i = 0;  i < nseqs;  ++i) {        result(i, i) = 0;  // 0 difference from itself        for (int j = i + 1;  j < nseqs;  ++j) {            result(i, j) = result(j, i) =                 CDistMethods::Divergence(aln.GetSeqs()[i], aln.GetSeqs()[j]);        }    }}#endifEND_NCBI_SCOPE/* * =========================================================================== * $Log: dist_methods.cpp,v $ * Revision 1000.1  2004/06/01 18:09:24  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8 * * Revision 1.8  2004/05/21 21:41:03  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.7  2004/03/17 17:57:48  jcherry * Made fastme set the node ids of leaf nodes * * Revision 1.6  2004/02/19 16:43:45  jcherry * Temporarily disable one form of Divergence() method * * Revision 1.5  2004/02/19 13:20:04  dicuccio * Roll back to version 1.3 * * Revision 1.3  2004/02/10 23:07:46  ucko * +<math.h> for log() * * Revision 1.2  2004/02/10 17:01:19  dicuccio * Use swap() instead of iter_swap(), as the latter isn't found on MSVC * * Revision 1.1  2004/02/10 15:15:58  jcherry * Initial version * * =========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -