首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
mm_aligner.cpp

来自「ncbi源码」· C++ 代码 · 共 1,022 行 · 第 1/2 页
CPP
1,022 行
/* * =========================================================================== * PRODUCTION $Log: mm_aligner.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 18:04:46  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.20 * PRODUCTION * =========================================================================== *//* $Id: mm_aligner.cpp,v 1000.1 2004/06/01 18:04:46 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE                           *               National Center for Biotechnology Information *                                                                           *  This software/database is a "United States Government Work" under the    *  terms of the United States Copyright Act.  It was written as part of     *  the author's official duties as a United States Government employee and  *  thus cannot be copyrighted.  This software/database is freely available  *  to the public for use. The National Library of Medicine and the U.S.     *  Government have not placed any restriction on its use or reproduction.   *                                                                           *  Although all reasonable efforts have been taken to ensure the accuracy   *  and reliability of the software and data, the NLM and the U.S.            *  Government do not and cannot warrant the performance or results that     *  may be obtained by using this software or data. The NLM and the U.S.     *  Government disclaim all warranties, express or implied, including        *  warranties of performance, merchantability or fitness for any particular *  purpose.                                                                 *                                                                           *  Please cite the author in any work or product based on this material.    * * =========================================================================== * * Authors:  Yuri Kapustin * * File Description:  CMMAligner implementation *                    * =========================================================================== * */#include <ncbi_pch.hpp>#include "mm_aligner_threads.hpp"#include <corelib/ncbimtx.hpp>#include <corelib/ncbi_system.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPECMMAligner::CMMAligner():    m_mt(false),    m_maxthreads(1){}CMMAligner::CMMAligner( const char* seq1, size_t len1,                        const char* seq2, size_t len2,                        const SNCBIPackedScoreMatrix* scoremat )  : CNWAligner(seq1, len1, seq2, len2, scoremat),    m_mt(false),    m_maxthreads(1){}void CMMAligner::EnableMultipleThreads(bool allow){    m_maxthreads = (m_mt = allow)? GetCpuCount(): 1;}CNWAligner::TScore CMMAligner::x_Run(){    m_terminate = false;    if(m_prg_callback) {        m_prg_info.m_iter_total = 2*m_SeqLen1*m_SeqLen2;        m_prg_info.m_iter_done = 0;        m_terminate = m_prg_callback(&m_prg_info);    }    if(m_terminate) {        return m_score = 0;    }        m_score = kMin_Int;    m_TransList.clear();    m_TransList.push_back(eTS_None);    SCoordRect m (0, 0, m_SeqLen1 - 1, m_SeqLen2 - 1);    x_DoSubmatrix(m, m_TransList.end(), false, false); // top-level call    if(m_terminate) {        return m_score = 0;    }    // reverse_copy not supported by some compilers    list<ETranscriptSymbol>::const_iterator ii = m_TransList.begin();    size_t nsize = m_TransList.size() - 1;    m_Transcript.clear();    m_Transcript.resize(nsize);    for(size_t k = 1; k <= nsize; ++k)        m_Transcript[nsize - k] = *++ii;    return m_score;}// trace bit coding (four bits per value): D E Ec Fc// D:  1 if diagonal; 0 - otherwise// E:  1 if space in 1st sequence; 0 if space in 2nd sequence// Ec: 1 if gap in 1st sequence was extended; 0 if it is was opened// Fc: 1 if gap in 2nd sequence was extended; 0 if it is was opened//const unsigned char kMaskFc  = 0x0001;const unsigned char kMaskEc  = 0x0002;const unsigned char kMaskE   = 0x0004;const unsigned char kMaskD   = 0x0008;DEFINE_STATIC_FAST_MUTEX (masterlist_mutex);void CMMAligner::x_DoSubmatrix( const SCoordRect& submatr,    list<ETranscriptSymbol>::iterator translist_pos,    bool left_top, bool right_bottom ){    if( m_terminate ) {        return;    }    const int dimI = submatr.i2 - submatr.i1 + 1;    const int dimJ = submatr.j2 - submatr.j1 + 1;    if(dimI < 1 || dimJ < 1) return;    bool top_level = submatr.i1 == 0 && submatr.j1 == 0 &&        submatr.i2 == m_SeqLen1-1 && submatr.j2 == m_SeqLen2-1;    if(dimI < 3 || dimJ < 3) { // terminal case        CFastMutexGuard guard (masterlist_mutex);        list<ETranscriptSymbol> lts;        TScore score = x_RunTerm(submatr, left_top, right_bottom, lts);        if(top_level) m_score = score;        m_TransList.splice(translist_pos, lts);        return;    }    const size_t I = submatr.i1 + dimI / 2;    const size_t dim = dimJ + 1;    // run Needleman-Wunsch without storing full traceback matrix    SCoordRect rtop (submatr.i1, submatr.j1, I, submatr.j2);    vector<TScore> vEtop (dim), vFtop (dim), vGtop (dim);    vector<unsigned char> trace_top (dim);    SCoordRect rbtm (I + 1, submatr.j1, submatr.i2 , submatr.j2);    vector<TScore> vEbtm (dim), vFbtm (dim), vGbtm (dim);    vector<unsigned char> trace_btm (dim);        if( m_mt && m_maxthreads > 1 && MM_RequestNewThread(m_maxthreads) ) {        CThreadRunOnTop* thr2 = new CThreadRunOnTop ( this, &rtop,                                            &vEtop, &vFtop, &vGtop,                                            &trace_top, left_top );        thr2->Run();        x_RunBtm(rbtm, vEbtm, vFbtm, vGbtm, trace_btm, right_bottom);        thr2->Join(0);    }    else {        x_RunTop(rtop, vEtop, vFtop, vGtop, trace_top, left_top);        x_RunBtm(rbtm, vEbtm, vFbtm, vGbtm, trace_btm, right_bottom);    }    if( m_terminate ) {        return;    }    // locate the transition point    size_t trans_pos;    ETransitionType trans_type;    TScore score1 = x_FindBestJ ( vEtop, vFtop, vGtop, vEbtm, vFbtm, vGbtm,                                  trans_pos, trans_type );    if(top_level)        m_score = score1;    // modify traces at the transition point if applicable    if(trans_type == eII) {        unsigned char mask_top = kMaskE;        if(trace_top[trans_pos] & kMaskEc)            mask_top |= kMaskEc;        trace_top[trans_pos] = mask_top;        trace_btm[trans_pos] = kMaskE | kMaskEc;    }    if(trans_type == eDD) {        trace_top[trans_pos] = (trace_top[trans_pos] & kMaskFc)? kMaskFc: 0;        trace_btm[trans_pos] = kMaskFc;    }    // extend a subpath to both directions    vector<unsigned char>::const_iterator trace_it_top =         trace_top.begin() + trans_pos;    list<ETranscriptSymbol> subpath_left;    size_t steps_left = x_ExtendSubpath(trace_it_top, false, subpath_left);    vector<unsigned char>::const_iterator trace_it_btm =         trace_btm.begin() + trans_pos;    list<ETranscriptSymbol> subpath_right;    size_t steps_right = x_ExtendSubpath(trace_it_btm, true, subpath_right);    // check if we reached left or top line    bool bNoLT = false;    int nc0 = trans_pos - steps_left;    if(nc0 < 0) {        NCBI_THROW(CAlgoAlignException, eInternal,                   "Assertion: LT underflow");    }    bool bLeft = nc0 == 0;    bool bTop  = I == submatr.i1;    if(bLeft && !bTop) {        int jump = I - submatr.i1;        subpath_left.insert(subpath_left.begin(), jump, eTS_Delete);    }    if(!bLeft && bTop) {        subpath_left.insert(subpath_left.begin(), nc0, eTS_Insert);    }    if(bLeft || bTop) {        bNoLT = true;    }    // check if we reached right or bottom line    bool bNoRB = false;    int nc1 = trans_pos + steps_right;    if(nc1 > dimJ) {        NCBI_THROW(CAlgoAlignException, eInternal,                   "Assertion: RB overflow");    }    bool bRight = nc1 == dimJ;    bool bBottom  = I == submatr.i2 - 1;    if(bRight && !bBottom) {        int jump = submatr.i2 - I - 1;        subpath_right.insert(subpath_right.end(), jump, eTS_Delete);    }    if(!bRight && bBottom) {        int jump = dimJ - nc1;        subpath_right.insert(subpath_right.end(), jump, eTS_Insert);    }    if(bRight || bBottom) {        bNoRB = true;    }    // find out if we have special left-top and/or right-bottom    // on the new sub-matrices    bool rb = subpath_left.front() == eTS_Delete;    bool lt = subpath_right.back() == eTS_Delete;    // coalesce the pieces together and insert into the master list    subpath_left.splice( subpath_left.end(), subpath_right );    list<ETranscriptSymbol>::iterator ti0, ti1;    {{        CFastMutexGuard  guard (masterlist_mutex);        ti0 = translist_pos;        --ti0;        m_TransList.splice( translist_pos, subpath_left );        ++ti0;        ti1 = translist_pos;    }}    // Recurse submatrices:    SCoordRect rlt;    if(!bNoLT) {        // left top        size_t left_bnd = submatr.j1 + trans_pos - steps_left - 1;        if(left_bnd >= submatr.j1) {            rlt.i1 = submatr.i1;            rlt.j1 = submatr.j1;            rlt.i2 = I - 1;            rlt.j2 = left_bnd;        }        else {            NCBI_THROW(CAlgoAlignException, eInternal,                       "Assertion: Left boundary out of range");        }    }    SCoordRect rrb;    if(!bNoRB) {        // right bottom        size_t right_bnd = submatr.j1 + trans_pos + steps_right;        if(right_bnd <= submatr.j2) {            rrb.i1 = I + 2;            rrb.j1 = right_bnd;            rrb.i2 = submatr.i2;            rrb.j2 = submatr.j2;        }        else {            NCBI_THROW(CAlgoAlignException, eInternal,                       "Assertion: Right boundary out of range");        }    }    if(!bNoLT && !bNoRB) {        if( m_mt && m_maxthreads > 1 && MM_RequestNewThread(m_maxthreads) ) {            // find out which rect is larger            if(rlt.GetArea() > rrb.GetArea()) {                // do rb in a second thread                CThread* thr2 = new CThreadDoSM( this, &rrb, ti1,                                                 lt, right_bottom);                thr2->Run();                // do lt in this thread                x_DoSubmatrix(rlt, ti0, left_top, rb);                thr2->Join(0);            }            else {                // do lt in a second thread                CThread* thr2 = new CThreadDoSM( this, &rlt, ti0,                                                 left_top, rb);                thr2->Run();                // do rb in this thread                x_DoSubmatrix(rrb, ti1, lt, right_bottom);                thr2->Join(0);            }        }        else {  // just do both            x_DoSubmatrix(rlt, ti0, left_top, rb);            x_DoSubmatrix(rrb, ti1, lt, right_bottom);        }    }    else if(!bNoLT) {        x_DoSubmatrix(rlt, ti0, left_top, rb);    }    else if(!bNoRB) {        x_DoSubmatrix(rrb, ti1, lt, right_bottom);    }}CNWAligner::TScore CMMAligner::x_FindBestJ (    const vector<TScore>& vEtop, const vector<TScore>& vFtop,    const vector<TScore>& vGtop, const vector<TScore>& vEbtm,    const vector<TScore>& vFbtm, const vector<TScore>& vGbtm,    size_t& pos,    ETransitionType& trans_type ) const{    const size_t dim = vEtop.size();    TScore score = kMin_Int;    TScore trans_alts [9];    bool bFreeGapLeft2  = m_esf_L2 && dim == m_SeqLen2 + 1;    bool bFreeGapRight2 = m_esf_R2 && dim == m_SeqLen2 + 1;    for(size_t i = 0; i < dim ; ++i) {        trans_alts [0] = vEtop[i] + vEbtm[i] - m_Wg;   // II        trans_alts [1] = vFtop[i] + vEbtm[i];          // DI        trans_alts [2] = vGtop[i] + vEbtm[i];          // GI        trans_alts [3] = vEtop[i] + vFbtm[i];          // ID        TScore wg = (bFreeGapLeft2 && i == 0 || bFreeGapRight2 && i == dim -1)?                    0: m_Wg;        trans_alts [4] = vFtop[i] + vFbtm[i] - wg;     // DD        trans_alts [5] = vGtop[i] + vFbtm[i];          // GD        trans_alts [6] = vEtop[i] + vGbtm[i];          // IG        trans_alts [7] = vFtop[i] + vGbtm[i];          // DG        trans_alts [8] = vGtop[i] + vGbtm[i];          // GG        for(size_t k = 0; k < 9; ++k) {            if(trans_alts[k] > score) {                score = trans_alts[k];                pos = i;                trans_type = (ETransitionType)k;            }        }    }    return score;}size_t CMMAligner::x_ExtendSubpath (    vector<unsigned char>::const_iterator trace_it,    bool direction,    list<ETranscriptSymbol>& subpath ) const{    subpath.clear();    size_t step_counter = 0;    if(direction) {        while(true) {            unsigned char Key = *trace_it;            if (Key & kMaskD) {                subpath.push_back(eTS_Match);                ++step_counter;                break;            }            else if (Key & kMaskE) {                subpath.push_back(eTS_Insert);                ++step_counter;                ++trace_it;                while(Key & kMaskEc) {                    Key = *trace_it++;                    subpath.push_back(eTS_Insert);                    ++step_counter;                }            }            else if ((Key & kMaskE) == 0) {                subpath.push_back(eTS_Delete);                break;            }            else {                NCBI_THROW(CAlgoAlignException, eInternal,                           "Assertion: incorrect backtrace symbol "                           "(right expansion)");            }        }    }    else {        while(true) {            unsigned char Key = *trace_it;            if (Key & kMaskD) {                subpath.push_front(eTS_Match);                ++step_counter;                break;            }            else if (Key & kMaskE) {                subpath.push_front(eTS_Insert);                ++step_counter;                --trace_it;                while(Key & kMaskEc) {                    Key = *trace_it--;                    subpath.push_front(eTS_Insert);                    ++step_counter;                }            }            else if ((Key & kMaskE) == 0) {                subpath.push_front(eTS_Delete);                break;            }            else {                NCBI_THROW(CAlgoAlignException, eInternal,                           "Assertion: incorrect backtrace symbol "                           "(left expansion)");            }        }    }    return step_counter;}#ifdef NCBI_THREADSDEFINE_STATIC_FAST_MUTEX (progress_mutex);#endifvoid CMMAligner::x_RunTop ( const SCoordRect& rect,             vector<TScore>& vE, vector<TScore>& vF, vector<TScore>& vG,             vector<unsigned char>& trace, bool lt ) const{    if( m_terminate ) {        return;    }    const size_t dim1 = rect.i2 - rect.i1 + 1;    const size_t dim2 = rect.j2 - rect.j1 + 1;    const size_t N1   = dim1 + 1;    const size_t N2   = dim2 + 1;    vector<TScore> stl_rowV (N2), stl_rowF (N2);    TScore* rowV    = &stl_rowV [0];    TScore* rowF    = &stl_rowF [0];    TScore* pV = rowV - 1;    const char* seq1 = m_Seq1 - 1 + rect.i1;    const char* seq2 = m_Seq2 - 1 + rect.j1;    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;    bool bFreeGapLeft1  = m_esf_L1 && rect.i1 == 0;    bool bFreeGapLeft2  = m_esf_L2 && rect.j1 == 0;    bool bFreeGapRight2  = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;    // progress reporting    const size_t prg_rep_rate = 100;    const size_t prg_rep_increment = prg_rep_rate*N2;    // first row    TScore wg = bFreeGapLeft1? 0: m_Wg;    TScore ws = bFreeGapLeft1? 0: m_Ws;    rowV[0] = wg;    size_t i, j;    for (j = 1; j < N2; ++j) {        rowV[j] = pV[j] + ws;        rowF[j] = kInfMinus;            }    rowV[0] = 0;    // recurrences    wg = bFreeGapLeft2? 0: m_Wg;    ws = bFreeGapLeft2? 0: m_Ws;    TScore V  = 0;    TScore V0 = lt? 0: wg;    TScore E, G, n0;    for(i = 1;  i < N1 - 1;  ++i) {                V = V0 += ws;        E = kInfMinus;
mm_aligner.cpp - 源码说明

本页面展示了「ncbi源码」中的 mm_aligner.cpp 源码文件，采用 C++ 编程语言编写，共 1,022 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?