⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nw_spliced_aligner16.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_spliced_aligner16.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 18:05:00  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.13 * PRODUCTION * =========================================================================== *//* $Id: nw_spliced_aligner16.cpp,v 1000.3 2004/06/01 18:05:00 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE                          *               National Center for Biotechnology Information*                                                                          *  This software/database is a "United States Government Work" under the   *  terms of the United States Copyright Act.  It was written as part of    *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S.    *  Government have not placed any restriction on its use or reproduction.  *                                                                          *  Although all reasonable efforts have been taken to ensure the accuracy  *  and reliability of the software and data, the NLM and the U.S.          *  Government do not and cannot warrant the performance or results that    *  may be obtained by using this software or data. The NLM and the U.S.    *  Government disclaim all warranties, express or implied, including       *  warranties of performance, merchantability or fitness for any particular*  purpose.                                                                *                                                                          *  Please cite the author in any work or product based on this material.   ** ===========================================================================** Authors:  Yuri Kapustin** File Description:  CSplicedAligner16*                   * ===========================================================================**/#include <ncbi_pch.hpp>#include <algo/align/nw_spliced_aligner16.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPEconst unsigned char g_nwspl_donor[splice_type_count_16][2] = {    {'G','T'}, {'G','C'}, {'A','T'}, {'?','?'}};const unsigned char g_nwspl_acceptor[splice_type_count_16][2] = {    {'A','G'}, {'A','G'}, {'A','C'}, {'?','?'}};const unsigned char g_topidx = splice_type_count_16 - 1;CSplicedAligner16::CSplicedAligner16(){    for(unsigned char st = 0; st < splice_type_count_16; ++st) {        m_Wi[st] = GetDefaultWi(st);    }}CSplicedAligner16::CSplicedAligner16(const char* seq1, size_t len1,                                       const char* seq2, size_t len2)    : CSplicedAligner(seq1, len1, seq2, len2){    for(unsigned char st = 0; st < splice_type_count_16; ++st) {        m_Wi[st] = GetDefaultWi(st);    }}CNWAligner::TScore CSplicedAligner16::GetDefaultWi(unsigned char splice_type){   switch(splice_type) {        case 0: return -8;  // GT/AG        case 1: return -12; // GC/AG        case 2: return -14; // AT/AC        case 3: return -18; // ??/??        default: {            NCBI_THROW(CAlgoAlignException,                       eInvalidSpliceTypeIndex,                       "Invalid splice type index");        }    }}// Bit coding (eleven bits per value) for backtrace.// --------------------------------------------------// [11-8] donors (bitwise OR for multiple types)//        1000     ??    (??/??) - arbitrary pair//        0100     AT    (AT/AC)//        0010     GC    (GC/AG)//        0001     GT    (GT/AG)// [7-5]  acceptor type//        100      ?? (??/??)//        011      AC (AT/AC)//        010      AG (GC/AG)//        001      AG (GT/AG)//        000      no acceptor// [4]    Fc:      1 if gap in 2nd sequence was extended; 0 if it is was opened// [3]    Ec:      1 if gap in 1st sequence was extended; 0 if it is was opened// [2]    E:       1 if space in 1st sequence; 0 if space in 2nd sequence// [1]    D:       1 if diagonal; 0 - otherwiseconst unsigned char kMaskFc       = 0x0001;const unsigned char kMaskEc       = 0x0002;const unsigned char kMaskE        = 0x0004;const unsigned char kMaskD        = 0x0008;// Evaluate dynamic programming matrix. Create transcript.CNWAligner::TScore CSplicedAligner16::x_Align (                         const char* seg1, size_t len1,                         const char* seg2, size_t len2,                         vector<ETranscriptSymbol>* transcript ){    TScore V = 0;    const size_t N1 = len1 + 1;    const size_t N2 = len2 + 1;    vector<TScore> stl_rowV (N2), stl_rowF (N2);    TScore* rowV    = &stl_rowV[0];    TScore* rowF    = &stl_rowF[0];    // index calculation: [i,j] = i*n2 + j    vector<Uint2> stl_bm (N1*N2);    Uint2* backtrace_matrix = &stl_bm[0];    TScore* pV = rowV - 1;    const char* seq1 = seg1 - 1;    const char* seq2 = seg2 - 1;    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;    bool bFreeGapLeft1  = m_esf_L1 && seg1 == m_Seq1;    bool bFreeGapRight1 = m_esf_R1 && m_Seq1 + m_SeqLen1 - len1 == seg1;    bool bFreeGapLeft2  = m_esf_L2 && seg2 == m_Seq2;    bool bFreeGapRight2 = m_esf_R2 && m_Seq2 + m_SeqLen2 - len2 == seg2;    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;    TScore wg1 = wgleft1, ws1 = wsleft1;    // recurrences    TScore wgleft2   = bFreeGapLeft2? 0: m_Wg;    TScore wsleft2   = bFreeGapLeft2? 0: m_Ws;    TScore V_max, vAcc;    TScore V0 = 0;    TScore E, G, n0;    Uint2 tracer;    // store candidate donors    size_t* jAllDonors [splice_type_count_16];    TScore* vAllDonors [splice_type_count_16];    vector<size_t> stl_jAllDonors (splice_type_count_16 * N2);    vector<TScore> stl_vAllDonors (splice_type_count_16 * N2);    for(unsigned char st = 0; st < splice_type_count_16; ++st) {        jAllDonors[st] = &stl_jAllDonors[st*N2];        vAllDonors[st] = &stl_vAllDonors[st*N2];    }    size_t  jTail[splice_type_count_16], jHead[splice_type_count_16];    TScore  vBestDonor [splice_type_count_16];    size_t  jBestDonor [splice_type_count_16];    // fake row    rowV[0] = kInfMinus;    size_t k;    for (k = 0; k < N2; k++) {        rowV[k] = rowF[k] = kInfMinus;    }    k = 0;    size_t i, j = 0, k0;    unsigned char ci;    for(i = 0;  i < N1;  ++i, j = 0) {               V = i > 0? (V0 += wsleft2) : 0;        E = kInfMinus;        k0 = k;        backtrace_matrix[k++] = kMaskFc;        ci = i > 0? seq1[i]: 'N';        for(unsigned char st = 0; st < splice_type_count_16; ++st) {            jTail[st] = jHead[st] = 0;            vBestDonor[st] = kInfMinus;        }        if(i == N1 - 1 && bFreeGapRight1) {                wg1 = ws1 = 0;        }        TScore wg2 = m_Wg, ws2 = m_Ws;                    // detect donor candidate        if(N2 > 2) {	  for(unsigned char st = 0; st < g_topidx; ++st) {                if(seq2[1] == g_nwspl_donor[st][0] &&                   seq2[2] == g_nwspl_donor[st][1]) {                    jAllDonors[st][jTail[st]] = j;                    vAllDonors[st][jTail[st]] = V;                    ++(jTail[st]);                }            }        }        // the first max value        jAllDonors[g_topidx][jTail[g_topidx]] = j;        vAllDonors[g_topidx][jTail[g_topidx]] = V_max = V;        ++(jTail[3]);        for (j = 1; j < N2; ++j, ++k) {                        G = pV[j] + sm[ci][(unsigned char)seq2[j]];            pV[j] = V;            n0 = V + wg1;            if(E >= n0) {                E += ws1;      // continue the gap                tracer = kMaskEc;            }            else {                E = n0 + ws1;  // open a new gap                tracer = 0;            }            if(j == N2 - 1 && bFreeGapRight2) {                wg2 = ws2 = 0;            }            n0 = rowV[j] + wg2;            if(rowF[j] >= n0) {                rowF[j] += ws2;                tracer |= kMaskFc;            }            else {                rowF[j] = n0 + ws2;            }            // evaluate the score (V)            if (E >= rowF[j]) {                if(E >= G) {                    V = E;                    tracer |= kMaskE;                }                else {                    V = G;                    tracer |= kMaskD;                }            } else {                if(rowF[j] >= G) {                    V = rowF[j];                }                else {                    V = G;                    tracer |= kMaskD;                }            }            // find out if there are new donors            for(unsigned char st = 0; st < splice_type_count_16; ++st) {                if(jTail[st] > jHead[st])  {                    if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) {                        if(vAllDonors[st][jHead[st]] > vBestDonor[st]) {                            vBestDonor[st] = vAllDonors[st][jHead[st]];                            jBestDonor[st] = jAllDonors[st][jHead[st]];                        }                        ++(jHead[st]);                    }                }            }                            // check splice signal            size_t dnr_pos = 0;            Uint2 tracer_dnr = 0xFFFF;            Uint2 tracer_acc = 0;	    for(unsigned char st = 0; st < splice_type_count_16; ++st) {                if(seq2[j-1] == g_nwspl_acceptor[st][0] &&                   seq2[j] == g_nwspl_acceptor[st][1] &&                   vBestDonor[st] > kInfMinus || st == g_topidx) {                    vAcc = vBestDonor[st] + m_Wi[st];                    if(vAcc > V) {                        V = vAcc;                        tracer_acc = (st+1) << 4;                        dnr_pos = k0 + jBestDonor[st];                        tracer_dnr = 0x0080 << st;                    }                }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -