⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nw_spliced_aligner32.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_spliced_aligner32.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 18:05:06  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.15 * PRODUCTION * =========================================================================== *//* $Id: nw_spliced_aligner32.cpp,v 1000.3 2004/06/01 18:05:06 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE                          *               National Center for Biotechnology Information*                                                                          *  This software/database is a "United States Government Work" under the   *  terms of the United States Copyright Act.  It was written as part of    *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S.    *  Government have not placed any restriction on its use or reproduction.  *                                                                          *  Although all reasonable efforts have been taken to ensure the accuracy  *  and reliability of the software and data, the NLM and the U.S.          *  Government do not and cannot warrant the performance or results that    *  may be obtained by using this software or data. The NLM and the U.S.    *  Government disclaim all warranties, express or implied, including       *  warranties of performance, merchantability or fitness for any particular*  purpose.                                                                *                                                                          *  Please cite the author in any work or product based on this material.   ** ===========================================================================** Authors:  Yuri Kapustin** File Description:  CSplicedAligner32*                   * ===========================================================================**/#include <ncbi_pch.hpp>#include <algo/align/nw_spliced_aligner32.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPEconst unsigned char g_nwspl32_donor[splice_type_count_32][2] = {    {'G','T'}, // donor type 1 in CDonorAcceptorMatrix coding    {'G','C'}, // 2    {'A','T'}  // 3};const unsigned char g_nwspl32_acceptor[splice_type_count_32][2] = {    {'A','G'}, // 1    {'A','G'}, // 2    {'A','C'}  // 3};// Transcript coding// (lower bits contain jump value for gaps and introns)const Uint4 kTypeDiag   = 0x00000000;  // single match or mismatchconst Uint4 kTypeGap    = 0x40000000;  // gap - any typeconst Uint4 kTypeIntron = 0x80000000;  // intron// Donor-acceptor static object used// to figure out what donors and aceptors// could be transformed to any given na pair// by mutating either of its characters (but not both).// donor types:      bits 5-7// acceptor types:   bits 1-3class CDonorAcceptorMatrix {public:    CDonorAcceptorMatrix() {        memset(m_matrix, 0, sizeof m_matrix);        m_matrix['A']['A'] = 0x47; //       3 ; 1, 2, 3        m_matrix['A']['G'] = 0x47; //       3 ; 1, 2, 3        m_matrix['A']['T'] = 0x57; // 1,    3 ; 1, 2, 3        m_matrix['A']['C'] = 0x67; //    2, 3 ; 1, 2, 3        m_matrix['G']['A'] = 0x33; // 1, 2    ;        m_matrix['G']['G'] = 0x33; // 1, 2    ; 1, 2        m_matrix['G']['T'] = 0x70; // 1, 2, 3 ;        m_matrix['G']['C'] = 0x34; // 1, 2    ;       3        m_matrix['T']['A'] = 0x00; //         ;        m_matrix['T']['G'] = 0x03; //         ; 1, 2        m_matrix['T']['T'] = 0x50; // 1,    3 ;        m_matrix['T']['C'] = 0x24; //    2    ;       3        m_matrix['C']['A'] = 0x00; //         ;        m_matrix['C']['G'] = 0x03; //         ; 1, 2        m_matrix['C']['T'] = 0x50; // 1,    3 ;        m_matrix['C']['C'] = 0x24; //    2    ;       3    }    const Uint1* GetMatrix() const {        return m_matrix[0];    }private:    Uint1 m_matrix [256][256];};namespace {    CDonorAcceptorMatrix g_dnr_acc_matrix;}CSplicedAligner32::CSplicedAligner32():      m_Wd1(GetDefaultWd1()),      m_Wd2(GetDefaultWd2()){    for(unsigned char st = 0; st < splice_type_count_32; ++st) {        m_Wi[st] = GetDefaultWi(st);    }}CSplicedAligner32::CSplicedAligner32(const char* seq1, size_t len1,                                       const char* seq2, size_t len2)    : CSplicedAligner(seq1, len1, seq2, len2),      m_Wd1(GetDefaultWd1()),      m_Wd2(GetDefaultWd2()){    for(unsigned char st = 0; st < splice_type_count_32; ++st) {        m_Wi[st] = GetDefaultWi(st);    }}CNWAligner::TScore CSplicedAligner32::GetDefaultWi(unsigned char splice_type){    switch(splice_type) {        case 0: return -15; // GT/AG        case 1: return -18; // GC/AG        case 2: return -21; // AT/AC        default: {            NCBI_THROW(CAlgoAlignException,                       eInvalidSpliceTypeIndex,                       "Invalid splice type index");        }    }}// Evaluate dynamic programming matrix. Create transcript.CNWAligner::TScore CSplicedAligner32::x_Align (                         const char* seg1, size_t len1,                         const char* seg2, size_t len2,                         vector<ETranscriptSymbol>* transcript ){    const size_t N1 = len1 + 1;    const size_t N2 = len2 + 1;    vector<TScore> stl_rowV (N2), stl_rowF (N2);    TScore* rowV    = &stl_rowV[0];    TScore* rowF    = &stl_rowF[0];    // index calculation: [i,j] = i*n2 + j    vector<Uint4> stl_bm (N1*N2);    Uint4* backtrace_matrix = &stl_bm[0];    TScore* pV = rowV - 1;    const char* seq1   = seg1 - 1;    const char* seq2   = seg2 - 1;    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;    bool bFreeGapLeft1  = m_esf_L1 && seg1 == m_Seq1;    bool bFreeGapRight1 = m_esf_R1 && m_Seq1 + m_SeqLen1 - len1 == seg1;    bool bFreeGapLeft2  = m_esf_L2 && seg2 == m_Seq2;    bool bFreeGapRight2 = m_esf_R2 && m_Seq2 + m_SeqLen2 - len2 == seg2;    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;    TScore wg1 = wgleft1, ws1 = wsleft1;    // recurrences    TScore wgleft2   = bFreeGapLeft2? 0: m_Wg;    TScore wsleft2   = bFreeGapLeft2? 0: m_Ws;    TScore V  = 0;    TScore V0 = 0;    TScore E, G, n0;    Uint4 type;    // store candidate donors    size_t* jAllDonors [splice_type_count_32];    TScore* vAllDonors [splice_type_count_32];    vector<size_t> stl_jAllDonors (splice_type_count_32 * N2);    vector<TScore> stl_vAllDonors (splice_type_count_32 * N2);    for(unsigned char st = 0; st < splice_type_count_32; ++st) {        jAllDonors[st] = &stl_jAllDonors[st*N2];        vAllDonors[st] = &stl_vAllDonors[st*N2];    }    size_t  jTail[splice_type_count_32], jHead[splice_type_count_32];    TScore  vBestDonor   [splice_type_count_32];    size_t  jBestDonor   [splice_type_count_32];    // place to store gap opening starts    size_t ins_start;    vector<size_t> stl_del_start(N2);    size_t* del_start = &stl_del_start[0];    // donor/acceptor matrix    const Uint1 * dnr_acc_matrix = g_dnr_acc_matrix.GetMatrix();    // fake row (above lambda)    rowV[0] = kInfMinus;    size_t k;    for (k = 0; k < N2; k++) {        rowV[k] = rowF[k] = kInfMinus;	del_start[k] = k;    }    k = 0;    size_t i, j = 0, k0;    unsigned char ci;    for(i = 0;  i < N1;  ++i, j = 0) {        V = i > 0? (V0 += wsleft2) : 0;        E = kInfMinus;        ins_start = k0 = k;        backtrace_matrix[k++] = kTypeGap; // | del_start[0]        ci = i > 0? seq1[i]: 'N';        for(unsigned char st = 0; st < splice_type_count_32; ++st) {            jTail[st] = jHead[st] = 0;            vBestDonor[st] = kInfMinus;        }        if(i == N1 - 1 && bFreeGapRight1) {                wg1 = ws1 = 0;        }        TScore wg2 = m_Wg, ws2 = m_Ws;                    // detect donor candidate        if(N2 > 2) {            unsigned char d1 = seq2[1], d2 = seq2[2];            Uint1 dnr_type = 0xF0 & dnr_acc_matrix[(size_t(d1)<<8)|d2];            for(Uint1 st = 0; st < splice_type_count_32; ++st ) {                jAllDonors[st][jTail[st]] = j;                if(dnr_type & (0x10 << st)) {                    vAllDonors[st][jTail[st]] =                         ( d1 == g_nwspl32_donor[st][0] && d2 == g_nwspl32_donor[st][1] ) ?                        V: (V + m_Wd1);                }                else { // both chars distorted                    vAllDonors[st][jTail[st]] = V + m_Wd2;                }                ++(jTail[st]);            }        }        for (j = 1; j < N2; ++j, ++k) {                        G = pV[j] + sm[ci][(unsigned char)seq2[j]];            pV[j] = V;            n0 = V + wg1;            if(E >= n0) {                E += ws1;      // continue the gap            }            else {                E = n0 + ws1;  // open a new gap		ins_start = k-1;            }            if(j == N2 - 1 && bFreeGapRight2) {                wg2 = ws2 = 0;            }            n0 = rowV[j] + wg2;            if(rowF[j] >= n0) {                rowF[j] += ws2;            }            else {                rowF[j] = n0 + ws2;                del_start[j] = k-N2;            }            // evaluate the score (V)            if (E >= rowF[j]) {                if(E >= G) {                    V = E;                    type = kTypeGap | ins_start;                }                else {                    V = G;                    type = kTypeDiag;                }            } else {                if(rowF[j] >= G) {                    V = rowF[j];                    type = kTypeGap | del_start[j];                }                else {                    V = G;                    type = kTypeDiag;                }            }            // find out if there are new donors            for(unsigned char st = 0; st < splice_type_count_32; ++st) {                if(jTail[st] > jHead[st])  {                    if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) {                        if(vAllDonors[st][jHead[st]] > vBestDonor[st]) {                            vBestDonor[st] = vAllDonors[st][jHead[st]];                            jBestDonor[st] = jAllDonors[st][jHead[st]];                        }                        ++(jHead[st]);                    }                }            }                            // check splice signal            Uint4 dnr_pos = kMax_UI4;            unsigned char c1 = seq2[j-1], c2 = seq2[j];            Uint1 acc_mask = 0x0F & dnr_acc_matrix[(size_t(c1)<<8)|c2];            for(Uint1 st = 0; st < splice_type_count_32; ++st ) {                if(acc_mask & (0x01 << st)) {                    TScore vAcc = vBestDonor[st] + m_Wi[st];                    if( c1 != g_nwspl32_acceptor[st][0] || c2 != g_nwspl32_acceptor[st][1] ) {                        vAcc += m_Wd1;                    }                    if(vAcc > V) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -