📄 nw_spliced_aligner16.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_spliced_aligner16.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:05:00 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.13 * PRODUCTION * =========================================================================== *//* $Id: nw_spliced_aligner16.cpp,v 1000.3 2004/06/01 18:05:00 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information* * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular* purpose. * * Please cite the author in any work or product based on this material. ** ===========================================================================** Authors: Yuri Kapustin** File Description: CSplicedAligner16* * ===========================================================================**/#include <ncbi_pch.hpp>#include <algo/align/nw_spliced_aligner16.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPEconst unsigned char g_nwspl_donor[splice_type_count_16][2] = { {'G','T'}, {'G','C'}, {'A','T'}, {'?','?'}};const unsigned char g_nwspl_acceptor[splice_type_count_16][2] = { {'A','G'}, {'A','G'}, {'A','C'}, {'?','?'}};const unsigned char g_topidx = splice_type_count_16 - 1;CSplicedAligner16::CSplicedAligner16(){ for(unsigned char st = 0; st < splice_type_count_16; ++st) { m_Wi[st] = GetDefaultWi(st); }}CSplicedAligner16::CSplicedAligner16(const char* seq1, size_t len1, const char* seq2, size_t len2) : CSplicedAligner(seq1, len1, seq2, len2){ for(unsigned char st = 0; st < splice_type_count_16; ++st) { m_Wi[st] = GetDefaultWi(st); }}CNWAligner::TScore CSplicedAligner16::GetDefaultWi(unsigned char splice_type){ switch(splice_type) { case 0: return -8; // GT/AG case 1: return -12; // GC/AG case 2: return -14; // AT/AC case 3: return -18; // ??/?? default: { NCBI_THROW(CAlgoAlignException, eInvalidSpliceTypeIndex, "Invalid splice type index"); } }}// Bit coding (eleven bits per value) for backtrace.// --------------------------------------------------// [11-8] donors (bitwise OR for multiple types)// 1000 ?? (??/??) - arbitrary pair// 0100 AT (AT/AC)// 0010 GC (GC/AG)// 0001 GT (GT/AG)// [7-5] acceptor type// 100 ?? (??/??)// 011 AC (AT/AC)// 010 AG (GC/AG)// 001 AG (GT/AG)// 000 no acceptor// [4] Fc: 1 if gap in 2nd sequence was extended; 0 if it is was opened// [3] Ec: 1 if gap in 1st sequence was extended; 0 if it is was opened// [2] E: 1 if space in 1st sequence; 0 if space in 2nd sequence// [1] D: 1 if diagonal; 0 - otherwiseconst unsigned char kMaskFc = 0x0001;const unsigned char kMaskEc = 0x0002;const unsigned char kMaskE = 0x0004;const unsigned char kMaskD = 0x0008;// Evaluate dynamic programming matrix. Create transcript.CNWAligner::TScore CSplicedAligner16::x_Align ( const char* seg1, size_t len1, const char* seg2, size_t len2, vector<ETranscriptSymbol>* transcript ){ TScore V = 0; const size_t N1 = len1 + 1; const size_t N2 = len2 + 1; vector<TScore> stl_rowV (N2), stl_rowF (N2); TScore* rowV = &stl_rowV[0]; TScore* rowF = &stl_rowF[0]; // index calculation: [i,j] = i*n2 + j vector<Uint2> stl_bm (N1*N2); Uint2* backtrace_matrix = &stl_bm[0]; TScore* pV = rowV - 1; const char* seq1 = seg1 - 1; const char* seq2 = seg2 - 1; const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s; bool bFreeGapLeft1 = m_esf_L1 && seg1 == m_Seq1; bool bFreeGapRight1 = m_esf_R1 && m_Seq1 + m_SeqLen1 - len1 == seg1; bool bFreeGapLeft2 = m_esf_L2 && seg2 == m_Seq2; bool bFreeGapRight2 = m_esf_R2 && m_Seq2 + m_SeqLen2 - len2 == seg2; TScore wgleft1 = bFreeGapLeft1? 0: m_Wg; TScore wsleft1 = bFreeGapLeft1? 0: m_Ws; TScore wg1 = wgleft1, ws1 = wsleft1; // recurrences TScore wgleft2 = bFreeGapLeft2? 0: m_Wg; TScore wsleft2 = bFreeGapLeft2? 0: m_Ws; TScore V_max, vAcc; TScore V0 = 0; TScore E, G, n0; Uint2 tracer; // store candidate donors size_t* jAllDonors [splice_type_count_16]; TScore* vAllDonors [splice_type_count_16]; vector<size_t> stl_jAllDonors (splice_type_count_16 * N2); vector<TScore> stl_vAllDonors (splice_type_count_16 * N2); for(unsigned char st = 0; st < splice_type_count_16; ++st) { jAllDonors[st] = &stl_jAllDonors[st*N2]; vAllDonors[st] = &stl_vAllDonors[st*N2]; } size_t jTail[splice_type_count_16], jHead[splice_type_count_16]; TScore vBestDonor [splice_type_count_16]; size_t jBestDonor [splice_type_count_16]; // fake row rowV[0] = kInfMinus; size_t k; for (k = 0; k < N2; k++) { rowV[k] = rowF[k] = kInfMinus; } k = 0; size_t i, j = 0, k0; unsigned char ci; for(i = 0; i < N1; ++i, j = 0) { V = i > 0? (V0 += wsleft2) : 0; E = kInfMinus; k0 = k; backtrace_matrix[k++] = kMaskFc; ci = i > 0? seq1[i]: 'N'; for(unsigned char st = 0; st < splice_type_count_16; ++st) { jTail[st] = jHead[st] = 0; vBestDonor[st] = kInfMinus; } if(i == N1 - 1 && bFreeGapRight1) { wg1 = ws1 = 0; } TScore wg2 = m_Wg, ws2 = m_Ws; // detect donor candidate if(N2 > 2) { for(unsigned char st = 0; st < g_topidx; ++st) { if(seq2[1] == g_nwspl_donor[st][0] && seq2[2] == g_nwspl_donor[st][1]) { jAllDonors[st][jTail[st]] = j; vAllDonors[st][jTail[st]] = V; ++(jTail[st]); } } } // the first max value jAllDonors[g_topidx][jTail[g_topidx]] = j; vAllDonors[g_topidx][jTail[g_topidx]] = V_max = V; ++(jTail[3]); for (j = 1; j < N2; ++j, ++k) { G = pV[j] + sm[ci][(unsigned char)seq2[j]]; pV[j] = V; n0 = V + wg1; if(E >= n0) { E += ws1; // continue the gap tracer = kMaskEc; } else { E = n0 + ws1; // open a new gap tracer = 0; } if(j == N2 - 1 && bFreeGapRight2) { wg2 = ws2 = 0; } n0 = rowV[j] + wg2; if(rowF[j] >= n0) { rowF[j] += ws2; tracer |= kMaskFc; } else { rowF[j] = n0 + ws2; } // evaluate the score (V) if (E >= rowF[j]) { if(E >= G) { V = E; tracer |= kMaskE; } else { V = G; tracer |= kMaskD; } } else { if(rowF[j] >= G) { V = rowF[j]; } else { V = G; tracer |= kMaskD; } } // find out if there are new donors for(unsigned char st = 0; st < splice_type_count_16; ++st) { if(jTail[st] > jHead[st]) { if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) { if(vAllDonors[st][jHead[st]] > vBestDonor[st]) { vBestDonor[st] = vAllDonors[st][jHead[st]]; jBestDonor[st] = jAllDonors[st][jHead[st]]; } ++(jHead[st]); } } } // check splice signal size_t dnr_pos = 0; Uint2 tracer_dnr = 0xFFFF; Uint2 tracer_acc = 0; for(unsigned char st = 0; st < splice_type_count_16; ++st) { if(seq2[j-1] == g_nwspl_acceptor[st][0] && seq2[j] == g_nwspl_acceptor[st][1] && vBestDonor[st] > kInfMinus || st == g_topidx) { vAcc = vBestDonor[st] + m_Wi[st]; if(vAcc > V) { V = vAcc; tracer_acc = (st+1) << 4; dnr_pos = k0 + jBestDonor[st]; tracer_dnr = 0x0080 << st; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -