📄 nw_spliced_aligner32.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_spliced_aligner32.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 18:05:06 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.15 * PRODUCTION * =========================================================================== *//* $Id: nw_spliced_aligner32.cpp,v 1000.3 2004/06/01 18:05:06 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information* * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular* purpose. * * Please cite the author in any work or product based on this material. ** ===========================================================================** Authors: Yuri Kapustin** File Description: CSplicedAligner32* * ===========================================================================**/#include <ncbi_pch.hpp>#include <algo/align/nw_spliced_aligner32.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPEconst unsigned char g_nwspl32_donor[splice_type_count_32][2] = { {'G','T'}, // donor type 1 in CDonorAcceptorMatrix coding {'G','C'}, // 2 {'A','T'} // 3};const unsigned char g_nwspl32_acceptor[splice_type_count_32][2] = { {'A','G'}, // 1 {'A','G'}, // 2 {'A','C'} // 3};// Transcript coding// (lower bits contain jump value for gaps and introns)const Uint4 kTypeDiag = 0x00000000; // single match or mismatchconst Uint4 kTypeGap = 0x40000000; // gap - any typeconst Uint4 kTypeIntron = 0x80000000; // intron// Donor-acceptor static object used// to figure out what donors and aceptors// could be transformed to any given na pair// by mutating either of its characters (but not both).// donor types: bits 5-7// acceptor types: bits 1-3class CDonorAcceptorMatrix {public: CDonorAcceptorMatrix() { memset(m_matrix, 0, sizeof m_matrix); m_matrix['A']['A'] = 0x47; // 3 ; 1, 2, 3 m_matrix['A']['G'] = 0x47; // 3 ; 1, 2, 3 m_matrix['A']['T'] = 0x57; // 1, 3 ; 1, 2, 3 m_matrix['A']['C'] = 0x67; // 2, 3 ; 1, 2, 3 m_matrix['G']['A'] = 0x33; // 1, 2 ; m_matrix['G']['G'] = 0x33; // 1, 2 ; 1, 2 m_matrix['G']['T'] = 0x70; // 1, 2, 3 ; m_matrix['G']['C'] = 0x34; // 1, 2 ; 3 m_matrix['T']['A'] = 0x00; // ; m_matrix['T']['G'] = 0x03; // ; 1, 2 m_matrix['T']['T'] = 0x50; // 1, 3 ; m_matrix['T']['C'] = 0x24; // 2 ; 3 m_matrix['C']['A'] = 0x00; // ; m_matrix['C']['G'] = 0x03; // ; 1, 2 m_matrix['C']['T'] = 0x50; // 1, 3 ; m_matrix['C']['C'] = 0x24; // 2 ; 3 } const Uint1* GetMatrix() const { return m_matrix[0]; }private: Uint1 m_matrix [256][256];};namespace { CDonorAcceptorMatrix g_dnr_acc_matrix;}CSplicedAligner32::CSplicedAligner32(): m_Wd1(GetDefaultWd1()), m_Wd2(GetDefaultWd2()){ for(unsigned char st = 0; st < splice_type_count_32; ++st) { m_Wi[st] = GetDefaultWi(st); }}CSplicedAligner32::CSplicedAligner32(const char* seq1, size_t len1, const char* seq2, size_t len2) : CSplicedAligner(seq1, len1, seq2, len2), m_Wd1(GetDefaultWd1()), m_Wd2(GetDefaultWd2()){ for(unsigned char st = 0; st < splice_type_count_32; ++st) { m_Wi[st] = GetDefaultWi(st); }}CNWAligner::TScore CSplicedAligner32::GetDefaultWi(unsigned char splice_type){ switch(splice_type) { case 0: return -15; // GT/AG case 1: return -18; // GC/AG case 2: return -21; // AT/AC default: { NCBI_THROW(CAlgoAlignException, eInvalidSpliceTypeIndex, "Invalid splice type index"); } }}// Evaluate dynamic programming matrix. Create transcript.CNWAligner::TScore CSplicedAligner32::x_Align ( const char* seg1, size_t len1, const char* seg2, size_t len2, vector<ETranscriptSymbol>* transcript ){ const size_t N1 = len1 + 1; const size_t N2 = len2 + 1; vector<TScore> stl_rowV (N2), stl_rowF (N2); TScore* rowV = &stl_rowV[0]; TScore* rowF = &stl_rowF[0]; // index calculation: [i,j] = i*n2 + j vector<Uint4> stl_bm (N1*N2); Uint4* backtrace_matrix = &stl_bm[0]; TScore* pV = rowV - 1; const char* seq1 = seg1 - 1; const char* seq2 = seg2 - 1; const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s; bool bFreeGapLeft1 = m_esf_L1 && seg1 == m_Seq1; bool bFreeGapRight1 = m_esf_R1 && m_Seq1 + m_SeqLen1 - len1 == seg1; bool bFreeGapLeft2 = m_esf_L2 && seg2 == m_Seq2; bool bFreeGapRight2 = m_esf_R2 && m_Seq2 + m_SeqLen2 - len2 == seg2; TScore wgleft1 = bFreeGapLeft1? 0: m_Wg; TScore wsleft1 = bFreeGapLeft1? 0: m_Ws; TScore wg1 = wgleft1, ws1 = wsleft1; // recurrences TScore wgleft2 = bFreeGapLeft2? 0: m_Wg; TScore wsleft2 = bFreeGapLeft2? 0: m_Ws; TScore V = 0; TScore V0 = 0; TScore E, G, n0; Uint4 type; // store candidate donors size_t* jAllDonors [splice_type_count_32]; TScore* vAllDonors [splice_type_count_32]; vector<size_t> stl_jAllDonors (splice_type_count_32 * N2); vector<TScore> stl_vAllDonors (splice_type_count_32 * N2); for(unsigned char st = 0; st < splice_type_count_32; ++st) { jAllDonors[st] = &stl_jAllDonors[st*N2]; vAllDonors[st] = &stl_vAllDonors[st*N2]; } size_t jTail[splice_type_count_32], jHead[splice_type_count_32]; TScore vBestDonor [splice_type_count_32]; size_t jBestDonor [splice_type_count_32]; // place to store gap opening starts size_t ins_start; vector<size_t> stl_del_start(N2); size_t* del_start = &stl_del_start[0]; // donor/acceptor matrix const Uint1 * dnr_acc_matrix = g_dnr_acc_matrix.GetMatrix(); // fake row (above lambda) rowV[0] = kInfMinus; size_t k; for (k = 0; k < N2; k++) { rowV[k] = rowF[k] = kInfMinus; del_start[k] = k; } k = 0; size_t i, j = 0, k0; unsigned char ci; for(i = 0; i < N1; ++i, j = 0) { V = i > 0? (V0 += wsleft2) : 0; E = kInfMinus; ins_start = k0 = k; backtrace_matrix[k++] = kTypeGap; // | del_start[0] ci = i > 0? seq1[i]: 'N'; for(unsigned char st = 0; st < splice_type_count_32; ++st) { jTail[st] = jHead[st] = 0; vBestDonor[st] = kInfMinus; } if(i == N1 - 1 && bFreeGapRight1) { wg1 = ws1 = 0; } TScore wg2 = m_Wg, ws2 = m_Ws; // detect donor candidate if(N2 > 2) { unsigned char d1 = seq2[1], d2 = seq2[2]; Uint1 dnr_type = 0xF0 & dnr_acc_matrix[(size_t(d1)<<8)|d2]; for(Uint1 st = 0; st < splice_type_count_32; ++st ) { jAllDonors[st][jTail[st]] = j; if(dnr_type & (0x10 << st)) { vAllDonors[st][jTail[st]] = ( d1 == g_nwspl32_donor[st][0] && d2 == g_nwspl32_donor[st][1] ) ? V: (V + m_Wd1); } else { // both chars distorted vAllDonors[st][jTail[st]] = V + m_Wd2; } ++(jTail[st]); } } for (j = 1; j < N2; ++j, ++k) { G = pV[j] + sm[ci][(unsigned char)seq2[j]]; pV[j] = V; n0 = V + wg1; if(E >= n0) { E += ws1; // continue the gap } else { E = n0 + ws1; // open a new gap ins_start = k-1; } if(j == N2 - 1 && bFreeGapRight2) { wg2 = ws2 = 0; } n0 = rowV[j] + wg2; if(rowF[j] >= n0) { rowF[j] += ws2; } else { rowF[j] = n0 + ws2; del_start[j] = k-N2; } // evaluate the score (V) if (E >= rowF[j]) { if(E >= G) { V = E; type = kTypeGap | ins_start; } else { V = G; type = kTypeDiag; } } else { if(rowF[j] >= G) { V = rowF[j]; type = kTypeGap | del_start[j]; } else { V = G; type = kTypeDiag; } } // find out if there are new donors for(unsigned char st = 0; st < splice_type_count_32; ++st) { if(jTail[st] > jHead[st]) { if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) { if(vAllDonors[st][jHead[st]] > vBestDonor[st]) { vBestDonor[st] = vAllDonors[st][jHead[st]]; jBestDonor[st] = jAllDonors[st][jHead[st]]; } ++(jHead[st]); } } } // check splice signal Uint4 dnr_pos = kMax_UI4; unsigned char c1 = seq2[j-1], c2 = seq2[j]; Uint1 acc_mask = 0x0F & dnr_acc_matrix[(size_t(c1)<<8)|c2]; for(Uint1 st = 0; st < splice_type_count_32; ++st ) { if(acc_mask & (0x01 << st)) { TScore vAcc = vBestDonor[st] + m_Wi[st]; if( c1 != g_nwspl32_acceptor[st][0] || c2 != g_nwspl32_acceptor[st][1] ) { vAcc += m_Wd1; } if(vAcc > V) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -