📄 splign.cpp
字号:
++k0; } int k1 = int(seg_dim - 1); while(k1 >= int(k0)) { SSegment& s = segments[k1]; if(s.m_exon) { if(s.m_idty < m_minidty || m_endgaps) { s.ImproveFromRight(Seq1, Seq2); } if(s.m_idty >= m_minidty) { break; } } --k1; } const size_t SeqLen2 = m_genomic.size(); const size_t SeqLen1 = m_polya_start == kMax_UInt? m_mrna.size(): m_polya_start; // fill the right-hand gap, if any if( segments[seg_dim - 1].m_exon && segments[seg_dim - 1].m_box[1] < SeqLen1 - 1) { SSegment g; g.m_exon = false; g.m_box[0] = segments[seg_dim - 1].m_box[1] + 1; g.m_box[1] = SeqLen1 - 1; g.m_box[2] = segments[seg_dim - 1].m_box[3] + 1; g.m_box[3] = SeqLen2 - 1; g.m_idty = 0; g.m_len = g.m_box[1] - g.m_box[0] + 1; g.m_annot = "<GAP>"; g.m_details = string(Seq1 + g.m_box[0], g.m_box[1] + 1 - g.m_box[0]); segments.push_back(g); ++seg_dim; } // turn to gaps exons with low identity for(size_t k = 0; k < seg_dim; ++k) { SSegment& s = segments[k]; if(s.m_exon && s.m_idty < m_minidty) { s.m_exon = false; s.m_idty = 0; s.m_len = s.m_box[1] - s.m_box[0] + 1; s.m_annot = "<GAP>"; s.m_details.resize(0); s.m_details.append(Seq1 + s.m_box[0], s.m_box[1] + 1 - s.m_box[0]); } } // turn to gaps extra-short exons preceeded/followed by gaps bool gap_prev = false; for(size_t k = 0; k < seg_dim; ++k) { SSegment& s = segments[k]; if(s.m_exon == false) { gap_prev = true; } else { size_t length = s.m_box[1] - s.m_box[0] + 1; bool gap_next = false; if(k + 1 < seg_dim) { gap_next = !segments[k+1].m_exon; } if(length <= 5 && (gap_prev || gap_next)) { s.m_exon = false; s.m_idty = 0; s.m_len = s.m_box[1] - s.m_box[0] + 1; s.m_annot = "<GAP>"; s.m_details.resize(0); s.m_details.append(Seq1 + s.m_box[0], s.m_box[1] + 1 - s.m_box[0]); } gap_prev = false; } } // now merge all adjacent gaps int gap_start_idx = -1; if(segments.size() && segments[0].m_exon == false) { gap_start_idx = 0; } size_t segs_dim = segments.size(); for(size_t k = 0; k < segs_dim; ++k) { SSegment& s = segments[k]; if(!s.m_exon) { if(gap_start_idx == -1) { gap_start_idx = int(k); if(k > 0) { s.m_box[0] = segments[k-1].m_box[1] + 1; s.m_box[2] = segments[k-1].m_box[3] + 1; } } } else { if(gap_start_idx >= 0) { SSegment& g = segments[gap_start_idx]; g.m_box[1] = s.m_box[0] - 1; g.m_box[3] = s.m_box[2] - 1; g.m_len = g.m_box[1] - g.m_box[0] + 1; g.m_details.resize(0); g.m_details.append(Seq1 + g.m_box[0], g.m_box[1] + 1 - g.m_box[0]); m_segments.push_back(g); gap_start_idx = -1; } m_segments.push_back(s); } } if(gap_start_idx >= 0) { SSegment& g = segments[gap_start_idx]; g.m_box[1] = segments[segs_dim-1].m_box[1]; g.m_box[3] = segments[segs_dim-1].m_box[3]; g.m_len = g.m_box[1] - g.m_box[0] + 1; g.m_details.resize(0); g.m_details.append(Seq1 + g.m_box[0], g.m_box[1] + 1 - g.m_box[0]); m_segments.push_back(g); }}// try improving the segment by cutting it from the leftvoid CSplign::SSegment::ImproveFromLeft(const char* seq1, const char* seq2){ const size_t min_query_size = 4; int i0 = int(m_box[1] - m_box[0] + 1), i0_max = i0; if(i0 < int(min_query_size)) { return; } // find the top score suffix int i1 = int(m_box[3] - m_box[2] + 1), i1_max = i1; CNWAligner::TScore score_max = 0, s = 0; const CNWAligner::TScore wm = 1; const CNWAligner::TScore wms = -1; const CNWAligner::TScore wg = 0; const CNWAligner::TScore ws = -1; string::reverse_iterator irs0 = m_details.rbegin(), irs1 = m_details.rend(), irs = irs0, irs_max = irs0; for( ; irs != irs1; ++irs) { switch(*irs) { case 'M': { s += wm; --i0; --i1; } break; case 'R': { s += wms; --i0; --i1; } break; case 'I': { s += ws; if(irs > irs0 && *(irs-1)!='I') s += wg; --i1; } break; case 'D': { s += ws; if(irs > irs0 && *(irs-1)!='D') s += wg; --i0; } } if(s >= score_max) { score_max = s; i0_max = i0; i1_max = i1; irs_max = irs; } } // work around a weird case of equally optimal // but detrimental for our purposes alignment // -check the actual sequence chars size_t head = 0; while(i0_max > 0 && i1_max > 0) { if(seq1[m_box[0]+i0_max-1] == seq2[m_box[2]+i1_max-1]) { --i0_max; --i1_max; ++head; } else { break; } } // if the resulting segment is still long enough if(m_box[1] - m_box[0] + 1 - i0_max >= min_query_size && i0_max > 0) { // resize m_box[0] += i0_max; m_box[2] += i1_max; m_details.erase(0, m_details.size() - (irs_max - irs0 + 1)); m_details.insert(m_details.begin(), head, 'M'); RestoreIdentity(); // update the first two annotation symbols if(m_annot.size() > 2 && m_annot[2] == '<') { int j1 = m_box[2] - 2; char c1 = j1 >= 0? seq2[j1]: ' '; m_annot[0] = c1; int j2 = m_box[2] - 2; char c2 = j2 >= 0? seq2[j2]: ' '; m_annot[1] = c2; } }}// try improving the segment by cutting it from the rightvoid CSplign::SSegment::ImproveFromRight(const char* seq1, const char* seq2){ const size_t min_query_size = 4; if(m_box[1] - m_box[0] + 1 < min_query_size) { return; } // find the top score prefix int i0 = -1, i0_max = i0; int i1 = -1, i1_max = i1; CNWAligner::TScore score_max = 0, s = 0; const CNWAligner::TScore wm = 1; const CNWAligner::TScore wms = -1; const CNWAligner::TScore wg = 0; const CNWAligner::TScore ws = -1; string::iterator irs0 = m_details.begin(), irs1 = m_details.end(), irs = irs0, irs_max = irs0; for( ; irs != irs1; ++irs) { switch(*irs) { case 'M': { s += wm; ++i0; ++i1; } break; case 'R': { s += wms; ++i0; ++i1; } break; case 'I': { s += ws; if(irs > irs0 && *(irs-1) != 'I') s += wg; ++i1; } break; case 'D': { s += ws; if(irs > irs0 && *(irs-1) != 'D') s += wg; ++i0; } } if(s >= score_max) { score_max = s; i0_max = i0; i1_max = i1; irs_max = irs; } } int dimq = int(m_box[1] - m_box[0] + 1); int dims = int(m_box[3] - m_box[2] + 1); // work around a weird case of equally optimal // but detrimental for our purposes alignment // -check the actual sequences size_t tail = 0; while(i0_max < dimq - 1 && i1_max < dims - 1) { if(seq1[m_box[0]+i0_max+1] == seq2[m_box[2]+i1_max+1]) { ++i0_max; ++i1_max; ++tail; } else { break; } } dimq += tail; dims += tail; // if the resulting segment is still long enough if(i0_max >= int(min_query_size) && i0_max < dimq - 1) { m_box[1] = m_box[0] + i0_max; m_box[3] = m_box[2] + i1_max; m_details.resize(irs_max - irs0 + 1); m_details.insert(m_details.end(), tail, 'M'); RestoreIdentity(); // update the last two annotation chars const size_t adim = m_annot.size(); if(adim > 2 && m_annot[adim - 3] == '>') { m_annot[adim-2] = seq2[m_box[3] + 1]; m_annot[adim-1] = seq2[m_box[3] + 2]; } }}void CSplign::SSegment::RestoreIdentity(){ // restore length and identity m_len = m_details.size(); string::const_iterator ib = m_details.begin(), ie = m_details.end(); size_t count = 0; // not using std::count here due to known incompatibilty for(string::const_iterator ii = ib; ii != ie; ++ii) { if(*ii == 'M') ++count; } m_idty = double(count) / m_len;}const char* CSplign::SSegment::GetDonor() const { const size_t adim = m_annot.size(); return (adim > 2 && m_annot[adim - 3] == '>')? (m_annot.c_str() + adim - 2): 0;}const char* CSplign::SSegment::GetAcceptor() const { const size_t adim = m_annot.size(); return (adim > 3 && m_annot[2] == '<')? m_annot.c_str(): 0;}END_NCBI_SCOPE/* * =========================================================================== * $Log: splign.cpp,v $ * Revision 1000.0 2004/06/01 18:12:28 gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.12 * * Revision 1.12 2004/05/24 16:13:57 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.11 2004/05/19 13:37:48 kapustin * Remove test dumping code * * Revision 1.10 2004/05/18 21:43:40 kapustin * Code cleanup * * Revision 1.9 2004/05/04 15:23:45 ucko * Split splign code out of xalgoalign into new xalgosplign. * * Revision 1.8 2004/05/03 15:22:18 johnson * added typedefs for public stl types * * Revision 1.7 2004/04/30 15:00:47 kapustin * Support ASN formatting * * Revision 1.6 2004/04/26 15:38:45 kapustin * Add model_id as a CSplign member * * Revision 1.5 2004/04/23 18:43:47 ucko * <cmath> -> <math.h>, since some older compilers (MIPSpro) lack the wrappers. * * Revision 1.4 2004/04/23 16:52:04 kapustin * Change the way we get donor address * * Revision 1.3 2004/04/23 14:37:44 kapustin * *** empty log message *** * * =========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -