📄 splign_hitparser.cpp
字号:
jj->Move(3, hit.m_ai[2] - 1, m_Prot2Nucl); for(size_t d = 0; d < 2; ++d) { b[d] = hit.m_ai[0] <= jj->m_ai[d] && jj->m_ai[d] <= hit.m_ai[1]; } } break; } } // check if *jj still exists if(!jj->IsConsistent()) { m_Out.erase(jj); bContinue = true; break; } } if(bContinue) break; } else bFirstLoop = false; size_t i = 0; for(i = 0; i < 2; ++i) { b[i] = hit.m_ai[0] <= jj->m_ai[i] && jj->m_ai[i] <= hit.m_ai[1]; } for(i = 2; i < 4; ++i) { b[i] = hit.m_ai[2] <= jj->m_ai[i] && jj->m_ai[i] <= hit.m_ai[3]; } if(b[0] && b[1] || b[2] && b[3]) { m_Out.erase(jj); bContinue = true; break; } } while(b[0] || b[1] || b[2] || b[3]); if(bContinue) continue; // the hit has dissapeared // (2) double ad[] = { hit.m_ai[0] - jj->m_ai[0], jj->m_ai[1] - hit.m_ai[1], hit.m_ai[2] - jj->m_ai[2], jj->m_ai[3] - hit.m_ai[3] }; bool ab[] = {ad[0] > 0, ad[1] > 0, ad[2] > 0, ad[3] > 0 }; bool bNewHit = false; CHit hit2; if(ab[0] && ab[1] || ab[2] && ab[3]) { // split *jj int n1, n2, n1x, n2x; if(jj->IsStraight()) { if(!(ab[2] && ab[3])) { n1 = 1; n2 = 0; n1x = hit.m_ai[0] - 1; n2x = hit.m_ai[1] + 1; } else if(!(ab[0] && ab[1])) { n1 = 3; n2 = 2; n1x = hit.m_ai[2] - 1; n2x = hit.m_ai[3] + 1; } else { n1 = (ad[0] < ad[2])? 1: 3; n2 = (ad[1] < ad[3])? 0: 2; n1x = (ad[0] < ad[2])? hit.m_ai[0] - 1: hit.m_ai[2] - 1; n2x = (ad[1] < ad[3])? hit.m_ai[1] + 1: hit.m_ai[3] + 1; } } else { if(!(ab[2] && ab[3])) { n1 = 1; n2 = 0; n1x = hit.m_ai[0] - 1; n2x = hit.m_ai[1] + 1; } else if(!(ab[0] && ab[1])) { n1 = 2; n2 = 3; n1x = hit.m_ai[3] + 1; n2x = hit.m_ai[2] - 1; } else { n1 = (ad[0] < ad[3])? 1: 2; n2 = (ad[1] < ad[2])? 0: 3; n1x = (ad[0] < ad[3])? hit.m_ai[0] - 1: hit.m_ai[3] + 1; n2x = (ad[1] < ad[2])? hit.m_ai[1] + 1: hit.m_ai[2] - 1; } } hit2 = *jj; bNewHit = true; jj->Move(n1, n1x, m_Prot2Nucl); if(!jj->IsConsistent()) { m_Out.erase(jj); } else { // some trapezoidal hits may still overlap bRestart = true; } hit2.Move(n2, n2x, m_Prot2Nucl); if(hit2.IsConsistent()) { // some trapezoidal hits may still overlap hit2.UpdateAttributes(); bRestart = true; } else { bNewHit = false; } } jj->UpdateAttributes(); if(bNewHit) { m_Out.push_back(hit2); } if(bRestart) break; } if(m_SameOrder) { x_FilterByOrder(i0, false); } // step 4: re-assess groups and continue the cycle if(bRestart) break; } } if(m_MinQueryCoverage > 0.) { // calculate query coverage double dQCov = CalcCoverage(m_Out.begin(), m_Out.end(), 'q'); if(dQCov < m_MinQueryCoverage* nQuerySize) m_Out.clear(); } if(m_MinSubjCoverage > 0.) { // calculate subj coverage double dSCov = CalcCoverage(m_Out.begin(), m_Out.end(), 's'); if(dSCov < m_MinSubjCoverage* nSubjSize) m_Out.clear(); } return 0;}void CHitParser::x_FilterByOrder(size_t offset, bool use_chainfilter){ size_t dim = m_Out.size(); if(offset >= dim) return; vector<CHit>::iterator ii_beg = m_Out.begin() + offset; if(use_chainfilter) { // groupwise chain filtering /* sort(ii_beg, m_Out.end(), hit_groupid_less()); int nGroupID = m_Out[0].m_GroupID; size_t start = 0, fin = 0, target = 0; while(fin < dim) { for(; fin < dim && nGroupID == m_Out[fin].m_GroupID; ++fin); start = fin; nGroupID = m_Out[fin].m_GroupID; vector<CHit> vh (ii_beg + start, ii_beg + fin); CChainFilter cf (vh); cf.Run(); copy(vh.begin(), vh.end(), ii_beg + target); target += vh.size(); } m_Out.resize(target); */ } else { // greedy filtering vector<CHit>::iterator ii2 = ii_beg, iend2; //for(iend2 = m_Out.end(); ii2 < iend2; ++ii2) { // iend2 = remove_if(ii2 + 1, iend2, // bind1st(hit_not_same_order(), *ii2)); //} iend2 = remove_if(ii2 + 1, m_Out.end(), bind1st(hit_not_same_order(), *ii2)); m_Out.erase(iend2, m_Out.end()); }}// Calculates proximity btw any two hits.// Return value can range from 0 (similar) to 1 (different)double CHitParser::x_CalculateProximity(const CHit& h1, const CHit& h2) const{ // if(m_SameOrder) // { // hit_not_same_order hnso; // if(hnso(h1,h2)) return 1.; // } double adm [] = { m_ange[1] - m_ange[0] + 1, m_ange[3] - m_ange[2] + 1}; double dm = max(adm[0], adm[1]); double ad [2]; for(int i = 0; i < 4; i += 2) { int i0 = i, i1 = i + 1; if(h1.m_ai[i1] <= h2.m_ai[i0]) // no intersection ad[i/2] = double(h2.m_ai[i0] - h1.m_ai[i1] - 1) / dm;// adm[i/2]; else if(h2.m_ai[i1] <= h1.m_ai[i0]) // no intersection ad[i/2] = double(h1.m_ai[i0] - h2.m_ai[i1] - 1) / dm;//adm[i/2]; else // inclusion if(h1.m_ai[i0] <= h2.m_ai[i0] && h2.m_ai[i1] <= h1.m_ai[i1] || h2.m_ai[i0] <= h1.m_ai[i0] && h1.m_ai[i1] <= h2.m_ai[i1] ) ad[i/2] = 1.0; else // intersection if(h2.m_ai[i0] <= h1.m_ai[i0] && h1.m_ai[i0] <= h2.m_ai[i1]) ad[i/2] = double(h2.m_ai[i1] - h1.m_ai[i0] + 1) / (h1.m_ai[i1] - h2.m_ai[i0] + 1); else ad[i/2] = double(h1.m_ai[i1] - h2.m_ai[i0] + 1) / (h2.m_ai[i1] - h1.m_ai[i0] + 1); } return max(ad[0],ad[1]);}int CHitParser::x_RunMSGS(bool bSelectGroupsOnly, int& nGroupId){ if( m_group_identification_method != eNone || bSelectGroupsOnly ) { if(m_group_identification_method == eQueryCoverage) { x_GroupsIdentifyByCoverage(0, m_Out.size(), nGroupId, 0, 'q'); } else { x_GroupsIdentifyByCoverage(0, m_Out.size(), nGroupId, 0, 's'); } x_SyncGroupsByMaxDist(nGroupId); } if(m_OutputAllGroups && !bSelectGroupsOnly && m_Out.size()) { // make copies to m_Out here // new subj string must reflect both number and strand // like [s4], [i8] // first we count the number of groups size_t nGroupCount = 1; {{ sort(m_Out.begin(), m_Out.end(), hit_groupid_less()); vector<CHit>::iterator ibegin = m_Out.begin(), iend = m_Out.end(), ii = ibegin; int ngid = ii->m_GroupID; for(++ii; ii != iend; ++ii) { if(ii->m_GroupID != ngid) { ngid = ii->m_GroupID; ++nGroupCount; } } }} {{ // determine strands of the groups // 1 = straight, 0 = inverse, -1 = both / undefined vector<char> vStrands (nGroupCount, -1); vector<CHit>::iterator ibegin = m_Out.begin(), ii = ibegin, iend = m_Out.end(); for(size_t ig = 0; ig < nGroupCount; ++ig) { vStrands[ig] = ii->IsStraight()? 1: 0; int ngid = ii->m_GroupID; for(; ii != iend; ii++) { if(ii->m_GroupID != ngid) break; char cs = ii->IsStraight()? 1: 0; if(cs != vStrands[ig]) vStrands[ig] = -1; } } // now separate the groups ii = ibegin; string strSubject0 = ii->m_Subj; int ngc_s = 0, ngc_i = 0; for(size_t ig = 0; ig < nGroupCount; ig++) { int ngid = ii->m_GroupID; CNcbiOstrstream oss; // new subject char c0 = (vStrands[ig] == -1)? 'x': (vStrands[ig] == 0)? 'm': 'p'; int ngc = (c0 == 'i')? ++ngc_i: ++ngc_s; oss << strSubject0 << "_[" << c0 << ngc << ']'; const string new_subj = CNcbiOstrstreamToString(oss); for(; ii != iend; ++ii) { if(ii->m_GroupID != ngid) break; ii->m_Subj = new_subj; } } }} return 0; // we don't resolve ambiguities in this mode } if(bSelectGroupsOnly) return 0; vector<CHit> vh (m_Out); vector<CHit> vh_out; double max_group_score = 0; size_t i = 0, dim = vh.size(); while(i < dim) { int groupid0 = vh[i].m_GroupID; int istart = i; for(; i < dim && groupid0 == vh[i].m_GroupID; ++i); int ifin = i; m_Out.resize(ifin - istart); copy(vh.begin() + istart, vh.begin() + ifin, m_Out.begin()); x_RunMaxScore(); double group_score = 0; size_t dim_group = m_Out.size(); for(size_t k = 0; k < dim_group; ++k) group_score += m_Out[k].m_Score; if(m_OutputAllGroups) { copy(m_Out.begin(), m_Out.end(), back_inserter(vh_out)); } else if(group_score > max_group_score) { max_group_score = group_score; vh_out.clear(); copy(m_Out.begin(), m_Out.end(), back_inserter(vh_out)); } } m_Out = vh_out; return 0;}void CHitParser::x_GroupsIdentifyByCoverage(int iStart, int iStop, int& iGroupId, double dTotalCoverage, char where){ bool bTopLevelCall = (iStart == 0) && (size_t(iStop) == m_Out.size()); if(bTopLevelCall) { // top-level call x_CalcGlobalEnvelop(); if(where == 'q') { stable_sort(m_Out.begin() + iStart, m_Out.begin() + iStop, CHit::PPrecedeS); } else { stable_sort(m_Out.begin() + iStart, m_Out.begin() + iStop, CHit::PPrecedeQ); } dTotalCoverage = CalcCoverage(m_Out.begin() + iStart, m_Out.begin() + iStop, where); } // assign new groupid {{ ++iGroupId; for(int i = iStart; i < iStop; ++i) m_Out[i].m_GroupID = iGroupId; }} if(iStop - iStart <= 1) return; // choose the best location double dMax = dTotalCoverage; int iMax = iStop; unsigned char idx1, idx2; if(where == 'q') { idx1 = 2; idx2 = 3; } else { idx1 = 0; idx2 = 1; } // find all gaps, sort them by size, then // proceed from larger to lower typedef multimap<int, int> mm_t; mm_t mm_gapsize2index; for(int i = iStart; i < iStop - 1; i++) { int gap = m_Out[i+1].m_ai[idx1] - m_Out[i].m_ai[idx2]; if(gap > 0) { mm_gapsize2index.insert(mm_t::value_type(gap, i)); } } mm_t::reverse_iterator mmirb = mm_gapsize2index.rbegin(); mm_t::reverse_iterator mmire = mm_gapsize2index.rend(); mm_t::reverse_iterator mmir; bool match = false; for(mmir = mmirb; mmir != mmire; ++mmir) { int i = mmir->second; int n1 = CalcCoverage(m_Out.begin() + iStart, m_Out.begin() + i + 1, where); int n2 = CalcCoverage(m_Out.begin() + i + 1, m_Out.begin() + iStop, where); double d = (n1 + n2); match = (d - dTotalCoverage) / dTotalCoverage >= m_CovStep; if(match) { dMax = d; iMax = i; break; } } if(match) { x_GroupsIdentifyByCoverage(iStart, iMax + 1, iGroupId, dTotalCoverage, where); x_GroupsIdentifyByCoverage(iMax + 1, iStop, iGroupId, dTotalCoverage, where); }}size_t CHitParser::x_RemoveEqual(){ typedef map<size_t, vector<size_t> > map_t; map_t m; // hit checksum to hits vector<size_t> vhd; // hits to delete size_t dim = m_Out.size(); for(size_t i = 0; i < dim ; ++i) { const CHit& h = m_Out[i]; size_t checksum = ( h.m_an[0] + h.m_an[1] + h.m_an[2] + h.m_an[3] ) % 1000; map_t::iterator mi = m.find(checksum); if(mi == m.end()) { m[checksum].push_back(i); } else { vector<size_t>& vph = mi->second; size_t dim_hits = vph.size(), k = 0; for(; k < dim_hits; ++k) { const CHit& h2 = m_Out[vph[k]]; if( h.m_an[0] == h2.m_an[0] && h.m_an[1] == h2.m_an[1] && h.m_an[2] == h2.m_an[2] && h.m_an[3] == h2.m_an[3] ) { vhd.push_back(i); break; } } if(k == dim_hits) { m[checksum].push_back(i); } } } size_t del_count = 0; if(vhd.size()) { sort(vhd.begin(), vhd.end()); size_t dim_vhd = vhd.size(); size_t vhd_i = 0; for(size_t k = vhd[vhd_i]; k < dim - del_count; ++k) { bool b = false; if(vhd_i < dim_vhd && k == vhd[vhd_i] - del_count) { ++del_count; ++vhd_i; b = true; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -