📄 splign_hitparser.cpp
字号:
size_t k1 = k + del_count; if(k1 < dim) { m_Out[k] = m_Out[k1]; } else break; if(b) --k; } m_Out.resize(dim - del_count); } return del_count;}int CHitParser::Run(EMode erm){ int nCode = x_CheckParameters(); if(nCode) return nCode; vector<CHit> vhInvStrand; // used only if strand mode = auto int& nGroupId = *m_GroupID; // auxiliary variable size_t dim = m_Out.size(); if(dim) { x_TransformCoordinates(true); x_Combine(m_CombinationProximity_pre); {{ // filter by strand or separate strands, if applicable if(m_Strand == eStrand_Plus || m_Strand == eStrand_Minus || m_Strand == eStrand_Auto) { vector<CHit>::iterator ii; for(ii = m_Out.end()-1; ii >= m_Out.begin(); ii--) { bool bStraight = ii->IsStraight(); switch(m_Strand) { case eStrand_Plus: if (bStraight) continue; else m_Out.erase(ii); break; case eStrand_Minus: if (!bStraight) continue; else m_Out.erase(ii); break; case eStrand_Auto: // separate strands before processing if (bStraight) continue; else { vhInvStrand.push_back(*ii); m_Out.erase(ii); } break; } } } x_CalcGlobalEnvelop(); switch(erm) { case eMode_Combine: // already combined break; case eMode_GroupSelect: x_RunMSGS(true, nGroupId); break; case eMode_Normal: x_IdentifyMaxDistGroups(); switch(m_Method) { case eMethod_MaxScore: x_RunMaxScore(); break; case eMethod_MaxScore_GroupSelect: x_RunMSGS(false, nGroupId); break; default: return 1; // not supported } if(!m_OutputAllGroups) { // we need to identify again because "connecting" // hits might might have perished x_IdentifyMaxDistGroups(); x_FilterByMaxDist(); } break; } // if strand mode is auto then we run it again for inverse strand // and then compare the strands if(m_Strand == 3) { vector<CHit> vh0 = m_Out; m_Out = vhInvStrand; x_CalcGlobalEnvelop(); switch(erm) { case eMode_Combine: break; case eMode_GroupSelect: x_RunMSGS(true, nGroupId); break; case eMode_Normal: x_IdentifyMaxDistGroups(); switch(m_Method) { case eMethod_MaxScore: x_RunMaxScore(); break; case eMethod_MaxScore_GroupSelect: x_RunMSGS(false, nGroupId); break; default: return 1; // not supported } x_IdentifyMaxDistGroups(); x_FilterByMaxDist(); break; } if(erm == eMode_Normal && !m_OutputAllGroups) { // leave only the best strand double dScoreStraight = 0., dScoreInverse = 0.; vector<CHit>::iterator ii = vh0.begin(), iend = vh0.end(); for(; ii != iend; ii++) dScoreStraight += ii->m_Score; ii = m_Out.begin(); iend = m_Out.end(); for(; ii != iend; ii++) dScoreInverse += ii->m_Score; if(dScoreStraight > dScoreInverse) m_Out = vh0; } else { // merge the strands since we don't actually want to // eliminate ambiguities here copy(vh0.begin(), vh0.end(), back_inserter(m_Out)); } } x_Combine(m_CombinationProximity_post); }} x_TransformCoordinates(false); } return nCode;}int CHitParser::x_CheckParameters() const{ switch (m_Method) { case eMethod_MaxScore: case eMethod_MaxScore_GroupSelect: break; default: return 1; // unsupported }; switch (m_Strand) { case 0: case 1: case 3: break; // same order and different strands are incompatible case 2: if(m_SameOrder) return 3; else break; default: return 2; // unsupported }; return 0;}// counts the number of collisions// pvh0== 0 => count at m_Outint CHitParser::GetCollisionCount (int& nq, int& ns, const vector<CHit>& vh){ nq = ns = 0; vector<SNode> vi_q, vi_s; vector<CHit>::const_iterator phi0 = vh.begin(), phi1 = vh.end(), phi = phi0; for(phi = phi0; phi != phi1; ++phi) { SNode Nodes [] = { SNode(phi->m_ai[0],true,phi), SNode(phi->m_ai[1],false,phi), SNode(phi->m_ai[2],true,phi), SNode(phi->m_ai[3],false,phi) }; vi_q.push_back(Nodes[0]); vi_q.push_back(Nodes[1]); vi_s.push_back(Nodes[2]); vi_s.push_back(Nodes[3]); } sort(vi_q.begin(),vi_q.end()); sort(vi_s.begin(),vi_s.end()); int nLevel = 0; vector<SNode>::iterator ii; for(ii = vi_q.begin(); ii != vi_q.end(); ii++) if(ii->type) { if(++nLevel == 2) nq++; } else nLevel--; nLevel = 0; vector<SNode>::iterator jj; for(jj = vi_s.begin(); jj != vi_s.end(); jj++) if(jj->type) { if(++nLevel == 2) ns++; } else nLevel--; return nq + ns;}void CHitParser::x_TransformCoordinates(bool bDir){ if(bDir) { // transform hits m_Origin[0] = m_ange[0]; m_Origin[1] = m_ange[2]; vector<CHit>::iterator ii, iend; for(ii = m_Out.begin(), iend = m_Out.end(); ii != iend; ii++) { for(int k=0; k<4; k++) { ii->m_an[k] -= m_Origin[k/2]; ii->m_ai[k] -= m_Origin[k/2]; } } } else { vector<CHit>::iterator ii, iend; vector<vector<CHit>::iterator> vhi_del; for(ii = m_Out.begin(), iend = m_Out.end(); ii != iend; ii++) { for(int k=0; k<4; k++) { ii->m_an[k] += m_Origin[k/2]; ii->m_ai[k] += m_Origin[k/2]; } if(ii->m_ai[1] == ii->m_ai[0] || ii->m_ai[3] == ii->m_ai[2]) vhi_del.push_back(ii); } // delete the slack vector<vector<CHit>::iterator>::reverse_iterator i1, i1e; for(i1 = vhi_del.rbegin(), i1e = vhi_del.rend(); i1 != i1e; i1++) m_Out.erase(*i1); } x_CalcGlobalEnvelop();}void CHitParser::x_FilterByMaxDist(){ // sum and compare the scores map<int,double> mgr2sc; int nSize = m_Out.size(); for(int j = 0; j < nSize; j++) { const int nClustID = CMaxDistClIdGet()(m_Out[j]); mgr2sc[nClustID] += m_Out[j].m_Score; } double dmax = -1e38; map<int,double>::iterator im, im1; for(im1 = im = mgr2sc.begin(); im != mgr2sc.end(); im++) { if(im->second > dmax) { dmax = im->second; im1 = im; } } // leave only the top-score max dist group vector<CHit>::iterator ie = remove_if( m_Out.begin(), m_Out.end(), bind2nd(hit_out_of_group(),im1->first)); m_Out.erase(ie,m_Out.end());}void CHitParser::x_IdentifyMaxDistGroups(){ DoSingleLinkageClustering ( m_Out, CMaxDistClustPred(m_MaxHitDistQ, m_MaxHitDistS), CMaxDistClIdSet(), CMaxDistClIdGet() );}void CHitParser::x_SyncGroupsByMaxDist(int& nGroupID){ if(m_Out.size() == 0) return; // order by GroupID stable_sort(m_Out.begin(), m_Out.end(), hit_groupid_less()); // renumber the groups to make sure that // we have same nClustID throughout the group vector<CHit>::iterator ibegin = m_Out.begin(), iend = m_Out.end(), ii = ibegin; int ngid = ii->m_GroupID, nmdcid = ii->m_MaxDistClustID; ii->m_GroupID = ++nGroupID; for(++ii; ii != iend; ++ii) { if(ngid == ii->m_GroupID && nmdcid != ii->m_MaxDistClustID) { ++nGroupID; nmdcid = ii->m_MaxDistClustID; } if(ngid != ii->m_GroupID) { ++nGroupID; ngid = ii->m_GroupID; } ii->m_GroupID = nGroupID; }}size_t CHitParser::GetSeqLength(const string& accession){ return 0; /* using namespace objects; map<string, size_t>::const_iterator i1 = m_seqsizes.find(accession), i2 = m_seqsizes.end(); if(i1 != i2) { size_t s = i1->second; return s; } CObjectManager om; om.RegisterDataLoader ( *new CGBDataLoader("ID", new CId1Reader, 2), CObjectManager::eDefault ); CScope scope(om); scope.AddDefaults(); CSeq_id seq_id (accession); CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id); if ( bioseq_handle ) { const CBioseq& bioseq = bioseq_handle.GetBioseq(); const CSeq_inst& inst = bioseq.GetInst(); if(inst.IsSetLength()) { size_t s = inst.GetLength(); m_seqsizes[accession] = s; return s; } } return 0; */}bool CHitGroup::CheckSameOrder(){ if(m_vHits.size() == 0) return true; vector<int>::const_iterator ivh0 = m_vHits.begin(), ivh1 = m_vHits.end(), ii, jj; hit_not_same_order hnso; for(ii = ivh0; ii < ivh1; ++ii) { for(jj = ivh0; jj < ii; ++jj) { if(hnso( (*m_pHits)[*ii], (*m_pHits)[*jj] )) return false; } } return true;}void CHitParser::x_Combine(double dProximity){ if(dProximity < 0 || m_Out.size() == 0) { return; } x_CalcGlobalEnvelop(); size_t total_hits = m_Out.size(); vector<CHit*> vh (total_hits); CHit* phit = &(m_Out[0]); for(size_t i = 0; i < total_hits; ++i) { vh[i] = phit++; } typedef vector<CHit*>::iterator TVHitPtrI; // separate strands sort(vh.begin(), vh.end(), CHit::PPrecedeStrand_ptr); TVHitPtrI strand_minus_start = find_if(vh.begin(), vh.end(), bind2nd(hit_ptr_strand_is(), true)); typedef pair<TVHitPtrI, TVHitPtrI> THitRange; THitRange strands [2]; strands[0].first = vh.begin(); strands[0].second = strands[1].first = strand_minus_start; strands[1].second = strands[0].first + total_hits; for(size_t strand = 0; strand < 2; ++strand) { TVHitPtrI start = strands[strand].first; TVHitPtrI finish = strands[strand].second; size_t dim = finish - start; if(dim < 2) continue; size_t count; do { count = 0; stable_sort(start, finish, CHit::PGreaterScore_ptr); for(TVHitPtrI i1 = start; i1 < finish - 1; ++i1) { if(*i1 == 0) continue; for(TVHitPtrI j1 = i1 + 1; j1 < finish; ++j1) { if(*j1 == 0) continue; double d = x_CalculateProximity(**i1, **j1); if(d <= dProximity) { CHit hc (**i1, **j1); **i1 = hc; *j1 = 0; ++count; } } } } while(count); } // flush deletions, compress pointers vector, fill original hit vector TVHitPtrI iv = remove(vh.begin(), vh.end(), (CHit*) 0); vh.erase(iv, vh.end()); TVHitPtrI ib = vh.begin(), ie = vh.end(); sort(ib, ie, CHit::PGreaterScore_ptr); vector<CHit> vout (vh.size()); for(iv = ib; iv != ie; ++iv) { vout[iv - ib] = **iv; } m_Out = vout; return;}END_NCBI_SCOPE/** ===========================================================================** $Log: splign_hitparser.cpp,v $* Revision 1000.0 2004/06/01 18:12:53 gouriano* PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.7** Revision 1.7 2004/05/24 16:13:57 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6 2004/05/18 21:43:40 kapustin* Code cleanup** Revision 1.5 2004/05/03 21:53:57 kapustin* Eliminate OM-dependant code** Revision 1.4 2004/04/26 16:52:44 ucko* Add an explicit "typename" annotation required by GCC 3.4, and adjust* the code to take advantage of the ITERATE macro.** Revision 1.3 2004/04/23 14:37:44 kapustin* *** empty log message ***** * ===========================================================================*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -