alnmix.cpp
来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页
CPP
1,672 行
} #endif TSeqs::iterator refseq_it = m_Rows.begin(); bool orig_refseq = true; while (true) { CAlnMixSeq * refseq = 0; while (refseq_it != m_Rows.end()) { refseq = *(refseq_it++); if (refseq->m_StartIt != refseq->m_Starts.end()) { break; } else { refseq = 0; } } if ( !refseq ) { // Done // add the gapped segments if any if (gapped_segs.size()) { if (m_MergeFlags & fGapJoin) { // request to try to align // gapped segments w/ equal len x_ConsolidateGaps(gapped_segs); } else if (m_MergeFlags & fMinGap) { // request to try to align // all gapped segments x_MinimizeGaps(gapped_segs); } NON_CONST_ITERATE (TSegmentsContainer, seg_i, gapped_segs) { m_Segments.push_back(&**seg_i); } gapped_segs.clear(); } break; // from the main loop } // for each refseq segment while (refseq->m_StartIt != refseq->m_Starts.end()) { stack< CRef<CAlnMixSegment> > seg_stack; seg_stack.push(refseq->m_StartIt->second); while ( !seg_stack.empty() ) { bool pop_seg = true; // check the gapped segments on the left ITERATE (CAlnMixSegment::TStartIterators, start_its_i, seg_stack.top()->m_StartIts) { CAlnMixSeq * row = start_its_i->first; if (row->m_StartIt != start_its_i->second) {#if _DEBUG if (row->m_PositiveStrand ? row->m_StartIt->first > start_its_i->second->first : row->m_StartIt->first < start_its_i->second->first) { string errstr = string("CAlnMix::x_CreateSegmentsVector():") + " Internal error: Integrity broken" + " row=" + NStr::IntToString(row->m_RowIndex) + " seq=" + NStr::IntToString(row->m_SeqIndex) + " row->m_StartIt->first=" + NStr::IntToString(row->m_StartIt->first) + " start_its_i->second->first=" + NStr::IntToString(start_its_i->second->first) + " refseq->m_StartIt->first=" + NStr::IntToString(refseq->m_StartIt->first) + " strand=" + (row->m_PositiveStrand ? "plus" : "minus"); NCBI_THROW(CAlnException, eMergeFailure, errstr); }#endif seg_stack.push(row->m_StartIt->second); pop_seg = false; break; } } if (pop_seg) { // inc/dec iterators for each row of the seg ITERATE (CAlnMixSegment::TStartIterators, start_its_i, seg_stack.top()->m_StartIts) { CAlnMixSeq * row = start_its_i->first;#if _DEBUG if (row->m_PositiveStrand && row->m_StartIt->first > start_its_i->second->first || !row->m_PositiveStrand && row->m_StartIt->first < start_its_i->second->first) { string errstr = string("CAlnMix::x_CreateSegmentsVector():") + " Internal error: Integrity broken" + " row=" + NStr::IntToString(row->m_RowIndex) + " seq=" + NStr::IntToString(row->m_SeqIndex) + " row->m_StartIt->first=" + NStr::IntToString(row->m_StartIt->first) + " start_its_i->second->first=" + NStr::IntToString(start_its_i->second->first) + " refseq->m_StartIt->first=" + NStr::IntToString(refseq->m_StartIt->first) + " strand=" + (row->m_PositiveStrand ? "plus" : "minus"); NCBI_THROW(CAlnException, eMergeFailure, errstr); }#endif if (row->m_PositiveStrand) { row->m_StartIt++; } else { if (row->m_StartIt == row->m_Starts.begin()) { row->m_StartIt = row->m_Starts.end(); } else { row->m_StartIt--; } } } if (seg_stack.size() > 1) { // add to the gapped segments gapped_segs.push_back(seg_stack.top()); seg_stack.pop(); } else { // add the gapped segments if any if (gapped_segs.size()) { if (m_MergeFlags & fGapJoin) { // request to try to align // gapped segments w/ equal len x_ConsolidateGaps(gapped_segs); } else if (m_MergeFlags & fMinGap) { // request to try to align // all gapped segments x_MinimizeGaps(gapped_segs); } if (orig_refseq) { NON_CONST_ITERATE (TSegmentsContainer, seg_i, gapped_segs) { m_Segments.push_back(&**seg_i); } gapped_segs.clear(); } } // add the refseq segment if (orig_refseq) { m_Segments.push_back(seg_stack.top()); } else { gapped_segs.push_back(seg_stack.top()); } seg_stack.pop(); } // if (seg_stack.size() > 1) } // if (popseg) } // while ( !seg_stack.empty() ) } // while (refseq->m_StartIt != refseq->m_Starts.end()) orig_refseq = false; } // while (true) if (m_MergeFlags & fFillUnalignedRegions) { vector<TSignedSeqPos> starts; vector<TSeqPos> lens; starts.resize(m_Rows.size(), -1); lens.resize(m_Rows.size(), 0); TSeqPos len = 0; CAlnMap::TNumrow rowidx; TSegments::iterator seg_i = m_Segments.begin(); while (seg_i != m_Segments.end()) { len = (*seg_i)->m_Len; ITERATE (CAlnMixSegment::TStartIterators, start_its_i, (*seg_i)->m_StartIts) { CAlnMixSeq * row = start_its_i->first; rowidx = row->m_RowIndex; TSignedSeqPos& prev_start = starts[rowidx]; TSeqPos& prev_len = lens[rowidx]; TSeqPos start = start_its_i->second->first; const bool plus = row->m_PositiveStrand; const int& width = row->m_Width; TSeqPos prev_start_plus_len = prev_start + prev_len * width; TSeqPos start_plus_len = start + len * width; if (prev_start >= 0) { if (plus && prev_start_plus_len < start || !plus && start_plus_len < (TSeqPos) prev_start) { // create a new seg CRef<CAlnMixSegment> seg (new CAlnMixSegment); TSeqPos new_start; if (row->m_PositiveStrand) { new_start = prev_start + prev_len * width; seg->m_Len = (start - new_start) / width; } else { new_start = start_plus_len; seg->m_Len = (prev_start - new_start) / width; } row->m_Starts[new_start] = seg; CAlnMixSeq::TStarts::iterator start_i = start_its_i->second; seg->m_StartIts[row] = row->m_PositiveStrand ? --start_i : ++start_i; seg_i = m_Segments.insert(seg_i, seg); seg_i++; } } prev_start = start; prev_len = len; } seg_i++; } }}void CAlnMix::x_ConsolidateGaps(TSegmentsContainer& gapped_segs){ TSegmentsContainer::iterator seg1_i, seg2_i; seg2_i = seg1_i = gapped_segs.begin(); if (seg2_i != gapped_segs.end()) { seg2_i++; } bool cache = false; string s1; TSeqPos start1; int score1; CAlnMixSeq * seq1; CAlnMixSeq * seq2; while (seg2_i != gapped_segs.end()) { CAlnMixSegment * seg1 = *seg1_i; CAlnMixSegment * seg2 = *seg2_i; // check if this seg possibly aligns with the previous one bool possible = true; if (seg2->m_Len == seg1->m_Len && seg2->m_StartIts.size() == 1) { seq2 = seg2->m_StartIts.begin()->first; // check if this seq was already used ITERATE (CAlnMixSegment::TStartIterators, st_it, (*seg1_i)->m_StartIts) { if (st_it->first == seq2) { possible = false; break; } } // check if score is sufficient if (possible && m_AddFlags & fCalcScore) { if (!cache) { seq1 = seg1->m_StartIts.begin()->first; start1 = seg1->m_StartIts[seq1]->first; TSeqPos start1_plus_len = start1 + seg1->m_Len * seq1->m_Width; if (seq1->m_PositiveStrand) { seq1->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Plus). GetSeqData(start1, start1_plus_len, s1); } else { CSeqVector seq_vec = seq1->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Minus); TSeqPos size = seq_vec.size(); seq_vec.GetSeqData(size - start1_plus_len, size - start1, s1); } score1 = CAlnVec::CalculateScore(s1, s1, seq1->m_IsAA, seq1->m_IsAA); cache = true; } string s2; const TSeqPos& start2 = seg2->m_StartIts[seq2]->first; TSeqPos start2_plus_len = start2 + seg2->m_Len * seq2->m_Width; if (seq2->m_PositiveStrand) { seq2->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Plus). GetSeqData(start2, start2_plus_len, s2); } else { CSeqVector seq_vec = seq2->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Minus); TSeqPos size = seq_vec.size(); seq_vec.GetSeqData(size - start2_plus_len, size - start2, s2); } int score2 = CAlnVec::CalculateScore(s1, s2, seq1->m_IsAA, seq2->m_IsAA); if (score2 < 75 * score1 / 100) { possible = false; } } } else { possible = false; } if (possible) { // consolidate the ones so far // add the new row seg1->m_StartIts[seq2] = seg2->m_StartIts.begin()->second; // point the row's start position to the beginning seg seg2->m_StartIts.begin()->second->secon
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?