alnmix.cpp

来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页

CPP
1,672
字号
             }       #endif    TSeqs::iterator refseq_it = m_Rows.begin();    bool orig_refseq = true;    while (true) {        CAlnMixSeq * refseq = 0;        while (refseq_it != m_Rows.end()) {            refseq = *(refseq_it++);            if (refseq->m_StartIt != refseq->m_Starts.end()) {                break;            } else {                refseq = 0;            }        }        if ( !refseq ) {            // Done            // add the gapped segments if any            if (gapped_segs.size()) {                if (m_MergeFlags & fGapJoin) {                    // request to try to align                    // gapped segments w/ equal len                    x_ConsolidateGaps(gapped_segs);                } else if (m_MergeFlags & fMinGap) {                    // request to try to align                     // all gapped segments                    x_MinimizeGaps(gapped_segs);                }                NON_CONST_ITERATE (TSegmentsContainer,                                   seg_i, gapped_segs) {                    m_Segments.push_back(&**seg_i);                }                gapped_segs.clear();            }            break; // from the main loop        }        // for each refseq segment        while (refseq->m_StartIt != refseq->m_Starts.end()) {            stack< CRef<CAlnMixSegment> > seg_stack;            seg_stack.push(refseq->m_StartIt->second);                        while ( !seg_stack.empty() ) {                                bool pop_seg = true;                // check the gapped segments on the left                ITERATE (CAlnMixSegment::TStartIterators, start_its_i,                         seg_stack.top()->m_StartIts) {                    CAlnMixSeq * row = start_its_i->first;                    if (row->m_StartIt != start_its_i->second) {#if _DEBUG                        if (row->m_PositiveStrand ?                            row->m_StartIt->first >                            start_its_i->second->first :                            row->m_StartIt->first <                            start_its_i->second->first) {                            string errstr =                                string("CAlnMix::x_CreateSegmentsVector():")                                + " Internal error: Integrity broken" +                                " row=" + NStr::IntToString(row->m_RowIndex) +                                " seq=" + NStr::IntToString(row->m_SeqIndex)                                + " row->m_StartIt->first="                                + NStr::IntToString(row->m_StartIt->first)                                + " start_its_i->second->first=" +                                NStr::IntToString(start_its_i->second->first)                                + " refseq->m_StartIt->first=" +                                NStr::IntToString(refseq->m_StartIt->first)                                + " strand=" +                                (row->m_PositiveStrand ? "plus" : "minus");                            NCBI_THROW(CAlnException, eMergeFailure, errstr);                        }#endif                        seg_stack.push(row->m_StartIt->second);                        pop_seg = false;                        break;                    }                }                if (pop_seg) {                    // inc/dec iterators for each row of the seg                    ITERATE (CAlnMixSegment::TStartIterators, start_its_i,                             seg_stack.top()->m_StartIts) {                        CAlnMixSeq * row = start_its_i->first;#if _DEBUG                        if (row->m_PositiveStrand  &&                            row->m_StartIt->first >                             start_its_i->second->first  ||                            !row->m_PositiveStrand  &&                            row->m_StartIt->first <                            start_its_i->second->first) {                            string errstr =                                string("CAlnMix::x_CreateSegmentsVector():")                                + " Internal error: Integrity broken" +                                " row=" + NStr::IntToString(row->m_RowIndex) +                                " seq=" + NStr::IntToString(row->m_SeqIndex)                                + " row->m_StartIt->first="                                + NStr::IntToString(row->m_StartIt->first)                                + " start_its_i->second->first=" +                                NStr::IntToString(start_its_i->second->first)                                + " refseq->m_StartIt->first=" +                                NStr::IntToString(refseq->m_StartIt->first)                                + " strand=" +                                (row->m_PositiveStrand ? "plus" : "minus");                            NCBI_THROW(CAlnException, eMergeFailure, errstr);                        }#endif                        if (row->m_PositiveStrand) {                            row->m_StartIt++;                        } else {                            if (row->m_StartIt == row->m_Starts.begin()) {                                row->m_StartIt = row->m_Starts.end();                            } else {                                row->m_StartIt--;                            }                        }                    }                    if (seg_stack.size() > 1) {                        // add to the gapped segments                        gapped_segs.push_back(seg_stack.top());                        seg_stack.pop();                    } else {                        // add the gapped segments if any                        if (gapped_segs.size()) {                            if (m_MergeFlags & fGapJoin) {                                // request to try to align                                // gapped segments w/ equal len                                x_ConsolidateGaps(gapped_segs);                            } else if (m_MergeFlags & fMinGap) {                                // request to try to align                                 // all gapped segments                                x_MinimizeGaps(gapped_segs);                            }                            if (orig_refseq) {                                NON_CONST_ITERATE (TSegmentsContainer,                                                   seg_i, gapped_segs) {                                    m_Segments.push_back(&**seg_i);                                }                                gapped_segs.clear();                            }                        }                        // add the refseq segment                        if (orig_refseq) {                            m_Segments.push_back(seg_stack.top());                        } else {                            gapped_segs.push_back(seg_stack.top());                        }                        seg_stack.pop();                    } // if (seg_stack.size() > 1)                } // if (popseg)            } // while ( !seg_stack.empty() )        } // while (refseq->m_StartIt != refseq->m_Starts.end())        orig_refseq = false;    } // while (true)    if (m_MergeFlags & fFillUnalignedRegions) {        vector<TSignedSeqPos> starts;        vector<TSeqPos> lens;        starts.resize(m_Rows.size(), -1);        lens.resize(m_Rows.size(), 0);        TSeqPos len = 0;        CAlnMap::TNumrow rowidx;        TSegments::iterator seg_i = m_Segments.begin();        while (seg_i != m_Segments.end()) {            len = (*seg_i)->m_Len;            ITERATE (CAlnMixSegment::TStartIterators, start_its_i,                     (*seg_i)->m_StartIts) {                CAlnMixSeq * row = start_its_i->first;                rowidx = row->m_RowIndex;                TSignedSeqPos& prev_start = starts[rowidx];                TSeqPos& prev_len = lens[rowidx];                TSeqPos start = start_its_i->second->first;                const bool plus = row->m_PositiveStrand;                const int& width = row->m_Width;                TSeqPos prev_start_plus_len = prev_start + prev_len * width;                TSeqPos start_plus_len = start + len * width;                if (prev_start >= 0) {                    if (plus  &&  prev_start_plus_len < start  ||                        !plus  &&  start_plus_len < (TSeqPos) prev_start) {                        // create a new seg                        CRef<CAlnMixSegment> seg (new CAlnMixSegment);                        TSeqPos new_start;                        if (row->m_PositiveStrand) {                            new_start = prev_start + prev_len * width;                            seg->m_Len = (start - new_start) / width;                        } else {                            new_start = start_plus_len;                            seg->m_Len = (prev_start - new_start) / width;                        }                                                    row->m_Starts[new_start] = seg;                        CAlnMixSeq::TStarts::iterator start_i =                            start_its_i->second;                        seg->m_StartIts[row] =                             row->m_PositiveStrand ?                            --start_i :                            ++start_i;                                                    seg_i = m_Segments.insert(seg_i, seg);                        seg_i++;                    }                }                prev_start = start;                prev_len = len;            }            seg_i++;        }    }}void CAlnMix::x_ConsolidateGaps(TSegmentsContainer& gapped_segs){    TSegmentsContainer::iterator seg1_i, seg2_i;    seg2_i = seg1_i = gapped_segs.begin();    if (seg2_i != gapped_segs.end()) {        seg2_i++;    }    bool         cache = false;    string       s1;    TSeqPos      start1;    int          score1;    CAlnMixSeq * seq1;    CAlnMixSeq * seq2;    while (seg2_i != gapped_segs.end()) {        CAlnMixSegment * seg1 = *seg1_i;        CAlnMixSegment * seg2 = *seg2_i;        // check if this seg possibly aligns with the previous one        bool possible = true;                    if (seg2->m_Len == seg1->m_Len  &&             seg2->m_StartIts.size() == 1) {            seq2 = seg2->m_StartIts.begin()->first;            // check if this seq was already used            ITERATE (CAlnMixSegment::TStartIterators,                     st_it,                     (*seg1_i)->m_StartIts) {                if (st_it->first == seq2) {                    possible = false;                    break;                }            }            // check if score is sufficient            if (possible  &&  m_AddFlags & fCalcScore) {                if (!cache) {                    seq1 = seg1->m_StartIts.begin()->first;                                        start1 = seg1->m_StartIts[seq1]->first;                    TSeqPos start1_plus_len =                         start1 + seg1->m_Len * seq1->m_Width;                                            if (seq1->m_PositiveStrand) {                        seq1->m_BioseqHandle->GetSeqVector                            (CBioseq_Handle::eCoding_Iupac,                             CBioseq_Handle::eStrand_Plus).                            GetSeqData(start1, start1_plus_len, s1);                    } else {                        CSeqVector seq_vec =                             seq1->m_BioseqHandle->GetSeqVector                            (CBioseq_Handle::eCoding_Iupac,                             CBioseq_Handle::eStrand_Minus);                        TSeqPos size = seq_vec.size();                        seq_vec.GetSeqData(size - start1_plus_len,                                           size - start1,                                            s1);                    }                                                    score1 =                         CAlnVec::CalculateScore(s1, s1,                                                seq1->m_IsAA,                                                seq1->m_IsAA);                    cache = true;                }                                string s2;                const TSeqPos& start2 = seg2->m_StartIts[seq2]->first;                TSeqPos start2_plus_len =                     start2 + seg2->m_Len * seq2->m_Width;                                            if (seq2->m_PositiveStrand) {                    seq2->m_BioseqHandle->GetSeqVector                        (CBioseq_Handle::eCoding_Iupac,                         CBioseq_Handle::eStrand_Plus).                        GetSeqData(start2, start2_plus_len, s2);                } else {                    CSeqVector seq_vec =                         seq2->m_BioseqHandle->GetSeqVector                        (CBioseq_Handle::eCoding_Iupac,                         CBioseq_Handle::eStrand_Minus);                    TSeqPos size = seq_vec.size();                    seq_vec.GetSeqData(size - start2_plus_len,                                       size - start2,                                        s2);                }                                                int score2 =                     CAlnVec::CalculateScore(s1, s2, seq1->m_IsAA, seq2->m_IsAA);                if (score2 < 75 * score1 / 100) {                    possible = false;                }            }                    } else {            possible = false;        }        if (possible) {            // consolidate the ones so far                        // add the new row            seg1->m_StartIts[seq2] = seg2->m_StartIts.begin()->second;                        // point the row's start position to the beginning seg            seg2->m_StartIts.begin()->second->secon

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?