alnmix.cpp

来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页

CPP
1,672
字号
                        CRef<CAlnMixSeq> aln_seq2 = ds_seq[row2];                        match->m_AlnSeq1 = aln_seq1;                        match->m_Start1 = start1;                        match->m_AlnSeq2 = aln_seq2;                        match->m_Start2 = start2;                        match->m_Len = len;                        match->m_DSIndex = ds_index;                        // determine the strand                        match->m_StrandsDiffer = false;                        ENa_strand strand1 = eNa_strand_plus;                        ENa_strand strand2 = eNa_strand_plus;                        if (strands_exist) {                            if (dsp->GetStrands()[seg_off + row1]                                 == eNa_strand_minus) {                                strand1 = eNa_strand_minus;                            }                            if (dsp->GetStrands()[seg_off + row2]                                 == eNa_strand_minus) {                                strand2 = eNa_strand_minus;                            }                            if (strand1 == eNa_strand_minus  &&                                strand2 != eNa_strand_minus  ||                                strand1 != eNa_strand_minus  &&                                strand2 == eNa_strand_minus) {                                match->m_StrandsDiffer = true;                            }                        }                        //Determine the score                        if (m_AddFlags & fCalcScore) {                            // calc the score by seq comp                            string s1, s2;                            if (strand1 == eNa_strand_minus) {                                CSeqVector seq_vec =                                     aln_seq1->m_BioseqHandle->GetSeqVector                                    (CBioseq_Handle::eCoding_Iupac,                                     CBioseq_Handle::eStrand_Minus);                                TSeqPos size = seq_vec.size();                                seq_vec.GetSeqData(size - (start1 + len),                                                   size - start1,                                                    s1);                            } else {                                aln_seq1->m_BioseqHandle->GetSeqVector                                    (CBioseq_Handle::eCoding_Iupac,                                     CBioseq_Handle::eStrand_Plus).                                    GetSeqData(start1, start1 + len, s1);                            }                                                            if (strand2 ==  eNa_strand_minus) {                                CSeqVector seq_vec =                                     aln_seq2->m_BioseqHandle->GetSeqVector                                    (CBioseq_Handle::eCoding_Iupac,                                     CBioseq_Handle::eStrand_Minus);                                TSeqPos size = seq_vec.size();                                seq_vec.GetSeqData(size - (start2 + len),                                                   size - start2,                                                    s2);                            } else {                                aln_seq2->m_BioseqHandle->GetSeqVector                                    (CBioseq_Handle::eCoding_Iupac,                                     CBioseq_Handle::eStrand_Plus).                                    GetSeqData(start2, start2 + len, s2);                            }                            //verify that we were able to load all data                            if (s1.length() != len || s2.length() != len) {                                string errstr = string("CAlnMix::Add(): ")                                    + "Unable to load data for segment "                                    + NStr::IntToString(seg) +                                    ", rows " + NStr::IntToString(row1) +                                    " and " + NStr::IntToString(row2);                                NCBI_THROW(CAlnException, eInvalidSegment,                                           errstr);                            }                            match->m_Score =                                 CAlnVec::CalculateScore                                (s1, s2, aln_seq1->m_IsAA, aln_seq2->m_IsAA);                        } else {                            match->m_Score = len;                        }                                                aln_seq1->m_Score += match->m_Score;                        aln_seq2->m_Score += match->m_Score;                    }                }                if (single_chunk) {                    //record it                    CRef<CAlnMixMatch> match(new CAlnMixMatch);                    match->m_Score = 0;                    match->m_AlnSeq1 = aln_seq1;                    match->m_Start1 = start1;                    match->m_AlnSeq2 = 0;                    match->m_Start2 = 0;                    match->m_Len = len;                    match->m_StrandsDiffer = false;                    match->m_DSIndex = m_InputDSs.size();                    m_Matches.push_back(match);                }            }        }        seg_off += dsp->GetDim();    }}void CAlnMix::x_Merge(){    bool first_refseq = true; // mark the first loop    if (m_MergeFlags & fSortSeqsByScore) {        stable_sort(m_Seqs.begin(), m_Seqs.end(), x_CompareAlnSeqScores);    }    // Find the refseq (if such exists)    {{        m_SingleRefseq = false;        m_IndependentDSs = m_InputDSs.size() > 1;        unsigned int ds_cnt;        NON_CONST_ITERATE (TSeqs, it, m_Seqs){            ds_cnt = (*it)->m_DS_Count;            if (ds_cnt > 1) {                m_IndependentDSs = false;            }            if (ds_cnt == m_InputDSs.size()) {                m_SingleRefseq = true;                if ( !first_refseq ) {                    CAlnMixSeq * refseq = *it;                    m_Seqs.erase(it);                    m_Seqs.insert(m_Seqs.begin(), refseq);                }                break;            }            first_refseq = false;        }    }}    // Index the sequences    {{        int seq_idx=0;        ITERATE (TSeqs, seq_i, m_Seqs) {            (*seq_i)->m_SeqIndex = seq_idx++;        }    }}    // Set the widths if the mix contains both AA & NA    // or in case we force translation    if (m_ContainsNA  &&  m_ContainsAA  ||  m_AddFlags & fForceTranslation) {        ITERATE (TSeqs, seq_i, m_Seqs) {            (*seq_i)->m_Width = (*seq_i)->m_IsAA ? 1 : 3;        }    }    // Sort matches by score    stable_sort(m_Matches.begin(), m_Matches.end(), x_CompareAlnMatchScores);    CAlnMixSeq * refseq = 0, * seq1 = 0, * seq2 = 0;    TSeqPos start, start1, start2, len, curr_len;    int width1, width2;    CAlnMixMatch * match;    CAlnMixSeq::TMatchList::iterator match_list_iter1, match_list_iter2;    CAlnMixSeq::TMatchList::iterator match_list_i;    TSecondRowFits second_row_fits;    refseq = *(m_Seqs.begin());    TMatches::iterator match_i = m_Matches.begin();    CRef<CAlnMixSegment> seg;    CAlnMixSeq::TStarts::iterator start_i, lo_start_i, hi_start_i, tmp_start_i;    first_refseq = true; // mark the first loop    while (true) {        // reached the end?        if (first_refseq ?            match_i == m_Matches.end()  &&  m_Matches.size() :            match_list_i == refseq->m_MatchList.end()) {            // move on to the next refseq            refseq->m_RefBy = 0;            // try to find the best scoring 'connected' candidate            ITERATE (TSeqs, it, m_Seqs){                if ( !((*it)->m_MatchList.empty())  &&                     (*it)->m_RefBy == refseq) {                    refseq = *it;                    break;                }            }            if (refseq->m_RefBy == 0) {                // no candidate was found 'connected' to the refseq                // continue with the highest scoring candidate                ITERATE (TSeqs, it, m_Seqs){                    if ( !((*it)->m_MatchList.empty()) ) {                        refseq = *it;                        break;                    }                }            }            if (refseq->m_MatchList.empty()) {                break; // done            } else {                first_refseq = false;                match_list_i = refseq->m_MatchList.begin();            }            continue;        } else {            // iterate            match = first_refseq ? *(match_i++) : *(match_list_i++);        }        curr_len = len = match->m_Len;        // is it a match with this refseq?        if (match->m_AlnSeq1 == refseq) {            seq1 = match->m_AlnSeq1;            start1 = match->m_Start1;            match_list_iter1 = match->m_MatchIter1;            seq2 = match->m_AlnSeq2;            start2 = match->m_Start2;            match_list_iter2 = match->m_MatchIter2;        } else if (match->m_AlnSeq2 == refseq) {            seq1 = match->m_AlnSeq2;            start1 = match->m_Start2;            match_list_iter1 = match->m_MatchIter2;            seq2 = match->m_AlnSeq1;            start2 = match->m_Start1;            match_list_iter2 = match->m_MatchIter1;        } else {            seq1 = match->m_AlnSeq1;            seq2 = match->m_AlnSeq2;            // mark the two refseqs, they are candidates for next refseq(s)            seq1->m_MatchList.push_back(match);            (match->m_MatchIter1 = seq1->m_MatchList.end())--;            if (seq2) {                seq2->m_MatchList.push_back(match);                (match->m_MatchIter2 = seq2->m_MatchList.end())--;            }            // mark that there's no match with this refseq            seq1 = 0;        }        // save the match info into the segments map        if (seq1) {            // order the match            match->m_AlnSeq1 = seq1;            match->m_Start1 = start1;            match->m_AlnSeq2 = seq2;            match->m_Start2 = start2;            width1 = seq1->m_Width;            if (seq2) {                width2 = seq2->m_Width;            }            // in case of translated refseq            if (width1 == 3) {                // check the frame                 int frame = start1 % 3;                if (seq1->m_Starts.empty()) {                    seq1->m_Frame = frame;                } else {                    while (seq1->m_Frame != frame) {                        if (!seq1->m_ExtraRow) {                            // create an extra row                            CRef<CAlnMixSeq> row (new CAlnMixSeq);                            row->m_BioseqHandle = seq1->m_BioseqHandle;                            row->m_SeqId = seq1->m_SeqId;                            row->m_Width = seq1->m_Width;                            row->m_Frame = frame;                            row->m_SeqIndex = seq1->m_SeqIndex;                            if (m_MergeFlags & fQuerySeqMergeOnly) {                                row->m_DSIndex = match->m_DSIndex;                            }                            m_ExtraRows.push_back(row);                            seq1->m_ExtraRow = row;                            seq1 = match->m_AlnSeq1 = seq1->m_ExtraRow;                            break;                        }                        seq1 = match->m_AlnSeq1 = seq1->m_ExtraRow;                    }                }            }            // this match is used, erase from seq1 list            if ( !first_refseq ) {                if ( !refseq->m_MatchList.empty() ) {                    refseq->m_MatchList.erase(match_list_iter1);                }            }            // if a subject sequence place it in the proper row            if ( !first_refseq  &&  m_MergeFlags & fQuerySeqMergeOnly) {                bool proper_row_found = false;                while (true) {                    if (seq1->m_DSIndex == match->m_DSIndex) {                        proper_row_found = true;                        break;                    } else {                        if (seq1->m_ExtraRow) {                            seq1 = match->m_AlnSeq2 = seq1->m_ExtraRow;                        } else {                            break;                        }                    }                }                if ( !proper_row_found ) {                    NCBI_THROW(CAlnException, eMergeFailure,                               "CAlnMix::x_Merge(): "                               "Proper row not found for the match. "                               "Cannot use fQuerySeqMergeOnly?");                }            }                        CAlnMixSeq::TStarts& starts = seq1->m_Starts;            if (seq2) {                // mark it, it is linked to the refseq                seq2->m_RefBy = refseq;                // this match is used erase from seq2 list

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?