alnmix.cpp
来自「ncbi源码」· C++ 代码 · 共 1,672 行 · 第 1/5 页
CPP
1,672 行
CRef<CAlnMixSeq> aln_seq2 = ds_seq[row2]; match->m_AlnSeq1 = aln_seq1; match->m_Start1 = start1; match->m_AlnSeq2 = aln_seq2; match->m_Start2 = start2; match->m_Len = len; match->m_DSIndex = ds_index; // determine the strand match->m_StrandsDiffer = false; ENa_strand strand1 = eNa_strand_plus; ENa_strand strand2 = eNa_strand_plus; if (strands_exist) { if (dsp->GetStrands()[seg_off + row1] == eNa_strand_minus) { strand1 = eNa_strand_minus; } if (dsp->GetStrands()[seg_off + row2] == eNa_strand_minus) { strand2 = eNa_strand_minus; } if (strand1 == eNa_strand_minus && strand2 != eNa_strand_minus || strand1 != eNa_strand_minus && strand2 == eNa_strand_minus) { match->m_StrandsDiffer = true; } } //Determine the score if (m_AddFlags & fCalcScore) { // calc the score by seq comp string s1, s2; if (strand1 == eNa_strand_minus) { CSeqVector seq_vec = aln_seq1->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Minus); TSeqPos size = seq_vec.size(); seq_vec.GetSeqData(size - (start1 + len), size - start1, s1); } else { aln_seq1->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Plus). GetSeqData(start1, start1 + len, s1); } if (strand2 == eNa_strand_minus) { CSeqVector seq_vec = aln_seq2->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Minus); TSeqPos size = seq_vec.size(); seq_vec.GetSeqData(size - (start2 + len), size - start2, s2); } else { aln_seq2->m_BioseqHandle->GetSeqVector (CBioseq_Handle::eCoding_Iupac, CBioseq_Handle::eStrand_Plus). GetSeqData(start2, start2 + len, s2); } //verify that we were able to load all data if (s1.length() != len || s2.length() != len) { string errstr = string("CAlnMix::Add(): ") + "Unable to load data for segment " + NStr::IntToString(seg) + ", rows " + NStr::IntToString(row1) + " and " + NStr::IntToString(row2); NCBI_THROW(CAlnException, eInvalidSegment, errstr); } match->m_Score = CAlnVec::CalculateScore (s1, s2, aln_seq1->m_IsAA, aln_seq2->m_IsAA); } else { match->m_Score = len; } aln_seq1->m_Score += match->m_Score; aln_seq2->m_Score += match->m_Score; } } if (single_chunk) { //record it CRef<CAlnMixMatch> match(new CAlnMixMatch); match->m_Score = 0; match->m_AlnSeq1 = aln_seq1; match->m_Start1 = start1; match->m_AlnSeq2 = 0; match->m_Start2 = 0; match->m_Len = len; match->m_StrandsDiffer = false; match->m_DSIndex = m_InputDSs.size(); m_Matches.push_back(match); } } } seg_off += dsp->GetDim(); }}void CAlnMix::x_Merge(){ bool first_refseq = true; // mark the first loop if (m_MergeFlags & fSortSeqsByScore) { stable_sort(m_Seqs.begin(), m_Seqs.end(), x_CompareAlnSeqScores); } // Find the refseq (if such exists) {{ m_SingleRefseq = false; m_IndependentDSs = m_InputDSs.size() > 1; unsigned int ds_cnt; NON_CONST_ITERATE (TSeqs, it, m_Seqs){ ds_cnt = (*it)->m_DS_Count; if (ds_cnt > 1) { m_IndependentDSs = false; } if (ds_cnt == m_InputDSs.size()) { m_SingleRefseq = true; if ( !first_refseq ) { CAlnMixSeq * refseq = *it; m_Seqs.erase(it); m_Seqs.insert(m_Seqs.begin(), refseq); } break; } first_refseq = false; } }} // Index the sequences {{ int seq_idx=0; ITERATE (TSeqs, seq_i, m_Seqs) { (*seq_i)->m_SeqIndex = seq_idx++; } }} // Set the widths if the mix contains both AA & NA // or in case we force translation if (m_ContainsNA && m_ContainsAA || m_AddFlags & fForceTranslation) { ITERATE (TSeqs, seq_i, m_Seqs) { (*seq_i)->m_Width = (*seq_i)->m_IsAA ? 1 : 3; } } // Sort matches by score stable_sort(m_Matches.begin(), m_Matches.end(), x_CompareAlnMatchScores); CAlnMixSeq * refseq = 0, * seq1 = 0, * seq2 = 0; TSeqPos start, start1, start2, len, curr_len; int width1, width2; CAlnMixMatch * match; CAlnMixSeq::TMatchList::iterator match_list_iter1, match_list_iter2; CAlnMixSeq::TMatchList::iterator match_list_i; TSecondRowFits second_row_fits; refseq = *(m_Seqs.begin()); TMatches::iterator match_i = m_Matches.begin(); CRef<CAlnMixSegment> seg; CAlnMixSeq::TStarts::iterator start_i, lo_start_i, hi_start_i, tmp_start_i; first_refseq = true; // mark the first loop while (true) { // reached the end? if (first_refseq ? match_i == m_Matches.end() && m_Matches.size() : match_list_i == refseq->m_MatchList.end()) { // move on to the next refseq refseq->m_RefBy = 0; // try to find the best scoring 'connected' candidate ITERATE (TSeqs, it, m_Seqs){ if ( !((*it)->m_MatchList.empty()) && (*it)->m_RefBy == refseq) { refseq = *it; break; } } if (refseq->m_RefBy == 0) { // no candidate was found 'connected' to the refseq // continue with the highest scoring candidate ITERATE (TSeqs, it, m_Seqs){ if ( !((*it)->m_MatchList.empty()) ) { refseq = *it; break; } } } if (refseq->m_MatchList.empty()) { break; // done } else { first_refseq = false; match_list_i = refseq->m_MatchList.begin(); } continue; } else { // iterate match = first_refseq ? *(match_i++) : *(match_list_i++); } curr_len = len = match->m_Len; // is it a match with this refseq? if (match->m_AlnSeq1 == refseq) { seq1 = match->m_AlnSeq1; start1 = match->m_Start1; match_list_iter1 = match->m_MatchIter1; seq2 = match->m_AlnSeq2; start2 = match->m_Start2; match_list_iter2 = match->m_MatchIter2; } else if (match->m_AlnSeq2 == refseq) { seq1 = match->m_AlnSeq2; start1 = match->m_Start2; match_list_iter1 = match->m_MatchIter2; seq2 = match->m_AlnSeq1; start2 = match->m_Start1; match_list_iter2 = match->m_MatchIter1; } else { seq1 = match->m_AlnSeq1; seq2 = match->m_AlnSeq2; // mark the two refseqs, they are candidates for next refseq(s) seq1->m_MatchList.push_back(match); (match->m_MatchIter1 = seq1->m_MatchList.end())--; if (seq2) { seq2->m_MatchList.push_back(match); (match->m_MatchIter2 = seq2->m_MatchList.end())--; } // mark that there's no match with this refseq seq1 = 0; } // save the match info into the segments map if (seq1) { // order the match match->m_AlnSeq1 = seq1; match->m_Start1 = start1; match->m_AlnSeq2 = seq2; match->m_Start2 = start2; width1 = seq1->m_Width; if (seq2) { width2 = seq2->m_Width; } // in case of translated refseq if (width1 == 3) { // check the frame int frame = start1 % 3; if (seq1->m_Starts.empty()) { seq1->m_Frame = frame; } else { while (seq1->m_Frame != frame) { if (!seq1->m_ExtraRow) { // create an extra row CRef<CAlnMixSeq> row (new CAlnMixSeq); row->m_BioseqHandle = seq1->m_BioseqHandle; row->m_SeqId = seq1->m_SeqId; row->m_Width = seq1->m_Width; row->m_Frame = frame; row->m_SeqIndex = seq1->m_SeqIndex; if (m_MergeFlags & fQuerySeqMergeOnly) { row->m_DSIndex = match->m_DSIndex; } m_ExtraRows.push_back(row); seq1->m_ExtraRow = row; seq1 = match->m_AlnSeq1 = seq1->m_ExtraRow; break; } seq1 = match->m_AlnSeq1 = seq1->m_ExtraRow; } } } // this match is used, erase from seq1 list if ( !first_refseq ) { if ( !refseq->m_MatchList.empty() ) { refseq->m_MatchList.erase(match_list_iter1); } } // if a subject sequence place it in the proper row if ( !first_refseq && m_MergeFlags & fQuerySeqMergeOnly) { bool proper_row_found = false; while (true) { if (seq1->m_DSIndex == match->m_DSIndex) { proper_row_found = true; break; } else { if (seq1->m_ExtraRow) { seq1 = match->m_AlnSeq2 = seq1->m_ExtraRow; } else { break; } } } if ( !proper_row_found ) { NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::x_Merge(): " "Proper row not found for the match. " "Cannot use fQuerySeqMergeOnly?"); } } CAlnMixSeq::TStarts& starts = seq1->m_Starts; if (seq2) { // mark it, it is linked to the refseq seq2->m_RefBy = refseq; // this match is used erase from seq2 list
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?