validerror_align.cpp
来自「ncbi源码」· C++ 代码 · 共 1,121 行 · 第 1/3 页
CPP
1,121 行
if ( stdseg.IsSetIds() ) { if ( dim != stdseg.GetIds().size() ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsDimMismatch, "Mismatch between specified dimension (" + NStr::UIntToString(dim) + ") and number of Seq-ids (" + NStr::UIntToString(stdseg.GetIds().size()) + ")", align); cont = false; continue; } } } if( cont ) { x_ValidateStrand(std_segs, align); x_ValidateFastaLike(std_segs, align); x_ValidateSegmentGap(std_segs, align); if ( m_Imp.IsRemoteFetch() ) { x_ValidateSeqId(align); x_ValidateSeqLength(std_segs, align); } }}template <typename T>bool CValidError_align::x_ValidateDim(T& obj, const CSeq_align& align, size_t part){ if ( !obj.IsSetDim() ) { string msg = "Dim not set"; if ( part > 0 ) { msg += " Part " + NStr::UIntToString(part); } PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsInvalidDim, msg, align); return false; } size_t dim = obj.GetDim(); if ( dim < 2 ) { string msg = "Dimension (" + NStr::UIntToString(dim) + ") must be at least 2."; if ( part > 0 ) { msg += " Part " + NStr::UIntToString(part); } PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsInvalidDim, msg, align); return false; } return true;}//===========================================================================// x_ValidateStrand://// Check if the strand is consistent in SeqAlignment of global// or partial type.//===========================================================================void CValidError_align::x_ValidateStrand(const TDenseg& denseg, const CSeq_align& align){ if ( !denseg.IsSetStrands() ) { return; } size_t dim = denseg.GetDim(); size_t numseg = denseg.GetNumseg(); const CDense_seg::TStrands& strands = denseg.GetStrands(); // go through id for each alignment sequence for ( size_t id = 0; id < dim; ++id ) { ENa_strand strand1 = strands[id]; for ( size_t seg = 0; seg < numseg; ++seg ) { ENa_strand strand2 = strands[id + (seg * dim)]; // skip undefined strand if ( strand2 == eNa_strand_unknown || strand2 == eNa_strand_other ) { continue; } if ( strand1 == eNa_strand_unknown || strand1 == eNa_strand_other ) { strand1 = strand2; continue; } // strands should be same for a given seq-id if ( strand1 != strand2 ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev, "The strand labels for SeqId " + denseg.GetIds()[id]->AsFastaString() + " are inconsistent across the alignment. " "The first inconsistent region is the " + NStr::UIntToString(seg), align); break; } } }}void CValidError_align::x_ValidateStrand(const TPacked& packed, const CSeq_align& align){ if ( !packed.IsSetStrands() ) { return; } size_t dim = packed.GetDim(); const CPacked_seg::TPresent& present = packed.GetPresent(); const CPacked_seg::TStrands& strands = packed.GetStrands(); CPacked_seg::TStrands::const_iterator strand = strands.begin(); vector<ENa_strand> id_strands(dim, eNa_strand_unknown); Uchar mask = 0x01; size_t present_size = present.size(); for ( size_t i = 0; i < present_size; ++i ) { if ( present[i] == 0 ) { // no bits in this char continue; } for ( int shift = 7; shift >= 0; --shift ) { if ( present[i] & (mask << shift) ) { size_t id = i * 8 + 7 - shift; if ( *strand == eNa_strand_unknown || *strand == eNa_strand_other ) { ++strand; continue; } if ( id_strands[id] == eNa_strand_unknown || id_strands[id] == eNa_strand_other ) { id_strands[id] = *strand; ++strand; continue; } if ( id_strands[id] != *strand ) { CPacked_seg::TIds::const_iterator id_iter = packed.GetIds().begin(); for ( ; id; --id ) { ++id_iter; } PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev, "Strand for SeqId " + (*id_iter)->AsFastaString() + " are inconsistent across the alignment", align); } ++strand; } } }}void CValidError_align::x_ValidateStrand(const TStd& std_segs, const CSeq_align& align){ map< CConstRef<CSeq_id>, ENa_strand > strands; ITERATE ( TStd, stdseg, std_segs ) { ITERATE ( CStd_seg::TLoc, loc_iter, (*stdseg)->GetLoc() ) { const CSeq_loc& loc = **loc_iter; if ( !IsOneBioseq(loc, m_Scope) ) { // !!! should probably be an error continue; } CConstRef<CSeq_id> id(&GetId(loc, m_Scope)); ENa_strand strand = GetStrand(loc, m_Scope); if ( strand == eNa_strand_unknown || strand == eNa_strand_other ) { continue; } if ( strands[id] == eNa_strand_unknown || strands[id] == eNa_strand_other ) { strands[id] = strand; } if ( strands[id] != strand ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev, "Strands for SeqId " + id->AsFastaString() + " are inconsistent across the alignment", align); } } }}//===========================================================================// x_ValidateFastaLike://// Check if an alignment is FASTA-like. // Alignment is FASTA-like if all gaps are at the end with dimensions > 2.//===========================================================================void CValidError_align::x_ValidateFastaLike(const TDenseg& denseg, const CSeq_align& align){ // check only global or partial type if ( (align.GetType() != CSeq_align::eType_global && align.GetType() != CSeq_align::eType_partial) || denseg.GetDim() <= 2 ) { return; } size_t dim = denseg.GetDim(); size_t numseg = denseg.GetNumseg(); vector<string> fasta_like; for ( size_t id = 0; id < dim; ++id ) { bool gap = false; const CDense_seg::TStarts& starts = denseg.GetStarts(); for ( size_t seg = 0; seg < numseg; ++ seg ) { // if start value is -1, set gap flag to true if ( starts[id + (dim * seg)] < 0 ) { gap = true; } else if ( gap ) { // if a positive start value is found after the initial -1 // start value, it's not fasta like. //no need to check this sequence further break; } if ( seg == numseg - 1 && gap ) { // if no more positive start value are found after the initial // -1 start value, it's fasta like fasta_like.push_back(denseg.GetIds()[id]->AsFastaString()); } } } if ( !fasta_like.empty() ) { string fasta_like_ids; bool first = true; ITERATE( vector<string>, idstr, fasta_like ) { if ( first ) { first = false; } else { fasta_like_ids += ", "; } fasta_like_ids += *idstr; } fasta_like_ids += "."; PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike, "This may be a fasta-like alignment for SeqIds: " + fasta_like_ids, align); }} void CValidError_align::x_ValidateFastaLike(const TPacked& packed, const CSeq_align& align){ // check only global or partial type if ( align.GetType() == CSeq_align::eType_global || align.GetType() == CSeq_align::eType_partial || packed.GetDim() <= 2 ) { return; } static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; size_t dim = packed.GetDim(); size_t numseg = packed.GetNumseg(); CPacked_seg::TIds::const_iterator id_iter = packed.GetIds().begin(); const CPacked_seg::TPresent& present = packed.GetPresent(); for ( size_t id = 0; id < dim; ++id, ++id_iter ) { bool gap = false; for ( size_t seg = 0; seg < numseg; ++ seg ) { // if a segment is not present, set gap flag to true size_t i = id + (dim * seg); if ( !(present[i / 8] & bits[i % 8]) ) { gap = true; } else if ( gap ) { // if a segment is found later, then it's not fasta like, // no need to check this sequence further. break; } if ( seg == numseg - 1 && gap ) { // if no more segments are found for this sequence, // then it's fasta like. PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike, "This may be a fasta-like alignment for SeqId: " + (*id_iter)->AsFastaString(), align); } } }}void CValidError_align::x_ValidateFastaLike(const TStd& std_segs, const CSeq_align& align){ // check only global or partial type if ( align.GetType() == CSeq_align::eType_global || align.GetType() == CSeq_align::eType_partial ) { return; } // suspected seq-ids typedef map< CConstRef<CSeq_id>, bool > TIdGapMap; TIdGapMap fasta_like; ITERATE ( TStd, stdseg, std_segs ) { ITERATE ( CStd_seg::TLoc, loc_iter, (*stdseg)->GetLoc() ) { const CSeq_loc& loc = **loc_iter; if ( !IsOneBioseq(loc, m_Scope) ) { // !!! should probably be an error continue; } CConstRef<CSeq_id> id(&GetId(loc, m_Scope)); bool gap = loc.IsEmpty() || loc.IsNull(); if ( gap && fasta_like.find(id) == fasta_like.end() ) { fasta_like[id] = true; } if ( !gap && fasta_like.find(id) != fasta_like.end() ) { fasta_like[id] = false; } } } ITERATE ( TIdGapMap, iter, fasta_like ) { if ( iter->second ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike, "This may be a fasta-like alignment for SeqId: " + iter->first->AsFastaString(), align); } }}//===========================================================================// x_ValidateSegmentGap://// Check if there is a gap for all sequences in a segment.//===========================================================================void CValidError_align::x_ValidateSegmentGap(const TDenseg& denseg, const CSeq_align& align){ int numseg = denseg.GetNumseg(); int dim = denseg.GetDim(); const CDense_seg::TStarts& starts = denseg.GetStarts(); for ( int seg = 0; seg < numseg; ++seg ) { bool seggap = true; for ( int id = 0; id < dim; ++id ) { if ( starts[seg * dim + id] != -1 ) { seggap = false;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?