validerror_align.cpp
来自「ncbi源码」· C++ 代码 · 共 1,121 行 · 第 1/3 页
CPP
1,121 行
break; } } if ( seggap ) { // no sequence is present in this segment PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap, "Segment " + NStr::UIntToString(seg) + " contains only gaps.", align); } }}void CValidError_align::x_ValidateSegmentGap(const TPacked& packed, const CSeq_align& align){ static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; size_t numseg = packed.GetNumseg(); size_t dim = packed.GetDim(); const CPacked_seg::TPresent& present = packed.GetPresent(); for ( size_t seg = 0; seg < numseg; ++seg) { size_t id = 0; for ( ; id < dim; ++id ) { size_t i = id + (dim * seg); if ( (present[i / 8] & bits[i % 8]) ) { break; } } if ( id == dim ) { // no sequence is present in this segment PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap, "Segment " + NStr::UIntToString(seg) + "contains only gaps.", align); } } }void CValidError_align::x_ValidateSegmentGap(const TStd& std_segs, const CSeq_align& align){ size_t seg = 0; ITERATE ( TStd, stdseg, std_segs ) { bool gap = true; ITERATE ( CStd_seg::TLoc, loc, (*stdseg)->GetLoc() ) { if ( !(*loc)->IsEmpty() || !(*loc)->IsEmpty() ) { gap = false; break; } } if ( gap ) { // no sequence is present in this segment PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap, "Segment " + NStr::UIntToString(seg) + "contains only gaps.", align); } ++seg; }}//===========================================================================// x_ValidateSeqIdInSeqAlign://// Validate SeqId in sequence alignment.//===========================================================================void CValidError_align::x_ValidateSeqId(const CSeq_align& align){ vector< CRef< CSeq_id > > ids; x_GetIds(align, ids); ITERATE( vector< CRef< CSeq_id > >, id_iter, ids ) { const CSeq_id& id = **id_iter; if ( id.IsLocal() ) { if ( !m_Scope->GetBioseqHandle(id) ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_SeqIdProblem, "The sequence corresponding to SeqId " + id.AsFastaString() + " could not be found.", align); } } }}void CValidError_align::x_GetIds(const CSeq_align& align, vector< CRef< CSeq_id > >& ids){ ids.clear(); switch ( align.GetSegs().Which() ) { case CSeq_align::C_Segs::e_Dendiag: ITERATE( TDendiag, diag_seg, align.GetSegs().GetDendiag() ) { const vector< CRef< CSeq_id > >& diag_ids = (*diag_seg)->GetIds(); copy(diag_ids.begin(), diag_ids.end(), back_inserter(ids)); } break; case CSeq_align::C_Segs::e_Denseg: ids = align.GetSegs().GetDenseg().GetIds(); break; case CSeq_align::C_Segs::e_Packed: copy(align.GetSegs().GetPacked().GetIds().begin(), align.GetSegs().GetPacked().GetIds().end(), back_inserter(ids)); break; case CSeq_align::C_Segs::e_Std: ITERATE( TStd, std_seg, align.GetSegs().GetStd() ) { ITERATE( CStd_seg::TLoc, loc, (*std_seg)->GetLoc() ) { CSeq_id* idp = const_cast<CSeq_id*>(&GetId(**loc, m_Scope)); CRef<CSeq_id> ref(idp); ids.push_back(ref); } } break; default: break; }}//===========================================================================// x_ValidateSeqLength://// Check segment length, start and end point in Dense_diag, Dense_seg, // Packed_seg and Std_seg.//===========================================================================// Make sure that, in Dense_diag alignment, segment length is not greater// than Bioseq lengthvoid CValidError_align::x_ValidateSeqLength(const CDense_diag& dendiag, size_t dendiag_num, const CSeq_align& align){ size_t dim = dendiag.GetDim(); TSeqPos len = dendiag.GetLen(); const CDense_diag::TIds& ids = dendiag.GetIds(); CDense_diag::TStarts::const_iterator starts_iter = dendiag.GetStarts().begin(); for ( size_t id = 0; id < dim; ++id ) { TSeqPos bslen = GetLength(*(ids[id]), m_Scope); TSeqPos start = *starts_iter; // verify start if ( start > bslen ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_StartMorethanBiolen, "Start (" + NStr::UIntToString(start) + ") exceeds bioseq length (" + NStr::UIntToString(bslen) + ") for seq-id " + ids[id]->AsFastaString() + "in dendiag " + NStr::UIntToString(dendiag_num), align); } // verify length if ( start + len > bslen ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_SumLenStart, "Start + length (" + NStr::UIntToString(start + len) + ") exceeds bioseq length (" + NStr::UIntToString(bslen) + ") for seq-id " + ids[id]->AsFastaString() + "in dendiag " + NStr::UIntToString(dendiag_num), align); } ++starts_iter; }} void CValidError_align::x_ValidateSeqLength(const TDenseg& denseg, const CSeq_align& align){ int dim = denseg.GetDim(); int numseg = denseg.GetNumseg(); const CDense_seg::TIds& ids = denseg.GetIds(); const CDense_seg::TStarts& starts = denseg.GetStarts(); const CDense_seg::TLens& lens = denseg.GetLens(); bool minus = false; for ( int id = 0; id < dim; ++id ) { TSeqPos bslen = GetLength(*(ids[id]), m_Scope); minus = denseg.IsSetStrands() && denseg.GetStrands()[id] == eNa_strand_minus; for ( int seg = 0; seg < numseg; ++seg ) { size_t curr_index = id + (minus ? numseg - seg - 1 : seg) * dim; // no need to verify if segment is not present if ( starts[curr_index] == -1 ) { continue; } size_t lens_index = minus ? numseg - seg - 1 : seg; // verify that start plus segment does not exceed total bioseq len if ( starts[curr_index] + lens[lens_index] > bslen ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_SumLenStart, "Start + segment length (" + NStr::UIntToString(starts[curr_index] + lens[lens_index]) + ") exceeds bioseq length (" + NStr::UIntToString(bslen) + ")", align); } // find the next segment that is present size_t next_index = curr_index; int next_seg; for ( next_seg = seg + 1; next_seg < numseg; ++next_seg ) { next_index = id + (minus ? numseg - next_seg - 1 : next_seg) * dim; if ( starts[next_index] != -1 ) { break; } } if ( next_seg == numseg || next_index == curr_index ) { continue; } // length plus start should be equal to the closest next // start that is not -1 if ( starts[curr_index] + (TSignedSeqPos)lens[lens_index] != starts[next_index] ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_DensegLenStart, "Start + segment length (" + NStr::UIntToString(starts[curr_index] + lens[lens_index]) + ") exceeds next start (" + NStr::UIntToString(starts[next_index]) + ")", align); } } }}void CValidError_align::x_ValidateSeqLength(const TPacked& packed, const CSeq_align& align){ static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; size_t dim = packed.GetDim(); size_t numseg = packed.GetNumseg(); //CPacked_seg::TStarts::const_iterator start = // packed.GetStarts().begin(); const CPacked_seg::TPresent& present = packed.GetPresent(); for ( size_t id = 0; id < dim; ++id ) { for ( size_t seg = 0; seg < numseg; ++seg ) { size_t i = id + seg * dim; if ( (present[i / 8] & bits[i % 8]) ) { // !!! } } }}void CValidError_align::x_ValidateSeqLength(const TStd& std_segs, const CSeq_align& align){ ITERATE( TStd, iter, std_segs ) { const CStd_seg& stdseg = **iter; ITERATE ( CStd_seg::TLoc, loc_iter, stdseg.GetLoc() ) { const CSeq_loc& loc = **loc_iter; if ( loc.IsWhole() || loc.IsEmpty() || loc.IsNull() ) { continue; } if ( !IsOneBioseq(loc, m_Scope) ) { continue; } TSeqPos from = loc.GetTotalRange().GetFrom(); TSeqPos to = loc.GetTotalRange().GetTo(); TSeqPos loclen = GetLength( loc, m_Scope); TSeqPos bslen = GetLength(GetId(loc, m_Scope), m_Scope); string bslen_str = NStr::UIntToString(bslen); string label; loc.GetLabel(&label); if ( from > bslen - 1 ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_StartMorethanBiolen, "Loaction: " + label + ". From (" + NStr::UIntToString(from) + ") is more than bioseq length (" + bslen_str + ")", align); } if ( to > bslen - 1 ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_EndMorethanBiolen, "Loaction: " + label + ". To (" + NStr::UIntToString(to) + ") is more than bioseq length (" + bslen_str + ")", align); } if ( loclen > bslen ) { PostErr(eDiag_Error, eErr_SEQ_ALIGN_LenMorethanBiolen, "Loaction: " + label + ". Length (" + NStr::UIntToString(loclen) + ") is more than bioseq length (" + bslen_str + ")", align); } } }}END_SCOPE(validator)END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: validerror_align.cpp,v $* Revision 1000.1 2004/06/01 19:47:45 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8** Revision 1.8 2004/05/21 21:42:56 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.7 2003/09/30 18:30:38 shomrat* changed unsigned to signed to eliminate infinite loop** Revision 1.6 2003/06/02 16:06:43 dicuccio* Rearranged src/objects/ subtree. This includes the following shifts:* - src/objects/asn2asn --> arc/app/asn2asn* - src/objects/testmedline --> src/objects/ncbimime/test* - src/objects/objmgr --> src/objmgr* - src/objects/util --> src/objmgr/util* - src/objects/alnmgr --> src/objtools/alnmgr* - src/objects/flat --> src/objtools/flat* - src/objects/validator --> src/objtools/validator* - src/objects/cddalignview --> src/objtools/cddalignview* In addition, libseq now includes six of the objects/seq... libs, and libmmdb* replaces the three libmmdb? libs.** Revision 1.5 2003/05/28 16:24:40 shomrat* Report a single FastaLike error from each alignment** Revision 1.4 2003/04/29 14:58:07 shomrat* Implemented SeqAlign validation** Revision 1.3 2003/03/31 14:40:52 shomrat* $id: -> $id$** Revision 1.2 2002/12/24 16:52:42 shomrat* Changes to include directives** Revision 1.1 2002/12/23 20:15:59 shomrat* Initial submission after splitting former implementation*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?