validerror_align.cpp

来自「ncbi源码」· C++ 代码 · 共 1,121 行 · 第 1/3 页

CPP
1,121
字号
                break;            }        }        if ( seggap ) {            // no sequence is present in this segment            PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap,                "Segment " + NStr::UIntToString(seg) + " contains only gaps.",                align);        }    }}void CValidError_align::x_ValidateSegmentGap(const TPacked& packed, const CSeq_align& align){    static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };    size_t numseg = packed.GetNumseg();    size_t dim = packed.GetDim();    const CPacked_seg::TPresent& present = packed.GetPresent();    for ( size_t seg = 0; seg < numseg;  ++seg) {        size_t id = 0;        for ( ; id < dim; ++id ) {            size_t i = id + (dim * seg);            if ( (present[i / 8] & bits[i % 8]) ) {                break;            }        }        if ( id == dim ) {            // no sequence is present in this segment            PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap,                "Segment " + NStr::UIntToString(seg) + "contains only gaps.",                align);        }    }       }void CValidError_align::x_ValidateSegmentGap(const TStd& std_segs, const CSeq_align& align){    size_t seg = 0;    ITERATE ( TStd, stdseg, std_segs ) {        bool gap = true;        ITERATE ( CStd_seg::TLoc, loc, (*stdseg)->GetLoc() ) {            if ( !(*loc)->IsEmpty()  ||  !(*loc)->IsEmpty() ) {                gap = false;                break;            }        }        if ( gap ) {            // no sequence is present in this segment            PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegmentGap,                "Segment " + NStr::UIntToString(seg) + "contains only gaps.",                align);        }        ++seg;    }}//===========================================================================// x_ValidateSeqIdInSeqAlign:////  Validate SeqId in sequence alignment.//===========================================================================void CValidError_align::x_ValidateSeqId(const CSeq_align& align){    vector< CRef< CSeq_id > > ids;    x_GetIds(align, ids);    ITERATE( vector< CRef< CSeq_id > >, id_iter, ids ) {        const CSeq_id& id = **id_iter;        if ( id.IsLocal() ) {            if ( !m_Scope->GetBioseqHandle(id) ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_SeqIdProblem,                    "The sequence corresponding to SeqId " +                     id.AsFastaString() + " could not be found.",                    align);            }        }    }}void CValidError_align::x_GetIds(const CSeq_align& align, vector< CRef< CSeq_id > >& ids){    ids.clear();    switch ( align.GetSegs().Which() ) {    case CSeq_align::C_Segs::e_Dendiag:        ITERATE( TDendiag, diag_seg, align.GetSegs().GetDendiag() ) {            const vector< CRef< CSeq_id > >& diag_ids = (*diag_seg)->GetIds();            copy(diag_ids.begin(), diag_ids.end(), back_inserter(ids));        }        break;            case CSeq_align::C_Segs::e_Denseg:        ids = align.GetSegs().GetDenseg().GetIds();        break;            case CSeq_align::C_Segs::e_Packed:        copy(align.GetSegs().GetPacked().GetIds().begin(),             align.GetSegs().GetPacked().GetIds().end(),             back_inserter(ids));        break;            case CSeq_align::C_Segs::e_Std:        ITERATE( TStd, std_seg, align.GetSegs().GetStd() ) {            ITERATE( CStd_seg::TLoc, loc, (*std_seg)->GetLoc() ) {                CSeq_id* idp = const_cast<CSeq_id*>(&GetId(**loc, m_Scope));                CRef<CSeq_id> ref(idp);                ids.push_back(ref);            }        }        break;                default:        break;    }}//===========================================================================// x_ValidateSeqLength:////  Check segment length, start and end point in Dense_diag, Dense_seg,  //  Packed_seg and Std_seg.//===========================================================================// Make sure that, in Dense_diag alignment, segment length is not greater// than Bioseq lengthvoid CValidError_align::x_ValidateSeqLength(const CDense_diag& dendiag, size_t dendiag_num, const CSeq_align& align){    size_t dim = dendiag.GetDim();    TSeqPos len = dendiag.GetLen();    const CDense_diag::TIds& ids = dendiag.GetIds();        CDense_diag::TStarts::const_iterator starts_iter =             dendiag.GetStarts().begin();        for ( size_t id = 0; id < dim; ++id ) {        TSeqPos bslen = GetLength(*(ids[id]), m_Scope);        TSeqPos start = *starts_iter;        // verify start        if ( start > bslen ) {            PostErr(eDiag_Error, eErr_SEQ_ALIGN_StartMorethanBiolen,                    "Start (" + NStr::UIntToString(start) +                    ") exceeds bioseq length (" +                    NStr::UIntToString(bslen) +                    ") for seq-id " + ids[id]->AsFastaString() +                    "in dendiag " + NStr::UIntToString(dendiag_num),                    align);        }                // verify length        if ( start + len > bslen ) {            PostErr(eDiag_Error, eErr_SEQ_ALIGN_SumLenStart,                    "Start + length (" + NStr::UIntToString(start + len) +                    ") exceeds bioseq length (" +                    NStr::UIntToString(bslen) +                    ") for seq-id " + ids[id]->AsFastaString() +                    "in dendiag " + NStr::UIntToString(dendiag_num),                    align);        }        ++starts_iter;    }}        void CValidError_align::x_ValidateSeqLength(const TDenseg& denseg, const CSeq_align& align){    int dim     = denseg.GetDim();    int numseg  = denseg.GetNumseg();    const CDense_seg::TIds& ids       = denseg.GetIds();    const CDense_seg::TStarts& starts = denseg.GetStarts();    const CDense_seg::TLens& lens      = denseg.GetLens();    bool minus = false;    for ( int id = 0; id < dim; ++id ) {        TSeqPos bslen = GetLength(*(ids[id]), m_Scope);        minus = denseg.IsSetStrands()  &&            denseg.GetStrands()[id] == eNa_strand_minus;                for ( int seg = 0; seg < numseg; ++seg ) {            size_t curr_index =                 id + (minus ? numseg - seg - 1 : seg) * dim;            // no need to verify if segment is not present            if ( starts[curr_index] == -1 ) {                continue;            }            size_t lens_index = minus ? numseg - seg - 1 : seg;            // verify that start plus segment does not exceed total bioseq len            if ( starts[curr_index] + lens[lens_index] > bslen ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_SumLenStart,                    "Start + segment length (" +                     NStr::UIntToString(starts[curr_index] + lens[lens_index]) +                    ") exceeds bioseq length (" +                    NStr::UIntToString(bslen) + ")", align);            }            // find the next segment that is present            size_t next_index = curr_index;            int next_seg;            for ( next_seg = seg + 1; next_seg < numseg; ++next_seg ) {                next_index =                     id + (minus ? numseg - next_seg - 1 : next_seg) * dim;                                if ( starts[next_index] != -1 ) {                    break;                }            }            if ( next_seg == numseg  ||  next_index == curr_index ) {                continue;            }            // length plus start should be equal to the closest next             // start that is not -1            if ( starts[curr_index] + (TSignedSeqPos)lens[lens_index] !=                starts[next_index] ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_DensegLenStart,                    "Start + segment length (" +                     NStr::UIntToString(starts[curr_index] + lens[lens_index]) +                    ") exceeds next start (" +                    NStr::UIntToString(starts[next_index]) + ")", align);            }        }    }}void CValidError_align::x_ValidateSeqLength(const TPacked& packed, const CSeq_align& align){    static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };    size_t dim = packed.GetDim();    size_t numseg = packed.GetNumseg();    //CPacked_seg::TStarts::const_iterator start =    //    packed.GetStarts().begin();    const CPacked_seg::TPresent& present = packed.GetPresent();    for ( size_t id = 0; id < dim; ++id ) {        for ( size_t seg = 0; seg < numseg; ++seg ) {            size_t i = id + seg * dim;            if ( (present[i / 8] & bits[i % 8]) ) {                // !!!            }        }    }}void CValidError_align::x_ValidateSeqLength(const TStd& std_segs, const CSeq_align& align){    ITERATE( TStd, iter, std_segs ) {        const CStd_seg& stdseg = **iter;        ITERATE ( CStd_seg::TLoc, loc_iter, stdseg.GetLoc() ) {            const CSeq_loc& loc = **loc_iter;                if ( loc.IsWhole()  || loc.IsEmpty()  ||  loc.IsNull() ) {                continue;            }            if ( !IsOneBioseq(loc, m_Scope) ) {                continue;            }            TSeqPos from = loc.GetTotalRange().GetFrom();            TSeqPos to   = loc.GetTotalRange().GetTo();            TSeqPos loclen = GetLength( loc, m_Scope);            TSeqPos bslen = GetLength(GetId(loc, m_Scope), m_Scope);            string  bslen_str = NStr::UIntToString(bslen);            string label;            loc.GetLabel(&label);            if ( from > bslen - 1 ) {                 PostErr(eDiag_Error, eErr_SEQ_ALIGN_StartMorethanBiolen,                    "Loaction: " + label + ". From (" +                     NStr::UIntToString(from) +                     ") is more than bioseq length (" + bslen_str + ")",                     align);            }            if ( to > bslen - 1 ) {                 PostErr(eDiag_Error, eErr_SEQ_ALIGN_EndMorethanBiolen,                    "Loaction: " + label + ". To (" + NStr::UIntToString(to) +                    ") is more than bioseq length (" + bslen_str + ")", align);            }            if ( loclen > bslen ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_LenMorethanBiolen,                    "Loaction: " + label + ". Length (" +                     NStr::UIntToString(loclen) +                     ") is more than bioseq length (" + bslen_str + ")", align);            }        }    }}END_SCOPE(validator)END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: validerror_align.cpp,v $* Revision 1000.1  2004/06/01 19:47:45  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8** Revision 1.8  2004/05/21 21:42:56  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.7  2003/09/30 18:30:38  shomrat* changed unsigned to signed to eliminate infinite loop** Revision 1.6  2003/06/02 16:06:43  dicuccio* Rearranged src/objects/ subtree.  This includes the following shifts:*     - src/objects/asn2asn --> arc/app/asn2asn*     - src/objects/testmedline --> src/objects/ncbimime/test*     - src/objects/objmgr --> src/objmgr*     - src/objects/util --> src/objmgr/util*     - src/objects/alnmgr --> src/objtools/alnmgr*     - src/objects/flat --> src/objtools/flat*     - src/objects/validator --> src/objtools/validator*     - src/objects/cddalignview --> src/objtools/cddalignview* In addition, libseq now includes six of the objects/seq... libs, and libmmdb* replaces the three libmmdb? libs.** Revision 1.5  2003/05/28 16:24:40  shomrat* Report a single FastaLike error from each alignment** Revision 1.4  2003/04/29 14:58:07  shomrat* Implemented SeqAlign validation** Revision 1.3  2003/03/31 14:40:52  shomrat* $id: -> $id$** Revision 1.2  2002/12/24 16:52:42  shomrat* Changes to include directives** Revision 1.1  2002/12/23 20:15:59  shomrat* Initial submission after splitting former implementation*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?