validerror_align.cpp

来自「ncbi源码」· C++ 代码 · 共 1,121 行 · 第 1/3 页

CPP
1,121
字号
        if ( stdseg.IsSetIds() ) {            if ( dim != stdseg.GetIds().size() ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsDimMismatch,                    "Mismatch between specified dimension (" +                    NStr::UIntToString(dim) +                     ") and number of Seq-ids (" +                     NStr::UIntToString(stdseg.GetIds().size()) + ")",                    align);                cont = false;                continue;            }        }    }    if( cont ) {        x_ValidateStrand(std_segs, align);        x_ValidateFastaLike(std_segs, align);        x_ValidateSegmentGap(std_segs, align);                if ( m_Imp.IsRemoteFetch() ) {            x_ValidateSeqId(align);            x_ValidateSeqLength(std_segs, align);        }    }}template <typename T>bool CValidError_align::x_ValidateDim(T& obj, const CSeq_align& align, size_t part){    if ( !obj.IsSetDim() ) {        string msg = "Dim not set";        if ( part > 0 ) {            msg += " Part " + NStr::UIntToString(part);        }        PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsInvalidDim, msg, align);        return false;    }    size_t dim = obj.GetDim();            if ( dim < 2 ) {        string msg = "Dimension (" + NStr::UIntToString(dim) +             ") must be at least 2.";        if ( part > 0 ) {            msg += " Part " + NStr::UIntToString(part);        }        PostErr(eDiag_Error, eErr_SEQ_ALIGN_SegsInvalidDim, msg, align);        return false;    }    return true;}//===========================================================================// x_ValidateStrand:////  Check if the  strand is consistent in SeqAlignment of global//  or partial type.//===========================================================================void CValidError_align::x_ValidateStrand(const TDenseg& denseg, const CSeq_align& align){    if ( !denseg.IsSetStrands() ) {        return;    }        size_t dim = denseg.GetDim();    size_t numseg = denseg.GetNumseg();    const CDense_seg::TStrands& strands = denseg.GetStrands();    // go through id for each alignment sequence    for ( size_t id = 0; id < dim; ++id ) {        ENa_strand strand1 = strands[id];                for ( size_t seg = 0; seg < numseg; ++seg ) {            ENa_strand strand2 = strands[id + (seg * dim)];            // skip undefined strand            if ( strand2 == eNa_strand_unknown  ||                 strand2 == eNa_strand_other ) {                continue;            }            if ( strand1 == eNa_strand_unknown  ||                 strand1 == eNa_strand_other ) {                strand1 = strand2;                continue;            }            // strands should be same for a given seq-id            if ( strand1 != strand2 ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev,                    "The strand labels for SeqId " +                     denseg.GetIds()[id]->AsFastaString() +                    " are inconsistent across the alignment. "                    "The first inconsistent region is the " +                     NStr::UIntToString(seg), align);                    break;            }        }    }}void CValidError_align::x_ValidateStrand(const TPacked& packed, const CSeq_align& align){    if ( !packed.IsSetStrands() ) {        return;    }    size_t dim = packed.GetDim();    const CPacked_seg::TPresent& present = packed.GetPresent();    const CPacked_seg::TStrands& strands = packed.GetStrands();    CPacked_seg::TStrands::const_iterator strand = strands.begin();        vector<ENa_strand> id_strands(dim, eNa_strand_unknown);    Uchar mask = 0x01;    size_t present_size = present.size();    for ( size_t i = 0; i < present_size; ++i ) {        if ( present[i] == 0 ) {   // no bits in this char            continue;        }        for ( int shift = 7; shift >= 0; --shift ) {            if ( present[i] & (mask << shift) ) {                size_t id = i * 8 + 7 - shift;                if ( *strand == eNa_strand_unknown  ||                       *strand == eNa_strand_other ) {                    ++strand;                    continue;                }                                if ( id_strands[id] == eNa_strand_unknown  ||                      id_strands[id] == eNa_strand_other ) {                    id_strands[id] = *strand;                    ++strand;                    continue;                }                                if ( id_strands[id] != *strand ) {                    CPacked_seg::TIds::const_iterator id_iter =                        packed.GetIds().begin();                    for ( ; id; --id ) {                        ++id_iter;                    }                                        PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev,                        "Strand for SeqId " + (*id_iter)->AsFastaString() +                        " are inconsistent across the alignment", align);                }                ++strand;            }        }    }}void CValidError_align::x_ValidateStrand(const TStd& std_segs, const CSeq_align& align){    map< CConstRef<CSeq_id>, ENa_strand > strands;    ITERATE ( TStd, stdseg, std_segs ) {        ITERATE ( CStd_seg::TLoc, loc_iter, (*stdseg)->GetLoc() ) {            const CSeq_loc& loc = **loc_iter;                        if ( !IsOneBioseq(loc, m_Scope) ) {                // !!! should probably be an error                continue;            }            CConstRef<CSeq_id> id(&GetId(loc, m_Scope));            ENa_strand strand = GetStrand(loc, m_Scope);            if ( strand == eNa_strand_unknown  ||                  strand == eNa_strand_other ) {                continue;            }            if ( strands[id] == eNa_strand_unknown  ||                 strands[id] == eNa_strand_other ) {                strands[id] = strand;            }            if ( strands[id] != strand ) {                PostErr(eDiag_Error, eErr_SEQ_ALIGN_StrandRev,                    "Strands for SeqId " + id->AsFastaString() +                     " are inconsistent across the alignment", align);            }        }    }}//===========================================================================// x_ValidateFastaLike:////  Check if an alignment is FASTA-like. //  Alignment is FASTA-like if all gaps are at the end with dimensions > 2.//===========================================================================void CValidError_align::x_ValidateFastaLike(const TDenseg& denseg, const CSeq_align& align){    // check only global or partial type    if ( (align.GetType() != CSeq_align::eType_global  &&         align.GetType() != CSeq_align::eType_partial) ||         denseg.GetDim() <= 2 ) {        return;    }    size_t dim = denseg.GetDim();    size_t numseg = denseg.GetNumseg();    vector<string> fasta_like;    for ( size_t id = 0; id < dim; ++id ) {        bool gap = false;                const CDense_seg::TStarts& starts = denseg.GetStarts();        for ( size_t seg = 0; seg < numseg; ++ seg ) {            // if start value is -1, set gap flag to true            if ( starts[id + (dim * seg)] < 0 ) {                gap = true;            } else if ( gap  ) {                // if a positive start value is found after the initial -1                 // start value, it's not fasta like.                 //no need to check this sequence further                break;            }             if ( seg == numseg - 1  &&  gap ) {                // if no more positive start value are found after the initial                // -1 start value, it's fasta like                fasta_like.push_back(denseg.GetIds()[id]->AsFastaString());            }        }    }    if ( !fasta_like.empty() ) {        string fasta_like_ids;        bool first = true;        ITERATE( vector<string>, idstr, fasta_like ) {            if ( first ) {                first = false;            } else {                fasta_like_ids += ", ";            }            fasta_like_ids += *idstr;        }        fasta_like_ids += ".";        PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike,            "This may be a fasta-like alignment for SeqIds: " +             fasta_like_ids, align);    }}           void CValidError_align::x_ValidateFastaLike(const TPacked& packed, const CSeq_align& align){    // check only global or partial type    if ( align.GetType() == CSeq_align::eType_global  ||         align.GetType() == CSeq_align::eType_partial ||         packed.GetDim() <= 2 ) {        return;    }    static Uchar bits[] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 };        size_t dim = packed.GetDim();    size_t numseg = packed.GetNumseg();    CPacked_seg::TIds::const_iterator id_iter = packed.GetIds().begin();    const CPacked_seg::TPresent& present = packed.GetPresent();    for ( size_t id = 0; id < dim;  ++id, ++id_iter ) {        bool gap = false;        for ( size_t seg = 0; seg < numseg; ++ seg ) {            // if a segment is not present, set gap flag to true            size_t i = id + (dim * seg);            if ( !(present[i / 8] & bits[i % 8]) ) {                gap = true;            } else if ( gap ) {                // if a segment is found later, then it's not fasta like,                 // no need to check this sequence further.                break;            }             if ( seg == numseg - 1  &&  gap ) {                // if no more segments are found for this sequence,                 // then it's  fasta like.                PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike,                    "This may be a fasta-like alignment for SeqId: " +                     (*id_iter)->AsFastaString(), align);            }        }    }}void CValidError_align::x_ValidateFastaLike(const TStd& std_segs, const CSeq_align& align){    // check only global or partial type    if ( align.GetType() == CSeq_align::eType_global  ||         align.GetType() == CSeq_align::eType_partial ) {        return;    }    // suspected seq-ids    typedef map< CConstRef<CSeq_id>, bool > TIdGapMap;    TIdGapMap fasta_like;    ITERATE ( TStd, stdseg, std_segs ) {        ITERATE ( CStd_seg::TLoc, loc_iter, (*stdseg)->GetLoc() ) {            const CSeq_loc& loc = **loc_iter;                        if ( !IsOneBioseq(loc, m_Scope) ) {                // !!! should probably be an error                continue;            }            CConstRef<CSeq_id> id(&GetId(loc, m_Scope));            bool gap = loc.IsEmpty()  ||  loc.IsNull();            if ( gap  &&  fasta_like.find(id) == fasta_like.end() ) {                fasta_like[id] = true;            }            if ( !gap  &&  fasta_like.find(id) != fasta_like.end() ) {                fasta_like[id] = false;            }        }    }        ITERATE ( TIdGapMap, iter, fasta_like ) {        if ( iter->second ) {            PostErr(eDiag_Error, eErr_SEQ_ALIGN_FastaLike,                "This may be a fasta-like alignment for SeqId: " +                 iter->first->AsFastaString(), align);        }    }}//===========================================================================// x_ValidateSegmentGap://// Check if there is a gap for all sequences in a segment.//===========================================================================void CValidError_align::x_ValidateSegmentGap(const TDenseg& denseg, const CSeq_align& align){    int numseg  = denseg.GetNumseg();    int dim     = denseg.GetDim();    const CDense_seg::TStarts& starts = denseg.GetStarts();    for ( int seg = 0; seg < numseg; ++seg ) {        bool seggap = true;        for ( int id = 0; id < dim; ++id ) {            if ( starts[seg * dim + id] != -1 ) {                seggap = false;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?