seq_loc_mapper.cpp

来自「ncbi源码」· C++ 代码 · 共 1,895 行 · 第 1/5 页

CPP
1,895
字号
                for (size_t i = 0; i < (*std_seg)->GetIds().size(); ++i) {                    if ((*std_seg)->GetIds()[i]->Equals(to_id)) {                        to_row = i;                        break;                    }                }                if (to_row == size_t(-1)) {                    NCBI_THROW(CLocMapperException, eBadAlignment,                               "Target ID not found in the alignment");                }                x_InitAlign(**std_seg, to_row);            }            break;        }    case CSeq_align::C_Segs::e_Packed:        {            const CPacked_seg& pseg = map_align.GetSegs().GetPacked();            size_t to_row = size_t(-1);            for (size_t i = 0; i < pseg.GetIds().size(); ++i) {                if (pseg.GetIds()[i]->Equals(to_id)) {                    to_row = i;                    break;                }            }            if (to_row == size_t(-1)) {                NCBI_THROW(CLocMapperException, eBadAlignment,                           "Target ID not found in the alignment");            }            x_InitAlign(pseg, to_row);            break;        }    case CSeq_align::C_Segs::e_Disc:        {            const CSeq_align_set& aln_set = map_align.GetSegs().GetDisc();            ITERATE(CSeq_align_set::Tdata, aln, aln_set.Get()) {                x_Initialize(**aln, to_id);            }            break;        }    default:        NCBI_THROW(CLocMapperException, eBadAlignment,                   "Unsupported alignment type");    }}void CSeq_loc_Mapper::x_Initialize(const CSeq_align& map_align,                                   size_t            to_row){    switch ( map_align.GetSegs().Which() ) {    case CSeq_align::C_Segs::e_Dendiag:        {            const TDendiag& diags = map_align.GetSegs().GetDendiag();            ITERATE(TDendiag, diag_it, diags) {                x_InitAlign(**diag_it, to_row);            }            break;        }    case CSeq_align::C_Segs::e_Denseg:        {            const CDense_seg& dseg = map_align.GetSegs().GetDenseg();            x_InitAlign(dseg, to_row);            break;        }    case CSeq_align::C_Segs::e_Std:        {            const TStd& std_segs = map_align.GetSegs().GetStd();            ITERATE(TStd, std_seg, std_segs) {                x_InitAlign(**std_seg, to_row);            }            break;        }    case CSeq_align::C_Segs::e_Packed:        {            const CPacked_seg& pseg = map_align.GetSegs().GetPacked();            x_InitAlign(pseg, to_row);            break;        }    case CSeq_align::C_Segs::e_Disc:        {            // The same row in each sub-alignment?            const CSeq_align_set& aln_set = map_align.GetSegs().GetDisc();            ITERATE(CSeq_align_set::Tdata, aln, aln_set.Get()) {                x_Initialize(**aln, to_row);            }            break;        }    default:        NCBI_THROW(CLocMapperException, eBadAlignment,                   "Unsupported alignment type");    }}void CSeq_loc_Mapper::x_InitAlign(const CDense_diag& diag, size_t to_row){    if (diag.GetStarts()[to_row] < 0) {        // Destination ID is not present in this dense-diag        return;    }    size_t dim = diag.GetDim();    _ASSERT(to_row < dim);    if (dim != diag.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in dendiag");        dim = min(dim, diag.GetIds().size());    }    if (dim != diag.GetStarts().size()) {        ERR_POST(Warning << "Invalid 'starts' size in dendiag");        dim = min(dim, diag.GetStarts().size());    }    bool have_strands = diag.IsSetStrands();    if (have_strands && dim != diag.GetStrands().size()) {        ERR_POST(Warning << "Invalid 'strands' size in dendiag");        dim = min(dim, diag.GetStrands().size());    }    ENa_strand dst_strand = have_strands ?        diag.GetStrands()[to_row] : eNa_strand_unknown;    const CSeq_id& dst_id = *diag.GetIds()[to_row];    if (!m_Dst_width) {        m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width);    }    for (size_t row = 0; row < dim; ++row) {        if (row == to_row) {            continue;        }        if (diag.GetStarts()[row] < 0) {            continue;        }        // In alignments with multiple sequence types segment length        // should be multiplied by character width.        int src_width = 0;        const CSeq_id& src_id = *diag.GetIds()[row];        src_width = x_CheckSeqWidth(src_id, src_width);        m_UseWidth |= (src_width != m_Dst_width);        int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width;        int src_width_rel = (m_UseWidth) ? 1 : src_width;        TSeqPos src_start = diag.GetStarts()[row]*dst_width_rel;        TSeqPos src_len = diag.GetLen()*src_width*dst_width_rel;        TSeqPos dst_start = diag.GetStarts()[to_row]*src_width_rel;        TSeqPos dst_len = diag.GetLen()*m_Dst_width*src_width_rel;        ENa_strand src_strand = have_strands ?            diag.GetStrands()[row] : eNa_strand_unknown;        x_NextMappingRange(            src_id, src_start, src_len, src_strand,            dst_id, dst_start, dst_len, dst_strand);        _ASSERT(!src_len  &&  !dst_len);    }}void CSeq_loc_Mapper::x_InitAlign(const CDense_seg& denseg, size_t to_row){    size_t dim = denseg.GetDim();    _ASSERT(to_row < dim);    size_t numseg = denseg.GetNumseg();    // claimed dimension may not be accurate :-/    if (numseg != denseg.GetLens().size()) {        ERR_POST(Warning << "Invalid 'lens' size in denseg");        numseg = min(numseg, denseg.GetLens().size());    }    if (dim != denseg.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in denseg");        dim = min(dim, denseg.GetIds().size());    }    if (dim*numseg != denseg.GetStarts().size()) {        ERR_POST(Warning << "Invalid 'starts' size in denseg");        dim = min(dim*numseg, denseg.GetStarts().size()) / numseg;    }    bool have_strands = denseg.IsSetStrands();    if (have_strands && dim*numseg != denseg.GetStrands().size()) {        ERR_POST(Warning << "Invalid 'strands' size in denseg");        dim = min(dim*numseg, denseg.GetStrands().size()) / numseg;    }    const CSeq_id& dst_id = *denseg.GetIds()[to_row];    if (!m_Dst_width) {        if ( denseg.IsSetWidths() ) {            m_Dst_width = denseg.GetWidths()[to_row];        }        else {            m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width);        }    }    for (size_t row = 0; row < dim; ++row) {        if (row == to_row) {            continue;        }        const CSeq_id& src_id = *denseg.GetIds()[row];        int src_width = 0;        if ( denseg.IsSetWidths() ) {            src_width = denseg.GetWidths()[row];        }        else {            src_width = x_CheckSeqWidth(src_id, src_width);        }        m_UseWidth |= (src_width != m_Dst_width);        int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width;        int src_width_rel = (m_UseWidth) ? 1 : src_width;        for (size_t seg = 0; seg < numseg; ++seg) {            int i_src_start = denseg.GetStarts()[seg*dim + row];            int i_dst_start = denseg.GetStarts()[seg*dim + to_row];            if (i_src_start < 0  ||  i_dst_start < 0) {                continue;            }            ENa_strand dst_strand = have_strands ?                denseg.GetStrands()[seg*dim + to_row] : eNa_strand_unknown;            ENa_strand src_strand = have_strands ?                denseg.GetStrands()[seg*dim + row] : eNa_strand_unknown;            // In alignments with multiple sequence types segment length            // should be multiplied by character width.            TSeqPos src_len = denseg.GetLens()[seg]*src_width*dst_width_rel;            TSeqPos dst_len = denseg.GetLens()[seg]*m_Dst_width*src_width_rel;            TSeqPos src_start = (TSeqPos)(i_src_start)*dst_width_rel;            TSeqPos dst_start = (TSeqPos)(i_dst_start)*src_width_rel;            x_NextMappingRange(                src_id, src_start, src_len, src_strand,                dst_id, dst_start, dst_len, dst_strand);            _ASSERT(!src_len  &&  !dst_len);        }    }}void CSeq_loc_Mapper::x_InitAlign(const CStd_seg& sseg, size_t to_row){    size_t dim = sseg.GetDim();    if (dim != sseg.GetLoc().size()) {        ERR_POST(Warning << "Invalid 'loc' size in std-seg");        dim = min(dim, sseg.GetLoc().size());    }    if (sseg.IsSetIds()        && dim != sseg.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in std-seg");        dim = min(dim, sseg.GetIds().size());    }    const CSeq_loc& dst_loc = *sseg.GetLoc()[to_row];    for (size_t row = 0; row < dim; ++row ) {        if (row == to_row) {            continue;        }        const CSeq_loc& src_loc = *sseg.GetLoc()[row];        if ( src_loc.IsEmpty() ) {            // skipped row in this segment            continue;        }        x_Initialize(src_loc, dst_loc);    }}void CSeq_loc_Mapper::x_InitAlign(const CPacked_seg& pseg, size_t to_row){    size_t dim    = pseg.GetDim();    size_t numseg = pseg.GetNumseg();    // claimed dimension may not be accurate :-/    if (numseg != pseg.GetLens().size()) {        ERR_POST(Warning << "Invalid 'lens' size in packed-seg");        numseg = min(numseg, pseg.GetLens().size());    }    if (dim != pseg.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in packed-seg");        dim = min(dim, pseg.GetIds().size());    }    if (dim*numseg != pseg.GetStarts().size()) {        ERR_POST(Warning << "Invalid 'starts' size in packed-seg");        dim = min(dim*numseg, pseg.GetStarts().size()) / numseg;    }    bool have_strands = pseg.IsSetStrands();    if (have_strands && dim*numseg != pseg.GetStrands().size()) {        ERR_POST(Warning << "Invalid 'strands' size in packed-seg");        dim = min(dim*numseg, pseg.GetStrands().size()) / numseg;    }    const CSeq_id& dst_id = *pseg.GetIds()[to_row];    if (!m_Dst_width) {        m_Dst_width = x_CheckSeqWidth(dst_id, m_Dst_width);    }    for (size_t row = 0; row < dim; ++row) {        if (row == to_row) {            continue;        }        const CSeq_id& src_id = *pseg.GetIds()[row];        int src_width = 0;        src_width = x_CheckSeqWidth(src_id, src_width);        m_UseWidth |= (src_width != m_Dst_width);        int dst_width_rel = (m_UseWidth) ? 1 : m_Dst_width;        int src_width_rel = (m_UseWidth) ? 1 : src_width;        for (size_t seg = 0; seg < numseg; ++seg) {            if (!pseg.GetPresent()[row]  ||  !pseg.GetPresent()[to_row]) {                continue;            }            ENa_strand dst_strand = have_strands ?                pseg.GetStrands()[seg*dim + to_row] : eNa_strand_unknown;            ENa_strand src_strand = have_strands ?                pseg.GetStrands()[seg*dim + row] : eNa_strand_unknown;            // In alignments with multiple sequence types segment length            // should be multiplied by character width.            TSeqPos src_len = pseg.GetLens()[seg]*src_width*dst_width_rel;            TSeqPos dst_len = pseg.GetLens()[seg]*m_Dst_width*src_width_rel;            TSeqPos src_start = pseg.GetStarts()[seg*dim + row]*dst_width_rel;            TSeqPos dst_start = pseg.GetStarts()[seg*dim + to_row]*src_width_rel;            x_NextMappingRange(                src_id, src_start, src_len, src_strand,                dst_id, dst_start, dst_len, dst_strand);            _ASSERT(!src_len  &&  !dst_len);        }    }}void CSeq_loc_Mapper::x_Initialize(const CSeqMap& seq_map,                                   const CSeq_id* top_id){    CSeqMap::const_iterator seg_it =        seq_map.begin_resolved(m_Scope.GetPointerOrNull(), size_t(-1),        CSeqMap::fFindRef);    TSeqPos top_start = kInvalidSeqPos;    TSeqPos top_stop = kInvalidSeqPos;    TSeqPos dst_seg_start = kInvalidSeqPos;    CConstRef<CSeq_id> dst_id;    for ( ; seg_it; ++seg_it) {        _ASSERT(seg_it.GetType() == CSeqMap::eSeqRef);        if (seg_it.GetPosition() > top_stop  ||  !dst_id) {            // New top-level segment            top_start = seg_it.GetPosition();            top_stop = seg_it.GetEndPosition() - 1;            if (top_id) {                // Top level is a bioseq                dst_id.Reset(top_id);                dst_seg_start = top_start;            }            else {                // Top level is a seq-loc, positions are                // on the first-level references                dst_id = seg_it.GetRefSeqid().GetSeqId();                dst_seg_start = seg_it.GetRefPosition();                continue;            }        }        // when top_id is set, destination position = GetPosition(),        // else it needs to be calculated from top_start/stop and dst_start/stop.        TSeqPos dst_from = dst_seg_start + seg_it.GetPosition() - top_start;        _ASSERT(dst_from >= dst_seg_start);        TSeqPos dst_len = seg_it.GetLength();        CConstRef<CSeq_id> src_id(seg_it.GetRefSeqid().GetSeqId());        TSeqPos src_from = seg_it.GetRefPosition();        TSeqPos src_len = dst_len;        ENa_strand src_strand = seg_it.GetRefMinusStrand() ?            eNa_strand_minus : eNa_strand_unknown;        x_NextMappingRange(*src_id, src_from, src_len, src_strand,            *dst_id, dst_from, dst_len, eNa_strand_unknown);        _ASSERT(src_len == 0  &&  dst_len == 0);    }}void CSeq_loc_Mapper::x_Initialize(const CSeqMap& seq_map,                                   size_t         depth,                                   const CSeq_id* top_id){    CSeqMap::const_iterator seg_it =        seq_map.begin_resolved(m_Scope.GetPointerOrNull(), depth,        CSeqMap::fFindRef);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?