seq_align_mapper.cpp

来自「ncbi源码」· C++ 代码 · 共 1,238 行 · 第 1/3 页

CPP
1,238
字号
        return aln_row.m_Id;    }    // Sorted mappings    typedef vector< CRef<CMappingRange> > TSortedMappings;    TSortedMappings mappings;    CSeq_loc_Mapper::TRangeMap::iterator rg_it = rmap.begin();    for ( ; rg_it; ++rg_it) {        mappings.push_back(rg_it->second);    }    sort(mappings.begin(), mappings.end(), CMappingRangeRef_Less());    bool mapped = false;    CSeq_id_Handle dst_id;    EAlignFlags align_flags = eAlign_Normal;    int width_flag = mapper.m_UseWidth ?        mapper.m_Widths[aln_row.m_Id] : 0;    int dst_width = width_flag ? 3 : 1;    TSeqPos start = aln_row.m_Start;    if (width_flag & CSeq_loc_Mapper::fWidthProtToNuc) {        dst_width = 1;        if (aln_row.m_Width == 3) {            start *= 3;        }    }    else if (!width_flag  &&  aln_row.m_Width == 3) {        dst_width = 3;        start *= 3;    }    TSeqPos stop = start + seg.m_Len - 1;    TSeqPos left_shift = 0;    for (size_t map_idx = 0; map_idx < mappings.size(); ++map_idx) {        CRef<CMappingRange> mapping(mappings[map_idx]);        // Adjust mapping coordinates according to width        if (width_flag & CSeq_loc_Mapper::fWidthProtToNuc) {            if (width_flag & CSeq_loc_Mapper::fWidthNucToProt) {                // Copy mapping                mapping.Reset(new CMappingRange(*mappings[map_idx]));                mapping->m_Src_from *= 3;                mapping->m_Src_to = (mapping->m_Src_to + 1)*3 - 1;                mapping->m_Dst_from *= 3;            }        }        else if (!width_flag  &&  aln_row.m_Width == 3) {            // Copy mapping            mapping.Reset(new CMappingRange(*mappings[map_idx]));            mapping->m_Src_from *= 3;            mapping->m_Src_to = (mapping->m_Src_to + 1)*3 - 1;            mapping->m_Dst_from *= 3;        }        if (!mapping->CanMap(start, stop,            aln_row.m_IsSetStrand, aln_row.m_Strand)) {            // Mapping does not apply to this segment/row            continue;        }        // Check destination id        if ( dst_id ) {            if (mapping->m_Dst_id_Handle != dst_id) {                align_flags = eAlign_MultiId;            }        }        dst_id = mapping->m_Dst_id_Handle;        // At least part of the interval was converted. Calculate        // trimming coords, trim each row.        TSeqPos dl = mapping->m_Src_from <= start ?            0 : mapping->m_Src_from - start;        TSeqPos dr = mapping->m_Src_to >= stop ?            0 : stop - mapping->m_Src_to;        if (dl > 0) {            // Add segment for the skipped range            SAlignment_Segment& lseg = x_InsertSeg(seg_it,                dl, seg.m_Rows.size());            for (size_t r = 0; r < seg.m_Rows.size(); ++r) {                SAlignment_Segment::SAlignment_Row& lrow =                    lseg.CopyRow(r, seg.m_Rows[r]);                if (r == row) {                    lrow.m_Start = kInvalidSeqPos;                    lrow.m_Id = dst_id;                }                else if (lrow.m_Start != kInvalidSeqPos) {                    int width = seg.m_Rows[r].m_Width ?                        seg.m_Rows[r].m_Width : 1;                    if (lrow.SameStrand(aln_row)) {                        lrow.m_Start += left_shift/width;                    }                    else {                        lrow.m_Start +=                            (seg.m_Len - lseg.m_Len - left_shift)/width;                    }                }            }        }        start += dl;        left_shift += dl;        // At least part of the interval was converted.        SAlignment_Segment& mseg = x_InsertSeg(seg_it,            stop - dr - start + 1, seg.m_Rows.size());        if (!dl  &&  !dr) {            // copy scores if there's no truncation            mseg.SetScores(seg.m_Scores);        }        for (size_t r = 0; r < seg.m_Rows.size(); ++r) {            SAlignment_Segment::SAlignment_Row& mrow =                mseg.CopyRow(r, seg.m_Rows[r]);            if (r == row) {                // translate id and coords                CMappingRange::TRange mapped_rg =                    mapping->Map_Range(start, stop - dr);                ENa_strand dst_strand = eNa_strand_unknown;                mapping->Map_Strand(                    aln_row.m_IsSetStrand,                    aln_row.m_Strand,                    &dst_strand);                mrow.m_Id = mapping->m_Dst_id_Handle;                mrow.m_Start = mapped_rg.GetFrom()/dst_width;                mrow.m_IsSetStrand |= (dst_strand != eNa_strand_unknown);                mrow.m_Strand = dst_strand;                mrow.SetMapped();                mseg.m_HaveStrands |= mrow.m_IsSetStrand;            }            else {                if (mrow.m_Start != kInvalidSeqPos) {                    int width = seg.m_Rows[r].m_Width ?                        seg.m_Rows[r].m_Width : 1;                    if (mrow.SameStrand(aln_row)) {                        mrow.m_Start += left_shift/width;                    }                    else {                        mrow.m_Start +=                            (seg.m_Len - left_shift - mseg.m_Len)/width;                    }                }            }        }        left_shift += mseg.m_Len;        start += mseg.m_Len;        mapped = true;    }    if (align_flags == eAlign_MultiId  &&  m_AlignFlags == eAlign_Normal) {        m_AlignFlags = align_flags;    }    if ( !mapped ) {        // Do not erase the segment, just change the row ID and reset start        seg.m_Rows[row].m_Start = kInvalidSeqPos;        seg.m_Rows[row].m_Id = rmap.begin()->second->m_Dst_id_Handle;        seg.m_Rows[row].SetMapped();        return seg.m_Rows[row].m_Id;    }    if (start <= stop) {        // Add the remaining unmapped range        SAlignment_Segment& rseg = x_InsertSeg(seg_it,            stop - start + 1, seg.m_Rows.size());        for (size_t r = 0; r < seg.m_Rows.size(); ++r) {            SAlignment_Segment::SAlignment_Row& rrow =                rseg.CopyRow(r, seg.m_Rows[r]);            if (r == row) {                rrow.m_Start = kInvalidSeqPos;                rrow.m_Id = dst_id;            }            else if (rrow.m_Start != kInvalidSeqPos) {                int width = seg.m_Rows[r].m_Width ?                    seg.m_Rows[r].m_Width : 1;                if (rrow.SameStrand(aln_row)) {                    rrow.m_Start += left_shift/width;                }            }        }    }    m_Segs.erase(old_it);    return align_flags == eAlign_MultiId ? CSeq_id_Handle() : dst_id;}// Get mapped alignmentvoid CSeq_align_Mapper::x_GetDstDendiag(CRef<CSeq_align>& dst) const{    TDendiag& diags = dst->SetSegs().SetDendiag();    ITERATE(TSegments, seg_it, m_Segs) {        const SAlignment_Segment& seg = *seg_it;        CRef<CDense_diag> diag(new CDense_diag);        diag->SetDim(seg.m_Rows.size());        diag->SetLen(seg.m_Len);        ITERATE(SAlignment_Segment::TRows, row, seg.m_Rows) {            CRef<CSeq_id> id(new CSeq_id);            id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId()));            diag->SetIds().push_back(id);            diag->SetStarts().push_back(row->GetSegStart());            if (seg.m_HaveStrands) { // per-segment strands                diag->SetStrands().push_back(row->m_Strand);            }        }        if ( seg.m_Scores.size() ) {            diag->SetScores() = seg.m_Scores;        }        diags.push_back(diag);    }}void CSeq_align_Mapper::x_GetDstDenseg(CRef<CSeq_align>& dst) const{    CDense_seg& dseg = dst->SetSegs().SetDenseg();    dseg.SetDim(m_Segs.front().m_Rows.size());    dseg.SetNumseg(m_Segs.size());    if ( m_Segs.front().m_Scores.size() ) {        dseg.SetScores() = m_Segs.front().m_Scores;    }    ITERATE(SAlignment_Segment::TRows, row, m_Segs.front().m_Rows) {        CRef<CSeq_id> id(new CSeq_id);        id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId()));        dseg.SetIds().push_back(id);        if (m_HaveWidths) {            dseg.SetWidths().push_back(row->m_Width);        }    }    ITERATE(TSegments, seg_it, m_Segs) {        dseg.SetLens().push_back(seg_it->m_Len);        ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) {            dseg.SetStarts().push_back(row->GetSegStart());            if (m_HaveStrands) { // per-alignment strands                dseg.SetStrands().push_back(row->m_Strand);            }        }    }}void CSeq_align_Mapper::x_GetDstStd(CRef<CSeq_align>& dst) const{    TStd& std_segs = dst->SetSegs().SetStd();    ITERATE(TSegments, seg_it, m_Segs) {        CRef<CStd_seg> std_seg(new CStd_seg);        std_seg->SetDim(seg_it->m_Rows.size());        if ( seg_it->m_Scores.size() ) {            std_seg->SetScores() = seg_it->m_Scores;        }        ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) {            CRef<CSeq_id> id(new CSeq_id);            id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId()));            std_seg->SetIds().push_back(id);            CRef<CSeq_loc> loc(new CSeq_loc);            if (row->m_Start == kInvalidSeqPos) {                // empty                loc->SetEmpty(*id);            }            else {                // interval                loc->SetInt().SetId(*id);                TSeqPos len = seg_it->m_Len;                if (row->m_Width) {                    len /= row->m_Width;                }                loc->SetInt().SetFrom(row->m_Start);                // len may be 0 after dividing by width                loc->SetInt().SetTo(row->m_Start + (len ? len - 1 : 0));                if (row->m_IsSetStrand) {                    loc->SetInt().SetStrand(row->m_Strand);                }            }            std_seg->SetLoc().push_back(loc);        }        std_segs.push_back(std_seg);    }}void CSeq_align_Mapper::x_GetDstPacked(CRef<CSeq_align>& dst) const{    CPacked_seg& pseg = dst->SetSegs().SetPacked();    pseg.SetDim(m_Segs.front().m_Rows.size());    pseg.SetNumseg(m_Segs.size());    if ( m_Segs.front().m_Scores.size() ) {        pseg.SetScores() = m_Segs.front().m_Scores;    }    ITERATE(SAlignment_Segment::TRows, row, m_Segs.front().m_Rows) {        CRef<CSeq_id> id(new CSeq_id);        id.Reset(&const_cast<CSeq_id&>(*row->m_Id.GetSeqId()));        pseg.SetIds().push_back(id);    }    ITERATE(TSegments, seg_it, m_Segs) {        pseg.SetLens().push_back(seg_it->m_Len);        ITERATE(SAlignment_Segment::TRows, row, seg_it->m_Rows) {            pseg.SetStarts().push_back(row->GetSegStart());            pseg.SetPresent().push_back(row->m_Start != kInvalidSeqPos);            if (m_HaveStrands) {                pseg.SetStrands().push_back(row->m_Strand);            }        }    }}void CSeq_align_Mapper::x_GetDstDisc(CRef<CSeq_align>& dst) const{    CSeq_align_set::Tdata& data = dst->SetSegs().SetDisc().Set();    ITERATE(TSubAligns, it, m_SubAligns) {        data.push_back(it->GetDstAlign());    }}CRef<CSeq_align> CSeq_align_Mapper::GetDstAlign(void) const{    if (m_DstAlign) {        return m_DstAlign;    }    CRef<CSeq_align> dst(new CSeq_align);    dst->SetType(m_OrigAlign->GetType());    if (m_OrigAlign->IsSetDim()) {        dst->SetDim(m_OrigAlign->GetDim());    }    if (m_OrigAlign->IsSetScore()) {        dst->SetScore() = m_OrigAlign->GetScore();    }    if (m_OrigAlign->IsSetBounds()) {        dst->SetBounds() = m_OrigAlign->GetBounds();    }    switch ( m_OrigAlign->GetSegs().Which() ) {    case CSeq_align::C_Segs::e_Dendiag:        {            x_GetDstDendiag(dst);            break;        }    case CSeq_align::C_Segs::e_Denseg:        {            if (m_AlignFlags == eAlign_Normal) {                x_GetDstDenseg(dst);            }            else {                x_GetDstDendiag(dst);            }            break;        }    case CSeq_align::C_Segs::e_Std:        {            x_GetDstStd(dst);            break;        }    case CSeq_align::C_Segs::e_Packed:        {            if (m_AlignFlags == eAlign_Normal) {                x_GetDstPacked(dst);            }            else {                x_GetDstDendiag(dst);            }            break;        }    case CSeq_align::C_Segs::e_Disc:        {            x_GetDstDisc(dst);            break;        }    default:        {            dst->Assign(*m_OrigAlign);            break;        }    }    return m_DstAlign = dst;}END_SCOPE(objects)END_NCBI_SCOPE/** ---------------------------------------------------------------------------* $Log: seq_align_mapper.cpp,v $* Revision 1000.1  2004/06/01 19:23:43  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10** Revision 1.10  2004/05/27 17:52:11  grichenk* Fixed warnings** Revision 1.9  2004/05/26 14:57:47  ucko* Change some types from unsigned int to size_t for consistency with STL* containers' size() methods.** Revision 1.8  2004/05/26 14:29:20  grichenk* Redesigned CSeq_align_Mapper: preserve non-mapping intervals,* fixed strands handling, improved performance.** Revision 1.7  2004/05/21 21:42:12  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6  2004/05/13 19:10:57  grichenk* Preserve scores in mapped alignments** Revision 1.5  2004/05/10 20:22:24  grichenk* Fixed more warnings (removed inlines)** Revision 1.4  2004/04/12 14:35:59  grichenk* Fixed mapping of alignments between nucleotides and proteins** Revision 1.3  2004/04/07 18:36:12  grichenk* Fixed std-seg mapping** Revision 1.2  2004/04/01 20:11:17  rsmith* add missing break to switch on seq-id types.** Revision 1.1  2004/03/30 15:40:35  grichenk* Initial revision*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?