seq_align_mapper.cpp

来自「ncbi源码」· C++ 代码 · 共 1,238 行 · 第 1/3 页

CPP
1,238
字号
/* * =========================================================================== * PRODUCTION $Log: seq_align_mapper.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 19:23:43  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//*  $Id: seq_align_mapper.cpp,v 1000.1 2004/06/01 19:23:43 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aleksey Grichenko** File Description:*   Alignment mapper**/#include <ncbi_pch.hpp>#include <objmgr/impl/seq_align_mapper.hpp>#include <objmgr/seq_id_mapper.hpp>#include <objmgr/seq_loc_mapper.hpp>#include <objmgr/objmgr_exception.hpp>#include <objects/seqalign/seqalign__.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)SAlignment_Segment::SAlignment_Segment(int len, size_t dim)    : m_Len(len),      m_Rows(dim),      m_HaveStrands(false){    return;}SAlignment_Segment::SAlignment_Row&SAlignment_Segment::GetRow(size_t idx){    _ASSERT(m_Rows.size() > idx);    return m_Rows[idx];}SAlignment_Segment::SAlignment_Row&SAlignment_Segment::CopyRow(size_t idx, const SAlignment_Row& src_row){    SAlignment_Row& dst_row = GetRow(idx);    dst_row = src_row;    return dst_row;}SAlignment_Segment::SAlignment_Row& SAlignment_Segment::AddRow(size_t idx,                                                               const CSeq_id& id,                                                               int start,                                                               bool is_set_strand,                                                               ENa_strand strand,                                                               int width){    SAlignment_Row& row = GetRow(idx);    row.m_Id = CSeq_id_Handle::GetHandle(id);    row.m_Start = start < 0 ? kInvalidSeqPos : start;    row.m_IsSetStrand = is_set_strand;    row.m_Strand = strand;    row.m_Width = width;    m_HaveStrands |= is_set_strand;    return row;}SAlignment_Segment::SAlignment_Row& SAlignment_Segment::AddRow(size_t idx,                                                               const CSeq_id_Handle& id,                                                               int start,                                                               bool is_set_strand,                                                               ENa_strand strand,                                                               int width){    SAlignment_Row& row = GetRow(idx);    row.m_Id = id;    row.m_Start = start < 0 ? kInvalidSeqPos : start;    row.m_IsSetStrand = is_set_strand;    row.m_Strand = strand;    row.m_Width = width;    m_HaveStrands |= is_set_strand;    return row;}void SAlignment_Segment::SetScores(const TScores& scores){    m_Scores = scores;}CSeq_align_Mapper::CSeq_align_Mapper(const CSeq_align& align)    : m_OrigAlign(&align),      m_DstAlign(0),      m_HaveStrands(false),      m_HaveWidths(false),      m_Dim(0),      m_AlignFlags(eAlign_Normal){    switch ( align.GetSegs().Which() ) {    case CSeq_align::C_Segs::e_Dendiag:        x_Init(align.GetSegs().GetDendiag());        break;    case CSeq_align::C_Segs::e_Denseg:        x_Init(align.GetSegs().GetDenseg());        break;    case CSeq_align::C_Segs::e_Std:        x_Init(align.GetSegs().GetStd());        break;    case CSeq_align::C_Segs::e_Packed:        x_Init(align.GetSegs().GetPacked());        break;    case CSeq_align::C_Segs::e_Disc:        x_Init(align.GetSegs().GetDisc());        break;    default:        break;    }}SAlignment_Segment& CSeq_align_Mapper::x_PushSeg(int len, size_t dim){    m_Segs.push_back(SAlignment_Segment(len, dim));    return m_Segs.back();}SAlignment_Segment& CSeq_align_Mapper::x_InsertSeg(TSegments::iterator& where,                                                   int len,                                                   size_t dim){    return *m_Segs.insert(where, SAlignment_Segment(len, dim));}void CSeq_align_Mapper::x_Init(const TDendiag& diags){    ITERATE(TDendiag, diag_it, diags) {        const CDense_diag& diag = **diag_it;        size_t dim = diag.GetDim();        if (dim != diag.GetIds().size()) {            ERR_POST(Warning << "Invalid 'ids' size in dendiag");            dim = min(dim, diag.GetIds().size());        }        if (dim != diag.GetStarts().size()) {            ERR_POST(Warning << "Invalid 'starts' size in dendiag");            dim = min(dim, diag.GetStarts().size());        }        m_HaveStrands = diag.IsSetStrands();        if (m_HaveStrands && dim != diag.GetStrands().size()) {            ERR_POST(Warning << "Invalid 'strands' size in dendiag");            dim = min(dim, diag.GetStrands().size());        }        if (dim != m_Dim) {            if ( m_Dim ) {                m_AlignFlags = eAlign_MultiDim;            }            m_Dim = max(dim, m_Dim);        }        SAlignment_Segment& seg = x_PushSeg(diag.GetLen(), dim);        ENa_strand strand = eNa_strand_unknown;        if ( diag.IsSetScores() ) {            seg.SetScores(diag.GetScores());        }        for (size_t row = 0; row < dim; ++row) {            if ( m_HaveStrands ) {                strand = diag.GetStrands()[row];            }            seg.AddRow(row,                *diag.GetIds()[row],                diag.GetStarts()[row],                m_HaveStrands,                strand,                0);        }    }}void CSeq_align_Mapper::x_Init(const CDense_seg& denseg){    m_Dim = denseg.GetDim();    size_t numseg = denseg.GetNumseg();    // claimed dimension may not be accurate :-/    if (numseg != denseg.GetLens().size()) {        ERR_POST(Warning << "Invalid 'lens' size in denseg");        numseg = min(numseg, denseg.GetLens().size());    }    if (m_Dim != denseg.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in denseg");        m_Dim = min(m_Dim, denseg.GetIds().size());    }    if (m_Dim*numseg != denseg.GetStarts().size()) {        ERR_POST(Warning << "Invalid 'starts' size in denseg");        m_Dim = min(m_Dim*numseg, denseg.GetStarts().size()) / numseg;    }    m_HaveStrands = denseg.IsSetStrands();    if (m_HaveStrands && m_Dim*numseg != denseg.GetStrands().size()) {        ERR_POST(Warning << "Invalid 'strands' size in denseg");        m_Dim = min(m_Dim*numseg, denseg.GetStrands().size()) / numseg;    }    m_HaveWidths = denseg.IsSetWidths();    ENa_strand strand = eNa_strand_unknown;    for (size_t seg = 0;  seg < numseg;  seg++) {        SAlignment_Segment& alnseg = x_PushSeg(denseg.GetLens()[seg], m_Dim);        if ( seg == 0  &&  denseg.IsSetScores() ) {            // Set scores for the first segment only to avoid multiple copies            alnseg.SetScores(denseg.GetScores());        }        for (unsigned int row = 0;  row < m_Dim;  row++) {            if ( m_HaveStrands ) {                strand = denseg.GetStrands()[seg*m_Dim + row];            }            alnseg.AddRow(row,                *denseg.GetIds()[row],                denseg.GetStarts()[seg*m_Dim + row],                m_HaveStrands,                strand,                m_HaveWidths ? denseg.GetWidths()[row] : 0);        }    }}void CSeq_align_Mapper::x_Init(const TStd& sseg){    typedef map<CSeq_id_Handle, int> TSegLenMap;    TSegLenMap seglens;    ITERATE ( CSeq_align::C_Segs::TStd, it, sseg ) {        const CStd_seg& stdseg = **it;        size_t dim = stdseg.GetDim();        if (stdseg.IsSetIds()            && dim != stdseg.GetIds().size()) {            ERR_POST(Warning << "Invalid 'ids' size in std-seg");            dim = min(dim, stdseg.GetIds().size());        }        SAlignment_Segment& seg = x_PushSeg(0, dim);        if ( stdseg.IsSetScores() ) {            seg.SetScores(stdseg.GetScores());        }        int seg_len = 0;        bool multi_width = false;        ITERATE ( CStd_seg::TLoc, it_loc, (*it)->GetLoc() ) {            const CSeq_loc& loc = **it_loc;            const CSeq_id* id = 0;            int start = -1;            int len = 0;            ENa_strand strand = eNa_strand_unknown;            bool have_strand = false;            unsigned int row_idx = 0;            for (CSeq_loc_CI rg(loc); rg; ++rg, ++row_idx) {                if (row_idx > dim) {                    ERR_POST(Warning << "Invalid number of rows in std-seg");                    dim = row_idx;                    seg.m_Rows.resize(dim);                }                id = &rg.GetSeq_id();                if ( rg.IsEmpty() ) {                    start = kInvalidSeqPos;                }                else {                    start = rg.GetRange().GetFrom();                    len = rg.GetRange().GetLength();                    strand = rg.GetStrand();                    if (strand != eNa_strand_unknown) {                        m_HaveStrands = have_strand = true;                    }                }                if (len > 0) {                    if (seg_len == 0) {                        seg_len = len;                    }                    else if (len != seg_len) {                        multi_width = true;                        if (len/3 == seg_len) {                            seg_len = len;                        }                        else if (len*3 != seg_len) {                            NCBI_THROW(CAnnotException, eBadLocation,                                    "Rows have different lengths in std-seg");                        }                    }                }                seglens[seg.AddRow(row_idx,                    *id,                    start,                    have_strand,                    strand,                    0).m_Id] = len;            }            if (dim != m_Dim) {                if ( m_Dim ) {                    m_AlignFlags = eAlign_MultiDim;                }                m_Dim = max(dim, m_Dim);            }        }        seg.m_Len = seg_len;        if ( multi_width ) {            // Adjust each segment width. Do not check if sequence always            // has the same width.            for (size_t i = 0; i < seg.m_Rows.size(); ++i) {                if (seglens[seg.m_Rows[i].m_Id] != seg_len) {                    seg.m_Rows[i].m_Width = 3;                }            }        }        seglens.clear();        m_HaveWidths |= multi_width;    }}void CSeq_align_Mapper::x_Init(const CPacked_seg& pseg){    m_Dim = pseg.GetDim();    size_t numseg = pseg.GetNumseg();    // claimed dimension may not be accurate :-/    if (numseg != pseg.GetLens().size()) {        ERR_POST(Warning << "Invalid 'lens' size in packed-seg");        numseg = min(numseg, pseg.GetLens().size());    }    if (m_Dim != pseg.GetIds().size()) {        ERR_POST(Warning << "Invalid 'ids' size in packed-seg");        m_Dim = min(m_Dim, pseg.GetIds().size());    }    if (m_Dim*numseg != pseg.GetStarts().size()) {        ERR_POST(Warning << "Invalid 'starts' size in packed-seg");        m_Dim = min(m_Dim*numseg, pseg.GetStarts().size()) / numseg;    }    m_HaveStrands = pseg.IsSetStrands();    if (m_HaveStrands && m_Dim*numseg != pseg.GetStrands().size()) {        ERR_POST(Warning << "Invalid 'strands' size in packed-seg");        m_Dim = min(m_Dim*numseg, pseg.GetStrands().size()) / numseg;    }    ENa_strand strand = eNa_strand_unknown;    for (size_t seg = 0;  seg < numseg;  seg++) {        SAlignment_Segment& alnseg = x_PushSeg(pseg.GetLens()[seg], m_Dim);        if ( seg == 0  &&  pseg.IsSetScores() ) {            // Set scores for the first segment only to avoid multiple copies            alnseg.SetScores(pseg.GetScores());        }        for (unsigned int row = 0;  row < m_Dim;  row++) {            if ( m_HaveStrands ) {                strand = pseg.GetStrands()[seg*m_Dim + row];            }            alnseg.AddRow(row,                 *pseg.GetIds()[row],                (pseg.GetPresent()[seg*m_Dim + row] ?                pseg.GetStarts()[seg*m_Dim + row] : kInvalidSeqPos),                m_HaveStrands,                strand,                0);        }    }}void CSeq_align_Mapper::x_Init(const CSeq_align_set& align_set){    const CSeq_align_set::Tdata& data = align_set.Get();    ITERATE(CSeq_align_set::Tdata, it, data) {        m_SubAligns.push_back(CSeq_align_Mapper(**it));    }}// Mapping through CSeq_loc_Conversion_Setstruct CConversionRef_Less{    bool operator()(const CRef<CSeq_loc_Conversion>& x,                    const CRef<CSeq_loc_Conversion>& y) const;};bool CConversionRef_Less::operator()(const CRef<CSeq_loc_Conversion>& x,                                     const CRef<CSeq_loc_Conversion>& y) const{    if (x->m_Src_id_Handle != y->m_Src_id_Handle) {        return x->m_Src_id_Handle < y->m_Src_id_Handle;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?