seq_align_mapper.cpp
来自「ncbi源码」· C++ 代码 · 共 1,238 行 · 第 1/3 页
CPP
1,238 行
/* * =========================================================================== * PRODUCTION $Log: seq_align_mapper.cpp,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:23:43 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//* $Id: seq_align_mapper.cpp,v 1000.1 2004/06/01 19:23:43 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aleksey Grichenko** File Description:* Alignment mapper**/#include <ncbi_pch.hpp>#include <objmgr/impl/seq_align_mapper.hpp>#include <objmgr/seq_id_mapper.hpp>#include <objmgr/seq_loc_mapper.hpp>#include <objmgr/objmgr_exception.hpp>#include <objects/seqalign/seqalign__.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)SAlignment_Segment::SAlignment_Segment(int len, size_t dim) : m_Len(len), m_Rows(dim), m_HaveStrands(false){ return;}SAlignment_Segment::SAlignment_Row&SAlignment_Segment::GetRow(size_t idx){ _ASSERT(m_Rows.size() > idx); return m_Rows[idx];}SAlignment_Segment::SAlignment_Row&SAlignment_Segment::CopyRow(size_t idx, const SAlignment_Row& src_row){ SAlignment_Row& dst_row = GetRow(idx); dst_row = src_row; return dst_row;}SAlignment_Segment::SAlignment_Row& SAlignment_Segment::AddRow(size_t idx, const CSeq_id& id, int start, bool is_set_strand, ENa_strand strand, int width){ SAlignment_Row& row = GetRow(idx); row.m_Id = CSeq_id_Handle::GetHandle(id); row.m_Start = start < 0 ? kInvalidSeqPos : start; row.m_IsSetStrand = is_set_strand; row.m_Strand = strand; row.m_Width = width; m_HaveStrands |= is_set_strand; return row;}SAlignment_Segment::SAlignment_Row& SAlignment_Segment::AddRow(size_t idx, const CSeq_id_Handle& id, int start, bool is_set_strand, ENa_strand strand, int width){ SAlignment_Row& row = GetRow(idx); row.m_Id = id; row.m_Start = start < 0 ? kInvalidSeqPos : start; row.m_IsSetStrand = is_set_strand; row.m_Strand = strand; row.m_Width = width; m_HaveStrands |= is_set_strand; return row;}void SAlignment_Segment::SetScores(const TScores& scores){ m_Scores = scores;}CSeq_align_Mapper::CSeq_align_Mapper(const CSeq_align& align) : m_OrigAlign(&align), m_DstAlign(0), m_HaveStrands(false), m_HaveWidths(false), m_Dim(0), m_AlignFlags(eAlign_Normal){ switch ( align.GetSegs().Which() ) { case CSeq_align::C_Segs::e_Dendiag: x_Init(align.GetSegs().GetDendiag()); break; case CSeq_align::C_Segs::e_Denseg: x_Init(align.GetSegs().GetDenseg()); break; case CSeq_align::C_Segs::e_Std: x_Init(align.GetSegs().GetStd()); break; case CSeq_align::C_Segs::e_Packed: x_Init(align.GetSegs().GetPacked()); break; case CSeq_align::C_Segs::e_Disc: x_Init(align.GetSegs().GetDisc()); break; default: break; }}SAlignment_Segment& CSeq_align_Mapper::x_PushSeg(int len, size_t dim){ m_Segs.push_back(SAlignment_Segment(len, dim)); return m_Segs.back();}SAlignment_Segment& CSeq_align_Mapper::x_InsertSeg(TSegments::iterator& where, int len, size_t dim){ return *m_Segs.insert(where, SAlignment_Segment(len, dim));}void CSeq_align_Mapper::x_Init(const TDendiag& diags){ ITERATE(TDendiag, diag_it, diags) { const CDense_diag& diag = **diag_it; size_t dim = diag.GetDim(); if (dim != diag.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in dendiag"); dim = min(dim, diag.GetIds().size()); } if (dim != diag.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in dendiag"); dim = min(dim, diag.GetStarts().size()); } m_HaveStrands = diag.IsSetStrands(); if (m_HaveStrands && dim != diag.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in dendiag"); dim = min(dim, diag.GetStrands().size()); } if (dim != m_Dim) { if ( m_Dim ) { m_AlignFlags = eAlign_MultiDim; } m_Dim = max(dim, m_Dim); } SAlignment_Segment& seg = x_PushSeg(diag.GetLen(), dim); ENa_strand strand = eNa_strand_unknown; if ( diag.IsSetScores() ) { seg.SetScores(diag.GetScores()); } for (size_t row = 0; row < dim; ++row) { if ( m_HaveStrands ) { strand = diag.GetStrands()[row]; } seg.AddRow(row, *diag.GetIds()[row], diag.GetStarts()[row], m_HaveStrands, strand, 0); } }}void CSeq_align_Mapper::x_Init(const CDense_seg& denseg){ m_Dim = denseg.GetDim(); size_t numseg = denseg.GetNumseg(); // claimed dimension may not be accurate :-/ if (numseg != denseg.GetLens().size()) { ERR_POST(Warning << "Invalid 'lens' size in denseg"); numseg = min(numseg, denseg.GetLens().size()); } if (m_Dim != denseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in denseg"); m_Dim = min(m_Dim, denseg.GetIds().size()); } if (m_Dim*numseg != denseg.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in denseg"); m_Dim = min(m_Dim*numseg, denseg.GetStarts().size()) / numseg; } m_HaveStrands = denseg.IsSetStrands(); if (m_HaveStrands && m_Dim*numseg != denseg.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in denseg"); m_Dim = min(m_Dim*numseg, denseg.GetStrands().size()) / numseg; } m_HaveWidths = denseg.IsSetWidths(); ENa_strand strand = eNa_strand_unknown; for (size_t seg = 0; seg < numseg; seg++) { SAlignment_Segment& alnseg = x_PushSeg(denseg.GetLens()[seg], m_Dim); if ( seg == 0 && denseg.IsSetScores() ) { // Set scores for the first segment only to avoid multiple copies alnseg.SetScores(denseg.GetScores()); } for (unsigned int row = 0; row < m_Dim; row++) { if ( m_HaveStrands ) { strand = denseg.GetStrands()[seg*m_Dim + row]; } alnseg.AddRow(row, *denseg.GetIds()[row], denseg.GetStarts()[seg*m_Dim + row], m_HaveStrands, strand, m_HaveWidths ? denseg.GetWidths()[row] : 0); } }}void CSeq_align_Mapper::x_Init(const TStd& sseg){ typedef map<CSeq_id_Handle, int> TSegLenMap; TSegLenMap seglens; ITERATE ( CSeq_align::C_Segs::TStd, it, sseg ) { const CStd_seg& stdseg = **it; size_t dim = stdseg.GetDim(); if (stdseg.IsSetIds() && dim != stdseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in std-seg"); dim = min(dim, stdseg.GetIds().size()); } SAlignment_Segment& seg = x_PushSeg(0, dim); if ( stdseg.IsSetScores() ) { seg.SetScores(stdseg.GetScores()); } int seg_len = 0; bool multi_width = false; ITERATE ( CStd_seg::TLoc, it_loc, (*it)->GetLoc() ) { const CSeq_loc& loc = **it_loc; const CSeq_id* id = 0; int start = -1; int len = 0; ENa_strand strand = eNa_strand_unknown; bool have_strand = false; unsigned int row_idx = 0; for (CSeq_loc_CI rg(loc); rg; ++rg, ++row_idx) { if (row_idx > dim) { ERR_POST(Warning << "Invalid number of rows in std-seg"); dim = row_idx; seg.m_Rows.resize(dim); } id = &rg.GetSeq_id(); if ( rg.IsEmpty() ) { start = kInvalidSeqPos; } else { start = rg.GetRange().GetFrom(); len = rg.GetRange().GetLength(); strand = rg.GetStrand(); if (strand != eNa_strand_unknown) { m_HaveStrands = have_strand = true; } } if (len > 0) { if (seg_len == 0) { seg_len = len; } else if (len != seg_len) { multi_width = true; if (len/3 == seg_len) { seg_len = len; } else if (len*3 != seg_len) { NCBI_THROW(CAnnotException, eBadLocation, "Rows have different lengths in std-seg"); } } } seglens[seg.AddRow(row_idx, *id, start, have_strand, strand, 0).m_Id] = len; } if (dim != m_Dim) { if ( m_Dim ) { m_AlignFlags = eAlign_MultiDim; } m_Dim = max(dim, m_Dim); } } seg.m_Len = seg_len; if ( multi_width ) { // Adjust each segment width. Do not check if sequence always // has the same width. for (size_t i = 0; i < seg.m_Rows.size(); ++i) { if (seglens[seg.m_Rows[i].m_Id] != seg_len) { seg.m_Rows[i].m_Width = 3; } } } seglens.clear(); m_HaveWidths |= multi_width; }}void CSeq_align_Mapper::x_Init(const CPacked_seg& pseg){ m_Dim = pseg.GetDim(); size_t numseg = pseg.GetNumseg(); // claimed dimension may not be accurate :-/ if (numseg != pseg.GetLens().size()) { ERR_POST(Warning << "Invalid 'lens' size in packed-seg"); numseg = min(numseg, pseg.GetLens().size()); } if (m_Dim != pseg.GetIds().size()) { ERR_POST(Warning << "Invalid 'ids' size in packed-seg"); m_Dim = min(m_Dim, pseg.GetIds().size()); } if (m_Dim*numseg != pseg.GetStarts().size()) { ERR_POST(Warning << "Invalid 'starts' size in packed-seg"); m_Dim = min(m_Dim*numseg, pseg.GetStarts().size()) / numseg; } m_HaveStrands = pseg.IsSetStrands(); if (m_HaveStrands && m_Dim*numseg != pseg.GetStrands().size()) { ERR_POST(Warning << "Invalid 'strands' size in packed-seg"); m_Dim = min(m_Dim*numseg, pseg.GetStrands().size()) / numseg; } ENa_strand strand = eNa_strand_unknown; for (size_t seg = 0; seg < numseg; seg++) { SAlignment_Segment& alnseg = x_PushSeg(pseg.GetLens()[seg], m_Dim); if ( seg == 0 && pseg.IsSetScores() ) { // Set scores for the first segment only to avoid multiple copies alnseg.SetScores(pseg.GetScores()); } for (unsigned int row = 0; row < m_Dim; row++) { if ( m_HaveStrands ) { strand = pseg.GetStrands()[seg*m_Dim + row]; } alnseg.AddRow(row, *pseg.GetIds()[row], (pseg.GetPresent()[seg*m_Dim + row] ? pseg.GetStarts()[seg*m_Dim + row] : kInvalidSeqPos), m_HaveStrands, strand, 0); } }}void CSeq_align_Mapper::x_Init(const CSeq_align_set& align_set){ const CSeq_align_set::Tdata& data = align_set.Get(); ITERATE(CSeq_align_set::Tdata, it, data) { m_SubAligns.push_back(CSeq_align_Mapper(**it)); }}// Mapping through CSeq_loc_Conversion_Setstruct CConversionRef_Less{ bool operator()(const CRef<CSeq_loc_Conversion>& x, const CRef<CSeq_loc_Conversion>& y) const;};bool CConversionRef_Less::operator()(const CRef<CSeq_loc_Conversion>& x, const CRef<CSeq_loc_Conversion>& y) const{ if (x->m_Src_id_Handle != y->m_Src_id_Handle) { return x->m_Src_id_Handle < y->m_Src_id_Handle;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?