📄 su_alignment_set.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: su_alignment_set.cpp,v $ * PRODUCTION Revision 1000.0 2004/06/01 18:14:18 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//* $Id: su_alignment_set.cpp,v 1000.0 2004/06/01 18:14:18 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Authors: Paul Thiessen** File Description:* Classes to hold sets of alignments** ===========================================================================*/#include <ncbi_pch.hpp>#include <corelib/ncbistl.hpp>#include <objects/seqalign/Dense_diag.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <map>#include <memory>#include "su_alignment_set.hpp"#include "su_sequence_set.hpp"#include "su_block_multiple_alignment.hpp"#include "su_private.hpp"USING_NCBI_SCOPE;USING_SCOPE(objects);BEGIN_SCOPE(struct_util)AlignmentSet::AlignmentSet(const SeqAnnotList& seqAnnots, const SequenceSet& sequenceSet){ // screen alignments to make sure they're a type we can handle typedef list < CRef < CSeq_align > > SeqAlignList; SeqAlignList seqAligns; SeqAnnotList::const_iterator n, ne = seqAnnots.end(); for (n=seqAnnots.begin(); n!=ne; ++n) { if (!n->GetObject().GetData().IsAlign()) THROW_MESSAGE("AlignmentSet::AlignmentSet() - confused by Seq-annot data format"); if (n != seqAnnots.begin()) TRACE_MESSAGE("multiple Seq-annots"); CSeq_annot::C_Data::TAlign::const_iterator a, ae = n->GetObject().GetData().GetAlign().end(); for (a=n->GetObject().GetData().GetAlign().begin(); a!=ae; ++a) { if (!(a->GetObject().GetType() != CSeq_align::eType_partial || a->GetObject().GetType() != CSeq_align::eType_diags) || !a->GetObject().IsSetDim() || a->GetObject().GetDim() != 2 || (!a->GetObject().GetSegs().IsDendiag() && !a->GetObject().GetSegs().IsDenseg())) THROW_MESSAGE("AlignmentSet::AlignmentSet() - confused by alignment type"); seqAligns.push_back(*a); } } if (seqAligns.size() == 0) THROW_MESSAGE("AlignmentSet::AlignmentSet() - must have at least one Seq-align"); // we need to determine the identity of the master sequence; most rigorous way is to look // for a Seq-id that is present in all pairwise alignments m_master = NULL; const Sequence *seq1 = NULL, *seq2 = NULL; bool seq1PresentInAll = true, seq2PresentInAll = true; // first, find sequences for first pairwise alignment const CSeq_id& frontSid = seqAligns.front()->GetSegs().IsDendiag() ? seqAligns.front()->GetSegs().GetDendiag().front()->GetIds().front().GetObject() : seqAligns.front()->GetSegs().GetDenseg().GetIds().front().GetObject(); const CSeq_id& backSid = seqAligns.front()->GetSegs().IsDendiag() ? seqAligns.front()->GetSegs().GetDendiag().front()->GetIds().back().GetObject() : seqAligns.front()->GetSegs().GetDenseg().GetIds().back().GetObject(); SequenceSet::SequenceList::const_iterator s, se = sequenceSet.m_sequences.end(); for (s=sequenceSet.m_sequences.begin(); s!=se; ++s) { if ((*s)->MatchesSeqId(frontSid)) seq1 = *s; if ((*s)->MatchesSeqId(backSid)) seq2 = *s; if (seq1 && seq2) break; } if (!(seq1 && seq2)) THROW_MESSAGE("AlignmentSet::AlignmentSet() - can't match first pair of Seq-ids to Sequences"); // now, make sure one of these sequences is present in all the other pairwise alignments SeqAlignList::const_iterator a = seqAligns.begin(), ae = seqAligns.end(); for (++a; a!=ae; ++a) { const CSeq_id& frontSid2 = (*a)->GetSegs().IsDendiag() ? (*a)->GetSegs().GetDendiag().front()->GetIds().front().GetObject() : (*a)->GetSegs().GetDenseg().GetIds().front().GetObject(); const CSeq_id& backSid2 = (*a)->GetSegs().IsDendiag() ? (*a)->GetSegs().GetDendiag().front()->GetIds().back().GetObject() : (*a)->GetSegs().GetDenseg().GetIds().back().GetObject(); if (!seq1->MatchesSeqId(frontSid2) && !seq1->MatchesSeqId(backSid2)) seq1PresentInAll = false; if (!seq2->MatchesSeqId(frontSid2) && !seq2->MatchesSeqId(backSid2)) seq2PresentInAll = false; } if (!seq1PresentInAll && !seq2PresentInAll) THROW_MESSAGE("AlignmentSet::AlignmentSet() - " "all pairwise sequence alignments must have a common master sequence"); else if (seq1PresentInAll && !seq2PresentInAll) m_master = seq1; else if (seq2PresentInAll && !seq1PresentInAll) m_master = seq2; else if (seq1PresentInAll && seq2PresentInAll && seq1 == seq2) m_master = seq1; // if still ambiguous, just use the first one for now if (!m_master) { WARNING_MESSAGE("alignment master sequence is ambiguous - using the first one (" << seq1->IdentifierString() << ')'); m_master = seq1; } TRACE_MESSAGE("determined master sequence: " << m_master->IdentifierString()); // parse the pairwise alignments SeqAlignList::const_iterator l, le = seqAligns.end(); for (l=seqAligns.begin(); l!=le; ++l) m_alignments.push_back( CRef<MasterSlaveAlignment>(new MasterSlaveAlignment(**l, m_master, sequenceSet))); TRACE_MESSAGE("number of alignments: " << m_alignments.size());}AlignmentSet * AlignmentSet::CreateFromMultiple( const BlockMultipleAlignment *multiple, SeqAnnotList *newAsnAlignmentData, const SequenceSet& sequenceSet, const vector < unsigned int > *rowOrder){ newAsnAlignmentData->clear(); // sanity check on the row order map if (rowOrder) { map < unsigned int, unsigned int > rowCheck; for (unsigned int i=0; i<rowOrder->size(); ++i) rowCheck[(*rowOrder)[i]] = i; if (rowOrder->size() != multiple->NRows() || rowCheck.size() != multiple->NRows() || (*rowOrder)[0] != 0) { ERROR_MESSAGE("AlignmentSet::CreateFromMultiple() - bad row order vector"); return NULL; } } // create a single Seq-annot, with 'align' data that holds one Seq-align per slave SeqAnnotList newSeqAnnots; CRef < CSeq_annot > seqAnnot(new CSeq_annot()); newSeqAnnots.push_back(seqAnnot); CSeq_annot::C_Data::TAlign& seqAligns = seqAnnot->SetData().SetAlign(); seqAligns.resize((multiple->NRows() == 1) ? 1 : multiple->NRows() - 1); CSeq_annot::C_Data::TAlign::iterator sa = seqAligns.begin(); BlockMultipleAlignment::UngappedAlignedBlockList blocks;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -