cav_alignset.cpp
来自「ncbi源码」· C++ 代码 · 共 411 行
CPP
411 行
/* * =========================================================================== * PRODUCTION $Log: cav_alignset.cpp,v $ * PRODUCTION Revision 1000.2 2004/06/01 19:41:12 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5 * PRODUCTION * =========================================================================== *//* $Id: cav_alignset.cpp,v 1000.2 2004/06/01 19:41:12 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Authors: Paul Thiessen** File Description:* Classes to hold sets of master/slave pairwise alignments** ===========================================================================*/#include <ncbi_pch.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqalign/Dense_diag.hpp>#include <objects/seqalign/Dense_seg.hpp>#include <objects/seqloc/Seq_id.hpp>#include <objects/seqloc/PDB_seq_id.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqloc/Textseq_id.hpp>#include <objects/seqloc/PDB_mol_id.hpp>#include <objects/seq/Bioseq.hpp>#include <map>#include <objtools/cddalignview/cav_alignset.hpp>#include <objtools/cddalignview/cav_seqset.hpp>#include <objtools/cddalignview/cddalignview.h>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);typedef list < const CSeq_align * > SeqAlignList;typedef vector < CRef < CSeq_id > > SeqIdList;static bool IsAMatch(const Sequence *seq, const CSeq_id& sid){ if (sid.IsGi()) { if (sid.GetGi() == seq->gi) return true; return false; } if (sid.IsPdb()) { if (sid.GetPdb().GetMol().Get() == seq->pdbID) { if (sid.GetPdb().IsSetChain()) { if (sid.GetPdb().GetChain() != seq->pdbChain) { return false; } } return true; } return false; } if (sid.IsLocal() && sid.GetLocal().IsStr()) { if (sid.GetLocal().GetStr() == seq->pdbID) return true; return false; } if (sid.IsGenbank() && sid.GetGenbank().IsSetAccession()) { if (sid.GetGenbank().GetAccession() == seq->accession) return true; return false; } if (sid.IsSwissprot() && sid.GetSwissprot().IsSetAccession()) { return (sid.GetSwissprot().GetAccession() == seq->accession); } ERR_POST(Error << "IsAMatch - can't match this type of Seq-id"); return false;}AlignmentSet::AlignmentSet(SequenceSet *sequenceSet, const SeqAnnotList& seqAnnots) : master(NULL), status(CAV_ERROR_ALIGNMENTS){ SeqAlignList seqaligns; // first, make a list of alignments SeqAnnotList::const_iterator n, ne = seqAnnots.end(); for (n=seqAnnots.begin(); n!=ne; ++n) { if (!n->GetObject().GetData().IsAlign()) { ERR_POST(Error << "AlignmentSet::AlignmentSet() - confused by alignment data format"); return; } CSeq_annot::C_Data::TAlign::const_iterator a, ae = n->GetObject().GetData().GetAlign().end(); for (a=n->GetObject().GetData().GetAlign().begin(); a!=ae; ++a) { // verify this is a type of alignment we can deal with if (!(a->GetObject().GetType() != CSeq_align::eType_partial || a->GetObject().GetType() != CSeq_align::eType_diags) || !a->GetObject().IsSetDim() || a->GetObject().GetDim() != 2 || (!a->GetObject().GetSegs().IsDendiag() && !a->GetObject().GetSegs().IsDenseg())) { ERR_POST(Error << "AlignmentSet::AlignmentSet() - confused by alignment type"); return; } seqaligns.push_back(a->GetPointer()); } } if (seqaligns.size() == 0) { ERR_POST(Error << "AlignmentSet::AlignmentSet() - no valid Seq-aligns present"); return; } // figure out which sequence is the master // if there aren't any structures, then the first sequence of each pair must be the same, // and is the master else { SeqAlignList::const_iterator a, ae = seqaligns.end(); for (a=seqaligns.begin(); a!=ae; ++a) { const SeqIdList& sids = (*a)->GetSegs().IsDendiag() ? (*a)->GetSegs().GetDendiag().front()->GetIds() : (*a)->GetSegs().GetDenseg().GetIds(); if (!master) { SequenceSet::SequenceList::const_iterator s, se = sequenceSet->sequences.end(); for (s=sequenceSet->sequences.begin(); s!=se; ++s) { if (IsAMatch(*s, sids.front().GetObject())) { master = *s; break; } } } if (!IsAMatch(master, sids.front().GetObject())) { ERR_POST(Error << "AlignmentSet::AlignmentSet() - master must be first sequence of every alignment"); return; } } sequenceSet->master = master; } if (master) { ERR_POST(Info << "determined that master sequence is " << master->GetTitle()); } else { ERR_POST(Error << "AlignmentSet::AlignmentSet() - couldn't determine which is master sequence"); return; } // then make an alignment from each Seq-align; for any sequence that has structure, make // sure that that Sequence object only appears once in the alignment SeqAlignList::const_iterator s, se = seqaligns.end(); int i = 1; for (s=seqaligns.begin(); s!=se; ++s, ++i) { const MasterSlaveAlignment *alignment = new MasterSlaveAlignment(sequenceSet, master, **s); if (!alignment || alignment->Status() != CAV_SUCCESS) { ERR_POST(Error << "Error parsing master/slave pairwise alignment #" << i); status = alignment->Status(); return; } alignments.push_back(alignment); } ERR_POST(Info << "number of alignments: " << alignments.size()); status = CAV_SUCCESS;}AlignmentSet::~AlignmentSet(void){ for (int i=0; i<alignments.size(); ++i) delete alignments[i];}///// MasterSlaveAlignment methods /////static bool SameSequence(const Sequence *s, const CBioseq& bioseq){ CBioseq::TId::const_iterator i, ie = bioseq.GetId().end(); for (i=bioseq.GetId().begin(); i!=ie; ++i) { if (IsAMatch(s, **i)) return true; } return false;}MasterSlaveAlignment::MasterSlaveAlignment( const SequenceSet *sequenceSet, const Sequence *masterSequence, const CSeq_align& seqAlign) : master(masterSequence), slave(NULL), status(CAV_ERROR_PAIRWISE){ // resize alignment and block vector masterToSlave.resize(master->Length(), -1); SequenceSet::SequenceList::const_iterator s = sequenceSet->sequences.begin(), se = sequenceSet->sequences.end(); // find slave sequence for this alignment, and order (master or slave first) const SeqIdList& sids = seqAlign.GetSegs().IsDendiag() ? seqAlign.GetSegs().GetDendiag().front()->GetIds() : seqAlign.GetSegs().GetDenseg().GetIds(); bool masterFirst = true; for (; s!=se; ++s) { if (SameSequence(*s, master->bioseqASN.GetObject())) continue; if (IsAMatch(*s, sids.back().GetObject())) { break; } else if (IsAMatch(*s, sids.front().GetObject())) { masterFirst = false; break; } } if (s == se) { // special case of master seq. aligned to itself if (IsAMatch(master, sids.back().GetObject()) && IsAMatch(master, sids.front().GetObject())) { slave = master; } else { ERR_POST(Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "couldn't find matching unaligned slave sequence"); return; } } else { slave = *s; } int i, masterRes, slaveRes; // unpack dendiag alignment if (seqAlign.GetSegs().IsDendiag()) { CSeq_align::C_Segs::TDendiag::const_iterator d , de = seqAlign.GetSegs().GetDendiag().end(); for (d=seqAlign.GetSegs().GetDendiag().begin(); d!=de; ++d) { const CDense_diag& block = d->GetObject(); if (!block.IsSetDim() || block.GetDim() != 2 || block.GetIds().size() != 2 || block.GetStarts().size() != 2) { ERR_POST(Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "incorrect dendiag block dimensions"); return; } // make sure identities of master and slave sequences match in each block if ((masterFirst && (!IsAMatch(master, block.GetIds().front().GetObject()) || !IsAMatch(slave, block.GetIds().back().GetObject()))) || (!masterFirst && (!IsAMatch(master, block.GetIds().back().GetObject()) || !IsAMatch(slave, block.GetIds().front().GetObject())))) { ERR_POST(Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "mismatched Seq-id in dendiag block"); return; } // finally, actually unpack the data into the alignment vector for (i=0; i<block.GetLen(); ++i) { if (masterFirst) { masterRes = block.GetStarts().front() + i; slaveRes = block.GetStarts().back() + i; } else { masterRes = block.GetStarts().back() + i; slaveRes = block.GetStarts().front() + i; } if (masterRes >= master->Length() || slaveRes >= slave->Length()) { ERR_POST(Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "seqloc in dendiag block > length of sequence!"); return; } masterToSlave[masterRes] = slaveRes; } } } // unpack denseg alignment else if (seqAlign.GetSegs().IsDenseg()) { const CDense_seg& block = seqAlign.GetSegs().GetDenseg(); if (!block.IsSetDim() && block.GetDim() != 2 || block.GetIds().size() != 2 || block.GetStarts().size() != 2 * block.GetNumseg() || block.GetLens().size() != block.GetNumseg()) { ERR_POST(Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "incorrect denseg block dimension"); return; } // make sure identities of master and slave sequences match in each block if ((masterFirst && (!IsAMatch(master, block.GetIds().front().GetObject()) || !IsAMatch(slave, block.GetIds().back().GetObject()))) || (!masterFirst && (!IsAMatch(master, block.GetIds().back().GetObject()) || !IsAMatch(slave, block.GetIds().front().GetObject())))) { ERR_POST(Error << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "mismatched Seq-id in denseg block"); return; } // finally, actually unpack the data into the alignment vector CDense_seg::TStarts::const_iterator starts = block.GetStarts().begin(); CDense_seg::TLens::const_iterator lens, le = block.GetLens().end(); for (lens=block.GetLens().begin(); lens!=le; ++lens) { if (masterFirst) { masterRes = *(starts++); slaveRes = *(starts++); } else { slaveRes = *(starts++); masterRes = *(starts++); } if (masterRes != -1 && slaveRes != -1) { // skip gaps if ((masterRes + *lens - 1) >= master->Length() || (slaveRes + *lens - 1) >= slave->Length()) { ERR_POST(Critical << "MasterSlaveAlignment::MasterSlaveAlignment() - \n" "seqloc in denseg block > length of sequence!"); return; } for (i=0; i<*lens; ++i) masterToSlave[masterRes + i] = slaveRes + i; } } } status = CAV_SUCCESS;}END_NCBI_SCOPE/** ---------------------------------------------------------------------------* $Log: cav_alignset.cpp,v $* Revision 1000.2 2004/06/01 19:41:12 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.5** Revision 1.5 2004/05/21 21:42:51 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.4 2004/05/08 10:56:20 thiessen* better handling of repeated sequences** Revision 1.3 2004/03/15 18:51:27 thiessen* prefer prefix vs. postfix ++/-- operators** Revision 1.2 2003/06/02 16:06:41 dicuccio* Rearranged src/objects/ subtree. This includes the following shifts:* - src/objects/asn2asn --> arc/app/asn2asn* - src/objects/testmedline --> src/objects/ncbimime/test* - src/objects/objmgr --> src/objmgr* - src/objects/util --> src/objmgr/util* - src/objects/alnmgr --> src/objtools/alnmgr* - src/objects/flat --> src/objtools/flat* - src/objects/validator --> src/objtools/validator* - src/objects/cddalignview --> src/objtools/cddalignview* In addition, libseq now includes six of the objects/seq... libs, and libmmdb* replaces the three libmmdb? libs.** Revision 1.1 2003/03/19 19:04:12 thiessen* move again** Revision 1.1 2003/03/19 05:33:43 thiessen* move to src/app/cddalignview** Revision 1.7 2003/02/03 17:52:03 thiessen* move CVS Log to end of file** Revision 1.6 2002/07/11 17:16:54 thiessen* change for STL type in object lib** Revision 1.5 2002/02/08 19:53:17 thiessen* add annotation to text/HTML displays** Revision 1.4 2001/07/12 21:32:12 thiessen* update for swissprot accessions** Revision 1.3 2001/01/29 18:13:33 thiessen* split into C-callable library + main** Revision 1.2 2001/01/22 15:55:11 thiessen* correctly set up ncbi namespacing** Revision 1.1 2001/01/22 13:15:23 thiessen* initial checkin**/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?