utils.cpp
来自「ncbi源码」· C++ 代码 · 共 1,009 行 · 第 1/3 页
CPP
1,009 行
/* * =========================================================================== * PRODUCTION $Log: utils.cpp,v $ * PRODUCTION Revision 1000.0 2004/06/01 21:21:32 gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.7 * PRODUCTION * =========================================================================== *//* $Id: utils.cpp,v 1000.0 2004/06/01 21:21:32 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Mike DiCuccio * * File Description: * General utility classes for GUI projects. */#include <ncbi_pch.hpp>#include <gui/objutils/utils.hpp>#include <gui/objutils/label.hpp>#include <gui/objutils/prot_product.hpp>#include <gui/objutils/alignment.hpp>#include <gui/objutils/graph.hpp>#include <gui/objutils/alignment_smear_layout.hpp>#include <gui/objutils/mate_pair.hpp>#include <gui/objutils/prot_product.hpp>#include <gui/objutils/graph.hpp>#include <algorithm>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqalign/Score.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_loc_mix.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Packed_seqint.hpp>#include <objects/seqloc/Packed_seqpnt.hpp>#include <objects/seqloc/Seq_bond.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/general/Dbtag.hpp>#include <objects/general/Object_id.hpp>#include <objmgr/align_ci.hpp>#include <objmgr/annot_ci.hpp>#include <objmgr/graph_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/scope.hpp>#include <objmgr/util/feature.hpp>#include <objmgr/util/sequence.hpp>#include <objtools/alnmgr/alnmix.hpp>#include <serial/iterator.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);//// functor for sorting features based on their length//struct SFeatLengthPredicate{ bool operator()(const CMappedFeat& feat0, const CMappedFeat& feat1) const { TSeqRange r0 = feat0.GetLocation().GetTotalRange(); TSeqRange r1 = feat1.GetLocation().GetTotalRange(); return (r0.GetLength() < r1.GetLength()); }};//// functor for sorting features based on the NCBI feature sort order//struct SFeatSortPredicate{ bool operator()(const CMappedFeat& feat0, const CMappedFeat& feat1) const { const CSeq_feat& f0 = feat0.GetOriginalFeature(); const CSeq_feat& f1 = feat1.GetOriginalFeature(); return (f0.Compare(f1, feat0.GetLocation(), feat1.GetLocation()) < 0); }};//// retrieve an annot selector//SAnnotSelector CSeqUtils::GetAnnotSelector(void){ SAnnotSelector sel; sel // consider overlaps by total range... .SetOverlapTotalRange() // resolve all segments... .SetResolveAll() // make sure we see all named annots (except SNPs) .ExcludeNamedAnnots("SNP") // stop at the first set of whatever .SetAdaptiveDepth(true) // stop at the first set of whatever .SetSegmentSelectFirst(); return sel;}//// retrieve an annot selector for our selected annotations//SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TAnnotType c){ SAnnotSelector sel = GetAnnotSelector(); sel // limit by our annotation type .SetAnnotType(c); return sel;}//// retrieve an annot selector for our selected annotations//SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TFeatType feat){ SAnnotSelector sel = GetAnnotSelector(CSeq_annot::TData::e_Ftable); sel // retrieve feature type and subtype of interest .SetFeatType(feat); return sel;}SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TFeatSubtype sub){ SAnnotSelector sel = GetAnnotSelector(CSeq_annot::TData::e_Ftable); sel // retrieve feature type and subtype of interest .SetFeatSubtype(sub); return sel;}//// GetLandmarkFeatures()// This returns a list of landmark features for a given sequence. The current// implementation is a bit hackish; this function is mostly a hook and will be// replaced in ID2 by a retrieval for a specific named annotation that// contains precalculated landmark features//void CSeqUtils::GetLandmarkFeatures(const CBioseq_Handle& handle, const TSeqRange& range, size_t max_feats, CLayoutFeat::TFeatList& feats){ // begin with all gene features SAnnotSelector sel = GetAnnotSelector(CSeqFeatData::e_Gene); //SetResolveDepth(handle, range, sel); // retrieve all of our genes CFeat_CI iter(handle, range.GetFrom(), range.GetTo(), sel); if (iter.GetSize() < max_feats) { // retrieve them all for ( ; iter; ++iter) { CRef<CLayoutFeat> ref(new CLayoutFeat(*iter)); feats.push_back(ref); } } else { // we do some screening - separate hypothetical and actual vector<CMappedFeat> hyp_feats; vector<CMappedFeat> act_feats; // preallocate a rough approximation of the right amount of space hyp_feats.reserve(iter.GetSize() / 2); act_feats.reserve(iter.GetSize() / 2); string str; // sift through our features, separating hypothetical from // real for ( ; iter; ++iter) { str.erase(); feature::GetLabel(iter->GetOriginalFeature(), &str, feature::eContent); // hypothetical is defined as having a gene name like // 'LOC123456' if (str.find("LOC") == 0 && str.find_first_not_of("0123456789", 3) == string::npos) { hyp_feats.push_back(*iter); } else { act_feats.push_back(*iter); } } if (act_feats.size() < max_feats) { // we need to add some hypothetical genes to the pool std::sort(hyp_feats.begin(), hyp_feats.end(), SFeatLengthPredicate()); act_feats.insert(act_feats.end(), hyp_feats.end() - (max_feats - act_feats.size()), hyp_feats.end()); std::sort(act_feats.begin(), act_feats.end(), SFeatSortPredicate()); } else if (act_feats.size() > max_feats) { // we have too many genes, so eliminate the short ones std::sort(act_feats.begin(), act_feats.end(), SFeatLengthPredicate()); act_feats.erase(act_feats.begin(), act_feats.begin() + (act_feats.size() - max_feats)); std::sort(act_feats.begin(), act_feats.end(), SFeatSortPredicate()); } // // now we've got enough genes // ITERATE (vector<CMappedFeat>, iter, act_feats) { CRef<CLayoutFeat> ref(new CLayoutFeat(*iter)); feats.push_back(ref); } }}//// GetFeatures()// this retrieves and cross-links a set of features from the document// representing the basic molecular biology pathway. This will detail the// relationship between genes <-> rnas <-> coding regions <-> proteins, where// known.//void CSeqUtils::GetFeatures(const CBioseq_Handle& handle, const TSeqRange& range, CSeqFeatData::E_Choice feat_type, CLayoutFeat::TFeatList& feats, TFeatureFlags flags){ SAnnotSelector selector = GetAnnotSelector(feat_type); GetFeatures(handle, range, selector, feats, flags);}void CSeqUtils::GetFeatures(const CBioseq_Handle& handle, const TSeqRange& range, SAnnotSelector sel, CLayoutFeat::TFeatList& feats, TFeatureFlags flags){ feats.clear(); if ( !handle ) { return; } CFeat_CI feature_iter(handle, range.GetFrom(), range.GetTo(), sel); feats.clear(); feats.reserve(feature_iter.GetSize()); for (; feature_iter ; ++feature_iter) { const CMappedFeat& feat = *feature_iter; CRef<CLayoutFeat> fref(new CLayoutFeat(feat)); feats.push_back(fref); } if (flags & fFeature_LinkFeatures) { LinkFeatures(feats); }}//// LinkFeatures()// This builds explicit links between features, creating a hierarchical tree of// features.//void CSeqUtils::LinkFeatures(CLayoutFeat::TFeatList& feats){ CLayoutFeat::TFeatList out_feats; out_feats.reserve(feats.size()); NON_CONST_ITERATE (CLayoutFeat::TFeatList, iter, feats) { CLayoutFeat& feat = **iter; string label; feature::GetLabel(feat.GetFeature(), &label, feature::eBoth); CSeqFeatData::ESubtype parent_type = CSeqFeatData::eSubtype_bad; switch (feat.GetFeature().GetData().GetSubtype()) { case CSeqFeatData::eSubtype_cdregion: if (dynamic_cast<const CLayoutProtProd*> (&feat)) { // search for preceding CDS for this protein product parent_type = CSeqFeatData::eSubtype_cdregion; } else { // search for preceding mRNA parent_type = CSeqFeatData::eSubtype_mRNA; } break; case CSeqFeatData::eSubtype_gene: // don't link out_feats.push_back(*iter); continue;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?