utils.cpp

来自「ncbi源码」· C++ 代码 · 共 1,009 行 · 第 1/3 页

CPP
1,009
字号
/* * =========================================================================== * PRODUCTION $Log: utils.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 21:21:32  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.7 * PRODUCTION * =========================================================================== *//*  $Id: utils.cpp,v 1000.0 2004/06/01 21:21:32 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE *               National Center for Biotechnology Information * *  This software/database is a "United States Government Work" under the *  terms of the United States Copyright Act.  It was written as part of *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S. *  Government have not placed any restriction on its use or reproduction. * *  Although all reasonable efforts have been taken to ensure the accuracy *  and reliability of the software and data, the NLM and the U.S. *  Government do not and cannot warrant the performance or results that *  may be obtained by using this software or data. The NLM and the U.S. *  Government disclaim all warranties, express or implied, including *  warranties of performance, merchantability or fitness for any particular *  purpose. * *  Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors:  Mike DiCuccio * * File Description: *    General utility classes for GUI projects. */#include <ncbi_pch.hpp>#include <gui/objutils/utils.hpp>#include <gui/objutils/label.hpp>#include <gui/objutils/prot_product.hpp>#include <gui/objutils/alignment.hpp>#include <gui/objutils/graph.hpp>#include <gui/objutils/alignment_smear_layout.hpp>#include <gui/objutils/mate_pair.hpp>#include <gui/objutils/prot_product.hpp>#include <gui/objutils/graph.hpp>#include <algorithm>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seqalign/Score.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_loc_mix.hpp>#include <objects/seqloc/Seq_point.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Packed_seqint.hpp>#include <objects/seqloc/Packed_seqpnt.hpp>#include <objects/seqloc/Seq_bond.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/general/Dbtag.hpp>#include <objects/general/Object_id.hpp>#include <objmgr/align_ci.hpp>#include <objmgr/annot_ci.hpp>#include <objmgr/graph_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/scope.hpp>#include <objmgr/util/feature.hpp>#include <objmgr/util/sequence.hpp>#include <objtools/alnmgr/alnmix.hpp>#include <serial/iterator.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);//// functor for sorting features based on their length//struct SFeatLengthPredicate{    bool operator()(const CMappedFeat& feat0,                    const CMappedFeat& feat1) const    {        TSeqRange r0 = feat0.GetLocation().GetTotalRange();        TSeqRange r1 = feat1.GetLocation().GetTotalRange();        return (r0.GetLength() < r1.GetLength());    }};//// functor for sorting features based on the NCBI feature sort order//struct SFeatSortPredicate{    bool operator()(const CMappedFeat& feat0,                    const CMappedFeat& feat1) const    {        const CSeq_feat& f0 = feat0.GetOriginalFeature();        const CSeq_feat& f1 = feat1.GetOriginalFeature();        return (f0.Compare(f1, feat0.GetLocation(), feat1.GetLocation()) < 0);    }};//// retrieve an annot selector//SAnnotSelector CSeqUtils::GetAnnotSelector(void){    SAnnotSelector sel;    sel        // consider overlaps by total range...        .SetOverlapTotalRange()        // resolve all segments...        .SetResolveAll()        // make sure we see all named annots (except SNPs)        .ExcludeNamedAnnots("SNP")        // stop at the first set of whatever        .SetAdaptiveDepth(true)        // stop at the first set of whatever        .SetSegmentSelectFirst();    return sel;}//// retrieve an annot selector for our selected annotations//SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TAnnotType c){    SAnnotSelector sel = GetAnnotSelector();    sel        // limit by our annotation type        .SetAnnotType(c);    return sel;}//// retrieve an annot selector for our selected annotations//SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TFeatType  feat){    SAnnotSelector sel = GetAnnotSelector(CSeq_annot::TData::e_Ftable);    sel        // retrieve feature type and subtype of interest        .SetFeatType(feat);    return sel;}SAnnotSelector CSeqUtils::GetAnnotSelector(SAnnotSelector::TFeatSubtype sub){    SAnnotSelector sel = GetAnnotSelector(CSeq_annot::TData::e_Ftable);    sel        // retrieve feature type and subtype of interest        .SetFeatSubtype(sub);    return sel;}//// GetLandmarkFeatures()// This returns a list of landmark features for a given sequence.  The current// implementation is a bit hackish; this function is mostly a hook and will be// replaced in ID2 by a retrieval for a specific named annotation that// contains precalculated landmark features//void CSeqUtils::GetLandmarkFeatures(const CBioseq_Handle&  handle,                                    const TSeqRange&          range,                                    size_t                 max_feats,                                    CLayoutFeat::TFeatList&   feats){    // begin with all gene features    SAnnotSelector sel = GetAnnotSelector(CSeqFeatData::e_Gene);    //SetResolveDepth(handle, range, sel);    // retrieve all of our genes    CFeat_CI iter(handle, range.GetFrom(), range.GetTo(), sel);    if (iter.GetSize() < max_feats) {        // retrieve them all        for ( ;  iter;  ++iter) {            CRef<CLayoutFeat> ref(new CLayoutFeat(*iter));            feats.push_back(ref);        }    } else {        // we do some screening - separate hypothetical and actual        vector<CMappedFeat> hyp_feats;        vector<CMappedFeat> act_feats;        // preallocate a rough approximation of the right amount of space        hyp_feats.reserve(iter.GetSize() / 2);        act_feats.reserve(iter.GetSize() / 2);        string str;        // sift through our features, separating hypothetical from        // real        for ( ;  iter;  ++iter) {            str.erase();            feature::GetLabel(iter->GetOriginalFeature(),                              &str, feature::eContent);            // hypothetical is defined as having a gene name like            // 'LOC123456'            if (str.find("LOC") == 0  &&                str.find_first_not_of("0123456789", 3) == string::npos) {                hyp_feats.push_back(*iter);            } else {                act_feats.push_back(*iter);            }        }        if (act_feats.size() < max_feats) {            // we need to add some hypothetical genes to the pool            std::sort(hyp_feats.begin(), hyp_feats.end(),                      SFeatLengthPredicate());            act_feats.insert(act_feats.end(),                             hyp_feats.end() - (max_feats - act_feats.size()),                             hyp_feats.end());            std::sort(act_feats.begin(), act_feats.end(),                      SFeatSortPredicate());        } else if (act_feats.size() > max_feats) {            // we have too many genes, so eliminate the short ones            std::sort(act_feats.begin(), act_feats.end(),                      SFeatLengthPredicate());            act_feats.erase(act_feats.begin(),                act_feats.begin() + (act_feats.size() - max_feats));            std::sort(act_feats.begin(), act_feats.end(),                      SFeatSortPredicate());        }        //        // now we've got enough genes        //        ITERATE (vector<CMappedFeat>, iter, act_feats) {            CRef<CLayoutFeat> ref(new CLayoutFeat(*iter));            feats.push_back(ref);        }    }}//// GetFeatures()// this retrieves and cross-links a set of features from the document// representing the basic molecular biology pathway.  This will detail the// relationship between genes <-> rnas <-> coding regions <-> proteins, where// known.//void CSeqUtils::GetFeatures(const CBioseq_Handle&   handle,                            const TSeqRange&        range,                            CSeqFeatData::E_Choice  feat_type,                            CLayoutFeat::TFeatList& feats,                            TFeatureFlags           flags){    SAnnotSelector selector = GetAnnotSelector(feat_type);    GetFeatures(handle, range, selector, feats, flags);}void CSeqUtils::GetFeatures(const CBioseq_Handle&   handle,                            const TSeqRange&        range,                            SAnnotSelector          sel,                            CLayoutFeat::TFeatList& feats,                            TFeatureFlags           flags){    feats.clear();    if ( !handle ) {        return;    }    CFeat_CI feature_iter(handle, range.GetFrom(), range.GetTo(), sel);    feats.clear();    feats.reserve(feature_iter.GetSize());    for (;  feature_iter ;  ++feature_iter) {        const CMappedFeat& feat = *feature_iter;        CRef<CLayoutFeat> fref(new CLayoutFeat(feat));        feats.push_back(fref);    }    if (flags & fFeature_LinkFeatures) {        LinkFeatures(feats);    }}//// LinkFeatures()// This builds explicit links between features, creating a hierarchical tree of// features.//void CSeqUtils::LinkFeatures(CLayoutFeat::TFeatList& feats){    CLayoutFeat::TFeatList out_feats;    out_feats.reserve(feats.size());    NON_CONST_ITERATE (CLayoutFeat::TFeatList, iter, feats) {        CLayoutFeat&        feat = **iter;        string label;        feature::GetLabel(feat.GetFeature(), &label, feature::eBoth);        CSeqFeatData::ESubtype parent_type = CSeqFeatData::eSubtype_bad;        switch (feat.GetFeature().GetData().GetSubtype()) {        case CSeqFeatData::eSubtype_cdregion:            if (dynamic_cast<const CLayoutProtProd*> (&feat)) {                // search for preceding CDS for this protein product                parent_type = CSeqFeatData::eSubtype_cdregion;            } else {                // search for preceding mRNA                parent_type = CSeqFeatData::eSubtype_mRNA;            }            break;        case CSeqFeatData::eSubtype_gene:            // don't link            out_feats.push_back(*iter);            continue;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?