feature_item.cpp

来自「ncbi源码」· C++ 代码 · 共 2,018 行 · 第 1/5 页

CPP
2,018
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: feature_item.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:44:14  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: feature_item.cpp,v 1000.2 2004/06/01 19:44:14 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*   new (early 2003) flat-file generator -- representation of features
*   (mainly of interest to implementors)
*
* ===========================================================================
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/iterator.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Heterogen.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seqfeat/OrgMod.hpp>
#include <objects/seqfeat/Code_break.hpp>
#include <objects/seqfeat/Genetic_code.hpp>
#include <objects/seqfeat/Genetic_code_table.hpp>
#include <objects/seqfeat/Imp_feat.hpp>
#include <objects/seqfeat/RNA_ref.hpp>
#include <objects/seqfeat/Trna_ext.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Packed_seqpnt.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/general/Object_id.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/seq_loc_mapper.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/util/feature.hpp>

#include <util/static_set.hpp>
#include <util/static_map.hpp>
#include <util/sequtil/sequtil.hpp>
#include <util/sequtil/sequtil_convert.hpp>

#include <algorithm>
#include <objtools/format/formatter.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/context.hpp>
#include <objtools/format/items/qualifiers.hpp>
#include "utils.hpp"

// On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)
#ifdef ff
#  undef ff
#endif


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
USING_SCOPE(sequence);


// -- static functions

// Find a GenBank qualifier on a feature by name.
//
// Returns the first entry of feat.GetQual() whose qualifier name equals
// 'qual' (compared case-insensitively) and whose value is present and
// non-empty; returns 0 when no such entry exists.
const CGb_qual* s_GetQual(const CSeq_feat& feat, const string& qual)
{
    ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
        if ( (*it)->CanGetQual()  &&
             NStr::CompareNocase((*it)->GetQual(), qual) == 0  &&
             (*it)->CanGetVal()  &&  !(*it)->GetVal().empty() ) {
            return *it;
        }
    }
    return 0;
}


// True when the id is one of the Seq-id choices accepted by the product
// check in s_CheckQuals_cdregion: GenBank, EMBL, DDBJ, "other", patent,
// TPG, TPE or TPD.
static bool s_ValidId(const CSeq_id& id)
{
    return id.IsGenbank()  ||  id.IsEmbl()    ||  id.IsDdbj()  ||
           id.IsOther()    ||  id.IsPatent()  ||
           id.IsTpg()      ||  id.IsTpe()     ||  id.IsTpd();
}


// Mandatory-qualifier check for a coding region.
//
// Returns true (feature is acceptable) when any of the following holds:
//   - the configuration does not request the CDS product id check;
//   - the feature is pseudo, either directly, through its gene xref, or
//     through the overlapping gene;
//   - the location is partial on the left only and at most 5 long
//     ("just a stop");
//   - the product resolves to an id carrying a validated accession, or to
//     a positive gi whose bioseq could not be loaded;
//   - there is no product, but the exception text contains
//     "rearrangement required for product".
// Returns false otherwise.
static bool s_CheckQuals_cdregion(const CSeq_feat& feat, CBioseqContext& ctx)
{
    if ( !ctx.Config().CheckCDSProductId() ) {
        return true;
    }

    CScope& scope = ctx.GetScope();

    // non-pseudo CDS must have /product
    bool pseudo = feat.CanGetPseudo()  &&  feat.GetPseudo();
    if ( !pseudo ) {
        // Declared at this level (not inside the lookup branch) so that the
        // overlapping gene stays referenced for as long as grp may point
        // into it.
        CConstRef<CSeq_feat> gene;
        const CGene_ref* grp = feat.GetGeneXref();
        if ( grp == 0 ) {
            gene = GetOverlappingGene(feat.GetLocation(), scope);
            if ( gene ) {
                pseudo = gene->CanGetPseudo()  &&  gene->GetPseudo();
                if ( !pseudo ) {
                    grp = &(gene->GetData().GetGene());
                }
            }
        }
        if ( !pseudo  &&  grp != 0 ) {
            pseudo = grp->GetPseudo();
        }
    }

    // A short location that is partial only on the left is treated as
    // "just a stop" and exempted from the product requirement.
    bool just_stop = false;
    if ( feat.CanGetLocation() ) {
        const CSeq_loc& loc = feat.GetLocation();
        if ( loc.IsPartialLeft()  &&  !loc.IsPartialRight() ) {
            if ( GetLength(loc, &scope) <= 5 ) {
                just_stop = true;
            }
        }
    }
    if ( pseudo ||  just_stop ) {
        return true;
    }

    // make sure the product has a valid accession
    if ( feat.CanGetProduct() ) {
        const CSeq_id* id = 0;
        try {
            id = &(GetId(feat.GetProduct(), &scope));
        } catch ( CException& ) {
            // Best effort: a product whose id cannot be resolved is handled
            // below like a product without a usable id (id stays 0).
        }
        if ( id != 0 ) {
            if ( (id->IsGi()  &&  id->GetGi() > 0) ||  id->IsLocal() ) {
                CBioseq_Handle prod = scope.GetBioseqHandle(*id);
                if ( prod ) {
                    // Look through all ids of the product for an accession.
                    ITERATE (CBioseq_Handle::TId, it, prod.GetId()) {
                        if ( s_ValidId(*it->GetSeqId()) ) {
                            const CTextseq_id* tsip =
                                it->GetSeqId()->GetTextseq_Id();
                            if ( tsip != 0  &&  tsip->IsSetAccession()  &&
                                 ValidateAccession(tsip->GetAccession()) ) {
                                return true;
                            }
                        }
                    }
                } else if ( id->IsGi()  &&  id->GetGi() > 0 ) {
                    // RELEASE_MODE requires that /protein_id is an accession
                    // !!! need to be able to retrieve accession from server
                    // without retrieving the entire bioseq
                    return true;
                }
            } else if ( s_ValidId(*id) ) {
                const CTextseq_id* tsip = id->GetTextseq_Id();
                if ( tsip != 0  &&  tsip->CanGetAccession()  &&
                     ValidateAccession(tsip->GetAccession()) ) {
                    return true;
                }
            }
        }
    } else {  // no product
        if ( feat.CanGetExcept()  &&  feat.GetExcept()  &&
             feat.CanGetExcept_text() ) {
            if ( NStr::Find(feat.GetExcept_text(),
                    "rearrangement required for product") != NPOS ) {
                return true;
            }
        }
    }

    return false;
}


// conflict and old_sequence require a publication printable on the segment
//
// Returns true when the feature has a citation and at least one of the
// context's references matches it; false otherwise.
static bool s_HasPub(const CSeq_feat& feat, CBioseqContext& ctx)
{
    if ( !feat.CanGetCit() ) {
        return false;
    }

    ITERATE(CBioseqContext::TReferences, it, ctx.GetReferences()) {
        if ( (*it)->Matches(feat.GetCit()) ) {
            return true;
        }
    }

    return false;
}


// Mandatory-qualifier check for a conflict feature.
//
// With a citation present, the result is that of s_HasPub.  Without one,
// the feature is accepted only in a RefSeq context and only if it carries
// a non-empty comment.
static bool s_CheckQuals_conflict(const CSeq_feat& feat, CBioseqContext& ctx)
{
    if ( !feat.CanGetCit() ) {
        // RefSeq allows conflict with accession in comment instead of sfp->cit
        if ( ctx.IsRefSeq()  &&
             feat.CanGetComment()  &&  !feat.GetComment().empty() ) {
            return true;
        }
    } else {
        return s_HasPub(feat, ctx);
    }

    return false;
}


// Mandatory-qualifier check for an old_sequence feature: it needs a
// matching publication (see s_HasPub).
static bool s_CheckQuals_old_seq(const CSeq_feat& feat, CBioseqContext& ctx)
{
    return s_HasPub(feat, ctx);
}


// Mandatory-qualifier check for a gene feature.
//
// Returns true when the gene has a non-empty locus, locus tag or
// description, or when its first synonym is non-empty.
static bool s_CheckQuals_gene(const CSeq_feat& feat)
{
    // gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene
    const CSeqFeatData::TGene& gene = feat.GetData().GetGene();
    if ( (gene.CanGetLocus()      &&  !gene.GetLocus().empty())      ||
         (gene.CanGetLocus_tag()  &&  !gene.GetLocus_tag().empty())  ||
         (gene.CanGetDesc()       &&  !gene.GetDesc().empty())       ||
         (!gene.GetSyn().empty()  &&  !gene.GetSyn().front().empty()) ) {
        return true;
    }

    return false;
}


// Mandatory-qualifier check for binding features: true when a non-empty
// "bound_moiety" qualifier is present.
static bool s_CheckQuals_bind(const CSeq_feat& feat)
{
    // protein_bind or misc_binding require eFQ_bound_moiety
    return s_GetQual(feat, "bound_moiety") != 0;
}


// Mandatory-qualifier check for modified_base: true when a non-empty
// "mod_base" qualifier is present.
static bool s_CheckQuals_mod_base(const CSeq_feat& feat)
{
    // modified_base requires eFQ_mod_base
    return s_GetQual(feat, "mod_base") != 0;
}


// Dispatch the mandatory-qualifier check by feature subtype.
//
// Subtypes without a dedicated check are always accepted (returns true).
static bool s_CheckMandatoryQuals(const CSeq_feat& feat, CBioseqContext& ctx)
{
    switch ( feat.GetData().GetSubtype() ) {
    case CSeqFeatData::eSubtype_cdregion:
        {
            return s_CheckQuals_cdregion(feat, ctx);
        }
    case CSeqFeatData::eSubtype_conflict:
        {
            return s_CheckQuals_conflict(feat, ctx);
        }
    case CSeqFeatData::eSubtype_old_sequence:
        {
            return s_CheckQuals_old_seq(feat, ctx);
        }
    case CSeqFeatData::eSubtype_gene:
        {
            return s_CheckQuals_gene(feat);
        }
    case CSeqFeatData::eSubtype_protein_bind:
    case CSeqFeatData::eSubtype_misc_binding:
        {
            return s_CheckQuals_bind(feat);
        }
    case CSeqFeatData::eSubtype_modified_base:
        {
            return s_CheckQuals_mod_base(feat);
        }
    default:
        break;
    }

    return true;
}


// Decide whether a feature should be left out.
//
// Returns true (skip) for subtypes that are always excluded, for features
// suppressed by the configuration flags tested below, and -- when the
// configuration asks for required qualifiers -- for features that fail
// s_CheckMandatoryQuals.  Returns false otherwise.
static bool s_SkipFeature(const CSeq_feat& feat, CBioseqContext& ctx)
{
    CSeqFeatData::E_Choice type    = feat.GetData().Which();
    CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();

    // These subtypes are skipped regardless of configuration.
    if ( subtype == CSeqFeatData::eSubtype_pub              ||
         subtype == CSeqFeatData::eSubtype_non_std_residue  ||
         subtype == CSeqFeatData::eSubtype_biosrc           ||
         subtype == CSeqFeatData::eSubtype_rsite            ||
         subtype == CSeqFeatData::eSubtype_seq ) {
        return true;
    }

    const CFlatFileConfig& cfg = ctx.Config();

    // check feature customization flags
    if ( cfg.ValidateFeats()  &&
        (subtype == CSeqFeatData::eSubtype_bad  ||
         subtype == CSeqFeatData::eSubtype_virion) ) {
        return true;
    }

    if ( ctx.IsNuc()  &&  subtype == CSeqFeatData::eSubtype_het ) {
        return true;
    }

    if ( cfg.HideImpFeats()  &&  type == CSeqFeatData::e_Imp ) {
        return true;
    }

    if ( cfg.HideSNPFeatures()  &&  subtype == CSeqFeatData::eSubtype_variation ) {
        return true;
    }

    if ( cfg.HideExonFeatures()  &&  subtype == CSeqFeatData::eSubtype_exon ) {
        return true;
    }

    if ( cfg.HideIntronFeatures()  &&  subtype == CSeqFeatData::eSubtype_intron ) {
        return true;
    }

    if ( cfg.HideRemoteImpFeats()  &&  type == CSeqFeatData::e_Imp ) {
        if ( subtype == CSeqFeatData::eSubtype_variation  ||
             subtype == CSeqFeatData::eSubtype_exon       ||
             subtype == CSeqFeatData::eSubtype_intron     ||
             subtype == CSeqFeatData::eSubtype_misc_feature ) {
            return true;
        }
    }

    // skip genes in DDBJ format
    if ( cfg.IsFormatDDBJ()  &&  type == CSeqFeatData::e_Gene ) {
        return true;
    }

    // To Do:
    // !!! suppress full length comment features

    // if RELEASE mode, make sure we have all info to create mandatory quals.
    if ( cfg.NeedRequiredQuals() ) {
        return !s_CheckMandatoryQuals(feat, ctx);
    }

    return false;
}


// Bond names.
// NOTE(review): presumably indexed by the bond-type enumeration value, with
// the empty string at index 0 -- confirm against the code that reads it.
static const string s_BondList[] = {
    "",
    "disulfide",
    "thiolester",
    "xlink",
    "thioether",
    "unclassified"
};


static const string s_SiteList[] = {
    "",
    "active",
    "binding",
    "cleavage",
    "inhibit",
    "modified",
    "glycosylation",
    "myristoylation",
    "mutagenized",
    "metal-binding",
    "phosphorylation",
    "acetylation",
    "amidation",
    "methylation",
    "hydroxylation",
    "sulfatation",

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?