flat_feature.cpp

来自「ncbi源码」· C++ 代码 · 共 822 行 · 第 1/2 页

CPP
822
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_feature.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:43:05  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.11
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: flat_feature.cpp,v 1000.1 2004/06/01 19:43:05 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   new (early 2003) flat-file generator -- representation of features
*   (mainly of interest to implementors)
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_quals.hpp>
#include <objtools/flat/flat_gbseq_formatter.hpp>

#include <serial/iterator.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seqfeat/seqfeat__.hpp>

#include <objmgr/scope.hpp>
#include <objmgr/util/sequence.hpp>

#include <algorithm>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


// Return the formatted representation of this feature, building it on
// first use.  If m_FF is already set it is returned as-is; otherwise a new
// CFlatFeature is created from GetKey(), a freshly allocated CFlatLoc and
// *m_Feat, after which qualifiers are collected (x_AddQuals) and rendered
// (x_FormatQuals).
// NOTE(review): m_FF is assigned inside a const method, so it is presumably
// declared mutable in the header -- confirm.
CRef<CFlatFeature> IFlattishFeature::Format(void) const
{
    // extremely rough cut for now -- qualifiers still in progress!
    if (m_FF) {
        return m_FF;
    }

    m_FF.Reset(new CFlatFeature(GetKey(),
                                *new CFlatLoc(*m_Loc, *m_Context), *m_Feat));
    x_AddQuals();
    x_FormatQuals();
    return m_FF;
}


// Collect the qualifiers that apply to any feature (comment, product,
// overlapping gene, imported GenBank qualifiers, title, citations,
// evidence, db_xrefs, pseudo, exception text) and then dispatch to the
// type-specific overload for gene, coding-region and protein features.
// Also records the feature subtype in m_Type.
void CFlattishFeature::x_AddQuals(void) const
{
    CScope&             scope = m_Context->GetHandle().GetScope();
    const CSeqFeatData& data  = m_Feat->GetData();

    m_Type = data.GetSubtype();

    // add various generic qualifiers...
    if (m_Feat->IsSetComment()) {
        x_AddQual(eFQ_seqfeat_note, new CFlatStringQV(m_Feat->GetComment()));
    }

    if (m_Feat->IsSetProduct()) {
        if (m_IsProduct) {
            x_AddQual(eFQ_coded_by, new CFlatSeqLocQV(m_Feat->GetLocation()));
        } else {
            CBioseq_Handle prod = scope.GetBioseqHandle(m_Feat->GetProduct());
            // Only inspect the product's molecule type when the handle
            // actually resolved; an unresolved product must not be
            // dereferenced.
            // NOTE(review): relies on CBioseq_Handle's boolean conversion --
            // confirm against the handle's declaration.
            if ( prod  &&  !m_Context->IsProt() ) {
                EFeatureQualifier slot
                    = ((prod.GetBioseqCore()->GetInst().GetMol()
                        == CSeq_inst::eMol_aa)
                       ? eFQ_translation : eFQ_transcription);
                x_AddQual(slot, new CFlatSeqDataQV(m_Feat->GetProduct()));
            }
            try {
                const CSeq_id& id = sequence::GetId(m_Feat->GetProduct(),
                                                    &scope);
                if (id.IsGi()) {
                    // cheat slightly
                    x_AddQual(eFQ_db_xref,
                              new CFlatStringQV
                              ("GI:" + NStr::IntToString(id.GetGi())));
                }
                if (data.IsCdregion()) {
                    x_AddQual(eFQ_protein_id, new CFlatSeqIdQV(id));
                }
            } catch (const sequence::CNotUnique&) {
                // Deliberately ignored: when no single ID can be obtained
                // for the product, the ID-derived qualifiers are omitted.
            }
        }
    }

    if ( !data.IsGene() ) {
        CConstRef<CSeq_feat> gene_feat
            = sequence::GetBestOverlappingFeat(m_Feat->GetLocation(),
                                               CSeqFeatData::e_Gene,
                                               sequence::eOverlap_Simple,
                                               scope);
        if (gene_feat) {
            const CGene_ref& gene = gene_feat->GetData().GetGene();
            string label;
            gene.GetLabel(&label);
            if ( !label.empty() ) {
                // XXX - should expand certain SGML entities
                x_AddQual(eFQ_gene, new CFlatStringQV(label));
            }
            if (gene.IsSetDb()  &&  !data.IsCdregion()  &&  !data.IsRna() ) {
                x_AddQual(eFQ_gene_xref, new CFlatXrefQV(gene.GetDb()));
            }
            // Skip the locus tag when it is already being shown as the label.
            if (gene.IsSetLocus_tag()  &&  gene.GetLocus_tag() != label) {
                x_AddQual(eFQ_locus_tag,
                          new CFlatStringQV(gene.GetLocus_tag()));
            }
        }
    }

    if (m_Feat->IsSetQual()) {
        x_ImportQuals(m_Feat->GetQual());
    }
    if (m_Feat->IsSetTitle()) {
        x_AddQual(eFQ_label, new CFlatLabelQV(m_Feat->GetTitle()));
    }
    if (m_Feat->IsSetCit()) {
        x_AddQual(eFQ_citation, new CFlatPubSetQV(m_Feat->GetCit()));
    }
    if (m_Feat->IsSetExp_ev()) {
        x_AddQual(eFQ_evidence, new CFlatExpEvQV(m_Feat->GetExp_ev()));
    }
    if (m_Feat->IsSetDbxref()) {
        x_AddQual(eFQ_db_xref, new CFlatXrefQV(m_Feat->GetDbxref()));
    }
    if (m_Feat->IsSetPseudo()) {
        x_AddQual(eFQ_pseudo, new CFlatBoolQV(m_Feat->GetPseudo()));
    }
    if (m_Feat->IsSetExcept_text()) {
        x_AddQual(eFQ_exception, new CFlatStringQV(m_Feat->GetExcept_text()));
    }

    switch (data.Which()) {
    case CSeqFeatData::e_Gene:      x_AddQuals(data.GetGene());      break;
    case CSeqFeatData::e_Cdregion:  x_AddQuals(data.GetCdregion());  break;
    case CSeqFeatData::e_Prot:      x_AddQuals(data.GetProt());      break;
        // ...
    default: break;
    }
}


// Add gene-specific qualifiers.  The first non-empty value among locus,
// description and synonyms is filed under eFQ_gene; later ones go to their
// own slots (eFQ_gene_desc / eFQ_gene_syn).  Allele, map location, database
// cross-references and locus tag are added when present.
void CFlattishFeature::x_AddQuals(const CGene_ref& gene) const
{
    bool got_name = false;

    if (gene.IsSetLocus()  &&  !gene.GetLocus().empty() ) {
        x_AddQual(eFQ_gene, new CFlatStringQV(gene.GetLocus()));
        got_name = true;
    }
    if (gene.IsSetDesc()  &&   !gene.GetDesc().empty() ) {
        x_AddQual(got_name ? eFQ_gene_desc : eFQ_gene,
                  new CFlatStringQV(gene.GetDesc()));
        got_name = true;
    }
    if (gene.IsSetSyn()  &&  !gene.GetSyn().empty() ) {
        ITERATE (CGene_ref::TSyn, it, gene.GetSyn()) {
            if ( !it->empty() ) {
                x_AddQual(got_name ? eFQ_gene_syn : eFQ_gene,
                          new CFlatStringQV(*it));
                got_name = true;
            }
        }
    }
    if (gene.IsSetAllele()  &&  !gene.GetAllele().empty() ) {
        x_AddQual(eFQ_gene_allele, new CFlatStringQV(gene.GetAllele()));
    }
    if (gene.IsSetMaploc()  &&  !gene.GetMaploc().empty() ) {
        x_AddQual(eFQ_gene_map, new CFlatStringQV(gene.GetMaploc()));
    }
    if (gene.IsSetDb()) {
        x_AddQual(eFQ_gene_xref, new CFlatXrefQV(gene.GetDb()));
    }
    if (gene.IsSetLocus_tag()) {
        x_AddQual(eFQ_locus_tag, new CFlatStringQV(gene.GetLocus_tag()));
    }
}


// Add coding-region-specific qualifiers: the product's protein label,
// codon_start, transl_table, per-codon deviations from the standard
// genetic code, and translation exceptions.  Nothing is added when this
// feature is being shown on its product or when it has no product.
void CFlattishFeature::x_AddQuals(const CCdregion& cds) const
{
    if (m_IsProduct) {
        return; // We don't need directions when we have the sequence!
    } else if ( !m_Feat->IsSetProduct() ) {
        // warn?
        return;
    }

    CScope& scope = m_Context->GetHandle().GetScope();
    CConstRef<CSeq_feat> prod
        = sequence::GetBestOverlappingFeat(m_Feat->GetProduct(),
                                           CSeqFeatData::e_Prot,
                                           sequence::eOverlap_Contains,
                                           scope);
    if (prod) {
        string label;
        prod->GetData().GetProt().GetLabel(&label);
        x_AddQual(eFQ_cds_product, new CFlatStringQV(label));
    }

    if (cds.IsSetFrame()) {
        x_AddQual(eFQ_codon_start, new CFlatIntQV(cds.GetFrame()));
    }

    if (cds.IsSetCode()) {
        int id = cds.GetCode().GetId();
        if (id == 255) { // none found, so substitute default
            id = 1;
        }
        // Table 1 is only reported when the formatter is a
        // CFlatGBSeqFormatter; any other table is always reported.
        if (id != 1  ||
            dynamic_cast<CFlatGBSeqFormatter*>(&m_Context->GetFormatter())) {
            x_AddQual(eFQ_transl_table, new CFlatIntQV(id));
        }

        const string& ncbieaa = cds.GetCode().GetNcbieaa();
        if ( !ncbieaa.empty() ) {
            const string& std_ncbieaa = CGen_code_table::GetNcbieaa(id);
            if ( !std_ncbieaa.empty() ) {
                // Compare only positions present in BOTH tables: indexing
                // std_ncbieaa beyond its length would be undefined
                // behaviour if the feature's table happened to be longer.
                for (unsigned int i = 0;
                     i < ncbieaa.size()  &&  i < std_ncbieaa.size();
                     ++i) {
                    if (ncbieaa[i] != std_ncbieaa[i]) {
                        x_AddQual(eFQ_codon, new CFlatCodonQV(i, ncbieaa[i]));
                    }
                }
            }
        }
    }

    if (cds.IsSetCode_break()) {
        x_AddQual(eFQ_transl_except,
                  new CFlatCodeBreakQV(cds.GetCode_break()));
    }
}


// Add protein-specific qualifiers.  The first non-empty name is filed under
// eFQ_product and any further names under eFQ_prot_names.  The remaining
// handling is compiled out (#if 0) and kept only for reference.
void CFlattishFeature::x_AddQuals(const CProt_ref& prot) const
{
    bool got_name = false;

    if (prot.IsSetName()  &&  !prot.GetName().empty() ) {
        ITERATE (CProt_ref::TName, it, prot.GetName()) {
            if ( !it->empty() ) {
                x_AddQual(got_name ? eFQ_prot_names : eFQ_product,
                          new CFlatStringQV(*it));
                got_name = true;
            }
        }
    }
#if 0
    if (prot.IsSetDesc()  &&   !prot.GetDesc().empty() ) {
        x_AddQual(got_name ? eFQ_prot_desc : eFQ_prot,
                  new CFlatStringQV(prot.GetDesc()));
        got_name = true;
    }
    if (prot.IsSetSyn()  &&  !prot.GetSyn().empty() ) {
        typedef CProt_ref::TSyn::const_iterator TProtSyn_CI;
        TProtSyn_CI first = prot.GetSyn().begin(), last = prot.GetSyn().end();
        if ( !got_name  &&  first != last) {
            x_AddQual(eFQ_prot, new CFlatStringQV(*first));
            got_name = true;
            ++first;
        }
        while (first != last) {
            x_AddQual(eFQ_prot_syn, new CFlatStringQV(*first));
            ++first;
        }
    }
    if (prot.IsSetAllele()  &&  !prot.GetAllele().empty() ) {
        x_AddQual(eFQ_prot_allele, new CFlatStringQV(prot.GetAllele()));
    }
    if (prot.IsSetMaploc()  &&  !prot.GetMaploc().empty() ) {
        x_AddQual(eFQ_prot_map, new CFlatStringQV(prot.GetMaploc()));
    }
    if (prot.IsSetDb()) {
        x_AddQual(eFQ_prot_xref, new CFlatXrefQV(prot.GetDb()));
    }
    if (prot.IsSetLocus_tag()) {
        x_AddQual(eFQ_locus_tag, new CFlatStringQV(prot.GetLocus_tag()));
    }
#endif
}


// One entry of the qualifier-name lookup table used by x_ImportQuals:
// a qualifier name and the slot it maps to.  The conversion to string lets
// an entry be compared directly against a qualifier name during the
// binary search.
struct SLegalImport {
    const char*       m_Name;
    EFeatureQualifier m_Value;

    operator string(void) const { return m_Name; }
};


// Translate the feature's free-form GenBank qualifiers into typed
// qualifier values.  Each qualifier name is looked up (case-insensitively)
// in a sorted table; unknown names are filed under eFQ_illegal_qual.
// Some slots are emitted unquoted, labels get their own value class, and
// everything else is emitted as an ordinary string.
void CFlattishFeature::x_ImportQuals(const CSeq_feat::TQual& quals) const
{
    static const SLegalImport kLegalImports[] = {
        // Must be in case-insensitive alphabetical order!
#define DO_IMPORT(x) { #x, eFQ_##x }
        DO_IMPORT(allele),
        DO_IMPORT(bound_moiety),
        DO_IMPORT(clone),
        DO_IMPORT(codon),
        DO_IMPORT(cons_splice),
        DO_IMPORT(direction),
        DO_IMPORT(EC_number),
        DO_IMPORT(frequency),
        DO_IMPORT(function),
        DO_IMPORT(insertion_seq),
        DO_IMPORT(label),
        DO_IMPORT(map),
        DO_IMPORT(mod_base),
        DO_IMPORT(number),
        DO_IMPORT(organism),
        DO_IMPORT(PCR_conditions),
        DO_IMPORT(phenotype),
        { "product", eFQ_product_quals },
        DO_IMPORT(replace),
        DO_IMPORT(rpt_family),
        DO_IMPORT(rpt_type),
        DO_IMPORT(rpt_unit),
        DO_IMPORT(standard_name),
        DO_IMPORT(transposon),
        DO_IMPORT(usedin)
#undef DO_IMPORT
    };
    static const SLegalImport* kLegalImportsEnd
        = kLegalImports + sizeof(kLegalImports)/sizeof(SLegalImport);

    ITERATE (CSeq_feat::TQual, it, quals) {
        const string&       name = (*it)->GetQual();
        const SLegalImport* li   = lower_bound(kLegalImports, kLegalImportsEnd,
                                               name, PNocase());
        EFeatureQualifier   slot = eFQ_illegal_qual;
        // lower_bound only finds the insertion point, so confirm that the
        // entry found really carries this name.
        if (li != kLegalImportsEnd && !NStr::CompareNocase(li->m_Name,name)) {
            slot = li->m_Value;
        }
        switch (slot) {
        case eFQ_codon:
        case eFQ_cons_splice:
        case eFQ_direction:
        case eFQ_mod_base:
        case eFQ_number:
        case eFQ_rpt_type:
        case eFQ_rpt_unit:
        case eFQ_usedin:
            // XXX -- each of these should really get its own class
            // (to verify correct syntax)
            x_AddQual(slot, new CFlatStringQV((*it)->GetVal(),
                                              CFlatQual::eUnquoted));
            break;
        case eFQ_label:
            x_AddQual(slot, new CFlatLabelQV((*it)->GetVal()));
            break;
        case eFQ_illegal_qual:
            x_AddQual(slot, new CFlatIllegalQV(**it));
            break;
        default:
            // XXX - should split off EC_number and replace
            // (to verify correct syntax)
            x_AddQual(slot, new CFlatStringQV((*it)->GetVal()));
            break;
        }
    }
}


// Render the collected qualifiers into m_FF in a fixed order: capacity is
// reserved up front for m_Quals.size() entries, regular qualifiers are
// emitted through x_FormatQual and note-type qualifiers through
// x_FormatNoteQual.
void CFlattishFeature::x_FormatQuals(void) const
{
    m_FF->SetQuals().reserve(m_Quals.size());

#define DO_QUAL(x) x_FormatQual(eFQ_##x, #x)
    DO_QUAL(partial);
    DO_QUAL(gene);

    DO_QUAL(locus_tag);

    DO_QUAL(product);

    x_FormatQual(eFQ_prot_EC_number, "EC_number");
    x_FormatQual(eFQ_prot_activity,  "function");

    DO_QUAL(standard_name);
    DO_QUAL(coded_by);
    DO_QUAL(derived_from);

    x_FormatQual(eFQ_prot_name, "name");
    DO_QUAL(region_name);
    DO_QUAL(bond_type);
    DO_QUAL(site_type);
    DO_QUAL(sec_str_type);
    DO_QUAL(heterogen);

#define DO_NOTE(x) x_FormatNoteQual(eFQ_##x, #x)
    DO_NOTE(gene_desc);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?