flat_head.cpp

来自「ncbi源码」· C++ 代码 · 共 759 行 · 第 1/2 页

CPP
759
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: flat_head.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:43:17  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: flat_head.cpp,v 1000.2 2004/06/01 19:43:17 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*
* File Description:
*   New (early 2003) flat-file generator -- representation of "header"
*   data, which translates into a format-dependent sequence of paragraphs.
*
* ===========================================================================
*/

#include <ncbi_pch.hpp>
#include <objtools/flat/flat_head.hpp>
#include <corelib/ncbiutil.hpp>
#include <serial/iterator.hpp>

#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqblock/GB_block.hpp>
#include <objects/seqblock/PDB_block.hpp>
#include <objects/seqblock/PDB_replace.hpp>
#include <objects/seqblock/PIR_block.hpp>
#include <objects/seqblock/PRF_ExtraSrc.hpp>
#include <objects/seqblock/PRF_block.hpp>
#include <objects/seqblock/SP_block.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seqloc/PDB_mol_id.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>

#include <objmgr/bioseq_handle.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seqdesc_ci.hpp>
#include <objmgr/util/sequence.hpp>

BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/// Fold one date into the running create/update dates.
///
/// The date becomes m_UpdateDate when no update date is recorded yet or
/// when it compares as later than the recorded one; independently, it
/// becomes m_CreateDate when no creation date is recorded yet or when it
/// compares as earlier than the recorded one.  The address of `date` is
/// what gets stored, so the caller's object is referenced rather than
/// copied.
///
/// @param date  Candidate date taken from one of the sequence descriptors.
inline
void CFlatHead::x_AddDate(const CDate& date)
{
    if (m_UpdateDate.Empty()
        ||  date.Compare(*m_UpdateDate) == CDate::eCompare_after) {
        m_UpdateDate = &date;
    }
    if (m_CreateDate.Empty()
        ||  date.Compare(*m_CreateDate) == CDate::eCompare_before) {
        m_CreateDate = &date;
    }
}


/// Build the header record for the sequence described by `ctx`.
///
/// Besides filling this object's own members (locus, other/secondary IDs,
/// strandedness, topology, definition, divisions, dates), the constructor
/// also writes several fields of the context itself: m_PrimaryID,
/// m_Accession, m_GI, m_IsRefSeq, m_IsTPA, m_IsWGSMaster, m_IsRefSeqGenome,
/// m_Length and m_Biomol.
///
/// @param ctx  Flat-file context; a pointer to it is kept in m_Context.
CFlatHead::CFlatHead(CFlatContext& ctx)
    : m_Strandedness(CSeq_inst::eStrand_not_set),
      m_Topology    (CSeq_inst::eTopology_not_set),
      m_GBDivision  (0),
      m_Context     (&ctx)
{
    CScope&                     scope = ctx.GetHandle().GetScope();
    CBioseq_Handle::TBioseqCore seq   = ctx.GetHandle().GetBioseqCore();

    ctx.m_PrimaryID = FindBestChoice(seq->GetId(), CSeq_id::Score);
    ctx.m_Accession = ctx.GetPrimaryID().GetSeqIdString(true);

    // Locus: prefer the text ID's name, then its accession, and only then
    // fall back to the generic string form of the primary ID.
    {{
        const CTextseq_id* tsid = ctx.GetPrimaryID().GetTextseq_Id();
        if (tsid  &&  tsid->IsSetName()) {
            m_Locus = tsid->GetName();
        } else if (tsid  &&  tsid->IsSetAccession()) {
            m_Locus = tsid->GetAccession();
        } else {
            // complain?
            m_Locus = ctx.GetPrimaryID().GetSeqIdString(false);
        }
    }}

    ITERATE (CBioseq::TId, it, seq->GetId()) {
        if (*it != &ctx.GetPrimaryID()) {
            m_OtherIDs.push_back(*it);
        }
        switch ((*it)->Which()) {
        case CSeq_id::e_Gi:
            ctx.m_GI = (*it)->GetGi();
            break;
        case CSeq_id::e_Other:
            ctx.m_IsRefSeq = true;
            break;
        case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
            ctx.m_IsTPA = true;
            break;
        default:
            break;
        }

        CSeq_id::EAccessionInfo ai = (*it)->IdentifyAccession();
        // GetTextseq_Id() is treated as nullable above (locus selection),
        // so guard it here as well before reading the accession.
        const CTextseq_id* acc_tsid = (*it)->GetTextseq_Id();
        if ((ai & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_wgs
            &&  acc_tsid  &&  acc_tsid->IsSetAccession()
            &&  NStr::EndsWith(acc_tsid->GetAccession(), "000000")) {
            ctx.m_IsWGSMaster = true;
        } else if (ai == CSeq_id::eAcc_refseq_genome) {
            ctx.m_IsRefSeqGenome = true;
        }
    }

    {{
        const CSeq_inst& inst = seq->GetInst();
        if (inst.IsSetStrand()) {
            m_Strandedness = inst.GetStrand();
        }
        m_Topology     = inst.GetTopology();
    }}

    ctx.m_Length = sequence::GetLength(ctx.GetLocation(), &scope);
    m_Definition = sequence::GetTitle(ctx.GetHandle());
    // The definition always ends with a period.
    if ( !NStr::EndsWith(m_Definition, ".") ) {
        m_Definition += '.';
    }

    if (ctx.IsProt()) { // populate m_SourceIDs
        x_AddDBSource();
    }

    for (CSeqdesc_CI it(ctx.GetHandle());  it;  ++it) {
        switch (it->Which()) {
            // bother translating old GIBB-* data?
        case CSeqdesc::e_Org:
        case CSeqdesc::e_Source:
            // Only consulted while no division has been recorded yet.
            if ( !m_GBDivision ) {
                // iterate to deal with hybrids
                for (CTypeConstIterator<COrgName> orgn(*it);  orgn;  ++orgn) {
                    if (orgn->IsSetDiv()) {
                        m_GBDivision = &orgn->GetDiv();
                        BREAK(orgn);
                    }
                }
            }
            break;

        case CSeqdesc::e_Genbank:
        {
            const CGB_block& gb = it->GetGenbank();
            if (gb.IsSetExtra_accessions()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      gb.GetExtra_accessions().begin(),
                                      gb.GetExtra_accessions().end());
            }
            if (gb.IsSetEntry_date()) {
                x_AddDate(gb.GetEntry_date());
            }
            // Unlike the Org/Source case, this assignment is unconditional
            // and replaces any division recorded earlier.
            if (gb.IsSetDiv()) {
                m_GBDivision = &gb.GetDiv();
            }
            break;
        }

        case CSeqdesc::e_Sp:
        {
            const CSP_block& sp = it->GetSp();
            if (sp.IsSetExtra_acc()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      sp.GetExtra_acc().begin(),
                                      sp.GetExtra_acc().end());
            }
            if (sp.IsSetCreated()) {
                x_AddDate(sp.GetCreated());
            }
            if (sp.IsSetSequpd()) {
                x_AddDate(sp.GetSequpd());
            }
            if (sp.IsSetAnnotupd()) {
                x_AddDate(sp.GetAnnotupd());
            }
            break;
        }

        case CSeqdesc::e_Embl:
        {
            const CEMBL_block& embl = it->GetEmbl();
            if (embl.IsSetDiv()) {
                m_EMBLDivision = embl.GetDiv();
            }
            x_AddDate(embl.GetCreation_date()); // mandatory field
            x_AddDate(embl.GetUpdate_date()); // mandatory field
            if (embl.IsSetExtra_acc()) {
                m_SecondaryIDs.insert(m_SecondaryIDs.end(),
                                      embl.GetExtra_acc().begin(),
                                      embl.GetExtra_acc().end());
            }
            break;
        }

        case CSeqdesc::e_Create_date:
            x_AddDate(it->GetCreate_date());
            break;

        case CSeqdesc::e_Update_date:
            x_AddDate(it->GetUpdate_date());
            break;

        case CSeqdesc::e_Pdb:
        {
            const CPDB_block& pdb = it->GetPdb();
            x_AddDate(pdb.GetDeposition()); // mandatory field
            // replacement history -> secondary IDs?
            break;
        }

        case CSeqdesc::e_Molinfo:
        {
            const CMolInfo& mi = it->GetMolinfo();
            if (mi.IsSetBiomol()) {
                ctx.m_Biomol = mi.GetBiomol();
            }
            // Explicit break: do not rely on falling into `default`.
            break;
        }

        default:
            break;
        }
    }
}


/// Return the molecule-type label for the header line.
///
/// Two label tables are used.  The first applies when the formatter's
/// database is EMBL and its mode is at most eMode_Entrez: it collapses
/// everything to "DNA", "RNA" or "AA " (falling back to "xxx").  The second
/// applies otherwise and distinguishes the RNA subtypes (falling back to
/// three blanks).  In both tables an unlisted biomol value is resolved from
/// the context's coarse molecule class instead.
///
/// @return Pointer to a string literal; never null.
const char* CFlatHead::GetMolString(void) const
{
    const IFlatFormatter& f = m_Context->GetFormatter();
    if (f.GetDatabase() == IFlatFormatter::eDB_EMBL
        &&  f.GetMode() <= IFlatFormatter::eMode_Entrez) {
        switch (m_Context->GetBiomol()) {
        case CMolInfo::eBiomol_genomic:
        case CMolInfo::eBiomol_other_genetic:
        case CMolInfo::eBiomol_genomic_mRNA:
            return "DNA";

        case CMolInfo::eBiomol_pre_RNA:
        case CMolInfo::eBiomol_mRNA:
        case CMolInfo::eBiomol_rRNA:
        case CMolInfo::eBiomol_tRNA:
        case CMolInfo::eBiomol_snRNA:
        case CMolInfo::eBiomol_scRNA:
        case CMolInfo::eBiomol_cRNA:
        case CMolInfo::eBiomol_snoRNA:
        case CMolInfo::eBiomol_transcribed_RNA:
            return "RNA";

        case CMolInfo::eBiomol_peptide:
            return "AA ";

        default:
            switch (m_Context->GetMol()) {
            case CSeq_inst::eMol_dna: return "DNA";
            case CSeq_inst::eMol_rna: return "RNA";
            case CSeq_inst::eMol_aa:  return "AA ";
            default:                  return "xxx";
            }
        }
    } else {
        switch (m_Context->GetBiomol()) {
        case CMolInfo::eBiomol_genomic:          return "DNA";
        case CMolInfo::eBiomol_pre_RNA:          return "RNA";
        case CMolInfo::eBiomol_mRNA:             return "mRNA";
        case CMolInfo::eBiomol_rRNA:             return "rRNA";
        case CMolInfo::eBiomol_tRNA:             return "tRNA";
        case CMolInfo::eBiomol_snRNA:            return "uRNA";
        case CMolInfo::eBiomol_scRNA:            return "scRNA";
        case CMolInfo::eBiomol_peptide:          return " AA";
        case CMolInfo::eBiomol_other_genetic:    return "DNA";
        case CMolInfo::eBiomol_genomic_mRNA:     return "DNA";
        case CMolInfo::eBiomol_cRNA:             return "RNA";
        case CMolInfo::eBiomol_snoRNA:           return "snoRNA";
        case CMolInfo::eBiomol_transcribed_RNA:  return "RNA";
        default:
            switch (m_Context->GetMol()) {
            case CSeq_inst::eMol_dna: return "DNA";
            case CSeq_inst::eMol_rna: return "RNA";
            case CSeq_inst::eMol_aa:  return " AA";
            default:                  return "   ";
            }
        }
    }
}


/// Scoring function handed to FindBestChoice() when picking the ID that
/// determines the DBSOURCE entry.  Unset IDs get kMax_Int; every ID type
/// not listed explicitly gets 10, the smallest value returned here.
///
/// @param x  Candidate sequence ID.
/// @return   Score for the ID's type.
inline
static int s_ScoreForDBSource(const CRef<CSeq_id>& x)
{
    switch (x->Which()) {
    case CSeq_id::e_not_set:                        return kMax_Int;
    case CSeq_id::e_Gi:                             return 31;
    case CSeq_id::e_Giim:                           return 30;
    case CSeq_id::e_Local: case CSeq_id::e_General: return 20;
    case CSeq_id::e_Other:                          return 18;
    case CSeq_id::e_Gibbmt:                         return 16;
    case CSeq_id::e_Gibbsq: case CSeq_id::e_Patent: return 15;
    case CSeq_id::e_Pdb:                            return 12;
    default:                                        return 10;
    }
}


// NOTE(review): the definition below continues on the following line of the
// file and is not fully visible here; its opening is kept exactly as found.
void CFlatHead::x_AddDBSource(void)
{
    CBioseq_Handle::TBioseqCore seq
= m_Context->GetHandle().GetBioseqCore();    const CSeq_id* id = FindBestChoice(seq->GetId(), s_ScoreForDBSource);    if ( !id ) {        m_DBSource.push_back("UNKNOWN");        return;    }    switch (id->Which()) {    case CSeq_id::e_Pir:        m_DBSource.push_back(x_FormatDBSourceID(*id));        x_AddPIRBlock();        break;    case CSeq_id::e_Swissprot:        m_DBSource.push_back(x_FormatDBSourceID(*id));        x_AddSPBlock();        break;    case CSeq_id::e_Prf:        m_DBSource.push_back(x_FormatDBSourceID(*id));        x_AddPRFBlock();        break;    case CSeq_id::e_Pdb:        m_DBSource.push_back(x_FormatDBSourceID(*id));        x_AddPDBBlock();        break;    case CSeq_id::e_General:        if ( !NStr::StartsWith(id->GetGeneral().GetDb(), "PID") ) {            m_DBSource.push_back("UNKNOWN");            break;        }        // otherwise, fall through    case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: case CSeq_id::e_Giim:    case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Other:    case CSeq_id::e_Gi: case CSeq_id::e_Ddbj:    case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:    {        set<CBioseq_Handle> sources;        CScope&             scope = m_Context->GetHandle().GetScope();        for (CFeat_CI it(scope, m_Context->GetLocation(),                         CSeqFeatData::e_not_set,                         SAnnotSelector::eOverlap_Intervals,                         SAnnotSelector::eResolve_TSE, CFeat_CI::e_Product);             it;  ++it) {            for (CTypeConstIterator<CSeq_id> id2(it->GetLocation());                 id2;  ++id2) {                sources.insert(scope.GetBioseqHandle(*id2));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?