flat_gff_formatter.cpp

来自「ncbi源码」· C++ 代码 · 共 436 行 · 第 1/2 页

CPP
436
字号
/* * =========================================================================== * PRODUCTION $Log: flat_gff_formatter.cpp,v $ * PRODUCTION Revision 1000.3  2004/06/01 19:43:14  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//*  $Id: flat_gff_formatter.cpp,v 1000.3 2004/06/01 19:43:14 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author:  Aaron Ucko, Wratko Hlavina** File Description:*   Flat formatter for Generic Feature Format (incl. Gene Transfer Format)** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/flat/flat_gff_formatter.hpp>#include <objtools/flat/flat_head.hpp>#include <objtools/flat/flat_items.hpp>#include <objects/general/Dbtag.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Gb_qual.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/util/sequence.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)CFlatGFFFormatter::CFlatGFFFormatter(IFlatTextOStream& stream, CScope& scope,                                     EMode mode, TGFFFlags gff_flags,                                     EStyle style, TFlags flags)    : IFlatFormatter(scope, mode, style, flags),      m_GFFFlags(gff_flags), m_Stream(&stream){    list<string> header;    header.push_back("##gff-version 2");    header.push_back("##source-version NCBI C++ formatter 0.1");    header.push_back("##date " + CurrentTime().AsString("Y-M-D"));    stream.AddParagraph(header);}void CFlatGFFFormatter::FormatHead(const CFlatHead& head){    m_Stream->NewSequence();    list<string> l;    switch (m_Context->GetMol()) {    case CSeq_inst::eMol_dna:  m_SeqType = "DNA";      break;    case CSeq_inst::eMol_rna:  m_SeqType = "RNA";      break;    case CSeq_inst::eMol_aa:   m_SeqType = "Protein";  break;    default:                   m_SeqType.erase();      break;    }    if ( !m_SeqType.empty() ) {        l.push_back("##Type " + m_SeqType + ' '                    + m_Context->GetAccession());    }    m_Date.erase();    head.GetUpdateDate().GetDate(&m_Date, "%4Y-%{%2M%|??%}-%{%2D%|??%}");    m_Strandedness = head.GetStrandedness();    m_EndSequence.erase();    m_Stream->AddParagraph(l, &head);}void CFlatGFFFormatter::FormatFeature(const IFlattishFeature& f){    const CSeq_feat& seqfeat = f.GetFeat();    string           key(f.GetKey()), oldkey;    bool             gtf     = false;    // CSeq_loc         tentative_stop;    if ((m_GFFFlags & fGTFCompat)  &&  !m_Context->IsProt()        &&  (key == "CDS"  ||  key == "exon")) {        gtf = true;    } else if ((m_GFFFlags & fGTFCompat)               &&  m_Context->GetMol() == CSeq_inst::eMol_dna               &&  seqfeat.GetData().IsRna()) {        oldkey = key;        key    = "exon";        gtf    = true;    } else if ((m_GFFFlags & fGTFOnly) == fGTFOnly) {        return;    }    CFlatFeature& feat = *f.Format();    list<string>  l;    list<string>  attr_list;    if ( !oldkey.empty() ) {        attr_list.push_back("gbkey \"" + oldkey + "\";");    }    ITERATE (CFlatFeature::TQuals, it, feat.GetQuals()) {        string name = (*it)->GetName();        if (name == "codon_start"  ||  name == "translation"            ||  name == "transcription") {            continue; // suppressed to reduce verbosity        } else if (name == "number"  &&  key == "exon") {            name = "exon_number";        } else if ((m_GFFFlags & fGTFCompat)  &&  !m_Context->IsProt()                   &&  name == "gene") {            string gene_id = x_GetGeneID(feat, (*it)->GetValue());            attr_list.push_front                ("transcript_id \"" + gene_id + '.' + m_Date + "\";");            attr_list.push_front("gene_id \"" + gene_id + "\";");            continue;        }        string value;        NStr::Replace((*it)->GetValue(), " \b", kEmptyStr, value);        string value2(NStr::PrintableString(value));        // some parsers may be dumb, so quote further        value.erase();        ITERATE (string, c, value2) {            switch (*c) {            case ' ':  value += "\\x20"; break;            case '\"': value += "x22";   break; // already backslashed            case '#':  value += "\\x23"; break;            default:   value += *c;            }        }        attr_list.push_back(name + " \"" + value + "\";");    }    string attrs(NStr::Join(attr_list, " "));    string source = x_GetSourceName(f);    int frame = -1;    if (seqfeat.GetData().IsCdregion()  &&  !m_Context->IsProt() ) {        const CCdregion& cds = seqfeat.GetData().GetCdregion();        frame = max(cds.GetFrame() - 1, 0);    }    x_AddFeature(l, f.GetLoc(), source, key, "." /*score*/, frame, attrs, gtf);    if (gtf  &&  seqfeat.GetData().IsCdregion()) {        const CCdregion& cds = seqfeat.GetData().GetCdregion();        if ( !f.GetLoc().IsPartialLeft() ) {            CRef<CSeq_loc> tentative_start;            {{                CRef<SRelLoc::TRange> range(new SRelLoc::TRange);                SRelLoc::TRanges      ranges;                range->SetFrom(frame);                range->SetTo(frame + 2);                ranges.push_back(range);                tentative_start = SRelLoc(f.GetLoc(), ranges).Resolve(m_Scope);            }}            string s;            m_Context->GetHandle().GetSequenceView                (*tentative_start, CBioseq_Handle::eViewConstructed)                .GetSeqData(0, 3, s);            const CTrans_table* tt;            if (cds.IsSetCode()) {                tt = &CGen_code_table::GetTransTable(cds.GetCode());            } else {                tt = &CGen_code_table::GetTransTable(1);            }            if (s.size() == 3                &&  tt->IsAnyStart(tt->SetCodonState(s[0], s[1], s[2]))) {                x_AddFeature(l, *tentative_start, source, "start_codon",                             "." /* score */, 0, attrs, gtf);            }        }        if ( !f.GetLoc().IsPartialRight()  &&  seqfeat.IsSetProduct() ) {            TSeqPos loc_len = sequence::GetLength(f.GetLoc(), m_Scope);            TSeqPos prod_len = sequence::GetLength(seqfeat.GetProduct(),                                                   m_Scope);            CRef<CSeq_loc> tentative_stop;            if (loc_len >= frame + 3 * prod_len + 3) {                SRelLoc::TRange range;                range.SetFrom(frame + 3 * prod_len);                range.SetTo  (frame + 3 * prod_len + 2);                // needs to be partial for TranslateCdregion to DTRT                range.SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);                SRelLoc::TRanges ranges;                ranges.push_back(CRef<SRelLoc::TRange>(&range));                tentative_stop = SRelLoc(f.GetLoc(), ranges).Resolve(m_Scope);            }            if (tentative_stop.NotEmpty()  &&  !tentative_stop->IsNull()) {                string s;                CCdregion_translate::TranslateCdregion

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?