gff_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 628 行 · 第 1/2 页
CPP
628 行
/*
 * ===========================================================================
 * PRODUCTION $Log: gff_formatter.cpp,v $
 * PRODUCTION Revision 1000.1  2004/06/01 19:44:46  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: gff_formatter.cpp,v 1000.1 2004/06/01 19:44:46 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*
*
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>

#include <objects/seqfeat/Genetic_code_table.hpp>
#include <objects/general/Date.hpp>
#include <objects/seq/Bioseq.hpp>

#include <objmgr/util/sequence.hpp>
#include <objmgr/util/feature.hpp>
#include <objmgr/seq_vector.hpp>

#include <objtools/format/gff_formatter.hpp>
#include <objtools/format/items/locus_item.hpp>
#include <objtools/format/items/date_item.hpp>
#include <objtools/format/items/feature_item.hpp>
#include <objtools/format/items/basecount_item.hpp>
#include <objtools/format/items/sequence_item.hpp>
#include <objtools/format/items/ctrl_items.hpp>
#include <objtools/format/context.hpp>

#include <algorithm>


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)


/// Construct a formatter with the GTF-compatibility flag enabled.
CGFFFormatter::CGFFFormatter(void) :
    m_GFFFlags(CGFFFormatter::fGTFCompat)
{
}


/// Emit the file-level header paragraph: the "##gff-version",
/// "##source-version" and "##date" lines.
///
/// @param text_os  Output stream that receives the header paragraph.
void CGFFFormatter::Start(IFlatTextOStream& text_os)
{
    list<string> l;
    l.push_back("##gff-version 2");
    l.push_back("##source-version NCBI C++ formatter 0.1");
    l.push_back("##date " + CurrentTime().AsString("Y-M-D"));
    text_os.AddParagraph(l);
}


/// Begin a new section: remember the molecule type of the section's
/// sequence in m_SeqType and, when it is one of DNA / RNA / Protein, emit a
/// "##Type <type> <accession>" line.
///
/// @param ssec     Start-of-section item; supplies the bioseq context.
/// @param text_os  Output stream that receives the paragraph.
void CGFFFormatter::StartSection(const CStartSectionItem& ssec,
                                 IFlatTextOStream& text_os)
{
    list<string> l;
    CBioseqContext& bctx = *ssec.GetContext();

    switch (bctx.GetMol()) {
    case CSeq_inst::eMol_dna:  m_SeqType = "DNA";      break;
    case CSeq_inst::eMol_rna:  m_SeqType = "RNA";      break;
    case CSeq_inst::eMol_aa:   m_SeqType = "Protein";  break;
    default:                   m_SeqType.erase();      break;
    }
    if ( !m_SeqType.empty() ) {
        l.push_back("##Type " + m_SeqType + ' ' + bctx.GetAccession());
    }
    // The paragraph is submitted even when no "##Type" line was produced.
    text_os.AddParagraph(l);
}


/// Finish a section by emitting the pending end-of-sequence marker
/// (m_EndSequence), if one is set.
///
/// @param text_os  Output stream that receives the marker.
void CGFFFormatter::EndSection(const CEndSectionItem&,
                               IFlatTextOStream& text_os)
{
    if ( !m_EndSequence.empty() ) {
        list<string> l;
        l.push_back(m_EndSequence);
        text_os.AddParagraph(l);
    }
}


/// Record the strandedness reported by the locus item; produces no output.
///
/// @param locus    Locus item to read the strand from.
/// @param text_os  Unused.
void CGFFFormatter::FormatLocus(const CLocusItem& locus,
                                IFlatTextOStream& text_os)
{
    m_Strandedness = locus.GetStrand();
}


/// Store the record's update date in m_Date; produces no output.
/// m_Date is left empty when the item carries no update date.
///
/// @param date     Date item to read the update date from.
/// @param text_os  Unused.
void CGFFFormatter::FormatDate(const CDateItem& date,
                               IFlatTextOStream& text_os)
{
    m_Date.erase();
    const CDate* d = date.GetUpdateDate();
    if ( d != 0 ) {
        d->GetDate(&m_Date, "%4Y-%{%2M%|??%}-%{%2D%|??%}");
    }
}


///////////////////////////////////////////////////////////////////////////
//
// FEATURES

/// Format one feature as GFF/GTF line(s).
///
/// Builds the attribute column from the feature's qualifiers, emits the
/// feature itself through x_AddFeature() and, for coding regions handled in
/// GTF mode, additionally emits "start_codon" / "stop_codon" features when
/// the corresponding codons can be identified.
///
/// @param f        Feature item to format.
/// @param text_os  Output stream that receives the resulting paragraph.
void CGFFFormatter::FormatFeature(const CFeatureItemBase& f,
                                  IFlatTextOStream& text_os)
{
    const CSeq_feat& seqfeat = f.GetFeat();
    string           key(f.GetKey());
    string           oldkey;
    bool             gtf     = false;
    CBioseqContext&  ctx     = *f.GetContext();
    CScope*          scope   = &ctx.GetScope();
    // CSeq_loc         tentative_stop;

    if ((m_GFFFlags & fGTFCompat)  &&  !ctx.IsProt()
        &&  (key == "CDS"  ||  key == "exon")) {
        gtf = true;
    } else if ((m_GFFFlags & fGTFCompat)
               &&  ctx.GetMol() == CSeq_inst::eMol_dna
               &&  seqfeat.GetData().IsRna()) {
        // RNA features on DNA are reported as exons; the original key is
        // preserved in a "gbkey" attribute below.
        oldkey = key;
        key    = "exon";
        gtf    = true;
    } else if ((m_GFFFlags & fGTFOnly) == fGTFOnly) {
        // GTF-only output: features that are not GTF-representable are
        // dropped entirely.
        return;
    }

    CConstRef<CFlatFeature> feat = f.Format();
    list<string>            l;
    list<string>            attr_list;

    if ( !oldkey.empty() ) {
        attr_list.push_back("gbkey \"" + oldkey + "\";");
    }

    ITERATE (CFlatFeature::TQuals, it, feat->GetQuals()) {
        string name = (*it)->GetName();
        if (name == "codon_start"  ||  name == "translation"
            ||  name == "transcription") {
            continue; // suppressed to reduce verbosity
        } else if (name == "number"  &&  key == "exon") {
            name = "exon_number";
        } else if ((m_GFFFlags & fGTFCompat)  &&  !ctx.IsProt()
                   &&  name == "gene") {
            // gene_id (and transcript_id for non-gene features) are pushed
            // to the FRONT of the list, gene_id last, so the attribute
            // column starts with: gene_id, transcript_id, ...
            string gene_id = x_GetGeneID(*feat, (*it)->GetValue(), ctx);
            if (key != "gene") {
                string transcript_id
                    = x_GetTranscriptID(*feat, gene_id, ctx);
                attr_list.push_front
                    ("transcript_id \"" + transcript_id + "\";");
            }
            attr_list.push_front("gene_id \"" + gene_id + "\";");
            continue;
        }

        string value;
        NStr::Replace((*it)->GetValue(), " \b", kEmptyStr, value);
        string value2(NStr::PrintableString(value));
        // some parsers may be dumb, so quote further
        value.erase();
        ITERATE (string, c, value2) {
            switch (*c) {
            case ' ':   value += "\\x20";  break;
            case '\"':  value += "x22";    break; // already backslashed
            case '#':   value += "\\x23";  break;
            default:    value += *c;
            }
        }
        attr_list.push_back(name + " \"" + value + "\";");
    }
    string attrs(NStr::Join(attr_list, " "));

    string source = x_GetSourceName(ctx);

    // Zero-based reading frame for coding regions on nucleotide sequences;
    // -1 for everything else.
    int frame = -1;
    if (seqfeat.GetData().IsCdregion()  &&  !ctx.IsProt() ) {
        const CCdregion& cds = seqfeat.GetData().GetCdregion();
        frame = max(cds.GetFrame() - 1, 0);
    }

    // Look for a stop codon immediately following the part of the location
    // that encodes the product.
    CRef<CSeq_loc> tentative_stop;
    if (gtf  &&  seqfeat.GetData().IsCdregion()) {
        const CCdregion& cds = seqfeat.GetData().GetCdregion();
        if ( !f.GetLoc().IsPartialRight()  &&  seqfeat.IsSetProduct() ) {
            TSeqPos loc_len  = sequence::GetLength(f.GetLoc(), scope);
            TSeqPos prod_len = sequence::GetLength(seqfeat.GetProduct(),
                                                   scope);
            if (loc_len >= frame + 3 * prod_len + 3) {
                // Heap-allocated (like the start-codon range below) so that
                // nothing reachable from tentative_stop, which outlives
                // this block, can refer to a dead stack object.
                CRef<SRelLoc::TRange> range(new SRelLoc::TRange);
                range->SetFrom(frame + 3 * prod_len);
                range->SetTo  (frame + 3 * prod_len + 2);
                // needs to be partial for TranslateCdregion to DTRT
                range->SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
                SRelLoc::TRanges ranges;
                ranges.push_back(range);
                tentative_stop = SRelLoc(f.GetLoc(), ranges).Resolve(scope);
            }
            if (tentative_stop.NotEmpty()  &&  !tentative_stop->IsNull()) {
                string s;
                CCdregion_translate::TranslateCdregion
                    (s, ctx.GetHandle(), *tentative_stop, cds);
                // Keep the candidate only if it really translates to a stop.
                if (s != "*") {
                    tentative_stop.Reset();
                }
            } else {
                tentative_stop.Reset();
            }
        }
    }

    x_AddFeature(l, f.GetLoc(), source, key, "." /*score*/, frame, attrs,
                 gtf, ctx, tentative_stop);

    if (gtf  &&  seqfeat.GetData().IsCdregion()) {
        const CCdregion& cds = seqfeat.GetData().GetCdregion();
        if ( !f.GetLoc().IsPartialLeft() ) {
            CRef<CSeq_loc> tentative_start;
            {{
                CRef<SRelLoc::TRange> range(new SRelLoc::TRange);
                SRelLoc::TRanges      ranges;
                range->SetFrom(frame);
                range->SetTo(frame + 2);
                ranges.push_back(range);
                tentative_start
                    = SRelLoc(f.GetLoc(), ranges).Resolve(scope);
            }}

            // Same validity check as applied to tentative_stop above: do
            // not dereference an empty or null location.
            if (tentative_start.NotEmpty()  &&  !tentative_start->IsNull()) {
                string s;
                ctx.GetHandle().GetSequenceView
                    (*tentative_start, CBioseq_Handle::eViewConstructed)
                    .GetSeqData(0, 3, s);

                // Use the coding region's own genetic code when it has one,
                // otherwise translation table 1.
                const CTrans_table* tt;
                if (cds.IsSetCode()) {
                    tt = &CGen_code_table::GetTransTable(cds.GetCode());
                } else {
                    tt = &CGen_code_table::GetTransTable(1);
                }
                if (s.size() == 3
                    &&  tt->IsAnyStart(tt->SetCodonState(s[0], s[1], s[2]))) {
                    x_AddFeature(l, *tentative_start, source, "start_codon",
                                 "." /* score */, 0, attrs, gtf, ctx);
                }
            }
        }
        if ( tentative_stop ) {
            x_AddFeature(l, *tentative_stop, source, "stop_codon",
                         "." /* score */, 0, attrs, gtf, ctx);
        }
    }

    text_os.AddParagraph(l, &seqfeat);
}


///////////////////////////////////////////////////////////////////////////
//
// BASE COUNT

// used as a trigger for the sequence header

/// When sequence output is enabled (fShowSeq), emit the
/// "##<type> <accession>" header line and arm the matching "##end-<type>"
/// marker in m_EndSequence, which EndSection() emits. Does nothing
/// otherwise.
///
/// @param bc       Base-count item; supplies the bioseq context.
/// @param text_os  Output stream that receives the header line.
void CGFFFormatter::FormatBasecount(const CBaseCountItem& bc,
                                    IFlatTextOStream& text_os)
{
    if ( !(m_GFFFlags & fShowSeq) )
        return;

    CBioseqContext& ctx = *bc.GetContext();

    list<string> l;
    l.push_back("##" + m_SeqType + ' ' + ctx.GetAccession());
    text_os.AddParagraph(l);
    m_EndSequence = "##end-" + m_SeqType;
}


///////////////////////////////////////////////////////////////////////////
//
// SEQUENCE

void CGFFFormatter::FormatSequence(const CSequenceItem& seq,
                                   IFlatTextOStream& text_os)
{
    if ( !(m_GFFFlags & fShowSeq) )
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?