flat_gff_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 436 行 · 第 1/2 页
CPP
436 行
/* * =========================================================================== * PRODUCTION $Log: flat_gff_formatter.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 19:43:14 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//* $Id: flat_gff_formatter.cpp,v 1000.3 2004/06/01 19:43:14 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Author: Aaron Ucko, Wratko Hlavina** File Description:* Flat formatter for Generic Feature Format (incl. Gene Transfer Format)** ===========================================================================*/#include <ncbi_pch.hpp>#include <objtools/flat/flat_gff_formatter.hpp>#include <objtools/flat/flat_head.hpp>#include <objtools/flat/flat_items.hpp>#include <objects/general/Dbtag.hpp>#include <objects/general/Int_fuzz.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Gb_qual.hpp>#include <objects/seqfeat/Genetic_code_table.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/util/sequence.hpp>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)CFlatGFFFormatter::CFlatGFFFormatter(IFlatTextOStream& stream, CScope& scope, EMode mode, TGFFFlags gff_flags, EStyle style, TFlags flags) : IFlatFormatter(scope, mode, style, flags), m_GFFFlags(gff_flags), m_Stream(&stream){ list<string> header; header.push_back("##gff-version 2"); header.push_back("##source-version NCBI C++ formatter 0.1"); header.push_back("##date " + CurrentTime().AsString("Y-M-D")); stream.AddParagraph(header);}void CFlatGFFFormatter::FormatHead(const CFlatHead& head){ m_Stream->NewSequence(); list<string> l; switch (m_Context->GetMol()) { case CSeq_inst::eMol_dna: m_SeqType = "DNA"; break; case CSeq_inst::eMol_rna: m_SeqType = "RNA"; break; case CSeq_inst::eMol_aa: m_SeqType = "Protein"; break; default: m_SeqType.erase(); break; } if ( !m_SeqType.empty() ) { l.push_back("##Type " + m_SeqType + ' ' + m_Context->GetAccession()); } m_Date.erase(); head.GetUpdateDate().GetDate(&m_Date, "%4Y-%{%2M%|??%}-%{%2D%|??%}"); m_Strandedness = head.GetStrandedness(); m_EndSequence.erase(); m_Stream->AddParagraph(l, &head);}void CFlatGFFFormatter::FormatFeature(const IFlattishFeature& f){ const CSeq_feat& seqfeat = f.GetFeat(); string key(f.GetKey()), oldkey; bool gtf = false; // CSeq_loc tentative_stop; if ((m_GFFFlags & fGTFCompat) && !m_Context->IsProt() && (key == "CDS" || key == "exon")) { gtf = true; } else if ((m_GFFFlags & fGTFCompat) && m_Context->GetMol() == CSeq_inst::eMol_dna && seqfeat.GetData().IsRna()) { oldkey = key; key = "exon"; gtf = true; } else if ((m_GFFFlags & fGTFOnly) == fGTFOnly) { return; } CFlatFeature& feat = *f.Format(); list<string> l; list<string> attr_list; if ( !oldkey.empty() ) { attr_list.push_back("gbkey \"" + oldkey + "\";"); } ITERATE (CFlatFeature::TQuals, it, feat.GetQuals()) { string name = (*it)->GetName(); if (name == "codon_start" || name == "translation" || name == "transcription") { continue; // suppressed to reduce verbosity } else if (name == "number" && key == "exon") { name = "exon_number"; } else if ((m_GFFFlags & fGTFCompat) && !m_Context->IsProt() && name == "gene") { string gene_id = x_GetGeneID(feat, (*it)->GetValue()); attr_list.push_front ("transcript_id \"" + gene_id + '.' + m_Date + "\";"); attr_list.push_front("gene_id \"" + gene_id + "\";"); continue; } string value; NStr::Replace((*it)->GetValue(), " \b", kEmptyStr, value); string value2(NStr::PrintableString(value)); // some parsers may be dumb, so quote further value.erase(); ITERATE (string, c, value2) { switch (*c) { case ' ': value += "\\x20"; break; case '\"': value += "x22"; break; // already backslashed case '#': value += "\\x23"; break; default: value += *c; } } attr_list.push_back(name + " \"" + value + "\";"); } string attrs(NStr::Join(attr_list, " ")); string source = x_GetSourceName(f); int frame = -1; if (seqfeat.GetData().IsCdregion() && !m_Context->IsProt() ) { const CCdregion& cds = seqfeat.GetData().GetCdregion(); frame = max(cds.GetFrame() - 1, 0); } x_AddFeature(l, f.GetLoc(), source, key, "." /*score*/, frame, attrs, gtf); if (gtf && seqfeat.GetData().IsCdregion()) { const CCdregion& cds = seqfeat.GetData().GetCdregion(); if ( !f.GetLoc().IsPartialLeft() ) { CRef<CSeq_loc> tentative_start; {{ CRef<SRelLoc::TRange> range(new SRelLoc::TRange); SRelLoc::TRanges ranges; range->SetFrom(frame); range->SetTo(frame + 2); ranges.push_back(range); tentative_start = SRelLoc(f.GetLoc(), ranges).Resolve(m_Scope); }} string s; m_Context->GetHandle().GetSequenceView (*tentative_start, CBioseq_Handle::eViewConstructed) .GetSeqData(0, 3, s); const CTrans_table* tt; if (cds.IsSetCode()) { tt = &CGen_code_table::GetTransTable(cds.GetCode()); } else { tt = &CGen_code_table::GetTransTable(1); } if (s.size() == 3 && tt->IsAnyStart(tt->SetCodonState(s[0], s[1], s[2]))) { x_AddFeature(l, *tentative_start, source, "start_codon", "." /* score */, 0, attrs, gtf); } } if ( !f.GetLoc().IsPartialRight() && seqfeat.IsSetProduct() ) { TSeqPos loc_len = sequence::GetLength(f.GetLoc(), m_Scope); TSeqPos prod_len = sequence::GetLength(seqfeat.GetProduct(), m_Scope); CRef<CSeq_loc> tentative_stop; if (loc_len >= frame + 3 * prod_len + 3) { SRelLoc::TRange range; range.SetFrom(frame + 3 * prod_len); range.SetTo (frame + 3 * prod_len + 2); // needs to be partial for TranslateCdregion to DTRT range.SetFuzz_from().SetLim(CInt_fuzz::eLim_lt); SRelLoc::TRanges ranges; ranges.push_back(CRef<SRelLoc::TRange>(&range)); tentative_stop = SRelLoc(f.GetLoc(), ranges).Resolve(m_Scope); } if (tentative_stop.NotEmpty() && !tentative_stop->IsNull()) { string s; CCdregion_translate::TranslateCdregion
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?