flat_gff_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 436 行 · 第 1/2 页
CPP
436 行
(s, m_Context->GetHandle(), *tentative_stop, cds); if (s == "*") { x_AddFeature(l, *tentative_stop, source, "stop_codon", "." /* score */, 0, attrs, gtf); } } } } m_Stream->AddParagraph(l, &f, &seqfeat);}void CFlatGFFFormatter::FormatDataHeader(const CFlatDataHeader& dh){ if ( !(m_GFFFlags & fShowSeq) ) return; list<string> l; l.push_back("##" + m_SeqType + ' ' + m_Context->GetAccession()); m_Stream->AddParagraph(l, &dh); m_EndSequence = "##end-" + m_SeqType;}void CFlatGFFFormatter::FormatData(const CFlatData& data){ if ( !(m_GFFFlags & fShowSeq) ) return; list<string> l; CSeqVector v = m_Context->GetHandle().GetSequenceView (data.GetLoc(), CBioseq_Handle::eViewConstructed, CBioseq_Handle::eCoding_Iupac); CSeqVector_CI vi(v); while (vi) { string s; vi.GetSeqData(s, 70); l.push_back("##" + s); } m_Stream->AddParagraph(l, &data, &data.GetLoc());}void CFlatGFFFormatter::EndSequence(void){ if ( !m_EndSequence.empty() ) { list<string> l; l.push_back(m_EndSequence); m_Stream->AddParagraph(l); }}string CFlatGFFFormatter::x_GetGeneID(const CFlatFeature& feat, const string& gene){ const CSeq_feat& seqfeat = feat.GetFeat(); string main_acc; if (m_Context->InSegSet()) { const CSeq_id& id = *m_Context->GetSegMaster()->GetId().front(); main_acc = m_Context->GetPreferredSynonym(id).GetSeqIdString(true); } else { main_acc = m_Context->GetAccession(); } string gene_id = main_acc + ':' + gene; CConstRef<CSeq_feat> gene_feat = sequence::GetBestOverlappingFeat (seqfeat.GetLocation(), CSeqFeatData::e_Gene, sequence::eOverlap_Interval, *m_Scope); TFeatVec& v = m_Genes[gene_id]; TFeatVec::const_iterator it = find(v.begin(), v.end(), gene_feat); int n; if (it == v.end()) { n = v.size(); v.push_back(gene_feat); } else { n = it - v.begin(); } if (n > 0) { gene_id += '.' + NStr::IntToString(n + 1); } return gene_id;}string CFlatGFFFormatter::x_GetSourceName(const IFlattishFeature&){ // XXX - get from annot name (not presently available from IFF)? switch (m_Context->GetPrimaryID().Which()) { case CSeq_id::e_Local: return "Local"; case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: case CSeq_id::e_Giim: case CSeq_id::e_Gi: return "GenInfo"; case CSeq_id::e_Genbank: return "Genbank"; case CSeq_id::e_Swissprot: return "SwissProt"; case CSeq_id::e_Patent: return "Patent"; case CSeq_id::e_Other: return "RefSeq"; case CSeq_id::e_General: return m_Context->GetPrimaryID().GetGeneral().GetDb(); default: { string source (CSeq_id::SelectionName(m_Context->GetPrimaryID().Which())); return NStr::ToUpper(source); } }}void CFlatGFFFormatter::x_AddFeature(list<string>& l, const CSeq_loc& loc, const string& source, const string& key, const string& score, int frame, const string& attrs, bool gtf){ int exon_number = 1; for (CSeq_loc_CI it(loc); it; ++it) { TSeqPos from = it.GetRange().GetFrom(), to = it.GetRange().GetTo(); char strand = '+'; if (IsReverse(it.GetStrand())) { strand = '-'; } else if (it.GetRange().IsWhole() || (m_Strandedness <= CSeq_inst::eStrand_ss && m_Context->GetMol() != CSeq_inst::eMol_dna)) { strand = '.'; // N/A } if (it.GetRange().IsWhole()) { to = sequence::GetLength(it.GetSeq_id(), m_Scope) - 1; } string extra_attrs; if (gtf && attrs.find("exon_number ") == NPOS) { CSeq_loc loc2; CSeq_interval& si = loc2.SetInt(); si.SetFrom(from); si.SetTo(to); si.SetStrand(it.GetStrand()); si.SetId(const_cast<CSeq_id&>(it.GetSeq_id())); CConstRef<CSeq_feat> exon = sequence::GetBestOverlappingFeat (loc2, CSeqFeatData::eSubtype_exon, sequence::eOverlap_Contains, *m_Scope); if (exon.NotEmpty() && exon->IsSetQual()) { ITERATE (CSeq_feat::TQual, q, exon->GetQual()) { if ( !NStr::CompareNocase((*q)->GetQual(), "number") ) { int n = NStr::StringToNumeric((*q)->GetVal()); if (n >= exon_number) { exon_number = n; break; } } } } extra_attrs = " exon_number \"" + NStr::IntToString(exon_number) + "\";"; ++exon_number; } if ( sequence::IsSameBioseq(it.GetSeq_id(), m_Context->GetPrimaryID(), m_Scope) ) { // conditionalize printing, but update state regardless l.push_back(m_Context->GetAccession() + '\t' + source + '\t' + key + '\t' + NStr::UIntToString(from + 1) + '\t' + NStr::UIntToString(to + 1) + '\t' + score + '\t' + strand + '\t' + (frame >= 0 ? char(frame + '0') : '.') + "\t" + attrs + extra_attrs); } if (frame >= 0) { frame = (frame + to - from + 1) % 3; } }}END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: flat_gff_formatter.cpp,v $* Revision 1000.3 2004/06/01 19:43:14 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6** Revision 1.6 2004/05/21 21:42:53 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.5 2003/12/03 20:53:53 ucko* Also quote #s in values to avoid trouble with naive comment recognizers.** Revision 1.4 2003/11/04 20:00:28 ucko* Edit " \b" sequences (used as hints for wrapping) out from qualifier values** Revision 1.3 2003/10/18 01:36:34 ucko* Tweak to work around MSVC stupidity.** Revision 1.2 2003/10/17 21:06:35 ucko* Reworked GTF mode per Wratko's critique:* - Now handles multi-exonic(!) start and stop codons.* - Treats all RNA features on DNA as exons.* - Sets exon_number attribute for GTF 1 compatibility.* - Quotes other attributes better.** Revision 1.1 2003/10/08 21:11:45 ucko* New GFF/GTF formatter*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?