gff_formatter.cpp
来自「ncbi源码」· C++ 代码 · 共 628 行 · 第 1/2 页
CPP
628 行
return; CBioseqContext& ctx = *seq.GetContext(); list<string> l; CSeqVector v = seq.GetSequence(); v.SetCoding(CBioseq_Handle::eCoding_Iupac); CSeqVector_CI vi(v); string s; while (vi) { s.erase(); vi.GetSeqData(s, 70); l.push_back("##" + s); } text_os.AddParagraph(l, ctx.GetHandle().GetCompleteBioseq());}// Privatestring CGFFFormatter::x_GetGeneID(const CFlatFeature& feat, const string& gene, CBioseqContext& ctx) const{ const CSeq_feat& seqfeat = feat.GetFeat(); string main_acc; if (ctx.IsPart()) { const CSeq_id& id = *(ctx.GetMaster().GetHandle().GetSeqId()); main_acc = ctx.GetPreferredSynonym(id).GetSeqIdString(true); } else { main_acc = ctx.GetAccession(); } string gene_id = main_acc + ':' + gene; CConstRef<CSeq_feat> gene_feat = sequence::GetBestOverlappingFeat (seqfeat.GetLocation(), CSeqFeatData::e_Gene, sequence::eOverlap_Interval, ctx.GetScope()); return gene_id;}string CGFFFormatter::x_GetTranscriptID(const CFlatFeature& feat, const string& gene_id, CBioseqContext& ctx) const{ const CSeq_feat& seqfeat = feat.GetFeat(); // if our feature already is an mRNA, we need look no further CConstRef<CSeq_feat> mrna_feat; if (seqfeat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) { mrna_feat.Reset(&seqfeat); } else { // search for a best overlapping mRNA // we start with a scan through the product accessions because we need // to insure that the chosen transcript does indeed match what we want if (seqfeat.IsSetProduct()) { try { // this may throw, if the product spans multiple sequences // this would be extremely unlikely, but we catch anyway const CSeq_id& product_id = sequence::GetId(seqfeat.GetProduct()); SAnnotSelector sel; sel.SetOverlapIntervals() .ExcludeNamedAnnots("SNP") .SetResolveAll() .SetFeatSubtype(CSeqFeatData::eSubtype_mRNA); CFeat_CI feat_iter(ctx.GetScope(), seqfeat.GetLocation(), sel); for ( ; feat_iter && !mrna_feat; ++feat_iter) { // we grab the mRNA product, if available, and scan it for // a CDS feature. the CDS feature should point to the same // product as our current feature. const CSeq_feat& mrna = feat_iter->GetOriginalFeature(); if ( !mrna.IsSetProduct() ) { continue; } // make sure the feature contains our feature of interest sequence::ECompare comp = sequence::Compare(mrna.GetLocation(), seqfeat.GetLocation()); if (comp != sequence::eContains && comp != sequence::eSame) { continue; } CBioseq_Handle handle = ctx.GetScope().GetBioseqHandle(mrna.GetProduct()); if ( !handle ) { continue; } SAnnotSelector cds_sel(sel); cds_sel.SetFeatSubtype(CSeqFeatData::eSubtype_cdregion); CFeat_CI other_iter(ctx.GetScope(), mrna.GetProduct(), cds_sel); for ( ; other_iter && !mrna_feat; ++other_iter) { const CSeq_feat& cds = other_iter->GetOriginalFeature(); if ( !cds.IsSetProduct() ) { continue; } CBioseq_Handle prot_handle = ctx.GetScope().GetBioseqHandle(cds.GetProduct()); if ( !prot_handle ) { continue; } if (prot_handle.IsSynonym(product_id)) { // got it! mrna_feat.Reset(&mrna); break; } } } } catch (...) { } } // // try to find the best by overlaps alone // if ( !mrna_feat ) { mrna_feat = sequence::GetBestOverlappingFeat (seqfeat.GetLocation(), CSeqFeatData::eSubtype_mRNA, sequence::eOverlap_CheckIntervals, ctx.GetScope()); } } // // check if the mRNA feature we found has a product // if (mrna_feat.GetPointer() && mrna_feat->IsSetProduct()) { try { const CSeq_id& id = sequence::GetId(mrna_feat->GetProduct()); string transcript_id = ctx.GetPreferredSynonym(id).GetSeqIdString(true); return transcript_id; } catch (...) { } } // // nothing found, so fake it // // failed to get transcript id, so we fake a globally unique one based // on the gene id m_Transcripts[gene_id].push_back(CConstRef<CSeq_feat>(&seqfeat)); string transcript_id = gene_id; transcript_id += ":unknown_transcript_"; transcript_id += NStr::IntToString(m_Transcripts[gene_id].size()); return transcript_id;}string CGFFFormatter::x_GetSourceName(CBioseqContext& ctx) const{ // XXX - get from annot name (not presently available from IFF)? switch ( ctx.GetPrimaryId()->Which() ) { case CSeq_id::e_Local: return "Local"; case CSeq_id::e_Gibbsq: case CSeq_id::e_Gibbmt: case CSeq_id::e_Giim: case CSeq_id::e_Gi: return "GenInfo"; case CSeq_id::e_Genbank: return "Genbank"; case CSeq_id::e_Swissprot: return "SwissProt"; case CSeq_id::e_Patent: return "Patent"; case CSeq_id::e_Other: return "RefSeq"; case CSeq_id::e_General: return ctx.GetPrimaryId()->GetGeneral().GetDb(); default: { string source (CSeq_id::SelectionName(ctx.GetPrimaryId()->Which())); return NStr::ToUpper(source); } }}void CGFFFormatter::x_AddFeature(list<string>& l, const CSeq_loc& loc, const string& source, const string& key, const string& score, int frame, const string& attrs, bool gtf, CBioseqContext& ctx, bool tentative_stop) const{ int num_exons = 0; for (CSeq_loc_CI it(loc); it; ++it) { ++num_exons; } int exon_number = 1; for (CSeq_loc_CI it(loc); it; ++it) { TSeqPos from = it.GetRange().GetFrom(), to = it.GetRange().GetTo(); char strand = '+'; if (IsReverse(it.GetStrand())) { strand = '-'; } else if (it.GetRange().IsWhole() || (m_Strandedness <= CSeq_inst::eStrand_ss && ctx.GetMol() != CSeq_inst::eMol_dna)) { strand = '.'; // N/A } if (it.GetRange().IsWhole()) { to = sequence::GetLength(it.GetSeq_id(), &ctx.GetScope()) - 1; } if ( tentative_stop && (exon_number == num_exons) ) { to -= 3; } string extra_attrs; if (gtf && attrs.find("exon_number ") == NPOS) { CSeq_loc loc2; CSeq_interval& si = loc2.SetInt(); si.SetFrom(from); si.SetTo(to); si.SetStrand(it.GetStrand()); si.SetId(const_cast<CSeq_id&>(it.GetSeq_id())); CConstRef<CSeq_feat> exon = sequence::GetBestOverlappingFeat (loc2, CSeqFeatData::eSubtype_exon, sequence::eOverlap_Contains, ctx.GetScope()); if (exon.NotEmpty() && exon->IsSetQual()) { ITERATE (CSeq_feat::TQual, q, exon->GetQual()) { if ( !NStr::CompareNocase((*q)->GetQual(), "number") ) { int n = NStr::StringToNumeric((*q)->GetVal()); if (n >= exon_number) { exon_number = n; break; } } } } extra_attrs = " exon_number \"" + NStr::IntToString(exon_number) + "\";"; ++exon_number; } if ( sequence::IsSameBioseq(it.GetSeq_id(), *ctx.GetPrimaryId(), &ctx.GetScope()) ) { // conditionalize printing, but update state regardless l.push_back(ctx.GetAccession() + '\t' + source + '\t' + key + '\t' + NStr::UIntToString(from + 1) + '\t' + NStr::UIntToString(to + 1) + '\t' + score + '\t' + strand + '\t' + (frame >= 0 ? char(frame + '0') : '.') + "\t" + attrs + extra_attrs); } if (frame >= 0) { frame = (frame + to - from + 1) % 3; } }}END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: gff_formatter.cpp,v $* Revision 1000.1 2004/06/01 19:44:46 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7** Revision 1.7 2004/05/21 21:42:54 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6 2004/05/08 12:12:33 dicuccio* Altered handling of transcript_id - make transcript_id globally unique and* trackable across CDS features as well as mRNA features** Revision 1.5 2004/05/06 17:54:29 shomrat* Use CConstRef instead of reference** Revision 1.4 2004/04/22 16:02:12 shomrat* fixed stop_codon** Revision 1.3 2004/03/25 20:42:41 shomrat* Master returns a CBioseq_Handle instead of a CBioseq** Revision 1.2 2004/02/11 16:59:11 shomrat* removed unused variable** Revision 1.1 2004/01/14 16:07:29 shomrat* Initial Revision*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?