gather_items.cpp
来自「ncbi源码」· C++ 代码 · 共 1,327 行 · 第 1/3 页
CPP
1,327 行
void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx){ const CFlatFileConfig& cfg = ctx.Config(); // set feature types to be collected {{ sel.SetAnnotType(CSeq_annot::C_Data::e_Ftable); // source features are collected elsewhere sel.ExcludeFeatType(CSeqFeatData::e_Biosrc); // pub features are used in the REFERENCES section sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub); // some feature types are always excluded (deprecated?) sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq); // exclude other types based on user flags if ( cfg.HideImpFeats() ) { sel.ExcludeFeatType(CSeqFeatData::e_Imp); } if ( cfg.HideRemoteImpFeats() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature); } if ( cfg.HideSNPFeatures() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation); } if ( cfg.HideExonFeatures() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon); } if ( cfg.HideIntronFeatures() ) { sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron); } }} sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals); if ( GetStrand(ctx.GetLocation(), &ctx.GetScope()) == eNa_strand_minus ) { sel.SetSortOrder(SAnnotSelector::eSortOrder_Reverse); // sort in reverse biological order } else { sel.SetSortOrder(SAnnotSelector::eSortOrder_Normal); } sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry()); sel.SetResolveTSE();}static bool s_FeatEndsOnBioseq(const CSeq_feat& feat, const CBioseq_Handle& seq){ CSeq_loc_CI last; for ( CSeq_loc_CI it(feat.GetLocation()); it; ++it ) { last = it; } return (last && seq.IsSynonym(last.GetSeq_id()));}static CSeq_loc_Mapper* s_CreateMapper(CBioseqContext& ctx){ if ( ctx.GetMapper() != 0 ) { return ctx.GetMapper(); } // do not create mapper if not segmented or segmented but not doing master style. const CFlatFileConfig& cfg = ctx.Config(); if ( !ctx.IsSegmented() || !(cfg.IsStyleMaster() || cfg.IsFormatFTable()) ) { return 0; } CSeq_loc_Mapper* mapper = new CSeq_loc_Mapper(ctx.GetHandle()); if ( mapper != 0 ) { mapper->SetMergeAbutting(); mapper->PreserveDestinationLocs(); mapper->KeepNonmappingRanges(); } return mapper;}static bool s_CopyCDSFromCDNA(CBioseqContext& ctx){ return ctx.IsInGPS() && !ctx.IsInNucProt() && ctx.Config().CopyCDSFromCDNA();}static void s_FixLocation(CConstRef<CSeq_loc>& feat_loc, CBioseqContext& ctx){ if ( !feat_loc->IsMix() ) { return; } bool partial5 = feat_loc->IsPartialLeft(); bool partial3 = feat_loc->IsPartialRight(); CRef<CSeq_loc> loc(SeqLocMerge(ctx.GetHandle(), feat_loc->GetMix().Get(), fFuseAbutting | fMergeIntervals)); loc->SetPartialLeft(partial5); loc->SetPartialRight(partial3); feat_loc.Reset(loc);}void CFlatGatherer::x_GatherFeaturesOnLocation(const CSeq_loc& loc, SAnnotSelector& sel, CBioseqContext& ctx) const{ CScope& scope = ctx.GetScope(); CFlatItemOStream& out = *m_ItemOS; CRef<CSeq_loc_Mapper> mapper(s_CreateMapper(ctx)); for ( CFeat_CI it(scope, loc, sel); it; ++it ) { const CSeq_feat& feat = it->GetOriginalFeature(); // if part show only features ending on that part if ( ctx.IsPart() && !s_FeatEndsOnBioseq(feat, ctx.GetHandle()) ) { continue; } CConstRef<CSeq_loc> feat_loc(&feat.GetLocation()); if ( mapper ) { feat_loc.Reset(mapper->Map(*feat_loc)); s_FixLocation(feat_loc, ctx); } out << new CFeatureItem(feat, ctx, feat_loc); // Add more features depending on user preferences switch ( feat.GetData().GetSubtype() ) { case CSeqFeatData::eSubtype_mRNA: {{ // optionally map CDS from cDNA onto genomic if ( s_CopyCDSFromCDNA(ctx) && feat.IsSetProduct() ) { x_CopyCDSFromCDNA(feat, ctx); } break; }} case CSeqFeatData::eSubtype_cdregion: {{ if ( !ctx.Config().IsFormatFTable() ) { x_GetFeatsOnCdsProduct(it->GetOriginalFeature(), ctx, mapper); } break; }} default: break; } }}void CFlatGatherer::x_CopyCDSFromCDNA(const CSeq_feat& feat, CBioseqContext& ctx) const{ CScope& scope = ctx.GetScope(); CBioseq_Handle cdna = scope.GetBioseqHandle(feat.GetProduct()); if ( !cdna ) { return; } // NB: There is only one CDS on an mRNA CFeat_CI cds(cdna, 0, 0, CSeqFeatData::e_Cdregion); if ( cds ) { // map mRNA location to the genomic CSeq_loc_Mapper mapper(feat, CSeq_loc_Mapper::eProductToLocation, &scope); CRef<CSeq_loc> cds_loc = mapper.Map(cds->GetLocation()); *m_ItemOS << new CFeatureItem(cds->GetOriginalFeature(), ctx, cds_loc, CFeatureItem::eMapped_from_cdna); }}void CFlatGatherer::x_GatherFeatures(void) const{ CBioseqContext& ctx = *m_Current; const CFlatFileConfig& cfg = ctx.Config(); CScope& scope = ctx.GetScope(); CFlatItemOStream& out = *m_ItemOS; SAnnotSelector sel; const SAnnotSelector* selp = ctx.GetAnnotSelector(); if ( selp == 0 ) { s_SetSelection(sel, ctx); selp = &sel; } // optionally map gene from genomic onto cDNA if ( ctx.IsInGPS() && cfg.CopyGeneToCDNA() && ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) { const CSeq_feat* mrna = GetmRNAForProduct(ctx.GetHandle()); if ( mrna != 0 ) { CConstRef<CSeq_feat> gene = GetOverlappingGene(mrna->GetLocation(), scope); if ( gene != 0 ) { CRef<CSeq_loc> loc(new CSeq_loc); loc->SetWhole(*ctx.GetPrimaryId()); out << new CFeatureItem(*gene, ctx, loc, CFeatureItem::eMapped_from_genomic); } } } CSeq_loc loc; if ( ctx.GetMasterLocation() != 0 ) { loc.Assign(*ctx.GetMasterLocation()); } else { loc.SetWhole().Assign(*ctx.GetHandle().GetSeqId()); } // collect features if ( ctx.IsSegmented() && cfg.IsStyleMaster() && cfg.OldFeatsOrder() ) { if ( ctx.GetAnnotSelector() == 0 ) { sel.SetResolveNone(); } // first do the master bioeseq x_GatherFeaturesOnLocation(loc, sel, ctx); // map the location on the segments CSeq_loc_Mapper mapper(1, ctx.GetHandle()); CRef<CSeq_loc> seg_loc(mapper.Map(loc)); if ( seg_loc ) { // now go over each of the segments for ( CSeq_loc_CI it(*seg_loc); it; ++it ) { x_GatherFeaturesOnLocation(it.GetSeq_loc(), sel, ctx); } } } else { x_GatherFeaturesOnLocation(loc, sel, ctx); } if ( ctx.IsProt() ) { // Also collect features which this protein is their product. // Currently there are only two possible candidates: Coding regions // and Prot features (rare). // look for the Cdregion feature for this protein const CSeq_feat* cds = GetCDSForProduct(ctx.GetHandle()); if ( cds != 0 ) { out << new CFeatureItem(*cds, ctx, &cds->GetProduct(), CFeatureItem::eMapped_from_cdna); } // look for Prot features (only for RefSeq records or // GenBank not release_mode). if ( ctx.IsRefSeq() || !cfg.ForGBRelease() ) { SAnnotSelector sel(CSeqFeatData::e_Prot, true); sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry()); sel.SetResolveMethod(SAnnotSelector::eResolve_TSE); sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals); for ( CFeat_CI it(ctx.GetHandle(), 0, 0, sel); it; ++it ) { out << new CFeatureItem(it->GetOriginalFeature(), ctx, &it->GetProduct(), CFeatureItem::eMapped_from_prot); } } }}static bool s_IsCDD(const CSeq_feat& feat){ ITERATE(CSeq_feat::TDbxref, it, feat.GetDbxref()) { if ( (*it)->GetType() == CDbtag::eDbtagType_CDD ) { return true; } } return false;}void CFlatGatherer::x_GetFeatsOnCdsProduct(const CSeq_feat& feat, CBioseqContext& ctx, CRef<CSeq_loc_Mapper>& mapper) const{ _ASSERT(feat.GetData().IsCdregion()); const CFlatFileConfig& cfg = ctx.Config(); if ( cfg.HideCDSProdFeatures() ) { return; } if ( !feat.CanGetProduct() ) { return; } CScope& scope = ctx.GetScope(); CBioseq_Handle prot = scope.GetBioseqHandle(feat.GetProduct()); if ( !prot ) { return; } CFeat_CI prev; bool first = true; CSeq_loc_Mapper prot_to_nuc(feat, CSeq_loc_Mapper::eProductToLocation, &scope); // explore mat_peptides, sites, etc. for ( CFeat_CI it(prot, 0, 0); it; ++it ) { CSeqFeatData::ESubtype subtype = it->GetData().GetSubtype(); if ( !(subtype == CSeqFeatData::eSubtype_region) && !(subtype == CSeqFeatData::eSubtype_site) && !(subtype == CSeqFeatData::eSubtype_bond) && !(subtype == CSeqFeatData::eSubtype_mat_peptide_aa) && !(subtype == CSeqFeatData::eSubtype_sig_peptide_aa) && !(subtype == CSeqFeatData::eSubtype_transit_peptide_aa) && !(subtype == CSeqFeatData::eSubtype_preprotein) ) { continue; } if ( cfg.HideCDDFeats() && subtype == CSeqFeatData::eSubtype_region && s_IsCDD(it->GetOriginalFeature()) ) { // passing this test prevents mapping of COG CDD region features continue; } // suppress duplicate features (on protein) if ( !first ) { const CSeq_loc& loc_curr = it->GetLocation(); const CSeq_loc& loc_prev = prev->GetLocation(); const CSeq_feat& feat_curr = it->GetOriginalFeature(); const CSeq_feat& feat_prev = prev->GetOriginalFeature(); if ( feat_prev.Compare(feat_curr, loc_curr, loc_prev) == 0 ) { continue; } } // map prot location to nuc location CRef<CSeq_loc> loc(prot_to_nuc.Map(it->GetLocation())); // possibly map again (e.g. from part to master) if ( loc.NotEmpty() && mapper.NotEmpty() ) { loc.Reset(mapper->Map(*loc)); } if ( !loc || loc->IsNull() ) { continue; } // make sure feature is within sublocation if ( ctx.GetMasterLocation() != 0 ) { const CSeq_loc& mloc = *ctx.GetMasterLocation(); if ( Compare(mloc, *loc, &scope) != eContains ) { continue; } } *m_ItemOS << new CFeatureItem(it->GetOriginalFeature(), ctx, loc, CFeatureItem::eMapped_from_prot); prev = it; first = false; } }END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: gather_items.cpp,v $* Revision 1000.2 2004/06/01 19:44:32 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.21** Revision 1.21 2004/05/21 21:42:54 gorelenk* Added PCH ncbi_pch.hpp** Revision 1.20 2004/05/06 17:52:21 shomrat* Fixed feature location** Revision 1.19 2004/04/27 15:12:16 shomrat* Added logic for partial range formatting** Revision 1.18 2004/04/22 16:00:25 shomrat* Changes in context** Revision 1.17 2004/04/13 16:47:15 shomrat* Added GBSeq format** Revision 1.16 2004/04/07 14:51:24 shomrat* Fixed typo** Revision 1.15 2004/04/07 14:27:47 shomrat* FTable format always on master bioseq** Revision 1.14 2004/03/31 17:16:04 shomrat* Set current bioseq once in calling function** Revision 1.13 2004/03/30 20:31:09 shomrat* Bug fix** Revision 1.12 2004/03/26 17:24:55 shomrat* Changes to comment gathering** Revision 1.11 2004/03/25 20:39:47 shomrat* Use handles** Revision 1.10 2004/03/18 15:39:40 shomrat* + Filtering of displayed records** Revision 1.9 2004/03/12 16:57:54 shomrat* Filter viewable bioseqs; Use new location mapping** Revision 1.8 2004/03/10 16:22:44 shomrat* Use reference to object** Revision 1.7 2004/03/05 18:45:19 shomrat* changes to feature gathering** Revision 1.6 2004/02/19 18:11:25 shomrat* Set feature iterator selector based on user flags** Revision 1.5 2004/02/11 22:52:41 shomrat* using types in flag file** Revision 1.4 2004/02/11 16:52:12 shomrat* completed implementation of featture gathering** Revision 1.3 2004/01/14 16:15:03 shomrat* minor changes to accomodate for GFF format** Revision 1.2 2003/12/18 17:43:34 shomrat* context.hpp moved** Revision 1.1 2003/12/17 20:21:48 shomrat* Initial Revision (adapted from flat lib)*** ===========================================================================*/
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?