gather_items.cpp

来自「ncbi源码」· C++ 代码 · 共 1,327 行 · 第 1/3 页

CPP
1,327
字号
void s_SetSelection(SAnnotSelector& sel, CBioseqContext& ctx){    const CFlatFileConfig& cfg = ctx.Config();    // set feature types to be collected    {{        sel.SetAnnotType(CSeq_annot::C_Data::e_Ftable);        // source features are collected elsewhere        sel.ExcludeFeatType(CSeqFeatData::e_Biosrc);        // pub features are used in the REFERENCES section        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_pub);        // some feature types are always excluded (deprecated?)        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_non_std_residue);        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_rsite);        sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_seq);        // exclude other types based on user flags        if ( cfg.HideImpFeats() ) {            sel.ExcludeFeatType(CSeqFeatData::e_Imp);        }        if ( cfg.HideRemoteImpFeats() ) {            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_misc_feature);        }        if ( cfg.HideSNPFeatures() ) {            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_variation);        }        if ( cfg.HideExonFeatures() ) {            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_exon);        }        if ( cfg.HideIntronFeatures() ) {            sel.ExcludeFeatSubtype(CSeqFeatData::eSubtype_intron);        }    }}    sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals);    if ( GetStrand(ctx.GetLocation(), &ctx.GetScope()) == eNa_strand_minus ) {        sel.SetSortOrder(SAnnotSelector::eSortOrder_Reverse);  // sort in reverse biological order    } else {        sel.SetSortOrder(SAnnotSelector::eSortOrder_Normal);    }    sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());    sel.SetResolveTSE();}static bool s_FeatEndsOnBioseq(const CSeq_feat& feat, const CBioseq_Handle& seq){    CSeq_loc_CI last;    for ( CSeq_loc_CI it(feat.GetLocation()); it; ++it ) {        last = it;    }        return (last  &&  seq.IsSynonym(last.GetSeq_id()));}static CSeq_loc_Mapper* s_CreateMapper(CBioseqContext& ctx){    if ( ctx.GetMapper() != 0 ) {        return ctx.GetMapper();    }    // do not create mapper if not segmented or segmented but not doing master style.    const CFlatFileConfig& cfg = ctx.Config();    if ( !ctx.IsSegmented()  || !(cfg.IsStyleMaster()  ||  cfg.IsFormatFTable()) ) {        return 0;    }    CSeq_loc_Mapper* mapper = new CSeq_loc_Mapper(ctx.GetHandle());    if ( mapper != 0 ) {        mapper->SetMergeAbutting();        mapper->PreserveDestinationLocs();        mapper->KeepNonmappingRanges();    }    return mapper;}static bool s_CopyCDSFromCDNA(CBioseqContext& ctx){    return ctx.IsInGPS()  &&  !ctx.IsInNucProt()  &&  ctx.Config().CopyCDSFromCDNA();}static void s_FixLocation(CConstRef<CSeq_loc>& feat_loc, CBioseqContext& ctx){    if ( !feat_loc->IsMix() ) {        return;    }    bool partial5 = feat_loc->IsPartialLeft();    bool partial3 = feat_loc->IsPartialRight();    CRef<CSeq_loc> loc(SeqLocMerge(ctx.GetHandle(), feat_loc->GetMix().Get(),        fFuseAbutting | fMergeIntervals));    loc->SetPartialLeft(partial5);    loc->SetPartialRight(partial3);    feat_loc.Reset(loc);}void CFlatGatherer::x_GatherFeaturesOnLocation(const CSeq_loc& loc, SAnnotSelector& sel, CBioseqContext& ctx) const{    CScope& scope = ctx.GetScope();    CFlatItemOStream& out = *m_ItemOS;    CRef<CSeq_loc_Mapper> mapper(s_CreateMapper(ctx));    for ( CFeat_CI it(scope, loc, sel); it; ++it ) {        const CSeq_feat& feat = it->GetOriginalFeature();                // if part show only features ending on that part        if ( ctx.IsPart()  &&               !s_FeatEndsOnBioseq(feat, ctx.GetHandle()) ) {            continue;        }                CConstRef<CSeq_loc> feat_loc(&feat.GetLocation());        if ( mapper ) {            feat_loc.Reset(mapper->Map(*feat_loc));            s_FixLocation(feat_loc, ctx);        }                        out << new CFeatureItem(feat, ctx, feat_loc);        // Add more features depending on user preferences        switch ( feat.GetData().GetSubtype() ) {        case CSeqFeatData::eSubtype_mRNA:            {{                // optionally map CDS from cDNA onto genomic                if ( s_CopyCDSFromCDNA(ctx)   &&  feat.IsSetProduct() ) {                    x_CopyCDSFromCDNA(feat, ctx);                }                break;            }}        case CSeqFeatData::eSubtype_cdregion:            {{                  if ( !ctx.Config().IsFormatFTable() ) {                    x_GetFeatsOnCdsProduct(it->GetOriginalFeature(), ctx, mapper);                }                break;            }}        default:            break;        }    }}void CFlatGatherer::x_CopyCDSFromCDNA(const CSeq_feat& feat, CBioseqContext& ctx) const{    CScope& scope = ctx.GetScope();    CBioseq_Handle cdna = scope.GetBioseqHandle(feat.GetProduct());    if ( !cdna ) {        return;    }    // NB: There is only one CDS on an mRNA    CFeat_CI cds(cdna, 0, 0, CSeqFeatData::e_Cdregion);    if ( cds ) {        // map mRNA location to the genomic        CSeq_loc_Mapper mapper(feat,                               CSeq_loc_Mapper::eProductToLocation,                               &scope);        CRef<CSeq_loc> cds_loc = mapper.Map(cds->GetLocation());        *m_ItemOS << new CFeatureItem(cds->GetOriginalFeature(), ctx, cds_loc,                                      CFeatureItem::eMapped_from_cdna);    }}void CFlatGatherer::x_GatherFeatures(void) const{    CBioseqContext& ctx = *m_Current;    const CFlatFileConfig& cfg = ctx.Config();    CScope& scope = ctx.GetScope();    CFlatItemOStream& out = *m_ItemOS;    SAnnotSelector sel;    const SAnnotSelector* selp = ctx.GetAnnotSelector();    if ( selp == 0 ) {        s_SetSelection(sel, ctx);        selp = &sel;    }    // optionally map gene from genomic onto cDNA    if ( ctx.IsInGPS()  &&  cfg.CopyGeneToCDNA()  &&         ctx.GetBiomol() == CMolInfo::eBiomol_mRNA ) {        const CSeq_feat* mrna = GetmRNAForProduct(ctx.GetHandle());        if ( mrna != 0 ) {            CConstRef<CSeq_feat> gene =                 GetOverlappingGene(mrna->GetLocation(), scope);            if ( gene != 0 ) {                CRef<CSeq_loc> loc(new CSeq_loc);                loc->SetWhole(*ctx.GetPrimaryId());                out << new CFeatureItem(*gene, ctx, loc,                                         CFeatureItem::eMapped_from_genomic);            }        }    }    CSeq_loc loc;    if ( ctx.GetMasterLocation() != 0 ) {        loc.Assign(*ctx.GetMasterLocation());    } else {        loc.SetWhole().Assign(*ctx.GetHandle().GetSeqId());    }    // collect features    if ( ctx.IsSegmented()  &&  cfg.IsStyleMaster()  &&  cfg.OldFeatsOrder() ) {        if ( ctx.GetAnnotSelector() == 0 ) {            sel.SetResolveNone();        }                // first do the master bioeseq        x_GatherFeaturesOnLocation(loc, sel, ctx);        // map the location on the segments                CSeq_loc_Mapper mapper(1, ctx.GetHandle());        CRef<CSeq_loc> seg_loc(mapper.Map(loc));        if ( seg_loc ) {            // now go over each of the segments            for ( CSeq_loc_CI it(*seg_loc); it; ++it ) {                x_GatherFeaturesOnLocation(it.GetSeq_loc(), sel, ctx);            }        }    } else {        x_GatherFeaturesOnLocation(loc, sel, ctx);    }        if ( ctx.IsProt() ) {        // Also collect features which this protein is their product.        // Currently there are only two possible candidates: Coding regions        // and Prot features (rare).                // look for the Cdregion feature for this protein        const CSeq_feat* cds = GetCDSForProduct(ctx.GetHandle());        if ( cds != 0 ) {            out << new CFeatureItem(*cds, ctx, &cds->GetProduct(),                     CFeatureItem::eMapped_from_cdna);        }        // look for Prot features (only for RefSeq records or        // GenBank not release_mode).        if ( ctx.IsRefSeq()  ||  !cfg.ForGBRelease() ) {            SAnnotSelector sel(CSeqFeatData::e_Prot, true);            sel.SetLimitTSE(ctx.GetHandle().GetTopLevelEntry());            sel.SetResolveMethod(SAnnotSelector::eResolve_TSE);            sel.SetOverlapType(SAnnotSelector::eOverlap_Intervals);            for ( CFeat_CI it(ctx.GetHandle(), 0, 0, sel); it; ++it ) {                  out << new CFeatureItem(it->GetOriginalFeature(),                                        ctx,                                        &it->GetProduct(),                                        CFeatureItem::eMapped_from_prot);            }        }    }}static bool s_IsCDD(const CSeq_feat& feat){    ITERATE(CSeq_feat::TDbxref, it, feat.GetDbxref()) {        if ( (*it)->GetType() == CDbtag::eDbtagType_CDD ) {            return true;        }    }    return false;}void CFlatGatherer::x_GetFeatsOnCdsProduct(const CSeq_feat& feat, CBioseqContext& ctx, CRef<CSeq_loc_Mapper>& mapper) const{    _ASSERT(feat.GetData().IsCdregion());    const CFlatFileConfig& cfg = ctx.Config();        if ( cfg.HideCDSProdFeatures() ) {        return;    }        if ( !feat.CanGetProduct() ) {        return;    }    CScope& scope = ctx.GetScope();    CBioseq_Handle  prot = scope.GetBioseqHandle(feat.GetProduct());    if ( !prot ) {        return;    }            CFeat_CI prev;    bool first = true;    CSeq_loc_Mapper prot_to_nuc(feat, CSeq_loc_Mapper::eProductToLocation, &scope);    // explore mat_peptides, sites, etc.    for ( CFeat_CI it(prot, 0, 0); it; ++it ) {        CSeqFeatData::ESubtype subtype = it->GetData().GetSubtype();        if ( !(subtype == CSeqFeatData::eSubtype_region)              &&             !(subtype == CSeqFeatData::eSubtype_site)                &&             !(subtype == CSeqFeatData::eSubtype_bond)                &&             !(subtype == CSeqFeatData::eSubtype_mat_peptide_aa)      &&             !(subtype == CSeqFeatData::eSubtype_sig_peptide_aa)      &&             !(subtype == CSeqFeatData::eSubtype_transit_peptide_aa)  &&             !(subtype == CSeqFeatData::eSubtype_preprotein) ) {            continue;        }        if ( cfg.HideCDDFeats()  &&             subtype == CSeqFeatData::eSubtype_region  &&             s_IsCDD(it->GetOriginalFeature()) ) {            // passing this test prevents mapping of COG CDD region features            continue;        }        // suppress duplicate features (on protein)        if ( !first ) {            const CSeq_loc& loc_curr = it->GetLocation();            const CSeq_loc& loc_prev = prev->GetLocation();            const CSeq_feat& feat_curr = it->GetOriginalFeature();            const CSeq_feat& feat_prev = prev->GetOriginalFeature();            if ( feat_prev.Compare(feat_curr, loc_curr, loc_prev) == 0 ) {                continue;            }        }        // map prot location to nuc location        CRef<CSeq_loc> loc(prot_to_nuc.Map(it->GetLocation()));        // possibly map again (e.g. from part to master)        if ( loc.NotEmpty()  &&  mapper.NotEmpty() ) {            loc.Reset(mapper->Map(*loc));        }        if ( !loc  ||  loc->IsNull() ) {            continue;        }        // make sure feature is within sublocation        if ( ctx.GetMasterLocation() != 0 ) {            const CSeq_loc& mloc = *ctx.GetMasterLocation();            if ( Compare(mloc, *loc, &scope) != eContains ) {                continue;            }        }                *m_ItemOS << new CFeatureItem(it->GetOriginalFeature(), ctx,             loc, CFeatureItem::eMapped_from_prot);        prev = it;        first = false;    }    }END_SCOPE(objects)END_NCBI_SCOPE/** ===========================================================================** $Log: gather_items.cpp,v $* Revision 1000.2  2004/06/01 19:44:32  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.21** Revision 1.21  2004/05/21 21:42:54  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.20  2004/05/06 17:52:21  shomrat* Fixed feature location** Revision 1.19  2004/04/27 15:12:16  shomrat* Added logic for partial range formatting** Revision 1.18  2004/04/22 16:00:25  shomrat* Changes in context** Revision 1.17  2004/04/13 16:47:15  shomrat* Added GBSeq format** Revision 1.16  2004/04/07 14:51:24  shomrat* Fixed typo** Revision 1.15  2004/04/07 14:27:47  shomrat* FTable format always on master bioseq** Revision 1.14  2004/03/31 17:16:04  shomrat* Set current bioseq once in calling function** Revision 1.13  2004/03/30 20:31:09  shomrat* Bug fix** Revision 1.12  2004/03/26 17:24:55  shomrat* Changes to comment gathering** Revision 1.11  2004/03/25 20:39:47  shomrat* Use handles** Revision 1.10  2004/03/18 15:39:40  shomrat* + Filtering of displayed records** Revision 1.9  2004/03/12 16:57:54  shomrat* Filter viewable bioseqs; Use new location mapping** Revision 1.8  2004/03/10 16:22:44  shomrat* Use reference to object** Revision 1.7  2004/03/05 18:45:19  shomrat* changes to feature gathering** Revision 1.6  2004/02/19 18:11:25  shomrat* Set feature iterator selector based on user flags** Revision 1.5  2004/02/11 22:52:41  shomrat* using types in flag file** Revision 1.4  2004/02/11 16:52:12  shomrat* completed implementation of featture gathering** Revision 1.3  2004/01/14 16:15:03  shomrat* minor changes to accomodate for GFF format** Revision 1.2  2003/12/18 17:43:34  shomrat* context.hpp moved** Revision 1.1  2003/12/17 20:21:48  shomrat* Initial Revision (adapted from flat lib)*** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?