blob_splitter_maker.cpp

来自「ncbi源码」· C++ 代码 · 共 599 行 · 第 1/2 页

CPP
599
字号
{    if ( gi_count < 4 ) {        for ( int i = 0; i < gi_count; ++i ) {            CRef<CID2_Seq_loc> add(new CID2_Seq_loc);            add->SetGi_whole(gi_start + i);            AddLoc(loc, add);        }    }    else {        CRef<CID2_Seq_loc> add(new CID2_Seq_loc);        add->SetGi_whole_range().SetStart(gi_start);        add->SetGi_whole_range().SetCount(gi_count);        AddLoc(loc, add);    }}void AddLoc(CRef<CID2_Seq_loc>& loc, const TWhole_set& whole_set){    int gi_start = 0, gi_count = 0;    ITERATE ( TWhole_set, it, whole_set ) {        if ( gi_count == 0 || *it != gi_start + gi_count ) {            AddLoc(loc, gi_start, gi_count);            gi_start = *it;            gi_count = 0;        }        ++gi_count;    }    AddLoc(loc, gi_start, gi_count);}CRef<CID2_Seq_loc> MakeLoc(const CSeqsRange& range){    TWhole_set whole_set;    TInt_set int_set;    ITERATE ( CSeqsRange, it, range ) {        int gi = it->first.GetGi();        CSeqsRange::TRange range = it->second.GetTotalRange();        if ( range == range.GetWhole() ) {            whole_set.insert(gi);            _ASSERT(int_set.count(gi) == 0);        }        else {            int_set[gi].insert(range);            _ASSERT(whole_set.count(gi) == 0);        }    }    CRef<CID2_Seq_loc> loc;    AddLoc(loc, int_set);    AddLoc(loc, whole_set);    _ASSERT(loc);    return loc;}void CBlobSplitterImpl::MakeID2Chunk(int id, const SChunkInfo& info){    CRef<CID2S_Chunk> chunk(new CID2S_Chunk);    CRef<CID2S_Chunk_Info> chunk_info(new CID2S_Chunk_Info);    chunk_info->SetId(CID2S_Chunk_Id(id));    typedef map<CAnnotName, SAllAnnots> TAllAnnots;    TAllAnnots all_annots;    ITERATE ( SChunkInfo::TChunkAnnots, it, info.m_Annots ) {        CRef<CID2S_Chunk_Data> data(new CID2S_Chunk_Data);        chunk->SetData().push_back(data);        CID2S_Chunk_Data::TId& id = data->SetId();        if ( it->first > 0 ) {            id.SetGi(it->first);        }        else {            id.SetBioseq_set(-it->first);        }        ITERATE ( SChunkInfo::TIdAnnots, annot_it, it->second ) {            CRef<CSeq_annot> annot = MakeSeq_annot(*annot_it->first,                                                   annot_it->second);            data->SetAnnots().push_back(annot);            // collect locations            CAnnotName name = CSeq_annot_SplitInfo::GetName(*annot_it->first);            all_annots[name].Add(*annot);        }    }    NON_CONST_ITERATE ( TAllAnnots, nit, all_annots ) {        nit->second.SplitInfo();        const CAnnotName& annot_name = nit->first;        ITERATE ( SAllAnnots::TSplitAnnots, it, nit->second.m_SplitAnnots ) {            const SAllAnnots::TTypeSet& type_set = it->first;            const CSeqsRange& location = it->second;            CRef<CID2S_Chunk_Content> content(new CID2S_Chunk_Content);            CID2S_Seq_annot_Info& annot_info = content->SetSeq_annot();            if ( annot_name.IsNamed() ) {                annot_info.SetName(annot_name.GetName());            }            typedef CSeqFeatData::ESubtype TSubtype;            typedef CSeqFeatData::E_Choice TFeatType;            typedef set<TSubtype> TSubtypes;            typedef map<TFeatType, TSubtypes> TFeatTypes;            TFeatTypes feat_types;            ITERATE ( SAllAnnots::TTypeSet, tit, type_set ) {                const SAnnotTypeSelector& t = *tit;                switch ( t.GetAnnotType() ) {                case CSeq_annot::C_Data::e_Align:                    annot_info.SetAlign();                    break;                case CSeq_annot::C_Data::e_Graph:                    annot_info.SetGraph();                    break;                case CSeq_annot::C_Data::e_Ftable:                    feat_types[t.GetFeatType()].insert(t.GetFeatSubtype());                    break;                }            }            ITERATE ( TFeatTypes, tit, feat_types ) {                TFeatType t = tit->first;                const TSubtypes& subtypes = tit->second;                bool all_subtypes =                    subtypes.find(CSeqFeatData::eSubtype_any) !=                    subtypes.end();                if ( !all_subtypes ) {                    all_subtypes = true;                    for ( TSubtype st = CSeqFeatData::eSubtype_bad;                          st <= CSeqFeatData::eSubtype_max;                          st = TSubtype(st+1) ) {                        if ( CSeqFeatData::GetTypeFromSubtype(st) == t &&                             subtypes.find(st) == subtypes.end() ) {                            all_subtypes = false;                            break;                        }                    }                }                CRef<CID2S_Feat_type_Info> type_info(new CID2S_Feat_type_Info);                type_info->SetType(t);                if ( !all_subtypes ) {                    ITERATE ( TSubtypes, stit, subtypes ) {                        type_info->SetSubtypes().push_back(*stit);                    }                }                annot_info.SetFeat().push_back(type_info);            }            annot_info.SetSeq_loc(*MakeLoc(location));            chunk_info->SetContent().push_back(content);        }    }#if 0    NcbiCout << "Objects: in SChunkInfo: " << info.CountAnnotObjects() <<        " in CID2S_Chunk: " << CountAnnotObjects(*chunk) << '\n';#endif    m_ID2_Chunks[CID2S_Chunk_Id(id)] = chunk;    m_Split_Info->SetChunks().push_back(chunk_info);}void CBlobSplitterImpl::AttachToSkeleton(const SChunkInfo& info){    ITERATE ( SChunkInfo::TChunkAnnots, it, info.m_Annots ) {        TBioseqs::iterator seq_it = m_Bioseqs.find(it->first);        _ASSERT(seq_it != m_Bioseqs.end());        _ASSERT(bool(seq_it->second.m_Bioseq) || bool(seq_it->second.m_Bioseq_set));        ITERATE ( SChunkInfo::TIdAnnots, annot_it, it->second ) {            CRef<CSeq_annot> annot = MakeSeq_annot(*annot_it->first,                                                   annot_it->second);            if ( seq_it->second.m_Bioseq ) {                seq_it->second.m_Bioseq->SetAnnot().push_back(annot);            }            else {                seq_it->second.m_Bioseq_set->SetAnnot().push_back(annot);            }        }    }}CRef<CSeq_annot>CBlobSplitterImpl::MakeSeq_annot(const CSeq_annot& src,                                 const TAnnotObjects& objs){    CRef<CSeq_annot> annot(new CSeq_annot);    if ( src.IsSetId() ) {        CSeq_annot::TId& id = annot->SetId();        ITERATE ( CSeq_annot::TId, it, src.GetId() ) {            id.push_back(Ref(&NonConst(**it)));        }    }    if ( src.IsSetDb() ) {        annot->SetDb(src.GetDb());    }    if ( src.IsSetName() ) {        annot->SetName(src.GetName());    }    if ( src.IsSetDesc() ) {        annot->SetDesc(NonConst(src.GetDesc()));    }    switch ( src.GetData().Which() ) {    case CSeq_annot::C_Data::e_Ftable:        ITERATE ( CLocObjects_SplitInfo, it, objs ) {            CObject& obj = NonConst(*it->m_Object);            annot->SetData().SetFtable()                .push_back(Ref(&dynamic_cast<CSeq_feat&>(obj)));        }        break;    case CSeq_annot::C_Data::e_Align:        ITERATE ( CLocObjects_SplitInfo, it, objs ) {            CObject& obj = NonConst(*it->m_Object);            annot->SetData().SetAlign()                .push_back(Ref(&dynamic_cast<CSeq_align&>(obj)));        }        break;    case CSeq_annot::C_Data::e_Graph:        ITERATE ( CLocObjects_SplitInfo, it, objs ) {            CObject& obj = NonConst(*it->m_Object);            annot->SetData().SetGraph()                .push_back(Ref(&dynamic_cast<CSeq_graph&>(obj)));        }        break;    }    return annot;}size_t CBlobSplitterImpl::CountAnnotObjects(const TID2Chunks& chunks){    size_t count = 0;    ITERATE ( TID2Chunks, it, chunks ) {        count += CountAnnotObjects(*it->second);    }    return count;}size_t CBlobSplitterImpl::CountAnnotObjects(const CID2S_Chunk& chunk){    size_t count = 0;    for ( CTypeConstIterator<CSeq_annot> it(ConstBegin(chunk)); it; ++it ) {        count += CSeq_annot_SplitInfo::CountAnnotObjects(*it);    }    return count;}END_SCOPE(objects)END_NCBI_SCOPE/** ---------------------------------------------------------------------------* $Log: blob_splitter_maker.cpp,v $* Revision 1000.2  2004/06/01 19:24:50  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10** Revision 1.10  2004/05/21 21:42:13  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.9  2004/02/04 18:05:40  grichenk* Added annotation filtering by set of types/subtypes.* Renamed *Choice to *Type in SAnnotSelector.** Revision 1.8  2004/01/22 20:10:42  vasilche* 1. Splitted ID2 specs to two parts.* ID2 now specifies only protocol.* Specification of ID2 split data is moved to seqsplit ASN module.* For now they are still reside in one resulting library as before - libid2.* As the result split specific headers are now in objects/seqsplit.* 2. Moved ID2 and ID1 specific code out of object manager.* Protocol is processed by corresponding readers.* ID2 split parsing is processed by ncbi_xreader library - used by all readers.* 3. Updated OBJMGR_LIBS correspondingly.** Revision 1.7  2004/01/07 17:36:24  vasilche* Moved id2_split headers to include/objmgr/split.* Fixed include path to genbank.** Revision 1.6  2003/12/03 19:30:45  kuznets* Misprint fixed** Revision 1.5  2003/12/02 19:12:24  vasilche* Fixed compilation on MSVC.** Revision 1.4  2003/12/01 18:37:10  vasilche* Separate different annotation types in split info to reduce memory usage.** Revision 1.3  2003/11/26 23:04:58  vasilche* Removed extra semicolons after BEGIN_SCOPE and END_SCOPE.** Revision 1.2  2003/11/26 17:56:02  vasilche* Implemented ID2 split in ID1 cache.* Fixed loading of splitted annotations.** Revision 1.1  2003/11/12 16:18:28  vasilche* First implementation of ID2 blob splitter withing cache.** ===========================================================================*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?