📄 test_helper.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: test_helper.cpp,v $ * PRODUCTION Revision 1000.4 2004/06/01 19:25:11 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.58 * PRODUCTION * =========================================================================== *//* $Id: test_helper.cpp,v 1000.4 2004/06/01 19:25:11 gouriano Exp $* ===========================================================================** PUBLIC DOMAIN NOTICE* National Center for Biotechnology Information** This software/database is a "United States Government Work" under the* terms of the United States Copyright Act. It was written as part of* the author's official duties as a United States Government employee and* thus cannot be copyrighted. This software/database is freely available* to the public for use. The National Library of Medicine and the U.S.* Government have not placed any restriction on its use or reproduction.** Although all reasonable efforts have been taken to ensure the accuracy* and reliability of the software and data, the NLM and the U.S.* Government do not and cannot warrant the performance or results that* may be obtained by using this software or data. The NLM and the U.S.* Government disclaim all warranties, express or implied, including* warranties of performance, merchantability or fitness for any particular* purpose.** Please cite the author in any work or product based on this material.** ===========================================================================** Authors: Eugene Vasilchenko, Aleksey Grichenko, Denis Vakatov** File Description:* Bio sequence data generator to test Object Manager*/#include <ncbi_pch.hpp>#include "test_helper.hpp"#include <corelib/ncbithr.hpp>#include <objects/seqloc/Seq_point.hpp>#include <serial/object.hpp>#include <serial/objistr.hpp>#include <serial/objostr.hpp>#include <serial/objcopy.hpp>#include <serial/objectinfo.hpp>#include <serial/iterator.hpp>#include <serial/objectiter.hpp>#include <serial/serial.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seq/Seq_descr.hpp>#include <objects/seq/Seqdesc.hpp>#include <objects/seq/Seq_annot.hpp>#include <objects/seq/IUPACna.hpp>#include <objects/seq/NCBIeaa.hpp>#include <objects/seq/NCBI2na.hpp>#include <objects/seq/Seq_ext.hpp>#include <objects/seq/Seg_ext.hpp>#include <objects/seqset/Bioseq_set.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqfeat/Seq_feat.hpp>#include <objects/seqfeat/SeqFeatData.hpp>#include <objects/seqfeat/Feat_id.hpp>#include <objects/seqfeat/Cdregion.hpp>#include <objects/seqfeat/Genetic_code.hpp>#include <objects/seqalign/Dense_diag.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objmgr/bioseq_handle.hpp>#include <objmgr/seq_vector.hpp>#include <objmgr/seq_vector_ci.hpp>#include <objmgr/seq_descr_ci.hpp>#include <objmgr/feat_ci.hpp>#include <objmgr/align_ci.hpp>#include <objects/seq/seqport_util.hpp>#include <objects/general/Date.hpp>// #include <objects/util/sequence.hpp>#include <test/test_assert.h> /* This header must go last */BEGIN_NCBI_SCOPEBEGIN_SCOPE(objects)bool CDataGenerator::sm_DumpEntries = false;/************************************************************************ 1.2.1.1. Bio sequences for testing Test entry = 1 top-level entry + 2 sub-entries (continuous sequence) TSE contains 1 description each sub-entry has two Seq_ids: local and GI, (local ids = 11+1000i, 12+1000i) one description, two annotations (Seq_feat) first - for an interval, local Seq_id: "plus" strand for one sub-entry, "minus" strand for another one second - for the whole seq, GI Seq_id one sub-entry has also an alignment annotation************************************************************************/CSeq_entry& CDataGenerator::CreateTestEntry1(int index){ // create top level seq entry CRef<CSeq_entry> entry(new CSeq_entry); CBioseq_set& set = entry->SetSet(); // class = nucleic acid and coded proteins set.SetClass(CBioseq_set::eClass_nuc_prot); // add description (name) list< CRef<CSeqdesc> >& descr = set.SetDescr().Set(); CRef<CSeqdesc> desc(new CSeqdesc); desc->SetName("D1 from TSE1-"+NStr::IntToString(index)); descr.push_back(desc); // list of sub-entries list< CRef<CSeq_entry> >& seq_set = set.SetSeq_set(); // Sub-entry Seq 11 {{ CRef<CSeq_entry> sub_entry(new CSeq_entry); CBioseq& seq = sub_entry->SetSeq(); CBioseq::TId& id_list = seq.SetId(); // list of Ids (local + gi) CRef<CSeq_id> id(new CSeq_id); id->SetLocal().SetStr("seq"+NStr::IntToString(11+index*1000)); id_list.push_back(id); id.Reset(new CSeq_id); id->SetGi(11+index*1000); id_list.push_back(id); // Description (name) list< CRef<CSeqdesc> >& descr1 = seq.SetDescr().Set(); CRef<CSeqdesc> desc1(new CSeqdesc); desc1->SetName("D1 from BS11-"+NStr::IntToString(index)); descr1.push_back(desc1); // Instance (sequence data) CSeq_inst& inst = seq.SetInst(); // representation class = continuous sequence inst.SetRepr(CSeq_inst::eRepr_raw); // molecule class in living organism = dna inst.SetMol(CSeq_inst::eMol_dna); // length of sequence in residues inst.SetLength(40); // seq data in Iupacna inst.SetSeq_data().SetIupacna().Set( "CAGCAGCGGTACAGGAGGGTGAGACATCCCAGAGCGGTGC"); // strandedness in living organism = double strand inst.SetStrand(CSeq_inst::eStrand_ds); // Annotations list< CRef<CSeq_annot> >& annot_list = seq.SetAnnot(); CRef<CSeq_annot> annot(new CSeq_annot); // list of features list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable(); {{ CRef<CSeq_feat> feat(new CSeq_feat); // feature Id feat->SetId().SetLocal().SetStr("F1: lcl|11"); // the specific data CSeqFeatData& fdata = feat->SetData(); CCdregion& cdreg = fdata.SetCdregion(); cdreg.SetFrame(CCdregion::eFrame_one); // genetic code used list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set(); CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E); ce->SetId(111); // TSE=1; seq=1; feat=1 gcode.push_back(ce); // feature location on the sequence: Seq_interval (local seq_Id) CSeq_interval& floc = feat->SetLocation().SetInt(); floc.SetId().SetLocal().SetStr ("seq"+NStr::IntToString(11+index*1000)); floc.SetFrom(20); floc.SetTo(30); ftable.push_back(feat); }} {{ CRef<CSeq_feat> feat(new CSeq_feat); // feature Id feat->SetId().SetLocal().SetStr("F2: gi|11"); // the specific data CSeqFeatData& fdata = feat->SetData(); CCdregion& cdreg = fdata.SetCdregion(); cdreg.SetFrame(CCdregion::eFrame_one); // genetic code used list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set(); CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E); ce->SetId(112); // TSE=1; seq=1; feat=2 gcode.push_back(ce); // feature location on the sequence (seq_Id + "whole" sequence) feat->SetLocation().SetWhole().SetGi(11+index*1000); ftable.push_back(feat); }} annot_list.push_back(annot); // Add sub-entry seq_set.push_back(sub_entry); }} // Sub-entry Seq 12 {{ CRef<CSeq_entry> sub_entry(new CSeq_entry); CBioseq& seq = sub_entry->SetSeq(); CBioseq::TId& id_list = seq.SetId(); // list of Ids (local + gi) CRef<CSeq_id> id(new CSeq_id); id->SetLocal().SetStr("seq"+NStr::IntToString(12+index*1000)); id_list.push_back(id); id.Reset(new CSeq_id); id->SetGi(12+index*1000); id_list.push_back(id); // Instance (sequence data) CSeq_inst& inst = seq.SetInst(); // representation class = continuous sequence inst.SetRepr(CSeq_inst::eRepr_raw); // molecule class in living organism = dna inst.SetMol(CSeq_inst::eMol_dna); // length of sequence in residues inst.SetLength(40); // seq data in Iupacna inst.SetSeq_data().SetIupacna().Set( "CAATAACCTCAGCAGCAACAAGTGGCTTCCAGCGCCCTCC"); // strandedness in living organism = double strand inst.SetStrand(CSeq_inst::eStrand_ds); // Annotations list< CRef<CSeq_annot> >& annot_list = seq.SetAnnot(); {{ CRef<CSeq_annot> annot(new CSeq_annot); // list of features list< CRef<CSeq_feat> >& ftable = annot->SetData().SetFtable(); {{ CRef<CSeq_feat> feat(new CSeq_feat); // feature Id feat->SetId().SetLocal().SetStr("F3: gi|12"); // the specific data CSeqFeatData& fdata = feat->SetData(); CCdregion& cdreg = fdata.SetCdregion(); cdreg.SetFrame(CCdregion::eFrame_one); // genetic code used list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set(); CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E); ce->SetId(123); // TSE=1; seq=2; feat=3 gcode.push_back(ce); // feature location on the sequence: Seq_interval (gi seq_Id) CSeq_interval& floc = feat->SetLocation().SetInt(); floc.SetId().SetGi(12+index*1000); floc.SetFrom(20); floc.SetTo(30); // minus strand floc.SetStrand(eNa_strand_minus); ftable.push_back(feat); }} {{ CRef<CSeq_feat> feat(new CSeq_feat); // feature Id feat->SetId().SetLocal().SetStr("F4: lcl|12"); // the specific data CSeqFeatData& fdata = feat->SetData(); CCdregion& cdreg = fdata.SetCdregion(); cdreg.SetFrame(CCdregion::eFrame_one); // genetic code used list< CRef< CGenetic_code::C_E > >& gcode = cdreg.SetCode().Set(); CRef< CGenetic_code::C_E > ce(new CGenetic_code::C_E); ce->SetId(124); // TSE=1; seq=2; feat=4 gcode.push_back(ce); // feature location on the sequence (seq_Id + "whole" sequence) feat->SetLocation().SetWhole().SetLocal().SetStr ("seq"+NStr::IntToString(12+index*1000)); ftable.push_back(feat); }} annot_list.push_back(annot); }} {{ CRef<CSeq_annot> annot(new CSeq_annot); // list of seq alignments list< CRef<CSeq_align> >& atable = annot->SetData().SetAlign(); {{ // CAGCAGC: // 11[0], 12[9] CRef<CSeq_align> align(new CSeq_align); align->SetType(CSeq_align::eType_not_set); // alignment data CSeq_align::C_Segs& segs = align->SetSegs(); // for (multiway) diagonals CSeq_align::C_Segs::TDendiag& diag_list = segs.SetDendiag(); CRef<CDense_diag> diag(new CDense_diag); // dimensionality = 2 diag->SetDim(2); // list of Seq_ids (gi) (sequences in order) CDense_diag::TIds& id_list = diag->SetIds(); CRef<CSeq_id> id(new CSeq_id); id->SetGi(11+index*1000); id_list.push_back(id); id.Reset(new CSeq_id); id->SetGi(12+index*1000); id_list.push_back(id); // start OFFSETS in ids order CDense_diag::TStarts& start_list = diag->SetStarts(); start_list.push_back(0); start_list.push_back(9); diag->SetLen(7); diag_list.push_back(diag); atable.push_back(align); }} annot_list.push_back(annot); }} // Add sub-entry seq_set.push_back(sub_entry); }} if ( sm_DumpEntries ) { NcbiCout << "-------------------- " "TestEntry1 --------------------" << NcbiEndl; auto_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, NcbiCout)); *out << *entry; } return *entry.Release();}/************************************************************************ 1.2.1.2. Bio sequences for testing Test entry = 1 top-level entry + 2 sub-entries (continuous sequence) each sub-entry has two Seq_ids: local and GI, (local ids = 11+1000i, 12+1000i) No descriptions, No annotations************************************************************************/CSeq_entry& CDataGenerator::CreateTestEntry1a(int index){ // create top level seq entry CRef<CSeq_entry> entry(new CSeq_entry); CBioseq_set& set = entry->SetSet(); // class = nucleic acid and coded proteins set.SetClass(CBioseq_set::eClass_nuc_prot); list< CRef<CSeq_entry> >& seq_set = set.SetSeq_set(); // Sub-entry Seq 11 {{ CRef<CSeq_entry> sub_entry(new CSeq_entry); CBioseq& seq = sub_entry->SetSeq(); CBioseq::TId& id_list = seq.SetId(); // list of Ids (local + gi) CRef<CSeq_id> id(new CSeq_id); id->SetLocal().SetStr("seq"+NStr::IntToString(11+index*1000)); id_list.push_back(id); id.Reset(new CSeq_id); id->SetGi(11+index*1000); id_list.push_back(id); // Instance (sequence data) CSeq_inst& inst = seq.SetInst(); // representation class = continuous sequence inst.SetRepr(CSeq_inst::eRepr_raw); // molecule class in living organism = dna inst.SetMol(CSeq_inst::eMol_dna); // length of sequence in residues inst.SetLength(40); // seq data in Iupacna inst.SetSeq_data().SetIupacna().Set( "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); // strandedness in living organism = double strand inst.SetStrand(CSeq_inst::eStrand_ds); // Add sub-entry seq_set.push_back(sub_entry); }} // Sub-entry Seq 12 {{ CRef<CSeq_entry> sub_entry(new CSeq_entry); CBioseq& seq = sub_entry->SetSeq(); CBioseq::TId& id_list = seq.SetId(); // list of Ids (local + gi) CRef<CSeq_id> id(new CSeq_id); id->SetLocal().SetStr("seq"+NStr::IntToString(12+index*1000)); id_list.push_back(id); id.Reset(new CSeq_id); id->SetGi(12+index*1000); id_list.push_back(id); // Instance (sequence data) CSeq_inst& inst = seq.SetInst(); inst.SetRepr(CSeq_inst::eRepr_raw); inst.SetMol(CSeq_inst::eMol_dna); // length of sequence in residues inst.SetLength(40); // seq data in Iupacna inst.SetSeq_data().SetIupacna().Set( "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"); inst.SetStrand(CSeq_inst::eStrand_ds); // Add sub-entry seq_set.push_back(sub_entry); }} if ( sm_DumpEntries ) { NcbiCout << "-------------------- "
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -