agp_read.cpp
来自「ncbi源码」· C++ 代码 · 共 253 行
CPP
253 行
/* * =========================================================================== * PRODUCTION $Log: agp_read.cpp,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:46:12 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//* $Id: agp_read.cpp,v 1000.1 2004/06/01 19:46:12 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Authors: Josh Cherry * * File Description: Read agp file */#include <ncbi_pch.hpp>#include <objtools/readers/agp_read.hpp>#include <objtools/readers/reader_exception.hpp>#include <objects/seqloc/Seq_interval.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <objects/seqset/Seq_entry.hpp>#include <objects/seq/Delta_seq.hpp>#include <objects/seq/Seq_inst.hpp>#include <objects/seq/Seq_literal.hpp>#include <objects/seq/Seq_ext.hpp>#include <objects/seq/Delta_ext.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CRef<CBioseq_set> AgpRead(CNcbiIstream& is){ vector<CRef<CSeq_entry> > entries; AgpRead(is, entries); CRef<CBioseq_set> bioseq_set(new CBioseq_set); ITERATE (vector<CRef<CSeq_entry> >, iter, entries) { bioseq_set->SetSeq_set().push_back(*iter); } return bioseq_set;}void AgpRead(CNcbiIstream& is, vector<CRef<CSeq_entry> >& entries){ vector<CRef<CBioseq> > bioseqs; AgpRead(is, bioseqs); NON_CONST_ITERATE (vector<CRef<CBioseq> >, bioseq, bioseqs) { CRef<CSeq_entry> entry(new CSeq_entry); entry->SetSeq(**bioseq); entries.push_back(entry); }}void AgpRead(CNcbiIstream& is, vector<CRef<CBioseq> >& bioseqs){ string line; vector<string> fields; string current_object; CRef<CBioseq> bioseq; CRef<CSeq_inst> seq_inst; int last_to; int part_num, last_part_num; TSeqPos length; int line_num = 0; while (NcbiGetlineEOL(is, line)) { line_num++; if (line[0] == '#') { // comment line; skip continue; } if (line.find_first_not_of(" \t\n\r") == string::npos) { // skip lines containing only white space continue; } fields.clear(); NStr::Tokenize(line, "\t", fields); // Number of fields can be 9 or 8, but 8 is valid // only if field[4] == "N". if (fields.size() != 9) { if (fields.size() >= 5 && fields[4] != "N") { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": found " + NStr::IntToString(fields.size()) + " columns; there should be 9", is.tellg() - CT_POS_TYPE(0)); } else if (fields.size() != 8) { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": found " + NStr::IntToString(fields.size()) + " columns; there should be 8 or 9", is.tellg() - CT_POS_TYPE(0)); } } if (fields[0] != current_object || !bioseq) { // close out old one, start a new one if (bioseq) { seq_inst->SetLength(length); bioseq->SetInst(*seq_inst); bioseqs.push_back(bioseq); } current_object = fields[0]; seq_inst.Reset(new CSeq_inst); seq_inst->SetRepr(CSeq_inst::eRepr_delta); seq_inst->SetMol(CSeq_inst::eMol_dna); bioseq.Reset(new CBioseq); CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Local, current_object, current_object)); bioseq->SetId().push_back(id); last_to = 0; last_part_num = 0; length = 0; } // validity checks part_num = NStr::StringToInt(fields[3]); if (part_num != last_part_num + 1) { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": part number out of order", is.tellg() - CT_POS_TYPE(0)); } last_part_num = part_num; if (NStr::StringToInt(fields[1]) != last_to + 1) { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": begining not equal to previous end + 1", is.tellg() - CT_POS_TYPE(0)); } last_to = NStr::StringToInt(fields[2]); // build a Delta-seq, either a Seq-literal (for a gap) or a Seq-loc CRef<CDelta_seq> delta_seq(new CDelta_seq); if (fields[4] == "N") { // a gap TSeqPos gap_len = NStr::StringToInt(fields[5]); delta_seq->SetLiteral().SetLength(gap_len); length += gap_len; } else if (fields[4].size() == 1 && fields[4].find_first_of("ADFGPOW") == 0) { CSeq_loc& loc = delta_seq->SetLoc(); CRef<CSeq_id> comp_id(new CSeq_id(fields[5])); loc.SetInt().SetId(*comp_id); loc.SetInt().SetFrom(NStr::StringToInt(fields[6]) - 1); loc.SetInt().SetTo (NStr::StringToInt(fields[7]) - 1); length += loc.GetInt().GetTo() - loc.GetInt().GetFrom() + 1; if (fields[8] == "+") { loc.SetInt().SetStrand(eNa_strand_plus); } else if (fields[8] == "-") { loc.SetInt().SetStrand(eNa_strand_minus); } else if (fields[8] == "0") { loc.SetInt().SetStrand(eNa_strand_unknown); } else if (fields[8] == "na") { loc.SetInt().SetStrand(eNa_strand_other); } else { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": invalid " "orientation " + fields[8], is.tellg() - CT_POS_TYPE(0)); } } else { NCBI_THROW2(CObjReaderParseException, eFormat, string("error at line ") + NStr::IntToString(line_num) + ": invalid " "component type " + fields[4], is.tellg() - CT_POS_TYPE(0)); } seq_inst->SetExt().SetDelta().Set().push_back(delta_seq); } // deal with the last one if (bioseq) { seq_inst->SetLength(length); bioseq->SetInst(*seq_inst); bioseqs.push_back(bioseq); }}END_NCBI_SCOPE/* * ===================================================================== * $Log: agp_read.cpp,v $ * Revision 1000.1 2004/06/01 19:46:12 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * * Revision 1.6 2004/05/25 20:49:58 jcherry * Let the last column of an "N" line be missing, not just empty * * Revision 1.5 2004/05/21 21:42:55 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.4 2004/02/19 22:57:52 ucko * Accommodate stricter implementations of CT_POS_TYPE. * * Revision 1.3 2004/01/05 23:01:37 jcherry * Support unknown ("0") or irrelevant ("na") strand designation * * Revision 1.2 2003/12/08 23:39:20 jcherry * Set length of Seq-inst * * Revision 1.1 2003/12/08 15:49:32 jcherry * Initial version * * ===================================================================== */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?