⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splign_app.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* * =========================================================================== * PRODUCTION $Log: splign_app.cpp,v $ * PRODUCTION Revision 1000.6  2004/06/01 18:05:26  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.24 * PRODUCTION * =========================================================================== *//* $Id: splign_app.cpp,v 1000.6 2004/06/01 18:05:26 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE                           *               National Center for Biotechnology Information *                                                                           *  This software/database is a "United States Government Work" under the    *  terms of the United States Copyright Act.  It was written as part of     *  the author's official duties as a United States Government employee and  *  thus cannot be copyrighted.  This software/database is freely available  *  to the public for use. The National Library of Medicine and the U.S.     *  Government have not placed any restriction on its use or reproduction.   *                                                                           *  Although all reasonable efforts have been taken to ensure the accuracy   *  and reliability of the software and data, the NLM and the U.S.           *  Government do not and cannot warrant the performance or results that     *  may be obtained by using this software or data. The NLM and the U.S.     *  Government disclaim all warranties, express or implied, including        *  warranties of performance, merchantability or fitness for any particular *  purpose.                                                                 *                                                                           *  Please cite the author in any work or product based on this material.    * * =========================================================================== * * Author:  Yuri Kapustin * * File Description: Splign application *                   */#include <ncbi_pch.hpp>#include "splign_app.hpp"#include "splign_app_exception.hpp"#include <corelib/ncbistd.hpp>#include <serial/objostrasn.hpp>#include <serial/serial.hpp>#include <algo/align/nw_spliced_aligner16.hpp>#include <algo/align/nw_spliced_aligner32.hpp>#include <algo/align/splign/splign.hpp>#include <algo/align/splign/splign_simple.hpp>#include <algo/align/splign/splign_formatter.hpp>#include <objmgr/object_manager.hpp>#include <objmgr/scope.hpp>#include <objects/seq/Bioseq.hpp>#include <objects/seqalign/Seq_align.hpp>#include <objects/seqloc/Seq_loc.hpp>#include <algo/blast/api/bl2seq.hpp>#include <iostream>#include <memory>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);const char kQuality_high[] = "high";const char kQuality_low[] = "low";void CSplignApp::Init(){  HideStdArgs( fHideLogfile | fHideConffile | fHideVersion);    auto_ptr<CArgDescriptions> argdescr(new CArgDescriptions);  string program_name ("Splign v.1.06");#ifdef GENOME_PIPELINE  program_name += 'p';#endif  argdescr->SetUsageContext(GetArguments().GetProgramName(), program_name);  argdescr->AddOptionalKey    ("index", "index",     "Batch mode index file (use -mkidx to generate).",     CArgDescriptions::eString);  argdescr->AddFlag ("mkidx", "Generate batch mode index and quit.", true);    argdescr->AddOptionalKey    ("hits", "hits",     "Batch mode hit file. "     "This file defines the set of sequences to align and "     "is also used to guide alignments.",     CArgDescriptions::eString);  argdescr->AddOptionalKey    ("query", "query",     "FastA file with the spliced sequence. Use in pairwise mode only.",     CArgDescriptions::eString);  argdescr->AddOptionalKey    ("subj", "subj",     "FastA file with the genomic sequence. Use in pairwise mode only.",     CArgDescriptions::eString);  argdescr->AddDefaultKey    ("log", "log", "Splign log file",     CArgDescriptions::eString, "splign.log");  argdescr->AddOptionalKey    ("asn", "asn", "ASN.1 output file name", CArgDescriptions::eString);  argdescr->AddDefaultKey    ("strand", "strand", "Spliced sequence's strand.",     CArgDescriptions::eString, "plus");  argdescr->AddFlag ("noendgaps",		     "Skip detection of unaligning regions at the ends.",                      true);  argdescr->AddFlag ("nopolya", "Assume no Poly(A) tail.",  true);  argdescr->AddDefaultKey    ("compartment_penalty", "compartment_penalty",     "Penalty to open a new compartment.",     CArgDescriptions::eDouble,     "0.75");  argdescr->AddDefaultKey    ("min_query_cov", "min_query_cov",     "Min query coverage by initial Blast hits. "     "Queries with less coverage will not be aligned.",     CArgDescriptions::eDouble, "0.25");  argdescr->AddDefaultKey    ("min_idty", "identity",     "Minimal exon identity. Lower identity segments "     "will be marked as gaps.",     CArgDescriptions::eDouble, "0.75");#ifdef GENOME_PIPELINE  argdescr->AddDefaultKey    ("quality", "quality", "Genomic sequence quality.",     CArgDescriptions::eString, kQuality_high);    argdescr->AddDefaultKey    ("Wm", "match", "match score",     CArgDescriptions::eInteger,     NStr::IntToString(CNWAligner::GetDefaultWm()).c_str());    argdescr->AddDefaultKey    ("Wms", "mismatch", "mismatch score",     CArgDescriptions::eInteger,     NStr::IntToString(CNWAligner::GetDefaultWms()).c_str());    argdescr->AddDefaultKey    ("Wg", "gap", "gap opening score",     CArgDescriptions::eInteger,     NStr::IntToString(CNWAligner::GetDefaultWg()).c_str());    argdescr->AddDefaultKey    ("Ws", "space", "gap extension (space) score",     CArgDescriptions::eInteger,     NStr::IntToString(CNWAligner::GetDefaultWs()).c_str());    argdescr->AddDefaultKey    ("Wi0", "Wi0", "Conventional intron (GT/AG) score",     CArgDescriptions::eInteger,     NStr::IntToString(CSplicedAligner16::GetDefaultWi(0)).c_str());    argdescr->AddDefaultKey    ("Wi1", "Wi1", "Conventional intron (GC/AG) score",     CArgDescriptions::eInteger,     NStr::IntToString(CSplicedAligner16::GetDefaultWi(1)).c_str());    argdescr->AddDefaultKey    ("Wi2", "Wi2", "Conventional intron (AT/AC) score",     CArgDescriptions::eInteger,     NStr::IntToString(CSplicedAligner16::GetDefaultWi(2)).c_str());    argdescr->AddDefaultKey    ("Wi3", "Wi3", "Arbitrary intron score",     CArgDescriptions::eInteger,     NStr::IntToString(CSplicedAligner16::GetDefaultWi(3)).c_str());    // restrictions  CArgAllow_Strings* constrain_errlevel = new CArgAllow_Strings;  constrain_errlevel->Allow(kQuality_low)->Allow(kQuality_high);  argdescr->SetConstraint("quality", constrain_errlevel);  #endif      CArgAllow_Strings* constrain_strand = new CArgAllow_Strings;  constrain_strand->Allow("plus")->Allow("minus");  argdescr->SetConstraint("strand", constrain_strand);  CArgAllow* constrain01 = new CArgAllow_Doubles(0,1);  argdescr->SetConstraint("min_idty", constrain01);  argdescr->SetConstraint("compartment_penalty", constrain01);  argdescr->SetConstraint("min_query_cov", constrain01);      SetupArgDescriptions(argdescr.release());}bool CSplignApp::x_GetNextPair(istream* ifs, vector<CHit>* hits){  hits->clear();  if(!m_pending.size() && (!ifs || !*ifs) ) {    return false;  }  if(!m_pending.size()) {    string query, subj;    if(m_firstline.size()) {      CHit hit (m_firstline.c_str());      query = hit.m_Query;      subj  = hit.m_Subj;      m_pending.push_back(hit);    }    char buf [1024];    while(ifs) {      buf[0] = 0;      CT_POS_TYPE pos0 = ifs->tellg();      ifs->getline(buf, sizeof buf, '\n');      CT_POS_TYPE pos1 = ifs->tellg();      if(pos1 == pos0) break; // GCC hack      if(buf[0] == '#') continue; // skip comments      const char* p = buf; // skip leading spaces      while(*p == ' ' || *p == '\t') ++p;      if(*p == 0) continue; // skip empty lines      CHit hit (p);      if(query.size() == 0) {	query = hit.m_Query;      }      if(subj.size() == 0) {	subj = hit.m_Subj;      }      if(hit.m_ai[0] > hit.m_ai[1]) {        NCBI_THROW(CSplignAppException,                   eBadData,                   "Hit with reversed query coordinates detected." );      }      if(hit.m_ai[2] == hit.m_ai[3]) { // skip single bases        continue;      }      if(hit.m_Query != query || hit.m_Subj != subj) {	m_firstline = p;	break;      }      m_pending.push_back(hit);    }  }  const size_t pending_size = m_pending.size();  if(pending_size) {    const string& query = m_pending[0].m_Query;    const string& subj  = m_pending[0].m_Subj;    size_t i = 1;    for(; i < pending_size; ++i) {      const CHit& h = m_pending[i];      if(h.m_Query != query || h.m_Subj != subj) {        break;      }    }    hits->resize(i);    copy(m_pending.begin(), m_pending.begin() + i, hits->begin());    m_pending.erase(m_pending.begin(), m_pending.begin() + i);  }  return hits->size() > 0;}void CSplignApp::x_LogStatus(size_t model_id, const string& query,                             const string& subj,			     bool error, const string& msg){  string error_tag (error? "Error: ": "");  if(model_id == 0) {    m_logstream << '-';  }  else {    m_logstream << model_id;  }  m_logstream << '\t' << query << '\t' << subj               << '\t' << error_tag << msg << endl;}istream* CSplignApp::x_GetPairwiseHitStream(                         CSeqLoaderPairwise& seq_loader) const{    vector<char> query, subj;    seq_loader.Load(seq_loader.GetQueryStringId(), &query, 0, kMax_UInt);    seq_loader.Load(seq_loader.GetSubjStringId(), &subj, 0, kMax_UInt);        CRef<CObjectManager> objmgr(new CObjectManager);    CRef<CSeq_loc> seqloc_query;    CRef<CScope> scope_query;    {{    scope_query.Reset(new CScope(*objmgr));    scope_query->AddDefaults();    CRef<CSeq_entry> se (seq_loader.GetQuerySeqEntry());    scope_query->AddTopLevelSeqEntry(*se);    seqloc_query.Reset(new CSeq_loc);    seqloc_query->SetWhole().Assign(*(se->GetSeq().GetId().front()));    }}    CRef<CSeq_loc> seqloc_subj;    CRef<CScope> scope_subj;    {{    scope_subj.Reset(new CScope(*objmgr));    scope_subj->AddDefaults();    CRef<CSeq_entry> se (seq_loader.GetSubjSeqEntry());    scope_subj->AddTopLevelSeqEntry(*se);    seqloc_subj.Reset(new CSeq_loc);    seqloc_subj->SetWhole().Assign(*(se->GetSeq().GetId().front()));    }}    blast::CBl2Seq Blast(blast::SSeqLoc(seqloc_query.GetNonNullPointer(),                                        scope_query.GetNonNullPointer()),                         blast::SSeqLoc(seqloc_subj.GetNonNullPointer(),                                        scope_subj.GetNonNullPointer()),                         blast::eMegablast);    blast::TSeqAlignVector blast_output (Blast.Run());    if (!blast_output.empty() && blast_output.front().NotEmpty() &&

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -