⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splign_util.cpp

📁 ncbi源码
💻 CPP
字号:
/* * =========================================================================== * PRODUCTION $Log: splign_util.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 18:13:08  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.8 * PRODUCTION * =========================================================================== *//* $Id: splign_util.cpp,v 1000.0 2004/06/01 18:13:08 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE                          *               National Center for Biotechnology Information*                                                                          *  This software/database is a "United States Government Work" under the   *  terms of the United States Copyright Act.  It was written as part of    *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S.    *  Government have not placed any restriction on its use or reproduction.  *                                                                          *  Although all reasonable efforts have been taken to ensure the accuracy  *  and reliability of the software and data, the NLM and the U.S.          *  Government do not and cannot warrant the performance or results that    *  may be obtained by using this software or data. The NLM and the U.S.    *  Government disclaim all warranties, express or implied, including       *  warranties of performance, merchantability or fitness for any particular*  purpose.                                                                *                                                                          *  Please cite the author in any work or product based on this material.   ** ===========================================================================** Author:  Yuri Kapustin** File Description:  Helper functions*                   * ===========================================================================*/#include <ncbi_pch.hpp>#include "splign_util.hpp"#include "splign_hitparser.hpp"#include <algo/align/align_exception.hpp>#include <algorithm>#include <math.h>BEGIN_NCBI_SCOPE#ifdef GENOME_PIPELINECInfTable::CInfTable(size_t cols): m_cols(cols){  m_data.resize(m_cols, 0);}size_t CInfTable::Load(const string& filename){  ifstream ifs (filename.c_str());  size_t read = 0;  char line [1024];  while(ifs) {    line[0] = 0;    ifs.getline(line, sizeof line, '\n');    if(line[0]) {      if(line[0] != '#') {	if(!x_ReadColumns(line)) {	  return 0;	}	string accession;	SInfo info;	bool parsed = x_ParseLine(line, accession, info);	if(!parsed) {	  return 0;	}	m_map[accession] = info;	++read;      }    }  }  return read;}bool CInfTable::x_ReadColumns(char* line){  char* p0 = line;  char* pe = p0 + strlen(line);  char* p = p0;  for(size_t i = 0; i < m_cols; ++i) {    p = find(p0, pe, '\t');    if(i+1 < m_cols && p == pe) {      return false;    }    m_data[i] = p0;    *p = 0;    p0 = p + 1;  }  return true;}bool CInfTable::GetInfo(const string& id, SInfo& info) {  map<string, SInfo>::const_iterator ii = m_map.find(id);  if(ii != m_map.end()) {    info = ii->second;    return true;  }  return false;}bool CInf_mRna::x_ParseLine (const char* line,			     string& accession, SInfo& info){  if(!line || !*line) {    return false;  }  accession = x_GetCol(0);  const char* buf_size = x_GetCol(2);  sscanf(buf_size, "%d", &info.m_size);  const char* buf = x_GetCol(3);  SInfo::EStrand strand = SInfo::ePlus;  if(buf[0]=='m') {    strand = SInfo::eMinus;  }  buf = x_GetCol(4);  if(buf[0] != '-') {    char c;    sscanf(buf, "%d,%d%c",	   &info.m_polya_start,	   &info.m_polya_end,	   &c);    if(c == 'm') {      strand = SInfo::eMinus;    }  }  buf = x_GetCol(9);  if(strcmp(buf, "wrong_strand") == 0) {    strand = SInfo::eUnknown;  }  info.m_strand = strand;  return true;}bool CInf_EST::x_ParseLine (const char* line, string& accession, SInfo& info){  if(!line || !*line) {    return false;  }  accession = x_GetCol(0);  const char* buf = x_GetCol(3);  switch(buf[0]) {      case '5': info.m_strand = SInfo::ePlus;    break;      case '3': info.m_strand = SInfo::eMinus;   break;      default:  info.m_strand = SInfo::eUnknown; break;  }  buf = x_GetCol(6);  if(buf[0] != '-') {    char c;    sscanf(buf, "%d,%d%c",	   &info.m_polya_start,	   &info.m_polya_end,	   &c);    if(c == 'm' && info.m_strand == SInfo::ePlus ||       c == 'p' && info.m_strand == SInfo::eMinus) {      info.m_strand = SInfo::eUnknown;    }  }  return true;}#endif // GENOME_PIPELINEbool IsConsensus(const char* donor, const char* acceptor){  return donor && acceptor &&    (donor[0] == 'G' && (donor[1] == 'C' || donor[1] == 'T'))    &&    (acceptor[0] == 'A' && acceptor[1] == 'G');}void CleaveOffByTail(vector<CHit>* hits, size_t polya_start){  const size_t hit_dim = hits->size();  vector<size_t> vdel;  for(size_t i = 0; i < hit_dim; ++i) {    CHit& hit = (*hits)[i];    if(size_t(hit.m_ai[0]) >= polya_start) {      vdel.push_back(i);    }    else if(size_t(hit.m_ai[1]) >= polya_start) {      hit.Move(1, polya_start-1, false);      if(hit.IsConsistent() == false) {	vdel.push_back(i);      }    }  }  const size_t del_dim = vdel.size();  if(del_dim > 0) {    for( size_t i = 0, j = 0, k = 0; j < hit_dim; ) {      if(k < del_dim && j == vdel[k]) {	++k;	++j;      }      else if(i < j) {	(*hits)[i++] = (*hits)[j++];      }      else { // i == j	++i; ++j;      }    }    hits->resize(hit_dim - del_dim);  }}void XFilter(vector<CHit>* hits) {  if(hits->size() == 0) return;  int group_id = 0;  CHitParser hp (*hits, group_id);  hp.m_Strand = CHitParser::eStrand_Both;  hp.m_SameOrder = false;  hp.m_Method = CHitParser::eMethod_MaxScore;  hp.m_CombinationProximity_pre  = -1;  hp.m_CombinationProximity_post = -1;  hp.m_MinQueryCoverage = 0;  hp.m_MinSubjCoverage = 0;  hp.m_MaxHitDistQ = kMax_Int;  hp.m_MaxHitDistS = kMax_Int;  hp.m_Prot2Nucl = false;  hp.m_SplitQuery = CHitParser::eSplitMode_Clear;  hp.m_SplitSubject = CHitParser::eSplitMode_Clear;  hp.m_group_identification_method = CHitParser::eNone;  hp.m_Query = (*hits)[0].m_Query;  hp.m_Subj =  (*hits)[0].m_Subj;    hp.Run( CHitParser::eMode_Normal );  *hits = hp.m_Out;}void GetHitsMinMax(const vector<CHit>& vh,		   size_t* qmin, size_t* qmax,		   size_t* smin, size_t* smax){  const size_t hit_dim = vh.size();    *qmin = kMax_UInt, *qmax = 0;  *smin = *qmin, *smax = *qmax;    for(size_t i = 0; i < hit_dim; ++i) {    if(size_t(vh[i].m_ai[0]) < *qmin)      *qmin = vh[i].m_ai[0];    if(size_t(vh[i].m_ai[1]) > *qmax)      *qmax = vh[i].m_ai[1];    if(size_t(vh[i].m_ai[2]) < *smin)      *smin = vh[i].m_ai[2];    if(size_t(vh[i].m_ai[3]) > *smax)      *smax = vh[i].m_ai[3];  }}// apply run-length encodingstring RLE(const string& in){  string out;  const size_t dim = in.size();  if(dim == 0) {    return kEmptyStr;  }  const char* p = in.c_str();  char c0 = p[0];  out.append(1, c0);  size_t count = 1;  for(size_t k = 1; k < dim; ++k) {    char c = p[k];    if(c != c0) {      c0 = c;      if(count > 1) {        out += NStr::IntToString(count);      }      count = 1;      out.append(1, c0);    }    else {      ++count;    }  }  if(count > 1) {    out += NStr::IntToString(count);  }  return out;}CNWAligner::TScore ScoreByTranscript(const CNWAligner& aligner,				     const char* transcript){  static const string kBadSymMsg = "Unknown symbol in transcript: ";  const int Wg  = aligner.GetWg();  const int Ws  = aligner.GetWs();  const int Wm  = aligner.GetWm();  const int Wms = aligner.GetWms();  const size_t dim = strlen(transcript);  if(dim == 0) return 0;  CNWAligner::TScore score = 0;  char state1, state2;  switch(transcript[0]) {    case 'M':    case 'R':  state1 = state2 = 0;    break;    case 'I':  state1 = 1; state2 = 0; score += Wg;    break;    case 'D':  state1 = 0; state2 = 1; score += Wg;    break;    default:      NCBI_THROW(CAlgoAlignException, eInternal, kBadSymMsg + transcript[0]);  }  for(size_t i = 0; i < dim; ++i) {    switch(transcript[i]) {      case 'M': {        state1 = state2 = 0;        score += Wm;      }      break;      case 'R': {        state1 = state2 = 0;        score += Wms;      }      break;      case 'I': {        if(state1 != 1) score += Wg;        state1 = 1; state2 = 0;        score += Ws;      }      break;      case 'D': {        if(state2 != 1) score += Wg;        state1 = 0; state2 = 1;        score += Ws;      }      break;      default:        NCBI_THROW(CAlgoAlignException, eInternal, kBadSymMsg + transcript[i]);    }  }  return score;}END_NCBI_SCOPE/* * =========================================================================== * * $Log: splign_util.cpp,v $ * Revision 1000.0  2004/06/01 18:13:08  gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.8 * * Revision 1.8  2004/05/24 16:13:57  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.7  2004/05/17 14:50:57  kapustin * Add/remove/rearrange some includes and object declarations * * Revision 1.6  2004/04/23 20:33:32  ucko * Fix remaining use of sprintf. * * Revision 1.5  2004/04/23 18:43:58  ucko * <cmath> -> <math.h>, since some older compilers (MIPSpro) lack the wrappers. * * Revision 1.4  2004/04/23 16:50:48  kapustin * sprintf->CNcbiOstrstream * * Revision 1.3  2004/04/23 14:37:44  kapustin * *** empty log message *** * * * =========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -