⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splign_formatter.cpp

📁 ncbi源码
💻 CPP
字号:
/* * =========================================================================== * PRODUCTION $Log: splign_formatter.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 18:12:43  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//* $Id: splign_formatter.cpp,v 1000.0 2004/06/01 18:12:43 gouriano Exp $ * =========================================================================== * *                            PUBLIC DOMAIN NOTICE                           *               National Center for Biotechnology Information *                                                                           *  This software/database is a "United States Government Work" under the    *  terms of the United States Copyright Act.  It was written as part of     *  the author's official duties as a United States Government employee and  *  thus cannot be copyrighted.  This software/database is freely available  *  to the public for use. The National Library of Medicine and the U.S.     *  Government have not placed any restriction on its use or reproduction.   *                                                                           *  Although all reasonable efforts have been taken to ensure the accuracy   *  and reliability of the software and data, the NLM and the U.S.            *  Government do not and cannot warrant the performance or results that     *  may be obtained by using this software or data. The NLM and the U.S.     *  Government disclaim all warranties, express or implied, including        *  warranties of performance, merchantability or fitness for any particular *  purpose.                                                                 *                                                                           *  Please cite the author in any work or product based on this material.    * * =========================================================================== * * Author:  Yuri Kapustin * * =========================================================================== * */#include <ncbi_pch.hpp>#include "splign_util.hpp"#include <objects/seqalign/Seq_align.hpp>#include <objects/seqloc/Seq_id.hpp>#include <algo/align/splign/splign_formatter.hpp>#include <objects/seqalign/Score.hpp>#include <objects/general/Object_id.hpp>#include <objects/seqalign/Dense_seg.hpp>BEGIN_NCBI_SCOPEUSING_SCOPE(objects);CSplignFormatter::CSplignFormatter (const CSplign& splign):    m_splign(&splign){    static const char kErrMsg[] = "ID_not_set";    m_QueryId = m_SubjId = kErrMsg;}string CSplignFormatter::AsText(void) const{  CNcbiOstrstream oss;  oss.precision(3);  const vector<CSplign::SAlignedCompartment>& acs = m_splign->GetResult();  ITERATE(vector<CSplign::SAlignedCompartment>, ii, acs) {        for(size_t i = 0, seg_dim = ii->m_segments.size(); i < seg_dim; ++i) {      const CSplign::SSegment& seg = ii->m_segments[i];      oss << ii->m_id << '\t' << m_QueryId << '\t' << m_SubjId << '\t';      if(seg.m_exon) {        oss << seg.m_idty << '\t';      }      else {        oss << "-\t";      }            oss << seg.m_len << '\t';            if(ii->m_QueryStrand) {        oss << ii->m_qmin + seg.m_box[0] + 1 << '\t'            << ii->m_qmin + seg.m_box[1] + 1 << '\t';      }      else {        oss << ii->m_mrnasize - seg.m_box[0] << '\t'            << ii->m_mrnasize - seg.m_box[1] << '\t';      }            if(seg.m_exon) {                if(ii->m_SubjStrand) {          oss << ii->m_smin + seg.m_box[2] + 1 << '\t'               << ii->m_smin + seg.m_box[3] + 1 << '\t';        }        else {          oss << ii->m_smax - seg.m_box[2] + 1 << '\t'               << ii->m_smax - seg.m_box[3] + 1 << '\t';        }      }      else {        oss << "-\t-\t";      }            if(seg.m_exon) {                oss << seg.m_annot << '\t';        oss << RLE(seg.m_details);#ifdef GENOME_PIPELINE        oss << '\t' << ScoreByTranscript(*(m_splign->GetAligner()),                                         seg.m_details.c_str());#endif      }      else {        if(i == 0) {          oss << "<L-Gap>\t";        }        else if(i == seg_dim - 1) {          oss << "<R-Gap>\t";        }        else {          oss << "<M-Gap>\t";        }        oss << '-';#ifdef GENOME_PIPELINE        oss << "\t-";#endif      }      oss << endl;    }  }  return CNcbiOstrstreamToString(oss);}CRef<CSeq_align_set> CSplignFormatter::AsSeqAlignSet(void) const{    const vector<CSplign::SAlignedCompartment>& acs = m_splign->GetResult();    CRef<CSeq_align_set> rv (new CSeq_align_set);    CSeq_align_set::Tdata& data = rv->Set();    ITERATE(vector<CSplign::SAlignedCompartment>, ii, acs) {            vector<size_t> boxes;        vector<string> transcripts;        vector<CNWAligner::TScore> scores;        for(size_t i = 0, seg_dim = ii->m_segments.size(); i < seg_dim; ++i) {            const CSplign::SSegment& seg = ii->m_segments[i];            if(seg.m_exon) {                                if(ii->m_QueryStrand) {                    boxes.push_back(ii->m_qmin + seg.m_box[0]);                    boxes.push_back(ii->m_qmin + seg.m_box[1]);                }                else {                    boxes.push_back(ii->m_mrnasize - seg.m_box[0] - 1);                    boxes.push_back(ii->m_mrnasize - seg.m_box[1] - 1);                }                if(ii->m_SubjStrand) {                    boxes.push_back(ii->m_smin + seg.m_box[2]);                    boxes.push_back(ii->m_smin + seg.m_box[3]);                }                else {                    boxes.push_back(ii->m_smax - seg.m_box[2]);                     boxes.push_back(ii->m_smax - seg.m_box[3]);                }                transcripts.push_back(seg.m_details);                scores.push_back(ScoreByTranscript(*(m_splign->GetAligner()),                                                   seg.m_details.c_str()));            }        }               CRef<CSeq_align> sa (x_Compartment2SeqAlign(boxes,transcripts,scores));        data.push_back(sa);    }        return rv;}CRef<CSeq_align> CSplignFormatter::x_Compartment2SeqAlign (    const vector<size_t>& boxes,    const vector<string>& transcripts,    const vector<int>&    scores ) const{    const size_t num_exons = boxes.size() / 4;    CRef<CSeq_align> sa (new CSeq_align);    sa->Reset();    // this is a discontinuous alignment    sa->SetType(CSeq_align::eType_disc);    sa->SetDim(2);      // seq-ids    CRef<CSeq_id> id1 ( new CSeq_id (m_QueryId) );    if(id1->Which()==CSeq_id::e_not_set) {        id1.Reset(new CSeq_id(CSeq_id::e_Local, m_QueryId, kEmptyStr));    }        CRef<CSeq_id> id2 ( new CSeq_id (m_SubjId) );    if(id2->Which()==CSeq_id::e_not_set) {        id2.Reset(new CSeq_id(CSeq_id::e_Local, m_SubjId, kEmptyStr));    }    // create seq-align-set    CRef< CSeq_align::C_Segs > segs (new CSeq_align::C_Segs);    CSeq_align_set& sas = segs->SetDisc();    list<CRef<CSeq_align> >& sas_data = sas.Set();        for(size_t i = 0; i < num_exons; ++i) {      CRef<CSeq_align> sa (new CSeq_align);      sa->Reset();      sa->SetDim(2);      sa->SetType(CSeq_align::eType_global);      CRef<CScore> score (new CScore);      CRef<CObject_id> id (new CObject_id);      id->SetStr("splign");      score->SetId(*id);      CRef< CScore::C_Value > val (new CScore::C_Value);      val->SetInt(scores[i]);      score->SetValue(*val);      CSeq_align::TScore& scorelist = sa->SetScore();      scorelist.push_back(score);      CRef<CSeq_align::C_Segs> segs (new CSeq_align::C_Segs);      CDense_seg& ds = segs->SetDenseg();      ds.SetNumseg(0);      ds.SetDim(2);      vector< CRef< CSeq_id > > &ids = ds.SetIds();      ids.push_back(id1);      ids.push_back(id2);      const size_t* box = &(*(boxes.begin() + i*4));      x_Exon2DS(box, transcripts[i], &ds);      sa->SetSegs(*segs);      sas_data.push_back(sa);    }        sa->SetSegs(*segs);    return sa;}void CSplignFormatter::x_Exon2DS(const size_t* box, const string& trans,                                 CDense_seg* pds ) const{    bool strand = box[2] <= box[3];    CDense_seg& ds = *pds;    vector< TSignedSeqPos > &starts  = ds.SetStarts();    vector< TSeqPos >       &lens    = ds.SetLens();    vector< ENa_strand >    &strands = ds.SetStrands();        // iterate through the transcript    size_t seg_count = 0;    size_t start1 = 0, pos1 = 0; // relative to exon start in mrna    size_t start2 = 0, pos2 = 0; // and genomic    size_t seg_len = 0;	    string::const_iterator ib = trans.begin(), ie = trans.end(), ii = ib;    unsigned char seg_type;    char c = *ii++;    if(c == 'M' || c == 'R') {      seg_type = 0; // matches and mismatches      ++pos1;      ++pos2;    }    else if (c == 'I') {      seg_type = 1;  // inserts      ++pos2;    }    else {      seg_type = 2;  // dels      ++pos1;    }        while(ii < ie) {      c = *ii;      if(isalpha(c)) {	if(seg_type == 0 && (c == 'M' || c == 'R')) {	  ++pos1;	  ++pos2;	  ++ii;	}	else {	  // close current seg	  starts.push_back((seg_type == 1)? -1: TSignedSeqPos(box[0]+start1));	  starts.push_back((seg_type == 2)? -1:                           TSignedSeqPos(box[2]+(strand?start2:(1-pos2))));	  strands.push_back(eNa_strand_plus);	  strands.push_back(strand? eNa_strand_plus: eNa_strand_minus);	  switch(seg_type) {	  case 0: seg_len = pos1 - start1; break;	  case 1: seg_len = pos2 - start2; break;	  case 2: seg_len = pos1 - start1; break;	  }	  lens.push_back(seg_len);	  ++seg_count;	  	  // start a new seg	  start1 = pos1;	  start2 = pos2;	  if(c == 'M' || c == 'R') {	    seg_type = 0; // matches and mismatches	    ++pos1;	    ++pos2;	  }	  else if (c == 'I') {	    seg_type = 1;  // inserts	    ++pos2;	  }	  else {	    seg_type = 2;  // dels	    ++pos1;	  }	  ++ii;	}      }      else {	size_t len = 0;	while(ii < ie && ('0' <= *ii && *ii <= '9')) {	  len = 10*len + *ii - '0';	  ++ii;	}	--len;	switch(seg_type) {	case 0: pos1 += len; pos2 += len; break;	case 1: pos2 += len; break;	case 2: pos1 += len; break;	}      }    }        starts.push_back( (seg_type == 1)? -1: TSignedSeqPos(box[0] + start1) );    starts.push_back( (seg_type == 2)? -1:		      TSignedSeqPos(box[2] + (strand? start2: (1 - pos2))) );    strands.push_back(eNa_strand_plus);    strands.push_back(strand? eNa_strand_plus: eNa_strand_minus);	    switch(seg_type) {    case 0: seg_len = pos1 - start1; break;    case 1: seg_len = pos2 - start2; break;    case 2: seg_len = pos1 - start1; break;    }    lens.push_back(seg_len);    ++seg_count;    ds.SetNumseg(seg_count);}END_NCBI_SCOPE/* * =========================================================================== * * $Log: splign_formatter.cpp,v $ * Revision 1000.0  2004/06/01 18:12:43  gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.6 * * Revision 1.6  2004/05/24 16:13:57  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.5  2004/05/04 15:23:45  ucko * Split splign code out of xalgoalign into new xalgosplign. * * Revision 1.4  2004/04/30 15:00:47  kapustin * Support ASN formatting * * Revision 1.3  2004/04/23 14:37:44  kapustin * *** empty log message *** * * * =========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -