⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splign.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
            const double a0 = double(qdim0) / qdim;            smax = size_t(floor (box0[3] + a0*dist));            if(smax == box1[2]) {              --smax;            }          }        }        else { // !same_strand          smax = kMax_UInt;        }      }           try {        SAlignedCompartment ac = x_RunOnCompartment(&comp_hits, smin, smax);        ac.m_id = ++m_model_id;        ac.m_segments = m_segments;        ac.m_error = false;        ac.m_msg = "Ok";        m_result.push_back(ac);      }      catch(CException& e) {        m_result.push_back(SAlignedCompartment(0, true, e.GetMsg().c_str()));      }      smin = same_strand? (smax + 1): 0;    }}// naive polya detectionsize_t CSplign::x_TestPolyA(void){  const size_t dim = m_mrna.size();  int i = dim - 1;  for(; i >= 0; --i) {    if(m_mrna[i] != 'A') break;  }  const size_t len = dim - i - 1;;  return len > 3 ? i + 1 : kMax_UInt;}CSplign::SAlignedCompartment CSplign::x_RunOnCompartment( THits* hits,                                                          size_t range_left,                                                          size_t range_right ){  SAlignedCompartment rv;  m_segments.clear();  if(range_left > range_right) {    NCBI_THROW( CAlgoAlignException, eInternal, "Invalid range data");  }  XFilter(hits);  if(hits->size() == 0) {    NCBI_THROW( CAlgoAlignException, eNoData,		"No hits left after filtering");  }  const string query ( hits->front().m_Query );  const string subj  ( hits->front().m_Subj );  const size_t mrna_size = m_mrna.size();    if( !m_strand ) {    // make reverse complimentary    reverse (m_mrna.begin(), m_mrna.end());    transform(m_mrna.begin(), m_mrna.end(), m_mrna.begin(),	      SCompliment());    // adjust the hits    for(size_t i = 0, n = hits->size(); i < n; ++i) {      CHit& h = (*hits)[i];      bool plus = h.IsStraight();      size_t a0 = mrna_size - h.m_ai[0] + 1;      size_t a1 = mrna_size - h.m_ai[1] + 1;      h.m_an[0] = h.m_ai[0] = a1;      h.m_an[1] = h.m_ai[1] = a0;      // change strand      if(plus) {	h.m_an[2] = h.m_ai[3];	h.m_an[3] = h.m_ai[2];      }      else {	h.m_an[2] = h.m_ai[2];	h.m_an[3] = h.m_ai[3];      }    }  }  m_polya_start = m_nopolya? kMax_UInt: x_TestPolyA();  if(m_polya_start < kMax_UInt) {    CleaveOffByTail(hits, m_polya_start + 1); // cleave off hits beyond cds    if(hits->size() == 0) {      NCBI_THROW( CAlgoAlignException,                  eNoData,                  "No hits found beyond Poly(A), if any");    }  }  // find regions of interest on mRna (query)  // and contig (subj)  size_t qmin, qmax, smin, smax;  GetHitsMinMax(*hits, &qmin, &qmax, &smin, &smax);  --qmin; --qmax; --smin; --smax;  qmin = 0;  qmax = m_polya_start < kMax_UInt? m_polya_start - 1: mrna_size - 1;  smin = max(0, int(smin - m_max_genomic_ext));  smax += m_max_genomic_ext;   if(smin < range_left) {    smin = range_left;  }  if(smax > range_right) {    smax = range_right;  }    bool ctg_strand = (*hits)[0].IsStraight();  m_genomic.clear();  m_sa->Load(subj, &m_genomic, smin, smax);  const size_t ctg_end = smin + m_genomic.size();  if(ctg_end - 1 < smax) { // perhabs adjust smax    smax = ctg_end - 1;  }  if(!ctg_strand) {    // make reverse complementary    // for the contig's area of interest    reverse (m_genomic.begin(), m_genomic.end());    transform(m_genomic.begin(), m_genomic.end(), m_genomic.begin(),	      SCompliment());    // flip the hits    for(size_t i = 0, n = hits->size(); i < n; ++i) {      CHit& h = (*hits)[i];      size_t a2 = smax - (h.m_ai[3] - smin) + 2;      size_t a3 = smax - (h.m_ai[2] - smin) + 2;      h.m_an[2] = h.m_ai[2] = a2;      h.m_an[3] = h.m_ai[3] = a3;    }  }  rv.m_QueryStrand = m_strand;  rv.m_SubjStrand  = ctg_strand;  rv.m_qmin = qmin;  rv.m_smin = smin;  rv.m_smax = smax;  rv.m_mrnasize = mrna_size;  // shift hits so that they originate from qmin, smin;  // also make them zero-based  for(size_t i = 0, n = hits->size(); i < n; ++i) {    CHit& h = (*hits)[i];    h.m_an[0] = h.m_ai[0] -= qmin + 1;    h.m_an[1] = h.m_ai[1] -= qmin + 1;    h.m_an[2] = h.m_ai[2] -= smin + 1;    h.m_an[3] = h.m_ai[3] -= smin + 1;  }    x_SetPattern( hits );  x_Run(&m_mrna.front(), &m_genomic.front());   const size_t seg_dim = m_segments.size();  if(seg_dim == 0) {    NCBI_THROW( CAlgoAlignException, eNoData, "No alignment found.");  }  // try to extend the last segment into the PolyA area    if(m_polya_start < kMax_UInt && seg_dim && m_segments[seg_dim-1].m_exon) {    CSplign::SSegment& s = const_cast<CSplign::SSegment&>(				       m_segments[seg_dim-1]);    const char* p0 = &m_mrna.front() + s.m_box[1] + 1;    const char* q = &m_genomic.front() + s.m_box[3] + 1;    const char* p = p0;    const char* pe = &m_mrna.front() + mrna_size;    const char* qe = &m_genomic.front() + m_genomic.size();    for(; p < pe && q < qe; ++p, ++q) {      if(*p != 'A') break;      if(*p != *q) break;    }    const size_t sh = p - p0;    if(sh) {      // resize      s.m_box[1] += sh;      s.m_box[3] += sh;      s.m_details.append(sh, 'M');      s.RestoreIdentity();      // correct annotation      const size_t ann_dim = s.m_annot.size();      if(ann_dim > 2 && s.m_annot[ann_dim - 3] == '>') {          ++q;          s.m_annot[ann_dim - 2] = q < qe? *q: ' ';          ++q;          s.m_annot[ann_dim - 1] = q < qe? *q: ' ';      }      m_polya_start += sh;    }  }  int j = seg_dim - 1;  // look for PolyA in trailing segments:  // if a segment is mostly 'A's then we add it to PolyA  for(; j >= 0; --j) {    const CSplign::SSegment& s = m_segments[j];    const char* p0 = &m_mrna[qmin] + s.m_box[0];    const char* p1 = &m_mrna[qmin] + s.m_box[1] + 1;    size_t count = 0;    for(const char* pc = p0; pc != p1; ++pc) {      if(*pc == 'A') ++count;    }    const size_t len = p1 - p0;    double min_a_content = 0.799;    // also check splices    if(s.m_exon && j > 0 && m_segments[j-1].m_exon) {      if(!IsConsensus(m_segments[j-1].GetDonor(), s.GetAcceptor())) {        min_a_content = 0.599;      }    }    if(!s.m_exon) {        min_a_content = 0.599;    }    if(double(count)/len < min_a_content) {        break;    }  }  if(j >= 0 && j < int(seg_dim - 1)) {    m_polya_start = m_segments[j].m_box[1] + 1;  }  // test if we have at least one exon  bool some_exons = false;  for(int i = 0; i <= j; ++i ) {    if(m_segments[i].m_exon) {      some_exons = true;      break;    }  }  if(!some_exons) {    NCBI_THROW( CAlgoAlignException, eNoData,                "No exons found above identity limit.");  }  m_segments.resize(j + 1);  return rv;}void CSplign::x_Run(const char* Seq1, const char* Seq2){    deque<SSegment> segments;    for(size_t i = 0, map_dim = m_alnmap.size(); i < map_dim; ++i) {        const SAlnMapElem& zone = m_alnmap[i];        // setup sequences        m_aligner->SetSequences(Seq1 + zone.m_box[0],            zone.m_box[1] - zone.m_box[0] + 1,            Seq2 + zone.m_box[2],            zone.m_box[3] - zone.m_box[2] + 1,            false);        // prepare the pattern        vector<size_t> pattern;        if(zone.m_pattern_start >= 0) {            copy(m_pattern.begin() + zone.m_pattern_start,            m_pattern.begin() + zone.m_pattern_end + 1,            back_inserter(pattern));            for(size_t j = 0, pt_dim = pattern.size(); j < pt_dim; j += 4) {//#define DBG_DUMP_PATTERN#ifdef  DBG_DUMP_PATTERN	      cerr << pattern[j] << '\t' << pattern[j+1] << '\t'		   << pattern[j+2] << '\t' << pattern[j+3] << endl;#endif                pattern[j]   -= zone.m_box[0];                pattern[j+1] -= zone.m_box[0];                pattern[j+2] -= zone.m_box[2];                pattern[j+3] -= zone.m_box[2];            }            if(pattern.size()) {                m_aligner->SetPattern(pattern);            }            // setup esf            m_aligner->SetEndSpaceFree(true, true, true, true);                        // align            m_aligner->Run();// #define DBG_DUMP_TYPE2#ifdef  DBG_DUMP_TYPE2            {{            CNWFormatter fmt (*m_aligner);            string txt;            fmt.AsText(&txt, CNWFormatter::eFormatType2);            cerr << txt;            }}  #endif            // create list of segments            CNWFormatter formatter (*m_aligner);            string exons;            formatter.AsText(&exons, CNWFormatter::eFormatExonTableEx);      	                CNcbiIstrstream iss_exons (exons.c_str());            while(iss_exons) {                string id1, id2, txt, repr;                size_t q0, q1, s0, s1, size;                double idty;                iss_exons >> id1 >> id2 >> idty >> size			  >> q0 >> q1 >> s0 >> s1 >> txt >> repr;                if(!iss_exons) break;                q0 += zone.m_box[0];                q1 += zone.m_box[0];                s0 += zone.m_box[2];                s1 += zone.m_box[2];                SSegment e;                e.m_exon = true;                e.m_idty = idty;                e.m_len = size;                e.m_box[0] = q0; e.m_box[1] = q1;                e.m_box[2] = s0; e.m_box[3] = s1;                e.m_annot = txt;                e.m_details = repr;                segments.push_back(e);            }            // append a gap            if(i + 1 < map_dim) {                SSegment g;                g.m_exon = false;                g.m_box[0] = zone.m_box[1] + 1;                g.m_box[1] = m_alnmap[i+1].m_box[0] - 1;                g.m_box[2] = zone.m_box[3] + 1;                g.m_box[3] = m_alnmap[i+1].m_box[2] - 1;                g.m_idty = 0;                g.m_len = g.m_box[1] - g.m_box[0] + 1;                g.m_annot = "<GAP>";                g.m_details.append(Seq1+g.m_box[0], g.m_box[1]-g.m_box[0]+1);                segments.push_back(g);            }        }    } // zone iterations end    // segment-level postprocessing    size_t seg_dim = segments.size();    if(seg_dim == 0) {        return;    }    // First go from the ends and see if we    // can improve boundary exons    size_t k0 = 0;    while(k0 < seg_dim) {        SSegment& s = segments[k0];        if(s.m_exon) {            if(s.m_idty < m_minidty || m_endgaps) {                s.ImproveFromLeft(Seq1, Seq2);            }            if(s.m_idty >= m_minidty) {                break;            }        }        ++k0;    }    // fill the left-hand gap, if any    if(segments[0].m_exon && segments[0].m_box[0] > 0) {        segments.push_front(SSegment());        SSegment& g = segments.front();        g.m_exon = false;        g.m_box[0] = 0;        g.m_box[1] = segments[0].m_box[0] - 1;        g.m_box[2] = 0;        g.m_box[3] = segments[0].m_box[2] - 1;        g.m_idty = 0;        g.m_len = segments[0].m_box[0];        g.m_annot = "<GAP>";        g.m_details = string(Seq1 + g.m_box[0], g.m_box[1] + 1 - g.m_box[0]);        ++seg_dim;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -