⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 splign_compartment_finder.cpp

📁 ncbi源码
💻 CPP
字号:
/* * =========================================================================== * PRODUCTION $Log: splign_compartment_finder.cpp,v $ * PRODUCTION Revision 1000.0  2004/06/01 18:12:33  gouriano * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.5 * PRODUCTION * =========================================================================== *//* $Id: splign_compartment_finder.cpp,v 1000.0 2004/06/01 18:12:33 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE                          *               National Center for Biotechnology Information*                                                                          *  This software/database is a "United States Government Work" under the   *  terms of the United States Copyright Act.  It was written as part of    *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S.    *  Government have not placed any restriction on its use or reproduction.  *                                                                          *  Although all reasonable efforts have been taken to ensure the accuracy  *  and reliability of the software and data, the NLM and the U.S.          *  Government do not and cannot warrant the performance or results that    *  may be obtained by using this software or data. The NLM and the U.S.    *  Government disclaim all warranties, express or implied, including       *  warranties of performance, merchantability or fitness for any particular*  purpose.                                                                *                                                                          *  Please cite the author in any work or product based on this material.   ** ===========================================================================** Author:  Yuri Kapustin** File Description: Compartment Finder*                   * ===========================================================================*/#include <ncbi_pch.hpp>#include "splign_compartment_finder.hpp"#include <corelib/ncbi_limits.hpp>#include <algo/align/align_exception.hpp>#include <algorithm>BEGIN_NCBI_SCOPEvoid CCompartmentFinder::CCompartment::UpdateMinMax() {  m_box[0] = m_box[2] = kMax_UInt;  m_box[1] = m_box[3] = 0;  ITERATE(vector<const CHit*>, ph, m_members) {        const CHit* hit = *ph;    if(size_t(hit->m_ai[0]) < m_box[0]) {      m_box[0] = hit->m_ai[0];    }    if(size_t(hit->m_ai[2]) < m_box[2]) {      m_box[2] = hit->m_ai[2];    }    if(size_t(hit->m_ai[1]) > m_box[1]) {      m_box[1] = hit->m_ai[1];    }    if(size_t(hit->m_ai[3]) > m_box[3]) {      m_box[3] = hit->m_ai[3];    }  }}bool CCompartmentFinder::CCompartment::GetStrand() const {  if(m_members.size()) {    return m_members[0]->IsStraight();  }  else {        NCBI_THROW( CAlgoAlignException,                    eInternal,                    "Strand requested on an empty compartment" );  }}CCompartmentFinder::CCompartmentFinder(vector<CHit>::const_iterator start,                                       vector<CHit>::const_iterator finish):  m_intron_min(GetDefaultIntronMin()),  m_intron_max(GetDefaultIntronMax()),  m_penalty(GetDefaultPenalty()),  m_min_coverage(GetDefaultMinCoverage()),  m_iter(-1){  const size_t dim = finish - start;  if(dim >  0) {    m_hits.resize(dim);    const CHit* p0 = &(*start);    for(size_t i = 0; i < dim; ++i) {      m_hits[i] = p0++;    }  }}size_t CCompartmentFinder::Run(){  m_compartments.clear();  const size_t dimhits = m_hits.size();  if(dimhits == 0) {    return 0;  }  // sort here to make sure that later at every step  // all potential sources (hits from where to continue)  // are visible  sort(m_hits.begin(), m_hits.end(), CHit::PSubjLow_QueryLow_ptr);  // insert dummy element  list<SQueryMark> qmarks; // ordered list of query marks  qmarks.clear();  qmarks.push_back(SQueryMark(0, 0, -1));  const list<SQueryMark>::iterator li_b = qmarks.begin();  // *** Generic hit iteration (could be optimized of course) ***  // For every hit:  // - find out if its query mark already exists (qm_new)  // - for query mark below the current:  //   -- check if it could be extended  //   -- evaluate extension potential  // - for every other qm:  //   -- check if its compartment could be terminated  //   -- evaluate potential of terminating the compartment  //      to open a new one  // - select the best potential  // - if qm_new, create the new query mark, otherwise  //   update the existing one if the new score is higher  // - if query mark was created or updated,   //   save the hit's status ( previous hit, extension or opening)  //  vector<SHitStatus> hitstatus (dimhits);  for(size_t i = 0; i < dimhits; ++i) {    const list<SQueryMark>::iterator li_e = qmarks.end();    const CHit& h = *m_hits[i];    list<SQueryMark>::iterator li0 = lower_bound(li_b, li_e,                                                 SQueryMark(h.m_ai[1], 0, -2));    bool qm_new = (li0 == li_e)? true: (size_t(h.m_ai[1]) < li0->m_coord);    int best_ext_score = kMin_Int;    list<SQueryMark>::iterator li_best_ext = li_b;    int best_open_score = kMin_Int;    list<SQueryMark>::iterator li_best_open = li_b;    for( list<SQueryMark>::iterator li = li_b; li != li_e; ++li) {      const CHit* phc = (li->m_hit == -1)? 0: m_hits[li->m_hit];      // check if good for extention      if(li->m_hit < int(i)) {        size_t q0 = h.m_ai[0], s0 = h.m_ai[2]; // possible continuation point        bool good;        if(li->m_hit == -1) {          good = false;        }        else {          if(phc->m_ai[1] >= h.m_ai[1]) {            good = false;          }          else {            if(phc->m_ai[1] < h.m_ai[0]) {              q0 = h.m_ai[0];              s0 = h.m_ai[2];            }            else {              // find where this point would be on h              q0 = phc->m_ai[1] + 1;              s0 += q0 - h.m_ai[0];            }            int intron = int(s0) - phc->m_ai[3] - 1;            good =  int(m_intron_min) <= intron && intron <= int(m_intron_max);          }                  }        if(good) {          int ext_score = li->m_score +              int(0.01 * h.m_Idnty * (h.m_ai[1] - q0 + 1));          if(ext_score > best_ext_score) {            best_ext_score = ext_score;            li_best_ext = li;          }        }      }            // check if good for closing and opening      if(li->m_hit == -1 ||  phc->m_ai[3] < h.m_ai[2]) {        int score_open = li->m_score - m_penalty +          int(0.01*h.m_Idnty*(h.m_ai[1] - h.m_ai[0] + 1));        if(score_open > best_open_score) {          best_open_score = score_open;          li_best_open = li;        }      }    }    SHitStatus::EType hit_type;    int prev_hit;    int best_score;    if(best_ext_score > best_open_score) {      hit_type = SHitStatus::eExtension;      prev_hit = li_best_ext->m_hit;      best_score = best_ext_score;    }    else {      hit_type = SHitStatus::eOpening;      prev_hit = li_best_open->m_hit;      best_score = best_open_score;    }    bool updated = false;    if(qm_new) {      qmarks.insert(li0, SQueryMark(h.m_ai[1], best_score, i));      updated = true;    }    else {      if(best_score > li0->m_score) {        li0->m_score = best_score;        li0->m_hit = i;        updated = true;      }    }    if(updated) {      hitstatus[i].m_type = hit_type;      hitstatus[i].m_prev = prev_hit;    }  }  // *** backtrace ***  // - find query mark with the highest score  // - trace it back until the dummy  list<SQueryMark>::iterator li_best = li_b;  ++li_best;  int score_best = kMin_Int;  for(list<SQueryMark>::iterator li = li_best, li_e = qmarks.end();      li != li_e; ++li) {    if(li->m_score > score_best) {      score_best = li->m_score;      li_best = li;    }  }  if( int(score_best + m_penalty) >= int(m_min_coverage) ) {    int i = li_best->m_hit;    CCompartment* pc = 0;    bool new_compartment = true;    while(i != -1) {      if(new_compartment) {        new_compartment = false;        m_compartments.push_back(CCompartment());        pc = &m_compartments.back();      }      pc->AddMember(m_hits[i]);            if(hitstatus[i].m_type == SHitStatus::eOpening) {        new_compartment = true;      }      i = hitstatus[i].m_prev;    }  }  return m_compartments.size();}CCompartmentFinder::CCompartment* CCompartmentFinder::GetFirst(){  if(m_compartments.size()) {    m_iter = 0;    return &m_compartments[m_iter++];  }  else {    m_iter = -1;    return 0;  }}void CCompartmentFinder::OrderCompartments(void) {    for(int i = 0, dim = m_compartments.size(); i < dim; ++i) {    m_compartments[i].UpdateMinMax();  }  sort(m_compartments.begin(), m_compartments.end(), PLowerSubj);}CCompartmentFinder::CCompartment* CCompartmentFinder::GetNext(){  const size_t dim = m_compartments.size();  if(m_iter == -1 || m_iter >= int(dim)) {    m_iter = -1;    return 0;  }  else {    return &m_compartments[m_iter++];  }}CCompartmentAccessor::CCompartmentAccessor(vector<CHit>::iterator istart,                                           vector<CHit>::iterator ifinish,                                           size_t comp_penalty,                                           size_t min_coverage){    // separate strands for CompartmentFinder    vector<CHit>::iterator ib = istart, ie = ifinish, ii = ib, iplus_beg = ie;    sort(ib, ie, CHit::PPrecedeStrand);    size_t minus_subj_min = kMax_UInt, minus_subj_max = 0;    for(ii = ib; ii != ie; ++ii) {      if(ii->IsStraight()) {        iplus_beg = ii;        break;      }      else {        if(size_t(ii->m_ai[2]) < minus_subj_min) {          minus_subj_min = ii->m_ai[2];        }        if(size_t(ii->m_ai[3]) > minus_subj_max) {          minus_subj_max = ii->m_ai[3];        }      }    }    // minus    {{      // flip      for(ii = ib; ii != iplus_beg; ++ii) {        int s0 = minus_subj_min + minus_subj_max - ii->m_ai[3];        int s1 = minus_subj_min + minus_subj_max - ii->m_ai[2];        ii->m_an[2] = ii->m_ai[2] = s0;        ii->m_an[3] = ii->m_ai[3] = s1;      }      CCompartmentFinder finder (ib, iplus_beg);      finder.SetMinCoverage(min_coverage);      finder.SetPenalty(comp_penalty);      finder.Run();      // un-flip      for(ii = ib; ii != iplus_beg; ++ii) {        int s0 = minus_subj_min + minus_subj_max - ii->m_ai[3];        int s1 = minus_subj_min + minus_subj_max - ii->m_ai[2];        ii->m_an[3] = ii->m_ai[2] = s0;        ii->m_an[2] = ii->m_ai[3] = s1;      }      x_Copy2Pending(finder);    }}    // plus    {{      CCompartmentFinder finder (iplus_beg, ie);      finder.SetMinCoverage(min_coverage);      finder.SetPenalty(comp_penalty);      finder.Run();      x_Copy2Pending(finder);    }}}void CCompartmentAccessor::x_Copy2Pending(CCompartmentFinder& finder){  finder.OrderCompartments();  // copy to pending  for(CCompartmentFinder::CCompartment* compartment = finder.GetFirst(); compartment;      compartment = finder.GetNext()) {        m_pending.push_back(vector<CHit>(0));    vector<CHit>& vh = m_pending.back();        for(const CHit* ph = compartment->GetFirst(); ph; ph = compartment->GetNext()) {      vh.push_back(*ph);    }        const size_t* box = compartment->GetBox();    m_ranges.push_back(box[0]-1);    m_ranges.push_back(box[1]-1);    m_ranges.push_back(box[2]-1);    m_ranges.push_back(box[3]-1);    m_strands.push_back(compartment->GetStrand());  }}bool CCompartmentAccessor::GetFirst(vector<CHit>& compartment) {  m_iter = 0;  return GetNext(compartment);}bool CCompartmentAccessor::GetNext(vector<CHit>& compartment) {  compartment.clear();  if(m_iter < m_pending.size()) {    compartment = m_pending[m_iter++];    return true;  }  else {    return false;  }}END_NCBI_SCOPE/* * =========================================================================== * $Log: splign_compartment_finder.cpp,v $ * Revision 1000.0  2004/06/01 18:12:33  gouriano * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.5 * * Revision 1.5  2004/05/24 16:13:57  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.4  2004/05/18 21:43:40  kapustin * Code cleanup * * Revision 1.3  2004/04/23 14:37:44  kapustin * *** empty log message *** * * ========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -