首页 › 资源下载 › 生物技术 › ncbi源码 › 源码查看
splign_hitparser.cpp

来自「ncbi源码」· C++ 代码 · 共 1,371 行 · 第 1/3 页
CPP
1,371 行
            size_t k1 = k + del_count;            if(k1 < dim) {                m_Out[k] = m_Out[k1];            }            else  break;            if(b) --k;        }        m_Out.resize(dim - del_count);    }    return del_count;}int CHitParser::Run(EMode erm){    int nCode = x_CheckParameters();    if(nCode) return nCode;    vector<CHit> vhInvStrand;      // used only if strand mode = auto    int& nGroupId = *m_GroupID;  // auxiliary variable    size_t dim = m_Out.size();    if(dim)    {        x_TransformCoordinates(true);        x_Combine(m_CombinationProximity_pre);        {{        // filter by strand or separate strands, if applicable        if(m_Strand == eStrand_Plus || m_Strand == eStrand_Minus ||           m_Strand == eStrand_Auto)        {            vector<CHit>::iterator ii;            for(ii = m_Out.end()-1; ii >= m_Out.begin(); ii--)            {                bool bStraight = ii->IsStraight();                switch(m_Strand)                {                    case eStrand_Plus:                        if (bStraight) continue; else m_Out.erase(ii);                        break;                    case eStrand_Minus:                        if (!bStraight) continue; else m_Out.erase(ii);                        break;                    case eStrand_Auto: //  separate strands before processing                        if (bStraight) continue;                        else                        {                            vhInvStrand.push_back(*ii);                            m_Out.erase(ii);                        }                        break;                }            }        }        x_CalcGlobalEnvelop();        switch(erm)        {            case eMode_Combine: // already combined                break;            case eMode_GroupSelect:                x_RunMSGS(true, nGroupId);                break;            case eMode_Normal:                x_IdentifyMaxDistGroups();                switch(m_Method)                {                    case eMethod_MaxScore:                        x_RunMaxScore(); break;                    case eMethod_MaxScore_GroupSelect:                        x_RunMSGS(false, nGroupId); break;                    default: return 1; //  not supported                }                if(!m_OutputAllGroups) {                    // we need to identify again because "connecting"                    // hits might might have perished                    x_IdentifyMaxDistGroups();                      x_FilterByMaxDist();                }                break;        }        // if strand mode is auto then we run it again for inverse strand        // and then compare the strands        if(m_Strand == 3)        {            vector<CHit> vh0 = m_Out;            m_Out = vhInvStrand;            x_CalcGlobalEnvelop();            switch(erm)            {                case eMode_Combine:                    break;                case eMode_GroupSelect:                    x_RunMSGS(true, nGroupId);                    break;                case eMode_Normal:                    x_IdentifyMaxDistGroups();                    switch(m_Method)                    {                        case eMethod_MaxScore:                            x_RunMaxScore(); break;                        case eMethod_MaxScore_GroupSelect:                            x_RunMSGS(false, nGroupId); break;                        default: return 1; //  not supported                    }                    x_IdentifyMaxDistGroups();                    x_FilterByMaxDist();                    break;            }            if(erm == eMode_Normal && !m_OutputAllGroups)            { // leave only the best strand                double dScoreStraight = 0., dScoreInverse = 0.;                vector<CHit>::iterator ii = vh0.begin(), iend = vh0.end();                for(; ii != iend; ii++) dScoreStraight += ii->m_Score;                ii = m_Out.begin(); iend = m_Out.end();                for(; ii != iend; ii++) dScoreInverse += ii->m_Score;                if(dScoreStraight > dScoreInverse) m_Out = vh0;            }            else            { // merge the strands since we don't actually want to              // eliminate ambiguities here              copy(vh0.begin(), vh0.end(), back_inserter(m_Out));            }        }        x_Combine(m_CombinationProximity_post);        }}        x_TransformCoordinates(false);    }    return nCode;}int CHitParser::x_CheckParameters() const{    switch (m_Method)    {        case eMethod_MaxScore:        case eMethod_MaxScore_GroupSelect:        break;        default: return 1; // unsupported    };    switch (m_Strand)    {        case 0: case 1: case 3: break;            // same order and different strands are incompatible        case 2: if(m_SameOrder) return 3;                        else break;        default: return 2; // unsupported    };    return 0;}// counts the number of collisions// pvh0== 0 => count at m_Outint CHitParser::GetCollisionCount (int& nq, int& ns, const vector<CHit>& vh){    nq = ns = 0;    vector<SNode> vi_q, vi_s;    vector<CHit>::const_iterator phi0 = vh.begin(),      phi1 = vh.end(), phi = phi0;    for(phi = phi0; phi != phi1; ++phi)    {        SNode Nodes [] = {            SNode(phi->m_ai[0],true,phi), SNode(phi->m_ai[1],false,phi),            SNode(phi->m_ai[2],true,phi), SNode(phi->m_ai[3],false,phi)        };        vi_q.push_back(Nodes[0]); vi_q.push_back(Nodes[1]);        vi_s.push_back(Nodes[2]); vi_s.push_back(Nodes[3]);    }    sort(vi_q.begin(),vi_q.end());    sort(vi_s.begin(),vi_s.end());    int nLevel = 0;    vector<SNode>::iterator ii;    for(ii = vi_q.begin(); ii != vi_q.end(); ii++)        if(ii->type) { if(++nLevel == 2) nq++; } else nLevel--;    nLevel = 0;    vector<SNode>::iterator jj;    for(jj = vi_s.begin(); jj != vi_s.end(); jj++)        if(jj->type) { if(++nLevel == 2) ns++; } else nLevel--;    return nq + ns;}void CHitParser::x_TransformCoordinates(bool bDir){    if(bDir)    {        // transform hits        m_Origin[0] = m_ange[0];        m_Origin[1] = m_ange[2];        vector<CHit>::iterator ii, iend;        for(ii = m_Out.begin(), iend = m_Out.end(); ii != iend; ii++)        {            for(int k=0; k<4; k++)            {                ii->m_an[k] -= m_Origin[k/2];                ii->m_ai[k] -= m_Origin[k/2];            }        }    }    else    {        vector<CHit>::iterator ii, iend;        vector<vector<CHit>::iterator> vhi_del;        for(ii = m_Out.begin(), iend = m_Out.end(); ii != iend; ii++)        {            for(int k=0; k<4; k++)            {                ii->m_an[k] += m_Origin[k/2];                ii->m_ai[k] += m_Origin[k/2];            }            if(ii->m_ai[1] == ii->m_ai[0] || ii->m_ai[3] == ii->m_ai[2])                vhi_del.push_back(ii);        }        // delete the slack        vector<vector<CHit>::iterator>::reverse_iterator i1, i1e;        for(i1 = vhi_del.rbegin(), i1e = vhi_del.rend(); i1 != i1e; i1++)            m_Out.erase(*i1);    }    x_CalcGlobalEnvelop();}void CHitParser::x_FilterByMaxDist(){    //  sum and compare the scores    map<int,double> mgr2sc;    int nSize = m_Out.size();    for(int j = 0; j < nSize; j++)    {        const int nClustID = CMaxDistClIdGet()(m_Out[j]);        mgr2sc[nClustID] += m_Out[j].m_Score;    }    double dmax = -1e38;    map<int,double>::iterator im, im1;    for(im1 = im = mgr2sc.begin(); im != mgr2sc.end(); im++)    {        if(im->second > dmax)        {            dmax = im->second;            im1 = im;        }    }    // leave only the top-score max dist group    vector<CHit>::iterator ie = remove_if(        m_Out.begin(), m_Out.end(), bind2nd(hit_out_of_group(),im1->first));    m_Out.erase(ie,m_Out.end());}void CHitParser::x_IdentifyMaxDistGroups(){    DoSingleLinkageClustering ( m_Out,        CMaxDistClustPred(m_MaxHitDistQ, m_MaxHitDistS),        CMaxDistClIdSet(),        CMaxDistClIdGet() );}void CHitParser::x_SyncGroupsByMaxDist(int& nGroupID){    if(m_Out.size() == 0) return;    // order by GroupID    stable_sort(m_Out.begin(), m_Out.end(), hit_groupid_less());    // renumber the groups to make sure that    // we have same nClustID throughout the group    vector<CHit>::iterator ibegin = m_Out.begin(), iend = m_Out.end(),        ii = ibegin;    int ngid = ii->m_GroupID, nmdcid = ii->m_MaxDistClustID;    ii->m_GroupID = ++nGroupID;    for(++ii; ii != iend; ++ii)    {        if(ngid == ii->m_GroupID && nmdcid != ii->m_MaxDistClustID)        {            ++nGroupID;            nmdcid = ii->m_MaxDistClustID;        }        if(ngid != ii->m_GroupID)        {            ++nGroupID;            ngid = ii->m_GroupID;        }        ii->m_GroupID = nGroupID;    }}size_t CHitParser::GetSeqLength(const string& accession){    return 0;    /*  using namespace objects;  map<string, size_t>::const_iterator i1 = m_seqsizes.find(accession),    i2 = m_seqsizes.end();  if(i1 != i2) {    size_t s = i1->second;    return s;  }    CObjectManager om;  om.RegisterDataLoader ( *new CGBDataLoader("ID", new CId1Reader, 2),			  CObjectManager::eDefault );  CScope scope(om);  scope.AddDefaults();      CSeq_id seq_id (accession);    CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id);    if ( bioseq_handle ) {    const CBioseq& bioseq = bioseq_handle.GetBioseq();    const CSeq_inst& inst = bioseq.GetInst();    if(inst.IsSetLength()) {      size_t s = inst.GetLength();      m_seqsizes[accession] = s;      return s;    }  }  return 0;    */}bool CHitGroup::CheckSameOrder(){    if(m_vHits.size() == 0) return true;    vector<int>::const_iterator ivh0 = m_vHits.begin(),        ivh1 = m_vHits.end(), ii, jj;    hit_not_same_order hnso;    for(ii = ivh0; ii < ivh1; ++ii) {        for(jj = ivh0; jj < ii; ++jj) {            if(hnso( (*m_pHits)[*ii], (*m_pHits)[*jj] ))                return false;        }    }    return true;}void CHitParser::x_Combine(double dProximity){    if(dProximity < 0 || m_Out.size() == 0) {        return;    }    x_CalcGlobalEnvelop();    size_t total_hits = m_Out.size();    vector<CHit*> vh (total_hits);    CHit* phit = &(m_Out[0]);    for(size_t i = 0; i < total_hits; ++i) {        vh[i] = phit++;    }    typedef vector<CHit*>::iterator TVHitPtrI;    // separate strands    sort(vh.begin(), vh.end(),  CHit::PPrecedeStrand_ptr);    TVHitPtrI strand_minus_start =         find_if(vh.begin(), vh.end(), bind2nd(hit_ptr_strand_is(), true));    typedef pair<TVHitPtrI, TVHitPtrI> THitRange;    THitRange strands [2];    strands[0].first = vh.begin();    strands[0].second = strands[1].first = strand_minus_start;    strands[1].second = strands[0].first + total_hits;     for(size_t strand = 0; strand < 2; ++strand) {        TVHitPtrI start  = strands[strand].first;        TVHitPtrI finish = strands[strand].second;                size_t dim = finish - start;        if(dim < 2) continue;        size_t count;        do {            count = 0;            stable_sort(start, finish, CHit::PGreaterScore_ptr);            for(TVHitPtrI i1 = start; i1 < finish - 1; ++i1) {                if(*i1 == 0) continue;                for(TVHitPtrI j1 = i1 + 1; j1 < finish; ++j1) {                    if(*j1 == 0) continue;                    double d = x_CalculateProximity(**i1, **j1);                    if(d <= dProximity) {                        CHit hc (**i1, **j1);                        **i1 = hc;                        *j1 = 0;                        ++count;                    }                }            }        }        while(count);    }    // flush deletions, compress pointers vector, fill original hit vector    TVHitPtrI iv = remove(vh.begin(), vh.end(), (CHit*) 0);    vh.erase(iv, vh.end());    TVHitPtrI ib = vh.begin(), ie = vh.end();    sort(ib, ie, CHit::PGreaterScore_ptr);    vector<CHit> vout (vh.size());    for(iv = ib; iv != ie; ++iv) {        vout[iv - ib] = **iv;    }    m_Out = vout;    return;}END_NCBI_SCOPE/** ===========================================================================** $Log: splign_hitparser.cpp,v $* Revision 1000.0  2004/06/01 18:12:53  gouriano* PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.7** Revision 1.7  2004/05/24 16:13:57  gorelenk* Added PCH ncbi_pch.hpp** Revision 1.6  2004/05/18 21:43:40  kapustin* Code cleanup** Revision 1.5  2004/05/03 21:53:57  kapustin* Eliminate OM-dependant code** Revision 1.4  2004/04/26 16:52:44  ucko* Add an explicit "typename" annotation required by GCC 3.4, and adjust* the code to take advantage of the ITERATE macro.** Revision 1.3  2004/04/23 14:37:44  kapustin* *** empty log message ***** * ===========================================================================*/
splign_hitparser.cpp - 源码说明

本页面展示了「ncbi源码」中的 splign_hitparser.cpp 源码文件，采用 C++ 编程语言编写，共 1,371 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ncbi相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?