⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nw_spliced_aligner.cpp

📁 ncbi源码
💻 CPP
字号:
/* * =========================================================================== * PRODUCTION $Log: nw_spliced_aligner.cpp,v $ * PRODUCTION Revision 1000.1  2004/06/01 18:04:57  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8 * PRODUCTION * =========================================================================== *//* $Id: nw_spliced_aligner.cpp,v 1000.1 2004/06/01 18:04:57 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE                          *               National Center for Biotechnology Information*                                                                          *  This software/database is a "United States Government Work" under the   *  terms of the United States Copyright Act.  It was written as part of    *  the author's official duties as a United States Government employee and *  thus cannot be copyrighted.  This software/database is freely available *  to the public for use. The National Library of Medicine and the U.S.    *  Government have not placed any restriction on its use or reproduction.  *                                                                          *  Although all reasonable efforts have been taken to ensure the accuracy  *  and reliability of the software and data, the NLM and the U.S.          *  Government do not and cannot warrant the performance or results that    *  may be obtained by using this software or data. The NLM and the U.S.    *  Government disclaim all warranties, express or implied, including       *  warranties of performance, merchantability or fitness for any particular*  purpose.                                                                *                                                                          *  Please cite the author in any work or product based on this material.   ** ===========================================================================** Authors:  Yuri Kapustin** File Description:  Base class for spliced aligners.*                   * ===========================================================================**/#include <ncbi_pch.hpp>#include <algo/align/nw_spliced_aligner.hpp>#include <algo/align/align_exception.hpp>BEGIN_NCBI_SCOPECSplicedAligner::CSplicedAligner():    m_IntronMinSize(GetDefaultIntronMinSize()){    SetEndSpaceFree(true, true, false, false);}    CSplicedAligner::CSplicedAligner(const char* seq1, size_t len1,                                 const char* seq2, size_t len2)    : CNWAligner(seq1, len1, seq2, len2),      m_IntronMinSize(GetDefaultIntronMinSize()){    SetEndSpaceFree(true, true, false, false);}void CSplicedAligner::SetWi  (unsigned char splice_type, TScore value){    if(splice_type < GetSpliceTypeCount()) {        x_GetSpliceScores()[splice_type]  = value;    }    else     {        NCBI_THROW(CAlgoAlignException,                   eInvalidSpliceTypeIndex,                   "Invalid splice type index");    }}CSplicedAligner::TScore CSplicedAligner::GetWi  (unsigned char splice_type){    if(splice_type < GetSpliceTypeCount()) {        return x_GetSpliceScores()[splice_type];    }    else     {        NCBI_THROW(CAlgoAlignException,                   eInvalidSpliceTypeIndex,                   "Invalid splice type index");    }}unsigned char CSplicedAligner::x_CalcFingerPrint64(    const char* beg, const char* end, size_t& err_index){    if(beg >= end) {        return 0xFF;    }    unsigned char fp = 0, code;    for(const char* p = beg; p < end; ++p) {        switch(*p) {        case 'A': code = 0;    break;        case 'G': code = 0x01; break;        case 'T': code = 0x02; break;        case 'C': code = 0x03; break;        default:  err_index = p - beg; return 0x40; // incorrect char        }        fp = 0x3F & ((fp << 2) | code);    }    return fp;}const char* CSplicedAligner::x_FindFingerPrint64(    const char* beg, const char* end,    unsigned char fingerprint, size_t size,    size_t& err_index){    if(beg + size > end) {        err_index = 0;        return 0;    }    const char* p0 = beg;    size_t err_idx = 0; --p0;    unsigned char fp = 0x40;    while(fp == 0x40 && p0 < end) {        p0 += err_idx + 1;        fp = x_CalcFingerPrint64(p0, p0 + size, err_idx);    }    if(p0 >= end) {        return end;  // not found    }        unsigned char code;    while(fp != fingerprint && ++p0 < end) {        switch(*(p0 + size - 1)) {        case 'A': code = 0;    break;        case 'G': code = 0x01; break;        case 'T': code = 0x02; break;        case 'C': code = 0x03; break;        default:  err_index = p0 + size - 1 - beg;                  return 0;        }                fp = 0x3F & ((fp << 2) | code );    }    return p0;}struct nwaln_mrnaseg {    nwaln_mrnaseg(size_t i1, size_t i2, unsigned char fp0):        a(i1), b(i2), fp(fp0) {}    size_t a, b;    unsigned char fp;};struct nwaln_mrnaguide {    nwaln_mrnaguide(size_t i1, size_t i2, size_t i3, size_t i4):        q0(i1), q1(i2), s0(i3), s1(i4) {}    size_t q0, q1, s0, s1;};size_t CSplicedAligner::MakePattern(const size_t guide_size){    vector<nwaln_mrnaseg> segs;    size_t err_idx;    for(size_t i = 0; i + guide_size <= m_SeqLen1; ) {        const char* beg = m_Seq1 + i;        const char* end = m_Seq1 + i + guide_size;        unsigned char fp = x_CalcFingerPrint64(beg, end, err_idx);        if(fp != 0x40) {            segs.push_back(nwaln_mrnaseg(i, i + guide_size - 1, fp));            i += guide_size;        }        else {            i += err_idx + 1;        }    }    vector<nwaln_mrnaguide> guides;    size_t idx = 0;    const char* beg = m_Seq2 + idx;    const char* end = m_Seq2 + m_SeqLen2;    for(size_t i = 0, seg_count = segs.size();        beg + guide_size <= end && i < seg_count; ++i) {        const char* p = 0;        const char* beg0 = beg;        while( p == 0 && beg + guide_size <= end ) {            p = x_FindFingerPrint64( beg, end, segs[i].fp,                                     guide_size, err_idx );            if(p == 0) { // incorrect char                beg += err_idx + 1;             }            else if (p < end) {// fingerprints match but check actual sequences                const char* seq1 = m_Seq1 + segs[i].a;                const char* seq2 = p;                size_t k;                for(k = 0; k < guide_size; ++k) {                    if(seq1[k] != seq2[k]) break;                }                if(k == guide_size) { // real match                    size_t i1 = segs[i].a;                    size_t i2 = segs[i].b;                    size_t i3 = seq2 - m_Seq2;                    size_t i4 = i3 + guide_size - 1;                    size_t guides_dim = guides.size();                    if( guides_dim == 0 ||                        i1 - 1 > guides[guides_dim - 1].q1 ||                        i3 - 1 > guides[guides_dim - 1].s1    ) {                        guides.push_back(nwaln_mrnaguide(i1, i2, i3, i4));                    }                    else { // expand the last guide                        guides[guides_dim - 1].q1 = i2;                        guides[guides_dim - 1].s1 = i4;                    }                    beg0 = p + guide_size;                }                else {  // spurious match                    beg = p + 1;                    p = 0;                }            }        }        beg = beg0; // restore start pos in genomic sequence    }    // initialize m_guides    size_t guides_dim = guides.size();    m_guides.clear();    m_guides.resize(4*guides_dim);    const size_t offs = 10;    for(size_t k = 0; k < guides_dim; ++k) {        size_t q0 = (guides[k].q0 + guides[k].q1) / 2;        size_t s0 = (guides[k].s0 + guides[k].s1) / 2;        m_guides[4*k]         = q0 - offs;        m_guides[4*k + 1]     = q0 + offs;        m_guides[4*k + 2]     = s0 - offs;        m_guides[4*k + 3]     = s0 + offs;    }     return m_guides.size();   }END_NCBI_SCOPE/* * =========================================================================== * $Log: nw_spliced_aligner.cpp,v $ * Revision 1000.1  2004/06/01 18:04:57  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.8 * * Revision 1.8  2004/05/21 21:41:02  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.7  2004/05/17 14:50:56  kapustin * Add/remove/rearrange some includes and object declarations * * Revision 1.6  2004/04/23 14:39:47  kapustin * Add Splign library and other changes * * Revision 1.4  2003/10/27 21:00:17  kapustin * Set intron penalty defaults differently for 16- and 32-bit versions according to the expected quality of sequences those variants are supposed to be used with. * * Revision 1.3  2003/09/30 19:50:04  kapustin * Make use of standard score matrix interface * * Revision 1.2  2003/09/26 14:43:18  kapustin * Remove exception specifications * * Revision 1.1  2003/09/02 22:34:49  kapustin * Initial revision * * =========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -