polya.hpp

来自「ncbi源码」· HPP 代码 · 共 303 行

HPP
303
字号
/* * =========================================================================== * PRODUCTION $Log: polya.hpp,v $ * PRODUCTION Revision 1000.2  2004/06/01 18:04:31  gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6 * PRODUCTION * =========================================================================== *//*  $Id: polya.hpp,v 1000.2 2004/06/01 18:04:31 gouriano Exp $* ===========================================================================**                            PUBLIC DOMAIN NOTICE*               National Center for Biotechnology Information**  This software/database is a "United States Government Work" under the*  terms of the United States Copyright Act.  It was written as part of*  the author's official duties as a United States Government employee and*  thus cannot be copyrighted.  This software/database is freely available*  to the public for use. The National Library of Medicine and the U.S.*  Government have not placed any restriction on its use or reproduction.**  Although all reasonable efforts have been taken to ensure the accuracy*  and reliability of the software and data, the NLM and the U.S.*  Government do not and cannot warrant the performance or results that*  may be obtained by using this software or data. The NLM and the U.S.*  Government disclaim all warranties, express or implied, including*  warranties of performance, merchantability or fitness for any particular*  purpose.**  Please cite the author in any work or product based on this material.** ===========================================================================** Author: Philip Johnson** File Description: finds mRNA 3' modification (poly-A tails)** ---------------------------------------------------------------------------*/#ifndef ALGO_SEQUENCE___POLYA__HPP#define ALGO_SEQUENCE___POLYA__HPP#include <corelib/ncbistd.hpp>BEGIN_NCBI_SCOPEenum EPolyTail {    ePolyTail_None,    ePolyTail_A3, //> 3' poly-A tail    ePolyTail_T5  //> 5' poly-T head (submitted to db reversed?)};////////////////////////////////////////////////////////////////////////////////// PRE : two random access iterators pointing to sequence data [begin,/// end)/// POST: poly-A tail cleavage site, if any (-1 if not)template <typename Iterator>TSignedSeqPosFindPolyA(Iterator begin, Iterator end);////////////////////////////////////////////////////////////////////////////////// PRE : two random access iterators pointing to sequence data [begin,/// end)/// POST: cleavageSite (if any) and whether we found a poly-A tail, a poly-T/// head, or neithertemplate <typename Iterator>EPolyTailFindPolyTail(Iterator begin, Iterator end, TSignedSeqPos &cleavageSite);////////////////////////////////////////////////////////////////////////////////// Implementation [in header because of templates]template<typename Iterator>class CRevComp_It {public:    CRevComp_It(void) {}    CRevComp_It(const Iterator &it) {        m_Base = it;    }    char operator*(void) const {        Iterator tmp = m_Base;        switch (*--tmp) {        case 'A': return 'T';        case 'T': return 'A';        case 'C': return 'G';        case 'G': return 'C';        default: return *tmp;        }    }    CRevComp_It& operator++(void) {        --m_Base;        return *this;    }    CRevComp_It operator++(int) {        CRevComp_It it = m_Base;        --m_Base;        return it;    }    CRevComp_It& operator+=(int i) {        m_Base -= i;        return *this;    }    CRevComp_It& operator-=(int i) {        m_Base += i;        return *this;    }    CRevComp_It operator+ (int i) const {        CRevComp_It it(m_Base);        it += i;        return it;    }    CRevComp_It operator- (int i) const {        CRevComp_It it(m_Base);        it -= i;        return it;    }    int operator- (const CRevComp_It &it) const {        return it.m_Base - m_Base;    }        //booleans    bool operator>= (const CRevComp_It &it) const {        return m_Base <= it.m_Base;    }    bool operator>  (const CRevComp_It &it) const {        return m_Base < it.m_Base;    }    bool operator<= (const CRevComp_It &it) const {        return m_Base >= it.m_Base;    }    bool operator<  (const CRevComp_It &it) const {        return m_Base > it.m_Base;    }    bool operator== (const CRevComp_It &it) const {        return m_Base == it.m_Base;    }    bool operator!= (const CRevComp_It &it) const {        return m_Base != it.m_Base;    }private:    Iterator m_Base;};///////////////////////////////////////////////////////////////////////////////// PRE : same conditions as STL 'search', but iterators must have ptrdiff_t// difference type// POST: same as STL 'search'template <typename ForwardIterator1, typename ForwardIterator2>ForwardIterator1 ItrSearch(ForwardIterator1 first1, ForwardIterator1 last1,                           ForwardIterator2 first2, ForwardIterator2 last2){    ptrdiff_t d1 = last1 - first1;    ptrdiff_t d2 = last2 - first2;    if (d1 < d2) {        return last1;    }    ForwardIterator1 current1 = first1;    ForwardIterator2 current2 = first2;    while (current2 != last2) {        if (!(*current1 == *current2)) {            if (d1-- == d2) {                return last1;            } else {                current1 = ++first1;                current2 = first2;            }        } else {            ++current1;            ++current2;        }    }    return (current2 == last2) ? first1 : last1;}///////////////////////////////////////////////////////////////////////////////// PRE : two random access iterators pointing to sequence data [begin,// end)// POST: poly-A tail cleavage site, if any (-1 if not)template <typename Iterator>TSignedSeqPos FindPolyA(Iterator begin, Iterator end){    string motif1("AATAAA");    string motif2("ATTAAA");    Iterator pos = max(begin, end-250);    Iterator uStrmMotif = pos;    while (uStrmMotif != end) {        pos = uStrmMotif;        uStrmMotif = ItrSearch(pos, end, motif1.begin(), motif1.end());        if (uStrmMotif == end) {            uStrmMotif = ItrSearch(pos, end, motif2.begin(), motif2.end());        }        if (uStrmMotif != end) {            uStrmMotif += 6; // skip over upstream motif            pos = uStrmMotif;            if (end - pos < 10) { //min skip of 10                break;            }            Iterator maxCleavage = (end - pos < 30) ? end : pos + 30;            unsigned int aRun = 0;            for (pos += 10;  pos < maxCleavage  &&  aRun < 3;  ++pos) {                if (*pos == 'A') {                    ++aRun;                } else {                    aRun = 0;                }            }                        if (aRun) {                pos -= aRun;            }            TSignedSeqPos cleavageSite = pos - begin;            //now let's look for poly-adenylated tail..            unsigned int numA = 0, numOther = 0;            while (pos < end) {                if (*pos == 'A') {                    ++numA;                } else {                    ++numOther;                }                ++pos;            }            if (numOther + numA > 0  &&                ((double) numA / (numA+numOther)) > 0.95) {                return cleavageSite;            }        }    }    return -1;}///////////////////////////////////////////////////////////////////////////////// PRE : two random access iterators pointing to sequence data [begin,// end)// POST: cleavageSite (if any) and whether we found a poly-A tail, a poly-T// head, or neithertemplate<typename Iterator>EPolyTail FindPolyTail(Iterator begin, Iterator end,                       TSignedSeqPos &cleavageSite){    cleavageSite = FindPolyA(begin, end);    if (cleavageSite >= 0) {        return ePolyTail_A3;    } else {        int seqLen = end - begin;        cleavageSite = FindPolyA(CRevComp_It<Iterator>(end),                                 CRevComp_It<Iterator>(begin));        if (cleavageSite >= 0) {            cleavageSite = seqLen - cleavageSite - 1;            return ePolyTail_T5;        }    }    return ePolyTail_None;}END_NCBI_SCOPE/** ===========================================================================* $Log: polya.hpp,v $* Revision 1000.2  2004/06/01 18:04:31  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6** Revision 1.6  2004/05/12 19:13:36  johnson* make sure no iterator ever goes past end** Revision 1.5  2004/05/11 18:36:56  johnson* made reverse iterator more standard (== base-1); avoid add a value to the* iterator that could potentially take it past the end** Revision 1.4  2004/04/28 15:19:46  johnson* Uses templated iterators; no longer accepts user suggestion for cleavage* site** Revision 1.3  2003/12/31 20:41:39  johnson* FindPolySite takes cleavage prompt for both 3' poly-A and 5' poly-T** Revision 1.2  2003/12/30 21:28:31  johnson* added msvc export specifiers** ===========================================================================*/#endif /*ALGO_SEQUENCE___POLYA__HPP*/

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?