⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parse.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
            if (dynamic_cast<const SingleExon*>(pe)) {                genes.push_front(Gene(pe->Strand(),true,true));                startcodon = true;                stopcodon = true;            } else if (dynamic_cast<const LastExon*>(pe)) {                if (pe->isPlus()) {                    genes.push_front(Gene(pe->Strand(),false,true));                } else {                    genes.front().leftend = true;                }                stopcodon = true;            } else if (dynamic_cast<const FirstExon*>(pe)) {                if (pe->isMinus()) {                    genes.push_front(Gene(pe->Strand(),false,true));                } else {                    genes.front().leftend = true;                }                startcodon = true;            }            Gene& curgen = genes.front();            int estart = pe->Start();            int estop = pe->Stop();            if (pe->isPlus()) {                int ph = (pe->Phase() - estop + estart)%3;                if (ph < 0) {                    ph += 3;                }                curgen.cds_shift = ph;            } else if (curgen.empty()) {                curgen.cds_shift = pe->Phase();            }            if (pe->isMinus()  &&  startcodon) {                curgen.cds.push_back(nT);                curgen.cds.push_back(nA);                curgen.cds.push_back(nC);            }            for (int i = estop;  i >= estart;  --i) {                curgen.cds.push_back(*seqscr.SeqPtr(i,Plus));            }            if (pe->isPlus()  &&  startcodon) {                curgen.cds.push_back(nG);                curgen.cds.push_back(nT);                curgen.cds.push_back(nA);            }            int a = seqscr.SeqMap(estart,true);            bool a_insertion = (estart != seqscr.RevSeqMap(a,true));            int b = seqscr.SeqMap(estop,false);            bool b_insertion = (estop != seqscr.RevSeqMap(b,true));            pair<CClusterSet::ConstIt,CClusterSet::ConstIt> lim = cls.equal_range(CCluster(a,b));            CClusterSet::ConstIt first = lim.first;            CClusterSet::ConstIt last = lim.second;            if (lim.first != lim.second) {                --last;            }            int lastid = 0;            int margin = numeric_limits<int>::min();   // difference between end of exon and end of alignment exon            bool rightexon = false;            if (((startcodon  &&  pe->isMinus())  ||  (stopcodon  &&  pe->isPlus()))  &&  lim.first != lim.second)   // rightside UTR            {                for (CCluster::ConstIt it = last->begin();  it != last->end();  ++it) {                    const AlignVec& algn = *it;                    if (algn.Type() == AlignVec::Prot) {                        continue;                    }                    for (int i = algn.size() - 1;  i >= 0;  --i) {                        if (b < algn[i].first) {                            curgen.push_back(ExonData(algn[i].first,algn[i].second,ExonData::Utr));                            curgen.back().chain_id.insert(algn.ID());                             rightexon = true;                        } else if (algn[i].second >= a  &&  algn[i].second - b > margin) {                            margin = algn[i].second - b;                            lastid = algn.ID();                        }                    }                }            }            int remain = 0;            if ((pe->isMinus()  &&  startcodon)  ||  (pe->isPlus()  &&  stopcodon))   // start / stop position correction            {                if (margin >= 3) {                    b += 3;                    margin -= 3;                } else if (margin >= 0  &&  rightexon) {                    b += margin;                    remain = 3 - margin;                    margin = 0;                } else {                    b += 3;                    margin = 0;                }            }            if (margin > 0) {                curgen.push_back(ExonData(b + 1,b + margin,ExonData::Utr));                 curgen.back().chain_id.insert(lastid);             } else if (remain > 0) {                ExonData& e = curgen.back();                int aa = e.start;                e.start += remain;                int bb = aa + remain - 1;                ExonData er(aa,bb,ExonData::Cds);                er.chain_id = e.chain_id;                curgen.push_back(er);            }            curgen.push_back(ExonData(a,b,ExonData::Cds));            ExonData& exon = curgen.back();            margin = numeric_limits<int>::min();            bool leftexon = false;            lastid = 0;            for (CClusterSet::ConstIt cls_it = lim.first;  cls_it != lim.second;  ++cls_it) {                for (CCluster::ConstIt it = cls_it->begin();  it != cls_it->end();  ++it)                {                    const AlignVec& algn = *it;                    for (int i = 0;  i < algn.size();  ++i) {                        if (exon.stop < algn[i].first) {                            break;                        } else if (algn[i].second < exon.start) {                            leftexon = true;                            continue;                        } else if (algn.Type() == AlignVec::Prot) {                            exon.prot_id.insert(algn.ID());                         } else {                            exon.chain_id.insert(algn.ID());                            if (a - algn[i].first > margin) {                                margin = a - algn[i].first;                                lastid = algn.ID();                            }                        }                    }                }            }            const TFrameShifts& fs = seqscr.SeqFrameShifts();            for (int i = 0;  i < fs.size();  ++i) {                if ((fs[i].Loc() == a  &&  a_insertion)  ||  // exon starts from insertion                    (fs[i].Loc() == b + 1  &&  b_insertion)  ||  // exon ends by insertion                    (fs[i].Loc() <= b  &&  fs[i].Loc() > a)) {                    exon.fshifts.push_back(fs[i]);                }            }            remain = 0;            if ((pe->isPlus()  &&  startcodon)  ||  (pe->isMinus()  &&  stopcodon))   // start / stop position correction            {                if (margin >= 3) {                    exon.start -= 3;                    margin -= 3;                } else if (margin >= 0  &&  leftexon) {                    exon.start -= margin;                    remain = 3 - margin;                    margin = 0;                } else {                    exon.start -= 3;                    margin = 0;                }            }            if (((startcodon  &&  pe->isPlus())  ||  (stopcodon  &&  pe->isMinus()))  &&  lim.first != lim.second)   // lefttside UTR            {                int estart = exon.start;                if (margin > 0) {                    curgen.push_back(ExonData(estart - margin,estart - 1,ExonData::Utr));                     curgen.back().chain_id.insert(lastid);                 }                for (CCluster::ConstIt it = first->begin();  it != first->end();  ++it) {                    const AlignVec& algn = *it;                    if (algn.Type() == AlignVec::Prot) {                        continue;                    }                    for (int i = algn.size() - 1;  i >= 0;  --i) {                        if (estart > algn[i].second) {                            int aa = algn[i].first;                            int bb = algn[i].second;                            if (remain > 0) {                                curgen.push_back(ExonData(bb - remain + 1,bb,ExonData::Cds));                                curgen.back().chain_id.insert(algn.ID());                                 bb -= remain;                                remain = 0;                            }                            curgen.push_back(ExonData(aa,bb,ExonData::Utr));                             curgen.back().chain_id.insert(algn.ID());                         }                    }                }            }        }    }    for (list<Gene>::iterator it = genes.begin();  it != genes.end();  ++it) {        Gene& g = *it;        reverse(g.begin(),g.end());        if (g.Strand() == Plus) {            reverse(g.cds.begin(),g.cds.end());        } else {            for (int i = 0;  i < g.cds.size();  ++i) {                g.cds[i] = toMinus[g.cds[i]];            }        }    }    return genes;}void Parse::PrintInfo() const{    vector<const HMM_State*> states;    for (const HMM_State* p = Path();  p != 0;  p = p->LeftState()) states.push_back(p);    reverse(states.begin(),states.end());    Out(" ",15);    Out("Start",8);    Out("Stop",8);    Out("Score",8);    Out("BrScr",8);    Out("LnScr",8);    Out("RgnScr",8);    Out("TrmScr",8);    Out("AccScr",8);    cout << endl;    for (int i = 0;  i < states.size();  ++i) {        const HMM_State* p = states[i];        if (dynamic_cast<const Intergenic*>(p)) {            cout << endl;        }        Out(p->GetStateName(),13);        Out(p->isPlus() ? '+' : '-',2);        int a = seqscr.SeqMap(p->Start(),true);        int b = seqscr.SeqMap(p->Stop(),false);        Out(a,8);        Out(b,8);        StateScores sc = p->GetStateScores();        Out(sc.score,8);        Out(sc.branch,8);        Out(sc.length,8);        Out(sc.region,8);        Out(sc.term,8);        Out(p->Score(),8);        cout << endl;    }}void LogicalCheck(const HMM_State& st, const SeqScores& ss){    typedef const Intergenic* Ig;    typedef const Intron* I;    typedef const FirstExon* FE;    typedef const InternalExon* IE;    typedef const LastExon* LE;    typedef const SingleExon* SE;    typedef const Exon* E;    bool out;    int phase,strand;    vector< pair<int,int> > exons;    for (const HMM_State* state = &st;  state != 0;  state = state->LeftState()) {        const HMM_State* left = state->LeftState();        if (left == 0) {            if ( !dynamic_cast<Ig>(state)  &&  !dynamic_cast<I>(state) ) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 1");            }            out = true;        } else if (dynamic_cast<Ig>(state)) {            if ( !dynamic_cast<SE>(left)  &&                   !(dynamic_cast<LE>(left)  &&  left->isPlus())  &&                   !(dynamic_cast<FE>(left)  &&  left->isMinus()) ) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 2");            }            out = true;        } else if (I ps = dynamic_cast<I>(state)) {            E p;            int ph = -1;            if (((p = dynamic_cast<FE>(left))  &&  left->isPlus())  ||                  ((p = dynamic_cast<LE>(left))  &&  left->isMinus())  ||                  (p = dynamic_cast<IE>(left))) {                ph = p->Phase();            }            if ((ph < 0)  ||  (ps->Strand() != left->Strand())  ||                  (ps->isPlus()  &&  (ph + 1)%3 != ps->Phase())  ||                  (ps->isMinus()  &&  ph != ps->Phase())) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 3");            }            out = false;        } else if (SE ps = dynamic_cast<SE>(state)) {            if ( !(dynamic_cast<Ig>(left)  &&  ps->Strand() == left->Strand()) ) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 4");            }            int a = ps->Start(), b = ps->Stop();            if ((b - a+1)%3  ||                  (ps->isPlus()  &&  (!ss.isStart(a,Plus)  ||  !ss.isStop(b + 1,Plus)))  ||                  (ps->isMinus()  &&  (!ss.isStart(b,Minus)  ||  !ss.isStop(a - 1,Minus))))  {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 5");            }        } else if (FE ps = dynamic_cast<FE>(state)) {            I p;            if ((ps->Strand() != left->Strand())  ||                  (!(dynamic_cast<Ig>(left)  &&  ps->isPlus())  &&                   !((p = dynamic_cast<I>(left))  &&  ps->isMinus()))) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 6");            }            int a = ps->Start(), b = ps->Stop();            if (ps->isPlus()) {                if (ps->Phase() != (b - a)%3) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 7");                }            } else {                if (p->Phase() != (b + 1-a)%3) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 8");                }            }            //          if ((ps->isPlus()  &&  (!ss.isStart(a,Plus)  ||  !ss.isGT(b + 1,Plus)))  ||              //             (ps->isMinus()  &&  (!ss.isStart(b,Minus)  ||  !ss.isGT(a - 1,Minus))))             //          { cout << "Logical error9\n"; exit(1); }        } else if (IE ps = dynamic_cast<IE>(state)) {            I p;            if ((ps->Strand() != left->Strand())  ||                  !(p = dynamic_cast<I>(left))) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 10");            }            int a = ps->Start(), b = ps->Stop();            if (ps->isPlus()) {                if (ps->Phase() != (p->Phase() + b-a)%3) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 11");                }            } else {                if (p->Phase() != (ps->Phase() + b+1 - a)%3) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 12");                }            }            //          if ((ps->isPlus()  &&  (!ss.isAG(a - 1,Plus)  ||  !ss.isGT(b + 1,Plus)))  ||              //             (ps->isMinus()  &&  (!ss.isAG(b + 1,Minus)  ||  !ss.isGT(a - 1,Minus))))             //          { cout << "Logical error13\n"; exit(1); }        } else if (LE ps = dynamic_cast<LE>(state)) {            I p;            if ((ps->Strand() != left->Strand())  ||                  (!(dynamic_cast<Ig>(left)  &&  ps->isMinus())  &&                   !((p = dynamic_cast<I>(left))  &&  ps->isPlus()))) {                NCBI_THROW(CGnomonException, eGenericError,                           "GNOMON: Logical error 14");            }            int a = ps->Start(), b = ps->Stop();            if (ps->isPlus()) {                if ((p->Phase() + b - a) % 3 != 2) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 15");                }            } else {                if ((ps->Phase() + b-a)%3 != 2) {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 16");                }            }            //          if ((ps->isPlus()  &&  (!ss.isAG(a - 1,Plus)  ||  !ss.isStop(b + 1,Plus)))  ||              //             (ps->isMinus()  &&  (!ss.isStop(a - 1,Minus)  ||  !ss.isAG(b + 1,Minus))))             //          { cout << "Logical error17\n"; exit(1); }        }        if (E ps = dynamic_cast<E>(state)) {            phase = ps->Phase();            strand = ps->Strand();            exons.push_back(make_pair(ps->Start(),ps->Stop()));            out = false;        }        if (out  &&  !exons.empty()) {            IVec seq;            if (strand == Plus) {                for (int i = exons.size() - 1;  i >= 0;  --i) {                    int a = exons[i].first;                    int b = exons[i].second;                    seq.insert(seq.end(),ss.SeqPtr(a,Plus),ss.SeqPtr(b,Plus) + 1);                }                phase = (phase - exons.back().second + exons.back().first)%3;                if (phase < 0) {                    phase += 3;                }            } else {                for (int i = 0;  i < exons.size();  ++i) {                    int b = exons[i].first;                    int a = exons[i].second;                    seq.insert(seq.end(),ss.SeqPtr(a,Minus),ss.SeqPtr(b,Minus) + 1);                }                int l = seq.size() - 1;                phase = (phase + exons.back().second - exons.back().first - l)%3;                if (phase < 0) {                    phase += 3;                }            }            for (int i = (3 - phase) % 3;  i + 2 < seq.size();  i += 3) {                if ((seq[i] == nT  &&  seq[i + 1] == nA  &&  seq[i + 2] == nA)  ||                      (seq[i] == nT  &&  seq[i + 1] == nA  &&  seq[i + 2] == nG)  ||                      (seq[i] == nT  &&  seq[i + 1] == nG  &&  seq[i + 2] == nA))  {                    NCBI_THROW(CGnomonException, eGenericError,                               "GNOMON: Logical error 18");                }            }            exons.clear();            //          cout << "Gene is cleared\n";        }    }}END_NCBI_SCOPE/* * =========================================================================== * $Log: parse.cpp,v $ * Revision 1000.2  2004/06/01 18:08:37  gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3 * * Revision 1.3  2004/05/21 21:41:03  gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.2  2003/11/06 15:02:21  ucko * Use iostream interface from ncbistre.hpp for GCC 2.95 compatibility. * * Revision 1.1  2003/10/24 15:07:25  dicuccio * Initial revision * * =========================================================================== */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -