📄 parse.cpp
字号:
if (dynamic_cast<const SingleExon*>(pe)) { genes.push_front(Gene(pe->Strand(),true,true)); startcodon = true; stopcodon = true; } else if (dynamic_cast<const LastExon*>(pe)) { if (pe->isPlus()) { genes.push_front(Gene(pe->Strand(),false,true)); } else { genes.front().leftend = true; } stopcodon = true; } else if (dynamic_cast<const FirstExon*>(pe)) { if (pe->isMinus()) { genes.push_front(Gene(pe->Strand(),false,true)); } else { genes.front().leftend = true; } startcodon = true; } Gene& curgen = genes.front(); int estart = pe->Start(); int estop = pe->Stop(); if (pe->isPlus()) { int ph = (pe->Phase() - estop + estart)%3; if (ph < 0) { ph += 3; } curgen.cds_shift = ph; } else if (curgen.empty()) { curgen.cds_shift = pe->Phase(); } if (pe->isMinus() && startcodon) { curgen.cds.push_back(nT); curgen.cds.push_back(nA); curgen.cds.push_back(nC); } for (int i = estop; i >= estart; --i) { curgen.cds.push_back(*seqscr.SeqPtr(i,Plus)); } if (pe->isPlus() && startcodon) { curgen.cds.push_back(nG); curgen.cds.push_back(nT); curgen.cds.push_back(nA); } int a = seqscr.SeqMap(estart,true); bool a_insertion = (estart != seqscr.RevSeqMap(a,true)); int b = seqscr.SeqMap(estop,false); bool b_insertion = (estop != seqscr.RevSeqMap(b,true)); pair<CClusterSet::ConstIt,CClusterSet::ConstIt> lim = cls.equal_range(CCluster(a,b)); CClusterSet::ConstIt first = lim.first; CClusterSet::ConstIt last = lim.second; if (lim.first != lim.second) { --last; } int lastid = 0; int margin = numeric_limits<int>::min(); // difference between end of exon and end of alignment exon bool rightexon = false; if (((startcodon && pe->isMinus()) || (stopcodon && pe->isPlus())) && lim.first != lim.second) // rightside UTR { for (CCluster::ConstIt it = last->begin(); it != last->end(); ++it) { const AlignVec& algn = *it; if (algn.Type() == AlignVec::Prot) { continue; } for (int i = algn.size() - 1; i >= 0; --i) { if (b < algn[i].first) { curgen.push_back(ExonData(algn[i].first,algn[i].second,ExonData::Utr)); curgen.back().chain_id.insert(algn.ID()); rightexon = true; } else if (algn[i].second >= a && algn[i].second - b > margin) { margin = algn[i].second - b; lastid = algn.ID(); } } } } int remain = 0; if ((pe->isMinus() && startcodon) || (pe->isPlus() && stopcodon)) // start / stop position correction { if (margin >= 3) { b += 3; margin -= 3; } else if (margin >= 0 && rightexon) { b += margin; remain = 3 - margin; margin = 0; } else { b += 3; margin = 0; } } if (margin > 0) { curgen.push_back(ExonData(b + 1,b + margin,ExonData::Utr)); curgen.back().chain_id.insert(lastid); } else if (remain > 0) { ExonData& e = curgen.back(); int aa = e.start; e.start += remain; int bb = aa + remain - 1; ExonData er(aa,bb,ExonData::Cds); er.chain_id = e.chain_id; curgen.push_back(er); } curgen.push_back(ExonData(a,b,ExonData::Cds)); ExonData& exon = curgen.back(); margin = numeric_limits<int>::min(); bool leftexon = false; lastid = 0; for (CClusterSet::ConstIt cls_it = lim.first; cls_it != lim.second; ++cls_it) { for (CCluster::ConstIt it = cls_it->begin(); it != cls_it->end(); ++it) { const AlignVec& algn = *it; for (int i = 0; i < algn.size(); ++i) { if (exon.stop < algn[i].first) { break; } else if (algn[i].second < exon.start) { leftexon = true; continue; } else if (algn.Type() == AlignVec::Prot) { exon.prot_id.insert(algn.ID()); } else { exon.chain_id.insert(algn.ID()); if (a - algn[i].first > margin) { margin = a - algn[i].first; lastid = algn.ID(); } } } } } const TFrameShifts& fs = seqscr.SeqFrameShifts(); for (int i = 0; i < fs.size(); ++i) { if ((fs[i].Loc() == a && a_insertion) || // exon starts from insertion (fs[i].Loc() == b + 1 && b_insertion) || // exon ends by insertion (fs[i].Loc() <= b && fs[i].Loc() > a)) { exon.fshifts.push_back(fs[i]); } } remain = 0; if ((pe->isPlus() && startcodon) || (pe->isMinus() && stopcodon)) // start / stop position correction { if (margin >= 3) { exon.start -= 3; margin -= 3; } else if (margin >= 0 && leftexon) { exon.start -= margin; remain = 3 - margin; margin = 0; } else { exon.start -= 3; margin = 0; } } if (((startcodon && pe->isPlus()) || (stopcodon && pe->isMinus())) && lim.first != lim.second) // lefttside UTR { int estart = exon.start; if (margin > 0) { curgen.push_back(ExonData(estart - margin,estart - 1,ExonData::Utr)); curgen.back().chain_id.insert(lastid); } for (CCluster::ConstIt it = first->begin(); it != first->end(); ++it) { const AlignVec& algn = *it; if (algn.Type() == AlignVec::Prot) { continue; } for (int i = algn.size() - 1; i >= 0; --i) { if (estart > algn[i].second) { int aa = algn[i].first; int bb = algn[i].second; if (remain > 0) { curgen.push_back(ExonData(bb - remain + 1,bb,ExonData::Cds)); curgen.back().chain_id.insert(algn.ID()); bb -= remain; remain = 0; } curgen.push_back(ExonData(aa,bb,ExonData::Utr)); curgen.back().chain_id.insert(algn.ID()); } } } } } } for (list<Gene>::iterator it = genes.begin(); it != genes.end(); ++it) { Gene& g = *it; reverse(g.begin(),g.end()); if (g.Strand() == Plus) { reverse(g.cds.begin(),g.cds.end()); } else { for (int i = 0; i < g.cds.size(); ++i) { g.cds[i] = toMinus[g.cds[i]]; } } } return genes;}void Parse::PrintInfo() const{ vector<const HMM_State*> states; for (const HMM_State* p = Path(); p != 0; p = p->LeftState()) states.push_back(p); reverse(states.begin(),states.end()); Out(" ",15); Out("Start",8); Out("Stop",8); Out("Score",8); Out("BrScr",8); Out("LnScr",8); Out("RgnScr",8); Out("TrmScr",8); Out("AccScr",8); cout << endl; for (int i = 0; i < states.size(); ++i) { const HMM_State* p = states[i]; if (dynamic_cast<const Intergenic*>(p)) { cout << endl; } Out(p->GetStateName(),13); Out(p->isPlus() ? '+' : '-',2); int a = seqscr.SeqMap(p->Start(),true); int b = seqscr.SeqMap(p->Stop(),false); Out(a,8); Out(b,8); StateScores sc = p->GetStateScores(); Out(sc.score,8); Out(sc.branch,8); Out(sc.length,8); Out(sc.region,8); Out(sc.term,8); Out(p->Score(),8); cout << endl; }}void LogicalCheck(const HMM_State& st, const SeqScores& ss){ typedef const Intergenic* Ig; typedef const Intron* I; typedef const FirstExon* FE; typedef const InternalExon* IE; typedef const LastExon* LE; typedef const SingleExon* SE; typedef const Exon* E; bool out; int phase,strand; vector< pair<int,int> > exons; for (const HMM_State* state = &st; state != 0; state = state->LeftState()) { const HMM_State* left = state->LeftState(); if (left == 0) { if ( !dynamic_cast<Ig>(state) && !dynamic_cast<I>(state) ) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 1"); } out = true; } else if (dynamic_cast<Ig>(state)) { if ( !dynamic_cast<SE>(left) && !(dynamic_cast<LE>(left) && left->isPlus()) && !(dynamic_cast<FE>(left) && left->isMinus()) ) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 2"); } out = true; } else if (I ps = dynamic_cast<I>(state)) { E p; int ph = -1; if (((p = dynamic_cast<FE>(left)) && left->isPlus()) || ((p = dynamic_cast<LE>(left)) && left->isMinus()) || (p = dynamic_cast<IE>(left))) { ph = p->Phase(); } if ((ph < 0) || (ps->Strand() != left->Strand()) || (ps->isPlus() && (ph + 1)%3 != ps->Phase()) || (ps->isMinus() && ph != ps->Phase())) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 3"); } out = false; } else if (SE ps = dynamic_cast<SE>(state)) { if ( !(dynamic_cast<Ig>(left) && ps->Strand() == left->Strand()) ) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 4"); } int a = ps->Start(), b = ps->Stop(); if ((b - a+1)%3 || (ps->isPlus() && (!ss.isStart(a,Plus) || !ss.isStop(b + 1,Plus))) || (ps->isMinus() && (!ss.isStart(b,Minus) || !ss.isStop(a - 1,Minus)))) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 5"); } } else if (FE ps = dynamic_cast<FE>(state)) { I p; if ((ps->Strand() != left->Strand()) || (!(dynamic_cast<Ig>(left) && ps->isPlus()) && !((p = dynamic_cast<I>(left)) && ps->isMinus()))) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 6"); } int a = ps->Start(), b = ps->Stop(); if (ps->isPlus()) { if (ps->Phase() != (b - a)%3) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 7"); } } else { if (p->Phase() != (b + 1-a)%3) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 8"); } } // if ((ps->isPlus() && (!ss.isStart(a,Plus) || !ss.isGT(b + 1,Plus))) || // (ps->isMinus() && (!ss.isStart(b,Minus) || !ss.isGT(a - 1,Minus)))) // { cout << "Logical error9\n"; exit(1); } } else if (IE ps = dynamic_cast<IE>(state)) { I p; if ((ps->Strand() != left->Strand()) || !(p = dynamic_cast<I>(left))) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 10"); } int a = ps->Start(), b = ps->Stop(); if (ps->isPlus()) { if (ps->Phase() != (p->Phase() + b-a)%3) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 11"); } } else { if (p->Phase() != (ps->Phase() + b+1 - a)%3) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 12"); } } // if ((ps->isPlus() && (!ss.isAG(a - 1,Plus) || !ss.isGT(b + 1,Plus))) || // (ps->isMinus() && (!ss.isAG(b + 1,Minus) || !ss.isGT(a - 1,Minus)))) // { cout << "Logical error13\n"; exit(1); } } else if (LE ps = dynamic_cast<LE>(state)) { I p; if ((ps->Strand() != left->Strand()) || (!(dynamic_cast<Ig>(left) && ps->isMinus()) && !((p = dynamic_cast<I>(left)) && ps->isPlus()))) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 14"); } int a = ps->Start(), b = ps->Stop(); if (ps->isPlus()) { if ((p->Phase() + b - a) % 3 != 2) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 15"); } } else { if ((ps->Phase() + b-a)%3 != 2) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 16"); } } // if ((ps->isPlus() && (!ss.isAG(a - 1,Plus) || !ss.isStop(b + 1,Plus))) || // (ps->isMinus() && (!ss.isStop(a - 1,Minus) || !ss.isAG(b + 1,Minus)))) // { cout << "Logical error17\n"; exit(1); } } if (E ps = dynamic_cast<E>(state)) { phase = ps->Phase(); strand = ps->Strand(); exons.push_back(make_pair(ps->Start(),ps->Stop())); out = false; } if (out && !exons.empty()) { IVec seq; if (strand == Plus) { for (int i = exons.size() - 1; i >= 0; --i) { int a = exons[i].first; int b = exons[i].second; seq.insert(seq.end(),ss.SeqPtr(a,Plus),ss.SeqPtr(b,Plus) + 1); } phase = (phase - exons.back().second + exons.back().first)%3; if (phase < 0) { phase += 3; } } else { for (int i = 0; i < exons.size(); ++i) { int b = exons[i].first; int a = exons[i].second; seq.insert(seq.end(),ss.SeqPtr(a,Minus),ss.SeqPtr(b,Minus) + 1); } int l = seq.size() - 1; phase = (phase + exons.back().second - exons.back().first - l)%3; if (phase < 0) { phase += 3; } } for (int i = (3 - phase) % 3; i + 2 < seq.size(); i += 3) { if ((seq[i] == nT && seq[i + 1] == nA && seq[i + 2] == nA) || (seq[i] == nT && seq[i + 1] == nA && seq[i + 2] == nG) || (seq[i] == nT && seq[i + 1] == nG && seq[i + 2] == nA)) { NCBI_THROW(CGnomonException, eGenericError, "GNOMON: Logical error 18"); } } exons.clear(); // cout << "Gene is cleared\n"; } }}END_NCBI_SCOPE/* * =========================================================================== * $Log: parse.cpp,v $ * Revision 1000.2 2004/06/01 18:08:37 gouriano * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.3 * * Revision 1.3 2004/05/21 21:41:03 gorelenk * Added PCH ncbi_pch.hpp * * Revision 1.2 2003/11/06 15:02:21 ucko * Use iostream interface from ncbistre.hpp for GCC 2.95 compatibility. * * Revision 1.1 2003/10/24 15:07:25 dicuccio * Initial revision * * =========================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -