⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parse.cpp

📁 ncbi源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
        }        if (leminus[ph].back().Score() > p->Score()) {            p = &leminus[ph].back();        }        if (feplus[ph].back().Score() > p->Score()) {            p = &feplus[ph].back();        }    }    path = p;}template<class T> void Out(T t, int w, CNcbiOstream& to = cout){    to.width(w);    to.setf(IOS_BASE::right,IOS_BASE::adjustfield);    to << t;}void Out(double t, int w, CNcbiOstream& to = cout, int prec = 1){    to.width(w);    to.setf(IOS_BASE::right,IOS_BASE::adjustfield);    to.setf(IOS_BASE::fixed,IOS_BASE::floatfield);    to.precision(prec);    if (t > 1000000000) {        to << "+Inf";    } else if (t < -1000000000) {        to << "-Inf";    } else {        to << t;    }}inline char toACGT(int c){    switch (c) {    case nA:         return 'A';    case nC:         return 'C';    case nG:         return 'G';    case nT:         return 'T';    case nN:         return 'N';    }}int Parse::PrintGenes(CNcbiOstream& to, CNcbiOstream& toprot, bool complete) const{    enum {DNA_Align = 1, Prot_Align = 2 };    list<Gene> genes = GetGenes();    int right = seqscr.SeqMap(seqscr.SeqLen() - 1,true);    if (complete  &&  !genes.empty()  &&  !genes.back().RightComplete()) {        int partial_start = genes.back().front().Start();        genes.pop_back();        if ( !genes.empty() ) // end of the last complete gene        {            right = genes.back().back().Stop();        } else {            if (partial_start > seqscr.SeqMap(0,true) + 1000) {                right = partial_start - 100;            } else {                return -1;   // calling program MUST be aware of this!!!!!!!            }        }    }    to << "\n" << seqscr.Contig() << '_' << seqscr.SeqMap(0,true) << '_' << right << "\n\n";    to << genes.size() << " genes found\n";    set<int> chain_id, prot_id;    for (list<Gene>::iterator it = genes.begin();  it != genes.end();  ++it) {        const Gene& gene = *it;        for (int i = 0;  i < gene.size();  ++i) {            ITERATE(set<int>, iter, gene[i].ChainID()) {                chain_id.insert(*iter);            }            ITERATE(set<int>, iter, gene[i].ProtID()) {                prot_id.insert(*iter);            }            //chain_id.insert(gene[i].ChainID().begin(), gene[i].ChainID().end());            //prot_id.insert(gene[i].ProtID().begin(),  gene[i].ProtID().end());        }    }    to << (chain_id.size() + prot_id.size()) << " alignments used\n";    int num = 0;    for (list<Gene>::iterator it = genes.begin();  it != genes.end();  ++it) {        ++num;        to << "\n\n";        Out(" ",11,to);        Out("Start",10,to);        Out("Stop",10,to);        Out("Length",7,to);        Out("Align",10,to);        Out("Prot",12,to);        Out("FShift",7,to);        to << endl;        const Gene& gene = *it;        int gene_start = -1;        int gene_stop;        int align_type = 0;        for (int i = 0;  i < gene.size();  ++i) {            const ExonData& exon = gene[i];            const TFrameShifts& fshifts = exon.ExonFrameShifts();            int estart = exon.Start();            if (gene_start < 0  &&  exon.Type() == ExonData::Cds) {                gene_start = estart;            }            if ( !exon.ChainID().empty() ) {                align_type = align_type|DNA_Align;            }            if ( !exon.ProtID().empty() ) {                align_type = align_type|Prot_Align;            }            for (int k = 0;  k < fshifts.size();  ++k) {                int estop = fshifts[k].Loc() - 1;                Out(num,4,to);                Out((exon.Type() == ExonData::Cds) ? "CDS" : "UTR",5,to);                 Out((gene.Strand() == Plus) ? '+' : '-',2,to);                Out(estart,10,to);                Out(estop,10,to);                Out(estop - estart + 1,7,to);                if (exon.ChainID().empty()) {                    Out("-",10,to);                } else {                    Out(*exon.ChainID().begin(),10,to);                }                if (exon.ProtID().empty()) {                    Out("-",12,to);                } else {                    Out(*exon.ProtID().begin(),12,to);                }                if (fshifts[k].IsInsertion()) {                    Out("+",7,to);                    estart = estop;                } else {                    Out("-",7,to);                    estart = estop + 2;                }                to << endl;            }            int estop = exon.Stop();            Out(num,4,to);            Out((exon.Type() == ExonData::Cds) ? "CDS" : "UTR",5,to);             Out((gene.Strand() == Plus) ? '+' : '-',2,to);            Out(estart,10,to);            Out(estop,10,to);            Out(estop - estart + 1,7,to);            if (exon.ChainID().empty()) {                Out("-",10,to);            } else {                Out(*exon.ChainID().begin(),10,to);            }            if (exon.ProtID().empty()) {                Out("-",12,to);            } else {                Out(*exon.ProtID().begin(),12,to);            }            Out("*",7,to);            to << endl;            if (exon.Type() == ExonData::Cds) {                gene_stop = estop;            }        }           toprot << '>' << seqscr.Contig() << "_"             << gene_start << "_"             << gene_stop << "_"            << ((gene.Strand() == Plus) ? "plus" : "minus") << "_"            << align_type << endl;        const IVec& cds = gene.CDS();        int i;        for (i = 0;  i < cds.size() - 2;  i +=3) {            toprot << aa_table[cds[i]*25 + cds[i + 1]*5 + cds[i + 2]];            if ((i + 3)%150 == 0) {                toprot << endl;            }        }        if (i%150 != 0) {            toprot << endl;        }        //      for (i = 0;  i < cds.size();  ++i)        //      {        //          toprot << toACGT(cds[i]);        //          if ((i + 3)%50 == 0) toprot << endl;        //      }        //      if (i%50 != 0) toprot << endl;    }    return right;}/*int Parse::PrintGenes(CNcbiOstream& to, CNcbiOstream& toprot, bool complete) const{    const char* aa_table = "KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX * Y*YXSSSSS * CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";    const HMM_State* plast = Path();    if (complete  &&  dynamic_cast<const Intron*>(plast)) {        while (!dynamic_cast<const Intergenic*>(plast)) {            plast = plast->LeftState();        }        if ( !plast->LeftState() ) {            return plast->Start() + seqscr.SeqMap(0,true) - 1;        } else {            plast = plast->LeftState();        }    }    const CClusterSet& algns = seqscr.Alignments();    int right = seqscr.SeqMap(plast->Stop(),false);  // either end of chunk or and of last complete gene and its alignments    for (CClusterSet::ConstIt it = algns.begin();  it != algns.end();  ++it) {        if (it->Limits().Include(right)) {            right = it->Limits().second;        }    }    to << "\n" << seqscr.Contig() << '_' << seqscr.SeqMap(0,true) << '_' << right << "\n\n";    vector<const Exon*> states;    const Exon* pe;    int gennum = 0;    for (const HMM_State* p = plast;  p != 0;  p = p->LeftState()) {        if (pe = dynamic_cast<const Exon*>(p)) {            states.push_back(pe);        }        if ( !states.empty() ) {            if ( !p->LeftState()  ||  dynamic_cast<const Intergenic*>(p) ) {                ++gennum;            }        }    }    reverse(states.begin(),states.end());    int algnum = 0, algused = 0;    IPair chunk(seqscr.SeqMap(0,true),seqscr.SeqMap(seqscr.SeqLen() - 1,false));    for (CClusterSet::ConstIt it = algns.begin();  it != algns.end();  ++it) {        IPair lim = it->Limits();        if (lim.Intersect(chunk)) {            algnum += it->size();        }        for (int i = 0;  i < states.size();  ++i) {            int a = seqscr.SeqMap(states[i]->Start(),true);            int b = seqscr.SeqMap(states[i]->Stop(),false);            IPair exon(a,b);            if (exon.Intersect(lim)) {                algused += it->size();                break;            }        }    }    IVec ids(states.size(),0), prot_ids(states.size(),0);    for (int i = 0;  i < states.size();  ++i) {        int a = seqscr.SeqMap(states[i]->Start(),true);        int b = seqscr.SeqMap(states[i]->Stop(),false);        IPair exon(a,b);        for (CClusterSet::ConstIt cls_it = algns.begin();  cls_it != algns.end();  ++cls_it) {            for (CCluster::ConstIt it = cls_it->begin();  it != cls_it->end();  ++it)            {                const AlignVec& al = *it;                for (int j = 0;  j < al.size();  ++j) {                    if (exon.Intersect(al[j])) {                        if (al.Type() == AlignVec::Prot) {                            prot_ids[i] = al.ID();                        } else {                            ids[i] = al.ID();                            if (exon.first != al[j].first  ||  exon.second != al[j].second) {                                ids[i] = -ids[i];                            }                        }                    }                }            }        }    }    to << gennum << " genes found\n";    to << algused << " alignments used out of " << algnum << endl;    if (gennum == 0) {        return seqscr.SeqMap(plast->Stop(),false);    }    IVec cds;    int num = 0;    double gene_fscore, left_score;    int gene_start, gene_stop, align_type;    enum {DNA_Align = 1, Prot_Align = 2 };    for (int i = 0;  i < states.size();  ++i) {        pe = states[i];        int shift = 0;        if (i == 0  ||              dynamic_cast<const SingleExon*>(pe)  ||              (pe->isPlus()  &&  dynamic_cast<const FirstExon*>(pe))  ||              (pe->isMinus()  &&  dynamic_cast<const LastExon*>(pe)))        {            gene_start = seqscr.SeqMap(pe->Start(),true);            ++num;            to << "\n\n";            Out(" ",19,to);            Out("Start",10,to);            Out("Stop",10,to);            Out("Length",7,to);            Out("Align",10,to);            Out("Prot",12,to);            Out("FShift",7,to);            to << endl;            left_score = pe->LeftState()->Score();            cds.clear();            const Intron* pi = dynamic_cast<const Intron*>(pe->LeftState());            if (pi != 0) {                int ph = pi->Phase();                if (pe->isPlus()) {                    shift = (3 - ph)%3;                } else {                    shift = ph;                }            }            align_type = 0;        }        //If first / last base is insertion, this information is lost        vector<IPair> subexons;        int a = seqscr.SeqMap(pe->Start(),true);        int b = a;        for (int k = pe->Start() + 1;  k <= pe->Stop();  ++k) {            int c = seqscr.SeqMap(k,false);            if (c == b) {                subexons.push_back(IPair(a,b));                a = b;            }            if (c == b + 2) {                subexons.push_back(IPair(a,b));                a = c;            }            b = c;        }        subexons.push_back(IPair(a,b));        for (int k = 0;  k < subexons.size();  ++k) {            int a = subexons[k].first;            int b = subexons[k].second;            Out(num,4,to);            Out(pe->GetStateName(),13,to);            Out(pe->isPlus() ? '+' : '-',2,to);            Out(a,10,to);            Out(b,10,to);            Out(b - a+1,7,to);            if (ids[i]) {                align_type = align_type|DNA_Align;                Out(abs(ids[i]),9,to);                to << ((ids[i] > 0) ? ' ' : '*');            } else {                Out("-",9,to);                to << ' ';            }            if (prot_ids[i]) {                align_type = align_type|Prot_Align;                Out(prot_ids[i],12,to);            } else {                Out("-",12,to);            }            if (k != 0) {                if (a == subexons[k - 1].second) {                    Out("+",7,to);                } else if (a == subexons[k - 1].second + 2) {                    Out("-",7,to);                } else {                    Out("*",7,to);                }            } else {                Out("*",7,to);            }            to << endl;        }        bool last = false;        if (i == states.size() - 1  ||              dynamic_cast<const SingleExon*>(pe)  ||              (pe->isPlus()  &&  dynamic_cast<const LastExon*>(pe))  ||              (pe->isMinus()  &&  dynamic_cast<const FirstExon*>(pe)))        {            last = true;            gene_stop = seqscr.SeqMap(pe->Stop(),false);        }        copy(seqscr.SeqPtr(pe->Start() + shift,Plus),             seqscr.SeqPtr(pe->Stop() + 1,Plus),back_inserter(cds));        if (last) {            to << endl;            cds.resize(cds.size() - cds.size()%3);            if (pe->isMinus()) {                for (int i = 0;  i < cds.size();  ++i) cds[i] = toMinus[cds[i]];                reverse(cds.begin(),cds.end());            }            toprot << '>' << seqscr.Contig() << "_"                 << gene_start << "_"                 << gene_stop << "_"                << (pe->isPlus() ? "plus" : "minus") << "_"                << align_type << endl;            int i;            for (i = 0;  i < cds.size() - 2;  i +=3) {                toprot << aa_table[cds[i]*25 + cds[i + 1]*5 + cds[i + 2]];                if ((i + 3)%150 == 0) {                    toprot << endl;                }            }            if (i%150 != 0) {                toprot << endl;            }        }    }    return right;}*/list<Gene> Parse::GetGenes() const{    list<Gene> genes;    const CClusterSet& cls = seqscr.Alignments();    if (dynamic_cast<const Intron*>(Path())) {        genes.push_front(Gene(Path()->Strand(),false,false));    }    for (const HMM_State* p = Path();  p != 0;  p = p->LeftState()) {        if (const Exon* pe = dynamic_cast<const Exon*>(p)) {            bool stopcodon = false;            bool startcodon = false;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -