📄 models.cpp
字号:
SeqScores::SeqScores(Terminal& a, Terminal& d, Terminal& stt, Terminal& stp, CodingRegion& cr, NonCodingRegion& ncr, NonCodingRegion& ing, CVec& original_sequence, int from, int to, const CClusterSet& cls, const TFrameShifts& initial_fshifts, bool repeats, bool leftwall, bool rightwall, string cntg) : acceptor(a), donor(d), start(stt), stop(stp), cdr(cr), ncdr(ncr), intrg(ing), cluster_set(cls), fshifts(initial_fshifts), shift(from), contig(cntg){ for (CClusterSet::ConstIt it_cls = cls.begin(); it_cls != cls.end(); ++it_cls) { if (it_cls->Limits().first <= from || it_cls->Limits().second >= to) { continue; } for (CCluster::ConstIt it = it_cls->begin(); it != it_cls->end(); ++it) { const AlignVec& algn = *it; if (algn.Type() == AlignVec::Prot) { for (int i = 0; i < algn.size() - 1; ++i) { int a = algn[i].second + 1; int b = algn[i + 1].first - 1; int len = b - a + 1; int res = len % 3; if (res < 0) { res += 3; } int loc = (a + b) / 2; if (len < Intron::MinIntron() && res != 0) { if (res == 1) { fshifts.push_back(CFrameShiftInfo(loc,false)); } // local copy else { fshifts.push_back(CFrameShiftInfo(loc,true,'N')); } } } } } } sort(fshifts.begin(),fshifts.end()); TFrameShifts::iterator fsi = fshifts.begin(); while (fsi != fshifts.end() && fsi->Loc() < from) { ++fsi; } CVec sequence; rev_seq_map.resize(to - from + 1); for (int i = from; i <= to; ++i) { if (fsi == fshifts.end() || i != fsi->Loc()) { sequence.push_back(original_sequence[i]); rev_seq_map[i - shift] = seq_map.size(); seq_map.push_back(i); } else if (fsi->IsInsertion()) // Insertion { sequence.push_back(fsi->InsertValue()); seq_map.push_back(-1); sequence.push_back(original_sequence[i]); rev_seq_map[i - shift] = -1; seq_map.push_back(i); ++fsi; } else // Deletion { rev_seq_map[i - shift] = -1; ++fsi; } } int len = seq_map.size(); try { notining.resize(len,-1); inalign.resize(len,-1); for (int strand = 0; strand < 2; ++strand) { seq[strand].resize(len); ascr[strand].resize(len,BadScore); dscr[strand].resize(len,BadScore); sttscr[strand].resize(len,BadScore); stpscr[strand].resize(len,BadScore); ncdrscr[strand].resize(len,BadScore); ingscr[strand].resize(len,BadScore); notinintron[strand].resize(len,-1); for (int frame = 0; frame < 3; ++frame) { cdrscr[strand][frame].resize(len,BadScore); laststop[strand][frame].resize(len,-1); notinexon[strand][frame].resize(len,-1); } for (int ph = 0; ph < 2; ++ph) { asplit[strand][ph].resize(len,0); dsplit[strand][ph].resize(len,0); } } } catch(bad_alloc&) { string msg("Not enough memory in SeqScores: "); msg += NStr::IntToString(from) + " "; msg += NStr::IntToString(to); NCBI_THROW(CGnomonException, eGenericError, msg); } for (int i = 0; i < len; ++i) { char c = sequence[i]; if (repeats && c != toupper(c)) { laststop[Plus ][0][i] = i; laststop[Plus ][1][i] = i; laststop[Plus ][2][i] = i; laststop[Minus][0][i] = i; laststop[Minus][1][i] = i; laststop[Minus][2][i] = i; } int l = ACGT(c); seq[Plus][i] = l; seq[Minus][len - 1-i] = toMinus[l]; } CClusterSet cls_local(cntg); for (CClusterSet::ConstIt it_cls = cls.begin(); it_cls != cls.end(); ++it_cls) { if (it_cls->Limits().first < from || it_cls->Limits().second > to) { continue; } for (CCluster::ConstIt it = it_cls->begin(); it != it_cls->end(); ++it) { AlignVec algn(it->Strand(),it->ID(),it->Type()); if (it->CdsLimits().first >= 0) { algn.SetCdsLimits(IPair(RevSeqMap(it->CdsLimits().first,true),RevSeqMap(it->CdsLimits().second,false))); } for (int k = 0; k < it->size(); ++k) { int a = (*it)[k].first; int b = (*it)[k].second; a = RevSeqMap(a,true); b = RevSeqMap(b,false); algn.Insert(IPair(a,b)); for (int i = a; i <= b; ++i) { // ignoring repeats in alignmnets laststop[Plus ][0][i] = -1; laststop[Plus ][1][i] = -1; laststop[Plus ][2][i] = -1; laststop[Minus][0][i] = -1; laststop[Minus][1][i] = -1; laststop[Minus][2][i] = -1; } } cls_local.InsertAlignment(algn); } } for (CClusterSet::ConstIt it_cls = cls_local.begin(); it_cls != cls_local.end(); ++it_cls) { for (CCluster::ConstIt it = it_cls->begin(); it != it_cls->end(); ++it) { const AlignVec& algn = *it; int strand = algn.Strand(); int otherstrand = (strand == Plus) ? Minus : Plus; int type = algn.Type(); if (type != AlignVec::Prot) { if (strand == Plus) { // accept NONCONSENSUS alignment splices for (int k = 0; k < algn.size(); ++k) { int i = algn[k].first; if (k != 0) { ascr[strand][i - 1] = 0; } i = algn[k].second; if (k != algn.size() - 1) { dscr[strand][i] = 0; } } } else { for (int k = 0; k < algn.size(); ++k) { int i = algn[k].first; if (k != 0) { dscr[strand][i - 1] = 0; } i = algn[k].second; if (k != algn.size() - 1) { ascr[strand][i] = 0; } } } for (int k = 0; k < algn.size() - 1; ++k) { int introna = algn[k].second + 1; int intronb = algn[k + 1].first - 1; for (int pnt = introna; pnt <= intronb; ++pnt) { for (int frame = 0; frame < 3; ++frame) { notinexon[strand][frame][pnt] = pnt; } } } for (int k = 0; k < algn.size(); ++k) { int exona = algn[k].first; int exonb = algn[k].second; // if (algn.Type() != AlignVec::RefSeq) // { // if (k == 0) exona = exonb; // for the first and last exons we // if (k == algn.size() - 1) exonb = exona; // respect only one point for non RefSeq // } for (int pnt = exona; pnt <= exonb; ++pnt) { notinintron[strand][pnt] = pnt; } } } else { for (int k = 0; k < algn.size(); ++k) { int exona = algn[k].first; int exonb = algn[k].second; int pnt = (exona + exonb) / 2; pnt += exonb%3 - pnt%3; // this is the right codon end that was checked when alignments were computed // enforcing stop if present or collapsing to one point if (strand == Plus && k == algn.size() - 1 && isStop(exonb + 1,strand)) { exona = pnt; } else if (strand == Minus && k == 0 && isStop(exona - 1,strand)) { exonb = pnt; } else { exona = exonb = pnt; } int codon_end = exonb%3; int f_beg = (strand == Plus) ? 2 - codon_end : codon_end; for (int pnt = exona; pnt <= exonb; ++pnt) { notinintron[strand][pnt] = pnt; for (int frame = 0; frame < 3; ++frame) { if (frame != f_beg) { notinexon[strand][frame][pnt] = pnt; } } } } } int a = algn.Limits().first; int b = algn.Limits().second; for (int i = a; i <= b; ++i) { inalign[i] = a; notinintron[otherstrand][i] = i; for (int frame = 0; frame < 3; ++frame) { notinexon[otherstrand][frame][i] = i; } } if (strand == Plus) { a += 3; } // start now is a part of intergenic!!!! else { b -= 3; } notining[b] = a; if (algn.Type() == AlignVec::Prot) { for (int i = a; i <= b; ++i) { notining[i] = i; } } else if (algn.CdsLimits().first >= 0) { for (int i = algn.CdsLimits().first; i <= algn.CdsLimits().second; ++i) notining[i] = i; } } } const int RepeatMargin = 15; // repeat trimming for (int i = 0; i < len - 1; ++i) { bool rpta = laststop[Plus][0][i] < 0; bool rptb = laststop[Plus][0][i + 1] < 0; if ( !rpta && rptb ) { // b - first repeat int j = i + 1; for (; j <= min(i + RepeatMargin,len - 1); ++j) { laststop[Plus ][0][j] = -1; laststop[Plus ][1][j] = -1; laststop[Plus ][2][j] = -1; laststop[Minus][0][j] = -1; laststop[Minus][1][j] = -1; laststop[Minus][2][j] = -1; } i = j - 1; } else if (rpta && !rptb) { // a - last repeat int j = max(0,i - RepeatMargin + 1); for (; j <= i; ++j) { laststop[Plus ][0][j] = -1; laststop[Plus ][1][j] = -1; laststop[Plus ][2][j] = -1; laststop[Minus][0][j] = -1; laststop[Minus][1][j] = -1; laststop[Minus][2][j] = -1; } } } if (leftwall) { notinintron[Plus][0] = notinintron[Minus][0] = 0; } if (rightwall) { notinintron[Plus][len - 1] = notinintron[Minus][len - 1] = len - 1; } for (int strand = 0; strand < 2; ++strand) { IVec& nin = notinintron[strand]; for (int i = 1; i < len; ++i) { nin[i] = max(nin[i - 1],nin[i]); } for (int frame = 0; frame < 3; ++frame) { IVec& nex = notinexon[strand][frame]; for (int i = 1; i < len; ++i) { nex[i] = max(nex[i - 1],nex[i]); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -