📄 omssa.cpp
字号:
#endif, Peaks, true); // keep a list of redundant peptides map <string, CMSHits * > PepDone; // add to hitset by score for(iScoreList = ScoreList.begin(), iSearch = 0; iScoreList != ScoreList.end(); iScoreList++, iSearch++) { CMSHits * Hit; CMSPepHit * Pephit; MSHit = iScoreList->second; header = 0; while (readdb_get_header(rdfp, MSHit->GetSeqIndex(), &header, &sip, &blastdefline)) { bestid = SeqIdFindBest(sip, SEQID_GI); if(!bestid) continue; string seqstring; string deflinestring(blastdefline); int iseq; length = readdb_get_sequence(rdfp, MSHit->GetSeqIndex(), &Sequence); for (iseq = MSHit->GetStart(); iseq <= MSHit->GetStop(); iseq++) { seqstring += UniqueAA[Sequence[iseq]]; }#ifdef CHECKGI CheckGi(bestid->data.intvalue);#endif if(PepDone.find(seqstring) != PepDone.end()) { Hit = PepDone[seqstring]; } else { Hit = new CMSHits; Hit->SetPepstring(seqstring); double Score = iScoreList->first; Hit->SetEvalue(Score); Hit->SetPvalue(Score/Peaks-> GetPeptidesExamined(MSHit-> GetCharge())); Hit->SetCharge(MSHit->GetCharge()); CRef<CMSHits> hitref(Hit); HitSet->SetHits().push_back(hitref); PepDone[seqstring] = Hit; } Pephit = new CMSPepHit; Pephit->SetStart(MSHit->GetStart()); Pephit->SetStop(MSHit->GetStop()); Pephit->SetGi(bestid->data.intvalue); Pephit->SetDefline(deflinestring); CRef<CMSPepHit> pepref(Pephit); Hit->SetPephits().push_back(pepref); MemFree(blastdefline); SeqIdSetFree(sip); } } ScoreList.clear(); }}// calculate the evalues of the top hits and sortvoid CSearch::CalcNSort(TScoreList& ScoreList, double Threshold, CMSPeak* Peaks, bool NewScore){ int iCharges; int iHitList; int length; unsigned char *Sequence; for(iCharges = 0; iCharges < Peaks->GetNumCharges(); iCharges++) { TMSHitList& HitList = Peaks->GetHitList(iCharges); for(iHitList = 0; iHitList != Peaks->GetHitListIndex(iCharges); iHitList++) {#ifdef CHECKGI unsigned header; char *blastdefline; SeqId *sip, *bestid; header = 0; readdb_get_header(rdfp, HitList[iHitList].GetSeqIndex(), &header, &sip,&blastdefline); bestid = SeqIdFindBest(sip, SEQID_GI); if(!bestid) continue; CheckGi(bestid->data.intvalue); MemFree(blastdefline); SeqIdSetFree(sip);#endif // recompute ladder length = readdb_get_sequence(rdfp, HitList[iHitList].GetSeqIndex(), &Sequence); int tempMass = HitList[iHitList].GetMass(); int tempStart = HitList[iHitList].GetStart(); int tempStop = HitList[iHitList].GetStop(); int Charge = HitList[iHitList].GetCharge(); int Which = Peaks->GetWhich(Charge); double a = CalcPoissonMean(tempStart, tempStop, tempMass, Peaks, Charge, Threshold); if ( a < 0) continue; // error return; if(HitList[iHitList].GetHits() < a) continue; double pval; if(NewScore) { int High, Low, NumPeaks, NumLo, NumHi; Peaks->HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi); double TopHitProb = ((double)MSNUMTOP)/NumPeaks; double Normal = CalcNormalTopHit(a, TopHitProb); pval = CalcPvalueTopHit(a, HitList[iHitList].GetHits(Threshold, Peaks->GetMaxI(Which)), Peaks->GetPeptidesExamined(Charge), Normal, TopHitProb);#ifdef MSSTATRUN cout << pval << " " << a << " " << Charge << endl; // cerr << "pval = " << pval << " Normal = " << Normal << endl;#endif } else { pval = CalcPvalue(a, HitList[iHitList]. GetHits(Threshold, Peaks->GetMaxI(Which)), Peaks->GetPeptidesExamined(Charge)); } double eval = pval * Peaks->GetPeptidesExamined(Charge); ScoreList.insert(pair<const double, CMSHit *> (eval, &(HitList[iHitList]))); } } }//calculates the poisson times the top hit probabilitydouble CSearch::CalcPoissonTopHit(double Mean, int i, double TopHitProb){double retval;#ifdef NCBI_OS_MSWIN retval = exp(-Mean) * pow(Mean, i) / exp(LnGamma(i+1));#else retval = exp(-Mean) * pow(Mean, i) / exp(lgamma(i+1));#endifretval *= 1.0L - pow((1.0L-TopHitProb), i);return retval;}double CSearch::CalcPoisson(double Mean, int i){#ifdef NCBI_OS_MSWIN return exp(-Mean) * pow(Mean, i) / exp(LnGamma(i+1));#else return exp(-Mean) * pow(Mean, i) / exp(lgamma(i+1));#endif}double CSearch::CalcPvalue(double Mean, int Hits, int n){ if(Hits <= 0) return 1.0L; int i; double retval(0.0L); for(i = 0; i < Hits; i++) retval += CalcPoisson(Mean, i); retval = 1.0L - pow(retval, n); if(retval <= 0.0L) retval = numeric_limits<double>::min(); return retval;}#define MSDOUBLELIMIT 0.0000000000000001L// do full summation of probability distributiondouble CSearch::CalcNormalTopHit(double Mean, double TopHitProb){ int i; double retval(0.0L), before(-1.0L), increment; for(i = 1; i < 1000; i++) { increment = CalcPoissonTopHit(Mean, i, TopHitProb); // convergence hack -- on linux (at least) convergence test doesn't work // for gcc release build if(increment <= MSDOUBLELIMIT) break; // if(increment <= numeric_limits<double>::epsilon()) break; retval += increment; if(retval == before) break; // convergence before = retval; } return retval;}double CSearch::CalcPvalueTopHit(double Mean, int Hits, int n, double Normal, double TopHitProb){ if(Hits <= 0) return 1.0L; int i; double retval(0.0L), increment; for(i = 1; i < Hits; i++) { increment = CalcPoissonTopHit(Mean, i, TopHitProb); if(increment <= MSDOUBLELIMIT) break; retval += increment; } retval /= Normal; retval = 1.0L - pow(retval, n); if(retval <= 0.0L) retval = numeric_limits<double>::min(); return retval;}double CSearch::CalcPoissonMean(int Start, int Stop, int Mass, CMSPeak *Peaks, int Charge, double Threshold){ double retval(1.0); double m; // mass of ladder double o; // min m/z value of peaks double r; // max m/z value of peaks double h; // number of calculated m/z values in one ion series double v1; // number of m/z values above mh/2 and below mh (+1 product ions) double v2; // number of m/z value below mh/2 double v; // number of peaks double t; // mass tolerance width int High, Low, NumPeaks, NumLo, NumHi; Peaks->HighLow(High, Low, NumPeaks, Mass, Charge, Threshold, NumLo, NumHi); if(High == -1) return -1.0; // error o = (double)Low/MSSCALE; r = (double)High/MSSCALE; v1 = NumHi; v2 = NumLo; v = NumPeaks; m = Mass/(double)MSSCALE; h = Stop - Start; t = (Peaks->GetTol())/(double)MSSCALE; // see 12/13/02 notebook, pg. 127 retval = 4.0 * t * h * v / m; if(Charge >= kConsiderMult) { retval *= (m - 3*o + r)/(r - o); } #if 0 // variation that counts +1 and +2 separately // see 8/19/03 notebook, pg. 71 retval = 4.0 * t * h / m; if(Charge >= kConsiderMult) { // retval *= (m - 3*o + r)/(r - o); retval *= (3 * v2 + v1); } else retval *= (v2 + v1);#endif return retval;}CSearch::~CSearch(){ if(rdfp) readdb_destruct(rdfp);}/*$Log: omssa.cpp,v $Revision 1000.4 2004/06/01 18:09:10 gourianoPRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.20Revision 1.20 2004/05/27 20:52:15 lewisgbetter exception checking, use of AutoPtr, command line parsingRevision 1.19 2004/05/21 21:41:03 gorelenkAdded PCH ncbi_pch.hppRevision 1.18 2004/04/06 19:53:20 lewisgallow adjustment of precursor charges that allow multiply charged product ionsRevision 1.17 2004/04/05 20:49:16 lewisgfix missed mass bug and reorganize codeRevision 1.16 2004/03/31 02:00:26 ucko+<algorithm> for sort()Revision 1.15 2004/03/30 19:36:59 lewisgmultiple mod codeRevision 1.14 2004/03/16 20:18:54 gorelenkChanged includes of private headers.Revision 1.13 2004/03/01 18:24:08 lewisgbetter mod handlingRevision 1.12 2003/12/22 21:58:00 lewisgtop hit code and variable mod fixesRevision 1.11 2003/12/04 23:39:08 lewisgno-overlap hits and various bugfixesRevision 1.10 2003/11/20 22:32:50 lewisgfix end of sequence X bugRevision 1.9 2003/11/20 15:40:53 lewisgfix hitlist bug, change defaultsRevision 1.8 2003/11/17 18:36:56 lewisgfix cref use to make sure ref counts are decremented at loop terminationRevision 1.7 2003/11/14 20:28:06 lewisgscale precursor tolerance by chargeRevision 1.6 2003/11/13 19:07:38 lewisgbugs: iterate completely over nr, don't initialize blastdb by iterationRevision 1.5 2003/11/10 22:24:12 lewisgallow hitlist size to vary Revision 1.4 2003/10/24 21:28:41 lewisg add omssa, xomssa, omssacl to win32 build, including dll Revision 1.3 2003/10/22 15:03:32 lewisg limits and string compare changed for gcc 2.95 compatibility Revision 1.2 2003/10/21 21:12:17 lewisg reorder headers Revision 1.1 2003/10/20 21:32:13 lewisg ommsa toolkit version Revision 1.18 2003/10/07 18:02:28 lewisg prep for toolkit Revision 1.17 2003/10/06 18:14:17 lewisg threshold vary Revision 1.16 2003/08/14 23:49:22 lewisg first pass at variable mod Revision 1.15 2003/08/06 18:29:11 lewisg support for filenames, numbers using regex Revision 1.14 2003/07/21 20:25:03 lewisg fix missing peak bug Revision 1.13 2003/07/19 15:07:38 lewisg indexed peaks Revision 1.12 2003/07/18 20:50:34 lewisg *** empty log message *** Revision 1.11 2003/07/17 18:45:50 lewisg multi dta support Revision 1.10 2003/07/07 16:17:51 lewisg new poisson distribution and turn off histogram Revision 1.9 2003/05/01 14:52:10 lewisg fixes to scoring Revision 1.8 2003/04/18 20:46:52 lewisg add graphing to omssa Revision 1.7 2003/04/07 16:47:16 lewisg pc fixes Revision 1.6 2003/04/04 18:43:51 lewisg tax cut Revision 1.5 2003/04/02 18:49:51 lewisg improved score, architectural changes Revision 1.4 2003/03/21 21:14:40 lewisg merge ming's code, other stuff Revision 1.3 2003/02/10 19:37:56 lewisg perf and web page cleanup Revision 1.2 2003/02/07 16:18:23 lewisg bugfixes for perf and to work on exp data Revision 1.1 2003/02/03 20:39:02 lewisg omssa cgi */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -