📄 algorithm.cpp
字号:
/* RNA Secondary Structure Prediction, Using the Algorithm of Zuker C++ version by David Mathews, copyright 1996, 1997 Programmed for Isis Pharmaceuticals and the Turner Lab, Chemistry Department, University of Rochester*//* June 14, 2005. M. Zuker changes "infinity" to "0" for motifs containing non-standard bases (not a, c, g, u/t) that are not paired. For example, "dangle[i][j][k][l]" is now set to infinity if any of i, j or k is non-standard. This will continue to be true if i or j is non-standard. However, if i and j are standard and k, the dangling base, is non-standard, then the free energy will be set to 0 (neutral). Similarly for tstacki, tstackh, int11, int12 and int22 when the non-standard bases are not paired.*/#include <stdio.h>#include <string>#include <iostream>#include <fstream> #include <math.h>#include <stdlib.h>using std::cin;using std::cout;using std::ios;using std::ifstream;using std::ofstream;#include "platform.cpp" //platform.cpp contains information specific to the machine#define maxfil 100 //maximum length of file names#define infinity 9999999 //an arbitrary value given to infinity#define maxtloop 100 //maximum tetraloops allowed (info read from tloop)#define maxstructures 1010 //maximum number of structures in ct file#define maxbases 10000 //maximum number of bases in a structure#define ctheaderlength 125 //maximum length of string containing info on sequence#define ga_bonus -10 //the value of a bonus for the "almost coaxial stacking" //case in efn2#define amax 400 //this is a maximum line length for void linout (below)#define col 80 //this is the number of columns in an output file#define numlen 8 //maximum digits in a number#define maxnopair 600 //maximum number of bases that can be forced single#include "structure.cpp" //uncommented out for sgis#include "algorithm.h"//********************************functions:/* Function efn2The energy calculator of Zukercalculates the free energy of each structural conformation in a structurestructures cannot have pseudoknotsstructnum indicates which structure to calculate the free energythe default, 0, indicates "all" structures*/void efn2(datatable *data, structure *ct, int structnum){ int i, j, k, open, null, stz, count, sum, sum1, ip = 0, jp = 0; stackstruct stack; int **coax, **helix; int **fce; bool inter, flag; int start, stop; //int n5, n3; /* stack = a place to keep track of where efn2 is located in a structure inter = indicates whether there is an intermolecular interaction involved in a multi-branch loop */ stack.sp = 0; //set stack counter fce = new int *[ct->numofbases+1]; for (i=0; i<=ct->numofbases; i++) fce[i] = new int [ct->numofbases + 1-i]; for (i=0; i<=ct->numofbases; i++) { for (j=i; j<=ct->numofbases; j++) { fce[i][j-i] = 0; } } if (ct->intermolecular) {//this indicates an intermolecular folding for (i=0; i<3; i++) { forceinterefn(ct->inter[i], ct, fce); } } if (structnum!=0) { start = structnum; stop = structnum; } else { start = 1; stop = ct->numofstructures; } for (count=start; count<=stop; count++){//one structure at a time ct->energy[count]=0; push(&stack, 1, ct->numofbases, 1, 0); //put the whole structure onto the stack subroutine://loop starts here (I chose the goto statement to speed operation) pull(&stack, &i, &j, &open, &null, &stz); //take a substructure off the stack //cout << "pulled "<<i<<" energy = "<<ct->energy[count]<<"\n"; //cin >> k; while (stz!=1){ while (ct->basepr[count][i]==j) { //are i and j paired? while (ct->basepr[count][i+1]==j-1) {//are i, j and i+1, j-1 stacked? ct->energy[count] = ct->energy[count] + erg1(i, j, i+1, j-1, ct, data); i++; j--; } sum = 0; k = i + 1; // now efn2 is past the paired region, so define // the intervening non-paired segment while (k<j) { if (ct->basepr[count][k]>k) { sum++; ip = k; k = ct->basepr[count][k] + 1; jp = k-1; } else if (ct->basepr[count][k]==0) k++; } if (sum==0) {//hairpin loop ct->energy[count] = ct->energy[count] + erg3(i, j, ct, data, fce[i][j-i]); goto subroutine; } else if (sum==1) {//bulge/internal loop ct->energy[count] = ct->energy[count] + erg2(i, j, ip, jp, ct, data, fce[i][ip-i], fce[jp][j-jp]); //cout << "after internal loop energy = "<<ct->energy[count]<<"\n"; //cout << "the internal loop was = "<<erg2(i, j, ip, jp, ct, data, fce[i][ip], fce[jp][j])<<"\n"; //cout << "i = "<<i<<" j = "<<j<<"\n"; i = ip; j = jp; } else {//multi-branch loop sum = sum + 1; //total helixes = sum + 1 //initialize array helix and array coax helix = new int *[sum+1]; for (k=0; k<=sum; k++) helix[k] = new int [2]; coax = new int *[sum+1]; for (k=0; k<=sum; k++) coax[k] = new int [sum+1]; //find each helix and store info in array helix // also place these helixes onto the stack // and calculate energy of the intervening unpaired nucs helix[0][0] = i; helix[0][1] = j; inter = false; sum1 = 0; for (k=1; k<sum; k++) { ip = helix[k-1][0]+1; while (ct->basepr[count][ip]==0) ip++; if (fce[helix[k-1][0]][ip-helix[k-1][0]]==5) inter = true; //add the terminal AU penalty if necessary ct->energy[count] = ct->energy[count] + penalty(ip, ct->basepr[count][ip], ct, data); helix[k][1] = ip; helix[k][0] = ct->basepr[count][ip]; push (&stack, ip, ct->basepr[count][ip], 1, 0); sum1 = sum1+(ip - helix[k-1][0]-1); } helix[sum][0] = helix[0][0]; helix[sum][1] = helix[0][1]; sum1 = sum1 + helix[sum][1]-helix[sum-1][0]-1; if (inter) {//intermolecular interaction ct->energy[count] = ct->energy[count]+data->init; //give the initiation penalty } else {//not an intermolecular interaction, treat like a normal // multibranch loop: //give the multibranch loop bonus: ct->energy[count] = ct->energy[count] + data->efn2a; //give the energy for each entering helix ct->energy[count] = ct->energy[count] + sum*data->efn2c; if (sum1<=6) {ct->energy[count]=ct->energy[count] + sum1*(data->efn2b); } else { //cout << "efn2: i = "<<i<<" j = "<<j<<" "<<int(11.*log(double(((sum1)/6.))) + 0.5)<<"\n"; //cout << "energy = "<<ct->energy[count]<<"\n"; ct->energy[count]=ct->energy[count] + 6*(data->efn2b) + int(11.*log(double(((sum1)/6.))) + 0.5); } } //Now calculate the energy of stacking: for (k=0; k<=sum; k++) {//k+1 indicates the number of helixes consider if (k==0) { //this is the energy of stacking bases for (ip=0; ip<sum; ip++) { coax[ip][ip] = 0; flag = false; if (((ip==0) && ((helix[0][1]-helix[sum-1][0])>1))|| ((ip!=0)&&((helix[ip][1]-helix[ip-1][0])>1))) flag=true; if (((helix[ip+1][1] - helix[ip][0]) > 1)&&flag) { //use tstackm numbers //n5 = ct->numseq[helix[ip][0]+1]; //n3 = ct->numseq[helix[ip][1]-1]; coax[ip][ip] = data->tstkm[ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]]; } else {//do not use tstackm numbers, use individual terminal // stack numbers if ((helix[ip+1][1] - helix[ip][0])>1) { coax[ip][ip] = min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][0]+1, 1, ct, data, false)); } if (ip==0) { if ((helix[0][1]-helix[sum-1][0])>1) { coax[ip][ip] = coax[ip][ip]+ min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][1]-1, 2, ct, data, false)); } } else { if ((helix[ip][1]-helix[ip-1][0])>1) { coax[ip][ip] = coax[ip][ip]+ min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][1]-1, 2, ct, data, false)); } } } } coax[sum][sum] = coax[0][0]; } else if (k==1) {//now consider whether coaxial stacking is //more favorable than just stacked bases for (ip=0; ip<sum; ip++) { //cout << ip << "\n"; //see if they're close enough to stack if ((helix[ip+1][1] - helix[ip][0])==1) { //flush stacking: coax[ip][ip+1] = min((coax[ip][ip] + coax[ip+1][ip+1]), data->coax[ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]]); } else if (((helix[ip+1][1] - helix[ip][0])==2)) { //possible intervening mismatch: coax[ip][ip+1] = coax[ip][ip]+coax[ip+1][ip+1]; if (ip!=0) { if ((helix[ip][1] - helix[ip-1][0])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]]+ data->coaxstack[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]]); } } else { //ip==0 if ((helix[0][1]-helix[sum-1][0])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] + data->coaxstack[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]]); } } if (ip!=(sum-1)) { if ((helix[ip+2][1]-helix[ip+1][0])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip+1][0]+1]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]] + data->coaxstack[ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip+1][0]+1]]); } } else { //ip = sum - 1 if ((helix[1][1]-helix[0][0])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip+1][0]+1]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]] + data->coaxstack[ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip+1][0]+1]]); } } } else {//no possible stacks coax[ip][ip+1] = coax[ip][ip] + coax[ip+1][ip+1]; } } } else if (k>1&&k<sum) { for (i=0; (i+k)<=sum; i++) { coax[i][i+k] = coax[i][i]+coax[i+1][i+k]; for (j=1; j<k; j++) { coax[i][i+k] = min(coax[i][i+k], coax[i][i+j]+coax[i+j+1][i+k]); } } } else if (k==sum) { ct->energy[count] = ct->energy[count] + min(coax[0][sum-1], coax[1][sum]); //cout << "efn2: cs = "<<" "<<min(coax[0][sum-1], coax[1][sum])<<"\n"; //cout << "structure = "<<count<<"\n"; //cout << "helix[0][0] = "<<helix[0][0]<<"\n"; //cout << min(coax[0][sum-1], coax[1][sum])<<"\n"; //for (i=0; i<=sum; i++) { // for (j=i; j<=sum; j++) { // cout << "i "<<i<<" j "<<j<<" coax[i][j] = "<<coax[i][j]<<"\n"; // } //} } } for (k=0; k<=sum; k++) delete[] helix[k]; delete[] helix; for (k=0; k<=sum; k++) delete[] coax[k]; delete[] coax; goto subroutine; } } //this is the exterior loop: , i = 1 //Find the number of helixes exiting the loop, store this in sum: sum = 0; while (i<ct->numofbases) { if (ct->basepr[count][i]!=0) { sum++; i = ct->basepr[count][i]; } i++; } //initialize array helix and array coax helix = new int *[sum]; for (k=0; k<sum; k++) helix[k] = new int [2]; coax = new int *[sum]; for (k=0; k<sum; k++) coax[k] = new int [sum]; //find each helix and store info in array helix // also place these helixes onto the stack ip = 1; for (k=0; k<sum; k++) { while (ct->basepr[count][ip]==0) ip++; //add terminal au penalty if necessary ct->energy[count]=ct->energy[count]+ penalty(ip, ct->basepr[count][ip], ct, data); helix[k][1] = ip; helix[k][0] = ct->basepr[count][ip]; push (&stack, ip, ct->basepr[count][ip], 1, 0); ip = ct->basepr[count][ip]+1; } //Now calculate the energy of stacking: for (k=0; k<sum; k++) {//k+1 indicates the number of helixes consider if (k==0) { //this is the energy of stacking bases for (ip=0; ip<sum; ip++) { coax[ip][ip] = 0; if (ip<sum-1) {//not at 3' end of structure if ((helix[ip+1][1] - helix[ip][0])>1) { //try 3' dangle coax[ip][ip] = min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][0]+1, 1, ct, data, false)); } } else { //at 3' end of structure if ((ct->numofbases - helix[ip][0])>=1) { //try 3' dangle coax[ip][ip] = min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][0]+1, 1, ct, data, false)); } } if (ip==0) { if ((helix[0][1])>1) { coax[ip][ip] = coax[ip][ip]+ min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][1]-1, 2, ct, data, false)); } } else { if ((helix[ip][1]-helix[ip-1][0])>=1) { coax[ip][ip] = coax[ip][ip]+ min(0, erg4 (helix[ip][0], helix[ip][1], helix[ip][1]-1, 2, ct, data, false)); } } } } else if (k==1) {//now consider whether coaxial stacking is //more favorable than just stacked bases for (ip=0; ip<sum-1; ip++) { //see if they're close enough to stack if ((helix[ip+1][1] - helix[ip][0])==1) { //flush stacking: coax[ip][ip+1] = min((coax[ip][ip] + coax[ip+1][ip+1]), data->coax[ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]]); } else if (((helix[ip+1][1] - helix[ip][0])==2)) { //possible intervening mismatch: coax[ip][ip+1] = coax[ip][ip]+coax[ip+1][ip+1]; if (ip!=0) { if ((helix[ip][1] - helix[ip-1][0])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]]+ data->coaxstack[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] [ct->numseq[helix[ip+1][1]]] [ct->numseq[helix[ip+1][0]]]); } } else { //ip==0 if ((helix[0][1])>1) { coax[ip][ip+1] = min(coax[ip][ip+1], data->tstackcoax[ct->numseq[helix[ip][1]]] [ct->numseq[helix[ip][0]]] [ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] + data->coaxstack[ct->numseq[helix[ip][0]+1]] [ct->numseq[helix[ip][1]-1]] [ct->numseq[helix[ip+1][1]]]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -