📄 main.c
字号:
tabPosition[tabCount]=i; tabCount++; } } if (tabCount !=numSamples) { printf("\nError: number of columns in the 1st row: %d\n",tabCount+1); printf("It should equal to %d (1+%d: the number of samples specified in file) [%s]\n", numSamples+1,numSamples,inputFile); printf("Check both input file[%s] and datafile[%s] for the discrepancy.\n\n", inputFile,dataFileName); exit(0); } columnCount=0; rowCount=0; for (i=1; i<tabCount; i++) { numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[i-1]+1; j<tabPosition[i]; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==tabPosition[i]-tabPosition[i-1]-1) { printf("No sample name: row[%d] column[%d]\n",rowCount+1,columnCount+2); strcpy(sample[columnCount].name,"SampleNameUnknown"); } else strcpy(sample[columnCount].name,tmp); columnCount++; } numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[tabCount-1]+1; j<len; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==len-tabPosition[tabCount-1]-2) { printf("\nNo sample name: row[%d] column[%d]\n",rowCount+1,columnCount+1); strcpy(sample[columnCount].name,"SampleNameUnknown"); } else strcpy(sample[columnCount].name,tmp); columnCount++; rowCount++; if (columnCount !=numSamples) { printf("Error: number of samples in %s (%d) and %s (%d) doesn't match\n", dataFileName,columnCount,inputFile,numSamples); exit(0); } /*-------------------------------------------------------------------*/ /* reading class type */ /*-------------------------------------------------------------------*/ fgets(buffer,10000,fp); len=strlen(buffer); buffer[len-1]='\0'; tabCount=0; for (i=0; i<len; i++) { if(buffer[i]=='\t') { tabPosition[tabCount]=i; tabCount++; } } if (tabCount !=numSamples) { printf("\nError: number of columns in the 2nd row: %d\n",tabCount+1); printf("It should equal to 1+number of samples[%d] specified in file[%s]\n", numSamples,inputFile); printf("Check both input file[%s] and datafile[%s] for discrepancy.\n\n", inputFile,dataFileName); exit(0); } columnCount=0; for (i=1; i<tabCount; i++) { numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[i-1]+1; j<tabPosition[i]; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==tabPosition[i]-tabPosition[i-1]-1) { printf("\nNo class information: row[%d] column[%d]\n",rowCount+1,columnCount+2); exit(0); } else sscanf(tmp,"%d",&(sample[columnCount].class)); columnCount++; } numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[tabCount-1]+1; j<len; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==len-tabPosition[tabCount-1]-2) { printf("\nNo class information: row[%d] column[%d]\n",rowCount+1,columnCount+1); exit(0); } else sscanf(tmp,"%d",&(sample[columnCount].class)); columnCount++; rowCount++; if (columnCount !=numSamples) { printf("Error: number of samples in %s (%d) and %s (%d) doesn't match\n", dataFileName,columnCount,inputFile,numSamples); exit(0); } printf(" Sample (array) name Class\n"); printf(" ---------------------------- -------------\n"); for (i=0; i<numSamples; i++) { printf(" %20s\t%15d\n",sample[i].name,sample[i].class); } /*-------------------------------------------------------------------*/ /* reading variable (gene,m/z idenfier) name and expression data */ /*-------------------------------------------------------------------*/ expValue=alloc_double_double(numSamples,numVariables); for (i=0; i<numVariables; i++) { for (j=0; j<numSamples; j++) missingIndicator[i][j]='0'; } missingCount=0; while (!feof(fp)) { if (fgets(buffer,10000,fp)) { len=strlen(buffer); buffer[len-1]='\0'; tabCount=0; for (i=0; i<len; i++) { if(buffer[i]=='\t') { tabPosition[tabCount]=i; tabCount++; } } if (tabCount !=numSamples) { printf("\nError: number of columns in row[%d]: %d\n",rowCount+1,tabCount+1); printf("It should equal to 1+number of samples[%d] specified in file[%s]\n", numSamples,inputFile); printf("Check both input file[%s] and datafile[%s] for discrepancy.\n\n", inputFile,dataFileName); exit(0); } strncpy(variableName[rowCount-2],buffer,tabPosition[0]); columnCount=0; for (i=1; i<tabCount; i++) { numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[i-1]+1; j<tabPosition[i]; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==tabPosition[i]-tabPosition[i-1]-1) { printf("\nMissing value: row[%d] column[%d]\n",rowCount+1,columnCount+1); missingIndicator[rowCount-2][columnCount]='1'; missingCount++; } else { sscanf(tmp,"%lf",&(expValue[columnCount][rowCount-2])); } columnCount++; } numWhiteSpace=0; tmp[0]='\0'; for (k=0,j=tabPosition[tabCount-1]+1; j<len; j++,k++) { tmp[k]=buffer[j]; if (tmp[k]==' ') numWhiteSpace++; } tmp[k]='\0'; if (tmp[0]=='\0' || numWhiteSpace==len-tabPosition[tabCount-1]-2) { printf("\nMissing value: row[%d] column[%d]\n",rowCount+1,columnCount+1); missingIndicator[rowCount-2][columnCount]='1'; missingCount++; } else { sscanf(tmp,"%lf",&(expValue[columnCount][rowCount-2])); } columnCount++; rowCount++; if (rowCount>numVariables+2) { printf("Error: number of variables (genes,m/z) in file[%s] = %d and in file[%s] = %d\n", dataFileName,rowCount-2,inputFile,numVariables); printf("Make sure no extra empty line at the end of the file[%s]\n",dataFileName); exit(0); } } }; if (rowCount !=numVariables+2) { printf("number of variables (genes,m/z) in file[%s] = %d doesn't match that specified in %s = %d\n", inputFile,rowCount-2,dataFileName,numVariables); exit(0); } fclose(fp); printf("\nReading data file done...\n"); printf("Number of missing values: %d\n\n",missingCount); if (missingCount>0) { printf("Each of the missing values will be replaced by the mean value\n"); printf("of the class to which the missing sample belongs in the row.\n\n"); } if (tabPosition) { free(tabPosition); tabPosition=NULL; } return (expValue);}int filteration(double **expValue,int numSamples,int numVariables,char **variableName, double cutoffThreshold) { register int i,j,k; int cn; double **value; char **name; value=alloc_double_double(numSamples,numVariables); name =alloc_char_char(numVariables,3000); k=0; for (i=0; i<numVariables; i++) { cn=0; for (j=0; j<numSamples; j++) { if (expValue[j][i]<cutoffThreshold) { expValue[j][i]=cutoffThreshold; cn++; } } if (cn<(int)(0.7*numSamples)) { strcpy(name[k],variableName[i]); for (j=0; j<numSamples; j++) value[j][k]=expValue[j][i]; k++; } } printf("number of rows passed the cutoff: %5d\n",k); for (i=0; i<k; i++) { strcpy(variableName[i],name[i]); for (j=0; j<numSamples; j++) expValue[j][i]=value[j][i]; } if (value[0]) { free(value[0]); value[0]=NULL; } if (value) { free(value); value=NULL; } if (name[0]) { free(name[0]); name[0]=NULL; } if (name) { free(name); name=NULL; } return (k);}void log_transform(double **expValue,int numSamples,int numVariables) { register int i,j; for (i=0; i<numVariables; i++) { for (j=0; j<numSamples; j++) { if (expValue[j][i]<=0) { printf("\nCan't apply log-transformation to negative values or zero.\n"); printf("Row[%d] Column[%d]\n\n",i+3,j+1); exit(0); } else expValue[j][i]=log(expValue[j][i])/log(2.0); } }}void debug_print(double **expValue,int numSamples,int numVariables,char **variableName, SampleInfo *sample,int flag) { FILE *fp; register int i,j; if (flag==0) fp=fopen("debug_original.dat","w"); else fp=fopen("debug_filtered.dat","w"); fprintf(fp,"Sample\t"); for (j=0; j<numSamples; j++) { if (j<numSamples-1) fprintf(fp,"%s\t",sample[j].name); else fprintf(fp,"%s\n",sample[j].name); } fprintf(fp,"Class\t"); for (j=0; j<numSamples; j++) { if (j<numSamples-1) fprintf(fp,"%1d\t",sample[j].class); else fprintf(fp,"%1d\n",sample[j].class); } for (i=0; i<numVariables; i++) { fprintf(fp,"%s\t",variableName[i]); for (j=0; j<numSamples; j++) { if (j<numSamples-1) fprintf(fp,"%5.3f\t",expValue[j][i]); else fprintf(fp,"%5.3f\n",expValue[j][i]); } } fclose(fp);}Class *assign_class(SampleInfo *sample,int numSamples) { register int i,j; int used,cmax,cmin,numclass; int *cn; Class *class; class=alloc_class(numSamples); numclass=0; for (i=0; i<numSamples; i++) { used=0; for (j=0; j<numclass; j++) { if (class->type[j]==sample[i].class) { used=1; break; } } if (!used) { class->type[numclass]=sample[i].class;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -