⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ss.c

📁 一个自制的生物信息学程序。 这个程序目的是在基因组文件bsubt168_whole.nuc(部分)中查找设置文件ss.ini所记录的短核苷酸序列CTTAAG(这种短核苷酸一般称寡核苷酸)及其互补
💻 C
字号:
#include <STDARG.H>
#include <ctype.h>
#include <stdio.h>
#include <conio.h>
#include <dir.h>
#include <string.h>

#define Inifn "ss.ini" //initialization file name (this name also appears in variable Ghowmany)
#define Genfn "bsubt1~1.nuc" //"bsubt168_whole.nuc"
#define Tmpfn "ss.tmp"
#define Outfn "ss.csv"
#define Outfnf "ss" //output file name front part
#define Outfne "dat" //output file name extension part
#define FnLen 50 //file name length
#define CommentLen 50
#define RecseqLen 10 //max recognized sequence length
#define ShortcharLen 10

FILE *Gfpini=NULL, *Gfpgen=NULL, *Gfptmp=NULL, *Gfpout=NULL; //Gfpini: file pointer of initialization file; Gfpgen: genome; Gfpout: output
char Ggenfn[FnLen]=Genfn, Gtmpfn[FnLen]=Tmpfn, Goutfn[FnLen]=Outfn; //Ggenfn: genome file name; Goutfn: output file name
char Gmemno[]="Memory is not enough to continue the program. Program exit. Press any key...\n";
char Gfileno[]="File doesn't exist. Program exit. Press any key...\n";
char Gfilesize[]="The genome file size is\n%ld.\n";
char Gsearchfin[]="\nSearch finished. The result was saved in <ss.csv> which can be viewed by Excel. Press any key...\n";
char Gnocompli[]="Find a bad codon %c. Program exit. Press any key...\n";
int Gslidelen;
char Grecseq[RecseqLen];
int Grecseqlen;
int Gdispflag=0;

void /*int*/ errexit(char * errmsg, ...)
{
  va_list argptr;
  //int cnt;

  if(Gfpini!=NULL) fclose(Gfpini);
  if(Gfpgen!=NULL) fclose(Gfpgen);
  if(Gfpout!=NULL) fclose(Gfpout);

  va_start(argptr, errmsg);
  /*cnt =*/ vprintf(errmsg, argptr);
  va_end(argptr);

  //return(cnt);

  //printf(errmsg);
  getch();
  exit(0);
}

void readini() //read initialization information from file or keyboard
{
  if((Gfpini=fopen(Inifn,"rt"))!=NULL)
  {
    fscanf(Gfpini,"%d\n%s",&Gslidelen,Grecseq);
    strcpy(Grecseq,strlwr(Grecseq));
    fclose(Gfpini);
    Grecseqlen=strlen(Grecseq);
  }
  else //if ss.ini doesn't exist.
    errexit(Gfileno);
}

void opengen() //open genome file to read [verify; if not exist, exit; findfirst]
{
  struct ffblk f;

  findfirst(Ggenfn,&f,0);
  printf(Gfilesize,f.ff_fsize);
  if((Gfpgen=fopen(Ggenfn,"rt"))==NULL) errexit(Gfileno);
}

void opentmp()
{
  if((Gfptmp=fopen(Gtmpfn,"wt"))==NULL) errexit(Gfileno);
}

//open the 1st output file to write (value pair: bp position, No. of matched)
//[findfirst; if not exist, name "ss.dat"; or else, name "ss1.dat", "ss2.dat", ...]
void openout()
{
  if((Gfpout=fopen(Goutfn,"wt"))==NULL) errexit(Gfileno);
}

void initialize()
{
  readini();
  opengen();
  opentmp();
}

char compli(char c)
{
  switch((char)tolower((char)c))
  {
    case 'a': return 't';
    case 't': return 'a';
    case 'g': return 'c';
    case 'c': return 'g';
    case 'u': return 'a';
    case 'n': return 'n';
    default: errexit(Gnocompli,c);
  }

  return '\0'; //no use, just for abandon warning
}

void search() //should also search complimented recognized sequence, record
{
  char c;
  int i,firstfind=0,where=0,comflag=0; //comflag: set compliment flag
  long count=0;

  do
  {
     c = fgetc(Gfpgen);
     if(c=='\n') continue;
     count++;
     if(Gdispflag) printf("\r%ld",count);

     if(firstfind==0&&(c==Grecseq[0]||c==compli(Grecseq[0])))
     {
       if(c==compli(Grecseq[0])) comflag=1;
       else comflag=0;
       firstfind=1;
       where=1;
       continue;
     }

     if(firstfind)
       if(comflag==0)
	 if(c==Grecseq[where])
	 {
	   where++;
	   if(where==Grecseqlen) //a match finished
	   {
	     firstfind=0;
	     where=0;
	     fprintf(Gfptmp,"%ld\n",count-Grecseqlen+1);
	     fflush(Gfptmp);
	   }
	 }
	 else //if comflag=0 but not match, this letter abandoned
	 {
	   firstfind=0;
	   where=0;

	   //is the abandoned a new begin?
	   if(c==Grecseq[0]||c==compli(Grecseq[0]))
	   {
	     if(c==compli(Grecseq[0])) comflag=1;
	     else comflag=0;
	     firstfind=1;
	     where=1;
	     continue;
	   }
	 }
       else //if comflag=1
	 if(c==compli(Grecseq[where]))
	 {
	   where++;
	   if(where==Grecseqlen) //a match finished
	   {
	     firstfind=0;
	     where=0;
	     fprintf(Gfptmp,"%ld\n",count-Grecseqlen+1);
	     fflush(Gfptmp);
	   }
	 }
	 else //if comflag=1 but not match, this letter abandoned
	 {
	   firstfind=0;
	   where=0;

	   //is the abandoned a new begin?
	   if(c==Grecseq[0]||c==compli(Grecseq[0]))
	   {
	     if(c==compli(Grecseq[0])) comflag=1;
	     else comflag=0;
	     firstfind=1;
	     where=1;
	     continue;
	   }
	 }

  } while (c != EOF);

  printf("\nGenome reading finished.\n");
  //putchar(7); //beep

  fclose(Gfptmp);
  Gfptmp=NULL;
  fclose(Gfpgen);
}

void finish()
{
  long read,count=1;
  int nomat=0;

  if((Gfptmp=fopen(Gtmpfn,"rt"))==NULL) errexit(Gfileno);
  openout();

  while(!feof(Gfptmp))
  {
    fscanf(Gfptmp,"%ld",&read);
    if(read>Gslidelen*count)
    {
      count=(read-read%Gslidelen)/Gslidelen+1;
      fprintf(Gfpout,"%ld,%d\n",Gslidelen*count,nomat);
      nomat=1;
    }
    else
      nomat++;
  }

  if(nomat!=0) fprintf(Gfpout,"%ld,%d\n",Gslidelen*count,nomat-1);

  fclose(Gfpout);
  fclose(Gfptmp);

  printf(Gsearchfin);
  getch();
}

void main() //int argc, char**argv
{
  initialize();

  search();

  //readini(); //for read ss.tmp and write ss.dat only
  finish();
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -