⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 shuffle_main.c

📁 hmmer源程序
💻 C
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. *****************************************************************//* main for shuffle * * shuffle - generate shuffled sequences * Mon Feb 26 16:56:08 1996 *  * CVS $Id: shuffle_main.c,v 1.9 2001/04/23 00:35:33 eddy Exp $ */#include <stdio.h>#include <string.h>#include <time.h>#include "squid.h"char banner[] = "shuffle - generated shuffled (or otherwise randomized) sequence";char usage[]  = "\Usage: shuffle [-options] <seqfile>\n\  Available options:\n\  -h         : help; print version and usage info\n\  -n <n>     : make <n> samples per input seq (default 1)\n\  -t <n>     : truncate/delete inputs to fixed length <n>\n\\n\  Default: shuffle each input randomly, preserving mono-symbol composition.\n\  Other choices (exclusive; can't use more than one) :\n\  -d         : shuffle but preserve both mono- and di-symbol composition\n\  -0         : generate with same 0th order Markov properties as each input\n\  -1         : generate with same 1st order Markov properties as each input\n\  -l         : make iid sequences of same number and length as inputs\n\  -r         : reverse inputs\n\  -w <n>     : regionally shuffle inputs in window size <n>\n\  -i         : make [-n] iid seqs of length [-t] of type [--dna|--amino];\n\               when -i is set, no <seqfile> argument is used\n\";char experts[] = "\  --alignment    : <seqfile> is an alignment; shuffle the columns\n\  --amino        : synthesize protein sequences [default] (see -i, -l)\n\  --dna          : synthesize DNA sequences (see -i, -l))\n\  --informat <s> : specify sequence file format <s>\n\  --nodesc       : remove sequence description lines\n\  --seed <s>     : set random number seed to <s>\n\";struct opt_s OPTIONS[] = {  { "-0",     TRUE, sqdARG_NONE },     /* 0th order Markov                   */  { "-1",     TRUE, sqdARG_NONE },     /* 1st order Markov                   */  { "-d",     TRUE, sqdARG_NONE },     /* digram shuffle                     */  { "-h",     TRUE, sqdARG_NONE },     /* help                               */  { "-i",     TRUE, sqdARG_NONE },     /* make iid seq of set length         */  { "-l",     TRUE, sqdARG_NONE },     /* make iid seq of same length        */  { "-n",     TRUE, sqdARG_INT  },     /* number of shuffles per input seq   */  { "-r",     TRUE, sqdARG_NONE },     /* reverse seq rather than shuffle    */  { "-t",     TRUE, sqdARG_INT },      /* truncation of inputs to fixed len  */  { "-w",     TRUE, sqdARG_INT  },     /* do regional shuffling              */  { "--alignment",FALSE, sqdARG_NONE  },   /* input is alignment; shuff cols */  { "--amino",    FALSE, sqdARG_NONE  },   /* make iid protein seqs [default]*/  { "--dna",      FALSE, sqdARG_NONE },    /* make iid DNA seqs              */  { "--informat", FALSE, sqdARG_STRING },  /* remove desc lines              */  { "--nodesc",   FALSE, sqdARG_NONE },    /* remove desc lines              */  { "--seed",     FALSE, sqdARG_INT },     /* set the random number seed     */};#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))static void shuffle_alignment_file(char *afile, int fmt);intmain(int argc, char **argv){  char  *seqfile;               /* name of sequence file */  SQFILE *dbfp;			/* open sequence file */  int    fmt;			/* format of seqfile  */  char  *seq;			/* sequence */  char   sqname[32];		/* name of an iid sequence */  SQINFO sqinfo;                /* additional sequence info */  char  *shuff;                 /* shuffled sequence */  int    num;			/* number to generate */  int    seed;			/* random number generator seed */  int    i;  int    w;			/* window size for regional shuffle (or 0)   */  int    truncation;		/* fixed length for truncation option (or 0) */  int    no_desc;		/* TRUE to remove description lines */  enum   {		/* shuffling strategy */    DO_SHUFFLE, DO_DPSHUFFLE, DO_MARKOV0, DO_MARKOV1, DO_REVERSE, DO_REGIONAL,    DO_IID_SAMELEN, DO_IID_FIXEDLEN} strategy;  int    do_dna;		/* TRUE to make DNA iid seqs, not protein */  int    do_alignment;		/* TRUE to shuffle alignment columns */  char  *optname;               /* option name */  char  *optarg;                /* option argument (or NULL) */  int    optind;                /* index of next argv[] */    /***********************************************   * Parse command line   ***********************************************/  fmt          = SQFILE_UNKNOWN;	/* autodetect file format by default */  num          = 0;  seed         = (int) time ((time_t *) NULL);  w            = 0;  truncation   = 0;  strategy     = DO_SHUFFLE;  no_desc      = FALSE;  do_dna       = FALSE;  do_alignment = FALSE;  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, 		&optind, &optname, &optarg))    {      if      (strcmp(optname, "-0")   == 0)  strategy   = DO_MARKOV0;      else if (strcmp(optname, "-1")   == 0)  strategy   = DO_MARKOV1;      else if (strcmp(optname, "-d")   == 0)  strategy   = DO_DPSHUFFLE;      else if (strcmp(optname, "-n")   == 0)  num        = atoi(optarg);       else if (strcmp(optname, "-w")   == 0)  {strategy = DO_REGIONAL; w = atoi(optarg); }      else if (strcmp(optname, "-i")   == 0)  strategy   = DO_IID_FIXEDLEN;      else if (strcmp(optname, "-l")   == 0)  strategy   = DO_IID_SAMELEN;      else if (strcmp(optname, "-r")   == 0)  strategy   = DO_REVERSE;      else if (strcmp(optname, "-t")   == 0)  truncation = atoi(optarg);       else if (strcmp(optname, "--alignment")== 0) do_alignment = TRUE;       else if (strcmp(optname, "--amino")    == 0) do_dna       = FALSE;       else if (strcmp(optname, "--dna")      == 0) do_dna       = TRUE;       else if (strcmp(optname, "--nodesc")   == 0) no_desc      = TRUE;       else if (strcmp(optname, "--seed")     == 0) seed         = atoi(optarg);       else if (strcmp(optname, "--informat") == 0) {	fmt = String2SeqfileFormat(optarg);	if (fmt == SQFILE_UNKNOWN) 	  Die("unrecognized sequence file format \"%s\"", optarg);      }      else if (strcmp(optname, "-h") == 0) {	Banner(stdout, banner);	puts(usage);	puts(experts);        exit(EXIT_SUCCESS);      }    }  /*****************************************************************   * Special case, 1: IID sequence generation.   * -i option is special, because it synthesizes, rather than   * shuffles. Doesn't take a seqfile argument;   * requires -n, -t; and doesn't use the same code logic as the   * other shuffling strategies. Note that we misuse/overload the   * -t "truncation length" option to set our fixed length for   * generating iid sequence.   *****************************************************************/   if (strategy == DO_IID_FIXEDLEN) {    if (num == 0 || truncation == 0)       Die("-i (i.i.d. sequence generation) requires -n,-t to be set\n%s\n",	  usage);    if (argc-optind != 0)       Die("-i (i.i.d. sequence generation) takes no seqfile argument\n%s\n",	  usage);    sre_srandom(seed);    for (i = 0; i < num; i++)      {	if (do_dna) 	  shuff = RandomSequence(DNA_ALPHABET, dnafq, 4, truncation);	else	  shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, truncation);		/* pedantic note: sqname has room for 31 char + \0, so	 * there's room for 24 digits - a 32-bit integer can only run up	 * to 10 digits, and a 64-bit integer to 20, so we don't worry	 * about the following sprintf() overrunning its bounds.	 */                            	sprintf(sqname, "randseq%d", i);	WriteSimpleFASTA(stdout, shuff, sqname, NULL);	free(shuff);      }    return 0;  }  /*****************************************************************   * Check command line    *****************************************************************/  if (argc - optind != 1)     Die("Incorrect number of command line arguments\n%s\n", usage);   seqfile = argv[optind];  if (num == 0) num = 1;	/* set default shuffle number per sequence */  sre_srandom(seed);  /*****************************************************************   * Special case, 2: Alignment shuffling   *****************************************************************/  if (do_alignment)     {      shuffle_alignment_file(seqfile, fmt);      return 0;    }  /*****************************************************************   * Main logic of the shuffling program:    * expect one seqfile argument   *****************************************************************/  if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)    Die("Failed to open sequence file %s for reading", seqfile);    while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))    {      shuff = (char *) MallocOrDie ((sqinfo.len + 1) * sizeof(char));      if (no_desc) strcpy(sqinfo.desc, "");      /* If we're truncating seq, do it now.       */      if (truncation > 0)	{ 	  int start;	  if (sqinfo.len < truncation) {	    free(shuff);	    FreeSequence(seq, &sqinfo); 	    continue; 	  }	  start = CHOOSE(sqinfo.len - truncation + 1);	  strncpy(shuff, seq+start, truncation);	  shuff[truncation] = '\0';	  strcpy(seq, shuff);	  sqinfo.len = truncation;	}      for (i = 0; i < num; i++)	{	  switch (strategy) {	  case DO_SHUFFLE:    StrShuffle(shuff, seq);            break;	  case DO_DPSHUFFLE:  StrDPShuffle(shuff, seq);          break;	  case DO_MARKOV0:    StrMarkov0(shuff, seq);            break;	  case DO_MARKOV1:    StrMarkov1(shuff, seq);            break;	  case DO_REVERSE:    StrReverse(shuff, seq);            break;	  case DO_REGIONAL:   StrRegionalShuffle(shuff, seq, w); break;	  case DO_IID_SAMELEN:	    free(shuff);	    shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, sqinfo.len);	    break;	  default: Die("choked on a bad enum; tragic.");	  }	  WriteSeq(stdout, SQFILE_FASTA, shuff, &sqinfo);	}      if (shuff != NULL) free(shuff);      FreeSequence(seq, &sqinfo);    }  SeqfileClose(dbfp);  return 0;}static voidshuffle_alignment_file(char *afile, int fmt){  MSAFILE *afp;  MSA     *msa;  if ((afp = MSAFileOpen(afile, fmt, NULL)) == NULL)    Die("Alignment file %s could not be opened for reading", afile);   while ((msa = MSAFileRead(afp)) != NULL)    {				/* shuffle in place */      AlignmentShuffle(msa->aseq, msa->aseq, msa->nseq, msa->alen);				/* write in same format we read in */      MSAFileWrite(stdout, msa, afp->format, FALSE);      MSAFree(msa);    }   MSAFileClose(afp);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -