⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seqsplit_main.c

📁 hmmer源程序
💻 C
字号:
/***************************************************************** * HMMER - Biological sequence analysis with profile HMMs * Copyright (C) 1992-1999 Washington University School of Medicine * All Rights Reserved *  *     This source code is distributed under the terms of the *     GNU General Public License. See the files COPYING and LICENSE *     for details. *****************************************************************//* seqsplit_main.c * SRE, Mon Sep 25 11:43:58 2000 *  * Split sequences into smaller chunks of defined size and overlap; * output a FASTA file. * * Limitations: *   still working in 32 bits -- no sequence can be more than 2 GB *   in size. * CVS $Id: seqsplit_main.c,v 1.2 2000/12/21 23:42:59 eddy Exp $ */#include <stdio.h>#include <string.h>#include "squid.h"#include "msa.h"static char banner[] = "seqsplit - split seqs into chunks of defined size and overlap";static char usage[]  = "\Usage: seqsplit [-options] <seqfile>\n\  Available options:\n\  -h        : help; display usage and version\n\  -o <file> : output the new FASTA file to <file>\n\";  static char experts[] = "\  --informat <s> : specify sequence file format <s>\n\  --length <n>   : set max length of each unique seq frag to <n>\n\  --overlap <n>  : set overlap length to <n> (total frag size = length+overlap)\n\";struct opt_s OPTIONS[] = {  { "-h", TRUE, sqdARG_NONE },      { "-o", TRUE, sqdARG_STRING },      { "--informat", FALSE, sqdARG_STRING },  { "--length",   FALSE, sqdARG_INT },  { "--overlap",  FALSE, sqdARG_INT },};#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))intmain(int argc, char **argv){  char     *seqfile;            /* name of sequence file     */  char     *outfile;		/* name of output file       */  SQFILE   *dbfp;		/* open sequence file        */  FILE     *ofp;		/* open output file          */  int       fmt;		/* format of seqfile         */  char     *seq;		/* sequence                  */  SQINFO    sqinfo;             /* extra info about sequence */  char     *seqfrag;		/* space for a seq fragment  */  int       fraglength;		/* length of unique seq per frag     */  int       overlap;            /* length of overlap. frags are fraglength+overlap*/  char      seqname[256];	/* renamed fragment, w/ coord info */  int       num;		/* number of this fragment   */  int       pos;		/* position in a sequence */  int       len;		/* length of a fragment   */  char     *desc;	    int       nseqs;		/* total number of sequences */  int       nsplit;		/* number of seqs that get split */  int       nnewfrags;		/* total number of new fragments */  char  *optname;  char  *optarg;  int    optind;  /***********************************************   * Parse command line   ***********************************************/  fmt       = SQFILE_UNKNOWN;	/* default: autodetect      */  fraglength = 100000;  overlap   = 1000;  outfile   = NULL;  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, 		&optind, &optname, &optarg))    {      if      (strcmp(optname, "-o")         == 0)  outfile    = optarg;      else if (strcmp(optname, "--length")   == 0)  fraglength = atoi(optarg);      else if (strcmp(optname, "--overlap")  == 0)  overlap    = atoi(optarg);      else if (strcmp(optname, "--informat") == 0) {	fmt = String2SeqfileFormat(optarg);	if (fmt == SQFILE_UNKNOWN) 	  Die("unrecognized sequence file format \"%s\"", optarg);      }      else if (strcmp(optname, "-h") == 0) {	Banner(stdout, banner);	puts(usage);	puts(experts);        exit(EXIT_SUCCESS);      }    }  if (argc - optind != 1) Die("%s\n", usage);  seqfile = argv[argc-1];  seqfrag = MallocOrDie(sizeof(char) * (fraglength+overlap));  seqfrag[fraglength+overlap] = '\0';  /***********************************************   * Read the file.   ***********************************************/  if (outfile == NULL)  ofp = stdout;   else {    if ((ofp = fopen(outfile, "w")) == NULL)      Die("Failed to open output sequence file %s for writing", outfile);  }  if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)    Die("Failed to open sequence file %s for reading", seqfile);    nseqs = nsplit = nnewfrags = 0;  while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))    {      nseqs++;      if (sqinfo.flags & SQINFO_DESC) desc = sqinfo.desc;      else desc = NULL;      if (sqinfo.len <= fraglength+overlap) {	WriteSimpleFASTA(ofp, seq, sqinfo.name, desc);	continue;      }            num = 1;      nsplit++;      for (pos = 0; pos < sqinfo.len; pos += fraglength)	{	  if (sqinfo.len - pos <= overlap) continue;	  strncpy(seqfrag, seq+pos, fraglength+overlap);	  len = strlen(seqfrag);	  sprintf(seqname, "%s/frag%d/%d-%d", 		  sqinfo.name, num, pos+1, pos+len);	  WriteSimpleFASTA(ofp, seqfrag, seqname, desc);	  nnewfrags++;	  num ++;	}      FreeSequence(seq, &sqinfo);    }  SeqfileClose(dbfp);  if (outfile != NULL) fclose(ofp);  printf("Total # of seqs:         %d\n", nseqs);  printf("Affected by splitting:   %d\n", nsplit);  printf("New # of seqs:           %d\n", nseqs-nsplit + nnewfrags);  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -