📄 sfetch_main.c
字号:
/*****************************************************************
 * HMMER - Biological sequence analysis with profile HMMs
 * Copyright (C) 1992-1999 Washington University School of Medicine
 * All Rights Reserved
 *
 * This source code is distributed under the terms of the
 * GNU General Public License. See the files COPYING and LICENSE
 * for details.
 *****************************************************************/

/* sfetch_main.c, Fri Dec 25 14:22:17 1992, SRE
 *
 * sfetch -- a program to extract subsequences from a sequence database
 * Renamed from "getseq" SRE, Tue Jan 19 10:47:42 1999 (GCG clash)
 *
 * CVS $Id: sfetch_main.c,v 1.11 2001/05/09 11:43:59 eddy Exp $
 */

/* NOTE(review): atoi(), exit() and EXIT_SUCCESS are used below but
 * <stdlib.h> is not included directly; presumably it arrives through
 * "squid.h" -- confirm.
 */
#include <stdio.h>
#include <string.h>
#include "squid.h"
#include "msa.h"
#include "ssi.h"

/* One-line program description; handed to Banner() when -h is given. */
static char banner[] = "sfetch - retrieve a specified sequence from a file";

/* Main help text. It is printed for -h, passed to Getopt(), and appended
 * to most of the Die() messages issued while checking the command line.
 */
static char usage[] = "\
Usage: sfetch [-options] <seqname>\n\
 or: sfetch [-options] .\n\
 (The second version fetches the first seq in the file.)\n\
 Get a sequence from a database.\n\
 Available options:\n\
 -a : name is an accession number, not a key\n\
 -d <seqfile> : get sequence from <seqfile>\n\
 -D <database> : instead, get sequence from main database\n\
 -h : help; print version and usage info\n\
 -r <newname> : rename the fragment <newname>\n\
 -f <from> : from which residue (1..N)\n\
 -t <to> : to which residue (1..N)\n\
 -o <outfile> : direct output to <outfile>\n\
 -F <format> : use output format of <format>; see below for\n\
 list. Default is original format of database.\n\
\n\
 Available output formats include:\n\
 fasta\n\
 genbank\n\
 embl\n\
 gcg\n\
 pir\n\
 raw\n\n\
 Available databases are: (if $env variables are set correctly)\n\
 -Dsw $SWDIR SwissProt\n\
 -Dpir $PIRDIR PIR\n\
 -Dem $EMBLDIR EMBL\n\
 -Dgb $GBDIR GenBank\n\
 -Dwp $WORMDIR WormPep\n\
 -Dowl $OWLDIR OWL\n";

/* Additional help text for the long option; printed after usage for -h. */
static char experts[] = "\
 --informat <s> : specify input sequence file format <s>\n\
";

/* Table of recognized command line options, passed to Getopt().
 * Each entry is { option string, TRUE/FALSE flag, argument type }.
 * Every "-x" option here carries TRUE and "--informat" carries FALSE;
 * presumably the flag marks single-character options -- confirm against
 * the struct opt_s declaration in squid.h.
 */
struct opt_s OPTIONS[] = {
  { "-a", TRUE, sqdARG_NONE },
  { "-d", TRUE, sqdARG_STRING },
  { "-f", TRUE, sqdARG_INT },
  { "-h", TRUE, sqdARG_NONE },
  { "-o", TRUE, sqdARG_STRING },
  { "-r", TRUE, sqdARG_STRING },
  { "-t", TRUE, sqdARG_INT },
  { "-D", TRUE, sqdARG_STRING },
  { "-F", TRUE, sqdARG_STRING },
  { "--informat", FALSE, sqdARG_STRING },
};
/* Number of entries in OPTIONS[]. */
#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))

/* dbenv maps command line database selection to an environment
 * variable, from which the database directory is obtained.
 *
 * In the portion of main() visible in this listing, only the dbname
 * field is read (to resolve the -D argument to a table index).
 */
struct dbenv_s {
  char *dbname;   /* name of database, as used on command line */
  char *ssiname;  /* name of SSI index file to look for */
  char *envname;  /* environment var to get directory path from */
  char *entryend; /* string signifying end of entry */
  int addend;     /* TRUE if entryend line is part of entry */
} dbenv[] =
{
  { "sw", "swiss.ssi", "SWDIR", "//", TRUE},
  { "pir", "pir.ssi", "PIRDIR", "///", TRUE},
  { "em", "embl.ssi", "EMBLDIR", "//", TRUE},
  { "gb", "genbank.ssi","GBDIR", "//", TRUE},
  { "wp", "wormpep.ssi","WORMDIR", ">", FALSE},
  { "owl", "owl.ssi", "OWLDIR", ">", FALSE}, /* use FASTA OWL version */
};
/* Number of entries in dbenv[]. This is a sizeof expression, so it has
 * an unsigned type; see the note at the lookup loop in main().
 */
#define NUMDBS (sizeof(dbenv) / sizeof(struct dbenv_s))

/* main()
 *
 * Entry point of sfetch. The part visible in this listing:
 *   1. sets defaults and parses the command line with Getopt();
 *   2. requires exactly one remaining argument, the sequence name
 *      ("." selects the first sequence and requires -d);
 *   3. rejects -D together with -d, and when neither is given falls
 *      back to database "sw" if the name contains an underscore;
 *   4. resolves the database name to an index into dbenv[].
 *
 * Fatal conditions are reported through Die(), which is defined outside
 * this file (presumably it prints the message and terminates -- confirm).
 *
 * NOTE(review): the listing is truncated partway through this function;
 * nothing after the dbenv[] lookup is documented here.
 */
int
main(int argc, char **argv)
{
  char *dbname;            /* master database to search */
  char *seqfile;           /* name of sequence file to read */
  char *ssifile;           /* name of SSI index file (if one exists) */
  SQFILE *seqfp;           /* pointer to open sequence file */
  char *getname;           /* name of sequence to get from */
  int from;                /* starting residue, 1..N */
  int to;                  /* ending residue, 1..N */
  char *outfile;           /* name of file to put output to */
  FILE *outfp;             /* file pointer to put output to */
  int format;              /* format of seqfile */
  int outfmt;              /* output format */
  char *seq;               /* current working sequence */
  SQINFO sqinfo;
  char *frag;              /* extracted subsequence */
  int source_start;        /* start of seq on original source 1..N */
  int source_stop;         /* end of seq on original source 1..N */
  int source_orient;       /* sign of parent: -1 revcomp, +1 normal */
  char *ss;                /* secondary structure representation */
  SSIFILE *ssi;            /* open SSI index file */
  SSIOFFSET ssi_offset;    /* disk offset for locating sequence */
  int used_ssi;            /* TRUE if SSI file was used (don't scan) */
  int status;              /* status returned by an SSI call */
  char *rename;            /* new name to give fragment */
  int reverse_complement;  /* do we have to reverse complement? */
  int getall;              /* starts TRUE; cleared when -f or -t is given */
  int getfirst;            /* TRUE to extract from the first seq, w/o looking at name */
  char *outformat;         /* output format string */
  int by_accession;        /* TRUE if name is accession number not key */
  int dbidx;               /* index into dbenv[]; -1 until a database is looked up */
  char *optname;           /* option string reported by Getopt() */
  char *optarg;            /* argument of the current option, as reported by Getopt() */
  int optind;              /* after option parsing, argv index of the sequence name */

  /***********************************************
   * Parse the command line
   ***********************************************/

  /* initializations and defaults */
  format = SQFILE_UNKNOWN; /* autodetect default, overridden by --informat or SSI files */
  reverse_complement = 0;
  getall = TRUE;
  getfirst= FALSE;
  dbname = NULL;
  dbidx = -1;
  seqfile = NULL;
  from = -1;
  to = -1; /* flag that says do the whole thing */
  outfile = NULL;
  getname = NULL;
  rename = NULL;
  outformat = NULL;
  by_accession = FALSE;
  used_ssi = FALSE;

  /* One iteration per option, for as long as Getopt() returns nonzero. */
  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, &optind, &optname, &optarg))
    {
      if (strcmp(optname, "-a") == 0) { by_accession = TRUE; }
      else if (strcmp(optname, "-d") == 0) { seqfile = optarg; }
      /* NOTE(review): atoi() gives no way to detect a malformed number;
       * whether Getopt() has already validated sqdARG_INT arguments is
       * not visible here -- confirm.
       */
      else if (strcmp(optname, "-f") == 0) { from = atoi(optarg); getall = FALSE; }
      else if (strcmp(optname, "-t") == 0) { to = atoi(optarg); getall = FALSE; }
      else if (strcmp(optname, "-r") == 0) { rename = optarg; }
      else if (strcmp(optname, "-o") == 0) { outfile = optarg; }
      else if (strcmp(optname, "-D") == 0) { dbname = optarg; }
      else if (strcmp(optname, "-F") == 0) { outformat = optarg; }
      else if (strcmp(optname, "--informat") == 0) {
        format = String2SeqfileFormat(optarg);
        if (format == SQFILE_UNKNOWN)
          Die("unrecognized input sequence file format \"%s\"", optarg);
      }
      else if (strcmp(optname, "-h") == 0) {
        /* Help: print banner plus both help texts, then exit successfully. */
        Banner(stdout, banner);
        puts(usage);
        puts(experts);
        exit(EXIT_SUCCESS);
      }
    }

  /* Exactly one argument (the sequence name) must remain after the options. */
  if (argc - optind != 1)
    Die("Incorrect number of command line arguments.\n%s\n", usage);

  getname = argv[optind];
  /* The name "." means "take the first sequence"; that requires -d. */
  if (strcmp(getname, ".") == 0)
    getfirst = TRUE;
  if (getfirst && seqfile == NULL)
    Die("You need to specify -d <seqfile> to retrieve a first sequence.\n%s", usage);

  /***********************************************
   * Get name of file to look through, and disk offset,
   * using SSI file if one exists. Three possibilities:
   * 1) Look in main DB, which has SSI index in the directory
   * 2) Look in a file, which has associated SSI index
   * 3) Look in an unindexed file
   ***********************************************/

  if (dbname != NULL && seqfile != NULL)
    Die("Can't fetch from *both* a database %s and a file %s\n%s", dbname, seqfile, usage);

  if (dbname == NULL && seqfile == NULL)
    { /* try to guess SwissProt, stupidly, but usually works */
      if (strchr(getname, '_') != NULL) dbname = Strdup("sw");
      else Die("You have to specify either a database or a seqfile\n%s", usage);
    }

  /* NOTE(review): the comment below says "GSI", while the tables and
   * variables in this file refer to SSI index files -- likely stale wording.
   */
  if (dbname != NULL) /* Main database. GSI index mandatory. */
    {
      char *dbdir;
      char *dbfile;
      int fh;

      /* find which db this is */
      /* NOTE(review): dbidx is an int while NUMDBS is an unsigned sizeof
       * expression, so these comparisons mix signedness; dbidx starts
       * at 0 here, so no negative value is compared.
       */
      for (dbidx = 0; dbidx < NUMDBS; dbidx++)
        if (strcmp(dbenv[dbidx].dbname, dbname) == 0) break;
      if (dbidx == NUMDBS)
        Die("No such main database %s\n%s", dbname, usage);

      /* NOTE(review): the listing ends here; the remainder of this block
       * and of main() is not visible.
       */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -