⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 repeats_tag.c

📁 一个类似于blast算法的基因数据快速搜索算法
💻 C
字号:
/* repeats.tag.c -- make tag file from RepeatMasker output */
/* 9/15/98 -- added "-genbank" option */
/* 8/13/99 -- added "-integer" (debug) option */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* NOTE(review): argv0, same_string(), ckopen(), fatal() and fatalf() are
 * not defined in this file; presumably util.h declares them -- confirm.
 */
#include "util.h"

enum {
	NAME,		/* match against the repeat-name column (field[9]) */
	TYPE,		/* match against the repeat-type column (field[10]) */
	BUF_SIZE = 500	/* size of the per-line input buffers */
};

/* One classification rule: which column to look in, what to look for,
 * and the class name to print when it is found.
 */
struct foo {
	int field;
	const char *substr;
	const char *val;
} Rpt[] = {
	/* If the string in col #2 is a substring of the field named by col #1,
	   then its PipMaker name is in col #3.  Use the first case that holds.
	*/
	{ NAME, "Alu",     "Alu" },
	{ NAME, "MIR",     "MIR" },
	{ TYPE, "L2",      "LINE2" },
	{ TYPE, "L1",      "LINE1" },
	{ TYPE, "LTR",     "LTR" },
	{ TYPE, "ERV",     "LTR" },
	{ NAME, "LTR",     "LTR" },
	{ NAME, "HERV",    "LTR" },
	{ TYPE, "DNA",     "DNA" },
	{ NAME, "B1",      "B1" },
	{ TYPE, "B2",      "B2" },
	{ TYPE, "SINE",    "SINE" },
	{ TYPE, "LINE",    "Other" },
	{ NAME, "MML",     "Other" },
	{ NAME, "BUR1",    "Other" },
	{ TYPE, "Other",   "Other" },
	{ TYPE, "Unknown", "Other" },
	{ TYPE, "RNA",     "RNA" }
};

static const int Nrpts = sizeof(Rpt) / sizeof(Rpt[0]);

/* Consume n tokens from the current strtok() scan and return the last one,
 * or NULL if the line runs out first.  Pass the buffer as `s` to start a
 * new scan, or NULL to continue the scan already in progress.
 */
static char *nth_token(char *s, const char *delims, int n)
{
	char *tok = NULL;

	while (n-- > 0) {
		tok = strtok(s, delims);
		if (tok == NULL)
			return NULL;
		s = NULL;
	}
	return tok;
}

/* Read a RepeatMasker output file and write one line per repeat to stdout.
 *
 * Usage: repeats_tag [-simple | -genbank | -<integer>] RepeatMasker-file
 *   -simple     clears the `simple` flag, so Low/Simple/Satellite repeats
 *               are reported on stderr as unknown instead of being printed
 *               as "Simple"
 *   -genbank    print GenBank-style repeat_region features instead of tags
 *   -<integer>  sequence length; a repeat ending beyond it is a fatal error
 *
 * Lines whose first non-blank character is not a digit (e.g. column
 * headers) and lines with too few fields are silently skipped.
 * Returns 0 on success; error paths go through fatal()/fatalf().
 */
int main(int argc, char **argv)
{
	char buf[BUF_SIZE], line[BUF_SIZE], *name, *type, *p;
	int i, from, to, simple = 1, genbank = 0, seq_len = 0, nr = 0, nl = 0;
	FILE *fp;

	argv0 = "repeats_tag";
	if (argc == 3 && same_string(argv[1], "-simple"))
		simple = 0;
	else if (argc == 3 && same_string(argv[1], "-genbank"))
		genbank = 1;
	else if (argc == 3 && argv[1][0] == '-' &&
	    (seq_len = atoi(argv[1]+1)) > 0) {
		/* OK */
	} else if (argc != 2)
		fatalf("args = [-simple] [-genbank] [-integer] RepeatMasker-file\n");

	fp = ckopen(argv[argc-1], "r");

/*  0   1   2   3     4 5  6       7 8   9       10    11 12 13
413 5.6 0.0 0.0 HUMAN 1 54 (92195) C Alu SINE/Alu (238) 62 9
*/
	printf("%%:repeats\n");
	while (fgets(buf, sizeof(buf), fp)) {
		char key;
		const char *wsp = " \t\n";

		++nl;

		/* Expect field[0] to be an integer */
		for (p = buf; *p == ' '; ++p)
			;
		/* The cast keeps isdigit() defined for bytes >= 0x80 when
		 * plain char is signed.
		 */
		if (!isdigit((unsigned char)*p))
			continue;

		/* Tokenize a copy so that buf stays intact for error messages. */
		strcpy(line, buf);

		/* Skip field[0..4]; expect field[5] and field[6] to be integers */
		if ((p = nth_token(line, wsp, 6)) == NULL)
			continue;
		if (sscanf(p, "%d", &from) != 1)
			fatalf("failed to convert start-point: %s", buf);
		if ((p = nth_token(NULL, wsp, 1)) == NULL)
			continue;
		if (sscanf(p, "%d", &to) != 1)
			fatalf("failed to convert end-point: %s", buf);

		/* Note: a one-base interval (from == to) is rejected here too. */
		if (from <= 0 || from >= to) {
			fprintf(stderr, "Addresses out of order: %d %d\n",
				from, to);
			continue;
		}
		if (seq_len > 0 && to > seq_len) {
			fprintf(stderr, "repeat position is %d ", to);
			fprintf(stderr, "whereas sequence length is %d\n",
				seq_len);
			fatal("error in seq1 or RepeatMasker file.");
		}

		/* Skip field[7]; expect field[8] to be "+" or "C" */
		if ((p = nth_token(NULL, wsp, 2)) == NULL)
			continue;
		key = *p;
		if (key != '+' && key != 'C')
			fatalf("%s\nImproper format of RepeatMasker file: expected + or C in field[8].",buf);

		/* Expect field[9] to be a name */
		if ((name = nth_token(NULL, wsp, 1)) == NULL)
			continue;

		/* Expect field[10] to be a type */
		if ((type = nth_token(NULL, wsp, 1)) == NULL)
			continue;

		/* Count the record before choosing an output format; otherwise
		 * -genbank runs never increment nr and always trip the
		 * "no repeat elements" check after the loop.
		 */
		++nr;

		if (genbank) {
			printf("     repeat_region   ");
			if (key == '+')
				printf("%d..%d\n", from, to);
			else
				printf("complement(%d..%d)\n",
				    from, to);
			printf("                     /rpt_family=\"%s\"\n",
				name);
			continue;
		}

		/* First matching rule in Rpt[] wins. */
		for (i = 0; i < Nrpts; ++i)
			if ( (Rpt[i].field==NAME && strstr(name, Rpt[i].substr))
			 || (Rpt[i].field==TYPE && strstr(type, Rpt[i].substr)))
				break;

		if (i == Nrpts) {
			if ((strstr(type,"Low") || strstr(type,"Simple") ||
			     strstr(type,"Satellite")) && simple)
				printf("%d %d Simple\n", from, to);
			else {
				fprintf(stderr, "Unknown repeat at %d-%d, ",
					from, to);
				fprintf(stderr, "name = %s and type = %s\n",
					name, type);
			}
		} else
			printf("%d %d %s %s\n", from, to,
			   (key == '+') ? "Right" : "Left", Rpt[i].val);
	}
	fclose(fp);

	if (nl > 0 && nr == 0)
		fatal("no repeat elements were specified.  corrupted file?");

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -