📄 repeats_tag.c
字号:
/* repeats.tag.c -- make tag file from RepeatMasker output */
/* 9/15/98 -- added "-genbank" option */
/* 8/13/99 -- added "-integer" (debug) option */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

enum {
	NAME,		/* rule is matched against the repeat name (field[9]) */
	TYPE,		/* rule is matched against the repeat type (field[10]) */
	BUF_SIZE = 500	/* size of the line buffers used by main() */
};

struct foo {
	int field;
	const char *substr;
	const char *val;
} Rpt[] = {
	/* If the string in col #2 is a substring of the field named by
	   col #1, then its PipMaker name is in col #3.  Use the first
	   case that holds. */
	{ NAME, "Alu", "Alu" },
	{ NAME, "MIR", "MIR" },
	{ TYPE, "L2", "LINE2" },
	{ TYPE, "L1", "LINE1" },
	{ TYPE, "LTR", "LTR" },
	{ TYPE, "ERV", "LTR" },
	{ NAME, "LTR", "LTR" },
	{ NAME, "HERV", "LTR" },
	{ TYPE, "DNA", "DNA" },
	{ NAME, "B1", "B1" },
	{ TYPE, "B2", "B2" },
	{ TYPE, "SINE", "SINE" },
	{ TYPE, "LINE", "Other" },
	{ NAME, "MML", "Other" },
	{ NAME, "BUR1", "Other" },
	{ TYPE, "Other", "Other" },
	{ TYPE, "Unknown", "Other" },
	{ TYPE, "RNA", "RNA" }
};

static const int Nrpts = sizeof(Rpt) / sizeof(Rpt[0]);

/* Return the index of the first Rpt[] rule whose substring occurs in the
   field it names (name or type), or Nrpts when no rule applies. */
static int find_rule(const char *name, const char *type)
{
	int i;

	for (i = 0; i < Nrpts; ++i) {
		if ((Rpt[i].field == NAME && strstr(name, Rpt[i].substr)) ||
		    (Rpt[i].field == TYPE && strstr(type, Rpt[i].substr)))
			break;
	}
	return i;
}

/* Consume `count` further tokens from the strtok() scan in progress.
   Returns 1 on success, 0 if the line ran out of tokens first. */
static int skip_fields(int count, const char *delims)
{
	while (count-- > 0) {
		if (strtok(NULL, delims) == NULL)
			return 0;
	}
	return 1;
}

/*
 * Usage: repeats_tag [-simple | -genbank | -<integer>] RepeatMasker-file
 *
 * Reads the RepeatMasker output named by the last argument and writes one
 * record per repeat to stdout.
 *
 *   (no option)  print "from to Right|Left <class>" lines using the Rpt[]
 *                table; Low/Simple/Satellite types print "from to Simple".
 *   -simple      clears the `simple` flag, so Low/Simple/Satellite types are
 *                reported on stderr as unknown instead of being printed.
 *   -genbank     print repeat_region feature lines instead.
 *   -<integer>   treat <integer> as the sequence length and abort if a
 *                repeat ends beyond it.
 *
 * Lines whose first non-blank character is not a digit, and lines with
 * fewer than 11 whitespace-separated fields, are skipped silently.
 *
 * NOTE(review): fatal()/fatalf()/ckopen()/same_string()/argv0 come from
 * util.h; fatal() and fatalf() are presumed not to return -- confirm.
 */
int main(int argc, char **argv)
{
	char buf[BUF_SIZE], line[BUF_SIZE], *name, *type, *p;
	int i, from, to, simple = 1, genbank = 0, seq_len = 0, nr = 0, nl = 0;
	FILE *fp;

	argv0 = "repeats_tag";
	if (argc == 3 && same_string(argv[1], "-simple"))
		simple = 0;
	else if (argc == 3 && same_string(argv[1], "-genbank"))
		genbank = 1;
	else if (argc == 3 && argv[1][0] == '-' &&
		 (seq_len = atoi(argv[1]+1)) > 0) {
		/* OK */
	} else if (argc != 2)
		fatalf("args = [-simple] [-genbank] [-integer] RepeatMasker-file\n");

	fp = ckopen(argv[argc-1], "r");

/*
  0   1   2   3     4 5  6       7 8   9       10    11 12 13
413 5.6 0.0 0.0 HUMAN 1 54 (92195) C Alu SINE/Alu (238) 62  9
*/
	printf("%%:repeats\n");
	while (fgets(buf, sizeof(buf), fp)) {
		char key;
		const char *wsp = " \t\n";

		++nl;

		/* Expect field[0] to be an integer */
		for (p = buf; *p == ' '; ++p)
			;
		/* Cast: isdigit() is undefined for negative char values. */
		if (!isdigit((unsigned char)*p))
			continue;

		/* Skip field[0..4]; tokenize a copy so buf stays intact
		   for error messages. */
		strcpy(line, buf);
		if (strtok(line, wsp) == NULL)
			continue;
		if (!skip_fields(4, wsp))
			continue;

		/* Expect field[5] and field[6] to be integers */
		if ((p = strtok(NULL, wsp)) == NULL)
			continue;
		if (sscanf(p, "%d", &from) != 1)
			fatalf("failed to convert start-point: %s", buf);
		if ((p = strtok(NULL, wsp)) == NULL)
			continue;
		if (sscanf(p, "%d", &to) != 1)
			fatalf("failed to convert end-point: %s", buf);
		if (from <= 0 || from >= to) {
			fprintf(stderr, "Addresses out of order: %d %d\n",
				from, to);
			continue;
		}
		if (seq_len > 0 && to > seq_len) {
			fprintf(stderr, "repeat position is %d ", to);
			fprintf(stderr, "whereas sequence length is %d\n",
				seq_len);
			fatal("error in seq1 or RepeatMasker file.");
		}

		/* Skip field[7] */
		if (!skip_fields(1, wsp))
			continue;

		/* Expect field[8] to be "+" or "C" */
		if ((p = strtok(NULL, wsp)) == NULL)
			continue;
		key = *p;
		if (key != '+' && key != 'C')
			fatalf("%s\nImproper format of RepeatMasker file: expected + or C in field[8].",buf);

		/* Expect field[9] to be a name */
		if ((p = strtok(NULL, wsp)) == NULL)
			continue;
		name = p;

		/* Expect field[10] to be a type */
		if ((p = strtok(NULL, wsp)) == NULL)
			continue;
		type = p;

		/* Count every well-formed record, including those emitted in
		   -genbank mode; otherwise the "no repeat elements" check
		   after the loop fires on every non-empty -genbank run. */
		++nr;

		if (genbank) {
			printf(" repeat_region ");
			if (key == '+')
				printf("%d..%d\n", from, to);
			else
				printf("complement(%d..%d)\n", from, to);
			printf(" /rpt_family=\"%s\"\n", name);
			continue;
		}

		i = find_rule(name, type);
		if (i == Nrpts) {
			/* No table rule applies: either a simple/low-complexity
			   repeat, or something unrecognized. */
			if ((strstr(type,"Low") || strstr(type,"Simple") ||
			     strstr(type,"Satellite")) && simple)
				printf("%d %d Simple\n", from, to);
			else {
				fprintf(stderr, "Unknown repeat at %d-%d, ",
					from, to);
				fprintf(stderr, "name = %s and type = %s\n",
					name, type);
			}
		} else
			printf("%d %d %s %s\n", from, to,
			       (key == '+') ? "Right" : "Left", Rpt[i].val);
	}
	fclose(fp);

	/* Input had lines but none parsed as a repeat record. */
	if (nl > 0 && nr == 0)
		fatal("no repeat elements were specified. corrupted file?");

	return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -