📄 sort.c
字号:
#include <u.h>#include <libc.h>#include <bio.h>/*bugs: 00/ff for end of file can conflict with 00/ff characters*/enum{ Nline = 100000, /* default max number of lines saved in memory */ Nmerge = 10, /* max number of temporary files merged */ Nfield = 20, /* max number of argument fields */ Bflag = 1<<0, /* flags per field */ B1flag = 1<<1, Dflag = 1<<2, Fflag = 1<<3, Gflag = 1<<4, Iflag = 1<<5, Mflag = 1<<6, Nflag = 1<<7, Rflag = 1<<8, Wflag = 1<<9, NSstart = 0, /* states for number to key decoding */ NSsign, NSzero, NSdigit, NSpoint, NSfract, NSzerofract, NSexp, NSexpsign, NSexpdigit,};typedef struct Line Line;typedef struct Key Key;typedef struct Merge Merge;typedef struct Field Field;struct Line{ Key* key; int llen; /* always >= 1 */ uchar line[1]; /* always ends in '\n' */};struct Merge{ Key* key; /* copy of line->key so (Line*) looks like (Merge*) */ Line* line; /* line at the head of a merged temp file */ int fd; /* file descriptor */ Biobuf b; /* iobuf for reading a temp file */};struct Key{ int klen; uchar key[1];};struct Field{ int beg1; int beg2; int end1; int end2; long flags; uchar mapto[256]; void (*dokey)(Key*, uchar*, uchar*, Field*);};struct args{ char* ofile; char* tname; Rune tabchar; char cflag; char uflag; char vflag; int nfield; int nfile; Field field[Nfield]; Line** linep; long nline; /* number of lines in this temp file */ long lineno; /* overall ordinal for -s option */ int ntemp; long mline; /* max lines per file */} args;extern int latinmap[];extern Rune* month[12];void buildkey(Line*);void doargs(int, char*[]);void dofield(char*, int*, int*, int, int);void dofile(Biobuf*);void dokey_(Key*, uchar*, uchar*, Field*);void dokey_dfi(Key*, uchar*, uchar*, Field*);void dokey_gn(Key*, uchar*, uchar*, Field*);void dokey_m(Key*, uchar*, uchar*, Field*);void dokey_r(Key*, uchar*, uchar*, Field*);void done(char*);int kcmp(Key*, Key*);void makemapd(Field*);void makemapm(Field*);void mergefiles(int, int, Biobuf*);void mergeout(Biobuf*);void newfield(void);Line* newline(Biobuf*);void nomem(void);void notifyf(void*, char*);void printargs(void);void printout(Biobuf*);void setfield(int, int);uchar* skip(uchar*, int, int, int, int);void sort4(void*, ulong);char* tempfile(int);void tempout(void);void lineout(Biobuf*, Line*);voidmain(int argc, char *argv[]){ int i, f; char *s; Biobuf bbuf; notify(notifyf); /**/ doargs(argc, argv); if(args.vflag) printargs(); for(i=1; i<argc; i++) { s = argv[i]; if(s == 0) continue; if(strcmp(s, "-") == 0) { Binit(&bbuf, 0, OREAD); dofile(&bbuf); Bterm(&bbuf); continue; } f = open(s, OREAD); if(f < 0) { fprint(2, "sort: open %s: %r\n", s); done("open"); } Binit(&bbuf, f, OREAD); dofile(&bbuf); Bterm(&bbuf); close(f); } if(args.nfile == 0) { Binit(&bbuf, 0, OREAD); dofile(&bbuf); Bterm(&bbuf); } if(args.cflag) done(0); if(args.vflag) fprint(2, "=========\n"); f = 1; if(args.ofile) { f = create(args.ofile, OWRITE, 0666); if(f < 0) { fprint(2, "sort: create %s: %r\n", args.ofile); done("create"); } } Binit(&bbuf, f, OWRITE); if(args.ntemp) { tempout(); mergeout(&bbuf); } else { printout(&bbuf); } Bterm(&bbuf); done(0);}voiddofile(Biobuf *b){ Line *l, *ol; int n; if(args.cflag) { ol = newline(b); if(ol == 0) return; for(;;) { l = newline(b); if(l == 0) break; n = kcmp(ol->key, l->key); if(n > 0 || (n == 0 && args.uflag)) { fprint(2, "sort: -c file not in sort\n"); /**/ done("order"); } free(ol->key); free(ol); ol = l; } return; } if(args.linep == 0) { args.linep = malloc(args.mline * sizeof(args.linep)); if(args.linep == 0) nomem(); } for(;;) { l = newline(b); if(l == 0) break; if(args.nline >= args.mline) tempout(); args.linep[args.nline] = l; args.nline++; args.lineno++; }}voidnotifyf(void*, char *s){ if(strcmp(s, "interrupt") == 0) done(0); if(strcmp(s, "hangup") == 0) done(0); if(strcmp(s, "kill") == 0) done(0); if(strncmp(s, "sys: write on closed pipe", 25) == 0) done(0); fprint(2, "sort: note: %s\n", s); abort();}Line*newline(Biobuf *b){ Line *l; char *p; int n, c; p = Brdline(b, '\n'); n = Blinelen(b); if(p == 0) { if(n == 0) return 0; l = 0; for(n=0;;) { if((n & 31) == 0) { l = realloc(l, sizeof(Line) + (n+31)*sizeof(l->line[0])); if(l == 0) nomem(); } c = Bgetc(b); if(c < 0) { fprint(2, "sort: newline added\n"); c = '\n'; } l->line[n++] = c; if(c == '\n') break; } l->llen = n; buildkey(l); return l; } l = malloc(sizeof(Line) + (n-1)*sizeof(l->line[0])); if(l == 0) nomem(); l->llen = n; memmove(l->line, p, n); buildkey(l); return l;}voidlineout(Biobuf *b, Line *l){ int n, m; n = l->llen; m = Bwrite(b, l->line, n); if(n != m) exits("write");}voidtempout(void){ long n; Line **lp, *l; char *tf; int f; Biobuf tb; sort4(args.linep, args.nline); tf = tempfile(args.ntemp); args.ntemp++; f = create(tf, OWRITE, 0666); if(f < 0) { fprint(2, "sort: create %s: %r\n", tf); done("create"); } Binit(&tb, f, OWRITE); lp = args.linep; for(n=args.nline; n>0; n--) { l = *lp++; lineout(&tb, l); free(l->key); free(l); } args.nline = 0; Bterm(&tb); close(f);}voiddone(char *xs){ int i; for(i=0; i<args.ntemp; i++) remove(tempfile(i)); exits(xs);}voidnomem(void){ fprint(2, "sort: out of memory\n"); done("mem");}char*tempfile(int n){ static char file[100]; static uint pid; char *dir; dir = "/tmp"; if(args.tname) dir = args.tname; if(strlen(dir) >= nelem(file)-20) { fprint(2, "temp file directory name is too long: %s\n", dir); done("tdir"); } if(pid == 0) { pid = getpid(); if(pid == 0) { pid = time(0); if(pid == 0) pid = 1; } } sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n); return file;}voidmergeout(Biobuf *b){ int n, i, f; char *tf; Biobuf tb; for(i=0; i<args.ntemp; i+=n) { n = args.ntemp - i; if(n > Nmerge) { tf = tempfile(args.ntemp); args.ntemp++; f = create(tf, OWRITE, 0666); if(f < 0) { fprint(2, "sort: create %s: %r\n", tf); done("create"); } Binit(&tb, f, OWRITE); n = Nmerge; mergefiles(i, n, &tb); Bterm(&tb); close(f); } else mergefiles(i, n, b); }}voidmergefiles(int t, int n, Biobuf *b){ Merge *m, *mp, **mmp; Key *ok; Line *l; char *tf; int i, f, nn; mmp = malloc(n*sizeof(*mmp)); mp = malloc(n*sizeof(*mp)); if(mmp == 0 || mp == 0) nomem(); nn = 0; m = mp; for(i=0; i<n; i++,m++) { tf = tempfile(t+i); f = open(tf, OREAD); if(f < 0) { fprint(2, "sort: reopen %s: %r\n", tf); done("open"); } m->fd = f; Binit(&m->b, f, OREAD); mmp[nn] = m; l = newline(&m->b); if(l == 0) continue; nn++; m->line = l; m->key = l->key; } ok = 0; for(;;) { sort4(mmp, nn); m = *mmp; if(nn == 0) break; for(;;) { l = m->line; if(args.uflag && ok && kcmp(ok, l->key) == 0) { free(l->key); free(l); } else { lineout(b, l); if(ok) free(ok); ok = l->key; free(l); } l = newline(&m->b); if(l == 0) { nn--; mmp[0] = mmp[nn]; break; } m->line = l; m->key = l->key; if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0) break; } } if(ok) free(ok); m = mp; for(i=0; i<n; i++,m++) { Bterm(&m->b); close(m->fd); } free(mp); free(mmp);}intkcmp(Key *ka, Key *kb){ int n, m; /* * set n to length of smaller key */ n = ka->klen; m = kb->klen; if(n > m) n = m; return memcmp(ka->key, kb->key, n);}voidprintout(Biobuf *b){ long n; Line **lp, *l; Key *ok; sort4(args.linep, args.nline); lp = args.linep; ok = 0; for(n=args.nline; n>0; n--) { l = *lp++; if(args.uflag && ok && kcmp(ok, l->key) == 0) continue; lineout(b, l); ok = l->key; }}voidsetfield(int n, int c){ Field *f; f = &args.field[n]; switch(c) { default: fprint(2, "sort: unknown option: field.%C\n", c); done("option"); case 'b': /* skip blanks */ f->flags |= Bflag; break; case 'd': /* directory order */ f->flags |= Dflag; break; case 'f': /* fold case */ f->flags |= Fflag; break; case 'g': /* floating point -n case */ f->flags |= Gflag; break; case 'i': /* ignore non-ascii */ f->flags |= Iflag; break; case 'M': /* month */ f->flags |= Mflag; break; case 'n': /* numbers */ f->flags |= Nflag; break; case 'r': /* reverse */ f->flags |= Rflag; break; case 'w': /* ignore white */ f->flags |= Wflag; break; }}voiddofield(char *s, int *n1, int *n2, int off1, int off2){ int c, n; c = *s++; if(c >= '0' && c <= '9') { n = 0; while(c >= '0' && c <= '9') { n = n*10 + (c-'0'); c = *s++; } n -= off1; /* posix committee: rot in hell */ if(n < 0) { fprint(2, "sort: field offset must be positive\n"); done("option"); } *n1 = n; } if(c == '.') { c = *s++; if(c >= '0' && c <= '9') { n = 0; while(c >= '0' && c <= '9') { n = n*10 + (c-'0'); c = *s++; } n -= off2; if(n < 0) { fprint(2, "sort: character offset must be positive\n"); done("option"); } *n2 = n; } } while(c != 0) { setfield(args.nfield, c); c = *s++; }}voidprintargs(void){ int i, n; Field *f; char *prefix; fprint(2, "sort"); for(i=0; i<=args.nfield; i++) { f = &args.field[i]; prefix = " -"; if(i) { n = f->beg1; if(n >= 0) fprint(2, " +%d", n); else fprint(2, " +*"); n = f->beg2; if(n >= 0) fprint(2, ".%d", n); else fprint(2, ".*"); if(f->flags & B1flag) fprint(2, "b"); n = f->end1; if(n >= 0) fprint(2, " -%d", n); else fprint(2, " -*"); n = f->end2; if(n >= 0) fprint(2, ".%d", n); else fprint(2, ".*"); prefix = ""; } if(f->flags & Bflag) fprint(2, "%sb", prefix); if(f->flags & Dflag) fprint(2, "%sd", prefix); if(f->flags & Fflag) fprint(2, "%sf", prefix); if(f->flags & Gflag) fprint(2, "%sg", prefix); if(f->flags & Iflag) fprint(2, "%si", prefix); if(f->flags & Mflag) fprint(2, "%sM", prefix); if(f->flags & Nflag) fprint(2, "%sn", prefix); if(f->flags & Rflag) fprint(2, "%sr", prefix); if(f->flags & Wflag) fprint(2, "%sw", prefix); } if(args.cflag) fprint(2, " -c"); if(args.uflag) fprint(2, " -u"); if(args.ofile) fprint(2, " -o %s", args.ofile); if(args.mline != Nline) fprint(2, " -l %ld", args.mline); fprint(2, "\n");}voidnewfield(void){ int n; Field *f; n = args.nfield + 1; if(n >= Nfield) { fprint(2, "sort: too many fields specified\n"); done("option"); } args.nfield = n; f = &args.field[n]; f->beg1 = -1; f->beg2 = -1; f->end1 = -1; f->end2 = -1;}voiddoargs(int argc, char *argv[]){ int i, c, hadplus; char *s, *p, *q; Field *f; hadplus = 0; args.mline = Nline; for(i=1; i<argc; i++) { s = argv[i]; c = *s++; if(c == '-') { c = *s; if(c == 0) /* forced end of arg marker */ break; argv[i] = 0; /* clobber args processed */ if(c == '.' || (c >= '0' && c <= '9')) { if(!hadplus) newfield(); f = &args.field[args.nfield]; dofield(s, &f->end1, &f->end2, 0, 0); hadplus = 0; continue; } while(c = *s++) switch(c) { case '-': /* end of options */ i = argc; continue; case 'T': /* temp directory */ if(*s == 0) { i++; if(i < argc) { args.tname = argv[i]; argv[i] = 0; } } else args.tname = s; s = strchr(s, 0); break; case 'o': /* output file */ if(*s == 0) { i++; if(i < argc) { args.ofile = argv[i]; argv[i] = 0; } } else args.ofile = s; s = strchr(s, 0); break; case 'k': /* posix key (what were they thinking?) */ p = 0; if(*s == 0) { i++; if(i < argc) { p = argv[i]; argv[i] = 0; } } else p = s; s = strchr(s, 0); if(p == 0) break; newfield(); q = strchr(p, ','); if(q) *q++ = 0; f = &args.field[args.nfield]; dofield(p, &f->beg1, &f->beg2, 1, 1); if(f->flags & Bflag) { f->flags |= B1flag; f->flags &= ~Bflag; } if(q) { dofield(q, &f->end1, &f->end2, 1, 0); if(f->end2 <= 0) f->end1++; } hadplus = 0; break; case 't': /* tab character */ if(*s == 0) { i++; if(i < argc) { chartorune(&args.tabchar, argv[i]); argv[i] = 0; } } else s += chartorune(&args.tabchar, s); if(args.tabchar == '\n') { fprint(2, "aw come on, rob\n"); done("rob"); } break; case 'c': /* check order */ args.cflag = 1; break; case 'u': /* unique */ args.uflag = 1; break; case 'v': /* debugging noise */ args.vflag = 1; break; case 'l': if(*s == 0) { i++; if(i < argc) { args.mline = atol(argv[i]); argv[i] = 0; } } else args.mline = atol(s); s = strchr(s, 0); break; case 'M': /* month */ case 'b': /* skip blanks */ case 'd': /* directory order */ case 'f': /* fold case */ case 'g': /* floating numbers */ case 'i': /* ignore non-ascii */ case 'n': /* numbers */ case 'r': /* reverse */ case 'w': /* ignore white */ if(args.nfield > 0) fprint(2, "sort: global field set after -k\n"); setfield(0, c); break; case 'm': /* option m silently ignored but required by posix */ break; default: fprint(2, "sort: unknown option: -%C\n", c); done("option"); } continue; } if(c == '+') { argv[i] = 0; /* clobber args processed */ c = *s; if(c == '.' || (c >= '0' && c <= '9')) { newfield(); f = &args.field[args.nfield]; dofield(s, &f->beg1, &f->beg2, 0, 0); if(f->flags & Bflag) { f->flags |= B1flag; f->flags &= ~Bflag; } hadplus = 1; continue; } fprint(2, "sort: unknown option: +%C\n", c); done("option"); } args.nfile++; } for(i=0; i<=args.nfield; i++) { f = &args.field[i]; /* * global options apply to fields that * specify no options */ if(f->flags == 0) { f->flags = args.field[0].flags; if(args.field[0].flags & Bflag) f->flags |= B1flag; } /* * build buildkey specification */ switch(f->flags & ~(Bflag|B1flag)) { default: fprint(2, "sort: illegal combination of flags: %lx\n", f->flags); done("option"); case 0: f->dokey = dokey_; break; case Rflag: f->dokey = dokey_r; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -