📄 sort5.c
字号:
{ /* * field specification */ if (++nfields >= NF) diag(EXIT, catgets(_m_catd, NL_SETN, 4, "too many keys"), ""); fields[nfields] = proto; field(++*argv, 0); } else { /* * file name to sort */ eargv[eargc++] = *argv; } } /* * propagate the default field specification into all field * variables where no fieldspecific bifndrM was given */ q = &fields[0]; for (a = 1; a <= nfields; a++) { p = &fields[a]; if (p->code != proto.code) continue; if (p->ignore != proto.ignore) continue; if (p->fcmp != proto.fcmp) continue; if (p->rflg != proto.rflg) continue; if (p->bflg[0] != proto.bflg[0]) continue; if (p->bflg[1] != proto.bflg[1]) continue; p->code = q->code; p->ignore = q->ignore; p->fcmp = q->fcmp; p->rflg = q->rflg; p->bflg[0] = p->bflg[1] = q->bflg[0]; } /* * if no args given sort stdin */ if (eargc == 0) eargv[eargc++] = "-"; /* * consistency check for check option */ if (cflg && eargc > 1) diag(EXIT, catgets(_m_catd, NL_SETN, 5, "can check only 1 file"), ""); /* * create a safe place to put our output */ safeoutfil(); /* * find a directory for temporary files */ gettmpdir(); /* * set up signal handling */ if (signal(SIGHUP, SIG_IGN) != SIG_IGN) (void) signal(SIGHUP, term); if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void) signal(SIGINT, term); (void) signal(SIGPIPE, term); if (signal(SIGTERM, SIG_IGN) != SIG_IGN) (void) signal(SIGTERM, term); nfiles = eargc; /* * only check sort */ if (cflg) checksort(); /* * sort and/or merge */ if (!mflg) { sort(); doclose(stdin, (char *)0); /* ATT: may close stdin twice */ a = eargc; } else { if (maxrec == 0) maxrec = 512; a = 0; } wasfirst = notfirst = 0; nway = safemem(maxrec); if ((i = nfiles - a) > nway) { /* Do leftovers early */ if ((i %= (nway - 1)) == 0) i = nway - 1; if (i != 1) { os = output((char *)0); merge(a, a+i, 0); a += i; } } for (; a + nway < nfiles || unsafeout && a < eargc; a = i) { i = a + nway; if (i >= nfiles) i = nfiles; os = output((char *)0); merge(a, i, 0); } if (a != nfiles) { os = output(outfil); merge(a, nfiles, 1); } /* * That's all folks */ term(0); /*NOTREACHED*/}/* * checksort -- check whether file is sorted ok */checksort(){ char *lines[2]; /* two lines to compare */ FILE *fp; /* file beeing read */ register int i; /* used to alternate line indices */ register int j; /* used to alternate line indices */ register int r; /* used to alternate line indices */ /* * open the input file to check */ fp = input(0, (char *)0); if (maxrec == 0) maxrec = 512; /* * get two buffers that will be used in turns */ lines[0] = malloc(maxrec); lines[1] = malloc(maxrec); /* * read first line */ if (getline(fp, lines[0], maxrec, 1, 0) == 0) { doclose(fp, setfil(0)); exit(0); } /* * read lines into alternating buffers */ for (i = 0, j = 1; getline(fp, lines[j], maxrec, 1, 0); r=i, i=j, j=r) { /* * check whether they are sorted ok */ if ((r = (*compare)(lines[i], lines[j])) < 0) disorder(catgets(_m_catd, NL_SETN, 7, "disorder: "), lines[j]); if (r == 0 && uflg) disorder(catgets(_m_catd, NL_SETN, 8, "non-unique: "), lines[j]); } /* * free used storage */ free(lines[0]); free(lines[1]); doclose(fp, setfil(0)); exit(0);}/* * sort -- read the input files and sort them a coreload at a time into * temporary files that will be merged during the second phase * * Records are read in from the front of the buffer area. * Pointers to the records are allocated from the back of the buffer. * If a partially read record exhausts the buffer, it is saved and * then copied to the start of the buffer for processing with the * next coreload. * * +------------+-------------+--------+ * | strings | | ptr | * +------------+-------------+--------+ * ^ -> ^ ^ <- ^ * lspace | | ep * cp lp * * CAUTION: * -------- * In the code segment marked below malloc may not be used, and this * includes all functions called! Functions that use malloc for * temporary storage and return the memory before they return are ok. */sort(){ register char *cp; register char **lp; /* pointer to pointer to data read */ FILE *iop; /* pointer to file beeing read */ char *keep = (char *)0; /* pointer to start of buffer overflow area */ char *ekeep = (char *)0;/* pointer to end of buffer overflow area */ char *lspace; /* low end of arena */ unsigned alloc; /* memory known to be available */ char **ep; /* pointer to current end of buffer */ int n; int done = 0; /* set when all input files have been read */ int i = 0; /* index of input file */ int first = 1; /* reading first file and memory still avail */ char inbuf[BUFSIZ]; /* buffer for input file to not destroy arena */ /* * get memory */ if ((alloc = getmem(tryfor, (unsigned)0, &lspace)) == 0) diag(EXIT, catgets(_m_catd, NL_SETN, 9, "allocation error before sort"), ""); /* !! START OF CODE SEGMENT WHERE MALLOC MAY NOT BE USED !! */ ep = (char **)(lspace + alloc); iop = input(i++, inbuf); do { lp = ep - 1; cp = lspace; *lp-- = cp; /* * move record from previous coreload */ if (keep != (char *)0) { recstart = cp; for(; keep < ekeep; *cp++ = *keep++); } /* * test for negative n below only necessary for small machines */#ifdef SMALL while ((n = (char *)(lp - 1) - cp) > 1 || n < 0) { if (n < 0) n = MAXINT;#else while ((n = (char *)(lp - 1) - cp) > 1) {#endif if ((n = getline(iop, cp, n, 0, 0)) == 0) { /* * EOF on input file */ doclose(iop, setfil(i - 1)); if (keep != (char *)0) ; else if (i < eargc) { /* * open the next file */ iop = input(i++, inbuf); continue; } else { /* * have seen all files */ done++; break; } } /* * advance buffer pointer */ cp += n - 1; /* * check whether complete line read */ if (getstate == OKAY) { cp += 2; if ((n = cp - *(lp + 1)) > maxrec) maxrec = n; /* * if tagging force alignment on short boundary */ if (tag && ((int)cp & 0x1)) cp++; *lp-- = cp; keep = (char *)0; } else { /* * the buffer is full */ keep = *(lp + 1); ekeep = ++cp; } if (first && (n = (char *)lp - cp) <= (2 + sizeof(lp)) && n >= 0) { /* * full buffer, try to get more memory */ tryfor = getmem(tryfor, alloc, &lspace); if (tryfor == 0) /* out of mem */ first = 0; else { register char **lmp; register char **mp; /* * calculate where pointers are now */ lspace += memmoved; cp += memmoved; lp += memmoved/sizeof(char **); ep += memmoved/sizeof(char **); keep += memmoved; ekeep += memmoved; /* * move pointers from end of old buffer * to end of new buffer */ lmp = ep + (tryfor/sizeof(char **) - 1); for (mp = ep - 1; mp > lp;) *lmp-- = *mp-- + memmoved; ep += tryfor/sizeof(char **); lp += tryfor/sizeof(char **); alloc += tryfor; } } } if (keep != (char *)0 && *(lp + 1) == lspace) diag(TERM, catgets(_m_catd, NL_SETN, 10, "fatal: record too large"),""); first = 0; lp += 2; /* * open output file: if not seen all input yet divert to * another temporary file else write to final destination. */ os = output((done == 0 || nfiles != eargc) ? (char *)0 : outfil); /* * sort coreload and write it to file os */ msort(lp, ep, done && (nfiles == eargc)); doclose(os, catgets(_m_catd, NL_SETN, 11,"sorting")); } while (done == 0); /* !! END OF CODE SEGMENT WHERE MALLOC MAY NOT BE USED !! */ /* * free meory used by sort */ free(lspace);}/* * msort -- sort a coreload of lines */msort(a, b, done)char **a; /* address of pointer to first line in core */char **b; /* address of pointer to last line in core */int done; /* true if we are doing final output */{ register struct btree **tp; register int i, j, n; char *save; i = (b - a); if (i < 1) return; else if (i == 1) { /* * one line sort - trivial */ (void)putline(*a, os, done); return; } else if (i >= TREEZ) n = TREEZ; /* number of blocks of records */ else n = i; /* * break into n sorted subgroups of approximately equal size */ tp = &(treep[0]); j = 0; do { (*tp++)->rn = j; b = a + (blkcnt[j] = i / n); qksort(a, b); blkcur[j] = a = b; i -= blkcnt[j++]; } while (--n > 0); n = j; /* * make a sorted binary tree using the first record in each group */ for (i = 0; i < n;) { (*--tp)->rp = *(--blkcur[--j]); insert(tp, ++i); } wasfirst = notfirst = 0; bonus = cmpsave(n); j = uflg; tp = &(treep[0]); while (n > 0) { (void)putline((*tp)->rp, os, done); if (j) save = (*tp)->rp; /* * Get another record and insert. Bypass repeats if uflg */ do { i = (*tp)->rn; if (j) while((blkcnt[i] > 1) && (**(blkcur[i]-1) == '\0')) { --blkcnt[i]; --blkcur[i]; } if (--blkcnt[i] > 0) { (*tp)->rp = *(--blkcur[i]); insert(tp, n); } else { if (--n <= 0) break; bonus = cmpsave(n); tp++; } } while (j && (*compare)((*tp)->rp, save) == 0); }}/* * insert -- insert an element into sorted tree * * Insert the element at tp[0] into its proper place in the array of size n * Pretty much Algorith B from 6.2.1 of Knuth, Sorting and Searching * Special case for data that appears to be in correct order */insert(tp, n)struct btree **tp;int n;{ register struct btree **lop; register struct btree **hip; register struct btree **midp; register int c; struct btree *hold; midp = lop = tp; hip = lop++ + (n - 1); if ((wasfirst > notfirst) && (n > 2) && ((*compare)((*tp)->rp, (*lop)->rp) >= 0)) { wasfirst += bonus; return; } while ((c = hip - lop) >= 0) { /* * leave midp at the one tp is in front of */ midp = lop + c / 2; if ((c = (*compare)((*tp)->rp, (*midp)->rp)) == 0) break; /* match */ if (c < 0) lop = ++midp; /* c < 0 => tp > midp */ else hip = midp - 1; /* c > 0 => tp < midp */ } c = midp - tp; if (--c > 0) { /* * number of moves to get tp just before midp */ hip = tp; lop = hip++; hold = *lop; do *lop++ = *hip++; while (--c > 0); *lop = hold; notfirst++; } else wasfirst += bonus;}/* * merge -- merge two already sorted files */merge(a, b, final)int a;int b;int final; /* TRUE if writing final output */{ FILE *tfile[N]; register int nf; /* number of merge files */ register struct btree **tp; register int i; register int j; char *save = malloc(maxrec); /* buffer for one saved line */ char *buffer = malloc(nway * maxrec); /* buffer for files to merge */ char *fbuffer = malloc(nway * BUFSIZ); /* buffer or input */ if (buffer == (char *)0 || save == (char *)0 || fbuffer == (char *)0) diag(TERM, catgets(_m_catd, NL_SETN, 12, "BUG: reallocation error in merge"), ""); tp = &(treep[0]); for (nf = 0, i = a; i < b; i++) { tfile[nf] = input(i, &fbuffer[nf * BUFSIZ]); (*tp)->rp = &buffer[nf * maxrec]; (*tp)->rn = nf; if (getline(tfile[nf], (*tp)->rp, maxrec, 1, tag)) { nf++; tp++; } else doclose(tfile[nf], setfil(i)); } /* * make a sorted btree from the first record of each file */ for (--tp, i = 1; i++ < nf;) insert(--tp, i); bonus = cmpsave(nf); tp = &(treep[0]); j = uflg; while (nf > 0) { (void)putline((*tp)->rp, os, final); if (j) strcpy(save, (*tp)->rp); /* * Get another record and insert. Bypass repeats if uflg */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -