📄 sort.c
字号:
{ if (temp_dir_count == temp_dir_alloc) { temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16; temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc); } temp_dirs[temp_dir_count++] = dir;}/* Search through the list of temporary files for NAME; remove it if it is found on the list. */static voidzaptemp (const char *name){ struct tempnode *volatile *pnode; struct tempnode *node; for (pnode = &temphead; (node = *pnode); pnode = &node->next) if (node->name == name) { unlink (name); *pnode = node->next; free ((char *) node); break; }}#if HAVE_NL_LANGINFOstatic intstruct_month_cmp (const void *m1, const void *m2){ return strcmp (((const struct month *) m1)->name, ((const struct month *) m2)->name);}#endif/* Initialize the character class tables. */static voidinittables (void){ int i; for (i = 0; i < UCHAR_LIM; ++i) { if (ISBLANK (i)) blanks[i] = 1; if (!ISPRINT (i)) nonprinting[i] = 1; if (!ISALNUM (i) && !ISBLANK (i)) nondictionary[i] = 1; if (ISLOWER (i)) fold_toupper[i] = toupper (i); else fold_toupper[i] = i; }#if HAVE_NL_LANGINFO /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { for (i = 0; i < MONTHS_PER_YEAR; i++) { char *s; size_t s_len; size_t j; char *name; s = (char *) nl_langinfo (ABMON_1 + i); s_len = strlen (s); monthtab[i].name = name = (char *) xmalloc (s_len + 1); monthtab[i].val = i + 1; for (j = 0; j < s_len; j++) name[j] = fold_toupper[UCHAR (s[j])]; name[j] = '\0'; } qsort ((void *) monthtab, MONTHS_PER_YEAR, sizeof (struct month), struct_month_cmp); }#endif}/* Specify the amount of main memory to use when sorting. */static voidspecify_sort_size (char const *s){ uintmax_t n; char *suffix; enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); /* The default unit is KiB. */ if (e == LONGINT_OK && ISDIGIT (suffix[-1])) { if (n <= UINTMAX_MAX / 1024) n *= 1024; else e = LONGINT_OVERFLOW; } /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) switch (suffix[0]) { case 'b': e = LONGINT_OK; break; case '%': { double mem = physmem_total () * n / 100; /* Use "<", not "<=", to avoid problems with rounding. */ if (mem < UINTMAX_MAX) { n = mem; e = LONGINT_OK; } else e = LONGINT_OVERFLOW; } break; } if (e == LONGINT_OK) { sort_size = n; if (sort_size == n) { sort_size = MAX (sort_size, MIN_SORT_SIZE); return; } e = LONGINT_OVERFLOW; } STRTOL_FATAL_ERROR (s, _("sort size"), e);}/* Return the default sort size. */static size_tdefault_sort_size (void){ /* Let MEM be available memory or 1/8 of total memory, whichever is greater. */ double avail = physmem_available (); double total = physmem_total (); double mem = MAX (avail, total / 8); struct rlimit rlimit; /* Let SIZE be MEM, but no more than the maximum object size or system resource limits. Avoid the MIN macro here, as it is not quite right when only one argument is floating point. Don't bother to check for values like RLIM_INFINITY since in practice they are not much less than SIZE_MAX. */ size_t size = SIZE_MAX; if (mem < size) size = mem; if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) size = rlimit.rlim_cur;#ifdef RLIMIT_AS if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) size = rlimit.rlim_cur;#endif /* Leave a large safety margin for the above limits, as failure can occur when they are exceeded. */ size /= 2;#ifdef RLIMIT_RSS /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. Exceeding RSS is not fatal, but can be quite slow. */ if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) size = rlimit.rlim_cur / 16 * 15;#endif /* Use no less than the minimum. */ return MAX (size, MIN_SORT_SIZE);}/* Return the sort buffer size to use with the input files identified by FPS and FILES, which are alternate paths to the same files. NFILES gives the number of input files; NFPS may be less. Assume that each input line requires LINE_BYTES extra bytes' worth of line information. Return at most SIZE_BOUND. */static size_tsort_buffer_size (FILE *const *fps, int nfps, char *const *files, int nfiles, size_t line_bytes, size_t size_bound){ /* In the worst case, each input byte is a newline. */ size_t worst_case_per_input_byte = line_bytes + 1; /* Keep enough room for one extra input line and an extra byte. This extra room might be needed when preparing to read EOF. */ size_t size = worst_case_per_input_byte + 1; int i; for (i = 0; i < nfiles; i++) { struct stat st; off_t file_size; size_t worst_case; if ((i < nfps ? fstat (fileno (fps[i]), &st) : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st) : stat (files[i], &st)) != 0) die (_("stat failed"), files[i]); file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS; /* Add the amount of memory needed to represent the worst case where the input consists entirely of newlines followed by a single non-newline. Check for overflow. */ worst_case = file_size * worst_case_per_input_byte + 1; if (file_size != worst_case / worst_case_per_input_byte || size_bound - size <= worst_case) return size_bound; size += worst_case; } return size;}/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES must be at least sizeof (struct line). Allocate ALLOC bytes initially. */static voidinitbuf (struct buffer *buf, size_t line_bytes, size_t alloc){ /* Ensure that the line array is properly aligned. If the desired size cannot be allocated, repeatedly halve it until allocation succeeds. The smaller allocation may hurt overall performance, but that's better than failing. */ for (;;) { alloc += sizeof (struct line) - alloc % sizeof (struct line); buf->buf = malloc (alloc); if (buf->buf) break; alloc /= 2; if (alloc <= line_bytes + 1) xalloc_die (); } buf->line_bytes = line_bytes; buf->alloc = alloc; buf->used = buf->left = buf->nlines = 0; buf->eof = 0;}/* Return one past the limit of the line array. */static inline struct line *buffer_linelim (struct buffer const *buf){ return (struct line *) (buf->buf + buf->alloc);}/* Return a pointer to the first character of the field specified by KEY in LINE. */static char *begfield (const struct line *line, const struct keyfield *key){ register char *ptr = line->text, *lim = ptr + line->length - 1; register size_t sword = key->sword; register size_t schar = key->schar; register size_t remaining_bytes; if (tab) while (ptr < lim && sword--) { while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim) ++ptr; } else while (ptr < lim && sword--) { while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; while (ptr < lim && !blanks[UCHAR (*ptr)]) ++ptr; } if (key->skipsblanks) while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; /* Advance PTR by SCHAR (if possible), but no further than LIM. */ remaining_bytes = lim - ptr; if (schar < remaining_bytes) ptr += schar; else ptr = lim; return ptr;}/* Return the limit of (a pointer to the first character after) the field in LINE specified by KEY. */static char *limfield (const struct line *line, const struct keyfield *key){ register char *ptr = line->text, *lim = ptr + line->length - 1; register size_t eword = key->eword, echar = key->echar; register size_t remaining_bytes; /* Note: from the POSIX spec: The leading field separator itself is included in a field when -t is not used. FIXME: move this comment up... */ /* Move PTR past EWORD fields or to one past the last byte on LINE, whichever comes first. If there are more than EWORD fields, leave PTR pointing at the beginning of the field having zero-based index, EWORD. If a delimiter character was specified (via -t), then that `beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first `blank' character after the preceding field. */ if (tab) while (ptr < lim && eword--) { while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim && (eword | echar)) ++ptr; } else while (ptr < lim && eword--) { while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; while (ptr < lim && !blanks[UCHAR (*ptr)]) ++ptr; }#ifdef POSIX_UNSPECIFIED /* The following block of code makes GNU sort incompatible with standard Unix sort, so it's ifdef'd out for now. The POSIX spec isn't clear on how to interpret this. FIXME: request clarification. From: kwzh@gnu.ai.mit.edu (Karl Heuer) Date: Thu, 30 May 96 12:20:41 -0400 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] [...]I believe I've found another bug in `sort'. $ cat /tmp/sort.in a b c 2 d pq rs 1 t $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in a b c 2 d pq rs 1 t $ /bin/sort -k1.7,1.7 </tmp/sort.in pq rs 1 t a b c 2 d Unix sort produced the answer I expected: sort on the single character in column 7. GNU sort produced different results, because it disagrees on the interpretation of the key-end spec "M.N". Unix sort reads this as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean "skip M-1 fields, then either N-1 characters or the rest of the current field, whichever comes first". This extra clause applies only to key-ends, not key-starts. */ /* Make LIM point to the end of (one byte past) the current field. */ if (tab) { char *newlim; newlim = memchr (ptr, tab, lim - ptr); if (newlim) lim = newlim; } else { char *newlim; newlim = ptr; while (newlim < lim && blanks[UCHAR (*newlim)]) ++newlim; while (newlim < lim && !blanks[UCHAR (*newlim)]) ++newlim; lim = newlim; }#endif /* If we're skipping leading blanks, don't start counting characters until after skipping past any leading blanks. */ if (key->skipsblanks) while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; /* Advance PTR by ECHAR (if possible), but no further than LIM. */ remaining_bytes = lim - ptr; if (echar < remaining_bytes) ptr += echar; else ptr = lim; return ptr;}/* FIXME */static voidtrim_trailing_blanks (const char *a_start, char **a_end){ while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))]) --(*a_end);}/* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line table too. FILE is the name of the file corresponding to FP. Return nonzero if some input was read. */static intfillbuf (struct buffer *buf, register FILE *fp, char const *file){ struct keyfield const *key = keylist; char eol = eolchar; size_t line_bytes = buf->line_bytes; size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; if (buf->eof) return 0; if (buf->used != buf->left) { memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); buf->used = buf->left; buf->nlines = 0; } for (;;) { char *ptr = buf->buf + buf->used; struct line *linelim = buffer_linelim (buf); struct line *line = linelim - buf->nlines; size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; char *line_start = buf->nlines ? line->text + line->length : buf->buf; while (line_bytes + 1 < avail) { /* Read as many bytes as possible, but do not read so many bytes that there might not be enough room for the corresponding line array. The worst case is when the rest of the input file consists entirely of newlines, except that the last byte is not a newline. */ size_t readsize = (avail - 1) / (line_bytes + 1); size_t bytes_read = fread (ptr, 1, readsize, fp); char *ptrlim = ptr + bytes_read; char *p; avail -= bytes_read; if (bytes_read != readsize) { if (ferror (fp)) die (_("read failed"), file); if (feof (fp)) { buf->eof = 1; if (buf->buf == ptrlim) return 0; if (ptrlim[-1] != eol) *ptrlim++ = eol; } } /* Find and record each line in the just-read input. */ while ((p = memchr (ptr, eol, ptrlim - ptr))) { ptr = p + 1; line--; line->text = line_start; line->length = ptr - line_start; mergesize = MAX (mergesize, line->length); avail -= line_bytes; if (key) { /* Precompute the position of the first key for efficiency. */ line->keylim = (key->eword == (size_t) -1 ? p : limfield (line, key));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -