📄 sort.c
字号:
/* sort - sort lines of text (with all kinds of options).
   Copyright (C) 88, 1991-1999 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

   Written December 1988 by Mike Haertel.
   The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
   or (US mail) as Mike Haertel c/o Free Software Foundation.

   Ørn E. Hansen added NLS support in 1997. */

#include <config.h>

#include <sys/types.h>
#include <signal.h>
#include <stdio.h>
#include <assert.h>
#include "system.h"
#include "long-options.h"
#include "error.h"
#include "hard-locale.h"
#include "memcoll.h"
#include "xalloc.h"

/* The official name of this program (e.g., no `g' prefix). */
#define PROGRAM_NAME "sort"

#define AUTHORS "Mike Haertel"

/*
 * The following two definitions were added by Shigio YAMAGUCHI
 * to include this command into GNU GLOBAL package.
 */
#undef GNU_PACKAGE
#define GNU_PACKAGE "GNU Textutils"
#undef VERSION
#define VERSION "2.0"

#if defined ENABLE_NLS && HAVE_LANGINFO_H
# include <langinfo.h>
#endif

/* Longest file name component allowed in directory Dir; a fixed 255 is
   assumed when pathconf cannot be asked. */
#if HAVE_PATHCONF && defined _PC_NAME_MAX
# define NAME_MAX_IN_DIR(Dir) pathconf (Dir, _PC_NAME_MAX)
#else
# define NAME_MAX_IN_DIR(Dir) 255
#endif

#ifndef STDC_HEADERS
double strtod ();
#endif

char *xstrdup ();

/* Undefine, to avoid warning about redefinition on some systems. */
/* Note: both macros evaluate their arguments more than once, so do not
   pass expressions with side effects. */
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#undef max
#define max(a, b) ((a) > (b) ? (a) : (b))

/* Number of distinct unsigned char values; size of the per-character
   lookup tables below. */
#define UCHAR_LIM (UCHAR_MAX + 1)
/* Convert C to unsigned char so it can safely index those tables. */
#define UCHAR(c) ((unsigned char) (c))

#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* Use this as exit status in case of error, not EXIT_FAILURE. This
   is necessary because EXIT_FAILURE is usually 1 and POSIX requires
   that sort exit with status 1 IFF invoked with -c and the input is
   not properly sorted. Any other irregular exit must exit with a
   status code greater than 1. */
#define SORT_FAILURE 2

#define C_DECIMAL_POINT '.'
#define NEGATION_SIGN '-'
#define NUMERIC_ZERO '0'

#ifdef ENABLE_NLS

static char decimal_point;
static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */

/* Nonzero if the corresponding locales are hard. */
static int hard_LC_COLLATE;
static int hard_LC_CTYPE;
# if HAVE_NL_LANGINFO
static int hard_LC_TIME;
# endif

# define IS_THOUSANDS_SEP(x) ((x) == th_sep)

#else

/* Without NLS the decimal point is fixed and there is never a
   thousands separator. */
# define decimal_point C_DECIMAL_POINT
# define IS_THOUSANDS_SEP(x) 0

#endif

/* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both };

/* The character marking end of line. Default to \n. */
int eolchar = '\n';

/* Lines are held in core as counted strings. */
struct line
{
  char *text;                   /* Text of the line. */
  int length;                   /* Length including final newline. */
  char *keybeg;                 /* Start of first key. */
  char *keylim;                 /* Limit of first key. */
};

/* Arrays of lines. */
struct lines
{
  struct line *lines;           /* Dynamically allocated array of lines. */
  int used;                     /* Number of slots used. */
  int alloc;                    /* Number of slots allocated. */
  int limit;                    /* Max number of slots to allocate. */
};

/* Input buffers. */
struct buffer
{
  char *buf;                    /* Dynamically allocated buffer. */
  int used;                     /* Number of bytes used. */
  int alloc;                    /* Number of bytes allocated. */
  int left;                     /* Number of bytes left after line parsing. */
};

/* Description of one sort key; keys are chained through NEXT and tried
   in list order. */
struct keyfield
{
  int sword;                    /* Zero-origin 'word' to start at. */
  int schar;                    /* Additional characters to skip. */
  int skipsblanks;              /* Skip leading white space at start. */
  int eword;                    /* Zero-origin first word after field. */
  int echar;                    /* Additional characters in field. */
  int skipeblanks;              /* Skip trailing white space at finish. */
  int *ignore;                  /* Boolean array of characters to ignore. */
  char *translate;              /* Translation applied to characters. */
  int numeric;                  /* Flag for numeric comparison. Handle
                                   strings of digits with optional decimal
                                   point, but no exponential notation. */
  int general_numeric;          /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation. */
  int month;                    /* Flag for comparison by month name. */
  int reverse;                  /* Reverse the sense of comparison. */
  struct keyfield *next;        /* Next keyfield to try. */
};

/* One entry of the month-name lookup table. */
struct month
{
  char *name;
  int val;
};

/* The name this program was run with. */
char *program_name;

/* Table of white space. */
static int blanks[UCHAR_LIM];

/* Table of non-printing characters. */
static int nonprinting[UCHAR_LIM];

/* Table of non-dictionary characters (not letters, digits, or blanks). */
static int nondictionary[UCHAR_LIM];

/* Translation table folding lower case to upper.
   FIXME: This doesn't work with multibyte character sets. */
static char fold_toupper[UCHAR_LIM];

#define MONTHS_PER_YEAR 12

/* The month table is rewritten at run time when NLS month names are
   available, so it can only be const otherwise. */
#if defined ENABLE_NLS && HAVE_NL_LANGINFO
# define MONTHTAB_CONST /* empty */
#else
# define MONTHTAB_CONST const
#endif

/* Table mapping month names to integers.
   Alphabetic order allows binary search. */
static MONTHTAB_CONST struct month monthtab[] =
{
  {"APR", 4},
  {"AUG", 8},
  {"DEC", 12},
  {"FEB", 2},
  {"JAN", 1},
  {"JUL", 7},
  {"JUN", 6},
  {"MAR", 3},
  {"MAY", 5},
  {"NOV", 11},
  {"OCT", 10},
  {"SEP", 9}
};

/* During the merge phase, the number of files to merge at once. */
#define NMERGE 16

/* Initial buffer size for in core sorting. Will not grow unless a
   line longer than this is seen. */
static int sortalloc = 512 * 1024 + 1;

/* Initial buffer size for in core merge buffers. Bear in mind that
   up to NMERGE * mergealloc bytes may be allocated for merge buffers.
*/static int mergealloc = 16 * 1024 + 1;/* Guess of average line length. */static int linelength = 30;/* Maximum number of elements for the array(s) of struct line's, in bytes. */#define LINEALLOC (256 * 1024)/* Directory in which any temporary files are to be created. */static char *temp_dir;/* Flag to reverse the order of all comparisons. */static int reverse;/* Flag for stable sort. This turns off the last ditch bytewise comparison of lines, and instead leaves lines in the same order they were read if all keys compare equal. */static int stable;/* Tab character separating fields. If NUL, then fields are separated by the empty string between a non-whitespace character and a whitespace character. */static char tab;/* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */static int unique;/* Nonzero if any of the input files are the standard input. */static int have_read_stdin;/* Lists of key field comparisons to be tried. */static struct keyfield keyhead;voidusage (int status){ if (status != 0) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\Usage: %s [OPTION]... 
[FILE]...\n\"), program_name); printf (_("\Write sorted concatenation of all FILE(s) to standard output.\n\\n\ +POS1 [-POS2] start a key at POS1, end it *before* POS2 (obsolescent)\n\ field numbers and character offsets are numbered\n\ starting with zero (contrast with the -k option)\n\ -b ignore leading blanks in sort fields or keys\n\ -c check if given files already sorted, do not sort\n\ -d consider only [a-zA-Z0-9 ] characters in keys\n\ -f fold lower case to upper case characters in keys\n\ -g compare according to general numerical value, imply -b\n\ -i consider only [\\040-\\0176] characters in keys\n\ -k POS1[,POS2] start a key at POS1, end it *at* POS2\n\ field numbers and character offsets are numbered\n\ starting with one (contrast with zero-based +POS form)\n\ -m merge already sorted files, do not sort\n\ -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\ -n compare according to string numerical value, imply -b\n\ -o FILE write result on FILE instead of standard output\n\ -r reverse the result of comparisons\n\ -s stabilize sort by disabling last resort comparison\n\ -t SEP use SEParator instead of non- to whitespace transition\n\ -T DIRECTORY use DIRECTORY for temporary files, not $TMPDIR or %s\n\ -u with -c, check for strict ordering;\n\ with -m, only output the first of an equal sequence\n\ -z end lines with 0 byte, not newline, for find -print0\n\ --help display this help and exit\n\ --version output version information and exit\n\\n\"), DEFAULT_TMPDIR); printf (_("\POS is F[.C][OPTS], where F is the field number and C the character position\n\in the field, both counted from one with -k, from zero with the obsolescent\n\form. OPTS is made up of one or more of Mbdfinr; this effectively disables\n\global -Mbdfinr settings for that key. If no key is given, use the entire\n\line as the key. 
With no FILE, or when FILE is -, read standard input.\n\") ); puts (_("\nReport bugs to <bug-textutils@gnu.org>.")); } /* Don't use EXIT_FAILURE here in case it is defined to be 1. POSIX requires that sort return 1 IFF invoked with -c and the input is not properly sorted. */ assert (status == 0 || status == SORT_FAILURE); exit (status);}/* The list of temporary files. */static struct tempnode{ char *name; struct tempnode *next;} temphead;/* Clean up any remaining temporary files. */static voidcleanup (void){ struct tempnode *node; for (node = temphead.next; node; node = node->next) unlink (node->name);}static FILE *xtmpfopen (const char *file){ FILE *fp; int fd; /* Open temporary file exclusively, to foil a common denial-of-service attack. */ fd = open (file, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600); if (fd < 0 || (fp = fdopen (fd, "w")) == NULL) { error (0, errno, "%s", file); cleanup (); exit (SORT_FAILURE); } return fp;}static FILE *xfopen (const char *file, const char *how){ FILE *fp; if (STREQ (file, "-")) { fp = stdin; } else { if ((fp = fopen (file, how)) == NULL) { error (0, errno, "%s", file); cleanup (); exit (SORT_FAILURE); } } if (fp == stdin) have_read_stdin = 1; return fp;}static voidxfclose (FILE *fp){ if (fp == stdin) { /* Allow reading stdin from tty more than once. */ if (feof (fp)) clearerr (fp); } else if (fp == stdout) { if (fflush (fp) != 0) { error (0, errno, _("flushing file")); cleanup (); exit (SORT_FAILURE); } } else { if (fclose (fp) != 0) { error (0, errno, _("error closing file")); cleanup (); exit (SORT_FAILURE); } }}static voidwrite_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file){ if (fwrite (buf, 1, n_bytes, fp) != n_bytes) { error (0, errno, _("%s: write error"), output_file); cleanup (); exit (SORT_FAILURE); }}/* Return a name for a temporary file. 
*/static char *tempname (void){ static unsigned int seq; int len = strlen (temp_dir); char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1); int long_file_names = NAME_MAX_IN_DIR (temp_dir) > 12; struct tempnode *node; /* If long filenames aren't supported, we cannot use filenames longer than 8+3 and still assume they are unique. */ if (long_file_names) sprintf (name, "%s%ssort%5.5d%5.5d", temp_dir, (len && temp_dir[len - 1] != '/') ? "/" : "", (unsigned int) getpid () & 0xffff, seq); else sprintf (name, "%s%ss%5.5d%2.2d.%3.3d", temp_dir, (len && temp_dir[len - 1] != '/') ? "/" : "", (unsigned int) getpid () & 0xffff, seq / 1000, seq % 1000); ++seq; /* Make sure that SEQ's value fits in 5 digits if temp_dir is on an 8.3 filesystem. */ if (!long_file_names && seq >= 100000) seq = 0; node = (struct tempnode *) xmalloc (sizeof (struct tempnode)); node->name = name; node->next = temphead.next; temphead.next = node; return name;}/* Search through the list of temporary files for NAME; remove it if it is found on the list. */static voidzaptemp (const char *name){ struct tempnode *node, *temp; for (node = &temphead; node->next; node = node->next) if (STREQ (name, node->next->name)) break; if (node->next) { temp = node->next; unlink (temp->name); free (temp->name); node->next = temp->next; free ((char *) temp); }}#ifdef ENABLE_NLSstatic intstruct_month_cmp (const void *m1, const void *m2){ return strcmp (((const struct month *) m1)->name, ((const struct month *) m2)->name);}#endif /* NLS *//* Initialize the character class tables. */static voidinittables (void){ int i; for (i = 0; i < UCHAR_LIM; ++i) { if (ISBLANK (i)) blanks[i] = 1; if (!ISPRINT (i)) nonprinting[i] = 1; if (!ISALNUM (i) && !ISBLANK (i)) nondictionary[i] = 1; if (ISLOWER (i)) fold_toupper[i] = toupper (i); else fold_toupper[i] = i; }#if defined ENABLE_NLS && HAVE_NL_LANGINFO /* If we're not in the "C" locale, read different names for months. 
*/ if (hard_LC_TIME) { for (i = 0; i < MONTHS_PER_YEAR; i++) { char *s; size_t s_len; size_t j; char *name; s = (char *) nl_langinfo (ABMON_1 + i); s_len = strlen (s); monthtab[i].name = name = (char *) xmalloc (s_len + 1); monthtab[i].val = i + 1; for (j = 0; j < s_len; j++) name[j] = fold_toupper[UCHAR (s[j])]; name[j] = '\0'; } qsort ((void *) monthtab, MONTHS_PER_YEAR, sizeof (struct month), struct_month_cmp); }#endif /* NLS */}/* Initialize BUF, allocating ALLOC bytes initially. */static voidinitbuf (struct buffer *buf, int alloc){ buf->alloc = alloc; buf->buf = xmalloc (buf->alloc); buf->used = buf->left = 0;}/* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Always leave at least one unused byte at the end. Return a count of bytes buffered. */static intfillbuf (struct buffer *buf, FILE *fp){ int cc; memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); buf->used = buf->left; while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, eolchar, buf->used))) { if (buf->used == buf->alloc - 1) { buf->alloc = buf->alloc * 2 - 1; buf->buf = xrealloc (buf->buf, buf->alloc); } cc = fread (buf->buf + buf->used, 1, buf->alloc - 1 - buf->used, fp); if (ferror (fp)) { error (0, errno, _("read error")); cleanup (); exit (SORT_FAILURE); } buf->used += cc; } if (feof (fp) && buf->used && buf->buf[buf->used - 1] != eolchar) { if (buf->used == buf->alloc - 1) { buf->alloc = buf->alloc * 2 - 1; buf->buf = xrealloc (buf->buf, buf->alloc); } buf->buf[buf->used++] = eolchar; } return buf->used;}/* Initialize LINES, allocating space for ALLOC lines initially. LIMIT is the maximum possible number of lines to allocate space for, ever. */static voidinitlines (struct lines *lines, int alloc, int limit){ lines->alloc = alloc; lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line)); lines->used = 0; lines->limit = limit;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -