sort.c

来自「Linux.Programming.by example 的源代码绝对经典」· C语言代码 · 共 2,500 行 · 第 1/5 页
2,500 行
{  if (temp_dir_count == temp_dir_alloc)    {      temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16;      temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc);    }  temp_dirs[temp_dir_count++] = dir;}/* Search through the list of temporary files for NAME;   remove it if it is found on the list. */static voidzaptemp (const char *name){  struct tempnode *volatile *pnode;  struct tempnode *node;  for (pnode = &temphead; (node = *pnode); pnode = &node->next)    if (node->name == name)      {	unlink (name);	*pnode = node->next;	free ((char *) node);	break;      }}#if HAVE_NL_LANGINFOstatic intstruct_month_cmp (const void *m1, const void *m2){  return strcmp (((const struct month *) m1)->name,		 ((const struct month *) m2)->name);}#endif/* Initialize the character class tables. */static voidinittables (void){  int i;  for (i = 0; i < UCHAR_LIM; ++i)    {      if (ISBLANK (i))	blanks[i] = 1;      if (!ISPRINT (i))	nonprinting[i] = 1;      if (!ISALNUM (i) && !ISBLANK (i))	nondictionary[i] = 1;      if (ISLOWER (i))	fold_toupper[i] = toupper (i);      else	fold_toupper[i] = i;    }#if HAVE_NL_LANGINFO  /* If we're not in the "C" locale, read different names for months.  */  if (hard_LC_TIME)    {      for (i = 0; i < MONTHS_PER_YEAR; i++)	{	  char *s;	  size_t s_len;	  size_t j;	  char *name;	  s = (char *) nl_langinfo (ABMON_1 + i);	  s_len = strlen (s);	  monthtab[i].name = name = (char *) xmalloc (s_len + 1);	  monthtab[i].val = i + 1;	  for (j = 0; j < s_len; j++)	    name[j] = fold_toupper[UCHAR (s[j])];	  name[j] = '\0';	}      qsort ((void *) monthtab, MONTHS_PER_YEAR,	     sizeof (struct month), struct_month_cmp);    }#endif}/* Specify the amount of main memory to use when sorting.  */static voidspecify_sort_size (char const *s){  uintmax_t n;  char *suffix;  enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");  /* The default unit is KiB.  */  if (e == LONGINT_OK && ISDIGIT (suffix[-1]))    {      if (n <= UINTMAX_MAX / 1024)	n *= 1024;      else	e = LONGINT_OVERFLOW;    }  /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */  if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])    switch (suffix[0])      {      case 'b':	e = LONGINT_OK;	break;      case '%':	{	  double mem = physmem_total () * n / 100;	  /* Use "<", not "<=", to avoid problems with rounding.  */	  if (mem < UINTMAX_MAX)	    {	      n = mem;	      e = LONGINT_OK;	    }	  else	    e = LONGINT_OVERFLOW;	}	break;      }  if (e == LONGINT_OK)    {      sort_size = n;      if (sort_size == n)	{	  sort_size = MAX (sort_size, MIN_SORT_SIZE);	  return;	}      e = LONGINT_OVERFLOW;    }  STRTOL_FATAL_ERROR (s, _("sort size"), e);}/* Return the default sort size.  */static size_tdefault_sort_size (void){  /* Let MEM be available memory or 1/8 of total memory, whichever     is greater.  */  double avail = physmem_available ();  double total = physmem_total ();  double mem = MAX (avail, total / 8);  struct rlimit rlimit;  /* Let SIZE be MEM, but no more than the maximum object size or     system resource limits.  Avoid the MIN macro here, as it is not     quite right when only one argument is floating point.  Don't     bother to check for values like RLIM_INFINITY since in practice     they are not much less than SIZE_MAX.  */  size_t size = SIZE_MAX;  if (mem < size)    size = mem;  if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)    size = rlimit.rlim_cur;#ifdef RLIMIT_AS  if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)    size = rlimit.rlim_cur;#endif  /* Leave a large safety margin for the above limits, as failure can     occur when they are exceeded.  */  size /= 2;#ifdef RLIMIT_RSS  /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.     Exceeding RSS is not fatal, but can be quite slow.  */  if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)    size = rlimit.rlim_cur / 16 * 15;#endif  /* Use no less than the minimum.  */  return MAX (size, MIN_SORT_SIZE);}/* Return the sort buffer size to use with the input files identified   by FPS and FILES, which are alternate paths to the same files.   NFILES gives the number of input files; NFPS may be less.  Assume   that each input line requires LINE_BYTES extra bytes' worth of line   information.  Return at most SIZE_BOUND.  */static size_tsort_buffer_size (FILE *const *fps, int nfps,		  char *const *files, int nfiles,		  size_t line_bytes, size_t size_bound){  /* In the worst case, each input byte is a newline.  */  size_t worst_case_per_input_byte = line_bytes + 1;  /* Keep enough room for one extra input line and an extra byte.     This extra room might be needed when preparing to read EOF.  */  size_t size = worst_case_per_input_byte + 1;  int i;  for (i = 0; i < nfiles; i++)    {      struct stat st;      off_t file_size;      size_t worst_case;      if ((i < nfps ? fstat (fileno (fps[i]), &st)	   : strcmp (files[i], "-") == 0 ? fstat (STDIN_FILENO, &st)	   : stat (files[i], &st))	  != 0)	die (_("stat failed"), files[i]);      file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS;      /* Add the amount of memory needed to represent the worst case	 where the input consists entirely of newlines followed by a	 single non-newline.  Check for overflow.  */      worst_case = file_size * worst_case_per_input_byte + 1;      if (file_size != worst_case / worst_case_per_input_byte	  || size_bound - size <= worst_case)	return size_bound;      size += worst_case;    }  return size;}/* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES   must be at least sizeof (struct line).  Allocate ALLOC bytes   initially.  */static voidinitbuf (struct buffer *buf, size_t line_bytes, size_t alloc){  /* Ensure that the line array is properly aligned.  If the desired     size cannot be allocated, repeatedly halve it until allocation     succeeds.  The smaller allocation may hurt overall performance,     but that's better than failing.  */  for (;;)    {      alloc += sizeof (struct line) - alloc % sizeof (struct line);      buf->buf = malloc (alloc);      if (buf->buf)	break;      alloc /= 2;      if (alloc <= line_bytes + 1)	xalloc_die ();    }  buf->line_bytes = line_bytes;  buf->alloc = alloc;  buf->used = buf->left = buf->nlines = 0;  buf->eof = 0;}/* Return one past the limit of the line array.  */static inline struct line *buffer_linelim (struct buffer const *buf){  return (struct line *) (buf->buf + buf->alloc);}/* Return a pointer to the first character of the field specified   by KEY in LINE. */static char *begfield (const struct line *line, const struct keyfield *key){  register char *ptr = line->text, *lim = ptr + line->length - 1;  register size_t sword = key->sword;  register size_t schar = key->schar;  register size_t remaining_bytes;  if (tab)    while (ptr < lim && sword--)      {	while (ptr < lim && *ptr != tab)	  ++ptr;	if (ptr < lim)	  ++ptr;      }  else    while (ptr < lim && sword--)      {	while (ptr < lim && blanks[UCHAR (*ptr)])	  ++ptr;	while (ptr < lim && !blanks[UCHAR (*ptr)])	  ++ptr;      }  if (key->skipsblanks)    while (ptr < lim && blanks[UCHAR (*ptr)])      ++ptr;  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */  remaining_bytes = lim - ptr;  if (schar < remaining_bytes)    ptr += schar;  else    ptr = lim;  return ptr;}/* Return the limit of (a pointer to the first character after) the field   in LINE specified by KEY. */static char *limfield (const struct line *line, const struct keyfield *key){  register char *ptr = line->text, *lim = ptr + line->length - 1;  register size_t eword = key->eword, echar = key->echar;  register size_t remaining_bytes;  /* Note: from the POSIX spec:     The leading field separator itself is included in     a field when -t is not used.  FIXME: move this comment up... */  /* Move PTR past EWORD fields or to one past the last byte on LINE,     whichever comes first.  If there are more than EWORD fields, leave     PTR pointing at the beginning of the field having zero-based index,     EWORD.  If a delimiter character was specified (via -t), then that     `beginning' is the first character following the delimiting TAB.     Otherwise, leave PTR pointing at the first `blank' character after     the preceding field.  */  if (tab)    while (ptr < lim && eword--)      {	while (ptr < lim && *ptr != tab)	  ++ptr;	if (ptr < lim && (eword | echar))	  ++ptr;      }  else    while (ptr < lim && eword--)      {	while (ptr < lim && blanks[UCHAR (*ptr)])	  ++ptr;	while (ptr < lim && !blanks[UCHAR (*ptr)])	  ++ptr;      }#ifdef POSIX_UNSPECIFIED  /* The following block of code makes GNU sort incompatible with     standard Unix sort, so it's ifdef'd out for now.     The POSIX spec isn't clear on how to interpret this.     FIXME: request clarification.     From: kwzh@gnu.ai.mit.edu (Karl Heuer)     Date: Thu, 30 May 96 12:20:41 -0400     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]     [...]I believe I've found another bug in `sort'.     $ cat /tmp/sort.in     a b c 2 d     pq rs 1 t     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in     a b c 2 d     pq rs 1 t     $ /bin/sort -k1.7,1.7 </tmp/sort.in     pq rs 1 t     a b c 2 d     Unix sort produced the answer I expected: sort on the single character     in column 7.  GNU sort produced different results, because it disagrees     on the interpretation of the key-end spec "M.N".  Unix sort reads this     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean     "skip M-1 fields, then either N-1 characters or the rest of the current     field, whichever comes first".  This extra clause applies only to     key-ends, not key-starts.     */  /* Make LIM point to the end of (one byte past) the current field.  */  if (tab)    {      char *newlim;      newlim = memchr (ptr, tab, lim - ptr);      if (newlim)	lim = newlim;    }  else    {      char *newlim;      newlim = ptr;      while (newlim < lim && blanks[UCHAR (*newlim)])	++newlim;      while (newlim < lim && !blanks[UCHAR (*newlim)])	++newlim;      lim = newlim;    }#endif  /* If we're skipping leading blanks, don't start counting characters     until after skipping past any leading blanks.  */  if (key->skipsblanks)    while (ptr < lim && blanks[UCHAR (*ptr)])      ++ptr;  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */  remaining_bytes = lim - ptr;  if (echar < remaining_bytes)    ptr += echar;  else    ptr = lim;  return ptr;}/* FIXME */static voidtrim_trailing_blanks (const char *a_start, char **a_end){  while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])    --(*a_end);}/* Fill BUF reading from FP, moving buf->left bytes from the end   of buf->buf to the beginning first.  If EOF is reached and the   file wasn't terminated by a newline, supply one.  Set up BUF's line   table too.  FILE is the name of the file corresponding to FP.   Return nonzero if some input was read.  */static intfillbuf (struct buffer *buf, register FILE *fp, char const *file){  struct keyfield const *key = keylist;  char eol = eolchar;  size_t line_bytes = buf->line_bytes;  size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;  if (buf->eof)    return 0;  if (buf->used != buf->left)    {      memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);      buf->used = buf->left;      buf->nlines = 0;    }  for (;;)    {      char *ptr = buf->buf + buf->used;      struct line *linelim = buffer_linelim (buf);      struct line *line = linelim - buf->nlines;      size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;      char *line_start = buf->nlines ? line->text + line->length : buf->buf;      while (line_bytes + 1 < avail)	{	  /* Read as many bytes as possible, but do not read so many	     bytes that there might not be enough room for the	     corresponding line array.  The worst case is when the	     rest of the input file consists entirely of newlines,	     except that the last byte is not a newline.  */	  size_t readsize = (avail - 1) / (line_bytes + 1);	  size_t bytes_read = fread (ptr, 1, readsize, fp);	  char *ptrlim = ptr + bytes_read;	  char *p;	  avail -= bytes_read;	  if (bytes_read != readsize)	    {	      if (ferror (fp))		die (_("read failed"), file);	      if (feof (fp))		{		  buf->eof = 1;		  if (buf->buf == ptrlim)		    return 0;		  if (ptrlim[-1] != eol)		    *ptrlim++ = eol;		}	    }	  /* Find and record each line in the just-read input.  */	  while ((p = memchr (ptr, eol, ptrlim - ptr)))	    {	      ptr = p + 1;	      line--;	      line->text = line_start;	      line->length = ptr - line_start;	      mergesize = MAX (mergesize, line->length);	      avail -= line_bytes;	      if (key)		{		  /* Precompute the position of the first key for                     efficiency. */		  line->keylim = (key->eword == (size_t) -1				  ? p				  : limfield (line, key));
sort.c - 源码说明

本页面展示了「Linux.Programming.by example 的源代码绝对经典」中的 sort.c 源码文件，采用 C语言编程语言编写，共 2,500 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Programming相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?