📄 sort.c

📁 代码检索工具GLOBAL源码。可用来浏览分析LINUX源码。
💻 C
📖 第 1 页 / 共 4 页
字号:
}/* Return a pointer to the first character of the field specified   by KEY in LINE. */static char *begfield (const struct line *line, const struct keyfield *key){  register char *ptr = line->text, *lim = ptr + line->length;  register int sword = key->sword, schar = key->schar;  if (tab)    while (ptr < lim && sword--)      {	while (ptr < lim && *ptr != tab)	  ++ptr;	if (ptr < lim)	  ++ptr;      }  else    while (ptr < lim && sword--)      {	while (ptr < lim && blanks[UCHAR (*ptr)])	  ++ptr;	while (ptr < lim && !blanks[UCHAR (*ptr)])	  ++ptr;      }  if (key->skipsblanks)    while (ptr < lim && blanks[UCHAR (*ptr)])      ++ptr;  if (ptr + schar <= lim)    ptr += schar;  else    ptr = lim;  return ptr;}/* Return the limit of (a pointer to the first character after) the field   in LINE specified by KEY. */static char *limfield (const struct line *line, const struct keyfield *key){  register char *ptr = line->text, *lim = ptr + line->length;  register int eword = key->eword, echar = key->echar;  /* Note: from the POSIX spec:     The leading field separator itself is included in     a field when -t is not used.  FIXME: move this comment up... */  /* Move PTR past EWORD fields or to one past the last byte on LINE,     whichever comes first.  If there are more than EWORD fields, leave     PTR pointing at the beginning of the field having zero-based index,     EWORD.  If a delimiter character was specified (via -t), then that     `beginning' is the first character following the delimiting TAB.     Otherwise, leave PTR pointing at the first `blank' character after     the preceding field.  */  if (tab)    while (ptr < lim && eword--)      {	while (ptr < lim && *ptr != tab)	  ++ptr;	if (ptr < lim && (eword || echar > 0))	  ++ptr;      }  else    while (ptr < lim && eword--)      {	while (ptr < lim && blanks[UCHAR (*ptr)])	  ++ptr;	while (ptr < lim && !blanks[UCHAR (*ptr)])	  ++ptr;      }#ifdef POSIX_UNSPECIFIED  /* The following block of code makes GNU sort incompatible with     standard Unix sort, so it's ifdef'd out for now.     The POSIX spec isn't clear on how to interpret this.     FIXME: request clarification.     From: kwzh@gnu.ai.mit.edu (Karl Heuer)     Date: Thu, 30 May 96 12:20:41 -0400     [...]I believe I've found another bug in `sort'.     $ cat /tmp/sort.in     a b c 2 d     pq rs 1 t     $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in     a b c 2 d     pq rs 1 t     $ /bin/sort +0.6 -0.7 </tmp/sort.in     pq rs 1 t     a b c 2 d     Unix sort produced the answer I expected: sort on the single character     in column 6.  GNU sort produced different results, because it disagrees     on the interpretation of the key-end spec "-M.N".  Unix sort reads this     as "skip M fields, then N characters"; but GNU sort wants it to mean     "skip M fields, then either N characters or the rest of the current     field, whichever comes first".  This extra clause applies only to     key-ends, not key-starts.     */  /* Make LIM point to the end of (one byte past) the current field.  */  if (tab)    {      char *newlim;      newlim = memchr (ptr, tab, lim - ptr);      if (newlim)	lim = newlim;    }  else    {      char *newlim;      newlim = ptr;      while (newlim < lim && blanks[UCHAR (*newlim)])	++newlim;      while (newlim < lim && !blanks[UCHAR (*newlim)])	++newlim;      lim = newlim;    }#endif  /* If we're skipping leading blanks, don't start counting characters     until after skipping past any leading blanks.  */  if (key->skipsblanks)    while (ptr < lim && blanks[UCHAR (*ptr)])      ++ptr;  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */  if (ptr + echar <= lim)    ptr += echar;  else    ptr = lim;  return ptr;}/* FIXME */voidtrim_trailing_blanks (const char *a_start, char **a_end){  while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])    --(*a_end);}/* Find the lines in BUF, storing pointers and lengths in LINES.  */static voidfindlines (struct buffer *buf, struct lines *lines){  register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;  struct keyfield *key = keyhead.next;  lines->used = 0;  while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg))	 && lines->used < lines->limit)    {      if (lines->used == lines->alloc)	{	  lines->alloc *= 2;	  lines->lines = (struct line *)	    xrealloc ((char *) lines->lines,		      lines->alloc * sizeof (struct line));	}      ptr++;      lines->lines[lines->used].text = beg;      lines->lines[lines->used].length = ptr - beg;      /* Precompute the position of the first key for efficiency. */      if (key)	{	  if (key->eword >= 0)	    lines->lines[lines->used].keylim =	      limfield (&lines->lines[lines->used], key);	  else	    lines->lines[lines->used].keylim = ptr;	  if (key->sword >= 0)	    lines->lines[lines->used].keybeg =	      begfield (&lines->lines[lines->used], key);	  else	    {	      if (key->skipsblanks)		while (blanks[UCHAR (*beg)])		  ++beg;	      lines->lines[lines->used].keybeg = beg;	    }	  if (key->skipeblanks)	    {	      trim_trailing_blanks (lines->lines[lines->used].keybeg,				    &lines->lines[lines->used].keylim);	    }	}      else	{	  lines->lines[lines->used].keybeg = 0;	  lines->lines[lines->used].keylim = 0;	}      ++lines->used;      beg = ptr;    }  buf->left = lim - beg;}/* Compare strings A and B containing decimal fractions < 1.  Each string   should begin with a decimal point followed immediately by the digits   of the fraction.  Strings not of this form are considered to be zero. *//* The goal here, is to take two numbers a and b... compare these   in parallel.  Instead of converting each, and then comparing the   outcome.  Most likely stopping the comparison before the conversion   is complete.  The algorithm used, in the old sort:   Algorithm: fraccompare   Action   : compare two decimal fractions   accepts  : char *a, char *b   returns  : -1 if a<b, 0 if a=b, 1 if a>b.   implement:   if *a == decimal_point AND *b == decimal_point     find first character different in a and b.     if both are digits, return the difference *a - *b.     if *a is a digit       skip past zeros       if digit return 1, else 0     if *b is a digit       skip past zeros       if digit return -1, else 0   if *a is a decimal_point     skip past decimal_point and zeros     if digit return 1, else 0   if *b is a decimal_point     skip past decimal_point and zeros     if digit return -1, else 0   return 0 */static intfraccompare (register const char *a, register const char *b){  if (*a == decimal_point && *b == decimal_point)    {      while (*++a == *++b)	if (! ISDIGIT (*a))	  return 0;      if (ISDIGIT (*a) && ISDIGIT (*b))	return *a - *b;      if (ISDIGIT (*a))	goto a_trailing_nonzero;      if (ISDIGIT (*b))	goto b_trailing_nonzero;      return 0;    }  else if (*a++ == decimal_point)    {    a_trailing_nonzero:      while (*a == NUMERIC_ZERO)	a++;      return ISDIGIT (*a);    }  else if (*b++ == decimal_point)    {    b_trailing_nonzero:      while (*b == NUMERIC_ZERO)	b++;      return - ISDIGIT (*b);    }  return 0;}/* Compare strings A and B as numbers without explicitly converting them to   machine numbers.  Comparatively slow for short strings, but asymptotically   hideously fast. */static intnumcompare (register const char *a, register const char *b){  register int tmpa, tmpb, loga, logb, tmp;  tmpa = *a;  tmpb = *b;  while (blanks[UCHAR (tmpa)])    tmpa = *++a;  while (blanks[UCHAR (tmpb)])    tmpb = *++b;  if (tmpa == NEGATION_SIGN)    {      do	tmpa = *++a;      while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));      if (tmpb != NEGATION_SIGN)	{	  if (tmpa == decimal_point)	    do	      tmpa = *++a;	    while (tmpa == NUMERIC_ZERO);	  if (ISDIGIT (tmpa))	    return -1;	  while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))	    tmpb = *++b;	  if (tmpb == decimal_point)	    do	      tmpb = *++b;	    while (tmpb == NUMERIC_ZERO);	  if (ISDIGIT (tmpb))	    return -1;	  return 0;	}      do	tmpb = *++b;      while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));      while (tmpa == tmpb && ISDIGIT (tmpa))	{	  do	    tmpa = *++a;	  while (IS_THOUSANDS_SEP (tmpa));	  do	    tmpb = *++b;	  while (IS_THOUSANDS_SEP (tmpb));	}      if ((tmpa == decimal_point && !ISDIGIT (tmpb))	  || (tmpb == decimal_point && !ISDIGIT (tmpa)))	return -fraccompare (a, b);      tmp = tmpb - tmpa;      for (loga = 0; ISDIGIT (tmpa); ++loga)	do	  tmpa = *++a;	while (IS_THOUSANDS_SEP (tmpa));      for (logb = 0; ISDIGIT (tmpb); ++logb)	do	  tmpb = *++b;	while (IS_THOUSANDS_SEP (tmpb));      if (logb - loga != 0)	return logb - loga;      if (!loga)	return 0;      return tmp;    }  else if (tmpb == NEGATION_SIGN)    {      do	tmpb = *++b;      while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));      if (tmpb == decimal_point)	do	  tmpb = *++b;	while (tmpb == NUMERIC_ZERO);      if (ISDIGIT (tmpb))	return 1;      while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))	tmpa = *++a;      if (tmpa == decimal_point)	do	  tmpa = *++a;	while (tmpa == NUMERIC_ZERO);      if (ISDIGIT (tmpa))	return 1;      return 0;    }  else    {      while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))	tmpa = *++a;      while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))	tmpb = *++b;      while (tmpa == tmpb && ISDIGIT (tmpa))	{	  do	    tmpa = *++a;	  while (IS_THOUSANDS_SEP (tmpa));	  do	    tmpb = *++b;	  while (IS_THOUSANDS_SEP (tmpb));	}      if ((tmpa == decimal_point && !ISDIGIT (tmpb))	  || (tmpb == decimal_point && !ISDIGIT (tmpa)))	return fraccompare (a, b);      tmp = tmpa - tmpb;      for (loga = 0; ISDIGIT (tmpa); ++loga)	do	  tmpa = *++a;	while (IS_THOUSANDS_SEP (tmpa));      for (logb = 0; ISDIGIT (tmpb); ++logb)	do	  tmpb = *++b;	while (IS_THOUSANDS_SEP (tmpb));      if (loga - logb != 0)	return loga - logb;      if (!loga)	return 0;      return tmp;    }}static intgeneral_numcompare (const char *sa, const char *sb){  /* FIXME: add option to warn about failed conversions.  */  /* FIXME: maybe add option to try expensive FP conversion     only if A and B can't be compared more cheaply/accurately.  */  char *ea;  char *eb;  double a = strtod (sa, &ea);  double b = strtod (sb, &eb);  /* Put conversion errors at the start of the collating sequence.  */  if (sa == ea)    return sb == eb ? 0 : -1;  if (sb == eb)    return 1;  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after     conversion errors but before numbers; sort them by internal     bit-pattern, for lack of a more portable alternative.  */  return (a < b ? -1	  : a > b ? 1	  : a == b ? 0	  : b == b ? -1	  : a == a ? 1	  : memcmp ((char *) &a, (char *) &b, sizeof a));}/* Return an integer in 1..12 of the month name S with length LEN.   Return 0 if the name in S is not recognized.  */static intgetmonth (const char *s, int len){  char *month;  register int i, lo = 0, hi = MONTHS_PER_YEAR, result;  while (len > 0 && blanks[UCHAR (*s)])    {      ++s;      --len;    }  if (len == 0)    return 0;  month = (char *) alloca (len + 1);  for (i = 0; i < len; ++i)    month[i] = fold_toupper[UCHAR (s[i])];  while (blanks[UCHAR (month[i - 1])])    --i;  month[i] = '\0';  do    {      int ix = (lo + hi) / 2;      if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)	hi = ix;      else	lo = ix;    }  while (hi - lo > 1);  result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))	    ? monthtab[lo].val : 0);  return result;}/* Compare two lines A and B trying every key in sequence until there   are no more keys or a difference is found. */static intkeycompare (const struct line *a, const struct line *b){  register char *texta, *textb, *lima, *limb;  register unsigned char *translate;  register int *ignore;  struct keyfield *key;  int diff = 0, iter = 0, lena, lenb;  for (key = keyhead.next; key; key = key->next, ++iter)    {      int comparable_lengths = 1;      ignore = key->ignore;      translate = (unsigned char *) key->translate;      /* Find the beginning and limit of each field. */      if (iter || a->keybeg == NULL || b->keybeg == NULL)	{	  if (key->eword >= 0)	    lima = limfield (a, key), limb = limfield (b, key);	  else	    lima = a->text + a->length, limb = b->text + b->length;	  if (key->sword >= 0)	    texta = begfield (a, key), textb = begfield (b, key);	  else	    {	      texta = a->text, textb = b->text;	      if (key->skipsblanks)		{		  while (texta < lima && blanks[UCHAR (*texta)])		    ++texta;		  while (textb < limb && blanks[UCHAR (*textb)])		    ++textb;		}	    }	}      else	{	  /* For the first iteration only, the key positions have	     been precomputed for us. */	  texta = a->keybeg, lima = a->keylim;	  textb = b->keybeg, limb = b->keylim;	}      /* Find the lengths. */      lena = lima - texta, lenb = limb - textb;      if (lena < 0)	lena = 0;      if (lenb < 0)	lenb = 0;      if (key->skipeblanks)	{	  char *a_end = texta + lena;	  char *b_end = textb + lenb;	  trim_trailing_blanks (texta, &a_end);	  trim_trailing_blanks (textb, &b_end);	  lena = a_end - texta;	  lenb = b_end - textb;	}      /* Actually compare the fields. */      if (key->numeric | key->general_numeric)	{	  char savea, saveb;	  /* If the fields are adjacent, adjust the end of the earlier	     field back by 1 byte, since we temporarily modify the	     byte after the field during comparison.  This can't	     change a numeric comparison, since the byte is a newline.	     If the earlier field is empty, adjust its start as well.  */	  if (lima == textb)	    texta -= texta == lima--;	  if (limb == texta)	    textb -= textb == limb--;	  savea = *lima, saveb = *limb;	  *lima = *limb = '\0';	  diff = ((key->numeric ? numcompare : general_numcompare)		  (texta, textb));	  *lima = savea, *limb = saveb;	  if (diff)	    return key->reverse ? -diff : diff;	  continue;	}      else if (key->month)	{	  diff = getmonth (texta, lena) - getmonth (textb, lenb);	  if (diff)	    return key->reverse ? -diff : diff;	  continue;	}#ifdef ENABLE_NLS      /* Sorting like this may become slow, so in a simple locale the user         can select a faster sort that is similar to ascii sort  */      else if (hard_LC_COLLATE | hard_LC_CTYPE)	{	  if (ignore || translate)	    {	      char *copy_a = (char *) alloca (lena + 1);	      char *copy_b = (char *) alloca (lenb + 1);	      int new_len_a, new_len_b, i;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -