📄 sort.c
字号:
}/* Return a pointer to the first character of the field specified by KEY in LINE. */static char *begfield (const struct line *line, const struct keyfield *key){ register char *ptr = line->text, *lim = ptr + line->length; register int sword = key->sword, schar = key->schar; if (tab) while (ptr < lim && sword--) { while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim) ++ptr; } else while (ptr < lim && sword--) { while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; while (ptr < lim && !blanks[UCHAR (*ptr)]) ++ptr; } if (key->skipsblanks) while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; if (ptr + schar <= lim) ptr += schar; else ptr = lim; return ptr;}/* Return the limit of (a pointer to the first character after) the field in LINE specified by KEY. */static char *limfield (const struct line *line, const struct keyfield *key){ register char *ptr = line->text, *lim = ptr + line->length; register int eword = key->eword, echar = key->echar; /* Note: from the POSIX spec: The leading field separator itself is included in a field when -t is not used. FIXME: move this comment up... */ /* Move PTR past EWORD fields or to one past the last byte on LINE, whichever comes first. If there are more than EWORD fields, leave PTR pointing at the beginning of the field having zero-based index, EWORD. If a delimiter character was specified (via -t), then that `beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first `blank' character after the preceding field. */ if (tab) while (ptr < lim && eword--) { while (ptr < lim && *ptr != tab) ++ptr; if (ptr < lim && (eword || echar > 0)) ++ptr; } else while (ptr < lim && eword--) { while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; while (ptr < lim && !blanks[UCHAR (*ptr)]) ++ptr; }#ifdef POSIX_UNSPECIFIED /* The following block of code makes GNU sort incompatible with standard Unix sort, so it's ifdef'd out for now. The POSIX spec isn't clear on how to interpret this. FIXME: request clarification. From: kwzh@gnu.ai.mit.edu (Karl Heuer) Date: Thu, 30 May 96 12:20:41 -0400 [...]I believe I've found another bug in `sort'. $ cat /tmp/sort.in a b c 2 d pq rs 1 t $ textutils-1.15/src/sort +0.6 -0.7 </tmp/sort.in a b c 2 d pq rs 1 t $ /bin/sort +0.6 -0.7 </tmp/sort.in pq rs 1 t a b c 2 d Unix sort produced the answer I expected: sort on the single character in column 6. GNU sort produced different results, because it disagrees on the interpretation of the key-end spec "-M.N". Unix sort reads this as "skip M fields, then N characters"; but GNU sort wants it to mean "skip M fields, then either N characters or the rest of the current field, whichever comes first". This extra clause applies only to key-ends, not key-starts. */ /* Make LIM point to the end of (one byte past) the current field. */ if (tab) { char *newlim; newlim = memchr (ptr, tab, lim - ptr); if (newlim) lim = newlim; } else { char *newlim; newlim = ptr; while (newlim < lim && blanks[UCHAR (*newlim)]) ++newlim; while (newlim < lim && !blanks[UCHAR (*newlim)]) ++newlim; lim = newlim; }#endif /* If we're skipping leading blanks, don't start counting characters until after skipping past any leading blanks. */ if (key->skipsblanks) while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; /* Advance PTR by ECHAR (if possible), but no further than LIM. */ if (ptr + echar <= lim) ptr += echar; else ptr = lim; return ptr;}/* FIXME */voidtrim_trailing_blanks (const char *a_start, char **a_end){ while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))]) --(*a_end);}/* Find the lines in BUF, storing pointers and lengths in LINES. */static voidfindlines (struct buffer *buf, struct lines *lines){ register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr; struct keyfield *key = keyhead.next; lines->used = 0; while (beg < lim && (ptr = memchr (beg, eolchar, lim - beg)) && lines->used < lines->limit) { if (lines->used == lines->alloc) { lines->alloc *= 2; lines->lines = (struct line *) xrealloc ((char *) lines->lines, lines->alloc * sizeof (struct line)); } ptr++; lines->lines[lines->used].text = beg; lines->lines[lines->used].length = ptr - beg; /* Precompute the position of the first key for efficiency. */ if (key) { if (key->eword >= 0) lines->lines[lines->used].keylim = limfield (&lines->lines[lines->used], key); else lines->lines[lines->used].keylim = ptr; if (key->sword >= 0) lines->lines[lines->used].keybeg = begfield (&lines->lines[lines->used], key); else { if (key->skipsblanks) while (blanks[UCHAR (*beg)]) ++beg; lines->lines[lines->used].keybeg = beg; } if (key->skipeblanks) { trim_trailing_blanks (lines->lines[lines->used].keybeg, &lines->lines[lines->used].keylim); } } else { lines->lines[lines->used].keybeg = 0; lines->lines[lines->used].keylim = 0; } ++lines->used; beg = ptr; } buf->left = lim - beg;}/* Compare strings A and B containing decimal fractions < 1. Each string should begin with a decimal point followed immediately by the digits of the fraction. Strings not of this form are considered to be zero. *//* The goal here, is to take two numbers a and b... compare these in parallel. Instead of converting each, and then comparing the outcome. Most likely stopping the comparison before the conversion is complete. The algorithm used, in the old sort: Algorithm: fraccompare Action : compare two decimal fractions accepts : char *a, char *b returns : -1 if a<b, 0 if a=b, 1 if a>b. implement: if *a == decimal_point AND *b == decimal_point find first character different in a and b. if both are digits, return the difference *a - *b. if *a is a digit skip past zeros if digit return 1, else 0 if *b is a digit skip past zeros if digit return -1, else 0 if *a is a decimal_point skip past decimal_point and zeros if digit return 1, else 0 if *b is a decimal_point skip past decimal_point and zeros if digit return -1, else 0 return 0 */static intfraccompare (register const char *a, register const char *b){ if (*a == decimal_point && *b == decimal_point) { while (*++a == *++b) if (! ISDIGIT (*a)) return 0; if (ISDIGIT (*a) && ISDIGIT (*b)) return *a - *b; if (ISDIGIT (*a)) goto a_trailing_nonzero; if (ISDIGIT (*b)) goto b_trailing_nonzero; return 0; } else if (*a++ == decimal_point) { a_trailing_nonzero: while (*a == NUMERIC_ZERO) a++; return ISDIGIT (*a); } else if (*b++ == decimal_point) { b_trailing_nonzero: while (*b == NUMERIC_ZERO) b++; return - ISDIGIT (*b); } return 0;}/* Compare strings A and B as numbers without explicitly converting them to machine numbers. Comparatively slow for short strings, but asymptotically hideously fast. */static intnumcompare (register const char *a, register const char *b){ register int tmpa, tmpb, loga, logb, tmp; tmpa = *a; tmpb = *b; while (blanks[UCHAR (tmpa)]) tmpa = *++a; while (blanks[UCHAR (tmpb)]) tmpb = *++b; if (tmpa == NEGATION_SIGN) { do tmpa = *++a; while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)); if (tmpb != NEGATION_SIGN) { if (tmpa == decimal_point) do tmpa = *++a; while (tmpa == NUMERIC_ZERO); if (ISDIGIT (tmpa)) return -1; while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)) tmpb = *++b; if (tmpb == decimal_point) do tmpb = *++b; while (tmpb == NUMERIC_ZERO); if (ISDIGIT (tmpb)) return -1; return 0; } do tmpb = *++b; while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)); while (tmpa == tmpb && ISDIGIT (tmpa)) { do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); } if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa))) return -fraccompare (a, b); tmp = tmpb - tmpa; for (loga = 0; ISDIGIT (tmpa); ++loga) do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); for (logb = 0; ISDIGIT (tmpb); ++logb) do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); if (logb - loga != 0) return logb - loga; if (!loga) return 0; return tmp; } else if (tmpb == NEGATION_SIGN) { do tmpb = *++b; while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)); if (tmpb == decimal_point) do tmpb = *++b; while (tmpb == NUMERIC_ZERO); if (ISDIGIT (tmpb)) return 1; while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)) tmpa = *++a; if (tmpa == decimal_point) do tmpa = *++a; while (tmpa == NUMERIC_ZERO); if (ISDIGIT (tmpa)) return 1; return 0; } else { while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa)) tmpa = *++a; while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb)) tmpb = *++b; while (tmpa == tmpb && ISDIGIT (tmpa)) { do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); } if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa))) return fraccompare (a, b); tmp = tmpa - tmpb; for (loga = 0; ISDIGIT (tmpa); ++loga) do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); for (logb = 0; ISDIGIT (tmpb); ++logb) do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); if (loga - logb != 0) return loga - logb; if (!loga) return 0; return tmp; }}static intgeneral_numcompare (const char *sa, const char *sb){ /* FIXME: add option to warn about failed conversions. */ /* FIXME: maybe add option to try expensive FP conversion only if A and B can't be compared more cheaply/accurately. */ char *ea; char *eb; double a = strtod (sa, &ea); double b = strtod (sb, &eb); /* Put conversion errors at the start of the collating sequence. */ if (sa == ea) return sb == eb ? 0 : -1; if (sb == eb) return 1; /* Sort numbers in the usual way, where -0 == +0. Put NaNs after conversion errors but before numbers; sort them by internal bit-pattern, for lack of a more portable alternative. */ return (a < b ? -1 : a > b ? 1 : a == b ? 0 : b == b ? -1 : a == a ? 1 : memcmp ((char *) &a, (char *) &b, sizeof a));}/* Return an integer in 1..12 of the month name S with length LEN. Return 0 if the name in S is not recognized. */static intgetmonth (const char *s, int len){ char *month; register int i, lo = 0, hi = MONTHS_PER_YEAR, result; while (len > 0 && blanks[UCHAR (*s)]) { ++s; --len; } if (len == 0) return 0; month = (char *) alloca (len + 1); for (i = 0; i < len; ++i) month[i] = fold_toupper[UCHAR (s[i])]; while (blanks[UCHAR (month[i - 1])]) --i; month[i] = '\0'; do { int ix = (lo + hi) / 2; if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) hi = ix; else lo = ix; } while (hi - lo > 1); result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) ? monthtab[lo].val : 0); return result;}/* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */static intkeycompare (const struct line *a, const struct line *b){ register char *texta, *textb, *lima, *limb; register unsigned char *translate; register int *ignore; struct keyfield *key; int diff = 0, iter = 0, lena, lenb; for (key = keyhead.next; key; key = key->next, ++iter) { int comparable_lengths = 1; ignore = key->ignore; translate = (unsigned char *) key->translate; /* Find the beginning and limit of each field. */ if (iter || a->keybeg == NULL || b->keybeg == NULL) { if (key->eword >= 0) lima = limfield (a, key), limb = limfield (b, key); else lima = a->text + a->length, limb = b->text + b->length; if (key->sword >= 0) texta = begfield (a, key), textb = begfield (b, key); else { texta = a->text, textb = b->text; if (key->skipsblanks) { while (texta < lima && blanks[UCHAR (*texta)]) ++texta; while (textb < limb && blanks[UCHAR (*textb)]) ++textb; } } } else { /* For the first iteration only, the key positions have been precomputed for us. */ texta = a->keybeg, lima = a->keylim; textb = b->keybeg, limb = b->keylim; } /* Find the lengths. */ lena = lima - texta, lenb = limb - textb; if (lena < 0) lena = 0; if (lenb < 0) lenb = 0; if (key->skipeblanks) { char *a_end = texta + lena; char *b_end = textb + lenb; trim_trailing_blanks (texta, &a_end); trim_trailing_blanks (textb, &b_end); lena = a_end - texta; lenb = b_end - textb; } /* Actually compare the fields. */ if (key->numeric | key->general_numeric) { char savea, saveb; /* If the fields are adjacent, adjust the end of the earlier field back by 1 byte, since we temporarily modify the byte after the field during comparison. This can't change a numeric comparison, since the byte is a newline. If the earlier field is empty, adjust its start as well. */ if (lima == textb) texta -= texta == lima--; if (limb == texta) textb -= textb == limb--; savea = *lima, saveb = *limb; *lima = *limb = '\0'; diff = ((key->numeric ? numcompare : general_numcompare) (texta, textb)); *lima = savea, *limb = saveb; if (diff) return key->reverse ? -diff : diff; continue; } else if (key->month) { diff = getmonth (texta, lena) - getmonth (textb, lenb); if (diff) return key->reverse ? -diff : diff; continue; }#ifdef ENABLE_NLS /* Sorting like this may become slow, so in a simple locale the user can select a faster sort that is similar to ascii sort */ else if (hard_LC_COLLATE | hard_LC_CTYPE) { if (ignore || translate) { char *copy_a = (char *) alloca (lena + 1); char *copy_b = (char *) alloca (lenb + 1); int new_len_a, new_len_b, i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -