📄 tr.c
字号:
case 'b':
c = '\b';
break;
case 'f':
c = '\f';
break;
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\v';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
c = s[i + 1] - '0';
oct_digit = s[i + 2] - '0';
if (0 <= oct_digit && oct_digit <= 7)
{
c = 8 * c + oct_digit;
++i;
oct_digit = s[i + 2] - '0';
if (0 <= oct_digit && oct_digit <= 7)
{
if (8 * c + oct_digit < N_CHARS)
{
c = 8 * c + oct_digit;
++i;
}
else if (!posix_pedantic)
{
/* A 3-digit octal number larger than \377 won't
fit in 8 bits. So we stop when adding the
next digit would put us over the limit and
give a warning about the ambiguity. POSIX
isn't clear on this, but one person has said
that in his interpretation, POSIX says tr
can't even give a warning. */
error (0, 0, _("warning: the ambiguous octal escape \
\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, `%c'"),
s[i], s[i + 1], s[i + 2],
s[i], s[i + 1], s[i + 2]);
}
}
}
break;
case '\0':
error (0, 0, _("invalid backslash escape at end of string"));
return 1;
default:
if (posix_pedantic)
{
error (0, 0, _("invalid backslash escape `\\%c'"), s[i + 1]);
return 1;
}
else
{
c = s[i + 1];
es->escaped[j] = 1;
}
}
++i;
es->s[j++] = c;
break;
default:
es->s[j++] = s[i];
break;
}
}
es->len = j;
return 0;
}
/* Look up the LEN-byte string CLASS_STR in the global char_class_name
   table.  An entry matches only when its name is exactly LEN bytes
   long and those bytes equal CLASS_STR.  Return the index of the
   matching entry converted to enum Char_class, or CC_NO_CLASS when no
   entry matches.  */
static enum Char_class
look_up_char_class (const unsigned char *class_str, size_t len)
{
  const char *candidate = (const char *) class_str;
  unsigned int idx = 0;

  while (idx < N_CHAR_CLASSES)
    {
      /* Require both an equal length and an equal LEN-byte prefix, so
         that a mere prefix of a class name is not accepted.  */
      if (strlen (char_class_name[idx]) == len
          && strncmp (candidate, char_class_name[idx], len) == 0)
        return (enum Char_class) idx;
      idx++;
    }
  return CC_NO_CLASS;
}
/* Format the single character C for use in an error message.
   If ISPRINT accepts C the result is the one-character string holding
   C; otherwise it is a backslash followed by the value of C as a
   zero-padded, three-digit octal number.  C must be less than N_CHARS.
   The result is allocated with xmalloc; callers in this file release
   it with free.  */
static char *
make_printable_char (unsigned int c)
{
  /* Five bytes: backslash, three octal digits, terminating NUL.  */
  char *result = xmalloc (5);

  assert (c < N_CHARS);
  if (!ISPRINT (c))
    sprintf (result, "\\%03o", c);
  else
    {
      result[0] = c;
      result[1] = '\0';
    }
  return result;
}
/* Return a newly allocated, NUL-terminated copy of S which is suitable
   for printing.  LEN is the number of characters in S; S may contain
   zero bytes.  Most non-printing (per ISPRINT) characters are
   represented by a backslash followed by 3 octal digits.  However,
   the characters that have a \c escape, where c is one of [abfnrtv],
   are represented by that 2-character sequence, and a backslash is
   copied through as a single backslash.  When LEN is zero the result
   is the empty string.  This function is used solely for printing
   error messages; the caller frees the result.  */
static char *
make_printable_str (const unsigned char *s, size_t len)
{
  /* Worst case is that every character expands to a backslash
     followed by a 3-character octal escape sequence.  */
  char *printable_buf = xmalloc (4 * len + 1);
  char *p = printable_buf;
  size_t i;

  /* Terminate up front: when LEN is 0 the loop below never runs, and
     without this the caller would receive an uninitialized buffer.  */
  *p = '\0';

  for (i = 0; i < len; i++)
    {
      /* Scratch space for one character: either the character itself
         or a backslash plus three octal digits, plus the NUL.  */
      char buf[5];
      char *tmp = NULL;

      switch (s[i])
        {
        case '\\':
          tmp = "\\";
          break;
        case '\007':
          tmp = "\\a";
          break;
        case '\b':
          tmp = "\\b";
          break;
        case '\f':
          tmp = "\\f";
          break;
        case '\n':
          tmp = "\\n";
          break;
        case '\r':
          tmp = "\\r";
          break;
        case '\t':
          tmp = "\\t";
          break;
        case '\v':
          tmp = "\\v";
          break;
        default:
          if (ISPRINT (s[i]))
            {
              buf[0] = s[i];
              buf[1] = '\0';
            }
          else
            sprintf (buf, "\\%03o", s[i]);
          tmp = buf;
          break;
        }
      /* stpcpy returns a pointer to the NUL it wrote, so P always
         addresses the current end of the output string.  */
      p = stpcpy (p, tmp);
    }
  return printable_buf;
}
/* Allocate a list element of type RE_NORMAL_CHAR that holds the
   character C and link it in after the current tail of LIST, making
   it the new tail.  LIST must already have a tail element.  */
static void
append_normal_char (struct Spec_list *list, unsigned int c)
{
  struct List_element *elem
    = (struct List_element *) xmalloc (sizeof *elem);

  elem->type = RE_NORMAL_CHAR;
  elem->u.normal_char = c;
  elem->next = NULL;

  assert (list->tail);
  list->tail->next = elem;
  list->tail = elem;
}
/* Add the character range FIRST-LAST to the end of the specification
   list LIST as a newly allocated RE_RANGE element.
   If LAST sorts before FIRST (compared through ORD), print a
   diagnostic, leave LIST unchanged and return 1.  Otherwise append
   the element and return 0.  Equal endpoints are accepted, so
   '[c-c]' is valid.  LIST must already have a tail element.  */
static int
append_range (struct Spec_list *list, unsigned int first, unsigned int last)
{
  struct List_element *elem;

  if (ORD (last) < ORD (first))
    {
      /* Both endpoints are rendered printably for the message and
         released again before returning.  */
      char *first_text = make_printable_char (first);
      char *last_text = make_printable_char (last);

      error (0, 0,
             _("range-endpoints of `%s-%s' are in reverse collating sequence order"),
             first_text, last_text);
      free (first_text);
      free (last_text);
      return 1;
    }

  elem = (struct List_element *) xmalloc (sizeof *elem);
  elem->type = RE_RANGE;
  elem->u.range.first_char = first;
  elem->u.range.last_char = last;
  elem->next = NULL;

  assert (list->tail);
  list->tail->next = elem;
  list->tail = elem;
  return 0;
}
/* If CHAR_CLASS_STR is a valid character class string, append a
newly allocated structure representing that character class to the end
of the specification list LIST and return 0. If CHAR_CLASS_STR is not
a valid string return nonzero. */
static int
append_char_class (struct Spec_list *list,
const unsigned char *char_class_str, size_t len)
{
enum Char_class char_class;
struct List_element *new;
char_class = look_up_char_class (char_class_str, len);
if (char_class == CC_NO_CLASS)
return 1;
new = (struct List_element *) xmalloc (sizeof (struct List_element));
new->next = NULL;
new->type = RE_CHAR_CLASS;
new->u.char_class = char_class;
assert (list->tail);
list->tail->next = new;
list->tail = new;
return 0;
}
/* Record a [c*n] repeated-character construct at the end of the
   specification list LIST.  A newly allocated RE_REPEATED_CHAR
   element stores THE_CHAR as the character to repeat and REPEAT_COUNT
   as the number of repetitions.  LIST must already have a tail
   element.  */
static void
append_repeated_char (struct Spec_list *list, unsigned int the_char,
                      size_t repeat_count)
{
  struct List_element *elem
    = (struct List_element *) xmalloc (sizeof *elem);

  elem->type = RE_REPEATED_CHAR;
  elem->u.repeated_char.the_repeated_char = the_char;
  elem->u.repeated_char.repeat_count = repeat_count;
  elem->next = NULL;

  assert (list->tail);
  list->tail->next = elem;
  list->tail = elem;
}
/* Handle the operand of a [=str=] construct.  EQUIV_CLASS_STR is the
   text between the delimiters and LEN is its length.  Only a
   single-character operand is accepted: when LEN is 1, append a newly
   allocated RE_EQUIV_CLASS element holding that character to the end
   of the specification list LIST and return 0.  For any other LEN,
   return 1 and leave LIST unchanged.  LIST must already have a tail
   element.  */
static int
append_equiv_class (struct Spec_list *list,
                    const unsigned char *equiv_class_str, size_t len)
{
  struct List_element *elem;

  if (len == 1)
    {
      elem = (struct List_element *) xmalloc (sizeof *elem);
      elem->type = RE_EQUIV_CLASS;
      elem->u.equiv_code = equiv_class_str[0];
      elem->next = NULL;

      assert (list->tail);
      list->tail->next = elem;
      list->tail = elem;
      return 0;
    }
  return 1;
}
/* Duplicate the first LEN bytes of P into storage obtained from
   xmalloc and return that storage.  Exactly LEN bytes are copied, so
   the result may contain NUL bytes and carries no terminating NUL.  */
static unsigned char *
xmemdup (const unsigned char *p, size_t len)
{
  unsigned char *copy;

  copy = (unsigned char *) xmalloc (len);
  /* A byte-wise copy is required here: a string copy would stop at
     the first zero byte, and P is allowed to contain them.  */
  memcpy (copy, p, len);
  return copy;
}
/* Search forward in ES, starting at index START_IDX, for the
   unescaped 2-character sequence (PRE_BRACKET_CHAR, ']').  If such a
   sequence is found, set *RESULT_IDX to the index of its first
   character and return nonzero.  Otherwise return zero and leave
   *RESULT_IDX untouched.  ES->s may contain zero bytes; only the
   first ES->len characters are examined.  */
static int
find_closing_delim (const struct E_string *es, size_t start_idx,
                    unsigned int pre_bracket_char, size_t *result_idx)
{
  size_t i;

  /* Test I + 1 against the length rather than I against LEN - 1:
     ES->len is unsigned, so LEN - 1 would wrap around to a huge value
     for an empty string and the loop would read out of bounds.  */
  for (i = start_idx; i + 1 < es->len; i++)
    if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']'
        && !es->escaped[i] && !es->escaped[i + 1])
      {
        *result_idx = i;
        return 1;
      }
  return 0;
}
/* Parse the bracketed repeat-char syntax.  START_IDX is the index in
   ES of the character that follows the opening bracket, i.e. the
   character to be repeated.  If the characters beginning there
   comprise the remainder of a valid [c*n] construct, set
   *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX and return
   zero; a missing count ([c*]) is reported as a *REPEAT_COUNT of 0.
   If the character after C is not an unescaped `*' or if no unescaped
   closing bracket can be found, return -1.  If a closing bracket is
   found and the second char is `*', but the string between the `*'
   and `]' isn't empty, an octal number, or a decimal number, or if
   the number exceeds BEGIN_STATE, print an error message and
   return -2.  */
static int
find_bracketed_repeat (const struct E_string *es, size_t start_idx,
                       unsigned int *char_to_repeat, size_t *repeat_count,
                       size_t *closing_bracket_idx)
{
  size_t i;

  assert (start_idx + 1 < es->len);
  if (!ES_MATCH (es, start_idx + 1, '*'))
    return -1;

  for (i = start_idx + 2; i < es->len; i++)
    {
      const char *digit_str;
      size_t digit_str_len;
      unsigned long int tmp_ulong;
      char *d_end;
      int base;

      if (!ES_MATCH (es, i, ']'))
        continue;

      /* Number of characters strictly between the `*' and the `]'.  */
      digit_str_len = i - start_idx - 2;
      *char_to_repeat = es->s[start_idx];

      if (digit_str_len == 0)
        {
          /* We've matched [c*] -- no explicit repeat count.  */
          *repeat_count = 0;
          *closing_bracket_idx = i;
          return 0;
        }

      /* Here, we have found [c*s] where s should be a string
         of octal (if it starts with `0') or decimal digits.
         Select the base manually so we can be sure it's either 8 or 10.
         If the spec allowed it to be interpreted as hexadecimal, we
         could have used `0' and let xstrtoul decide.

         The leading `0' is deliberately left in the string handed to
         xstrtoul.  It is a valid octal digit, and stripping it would
         leave nothing to convert for the count "0", so that [c*0]
         would wrongly be rejected as invalid.  */
      digit_str = (const char *) &es->s[start_idx + 2];
      base = (*digit_str == '0') ? 8 : 10;

      /* The conversion must succeed, stay within the BEGIN_STATE
         limit, and consume every character up to the `]'.  */
      if (xstrtoul (digit_str, &d_end, base, &tmp_ulong, NULL)
          != LONGINT_OK
          || BEGIN_STATE < tmp_ulong
          || digit_str + digit_str_len != d_end)
        {
          char *tmp = make_printable_str (es->s + start_idx + 2,
                                          i - start_idx - 2);
          error (0, 0, _("invalid repeat count `%s' in [c*n] construct"),
                 tmp);
          free (tmp);
          return -2;
        }

      *repeat_count = tmp_ulong;
      *closing_bracket_idx = i;
      return 0;
    }
  return -1;			/* No bracket found.  */
}
/* Report whether the text of ES starting at index IDX has the form
   `\*[0-9]*\]': an unescaped `*', then zero or more digits, then an
   unescaped `]'.  Return 1 if it does and 0 otherwise, including the
   case where the string ends before any `]' is seen.  */
static int
star_digits_closebracket (const struct E_string *es, size_t idx)
{
  size_t pos;

  if (!ES_MATCH (es, idx, '*'))
    return 0;

  /* Step over the (possibly empty) run of digits after the `*'.  */
  pos = idx + 1;
  while (pos < es->len && ISDIGIT (es->s[pos]))
    pos++;

  /* Running off the end means there was no closing bracket at all.  */
  if (pos >= es->len)
    return 0;

  /* The first non-digit decides: it must be an unescaped `]'.  */
  return ES_MATCH (es, pos, ']') ? 1 : 0;
}
/* Convert string UNESCAPED_STRING (which has been preprocessed to
convert backslash-escape sequences) of length LEN characters into
a linked list of the following 5 types of constructs:
- [:str:] Character class where `str' is one of the 12 valid strings.
- [=c=] Equivalence class where `c' is any single character.
- [c*n] Repeat the single character `c' `n' times. n may be omitted.
However, if `n' is present, it must be a non-negative octal or
decimal integer.
- r-s Range of characters from `r' to `s'. The second endpoint must
not precede the first in the current collating sequence.
- c Any other character is interpreted as itself. */
static int
build_spec_list (const struct E_string *es, struct Spec_list *result)
{
const unsigned char *p;
size_t i;
p = es->s;
/* The main for-loop below recognizes the 4 multi-character constructs.
A character that matches (in its context) none of the multi-character
constructs is classified as `normal'. Since all multi-character
constructs have at least 3 characters, any strings of length 2 or
less are composed solely of normal characters. Hence, the index of
the outer for-loop runs only as far as LEN-2. */
for (i = 0; i + 2 < es->len; /* empty */)
{
if (ES_MATCH (es, i, '['))
{
int matched_multi_char_construct;
size_t closing_bracket_idx;
unsigned int char_to_repeat;
size_t repeat_count;
int err;
matched_multi_char_construct = 1;
if (ES_MATCH (es, i + 1, ':')
|| ES_MATCH (es, i + 1, '='))
{
size_t closing_delim_idx;
int found;
found = find_closing_delim (es, i + 2, p[i + 1],
&closing_delim_idx);
if (found)
{
int parse_failed;
size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
unsigned char *opnd_str;
if (opnd_str_len == 0)
{
if (p[i + 1] == ':')
error (0, 0, _("missing character class name `[::]'"));
else
error (0, 0,
_("missing equivalence class character `[==]'"));
return 1;
}
opnd_str = xmemdup (p + i + 2, opnd_str_len);
if (p[i + 1] == ':')
{
parse_failed = append_char_class (result, opnd_str,
opnd_str_len);
/* FIXME: big comment. */
if (parse_failed)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -