📄 tr.c
字号:
if (star_digits_closebracket (es, i + 2))
{
free (opnd_str);
goto try_bracketed_repeat;
}
else
{
char *tmp = make_printable_str (opnd_str,
opnd_str_len);
error (0, 0, _("invalid character class `%s'"),
tmp);
free (tmp);
return 1;
}
}
}
else
{
parse_failed = append_equiv_class (result, opnd_str,
opnd_str_len);
/* FIXME: big comment. */
if (parse_failed)
{
if (star_digits_closebracket (es, i + 2))
{
free (opnd_str);
goto try_bracketed_repeat;
}
else
{
char *tmp = make_printable_str (opnd_str,
opnd_str_len);
error (0, 0,
_("%s: equivalence class operand must be a single character"),
tmp);
free (tmp);
return 1;
}
}
}
free (opnd_str);
/* Return nonzero if append_*_class reports a problem. */
if (parse_failed)
return 1;
else
i = closing_delim_idx + 2;
continue;
}
/* Else fall through. This could be [:*] or [=*]. */
}
try_bracketed_repeat:
/* Determine whether this is a bracketed repeat range
matching the RE \[.\*(dec_or_oct_number)?\]. */
err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
&repeat_count,
&closing_bracket_idx);
if (err == 0)
{
append_repeated_char (result, char_to_repeat, repeat_count);
i = closing_bracket_idx + 1;
}
else if (err == -1)
{
matched_multi_char_construct = 0;
}
else
{
/* Found a string that looked like [c*n] but the
numeric part was invalid. */
return 1;
}
if (matched_multi_char_construct)
continue;
/* We reach this point if P does not match [:str:], [=c=],
[c*n], or [c*]. Now, see if P looks like a range `[-c'
(from `[' to `c'). */
}
/* Look ahead one char for ranges like a-z. */
if (ES_MATCH (es, i + 1, '-'))
{
if (append_range (result, p[i], p[i + 2]))
return 1;
i += 3;
}
else
{
append_normal_char (result, p[i]);
++i;
}
}
/* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1]. */
for (; i < es->len; i++)
append_normal_char (result, p[i]);
return 0;
}
/* Return the next single character in the expansion of the constructs
   of the Spec_list S, or -1 when the previous call returned the last
   character or S is empty.  The iteration state is kept in S itself,
   in the members `tail' (the element currently being expanded) and
   `state' (the position inside that element).  A caller restarts the
   iteration by setting S->state to BEGIN_STATE.

   For example, successive calls where S represents the spec-string
   'a-d[y*3]' return a, b, c, d, y, y, y, -1.

   If CLASS is non-NULL it is first set to UL_NONE.  When the current
   element is [:lower:] or [:upper:], *CLASS is set to UL_LOWER or
   UL_UPPER respectively, the whole element is consumed in one call,
   and 0 is returned.  That value is used only when constructing the
   translation table, to verify that upper and lower class constructs
   appear in the same relative positions in both spec-strings.  When
   CLASS is NULL those two classes are expanded character by character
   like any other class.  */
static int
get_next (struct Spec_list *s, enum Upper_Lower_class *class)
{
  struct List_element *elem;
  int ch;
  int code;

  if (class)
    *class = UL_NONE;

  /* Fresh iteration: start at the element that follows the head node.  */
  if (s->state == BEGIN_STATE)
    {
      s->tail = s->head->next;
      s->state = NEW_ELEMENT;
    }

  elem = s->tail;
  if (elem == NULL)
    return -1;

  switch (elem->type)
    {
    case RE_NORMAL_CHAR:
      ch = elem->u.normal_char;
      s->tail = elem->next;
      s->state = NEW_ELEMENT;
      break;

    case RE_RANGE:
      /* While inside a range, `state' holds the ordinal of the
         character most recently returned.  */
      if (s->state != NEW_ELEMENT)
        ++(s->state);
      else
        s->state = ORD (elem->u.range.first_char);
      ch = CHR (s->state);
      if (s->state == ORD (elem->u.range.last_char))
        {
          s->tail = elem->next;
          s->state = NEW_ELEMENT;
        }
      break;

    case RE_CHAR_CLASS:
      /* With a CLASS out-parameter, [:lower:] and [:upper:] are reported
         as a unit rather than expanded.  */
      if (class
          && (elem->u.char_class == CC_LOWER
              || elem->u.char_class == CC_UPPER))
        {
          *class = (elem->u.char_class == CC_LOWER) ? UL_LOWER : UL_UPPER;
          s->tail = elem->next;
          s->state = NEW_ELEMENT;
          ch = 0;
          break;
        }

      /* On entry to the element, locate the first member of the class.  */
      if (s->state == NEW_ELEMENT)
        {
          code = 0;
          while (code < N_CHARS
                 && !is_char_class_member (elem->u.char_class, code))
            code++;
          assert (code < N_CHARS);
          s->state = code;
        }
      assert (is_char_class_member (elem->u.char_class, s->state));
      ch = CHR (s->state);

      /* Look ahead for the following member so that the next call can
         return it; if there is none, this element is finished.  */
      code = s->state + 1;
      while (code < N_CHARS
             && !is_char_class_member (elem->u.char_class, code))
        code++;
      if (code >= N_CHARS)
        {
          s->tail = elem->next;
          s->state = NEW_ELEMENT;
        }
      else
        s->state = code;
      break;

    case RE_EQUIV_CLASS:
      /* FIXME: this assumes that each character is alone in its own
         equivalence class (which appears to be correct for the original
         author's LC_COLLATE).  No function is known that reports a
         character's equivalence class.  */
      ch = elem->u.equiv_code;
      s->tail = elem->next;
      s->state = NEW_ELEMENT;
      break;

    case RE_REPEATED_CHAR:
      if (elem->u.repeated_char.repeat_count == 0)
        {
          /* A repeat count of 0 contributes no characters: skip the
             element and return whatever follows it.  */
          s->tail = elem->next;
          s->state = NEW_ELEMENT;
          ch = get_next (s, class);
        }
      else
        {
          /* `state' counts how many copies have been returned so far,
             including the one returned by this call.  */
          if (s->state == NEW_ELEMENT)
            s->state = 1;
          else
            ++(s->state);
          ch = elem->u.repeated_char.the_repeated_char;
          if (elem->u.repeated_char.repeat_count > 0
              && s->state == elem->u.repeated_char.repeat_count)
            {
              s->tail = elem->next;
              s->state = NEW_ELEMENT;
            }
        }
      break;

    case RE_NO_TYPE:
    default:
      abort ();
      break;
    }

  return ch;
}
/* Return the cardinality of the complement of the set of characters
   produced by expanding the Spec_list S, i.e. N_CHARS minus the number
   of distinct characters that get_next yields for S.

   This is a minor kludge.  It is called to determine the cardinality of
   a set derived from a complemented string, and it is a kludge in that
   some of the same operations are performed again in set_initialize.

   Side effect: S->state is reset to BEGIN_STATE and S is then iterated
   to exhaustion with get_next.  */
static int
card_of_complement (struct Spec_list *s)
{
  int c;
  int cardinality = N_CHARS;
  SET_TYPE in_set[N_CHARS];

  memset (in_set, 0, sizeof in_set);
  s->state = BEGIN_STATE;
  while ((c = get_next (s, NULL)) != -1)
    {
      /* Mark membership with a flag rather than incrementing a counter:
         a character that occurs many times (e.g. through a large [c*n]
         repeat) must not be able to wrap the SET_TYPE cell back to zero
         and be counted as "new" a second time.  */
      if (!in_set[c])
        {
          in_set[c] = 1;
          --cardinality;
        }
    }
  return cardinality;
}
/* Gather statistics about the spec-list S in preparation for the tests
in validate that determine the consistency of the specs. This function
is called at most twice; once for string1, and again for any string2.
LEN_S1 < 0 indicates that this is the first call and that S represents
string1. When LEN_S1 >= 0, it is the length of the expansion of the
constructs in string1, and we can use its value to resolve any
indefinite repeat construct in S (which represents string2). Hence,
this function has the side-effect that it converts a valid [c*]
construct in string2 to [c*n] where n is large enough (or 0) to give
string2 the same length as string1. For example, with the command
tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would
be 26 and S (representing string2) would be converted to 'A[\n*24]Z'. */
static void
get_spec_stats (struct Spec_list *s)
{
struct List_element *p;
int len = 0;
s->n_indefinite_repeats = 0;
s->has_equiv_class = 0;
s->has_restricted_char_class = 0;
s->has_char_class = 0;
for (p = s->head->next; p; p = p->next)
{
switch (p->type)
{
int i;
case RE_NORMAL_CHAR:
++len;
break;
case RE_RANGE:
assert (p->u.range.last_char >= p->u.range.first_char);
len += p->u.range.last_char - p->u.range.first_char + 1;
break;
case RE_CHAR_CLASS:
s->has_char_class = 1;
for (i = 0; i < N_CHARS; i++)
if (is_char_class_member (p->u.char_class, i))
++len;
switch (p->u.char_class)
{
case CC_UPPER:
case CC_LOWER:
break;
default:
s->has_restricted_char_class = 1;
break;
}
break;
case RE_EQUIV_CLASS:
for (i = 0; i < N_CHARS; i++)
if (is_equiv_class_member (p->u.equiv_code, i))
++len;
s->has_equiv_class = 1;
break;
case RE_REPEATED_CHAR:
if (p->u.repeated_char.repeat_count > 0)
len += p->u.repeated_char.repeat_count;
else if (p->u.repeated_char.repeat_count == 0)
{
s->indefinite_repeat_element = p;
++(s->n_indefinite_repeats);
}
break;
case RE_NO_TYPE:
assert (0);
break;
}
}
s->length = len;
}
/* Gather statistics for S1 (string1) with get_spec_stats.  When the
   file-scope `complement' flag is set, S1->length is then replaced by
   the cardinality of the complement of S1's character set, as computed
   by card_of_complement.  */
static void
get_s1_spec_stats (struct Spec_list *s1)
{
  get_spec_stats (s1);

  if (!complement)
    return;

  s1->length = card_of_complement (s1);
}
/* Gather statistics for S2 (string2) with get_spec_stats and then, if
   possible, resolve its indefinite repeat construct.

   LEN_S1 is the length of the expansion of string1.  When S2 contains
   exactly one repeat element with a count of 0 and LEN_S1 is at least
   S2->length, that element's count is set to LEN_S1 - S2->length and
   S2->length becomes LEN_S1.  For example, with the command
   tr a-z 'A[\n*]Z', LEN_S1 is 26 and S2 is converted to 'A[\n*24]Z'.
   In every other situation S2 is left as get_spec_stats computed it.  */
static void
get_s2_spec_stats (struct Spec_list *s2, size_t len_s1)
{
  get_spec_stats (s2);

  /* Nothing to resolve unless there is a single indefinite repeat and
     room for it to grow into.  */
  if (len_s1 < s2->length || s2->n_indefinite_repeats != 1)
    return;

  s2->indefinite_repeat_element->u.repeated_char.repeat_count
    = len_s1 - s2->length;
  s2->length = len_s1;
}
static void
spec_init (struct Spec_list *spec_list)
{
spec_list->head = spec_list->tail =
(struct List_element *) xmalloc (sizeof (struct List_element));
spec_list->head->next = NULL;
}
/* Parse the argument string S into the specification list SPEC_LIST.

   Two passes are made over S.  The first (unquote) converts all \c and
   \ddd escapes to their one-byte representations, producing an
   E_string.  The second (build_spec_list) constructs the linked list of
   characters and constructs that comprise the string; it runs only if
   the first pass succeeded.  The E_string is released with es_free in
   either case.

   Return nonzero if either pass reports an error, zero otherwise.  */
static int
parse_str (const unsigned char *s, struct Spec_list *spec_list)
{
  struct E_string es;
  int status = unquote (s, &es);

  if (status == 0)
    status = build_spec_list (&es, spec_list);

  es_free (&es);
  return status;
}
/* Given two specification lists, S1 and S2, and assuming that
   S1->length > S2->length, append a single [c*n] element to S2 where c
   is the last character in the expansion of S2 and n is the difference
   between the two lengths.

   Upon successful completion, S2->length is set to S1->length.  The only
   way this function can fail to make S2 as long as S1 is when S2 has
   zero length, since in that case there is no last character to repeat.
   So S2->length is required to be at least 1 (checked with assert, as
   are the `translating' flag and the length relation).

   The last character is taken from the element S2->tail points at:
   the character itself, the upper end of a range, the highest-valued
   member of a character class, or the repeated character.  An
   equivalence class or an untyped element aborts the program.

   Providing this functionality allows the user to do some pretty
   non-BSD (and non-portable) things.  For example, the command
   tr -cs '[:upper:]0-9' '[:lower:]'
   is almost guaranteed to give results that depend on your collating
   sequence.  */
static void
string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
{
  struct List_element *p;
  int char_to_repeat;
  int i;

  assert (translating);
  assert (s1->length > s2->length);
  assert (s2->length > 0);

  p = s2->tail;
  switch (p->type)
    {
    case RE_NORMAL_CHAR:
      char_to_repeat = p->u.normal_char;
      break;

    case RE_RANGE:
      char_to_repeat = p->u.range.last_char;
      break;

    case RE_CHAR_CLASS:
      /* Scan downward from the largest valid character code.  Valid
         codes are 0 .. N_CHARS - 1; starting at N_CHARS would query
         class membership for a value that is not a character.  */
      for (i = N_CHARS - 1; i >= 0; i--)
        if (is_char_class_member (p->u.char_class, i))
          break;
      assert (i >= 0);
      char_to_repeat = CHR (i);
      break;

    case RE_REPEATED_CHAR:
      char_to_repeat = p->u.repeated_char.the_repeated_char;
      break;

    case RE_EQUIV_CLASS:
      /* This shouldn't happen, because validate exits with an error
         if it finds an equiv class in string2 when translating.  */
      abort ();
      break;

    case RE_NO_TYPE:
      abort ();
      break;

    default:
      abort ();
      break;
    }

  append_repeated_char (s2, char_to_repeat, s1->length - s2->length);
  s2->length = s1->length;
}
/* Return nonzero if S is a non-empty list whose expansion consists of
   exactly one distinct character, possibly repeated many times
   (e.g. [X*] or xxxxxxxx); return zero otherwise, including when the
   expansion is empty.

   Side effect: S->state is reset to BEGIN_STATE and S is iterated with
   get_next until either a differing character or the end is reached.  */
static int
homogeneous_spec_list (struct Spec_list *s)
{
  int first;
  int next;

  s->state = BEGIN_STATE;

  first = get_next (s, NULL);
  if (first == -1)
    return 0;

  for (next = get_next (s, NULL); next != -1; next = get_next (s, NULL))
    {
      if (next != first)
        return 0;
    }

  return 1;
}
/* Die with an error message if S1 and S2 describe strings that
are not valid with the given command line switches.
A side effect of this function is that if a valid [c*] or
[c*0] construct appears in string2, it is converted to [c*n]
with a value for n that makes s2->length == s1->length. By
the same token, if the --truncate-set1 option is not
given, S2 may be extended. */
static void
validate (struct Spec_list *s1, struct Spec_list *s2)
{
get_s1_spec_stats (s1);
if (s1->n_indefinite_repeats > 0)
{
error (EXIT_FAILURE, 0,
_("the [c*] repeat construct may not appear in string1"));
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -