📄 tr.c

📁 linux下一些命令的c语言的实现
💻 C
📖 第 1 页 / 共 4 页
字号:
			  if (star_digits_closebracket (es, i + 2))
			    {
			      free (opnd_str);
			      goto try_bracketed_repeat;
			    }
			  else
			    {
			      char *tmp = make_printable_str (opnd_str,
							      opnd_str_len);
			      error (0, 0, _("invalid character class `%s'"),
				     tmp);
			      free (tmp);
			      return 1;
			    }
			}
		    }
		  else
		    {
		      parse_failed = append_equiv_class (result, opnd_str,
							 opnd_str_len);

		      /* FIXME: big comment.  */
		      if (parse_failed)
			{
			  if (star_digits_closebracket (es, i + 2))
			    {
			      free (opnd_str);
			      goto try_bracketed_repeat;
			    }
			  else
			    {
			      char *tmp = make_printable_str (opnd_str,
							      opnd_str_len);
			      error (0, 0,
	       _("%s: equivalence class operand must be a single character"),
				     tmp);
			      free (tmp);
			      return 1;
			    }
			}
		    }
		  free (opnd_str);

		  /* Return nonzero if append_*_class reports a problem.  */
		  if (parse_failed)
		    return 1;
		  else
		    i = closing_delim_idx + 2;
		  continue;
		}
	      /* Else fall through.  This could be [:*] or [=*].  */
	    }

	try_bracketed_repeat:

	  /* Determine whether this is a bracketed repeat range
	     matching the RE \[.\*(dec_or_oct_number)?\].  */
	  err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
				       &repeat_count,
				       &closing_bracket_idx);
	  if (err == 0)
	    {
	      append_repeated_char (result, char_to_repeat, repeat_count);
	      i = closing_bracket_idx + 1;
	    }
	  else if (err == -1)
	    {
	      matched_multi_char_construct = 0;
	    }
	  else
	    {
	      /* Found a string that looked like [c*n] but the
		 numeric part was invalid.  */
	      return 1;
	    }

	  if (matched_multi_char_construct)
	    continue;

	  /* We reach this point if P does not match [:str:], [=c=],
	     [c*n], or [c*].  Now, see if P looks like a range `[-c'
	     (from `[' to `c').  */
	}

      /* Look ahead one char for ranges like a-z.  */
      if (ES_MATCH (es, i + 1, '-'))
	{
	  if (append_range (result, p[i], p[i + 2]))
	    return 1;
	  i += 3;
	}
      else
	{
	  append_normal_char (result, p[i]);
	  ++i;
	}
    }

  /* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1].  */
  for (; i < es->len; i++)
    append_normal_char (result, p[i]);

  return 0;
}

/* Given a Spec_list S (with its saved state implicit in the values
   of its members `tail' and `state'), return the next single character
   in the expansion of S's constructs.  If the last character of S was
   returned on the previous call or if S was empty, this function
   returns -1.  For example, successive calls to get_next where S
   represents the spec-string 'a-d[y*3]' will return the sequence
   of values a, b, c, d, y, y, y, -1.  Finally, if the construct from
   which the returned character comes is [:upper:] or [:lower:], the
   parameter CLASS is given a value to indicate which it was.  Otherwise
   CLASS is set to UL_NONE.  This value is used only when constructing
   the translation table to verify that any occurrences of upper and
   lower class constructs in the spec-strings appear in the same relative
   positions.  */

static int
get_next (struct Spec_list *s, enum Upper_Lower_class *class)
{
  struct List_element *p;
  int return_val;
  int i;

  if (class)
    *class = UL_NONE;

  if (s->state == BEGIN_STATE)
    {
      s->tail = s->head->next;
      s->state = NEW_ELEMENT;
    }

  p = s->tail;
  if (p == NULL)
    return -1;

  switch (p->type)
    {
    case RE_NORMAL_CHAR:
      return_val = p->u.normal_char;
      s->state = NEW_ELEMENT;
      s->tail = p->next;
      break;

    case RE_RANGE:
      if (s->state == NEW_ELEMENT)
	s->state = ORD (p->u.range.first_char);
      else
	++(s->state);
      return_val = CHR (s->state);
      if (s->state == ORD (p->u.range.last_char))
	{
	  s->tail = p->next;
	  s->state = NEW_ELEMENT;
	}
      break;

    case RE_CHAR_CLASS:
      if (class)
	{
	  int upper_or_lower;
	  switch (p->u.char_class)
	    {
	    case CC_LOWER:
	      *class = UL_LOWER;
	      upper_or_lower = 1;
	      break;
	    case CC_UPPER:
	      *class = UL_UPPER;
	      upper_or_lower = 1;
	      break;
	    default:
	      upper_or_lower = 0;
	      break;
	    }

	  if (upper_or_lower)
	    {
	      s->tail = p->next;
	      s->state = NEW_ELEMENT;
	      return_val = 0;
	      break;
	    }
	}

      if (s->state == NEW_ELEMENT)
	{
	  for (i = 0; i < N_CHARS; i++)
	    if (is_char_class_member (p->u.char_class, i))
	      break;
	  assert (i < N_CHARS);
	  s->state = i;
	}
      assert (is_char_class_member (p->u.char_class, s->state));
      return_val = CHR (s->state);
      for (i = s->state + 1; i < N_CHARS; i++)
	if (is_char_class_member (p->u.char_class, i))
	  break;
      if (i < N_CHARS)
	s->state = i;
      else
	{
	  s->tail = p->next;
	  s->state = NEW_ELEMENT;
	}
      break;

    case RE_EQUIV_CLASS:
      /* FIXME: this assumes that each character is alone in its own
         equivalence class (which appears to be correct for my
         LC_COLLATE.  But I don't know of any function that allows
         one to determine a character's equivalence class.  */

      return_val = p->u.equiv_code;
      s->state = NEW_ELEMENT;
      s->tail = p->next;
      break;

    case RE_REPEATED_CHAR:
      /* Here, a repeat count of n == 0 means don't repeat at all.  */
      if (p->u.repeated_char.repeat_count == 0)
	{
	  s->tail = p->next;
	  s->state = NEW_ELEMENT;
	  return_val = get_next (s, class);
	}
      else
	{
	  if (s->state == NEW_ELEMENT)
	    {
	      s->state = 0;
	    }
	  ++(s->state);
	  return_val = p->u.repeated_char.the_repeated_char;
	  if (p->u.repeated_char.repeat_count > 0
	      && s->state == p->u.repeated_char.repeat_count)
	    {
	      s->tail = p->next;
	      s->state = NEW_ELEMENT;
	    }
	}
      break;

    case RE_NO_TYPE:
      abort ();
      break;

    default:
      abort ();
      break;
    }

  return return_val;
}

/* This is a minor kludge.  This function is called from
   get_spec_stats to determine the cardinality of a set derived
   from a complemented string.  It's a kludge in that some of the
   same operations are (duplicated) performed in set_initialize.  */

static int
card_of_complement (struct Spec_list *s)
{
  int c;
  int cardinality = N_CHARS;
  SET_TYPE in_set[N_CHARS];

  memset (in_set, 0, N_CHARS * sizeof (in_set[0]));
  s->state = BEGIN_STATE;
  while ((c = get_next (s, NULL)) != -1)
    if (!in_set[c]++)
      --cardinality;
  return cardinality;
}

/* Gather statistics about the spec-list S in preparation for the tests
   in validate that determine the consistency of the specs.  This function
   is called at most twice; once for string1, and again for any string2.
   LEN_S1 < 0 indicates that this is the first call and that S represents
   string1.  When LEN_S1 >= 0, it is the length of the expansion of the
   constructs in string1, and we can use its value to resolve any
   indefinite repeat construct in S (which represents string2).  Hence,
   this function has the side-effect that it converts a valid [c*]
   construct in string2 to [c*n] where n is large enough (or 0) to give
   string2 the same length as string1.  For example, with the command
   tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would
   be 26 and S (representing string2) would be converted to 'A[\n*24]Z'.  */

static void
get_spec_stats (struct Spec_list *s)
{
  struct List_element *p;
  int len = 0;

  s->n_indefinite_repeats = 0;
  s->has_equiv_class = 0;
  s->has_restricted_char_class = 0;
  s->has_char_class = 0;
  for (p = s->head->next; p; p = p->next)
    {
      switch (p->type)
	{
	  int i;
	case RE_NORMAL_CHAR:
	  ++len;
	  break;

	case RE_RANGE:
	  assert (p->u.range.last_char >= p->u.range.first_char);
	  len += p->u.range.last_char - p->u.range.first_char + 1;
	  break;

	case RE_CHAR_CLASS:
	  s->has_char_class = 1;
	  for (i = 0; i < N_CHARS; i++)
	    if (is_char_class_member (p->u.char_class, i))
	      ++len;
	  switch (p->u.char_class)
	    {
	    case CC_UPPER:
	    case CC_LOWER:
	      break;
	    default:
	      s->has_restricted_char_class = 1;
	      break;
	    }
	  break;

	case RE_EQUIV_CLASS:
	  for (i = 0; i < N_CHARS; i++)
	    if (is_equiv_class_member (p->u.equiv_code, i))
	      ++len;
	  s->has_equiv_class = 1;
	  break;

	case RE_REPEATED_CHAR:
	  if (p->u.repeated_char.repeat_count > 0)
	    len += p->u.repeated_char.repeat_count;
	  else if (p->u.repeated_char.repeat_count == 0)
	    {
	      s->indefinite_repeat_element = p;
	      ++(s->n_indefinite_repeats);
	    }
	  break;

	case RE_NO_TYPE:
	  assert (0);
	  break;
	}
    }

  s->length = len;
}

static void
get_s1_spec_stats (struct Spec_list *s1)
{
  get_spec_stats (s1);
  if (complement)
    s1->length = card_of_complement (s1);
}

static void
get_s2_spec_stats (struct Spec_list *s2, size_t len_s1)
{
  get_spec_stats (s2);
  if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1)
    {
      s2->indefinite_repeat_element->u.repeated_char.repeat_count =
	len_s1 - s2->length;
      s2->length = len_s1;
    }
}

static void
spec_init (struct Spec_list *spec_list)
{
  spec_list->head = spec_list->tail =
    (struct List_element *) xmalloc (sizeof (struct List_element));
  spec_list->head->next = NULL;
}

/* This function makes two passes over the argument string S.  The first
   one converts all \c and \ddd escapes to their one-byte representations.
   The second constructs a linked specification list, SPEC_LIST, of the
   characters and constructs that comprise the argument string.  If either
   of these passes detects an error, this function returns nonzero.  */

static int
parse_str (const unsigned char *s, struct Spec_list *spec_list)
{
  struct E_string es;
  int fail;

  fail = unquote (s, &es);
  if (!fail)
    fail = build_spec_list (&es, spec_list);
  es_free (&es);
  return fail;
}

/* Given two specification lists, S1 and S2, and assuming that
   S1->length > S2->length, append a single [c*n] element to S2 where c
   is the last character in the expansion of S2 and n is the difference
   between the two lengths.
   Upon successful completion, S2->length is set to S1->length.  The only
   way this function can fail to make S2 as long as S1 is when S2 has
   zero-length, since in that case, there is no last character to repeat.
   So S2->length is required to be at least 1.

   Providing this functionality allows the user to do some pretty
   non-BSD (and non-portable) things:  For example, the command
       tr -cs '[:upper:]0-9' '[:lower:]'
   is almost guaranteed to give results that depend on your collating
   sequence.  */

static void
string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
{
  struct List_element *p;
  int char_to_repeat;
  int i;

  assert (translating);
  assert (s1->length > s2->length);
  assert (s2->length > 0);

  p = s2->tail;
  switch (p->type)
    {
    case RE_NORMAL_CHAR:
      char_to_repeat = p->u.normal_char;
      break;
    case RE_RANGE:
      char_to_repeat = p->u.range.last_char;
      break;
    case RE_CHAR_CLASS:
      for (i = N_CHARS; i >= 0; i--)
	if (is_char_class_member (p->u.char_class, i))
	  break;
      assert (i >= 0);
      char_to_repeat = CHR (i);
      break;

    case RE_REPEATED_CHAR:
      char_to_repeat = p->u.repeated_char.the_repeated_char;
      break;

    case RE_EQUIV_CLASS:
      /* This shouldn't happen, because validate exits with an error
         if it finds an equiv class in string2 when translating.  */
      abort ();
      break;

    case RE_NO_TYPE:
      abort ();
      break;

    default:
      abort ();
      break;
    }

  append_repeated_char (s2, char_to_repeat, s1->length - s2->length);
  s2->length = s1->length;
}

/* Return non-zero if S is a non-empty list in which exactly one
   character (but potentially, many instances of it) appears.
   E.g.  [X*] or xxxxxxxx.  */

static int
homogeneous_spec_list (struct Spec_list *s)
{
  int b, c;

  s->state = BEGIN_STATE;

  if ((b = get_next (s, NULL)) == -1)
    return 0;

  while ((c = get_next (s, NULL)) != -1)
    if (c != b)
      return 0;

  return 1;
}

/* Die with an error message if S1 and S2 describe strings that
   are not valid with the given command line switches.
   A side effect of this function is that if a valid [c*] or
   [c*0] construct appears in string2, it is converted to [c*n]
   with a value for n that makes s2->length == s1->length.  By
   the same token, if the --truncate-set1 option is not
   given, S2 may be extended.  */

static void
validate (struct Spec_list *s1, struct Spec_list *s2)
{
  get_s1_spec_stats (s1);
  if (s1->n_indefinite_repeats > 0)
    {
      error (EXIT_FAILURE, 0,
	     _("the [c*] repeat construct may not appear in string1"));
    }
💿 文件大小 295 K
👤 上传用户 norwaybaby
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #c语言 #命令
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -