⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tr.c

📁 linux下一些命令的c语言的实现
💻 C
📖 第 1 页 / 共 4 页
字号:
	    case 'b':
	      c = '\b';
	      break;
	    case 'f':
	      c = '\f';
	      break;
	    case 'n':
	      c = '\n';
	      break;
	    case 'r':
	      c = '\r';
	      break;
	    case 't':
	      c = '\t';
	      break;
	    case 'v':
	      c = '\v';
	      break;
	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	      c = s[i + 1] - '0';
	      oct_digit = s[i + 2] - '0';
	      if (0 <= oct_digit && oct_digit <= 7)
		{
		  c = 8 * c + oct_digit;
		  ++i;
		  oct_digit = s[i + 2] - '0';
		  if (0 <= oct_digit && oct_digit <= 7)
		    {
		      if (8 * c + oct_digit < N_CHARS)
			{
			  c = 8 * c + oct_digit;
			  ++i;
			}
		      else if (!posix_pedantic)
			{
			  /* A 3-digit octal number larger than \377 won't
			     fit in 8 bits.  So we stop when adding the
			     next digit would put us over the limit and
			     give a warning about the ambiguity.  POSIX
			     isn't clear on this, but one person has said
			     that in his interpretation, POSIX says tr
			     can't even give a warning.  */
			  error (0, 0, _("warning: the ambiguous octal escape \
\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, `%c'"),
				 s[i], s[i + 1], s[i + 2],
				 s[i], s[i + 1], s[i + 2]);
			}
		    }
		}
	      break;
	    case '\0':
	      error (0, 0, _("invalid backslash escape at end of string"));
	      return 1;

	    default:
	      if (posix_pedantic)
		{
		  error (0, 0, _("invalid backslash escape `\\%c'"), s[i + 1]);
		  return 1;
		}
	      else
	        {
		  c = s[i + 1];
		  es->escaped[j] = 1;
		}
	    }
	  ++i;
	  es->s[j++] = c;
	  break;
	default:
	  es->s[j++] = s[i];
	  break;
	}
    }
  es->len = j;
  return 0;
}

/* Look up the LEN-byte name CLASS_STR in the global char_class_name
   table.  Return the index of the entry whose name is exactly those
   LEN bytes, converted to enum Char_class, or CC_NO_CLASS when no
   entry matches.  */

static enum Char_class
look_up_char_class (const unsigned char *class_str, size_t len)
{
  const char *wanted = (const char *) class_str;
  unsigned int idx = 0;

  while (idx < N_CHAR_CLASSES)
    {
      /* A prefix match is not enough: the table entry must also be
         exactly LEN characters long.  */
      if (strncmp (wanted, char_class_name[idx], len) == 0
          && strlen (char_class_name[idx]) == len)
        return (enum Char_class) idx;
      ++idx;
    }

  return CC_NO_CLASS;
}

/* Format the character code C for use in an error message and return
   the result in a freshly allocated 5-byte buffer.  A printable C
   (per ISPRINT) is returned as a one-character string; anything else
   becomes a backslash followed by three octal digits.  C must be
   less than N_CHARS.  */

static char *
make_printable_char (unsigned int c)
{
  char *text = xmalloc (5);

  assert (c < N_CHARS);

  if (!ISPRINT (c))
    sprintf (text, "\\%03o", c);
  else
    {
      text[0] = c;
      text[1] = '\0';
    }

  return text;
}

/* Return a newly allocated, NUL-terminated copy of S which is suitable
   for printing.  LEN is the number of characters in S.  Most
   non-printing (isprint) characters are represented by a backslash
   followed by 3 octal digits.  However, the characters represented by
   \c escapes where c is one of [abfnrtv] are represented by their
   2-character \c sequences, and a backslash is copied through as a
   single backslash.  When LEN is zero the result is the empty string.
   This function is used solely for printing error messages; callers
   in this file release the result with free.  */

static char *
make_printable_str (const unsigned char *s, size_t len)
{
  /* Worst case is that every character expands to a backslash
     followed by a 3-character octal escape sequence.  */
  char *printable_buf = xmalloc (4 * len + 1);
  char *p = printable_buf;
  size_t i;

  /* Terminate up front: when LEN is zero the loop below never runs,
     so nothing else would write the trailing NUL.  */
  *p = '\0';

  for (i = 0; i < len; i++)
    {
      /* Large enough for a backslash, three octal digits and a NUL.  */
      char buf[5];
      const char *tmp = NULL;

      switch (s[i])
	{
	case '\\':
	  tmp = "\\";
	  break;
	case '\007':
	  tmp = "\\a";
	  break;
	case '\b':
	  tmp = "\\b";
	  break;
	case '\f':
	  tmp = "\\f";
	  break;
	case '\n':
	  tmp = "\\n";
	  break;
	case '\r':
	  tmp = "\\r";
	  break;
	case '\t':
	  tmp = "\\t";
	  break;
	case '\v':
	  tmp = "\\v";
	  break;
	default:
	  if (ISPRINT (s[i]))
	    {
	      buf[0] = s[i];
	      buf[1] = '\0';
	    }
	  else
	    sprintf (buf, "\\%03o", s[i]);
	  tmp = buf;
	  break;
	}
      /* stpcpy returns a pointer to the NUL it wrote, so P always
	 points at the current end of the output.  */
      p = stpcpy (p, tmp);
    }
  return printable_buf;
}

/* Allocate a list element of type RE_NORMAL_CHAR holding the
   character C and link it in after the current tail of LIST.
   LIST must already have a tail element.  */

static void
append_normal_char (struct Spec_list *list, unsigned int c)
{
  struct List_element *elem
    = (struct List_element *) xmalloc (sizeof *elem);

  elem->type = RE_NORMAL_CHAR;
  elem->u.normal_char = c;
  elem->next = NULL;

  assert (list->tail);
  list->tail->next = elem;
  list->tail = elem;
}

/* Add the character range FIRST..LAST to the end of the specification
   list LIST as a newly allocated RE_RANGE element and return zero.
   If ORD (FIRST) is greater than ORD (LAST), nothing is appended: a
   diagnostic is printed and nonzero is returned.  Equal endpoints are
   accepted, so '[c-c]' is a valid range.  */

static int
append_range (struct Spec_list *list, unsigned int first, unsigned int last)
{
  struct List_element *elem;

  if (ORD (first) <= ORD (last))
    {
      elem = (struct List_element *) xmalloc (sizeof *elem);
      elem->type = RE_RANGE;
      elem->u.range.first_char = first;
      elem->u.range.last_char = last;
      elem->next = NULL;

      assert (list->tail);
      list->tail->next = elem;
      list->tail = elem;
      return 0;
    }

  /* Endpoints are reversed: report them in printable form.  */
  {
    char *first_text = make_printable_char (first);
    char *last_text = make_printable_char (last);

    error (0, 0,
       _("range-endpoints of `%s-%s' are in reverse collating sequence order"),
           first_text, last_text);
    free (first_text);
    free (last_text);
  }
  return 1;
}

/* If CHAR_CLASS_STR is a valid character class string, append a
   newly allocated structure representing that character class to the end
   of the specification list LIST and return 0.  If CHAR_CLASS_STR is not
   a valid string return nonzero.  */

static int
append_char_class (struct Spec_list *list,
		   const unsigned char *char_class_str, size_t len)
{
  enum Char_class char_class;
  struct List_element *new;

  char_class = look_up_char_class (char_class_str, len);
  if (char_class == CC_NO_CLASS)
    return 1;
  new = (struct List_element *) xmalloc (sizeof (struct List_element));
  new->next = NULL;
  new->type = RE_CHAR_CLASS;
  new->u.char_class = char_class;
  assert (list->tail);
  list->tail->next = new;
  list->tail = new;
  return 0;
}

/* Record a [c*n] repeated-character construct at the end of the
   specification list LIST.  A new RE_REPEATED_CHAR element is
   allocated with THE_CHAR as the character to repeat and
   REPEAT_COUNT as its repeat count.  LIST must already have a tail
   element.  */

static void
append_repeated_char (struct Spec_list *list, unsigned int the_char,
                      size_t repeat_count)
{
  struct List_element *elem
    = (struct List_element *) xmalloc (sizeof *elem);

  elem->type = RE_REPEATED_CHAR;
  elem->u.repeated_char.repeat_count = repeat_count;
  elem->u.repeated_char.the_repeated_char = the_char;
  elem->next = NULL;

  assert (list->tail);
  list->tail->next = elem;
  list->tail = elem;
}

/* Handle the operand of a [=str=] construct.  EQUIV_CLASS_STR is the
   text between the delimiters and LEN is its length.  Only a
   single-character operand is accepted: in that case a newly
   allocated RE_EQUIV_CLASS element holding that character is
   appended to the specification list LIST and zero is returned.
   For any other LEN, nothing is appended and nonzero is returned.  */

static int
append_equiv_class (struct Spec_list *list,
                    const unsigned char *equiv_class_str, size_t len)
{
  struct List_element *elem;

  if (len == 1)
    {
      elem = (struct List_element *) xmalloc (sizeof *elem);
      elem->type = RE_EQUIV_CLASS;
      elem->u.equiv_code = equiv_class_str[0];
      elem->next = NULL;

      assert (list->tail);
      list->tail->next = elem;
      list->tail = elem;
      return 0;
    }

  return 1;
}

/* Duplicate the first LEN bytes of P into a buffer obtained from
   xmalloc and return that buffer.  The bytes are copied verbatim,
   embedded NUL bytes included, and no terminating NUL is added.  */

static unsigned char *
xmemdup (const unsigned char *p, size_t len)
{
  unsigned char *copy = (unsigned char *) xmalloc (len);

  /* memcpy, not strncpy: copying must not stop at a zero byte in P.  */
  return (unsigned char *) memcpy (copy, p, len);
}

/* Search forward starting at START_IDX for the 2-char sequence
   (PRE_BRACKET_CHAR,']') in the string ES->s of length ES->len.
   Neither of the two characters may be marked in ES->escaped.  If
   such a sequence is found, set *RESULT_IDX to the index of its first
   character and return nonzero.  Otherwise return zero and leave
   *RESULT_IDX untouched.  ES->s may contain zero bytes.  */

static int
find_closing_delim (const struct E_string *es, size_t start_idx,
		    unsigned int pre_bracket_char, size_t *result_idx)
{
  size_t i;

  /* A 2-character sequence cannot fit in fewer than 2 characters.
     Checking this first also keeps `es->len - 1' below from wrapping
     around when the string is empty.  */
  if (es->len < 2)
    return 0;

  for (i = start_idx; i < es->len - 1; i++)
    if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']'
	&& !es->escaped[i] && !es->escaped[i + 1])
      {
	*result_idx = i;
	return 1;
      }
  return 0;
}

/* Parse the bracketed repeat-char syntax.  ES->s[START_IDX] is the
   character following the opening bracket, i.e. the candidate
   character to repeat.  If the characters beginning there comprise a
   valid c*n] tail of a [c*n] construct, then set *CHAR_TO_REPEAT,
   *REPEAT_COUNT, and *CLOSING_BRACKET_IDX and return zero; an omitted
   count is reported as a *REPEAT_COUNT of 0.  If the character at
   START_IDX + 1 does not match `*' (per ES_MATCH) or if no closing
   bracket can be found, return -1.  If a closing bracket is found and
   the second char is `*', but the string between the `*' and `]'
   isn't empty, an octal number, or a decimal number no larger than
   BEGIN_STATE, print an error message and return -2.  */

static int
find_bracketed_repeat (const struct E_string *es, size_t start_idx,
		       unsigned int *char_to_repeat, size_t *repeat_count,
		       size_t *closing_bracket_idx)
{
  size_t i;

  assert (start_idx + 1 < es->len);
  if (!ES_MATCH (es, start_idx + 1, '*'))
    return -1;

  for (i = start_idx + 2; i < es->len; i++)
    {
      if (ES_MATCH (es, i, ']'))
	{
	  size_t digit_str_len = i - start_idx - 2;

	  *char_to_repeat = es->s[start_idx];
	  if (digit_str_len == 0)
	    {
	      /* We've matched [c*] -- no explicit repeat count.  */
	      *repeat_count = 0;
	      *closing_bracket_idx = i;
	      return 0;
	    }

	  /* Here, we have found [c*s] where s should be a string
	     of octal (if it starts with `0') or decimal digits.  */
	  {
	    const char *digit_str = (const char *) &es->s[start_idx + 2];
	    unsigned long int tmp_ulong;
	    char *d_end;
	    /* Select the base manually so we can be sure it's either 8 or 10.
	       If the spec allowed it to be interpreted as hexadecimal, we
	       could have used `0' and let xstrtoul decide.
	       Do not step past the leading `0': it is a valid octal digit,
	       and skipping it would hand xstrtoul a string with no digits
	       at all when the count is exactly `0'.  */
	    int base = (*digit_str == '0') ? 8 : 10;

	    /* The conversion must succeed, stay within BEGIN_STATE, and
	       consume every character up to the closing bracket.  */
	    if (xstrtoul (digit_str, &d_end, base, &tmp_ulong, NULL)
		  != LONGINT_OK
		|| BEGIN_STATE < tmp_ulong
		|| digit_str + digit_str_len != d_end)
	      {
		char *tmp = make_printable_str (es->s + start_idx + 2,
						digit_str_len);
		error (0, 0, _("invalid repeat count `%s' in [c*n] construct"),
		       tmp);
		free (tmp);
		return -2;
	      }
	    *repeat_count = tmp_ulong;
	  }
	  *closing_bracket_idx = i;
	  return 0;
	}
    }
  return -1;			/* No bracket found.  */
}

/* Test whether the text starting at ES->s[IDX] has the shape
   `\*[0-9]*\]': a `*', then zero or more digits, then a `]'.
   The `*' and the `]' are tested with ES_MATCH, so neither may be
   escaped.  Return nonzero on a match, zero otherwise.  */

static int
star_digits_closebracket (const struct E_string *es, size_t idx)
{
  size_t pos;

  if (!ES_MATCH (es, idx, '*'))
    return 0;

  /* Step over the (possibly empty) run of digits after the star.  */
  pos = idx + 1;
  while (pos < es->len && ISDIGIT (es->s[pos]))
    pos++;

  /* Running off the end means there was no closing bracket at all;
     otherwise the first non-digit has to be the bracket.  */
  if (pos >= es->len)
    return 0;
  return ES_MATCH (es, pos, ']') ? 1 : 0;
}

/* Convert the string held in ES (which has been preprocessed to
   convert backslash-escape sequences) into a linked list, appended
   to RESULT, of the following 5 types of constructs:
      - [:str:] Character class where `str' is one of the 12 valid strings.
      - [=c=] Equivalence class where `c' is any single character.
      - [c*n] Repeat the single character `c' `n' times. n may be omitted.
	  However, if `n' is present, it must be a non-negative octal or
	  decimal integer.
      - r-s Range of characters from `r' to `s'.  The second endpoint must
	  not precede the first in the current collating sequence.
      - c Any other character is interpreted as itself.  */

static int
build_spec_list (const struct E_string *es, struct Spec_list *result)
{
  const unsigned char *p;
  size_t i;

  p = es->s;

  /* The main for-loop below recognizes the 4 multi-character constructs.
     A character that matches (in its context) none of the multi-character
     constructs is classified as `normal'.  Since all multi-character
     constructs have at least 3 characters, any strings of length 2 or
     less are composed solely of normal characters.  Hence, the index of
     the outer for-loop runs only as far as LEN-2.  */

  for (i = 0; i + 2 < es->len; /* empty */)
    {
      if (ES_MATCH (es, i, '['))
	{
	  int matched_multi_char_construct;
	  size_t closing_bracket_idx;
	  unsigned int char_to_repeat;
	  size_t repeat_count;
	  int err;

	  matched_multi_char_construct = 1;
	  if (ES_MATCH (es, i + 1, ':')
	      || ES_MATCH (es, i + 1, '='))
	    {
	      size_t closing_delim_idx;
	      int found;

	      found = find_closing_delim (es, i + 2, p[i + 1],
					  &closing_delim_idx);
	      if (found)
		{
		  int parse_failed;
		  size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
		  unsigned char *opnd_str;

		  if (opnd_str_len == 0)
		    {
		      if (p[i + 1] == ':')
			error (0, 0, _("missing character class name `[::]'"));
		      else
			error (0, 0,
			       _("missing equivalence class character `[==]'"));
		      return 1;
		    }

		  opnd_str = xmemdup (p + i + 2, opnd_str_len);

		  if (p[i + 1] == ':')
		    {
		      parse_failed = append_char_class (result, opnd_str,
							opnd_str_len);

		      /* FIXME: big comment.  */
		      if (parse_failed)
			{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -