📄 tr.c

📁 linux下一些命令的c语言的实现
💻 C
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34

  if (s2)
    {
      get_s2_spec_stats (s2, s1->length);

      if (s2->n_indefinite_repeats > 1)
	{
	  error (EXIT_FAILURE, 0,
		 _("only one [c*] repeat construct may appear in string2"));
	}

      if (translating)
	{
	  if (s2->has_equiv_class)
	    {
	      error (EXIT_FAILURE, 0,
		     _("[=c=] expressions may not appear in string2 \
when translating"));
	    }

	  if (s1->length > s2->length)
	    {
	      if (!truncate_set1)
		{
		  /* string2 must be non-empty unless --truncate-set1 is
		     given or string1 is empty.  */

		  if (s2->length == 0)
		    error (EXIT_FAILURE, 0,
		     _("when not truncating set1, string2 must be non-empty"));
		  string2_extend (s1, s2);
		}
	    }

	  if (complement && s1->has_char_class
	      && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
	    {
	      error (EXIT_FAILURE, 0,
		     _("when translating with complemented character classes,\
\nstring2 must map all characters in the domain to one"));
	    }

	  if (s2->has_restricted_char_class)
	    {
	      error (EXIT_FAILURE, 0,
		     _("when translating, the only character classes that may \
appear in\nstring2 are `upper' and `lower'"));
	    }
	}
      else
	/* Not translating.  */
	{
	  if (s2->n_indefinite_repeats > 0)
	    error (EXIT_FAILURE, 0,
		   _("the [c*] construct may appear in string2 only \
when translating"));
	}
    }
}

/* Copy input to stdout, replacing every run of identical consecutive
   characters that are in the squeeze set (the global `in_squeeze_set')
   by a single occurrence of that character.

   BUF is a work buffer of SIZE bytes.  Input is fetched one buffer at
   a time: when READER is NULL it is read from file descriptor 0 with
   safe_read, otherwise it is obtained by calling
   READER (BUF, SIZE, NULL).  The original comment on this function
   names read_and_delete and read_and_xlate as the only readers used.
   A count of zero from either source ends processing.

   CHAR_TO_SQUEEZE is kept across refills, so a run of repeats that
   straddles two buffers is still squeezed to one character.

   A read error, or a write that stores fewer bytes than requested,
   is reported through error () with EXIT_FAILURE.  */

static void
squeeze_filter (unsigned char *buf, size_t size, Filter reader)
{
  /* The character whose repeats are currently being dropped, or
     NOT_A_CHAR when no run is in progress.  */
  unsigned int char_to_squeeze = NOT_A_CHAR;
  size_t i = 0;
  size_t nr = 0;

  for (;;)
    {
      size_t begin;

      /* Refill once every byte of the current buffer was consumed.  */
      if (i >= nr)
        {
          if (reader == NULL)
            {
              nr = safe_read (0, (char *) buf, size);
              if (nr == SAFE_READ_ERROR)
                error (EXIT_FAILURE, errno, _("read error"));
            }
          else
            {
              nr = (*reader) (buf, size, NULL);
            }

          if (nr == 0)
            break;
          i = 0;
        }

      begin = i;

      if (char_to_squeeze == NOT_A_CHAR)
        {
          size_t out_len;
          /* Here, by being a little tricky, we can get a significant
             performance increase in most cases when the input is
             reasonably large.  Since tr will modify the input only
             if two consecutive (and identical) input characters are
             in the squeeze set, we can step by two through the data
             when searching for a character in the squeeze set.  This
             means there may be a little more work in a few cases and
             perhaps twice as much work in the worst cases where most
             of the input is removed by squeezing repeats.  But most
             uses of this functionality seem to remove less than 20-30%
             of the input.  */
          for (; i < nr && !in_squeeze_set[buf[i]]; i += 2)
            ;                   /* empty */

          /* There is a special case when i == nr and we've just
             skipped a character (the last one in buf) that is in
             the squeeze set.  */
          if (i == nr && in_squeeze_set[buf[i - 1]])
            --i;

          if (i >= nr)
            {
              /* No squeezable character left: emit the rest as is.  */
              out_len = nr - begin;
            }
          else
            {
              char_to_squeeze = buf[i];
              /* We're about to output buf[begin..i].  */
              out_len = i - begin + 1;

              /* But since we stepped by 2 in the loop above,
                 out_len may be one too large.  */
              if (i > 0 && buf[i - 1] == char_to_squeeze)
                --out_len;

              /* Advance i to the index of first character to be
                 considered when looking for a char different from
                 char_to_squeeze.  */
              ++i;
            }

          /* fwrite returns the number of items actually written; any
             value below OUT_LEN means part of the output was lost, so
             compare against the full count rather than against zero.  */
          if (out_len > 0
              && fwrite ((char *) &buf[begin], 1, out_len, stdout) != out_len)
            error (EXIT_FAILURE, errno, _("write error"));
        }

      if (char_to_squeeze != NOT_A_CHAR)
        {
          /* Advance i to index of first char != char_to_squeeze
             (or to nr if all the rest of the characters in this
             buffer are the same as char_to_squeeze).  */
          for (; i < nr && buf[i] == char_to_squeeze; i++)
            ;                   /* empty */
          if (i < nr)
            char_to_squeeze = NOT_A_CHAR;
          /* If (i >= nr) we've squeezed the last character in this buffer.
             So now we have to read a new buffer and continue comparing
             characters against char_to_squeeze.  */
        }
    }
}

/* Fetch the next batch of input characters that survive deletion.

   Buffers of at most SIZE bytes are read from file descriptor 0 into
   BUF until one of them holds at least one character that is not in
   the delete set (the global `in_delete_set').  The surviving
   characters are packed at the front of BUF and their count is
   returned.  Zero is returned at EOF and on every call made after EOF
   was seen.  NOT_USED must be NULL; it is only checked by an assert.
   A read error is reported through error () with EXIT_FAILURE.  */

static size_t
read_and_delete (unsigned char *buf, size_t size, Filter not_used)
{
  static int hit_eof = 0;

  assert (not_used == NULL);

  if (hit_eof)
    return 0;

  /* Loop until something survives: returning zero for a buffer whose
     characters were all deleted would wrongly signal EOF.  */
  for (;;)
    {
      size_t src;
      size_t kept;
      size_t n_read = safe_read (0, (char *) buf, size);

      if (n_read == SAFE_READ_ERROR)
        error (EXIT_FAILURE, errno, _("read error"));
      if (n_read == 0)
        {
          hit_eof = 1;
          return 0;
        }

      /* Fast path: the leading characters that are all kept are
         already in their final position, so just step over them.  */
      src = 0;
      while (src < n_read && !in_delete_set[buf[src]])
        src++;
      kept = src;

      /* buf[src], when it exists, is a deleted character.  Slide every
         later survivor down over the gaps left by deletions.  */
      for (src++; src < n_read; src++)
        {
          if (in_delete_set[buf[src]])
            continue;
          buf[kept] = buf[src];
          kept++;
        }

      if (kept != 0)
        return kept;
    }
}

/* Read up to SIZE bytes from file descriptor 0 into BUF and replace
   each byte in place by its entry in the global table `xlate'.
   Return the number of bytes read; return 0 at EOF and on every call
   made after EOF was seen.  NOT_USED must be NULL; it is only checked
   by an assert.  A read error is reported through error () with
   EXIT_FAILURE.  */

static size_t
read_and_xlate (unsigned char *buf, size_t size, Filter not_used)
{
  static int hit_eof = 0;
  size_t n_read;
  unsigned char *p;
  unsigned char *limit;

  assert (not_used == NULL);

  if (hit_eof)
    return 0;

  n_read = safe_read (0, (char *) buf, size);
  if (n_read == SAFE_READ_ERROR)
    error (EXIT_FAILURE, errno, _("read error"));
  if (n_read == 0)
    {
      hit_eof = 1;
      return 0;
    }

  /* Map every byte that was just read through the translation table.  */
  limit = buf + n_read;
  for (p = buf; p < limit; p++)
    *p = xlate[*p];

  return n_read;
}

/* Build the membership table IN_SET (N_CHARS entries) from the
   specification S.  The table is cleared, S is rewound to
   BEGIN_STATE, and every value yielded by get_next until it returns
   -1 is marked with 1.  When COMPLEMENT_THIS_SET is nonzero each
   entry is then inverted, so the table describes the characters
   that S did not yield.  */

static void
set_initialize (struct Spec_list *s, int complement_this_set, SET_TYPE *in_set)
{
  size_t idx;
  int ch;

  memset (in_set, 0, N_CHARS * sizeof (in_set[0]));

  s->state = BEGIN_STATE;
  for (ch = get_next (s, NULL); ch != -1; ch = get_next (s, NULL))
    in_set[ch] = 1;

  if (!complement_this_set)
    return;

  for (idx = 0; idx < N_CHARS; idx++)
    in_set[idx] = (!in_set[idx]);
}

/* Program entry point.

   Parses the options -c (complement), -d (delete), -s (squeeze
   repeats) and -t (truncate set1), checks that the number of string
   operands fits the requested operation, parses and validates the
   operand strings, and then runs one of four filters from standard
   input to standard output:

     - squeeze only (one string);
     - delete only (one string);
     - delete, then squeeze (two strings);
     - translate, optionally followed by squeezing (two strings).

   Invalid usage and read/write failures are reported through
   error () or usage ().  A write that stores fewer bytes than
   requested is treated as a write error.  On success the program
   exits with EXIT_SUCCESS.  */

int
main (int argc, char **argv)
{
  int c;
  int non_option_args;
  struct Spec_list buf1, buf2;
  struct Spec_list *s1 = &buf1;
  struct Spec_list *s2 = &buf2;

  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  while ((c = getopt_long (argc, argv, "cdst", long_options, NULL)) != -1)
    {
      switch (c)
        {
        case 0:
          break;

        case 'c':
          complement = 1;
          break;

        case 'd':
          delete = 1;
          break;

        case 's':
          squeeze_repeats = 1;
          break;

        case 't':
          truncate_set1 = 1;
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (2);
          break;
        }
    }

  posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL);

  non_option_args = argc - optind;
  translating = (non_option_args == 2 && !delete);

  /* Change this test if it is valid to give tr no options and
     no args at all.  POSIX doesn't specifically say anything
     either way, but it looks like they implied it's invalid
     by omission.  If you want to make tr do a slow imitation
     of `cat' use `tr a a'.  */
  if (non_option_args > 2)
    {
      error (0, 0, _("too many arguments"));
      usage (2);
    }

  if (!delete && !squeeze_repeats && non_option_args != 2)
    error (EXIT_FAILURE, 0, _("two strings must be given when translating"));

  if (delete && squeeze_repeats && non_option_args != 2)
    error (EXIT_FAILURE, 0, _("two strings must be given when both \
deleting and squeezing repeats"));

  /* If --delete is given without --squeeze-repeats, then
     only one string argument may be specified.  But POSIX
     says to ignore any string2 in this case, so if POSIXLY_CORRECT
     is set, pretend we never saw string2.  But I think
     this deserves a fatal error, so that's the default.  */
  if ((delete && !squeeze_repeats) && non_option_args != 1)
    {
      if (posix_pedantic && non_option_args == 2)
        --non_option_args;
      else
        error (EXIT_FAILURE, 0,
               _("only one string may be given when deleting \
without squeezing repeats"));
    }

  if (squeeze_repeats && non_option_args == 0)
    error (EXIT_FAILURE, 0,
           _("at least one string must be given when squeezing repeats"));

  /* Every combination with zero operands has been rejected by the
     checks above, so argv[optind] is a real operand here.  */
  spec_init (s1);
  if (parse_str ((unsigned char *) argv[optind], s1))
    exit (EXIT_FAILURE);

  if (non_option_args == 2)
    {
      spec_init (s2);
      if (parse_str ((unsigned char *) argv[optind + 1], s2))
        exit (EXIT_FAILURE);
    }
  else
    s2 = NULL;

  validate (s1, s2);

  /* Use binary I/O, since `tr' is sometimes used to transliterate
     non-printable characters, or characters which are stripped away
     by text-mode reads (like CR and ^Z).  */
  SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);

  if (squeeze_repeats && non_option_args == 1)
    {
      /* Squeeze only: string1 (possibly complemented) is the squeeze
         set and input is read directly.  */
      set_initialize (s1, complement, in_squeeze_set);
      squeeze_filter (io_buf, IO_BUF_SIZE, NULL);
    }
  else if (delete && non_option_args == 1)
    {
      /* Delete only: string1 (possibly complemented) is the delete
         set.  */
      size_t nr;

      set_initialize (s1, complement, in_delete_set);
      do
        {
          nr = read_and_delete (io_buf, IO_BUF_SIZE, NULL);
          /* A short write loses output, so require the full count.  */
          if (nr > 0 && fwrite ((char *) io_buf, 1, nr, stdout) != nr)
            error (EXIT_FAILURE, errno, _("write error"));
        }
      while (nr > 0);
    }
  else if (squeeze_repeats && delete && non_option_args == 2)
    {
      /* Delete with string1, then squeeze with string2: the squeeze
         filter pulls its input through read_and_delete.  */
      set_initialize (s1, complement, in_delete_set);
      set_initialize (s2, 0, in_squeeze_set);
      squeeze_filter (io_buf, IO_BUF_SIZE, read_and_delete);
    }
  else if (translating)
    {
      if (complement)
        {
          int i;
          /* in_delete_set is not otherwise used when translating, so
             it serves as scratch storage for membership in string1.  */
          SET_TYPE *in_s1 = in_delete_set;

          set_initialize (s1, 0, in_s1);
          s2->state = BEGIN_STATE;
          /* Start from the identity mapping.  */
          for (i = 0; i < N_CHARS; i++)
            xlate[i] = i;
          /* Map each character NOT in string1, in ascending order, to
             the next character produced by string2.  */
          for (i = 0; i < N_CHARS; i++)
            {
              if (!in_s1[i])
                {
                  int ch = get_next (s2, NULL);
                  assert (ch != -1 || truncate_set1);
                  if (ch == -1)
                    {
                      /* This will happen when tr is invoked like e.g.
                         tr -cs A-Za-z0-9 '\012'.  */
                      break;
                    }
                  xlate[i] = ch;
                }
            }
          assert (get_next (s2, NULL) == -1 || truncate_set1);
        }
      else
        {
          int c1, c2;
          int i;
          enum Upper_Lower_class class_s1;
          enum Upper_Lower_class class_s2;

          /* Start from the identity mapping.  */
          for (i = 0; i < N_CHARS; i++)
            xlate[i] = i;
          s1->state = BEGIN_STATE;
          s2->state = BEGIN_STATE;
          /* Walk string1 and string2 in lock step, pairing each
             character of string1 with the corresponding character of
             string2.  */
          for (;;)
            {
              c1 = get_next (s1, &class_s1);
              c2 = get_next (s2, &class_s2);
              if (!class_ok[(int) class_s1][(int) class_s2])
                error (EXIT_FAILURE, 0,
                       _("misaligned [:upper:] and/or [:lower:] construct"));

              if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
                {
                  for (i = 0; i < N_CHARS; i++)
                    if (ISLOWER (i))
                      xlate[i] = toupper (i);
                }
              else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
                {
                  for (i = 0; i < N_CHARS; i++)
                    if (ISUPPER (i))
                      xlate[i] = tolower (i);
                }
              else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
                       || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
                {
                  /* By default, GNU tr permits the identity mappings: from
                     [:upper:] to [:upper:] and [:lower:] to [:lower:].  But
                     when POSIXLY_CORRECT is set, those evoke diagnostics.  */
                  if (posix_pedantic)
                    {
                      error (EXIT_FAILURE, 0,
                             _("\
invalid identity mapping;  when translating, any [:lower:] or [:upper:]\n\
construct in string1 must be aligned with a corresponding construct\n\
([:upper:] or [:lower:], respectively) in string2"));
                    }
                }
              else
                {
                  /* The following should have been checked by validate...  */
                  if (c1 == -1 || c2 == -1)
                    break;
                  xlate[c1] = c2;
                }
            }
          assert (c1 == -1 || truncate_set1);
        }
      if (squeeze_repeats)
        {
          /* Translate, then squeeze repeats of characters in string2.  */
          set_initialize (s2, 0, in_squeeze_set);
          squeeze_filter (io_buf, IO_BUF_SIZE, read_and_xlate);
        }
      else
        {
          size_t bytes_read;

          do
            {
              bytes_read = read_and_xlate (io_buf, IO_BUF_SIZE, NULL);
              /* A short write loses output, so require the full count.  */
              if (bytes_read > 0
                  && (fwrite ((char *) io_buf, 1, bytes_read, stdout)
                      != bytes_read))
                error (EXIT_FAILURE, errno, _("write error"));
            }
          while (bytes_read > 0);
        }
    }

  if (close (STDIN_FILENO) != 0)
    error (EXIT_FAILURE, errno, _("standard input"));

  exit (EXIT_SUCCESS);
}
上一页 1 2 34
💿 文件大小 295 K
👤 上传用户 norwaybaby
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #c语言 #命令
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -