⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uniq.c

📁 《linux应用开发技术详解》的配套代码
💻 C
📖 第 1 页 / 共 2 页
字号:
		{
		  uwc = towupper (wc);

		  if (uwc != wc)
		    {
		      mbstate_t state_wc;

		      memset (&state_wc, '\0', sizeof(mbstate_t));
		      wcrtomb (copy[i] + j, uwc, &state_wc);
		    }
		  else
		    memcpy (copy[i] + j, str[i] + j, mblength);
		}
	      else
		memcpy (copy[i] + j, str[i] + j, mblength);
	    }
	  j += mblength;
	}
      copy[i][j] = '\0';
      len[i] = j;
    }

  return xmemcoll (copy[0], len[0], copy[1], len[1]);
}
#endif

/* Emit the contents of linebuffer LINE on STREAM, unless the current
   output mode filters it out.  LINECOUNT is the number of additional
   times the line was seen, so the line occurred LINECOUNT + 1 times;
   when occurrence counting is enabled that total is printed first,
   right-aligned in 7 columns and followed by a tab.  */

static void
writeline (const struct linebuffer *line, FILE *stream, int linecount)
{
  int const seen_again = (linecount != 0);

  if (seen_again)
    {
      /* A repeated line is suppressed only in unique-only mode.  */
      if (mode == output_unique)
	return;
    }
  else
    {
      /* A line seen once is suppressed by both "repeated" modes.  */
      if (mode == output_repeated || mode == output_all_repeated)
	return;
    }

  if (countmode == count_occurrences)
    fprintf (stream, "%7d\t", linecount + 1);

  /* Write exactly line->length bytes; the buffer is not treated as a
     NUL-terminated string here.  */
  fwrite (line->buffer, 1, line->length, stream);
}

/* Process input file INFILE with output to OUTFILE.
   If either is "-", use the standard I/O stream for it instead.

   Adjacent input lines are compared on the field returned by
   find_field, using different_multi when HAVE_MBRTOWC is set and
   MB_CUR_MAX > 1, and different otherwise.  What gets written is
   governed by the file-scope settings mode, countmode and
   delimit_groups.  A failure to open either file, a stream error, or a
   failing fclose is reported through error (EXIT_FAILURE, ...).  */

static void
check_file (const char *infile, const char *outfile)
{
  FILE *istream;
  FILE *ostream;
  struct linebuffer lb1, lb2;
  struct linebuffer *thisline, *prevline;

  if (STREQ (infile, "-"))
    istream = stdin;
  else
    istream = fopen (infile, "r");
  if (istream == NULL)
    error (EXIT_FAILURE, errno, "%s", infile);

  if (STREQ (outfile, "-"))
    ostream = stdout;
  else
    ostream = fopen (outfile, "w");
  if (ostream == NULL)
    error (EXIT_FAILURE, errno, "%s", outfile);

  /* Two line buffers are used alternately: one holds the line being
     read, the other the line it is compared against.  */
  thisline = &lb1;
  prevline = &lb2;

  initbuffer (thisline);
  initbuffer (prevline);

  /* The duplication in the following `if' and `else' blocks is an
     optimization to distinguish the common case (in which none of
     the following options has been specified: --count, --repeated,
     --all-repeated, --unique) from the others.  In the common case,
     this optimization lets uniq output each different line right away,
     without waiting to see if the next one is different.  */

  if (mode == output_all && countmode == count_none)
    {
      char *prevfield IF_LINT (= NULL);
      size_t prevlen IF_LINT (= 0);
#if HAVE_MBRTOWC
      mbstate_t prevstate;

      memset (&prevstate, '\0', sizeof (mbstate_t));
#endif

      while (!feof (istream))
	{
	  char *thisfield;
	  size_t thislen;
#if HAVE_MBRTOWC
	  mbstate_t thisstate;
#endif

	  if (readline (thisline, istream) == 0)
	    break;
	  thisfield = find_field (thisline);
	  /* Length of the compared field; the "- 1" presumably drops the
	     trailing newline kept in the buffer -- confirm against
	     readline.  */
	  thislen = thisline->length - 1 - (thisfield - thisline->buffer);
#if HAVE_MBRTOWC
	  if (MB_CUR_MAX > 1)
	    {
	      thisstate = thisline->state;

	      /* prevline->length == 0 is taken to mean that no previous
		 line has been stored yet, so the comparison is skipped.  */
	      if (prevline->length == 0 || different_multi
		  (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
		{
		  fwrite (thisline->buffer, sizeof (char),
			  thisline->length, ostream);

		  /* The line just written becomes the reference line.  */
		  SWAP_LINES (prevline, thisline);
		  prevfield = thisfield;
		  prevlen = thislen;
		  prevstate = thisstate;
		}
	    }
	  else
#endif
	    {
	      if (prevline->length == 0
		  || different (thisfield, prevfield, thislen, prevlen))
		{
		  fwrite (thisline->buffer, sizeof (char),
			  thisline->length, ostream);

		  SWAP_LINES (prevline, thisline);
		  prevfield = thisfield;
		  prevlen = thislen;
		}
	    }
	}
    }
  else
    {
      char *prevfield;
      size_t prevlen;
      int match_count = 0;	/* Matches seen so far for PREVLINE.  */
      int first_delimiter = 1;
#if HAVE_MBRTOWC
      mbstate_t prevstate;
#endif

      /* Nothing could be read: skip straight to the cleanup code.  */
      if (readline (prevline, istream) == 0)
	goto closefiles;
      prevfield = find_field (prevline);
      prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
#if HAVE_MBRTOWC
      prevstate = prevline->state;
#endif

      while (!feof (istream))
	{
	  int match;
	  char *thisfield;
	  size_t thislen;
#if HAVE_MBRTOWC
	  mbstate_t thisstate;
#endif
	  if (readline (thisline, istream) == 0)
	    break;
	  thisfield = find_field (thisline);
	  thislen = thisline->length - 1 - (thisfield - thisline->buffer);
#if HAVE_MBRTOWC
	  /* Capture the state whatever the locale, as is done for
	     prevstate above: thisstate is copied into prevstate further
	     down regardless of MB_CUR_MAX, and copying an automatic
	     object that was never assigned is undefined behavior.  */
	  thisstate = thisline->state;
	  if (MB_CUR_MAX > 1)
	    match = !different_multi (thisfield, prevfield,
				      thislen, prevlen, thisstate, prevstate);
	  else
#endif
	    match = !different (thisfield, prevfield, thislen, prevlen);

	  if (match)
	    ++match_count;

	  if (mode == output_all_repeated && delimit_groups != DM_NONE)
	    {
	      if (!match)
		{
		  if (match_count) /* a previous match */
		    first_delimiter = 0; /* Only used when DM_SEPARATE */
		}
	      else if (match_count == 1)
		{
		  /* First match of a new group: emit the group delimiter
		     before the group's first line is written below.  */
		  if ((delimit_groups == DM_PREPEND)
		      || (delimit_groups == DM_SEPARATE
			  && !first_delimiter))
		    putc ('\n', ostream);
		}
	    }

	  /* In all-repeated mode every line is handed to writeline as
	     soon as its successor is known; otherwise only the line
	     that ends a run of matches is.  */
	  if (!match || mode == output_all_repeated)
	    {
	      writeline (prevline, ostream, match_count);
	      SWAP_LINES (prevline, thisline);
	      prevfield = thisfield;
	      prevlen = thislen;
#if HAVE_MBRTOWC
	      prevstate = thisstate;
#endif
	      if (!match)
		match_count = 0;
	    }
	}

      /* Flush the last pending line.  */
      writeline (prevline, ostream, match_count);
    }

 closefiles:
  if (ferror (istream) || fclose (istream) == EOF)
    error (EXIT_FAILURE, errno, _("error reading %s"), infile);

  /* Close ostream only if it's not stdout -- the latter is closed
     via the atexit-invoked close_stdout.  */
  if (ostream != stdout && (ferror (ostream) || fclose (ostream) == EOF))
    error (EXIT_FAILURE, errno, _("error writing %s"), outfile);

  free (lb1.buffer);
  free (lb2.buffer);
}

/* Program entry point: set up the locale and the field-finding routine,
   reset the option variables to their defaults, parse the command line
   (including the obsolete "-N" and "+N" forms), reject inconsistent
   option combinations, then hand the input and output operands (each
   defaulting to "-") to check_file.  */

int
main (int argc, char **argv)
{
  char const *operand[2];
  int n_operands = 0;
  int opt = 0;
  bool saw_obsolete_skip = false;
  bool pedantic = (getenv ("POSIXLY_CORRECT") != NULL);

  operand[0] = "-";
  operand[1] = "-";

  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  hard_LC_COLLATE = hard_locale (LC_COLLATE);

  atexit (close_stdout);

  /* Default to the single-byte field finder; switch to the multibyte
     one only when the locale needs it.  */
  find_field = find_field_uni;
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    find_field = find_field_multi;
#endif

  /* Option defaults.  */
  skip_chars = 0;
  skip_fields = 0;
  check_chars = SIZE_MAX;
  mode = output_all;
  countmode = count_none;
  delimit_groups = DM_NONE;

  for (;;)
    {
      /* Parse an operand with leading "+" as a file after "--" was
         seen; or if pedantic and a file was seen; or if not
         obsolete.  getopt_long is consulted only when neither of the
         first two conditions already holds.  */
      bool take_operand = (opt == -1 || (pedantic && n_operands != 0));

      if (!take_operand)
	{
	  opt = getopt_long (argc, argv,
			     "-0123456789Dcdf:is:uw:", longopts, NULL);
	  take_operand = (opt == -1);
	}

      if (take_operand)
	{
	  if (optind == argc)
	    break;
	  if (n_operands == 2)
	    {
	      error (0, 0, _("extra operand `%s'"), argv[optind]);
	      usage (EXIT_FAILURE);
	    }
	  operand[n_operands++] = argv[optind++];
	  continue;
	}

      switch (opt)
	{
	case 1:
	  {
	    /* A non-option argument: either an obsolete "+N" skip-chars
	       request or a file operand.  */
	    unsigned long int value;
	    bool is_plus_skip
	      = (optarg[0] == '+'
		 && (posix2_version () < 200112
		     || !getenv ("POSIXLY_CORRECT"))
		 && xstrtoul (optarg, NULL, 10, &value, "") == LONGINT_OK
		 && value <= SIZE_MAX);

	    if (is_plus_skip)
	      skip_chars = value;
	    else if (n_operands == 2)
	      {
		error (0, 0, _("extra operand `%s'"), optarg);
		usage (EXIT_FAILURE);
	      }
	    else
	      operand[n_operands++] = optarg;
	  }
	  break;

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	  {
	    /* Obsolete "-N": accumulate one decimal digit at a time and
	       fail if the running value overflows.  */
	    size_t before = skip_fields;
	    skip_fields = before * 10 + opt - '0';
	    if (SIZE_MAX / 10 < before || skip_fields < before)
	      error (EXIT_FAILURE, 0, "%s",
		     _("invalid number of fields to skip"));
	    saw_obsolete_skip = true;
	  }
	  break;

	case 'c':
	  countmode = count_occurrences;
	  break;

	case 'd':
	  mode = output_repeated;
	  break;

	case 'D':
	  mode = output_all_repeated;
	  if (optarg != NULL)
	    delimit_groups = XARGMATCH ("--all-repeated", optarg,
					delimit_method_string,
					delimit_method_map);
	  else
	    delimit_groups = DM_NONE;
	  break;

	case 'f':		/* Like '-#'. */
	  skip_fields = size_opt (optarg,
				  N_("invalid number of fields to skip"));
	  break;

	case 'i':
	  ignore_case = 1;
	  break;

	case 's':		/* Like '+#'. */
	  skip_chars = size_opt (optarg,
				 N_("invalid number of bytes to skip"));
	  break;

	case 'u':
	  mode = output_unique;
	  break;

	case 'w':
	  check_chars = size_opt (optarg,
				  N_("invalid number of bytes to compare"));
	  break;

	case_GETOPT_HELP_CHAR;

	case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

	default:
	  usage (EXIT_FAILURE);
	}
    }

  /* The obsolete "-N" form is refused when POSIXLY_CORRECT is set and
     the POSIX version is 200112 or later.  */
  if (saw_obsolete_skip && 200112 <= posix2_version ()
      && getenv ("POSIXLY_CORRECT"))
    {
      error (0, 0, _("`-%lu' option is obsolete; use `-f %lu'"),
	     (unsigned long) skip_fields, (unsigned long) skip_fields);
      usage (EXIT_FAILURE);
    }

  if (countmode == count_occurrences && mode == output_all_repeated)
    {
      error (0, 0,
	   _("printing all duplicated lines and repeat counts is meaningless"));
      usage (EXIT_FAILURE);
    }

  check_file (operand[0], operand[1]);

  exit (EXIT_SUCCESS);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -