⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uniq.c

📁 《linux应用开发技术详解》的配套代码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* uniq -- remove duplicate lines from a sorted file
   Copyright (C) 86, 91, 1995-2002, Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Richard Stallman and David MacKenzie. */
 

#include <config.h>

#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>

/* Get mbstate_t, mbrtowc(). */
#if HAVE_WCHAR_H
# include <wchar.h>
#endif

/* Get isw* functions. */
#if HAVE_WCTYPE_H
# include <wctype.h>
#endif

#include "system.h"
#include "closeout.h"
#include "argmatch.h"
#include "linebuffer.h"
#include "error.h"
#include "hard-locale.h"
#include "posixver.h"
#include "xmemcoll.h"
#include "xstrtol.h"
#include "xmemcoll.h"

/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
   installation; work around this configuration error.
   The replacement value 16 is installed whenever the macro is missing
   or smaller than 2.  */
#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
# define MB_LEN_MAX 16
#endif

/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.
   When mbstate_t is itself a preprocessor macro, route every mbrtowc
   call through a wrapper that ignores the caller's state argument and
   passes 0 instead.  The parenthesized (mbrtowc) in the expansion names
   the underlying function rather than this macro.
   NOTE(review): presumably config.h defines mbstate_t as a stand-in
   type on such systems -- confirm against the configure machinery.  */
#if HAVE_MBRTOWC && defined mbstate_t
# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
#endif


/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "uniq"

#define AUTHORS N_ ("Richard Stallman and David MacKenzie")

/* Exchange the values of A and B, which must both be modifiable
   lvalues of type `struct linebuffer *'.  Only the pointers are
   swapped; the line buffers they refer to are not touched.  */
#define SWAP_LINES(A, B)			\
  do						\
    {						\
      struct linebuffer *swap_hold_ = (A);	\
      (A) = (B);				\
      (B) = swap_hold_;				\
    }						\
  while (0)

/* The name this program was run with; usage () inserts it into the
   messages it prints. */
char *program_name;

/* Nonzero if the LC_COLLATE locale is hard.  */
static int hard_LC_COLLATE;

/* Number of fields to skip on each line when doing comparisons.
   Consumed by the find_field_* routines. */
static size_t skip_fields;

/* Number of chars to skip after skipping any fields.
   Consumed by the find_field_* routines. */
static size_t skip_chars;

/* Number of chars to compare.  different () clamps both operand
   lengths to this value before comparing. */
static size_t check_chars;

/* Whether output lines are prefixed with an occurrence count. */
enum countmode
{
  count_occurrences,		/* -c Print count before output lines. */
  count_none			/* Default.  Do not print counts. */
};

/* Whether and how to precede the output lines with a count of the number of
   times they occurred in the input. */
static enum countmode countmode;

/* Selects which input lines are written to the output. */
enum output_mode
{
  output_repeated,		/* -d Only lines that are repeated. */
  output_all_repeated,		/* -D All lines that are repeated. */
  output_unique,		/* -u Only lines that are not repeated. */
  output_all			/* Default.  Print first copy of each line. */
};

/* Which lines to output. */
static enum output_mode mode;

/* If nonzero, ignore case when comparing.  different () implements
   this by upper-casing copies of both operands before comparing.  */
static int ignore_case;

/* How groups of duplicate lines are separated in --all-repeated output. */
enum delimit_method
{
  /* No delimiters output.  --all-repeated[=none] */
  DM_NONE,

  /* Delimiter precedes all groups.  --all-repeated=prepend */
  DM_PREPEND,

  /* Delimit all groups.  --all-repeated=separate */
  DM_SEPARATE
};

/* Accepted spellings of the --all-repeated argument, terminated by a
   null entry.  The order matches delimit_method_map below.
   NOTE(review): presumably these two tables are handed to the argmatch
   helpers (argmatch.h is included) -- confirm at the option parser.  */
static char const *const delimit_method_string[] =
{
  "none", "prepend", "separate", 0
};

/* Enum value for the name at the same index in delimit_method_string. */
static enum delimit_method const delimit_method_map[] =
{
  DM_NONE, DM_PREPEND, DM_SEPARATE
};

/* Select whether/how to delimit groups of duplicate lines.  */
static enum delimit_method delimit_groups;

/* Function pointers. */
/* Routine that returns a pointer to the start of the part of LINE to
   be compared.  find_field_uni and find_field_multi both have this
   signature; where the pointer is assigned is outside this part of
   the file.  */
static char *
(*find_field) (struct linebuffer *line);

/* Long-option table.  Each entry names a long option, says whether it
   takes an argument, and gives the short-option character it stands
   for; --all-repeated is the only one whose argument is optional.
   The all-zero entry terminates the table.
   NOTE(review): presumably passed to getopt_long (getopt.h is
   included); the call site is outside this part of the file.  */
static struct option const longopts[] =
{
  {"count", no_argument, NULL, 'c'},
  {"repeated", no_argument, NULL, 'd'},
  {"all-repeated", optional_argument, NULL, 'D'},
  {"ignore-case", no_argument, NULL, 'i'},
  {"unique", no_argument, NULL, 'u'},
  {"skip-fields", required_argument, NULL, 'f'},
  {"skip-chars", required_argument, NULL, 's'},
  {"check-chars", required_argument, NULL, 'w'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};

/* Print usage information and terminate the program.
   If STATUS is nonzero, write a one-line hint pointing at --help to
   stderr and exit with EXIT_FAILURE.  If STATUS is zero, write the
   full option summary to stdout and exit with EXIT_SUCCESS.
   This function does not return.  */

void
usage (int status)
{
  /* Error path: short hint only.  */
  if (status != 0)
    {
      fprintf (stderr, _("Try `%s --help' for more information.\n"),
	       program_name);
      exit (EXIT_FAILURE);
    }

  /* Help path: synopsis, description, then the options.  */
  printf (_("\
Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\
"),
	  program_name);
  fputs (_("\
Discard all but one of successive identical lines from INPUT (or\n\
standard input), writing to OUTPUT (or standard output).\n\
\n\
"), stdout);
  fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
  fputs (_("\
  -c, --count           prefix lines by the number of occurrences\n\
  -d, --repeated        only print duplicate lines\n\
"), stdout);
  fputs (_("\
  -D, --all-repeated[=delimit-method] print all duplicate lines\n\
                        delimit-method={none(default),prepend,separate}\n\
                        Delimiting is done with blank lines.\n\
  -f, --skip-fields=N   avoid comparing the first N fields\n\
  -i, --ignore-case     ignore differences in case when comparing\n\
  -s, --skip-chars=N    avoid comparing the first N characters\n\
  -u, --unique          only print unique lines\n\
"), stdout);
  fputs (_("\
  -w, --check-chars=N   compare no more than N characters in lines\n\
"), stdout);
  fputs (HELP_OPTION_DESCRIPTION, stdout);
  fputs (VERSION_OPTION_DESCRIPTION, stdout);
  fputs (_("\
\n\
A field is a run of whitespace, then non-whitespace characters.\n\
Fields are skipped before chars.\n\
"), stdout);
  printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);

  exit (EXIT_SUCCESS);
}

/* Parse the option argument OPT as an unsigned decimal number and
   return it as a size_t.  If OPT cannot be parsed, or the value is
   larger than SIZE_MAX, report "OPT: <translated MSGID>" through
   error () with exit status EXIT_FAILURE.  */

static size_t
size_opt (char const *opt, char const *msgid)
{
  unsigned long int value;
  int parsed_ok = (xstrtoul (opt, NULL, 10, &value, "") == LONGINT_OK);

  /* VALUE is only examined once the parse is known to have succeeded.  */
  if (!parsed_ok || value > SIZE_MAX)
    error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));

  return value;
}

/* Return a pointer into LINE's buffer at the position where comparison
   should begin: past the first `skip_fields' fields and then past a
   further `skip_chars' bytes.  Each field is a run of blanks followed
   by a run of non-blanks.  Scanning never goes beyond the first
   `line->length - 1' bytes, so the final byte of the line is never
   skipped.
   NOTE(review): that final byte is presumably the line terminator
   stored by the line reader -- confirm against the linebuffer code.  */

static char *
find_field_uni (struct linebuffer *line)
{
  char *start = line->buffer;
  size_t limit = line->length - 1;
  size_t off = 0;
  size_t fields_left = skip_fields;
  size_t avail;

  /* Skip fields.  */
  while (fields_left > 0 && off < limit)
    {
      for (; off < limit && ISBLANK (start[off]); off++)
	continue;
      for (; off < limit && !ISBLANK (start[off]); off++)
	continue;
      fields_left--;
    }

  /* Skip chars: advance by skip_chars, but never beyond LIMIT.  */
  avail = limit - off;
  off += (skip_chars < avail) ? skip_chars : avail;

  return start + off;
}

#if HAVE_MBRTOWC

/* Convert the multibyte character that starts at byte offset POS of the
   buffer LP, whose total size is SIZE bytes, using the conversion
   state *STATEP.

   On return:
     WC        holds the converted wide character when the conversion
               succeeded;
     MBLENGTH  is the number of bytes to advance.  It is forced to 1
               when mbrtowc reports a null character (0), an invalid
               sequence ((size_t) -1) or an incomplete one
               ((size_t) -2), so the caller always makes progress;
     CONVFAIL  is nonzero if the bytes were invalid or incomplete, in
               which case *STATEP is restored to its value on entry.

   WC, MBLENGTH and CONVFAIL must be modifiable lvalues.  Every argument
   is parenthesized in the expansion, so expressions such as `p + 1' or
   `n - 1' can be passed safely; POS, STATEP, MBLENGTH and CONVFAIL are
   expanded more than once and therefore must not have side effects.  */
# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL)  \
  do									\
    {									\
      mbstate_t state_bak;						\
									\
      (CONVFAIL) = 0;							\
      state_bak = *(STATEP);						\
									\
      (MBLENGTH) = mbrtowc (&(WC), (LP) + (POS), (SIZE) - (POS),	\
			    (STATEP));					\
									\
      switch (MBLENGTH)							\
	{								\
	case (size_t)-2:						\
	case (size_t)-1:						\
	  *(STATEP) = state_bak;					\
	  (CONVFAIL)++;							\
	  /* Fall through */						\
	case 0:								\
	  (MBLENGTH) = 1;						\
	  break;							\
									\
	default:							\
	  break;							\
	}								\
    }									\
  while (0)

/* Multibyte-aware variant of the field finder.  Return a pointer into
   LINE's buffer past the first `skip_fields' fields and then past a
   further `skip_chars' characters, never scanning beyond the first
   `line->length - 1' bytes.  A field is a run of blank characters
   followed by a run of non-blank ones; bytes that fail to convert are
   treated as non-blank single-byte characters.  Conversions go through
   `line->state', so that state is updated as a side effect.  */
static char *
find_field_multi (struct linebuffer *line)
{
  char *base = line->buffer;
  size_t limit = line->length - 1;
  mbstate_t *ps = &(line->state);
  size_t off = 0;
  size_t step;
  size_t n;
  wchar_t ch;
  int bad;

  /* Skip fields.  */
  for (n = 0; n < skip_fields && off < limit; n++)
    {
      /* Consume the leading blanks together with the first character
	 that is not a blank.  */
      while (off < limit)
	{
	  MBCHAR_TO_WCHAR (ch, step, base, off, limit, ps, bad);
	  off += step;

	  if (bad || !iswblank (ch))
	    break;
	}

      /* Consume the rest of the field, stopping in front of the next
	 blank without stepping over it.  */
      while (off < limit)
	{
	  MBCHAR_TO_WCHAR (ch, step, base, off, limit, ps, bad);

	  if (!bad && iswblank (ch))
	    break;

	  off += step;
	}
    }

  /* Skip chars.  */
  for (n = 0; n < skip_chars && off < limit; n++)
    {
      MBCHAR_TO_WCHAR (ch, step, base, off, limit, ps, bad);
      off += step;
    }

  return base + off;
}
#endif

/* Return zero if two strings OLD and NEW match, nonzero if not.
   OLD and NEW point not to the beginnings of the lines
   but rather to the beginnings of the fields to compare.
   OLDLEN and NEWLEN are their lengths.

   Both lengths are first clamped to `check_chars'.  When `ignore_case'
   is set, the comparison is made on upper-cased copies, each sized and
   filled according to its own length; otherwise OLD and NEW are
   compared in place.  The comparison itself is done by xmemcoll.  */

static int
different (char *old, char *new, size_t oldlen, size_t newlen)
{
  char *copy_old, *copy_new;

  if (check_chars < oldlen)
    oldlen = check_chars;
  if (check_chars < newlen)
    newlen = check_chars;

  if (ignore_case)
    {
      size_t i;

      /* Each copy gets its own length.  Sizing and filling COPY_NEW by
	 OLDLEN would overrun NEW when it is the shorter operand, and
	 leave COPY_NEW too small and partly unset when it is the
	 longer one.  */
      copy_old = alloca (oldlen + 1);
      copy_new = alloca (newlen + 1);

      /* <ctype.h> functions require a value representable as
	 unsigned char, hence the casts.  */
      for (i = 0; i < oldlen; i++)
	copy_old[i] = toupper ((unsigned char) old[i]);
      for (i = 0; i < newlen; i++)
	copy_new[i] = toupper ((unsigned char) new[i]);

      /* NOTE(review): the spare byte is presumably there because
	 xmemcoll may touch the byte just past each operand -- confirm
	 against xmemcoll.  Give it a defined value either way.  */
      copy_old[oldlen] = '\0';
      copy_new[newlen] = '\0';
    }
  else
    {
      copy_old = old;
      copy_new = new;
    }

  return xmemcoll (copy_old, oldlen, copy_new, newlen);
}

#if HAVE_MBRTOWC
static int
different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
{
  size_t i, j, chars;
  const char *str[2];
  char *copy[2];
  size_t len[2];
  mbstate_t state[2];
  size_t mblength;
  wchar_t wc, uwc;
  mbstate_t state_bak;

  str[0] = old;
  str[1] = new;
  len[0] = oldlen;
  len[1] = newlen;
  state[0] = oldstate;
  state[1] = newstate;

  for (i = 0; i < 2; i++)
    {
      copy[i] = alloca (len[i] + 1);

      for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
	{
	  state_bak = state[i];
	  mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));

	  switch (mblength)
	    {
	    case (size_t)-1:
	    case (size_t)-2:
	      state[i] = state_bak;
	      /* Fall through */
	    case 0:
	      mblength = 1;
	      break;

	    default:
	      if (ignore_case)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -