📄 uniq.c
字号:
{
uwc = towupper (wc);
if (uwc != wc)
{
mbstate_t state_wc;
memset (&state_wc, '\0', sizeof(mbstate_t));
wcrtomb (copy[i] + j, uwc, &state_wc);
}
else
memcpy (copy[i] + j, str[i] + j, mblength);
}
else
memcpy (copy[i] + j, str[i] + j, mblength);
}
j += mblength;
}
copy[i][j] = '\0';
len[i] = j;
}
return xmemcoll (copy[0], len[0], copy[1], len[1]);
}
#endif
/* Write the line held in linebuffer LINE to STREAM, unless the
   current output mode says it must be suppressed.
   LINECOUNT is the number of additional occurrences seen, i.e.
   LINECOUNT + 1 is the number of times that the line occurred.
   When occurrence counting is enabled, that total is printed in
   front of the line.  */
static void
writeline (const struct linebuffer *line, FILE *stream, int linecount)
{
  int const seen_again = (linecount != 0);
  int suppressed;

  if (mode == output_unique)
    /* Only lines that never repeated are wanted.  */
    suppressed = seen_again;
  else if (mode == output_repeated || mode == output_all_repeated)
    /* Only lines that did repeat are wanted.  */
    suppressed = !seen_again;
  else
    suppressed = 0;

  if (!suppressed)
    {
      if (countmode == count_occurrences)
        fprintf (stream, "%7d\t", linecount + 1);

      fwrite (line->buffer, sizeof (char), line->length, stream);
    }
}
/* Process input file INFILE with output to OUTFILE.
   If either is "-", use the standard I/O stream for it instead.

   Lines are read one at a time into two alternating line buffers and
   each line's comparison field (as located by find_field) is compared
   with that of the previously kept line.  When the locale is
   multibyte (MB_CUR_MAX > 1) and HAVE_MBRTOWC is set, the comparison
   goes through different_multi and carries each line's mbstate_t
   along; otherwise different is used.

   Failure to open either file, a read error on the input, or a
   write/close error on a non-stdout output is reported through
   error () with EXIT_FAILURE status.  */
static void
check_file (const char *infile, const char *outfile)
{
  FILE *istream;
  FILE *ostream;
  struct linebuffer lb1, lb2;
  struct linebuffer *thisline, *prevline;

  if (STREQ (infile, "-"))
    istream = stdin;
  else
    istream = fopen (infile, "r");
  if (istream == NULL)
    error (EXIT_FAILURE, errno, "%s", infile);

  if (STREQ (outfile, "-"))
    ostream = stdout;
  else
    ostream = fopen (outfile, "w");
  if (ostream == NULL)
    error (EXIT_FAILURE, errno, "%s", outfile);

  thisline = &lb1;
  prevline = &lb2;

  initbuffer (thisline);
  initbuffer (prevline);

  /* The duplication in the following `if' and `else' blocks is an
     optimization to distinguish the common case (in which none of
     the following options has been specified: --count, --repeated,
     --all-repeated, --unique) from the others.  In the common case,
     this optimization lets uniq output each different line right away,
     without waiting to see if the next one is different.  */

  if (mode == output_all && countmode == count_none)
    {
      char *prevfield IF_LINT (= NULL);
      size_t prevlen IF_LINT (= 0);
#if HAVE_MBRTOWC
      mbstate_t prevstate;

      memset (&prevstate, '\0', sizeof (mbstate_t));
#endif

      while (!feof (istream))
        {
          char *thisfield;
          size_t thislen;
#if HAVE_MBRTOWC
          mbstate_t thisstate;
#endif

          if (readline (thisline, istream) == 0)
            break;
          thisfield = find_field (thisline);
          /* Field length: the line length less one byte (presumably
             the line terminator) and less the skipped prefix.  */
          thislen = thisline->length - 1 - (thisfield - thisline->buffer);
#if HAVE_MBRTOWC
          if (MB_CUR_MAX > 1)
            {
              thisstate = thisline->state;

              /* A zero-length PREVLINE means nothing has been kept
                 yet, so the current line is always emitted.  */
              if (prevline->length == 0 || different_multi
                  (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
                {
                  fwrite (thisline->buffer, sizeof (char),
                          thisline->length, ostream);

                  SWAP_LINES (prevline, thisline);
                  prevfield = thisfield;
                  prevlen = thislen;
                  prevstate = thisstate;
                }
            }
          else
#endif
            {
              if (prevline->length == 0
                  || different (thisfield, prevfield, thislen, prevlen))
                {
                  fwrite (thisline->buffer, sizeof (char),
                          thisline->length, ostream);

                  SWAP_LINES (prevline, thisline);
                  prevfield = thisfield;
                  prevlen = thislen;
                }
            }
        }
    }
  else
    {
      char *prevfield;
      size_t prevlen;
      int match_count = 0;
      int first_delimiter = 1;
#if HAVE_MBRTOWC
      mbstate_t prevstate;
#endif

      /* Nothing to do for empty input; still run the close/cleanup
         code below.  */
      if (readline (prevline, istream) == 0)
        goto closefiles;
      prevfield = find_field (prevline);
      prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
#if HAVE_MBRTOWC
      prevstate = prevline->state;
#endif

      while (!feof (istream))
        {
          int match;
          char *thisfield;
          size_t thislen;
#if HAVE_MBRTOWC
          mbstate_t thisstate;
#endif

          if (readline (thisline, istream) == 0)
            break;
          thisfield = find_field (thisline);
          thislen = thisline->length - 1 - (thisfield - thisline->buffer);
#if HAVE_MBRTOWC
          if (MB_CUR_MAX > 1)
            {
              thisstate = thisline->state;
              match = !different_multi (thisfield, prevfield,
                                        thislen, prevlen, thisstate, prevstate);
            }
          else
#endif
            match = !different (thisfield, prevfield, thislen, prevlen);

          if (match)
            ++match_count;

          if (mode == output_all_repeated && delimit_groups != DM_NONE)
            {
              if (!match)
                {
                  if (match_count) /* a previous match */
                    first_delimiter = 0; /* Only used when DM_SEPARATE */
                }
              else if (match_count == 1)
                {
                  /* First repeat of a new group: emit the group
                     delimiter before the group's first line.  */
                  if ((delimit_groups == DM_PREPEND)
                      || (delimit_groups == DM_SEPARATE
                          && !first_delimiter))
                    putc ('\n', ostream);
                }
            }

          if (!match || mode == output_all_repeated)
            {
              writeline (prevline, ostream, match_count);
              SWAP_LINES (prevline, thisline);
              prevfield = thisfield;
              prevlen = thislen;
#if HAVE_MBRTOWC
              /* THISSTATE is assigned only in the multibyte branch
                 above; copying it in a single-byte locale would read
                 an uninitialized object, so guard the copy.  */
              if (MB_CUR_MAX > 1)
                prevstate = thisstate;
#endif
              if (!match)
                match_count = 0;
            }
        }

      /* Flush the last kept line, which has no successor to
         trigger its output.  */
      writeline (prevline, ostream, match_count);
    }

 closefiles:
  if (ferror (istream) || fclose (istream) == EOF)
    error (EXIT_FAILURE, errno, _("error reading %s"), infile);

  /* Close ostream only if it's not stdout -- the latter is closed
     via the atexit-invoked close_stdout.  */
  if (ostream != stdout && (ferror (ostream) || fclose (ostream) == EOF))
    error (EXIT_FAILURE, errno, _("error writing %s"), outfile);

  free (lb1.buffer);
  free (lb2.buffer);
}
/* Entry point: set up the locale and the field-finding routine,
   parse options and up to two file operands (input, then output,
   each defaulting to "-"), reject unsupported option combinations,
   then run check_file on the chosen files.  */
int
main (int argc, char **argv)
{
  int opt = 0;
  bool pedantic = (getenv ("POSIXLY_CORRECT") != NULL);
  bool saw_digit_option = false;
  int n_operands = 0;
  char const *operand[2] = { "-", "-" };

  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  hard_LC_COLLATE = hard_locale (LC_COLLATE);

  atexit (close_stdout);

  /* Start with the single-byte field finder and switch to the
     multibyte one only when the locale needs it.  */
  find_field = find_field_uni;
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    find_field = find_field_multi;
#endif

  /* Option defaults.  */
  mode = output_all;
  countmode = count_none;
  delimit_groups = DM_NONE;
  check_chars = SIZE_MAX;
  skip_fields = 0;
  skip_chars = 0;

  while (1)
    {
      /* Parse an operand with leading "+" as a file after "--" was
         seen; or if pedantic and a file was seen; or if not
         obsolete.  getopt_long is consulted only when neither of
         the first two conditions already holds.  */
      bool take_operand = (opt == -1 || (pedantic && n_operands != 0));

      if (!take_operand)
        {
          opt = getopt_long (argc, argv,
                             "-0123456789Dcdf:is:uw:", longopts, NULL);
          take_operand = (opt == -1);
        }

      if (take_operand)
        {
          if (optind == argc)
            break;
          if (n_operands == 2)
            {
              error (0, 0, _("extra operand `%s'"), argv[optind]);
              usage (EXIT_FAILURE);
            }
          operand[n_operands++] = argv[optind++];
          continue;
        }

      switch (opt)
        {
        case 1:
          {
            /* A non-option argument: either the obsolete "+N"
               skip-chars form or a file operand.  */
            unsigned long int n_skip;
            bool const obsolete_plus
              = (optarg[0] == '+'
                 && (posix2_version () < 200112
                     || !getenv ("POSIXLY_CORRECT"))
                 && xstrtoul (optarg, NULL, 10, &n_skip, "") == LONGINT_OK
                 && n_skip <= SIZE_MAX);

            if (obsolete_plus)
              skip_chars = n_skip;
            else if (n_operands != 2)
              operand[n_operands++] = optarg;
            else
              {
                error (0, 0, _("extra operand `%s'"), optarg);
                usage (EXIT_FAILURE);
              }
          }
          break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          {
            /* Obsolete "-N": accumulate one decimal digit of the
               field count, rejecting values that wrap around.  */
            size_t before = skip_fields;

            skip_fields = before * 10 + opt - '0';
            if (SIZE_MAX / 10 < before || skip_fields < before)
              error (EXIT_FAILURE, 0, "%s",
                     _("invalid number of fields to skip"));
            saw_digit_option = true;
          }
          break;

        case 'c':
          countmode = count_occurrences;
          break;

        case 'd':
          mode = output_repeated;
          break;

        case 'D':
          mode = output_all_repeated;
          delimit_groups = (optarg == NULL
                            ? DM_NONE
                            : XARGMATCH ("--all-repeated", optarg,
                                         delimit_method_string,
                                         delimit_method_map));
          break;

        case 'f':		/* Like '-#'. */
          skip_fields = size_opt (optarg,
                                  N_("invalid number of fields to skip"));
          break;

        case 'i':
          ignore_case = 1;
          break;

        case 's':		/* Like '+#'. */
          skip_chars = size_opt (optarg,
                                 N_("invalid number of bytes to skip"));
          break;

        case 'u':
          mode = output_unique;
          break;

        case 'w':
          check_chars = size_opt (optarg,
                                  N_("invalid number of bytes to compare"));
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (EXIT_FAILURE);
        }
    }

  /* The digit form of --skip-fields is rejected under a new enough
     POSIX version when POSIXLY_CORRECT is set.  */
  if (saw_digit_option && 200112 <= posix2_version ()
      && getenv ("POSIXLY_CORRECT"))
    {
      error (0, 0, _("`-%lu' option is obsolete; use `-f %lu'"),
             (unsigned long) skip_fields, (unsigned long) skip_fields);
      usage (EXIT_FAILURE);
    }

  if (countmode == count_occurrences && mode == output_all_repeated)
    {
      error (0, 0,
             _("printing all duplicated lines and repeat counts is meaningless"));
      usage (EXIT_FAILURE);
    }

  check_file (operand[0], operand[1]);

  exit (EXIT_SUCCESS);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -