📄 sort.c
字号:
/* sort - sort a file of lines Author: Michiel Huisjes *//* SYNOPSIS: * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file].. * * [opts] can be any of * -f : Fold upper case to lower. * -n : Sort to numeric value (optional decimal point) implies -b * -b : Skip leading blanks * -i : Ignore chars outside ASCII range (040 - 0176) * -r : Reverse the sense of comparisons. * -d : Sort to dictionary order. Only letters, digits, comma's and points * are compared. * If any of these flags are used in [opts], then they override all global * ordering for this field. * * I/O control flags are: * -u : Print uniq lines only once. * -c : Check if files are sorted in order. * -m : Merge already sorted files. * -o outfile : Name of output file. (Can be one of the input files). * Default is stdout. * - : Take stdin as input. * * Fields: * -t'x' : Field separating character is 'x' * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is * taken to be 0. * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is * taken to be 0. * A missing -a.b means the rest of the line. */#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <signal.h>#include <unistd.h>#include <stdlib.h>#include <string.h>#include <stdio.h>#include <limits.h>#define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */#define MEMORY_SIZE (20 * 1024) /* Total mem_size */#define LINE_SIZE (1024 >> 1) /* Max length of a line */#define IO_SIZE (2 * 1024) /* Size of buffered output */#define STD_OUT 1 /* Fd of terminal *//* Return status of functions */#define OK 0#define ERROR -1#define NIL_PTR ((char *) 0)/* Compare return values */#define LOWER -1#define SAME 0#define HIGHER 1/* Table definitions. */#define DICT 0x001 /* Alpha, numeric, letters and . */#define ASCII 0x002 /* All between ' ' and '~' */#define BLANK 0x004 /* ' ' and '\t' */#define DIGIT 0x008 /* 0-9 */#define UPPER 0x010 /* A-Z */typedef int BOOL;#define FALSE 0#define TRUE 1typedef struct { int fd; /* Fd of file */ char *buffer; /* Buffer for reads */ int read_chars; /* Nr of chars actually read in buffer */ int cnt; /* Nr of chars taken out of buffer */ char *line; /* Contains line currently used */} MERGE;#define NIL_MERGE ((MERGE *) 0)MERGE merge_f[OPEN_FILES]; /* Merge structs */int buf_size; /* Size of core available for each struct */#define FIELDS_LIMIT 10 /* 1 global + 9 user */#define GLOBAL 0typedef struct { int beg_field, beg_pos; /* Begin field + offset */ int end_field, end_pos; /* End field + offset. ERROR == EOLN */ BOOL reverse; /* TRUE if rev. flag set on this field */ BOOL blanks; BOOL dictionary; BOOL fold_case; BOOL ascii; BOOL numeric;} FIELD;/* Field declarations. A total of FILEDS_LIMIT is allowed */FIELD fields[FIELDS_LIMIT];int field_cnt; /* Nr of field actually assigned *//* Various output control flags */BOOL check = FALSE;BOOL only_merge = FALSE;BOOL uniq = FALSE;char *mem_top; /* Mem_top points to lowest pos of memory. */char *cur_pos; /* First free position in mem */char **line_table; /* Pointer to the internal line table */BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */ /* Place where temp_files should be made */char temp_files[] = "/tmp/sort.XXXXX.XX";char *output_file; /* Name of output file */int out_fd; /* Fd to output file (could be STD_OUT) */char out_buffer[IO_SIZE]; /* For buffered output */char **argptr; /* Pointer to argv structure */int args_offset; /* Nr of args spilled on options */int args_limit; /* Nr of args given */char separator; /* Char that separates fields */int nr_of_files = 0; /* Nr_of_files to be merged */int disabled; /* Nr of files done */char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";/* Forward declarations */_PROTOTYPE(int main, (int argc, char **argv));_PROTOTYPE(void get_opts, (char *ptr, FIELD * field));_PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));_PROTOTYPE(void adjust_options, (FIELD * field));_PROTOTYPE(void error, (BOOL quit, char *message, char *arg));_PROTOTYPE(void open_outfile, (void));_PROTOTYPE(void get_file, (int fd, off_t size));_PROTOTYPE(int last_line, (void));_PROTOTYPE(void print_table, (int fd));_PROTOTYPE(char *file_name, (int nr));_PROTOTYPE(void mread, (int fd, char *address, int bytes));_PROTOTYPE(void mwrite, (int fd, char *address, int bytes));_PROTOTYPE(void sort, (void));_PROTOTYPE(void sort_table, (int nel));_PROTOTYPE(void incr, (int si, int ei));_PROTOTYPE(int cmp_fields, (char *el1, char *el2));_PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));_PROTOTYPE(char *skip_fields, (char *str, int nf));_PROTOTYPE(int compare, (char *el1, char *el2));_PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));_PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));_PROTOTYPE(void files_merge, (int file_cnt));_PROTOTYPE(void merge, (int start_file, int limit_file));_PROTOTYPE(void put_line, (char *line));_PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));_PROTOTYPE(int read_line, (MERGE * merg));_PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));_PROTOTYPE(void uniq_lines, (MERGE * merg));_PROTOTYPE(void check_file, (int fd, char *file));_PROTOTYPE(int length, (char *line));_PROTOTYPE(void copy, (char *dest, char *src));_PROTOTYPE(char *msbrk, (int size));_PROTOTYPE(void mbrk, (char *address));_PROTOTYPE(void catch, (int dummy));/* Table of all chars. 0 means no special meaning. */char table[256] = {/* '^@' to space */ 0, 0, 0, 0, 0, 0, 0, 0, 0, BLANK | DICT, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/* Space to '0' */ BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,/* '0' until '9' */ DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,/* ASCII from ':' to '@' */ ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,/* Upper case letters 'A' to 'Z' */ UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,/* ASCII from '[' to '`' */ ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,/* Lower case letters from 'a' to 'z' */ DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,/* ASCII from '{' to '~' */ ASCII, ASCII, ASCII, ASCII,/* Stuff from -1 to -177 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};/* * Get_opts () assigns the options into the field structure as described in ptr. * This field structure could be the GLOBAL one. */void get_opts(ptr, field)register char *ptr;register FIELD *field;{ switch (*ptr) { case 'b': /* Skip leading blanks */ field->blanks = TRUE; break; case 'd': /* Dictionary order */ field->dictionary = TRUE; break; case 'f': /* Fold upper case to lower */ field->fold_case = TRUE; break; case 'i': /* Skip chars outside ' ' '~' */ field->ascii = TRUE; break; case 'n': /* Sort on numeric */ field->numeric = TRUE; field->blanks = TRUE; break; case 'r': /* Reverse comparisons */ field->reverse = TRUE; break; default: /* Illegal options */ error(TRUE, USAGE, NIL_PTR); }}/* New_field () assigns a new field as described by the arguments. * A field description is of the form: +a.b[opts] -c.d, where b and d, as well * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit * is offset from field. */void new_field(field, offset, beg_fl)register FIELD *field; /* Field to assign */int *offset; /* Offset in argv structure */BOOL beg_fl; /* Assign beg or end of field */{ register char *ptr; ptr = argptr[*offset]; *offset += 1; /* Incr offset to next arg */ ptr++; if (beg_fl) field->beg_field = atoi(ptr); /* Assign int of first field */ else field->end_field = atoi(ptr); while (table[*ptr] & DIGIT) /* Skip all digits */ ptr++; if (*ptr == '.') { /* Check for offset */ ptr++; if (beg_fl) field->beg_pos = atoi(ptr); else field->end_pos = atoi(ptr); while (table[*ptr] & DIGIT) /* Skip digits */ ptr++; } if (beg_fl) { while (*ptr != '\0') /* Check options after field */ get_opts(ptr++, field); } if (beg_fl) { /* Check for end pos */ ptr = argptr[*offset]; if (ptr && *ptr == '-' && table[*(ptr + 1)] & DIGIT) { new_field(field, offset, FALSE); if (field->beg_field > field->end_field) error(TRUE, "End field is before start field!", NIL_PTR); } else /* No end pos. */ field->end_field = ERROR; }}int main(argc, argv)int argc;char *argv[];{ int arg_count = 1; /* Offset in argv */ struct stat st; register char *ptr; /* Ptr to *argv in use */ register int fd; int pid, pow; argptr = argv; cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */ while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) { if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */ break; if (*ptr == '+') { /* Assign field. */ if (++field_cnt == FIELDS_LIMIT) error(TRUE, "Too many fields", NIL_PTR); new_field(&fields[field_cnt], &arg_count, TRUE); } else { /* Get output options */ while (*++ptr) { switch (*ptr) { case 'c': /* Only check file */ check = TRUE; break; case 'm': /* Merge (sorted) files */ only_merge = TRUE; break; case 'u': /* Only give uniq lines */ uniq = TRUE; break; case 'o': /* Name of output file */ output_file = argv[++arg_count]; break; case 't': /* Field separator */ ptr++; separator = *ptr; break; default: /* Sort options */ get_opts(ptr, &fields[GLOBAL]); } } arg_count++; } } for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);/* Create name of tem_files 'sort.pid.aa' */ ptr = &temp_files[10]; pid = getpid(); pow = 10000; while (pow != 0) { *ptr++ = pid / pow + '0'; pid %= pow; pow /= 10; } signal(SIGINT, catch);/* Only merge files. Set up */ if (only_merge) { args_limit = args_offset = arg_count; while (argv[args_limit] != NIL_PTR) args_limit++; /* Find nr of args */ files_merge(args_limit - arg_count); exit(0); } if (arg_count == argc) { /* No args left. Use stdin */ if (check) check_file(0, NIL_PTR); else get_file(0, (off_t) 0); } else while (arg_count < argc) { /* Sort or check args */ if (strcmp(argv[arg_count], "-") == 0) fd = 0; else if (stat(argv[arg_count], &st) < 0) { error(FALSE, "Cannot find ", argv[arg_count++]); continue; } /* Open files */ else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) { error(FALSE, "Cannot open ", argv[arg_count++]); continue; } if (check) check_file(fd, argv[arg_count]); else /* Get_file reads whole file */ get_file(fd, st.st_size);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -