📄 sort.c
字号:
arg_count++; } if (check) exit(0); sort(); /* Sort whatever is left */ if (nr_of_files == 1) /* Only one file sorted -> don't merge */ exit(0); files_merge(nr_of_files); return(0);}/* Adjust_options() assigns all global variables set also in the fields * assigned. */void adjust_options(field)register FIELD *field;{ register FIELD *gfield = &fields[GLOBAL]; if (gfield->reverse) field->reverse = TRUE; if (gfield->blanks) field->blanks = TRUE; if (gfield->dictionary) field->dictionary = TRUE; if (gfield->fold_case) field->fold_case = TRUE; if (gfield->ascii) field->ascii = TRUE; if (gfield->numeric) field->numeric = TRUE;}/* Error () prints the error message on stderr and exits if quit == TRUE. */void error(quit, message, arg)register BOOL quit;register char *message, *arg;{ write(2, message, strlen(message)); if (arg != NIL_PTR) write(2, arg, strlen(arg)); perror(" "); if (quit) exit(1);}/* Open_outfile () assigns to out_fd the fd where the output must go when all * the sorting is done. */void open_outfile(){ if (output_file == NIL_PTR) out_fd = STD_OUT; else if ((out_fd = creat(output_file, 0644)) < 0) error(TRUE, "Cannot creat ", output_file);}/* Get_file reads the whole file of filedescriptor fd. If the file is too big * to keep in core, a partial sort is done, and the output is stashed somewhere. */void get_file(fd, size)int fd; /* Fd of file to read */register off_t size; /* Size of file */{ register int i; int rest; /* Rest in memory */ char save_ch; /* Used in stdin readings */ rest = MEMORY_SIZE - (cur_pos - mem_top); if (fd == 0) { /* We're reding stdin */ while ((i = read(0, cur_pos, rest)) > 0) { if ((cur_pos - mem_top) + i == MEMORY_SIZE) { in_core = FALSE; i = last_line(); /* End of last line */ save_ch = mem_top[i]; mem_top[i] = '\0'; sort(); /* Sort core */ mem_top[i] = save_ch; /* Restore erased char */ /* Restore last (half read) line */ for (rest = 0; i + rest != MEMORY_SIZE; rest++) mem_top[rest] = mem_top[i + rest]; /* Assign current pos. in memory */ cur_pos = &mem_top[rest]; } else { /* Fits, just assign position in mem. */ cur_pos = cur_pos + i; *cur_pos = '\0'; } /* Calculate rest of mem */ rest = MEMORY_SIZE - (cur_pos - mem_top); } } /* Reading file. Check size */ else if (size > rest) { /* Won't fit */ mread(fd, cur_pos, rest); in_core = FALSE; i = last_line(); /* Get pos. of last line */ mem_top[i] = '\0'; /* Truncate */ (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */ size = size - rest - i + MEMORY_SIZE; /* Calculate rest */ cur_pos = mem_top; /* Reset mem */ sort(); /* Sort core */ get_file(fd, size); /* Get rest of file */ } else { /* Fits. Just read in */ rest = size; mread(fd, cur_pos, rest); cur_pos = cur_pos + rest; /* Reassign cur_pos */ *cur_pos = '\0'; (void) close(fd); /* File completed */ }}/* Last_line () find the last line in core and retuns the offset from the top * of the memory. */int last_line(){ register int i; for (i = MEMORY_SIZE - 2; i > 0; i--) if (mem_top[i] == '\n') break; return i + 1;}/* Print_table prints the line table in the given file_descriptor. If the fd * equals ERROR, it opens a temp_file itself. */void print_table(fd)int fd;{ register char **line_ptr; /* Ptr in line_table */ register char *ptr; /* Ptr to line */ int index = 0; /* Index in output buffer */ if (fd == ERROR) { if ((fd = creat(file_name(nr_of_files), 0644)) < 0) error(TRUE, "Cannot creat ", file_name(nr_of_files)); } for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) { ptr = *line_ptr; /* Skip all same lines if uniq is set */ if (uniq && *(line_ptr + 1) != NIL_PTR) { if (compare(ptr, *(line_ptr + 1)) == SAME) continue; } do { /* Print line in a buffered way */ out_buffer[index++] = *ptr; if (index == IO_SIZE) { mwrite(fd, out_buffer, IO_SIZE); index = 0; } } while (*ptr++ != '\n'); } mwrite(fd, out_buffer, index);/* Flush buffer */ (void) close(fd); /* Close file */ nr_of_files++; /* Increment nr_of_files to merge */}/* File_name () returns the nr argument from the argument list, or a uniq * filename if the nr is too high, or the arguments were not merge files. */char *file_name(nr)register int nr;{ if (only_merge) { if (args_offset + nr < args_limit) return argptr[args_offset + nr]; } temp_files[16] = nr / 26 + 'a'; temp_files[17] = nr % 26 + 'a'; return temp_files;}/* Mread () performs a normal read (), but checks the return value. */void mread(fd, address, bytes)int fd;char *address;register int bytes;{ if (read(fd, address, bytes) < 0 && bytes != 0) error(TRUE, "Read error", NIL_PTR);}/* Mwrite () performs a normal write (), but checks the return value. */void mwrite(fd, address, bytes)int fd;char *address;register int bytes;{ if (write(fd, address, bytes) != bytes && bytes != 0) error(TRUE, "Write error", NIL_PTR);}/* Sort () sorts the input in memory starting at mem_top. */void sort(){ register char *ptr = mem_top; register int count = 0;/* Count number of lines in memory */ while (*ptr) { if (*ptr++ == '\n') count++; }/* Set up the line table */ line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *)); count = 1; ptr = line_table[0] = mem_top; while (*ptr) { if (*ptr++ == '\n') line_table[count++] = ptr; } line_table[count - 1] = NIL_PTR;/* Sort the line table */ sort_table(count - 1);/* Stash output somewhere */ if (in_core) { open_outfile(); print_table(out_fd); } else print_table(ERROR);/* Free line table */ mbrk((char *) line_table);}/* Sort_table () sorts the line table consisting of nel elements. */void sort_table(nel)register int nel;{ char *tmp; register int i; /* Make heap */ for (i = (nel >> 1); i >= 1; i--) incr(i, nel); /* Sort from heap */ for (i = nel; i > 1; i--) { tmp = line_table[0]; line_table[0] = line_table[i - 1]; line_table[i - 1] = tmp; incr(1, i - 1); }}/* Incr () increments the heap. */void incr(si, ei)register int si, ei;{ char *tmp; while (si <= (ei >> 1)) { si <<= 1; if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0) si++; if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0) return; tmp = line_table[(si >> 1) - 1]; line_table[(si >> 1) - 1] = line_table[si - 1]; line_table[si - 1] = tmp; }}/* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and * puts it into the line1 and line2 arrays. It then calls the cmp () routine * with the field describing the arguments. */int cmp_fields(el1, el2)register char *el1, *el2;{ int i, ret; char line1[LINE_SIZE], line2[LINE_SIZE]; for (i = 0; i < field_cnt; i++) { /* Setup line parts */ build_field(line1, &fields[i + 1], el1); build_field(line2, &fields[i + 1], el2); if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2, &fields[i + 1])) != SAME) break; /* If equal, try next field */ }/* Check for reverse flag */ if (i != field_cnt && fields[i + 1].reverse) return -ret;/* Else return the last return value of cmp () */ return ret;}/* Build_field builds a new line from the src as described by the field. * The result is put in dest. */void build_field(dest, field, src)char *dest; /* Holds result */register FIELD *field; /* Field description */register char *src; /* Source line */{ char *begin = src; /* Remember start location */ char *last; /* Pointer to end location */ int i;/* Skip begin fields */ src = skip_fields(src, field->beg_field);/* Skip begin positions */ for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;/* Copy whatever is left */ copy(dest, src);/* If end field is assigned truncate (perhaps) the part copied */ if (field->end_field != ERROR) { /* Find last field */ last = skip_fields(begin, field->end_field);/* Skip positions as given by end fields description */ for (i = 0; i < field->end_pos && *last != '\n'; i++) last++; dest[last - src] = '\n';/* Truncate line */ }}/* Skip_fields () skips nf fields of the line pointed to by str. */char *skip_fields(str, nf)register char *str;int nf;{ while (nf-- > 0) { if (separator == '\0') {/* Means ' ' or '\t' */ while (table[*str] & BLANK) str++; while (*str != ' ' && *str != '\t' && *str != '\n') str++; } else { while (*str == separator) str++; while (*str != separator && *str != '\n') str++; } } return str; /* Return pointer to indicated field */}/* Compare is called by all sorting routines. It checks if fields assignments * has been made. if so, it calls cmp_fields (). If not, it calls cmp () and * reversed the return value if the (global) reverse flag is set. */int compare(el1, el2)register char *el1, *el2;{ int ret; if (field_cnt > GLOBAL) return cmp_fields(el1, el2); ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]); return(fields[GLOBAL].reverse) ? -ret : ret;}/* Cmp () is the actual compare routine. It compares according to the * description given in the field pointer. */int cmp(el1, el2, field)register unsigned char *el1, *el2;FIELD *field;{ int c1, c2; if (field->blanks) { /* Skip leading blanks */ while (table[*el1] & BLANK) el1++; while (table[*el2] & BLANK) el2++; } if (field->numeric) /* Compare numeric */ return digits((char *) el1, (char *) el2, TRUE); for (;;) { while (*el1 == *el2) { if (*el1++ == '\n') /* EOLN on both strings */ return SAME; el2++; } if (*el1 == '\n') /* EOLN on string one */ return LOWER; if (*el2 == '\n') return HIGHER; if (field->ascii) { /* Skip chars outside 040 - 0177 */ if ((table[*el1] & ASCII) == 0) { do { el1++; } while ((table[*el1] & ASCII) == 0); continue; } if ((table[*el2] & ASCII) == 0) { do { el2++; } while ((table[*el2] & ASCII) == 0); continue; } } if (field->dictionary) {/* Skip non-dict chars */ if ((table[*el1] & DICT) == 0) { do { el1++; } while ((table[*el1] & DICT) == 0); continue; } if ((table[*el2] & DICT) == 0) { do { el2++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -