📄 sort.c
字号:
/* * These functions sort lines according to keys in a marked BOX block. * * Being that the data structure was changed from a big text buffer to a * linked list, we can replace the simple insertion sort algorithm with a * much faster sort algorithm. The classic search and sort reference is by * Donald E. Knuth, _The Art of Computer Programming; Volume 3: Sorting and * Searching_. One of the fastest and most widely used general purpose * sorting algorithms is the "Quicksort" algorithm. * * For implementation hints and guidelines on the Quicksort algorithm, one * might read the original Quicksort paper by C. A. R. Hoare or anything * by Robert Sedgewick. Although Dr. Sedgewick includes a chapter on * Quicksort in his intro computer science textbooks, _Algorithms in X_, * I prefer the detailed hints and guidelines in his 1978 CACM paper. * Incidentally, Dr. Sedgewick's Ph.D. dissertation was on the modification * and mathematical analysis of the Quicksort algorithm. * * * See: * * Charles Antony Richard Hoare, "Algorithm 63: Partition." _Communications * of the ACM_, 4 (No. 7): 321, 1961. * * Charles Antony Richard Hoare, "Algorithm 64: Quicksort." _Communications * of the ACM_, 4 (No. 7): 321, 1961. * * Charles Antony Richard Hoare, "Quicksort." _The Computer Journal_, * 5 (April 1962 - January 1963): 10-15, 1962. * * See also: * * Donald Ervin Knuth, _The Art of Computer Programming; Volume 3: Sorting * and Searching_, Addison-Wesley, Reading, Mass., 1973, Chapter 5, * Sorting, pp 114-123. ISBN 0-201-03803-X. * * Robert Sedgewick, "Implementing Quicksort Programs." _Communications * of the ACM_, 21 (No. 10): 847-857, 1978. * * * Quicksort in TDE * * Quicksort in TDE is a stable, non-recursive implementation based on * Program 2, page 851, CACM, 21 (No. 10), 1978, by Robert Sedgewick. My * partition algorithm finds the median-of-three. Then, it walks from the * head of the list, comparing nodes, and uses the first occurrence of the * key of the median-of-three as the partition node. Mostly by accident * and partly by design, my partition routine uses a "fat pivot" to keep * equal nodes in the same relative order as they appear in the file (the * definition of a stable sort). By using the first median-of-three node * as the partitioning node, 1) sorting a sorted list is fairly fast and * 2) encountering a worst case is very unlikely. TDE will sort, fairly * fast, multiple keys ascending or descending, ignore or match case, and * preserve order in the file, while using a custom sort sequence for your * favorite domestic or foreign language. * * Found an error in the comparison function in versions 2.2 & 3.0. Equal * keys were not compared correctly. Fixed in version 3.1. * * New editor name: TDE, the Thomson-Davis Editor. * Author: Frank Davis * Date: June 5, 1991, version 1.0 * Date: July 29, 1991, version 1.1 * Date: October 5, 1991, version 1.2 * Date: January 20, 1992, version 1.3 * Date: February 17, 1992, version 1.4 * Date: April 1, 1992, version 1.5 * Date: June 5, 1992, version 2.0 * Date: October 31, 1992, version 2.1 * Date: April 1, 1993, version 2.2 * Date: June 5, 1993, version 3.0 * Date: August 29, 1993, version 3.1 * Date: November 13, 1993, version 3.2 * Date: June 5, 1994, version 4.0 * Date: December 5, 1998, version 5.0 (jmh) * * This code is released into the public domain, Frank Davis. * You may use and distribute it freely. */#include "tdestr.h"#include "common.h"#include "tdefunc.h"#include "define.h"/*#define USE_QSORT*/ /* use quicksort instead of mergesort *//*#define TEST */ /* display number of comparisons and ticks */#ifdef TESTunsigned *comps, qcomps, icomps;clock_t ticks, ticks1;#endif/* * threshold value: partitions smaller than this will use insertion sort. */#define THRESH 25/* * function prototypes for mergesort */void merge_sort_block( long, line_list_ptr );line_list_ptr run( line_list_ptr, long *, long * );line_list_ptr merge( line_list_ptr, long, line_list_ptr, long, line_list_ptr );/* * Name: sort_box_block * Purpose: sort lines according to text in marked BOX block * Date: June 5, 1992 * Passed: window: pointer to current window * Notes: quick sort and insertion sort the lines in the BOX buff according * to stuff in a box block. * jmh 980614: test for tabs. * jmh 030224: sort line blocks. * jmh 060827: detab each line instead of detabbing and entabbing the whole * block - this is necessary to properly preserve tabs. */int sort_box_block( TDE_WIN *window ){int prompt_line;int block_type;line_list_ptr ll;register file_infos *file;TDE_WIN *sw;int rc; /* * make sure block is marked OK */ rc = OK; prompt_line = window->bottom_line; if (un_copy_line( window->ll, window, TRUE, TRUE ) == ERROR) return( ERROR ); check_block( &sw ); if (g_status.marked == TRUE) { file = g_status.marked_file; if (file->read_only) return( ERROR ); block_type = file->block_type; if (block_type == BOX || block_type == LINE) { /* * sort ascending or descending? */ rc = get_sort_order( window ); if (rc != ERROR) { file->modified = TRUE; if (mode.do_backups == TRUE) backup_file( sw ); show_search_message( SORTING ); if (block_type == BOX) { /* * figure the width of the block. */ sort.block_len = file->block_ec + 1 - file->block_bc; /* * set up the sort structure. */ sort.bc = file->block_bc; sort.ec = file->block_ec; } else { sort.bc = 0; sort.ec = MAX_LINE_LENGTH - 1; sort.block_len = MAX_LINE_LENGTH; } sort.order_array = (mode.search_case == IGNORE) ? sort_order.ignore : sort_order.match; /* * save the previous node for use with insertion sort. */ ll = file->block_start->prev;#ifdef TEST comps = &icomps; qcomps = icomps = 0; ticks1 = clock(); while (ticks1 == (ticks = clock())) ;#endif#ifdef USE_QSORT quick_sort_block( file->block_br, file->block_er, file->block_start, file->block_end );#ifdef TEST ticks = clock() - ticks; printf( "Comparisons: %u (quick = %u, insert = %u); ticks: %lu\n", qcomps + icomps, qcomps, icomps, (unsigned long)ticks );#endif#else merge_sort_block( file->block_er - file->block_br + 1, file->block_start );#ifdef TEST ticks = clock() - ticks; printf( "Comparisons: %u; ticks: %lu\n", icomps, (unsigned long)ticks );#endif#endif /* * housekeeping. mark the file as dirty and restore the * cursors, which are scrambled during the sort. * jmh 980729: Update the syntax flags. */ file->dirty = GLOBAL; restore_cursors( file ); syntax_check_block( file->block_br, file->block_er, ll->next, file->syntax ); show_search_message( CLR_SEARCH ); } } else { /* * cannot sort stream blocks */ error( WARNING, prompt_line, block23 ); rc = ERROR; } } else { /* * box not marked */ error( WARNING, prompt_line, block24 ); rc = ERROR; } return( rc );}#ifdef USE_QSORT/* * Name: quick_sort_block * Purpose: sort lines according to text in marked BOX block * Date: Jaunary 10, 1993 * Passed: low: starting line in box block * high: ending line in a box block * low_node: starting node in box block * high_node: ending node in box block * Notes: Quicksort lines in the BOX block according to keys in * a box block. * because the median of three method is used to find the partion * node, high - low should be greater than or equal to 2. * with end recursion removal and sorting the smallest sublist * first, our stack only needs room for log2 (N+1)/(M+2) nodes. * a stack size of 24 can reliably handle almost 500 million lines. */void quick_sort_block( long low, long high, line_list_ptr low_node, line_list_ptr high_node ){long low_rline_stack[24];long high_rline_stack[24];line_list_ptr low_node_stack[24];line_list_ptr high_node_stack[24];long low_count;long high_count;long count;line_list_ptr low_start;line_list_ptr low_head;line_list_ptr low_tail;line_list_ptr high_end;line_list_ptr high_head;line_list_ptr high_tail;line_list_ptr equal_head;line_list_ptr equal_tail;line_list_ptr walk_node;line_list_ptr median_node;int i;int stack_pointer; assert( low_node->len != EOF); assert( high_node->len != EOF); stack_pointer = 0; for (;;) { /* * being that a median-of-three is used as the partition algorithm, * we probably need to have at least 2 nodes in each sublist. I * chose a minimum of 25 nodes as a SWAG (scientific wild ass guess). * a simple insertion sort mops the list after quicksort finishes. */ while (high - low > THRESH) { assert( high >= 1 ); assert( low >= 1 ); assert( low <= high ); /* * start the walk node at the head of the list and walk to the * middle of the sublist. */ walk_node = low_node; count = (high - low) / 2; for (; count > 0; count--) walk_node = walk_node->next;#ifdef TEST comps = &qcomps;#endif /* * now, find the median of the low, middle, and high node. * * being that I am subject to error, let's assert that we really * did find the median-of-three. */ load_pivot( low_node ); if (compare_pivot( walk_node ) < 0) { low_head = walk_node; median_node = low_node; } else { low_head = low_node; median_node = walk_node; } high_head = high_node; load_pivot( median_node ); if (compare_pivot( high_node ) < 0) { high_head = median_node; median_node = high_node; } load_pivot( median_node ); if (compare_pivot( low_head ) > 0) { low_tail = median_node; median_node = low_head; low_head = low_tail; } load_pivot( median_node ); assert( compare_pivot( low_head ) <= 0 ); assert( compare_pivot( high_head ) >= 0 ); /* * initialize pointers and counters for this partition. */ low_start = low_node->prev; high_end = high_node->next; low_head = low_tail = NULL; equal_head = equal_tail = NULL; high_head = high_tail = NULL; low_count = high_count = 0; /* * PARTITION: * put all nodes less than the pivot on the end of the low list. * put all nodes equal to the pivot on the end of the equal list. * put all nodes greater than the pivot on the end of the high list. */ walk_node = low_node; for (count = low; count <= high; count++) { i = compare_pivot( walk_node ); if (i > 0) { if (high_head == NULL) high_head = high_tail = walk_node; else { high_tail->next = walk_node; walk_node->prev = high_tail; high_tail = walk_node; } /* * keep a count of the number of nodes in the high list. */ ++high_count; } else if (i < 0) { if (low_head == NULL) low_head = low_tail = walk_node; else { low_tail->next = walk_node; walk_node->prev = low_tail; low_tail = walk_node; } /* * keep a count of the number of nodes in the low list */ ++low_count; } else { if (equal_head == NULL) equal_head = equal_tail = walk_node; else { equal_tail->next = walk_node; walk_node->prev = equal_tail; equal_tail = walk_node; } } walk_node = walk_node->next; } assert( low_count >= 0 ); assert( low_count < high - low ); assert( high_count >= 0 ); assert( high_count < high - low ); /* * we just partitioned the sublist into low, equal, and high * sublists. now, let's put the lists back together. */ if (low_count > 0) { low_head->prev = low_start; low_start->next = low_head; low_tail->next = equal_head; equal_head->prev = low_tail; } else { equal_head->prev = low_start; low_start->next = equal_head; } if (high_count > 0) { high_head->prev = equal_tail; equal_tail->next = high_head; high_tail->next = high_end; high_end->prev = high_tail; } else { equal_tail->next = high_end; high_end->prev = equal_tail; }#ifdef TEST comps = &icomps;#endif /* * now, lets look at the low list and the high list. save the node * pointers and counters of the longest sublist on the stack. * then, quicksort the shortest sublist. */ if (low_count > high_count) { /* * if fewer than THRESH nodes in the high count, don't bother to * push the stack -- sort the low list. */ if (high_count > THRESH) { low_rline_stack[stack_pointer] = low; high_rline_stack[stack_pointer] = low + low_count - 1; low_node_stack[stack_pointer] = low_head; high_node_stack[stack_pointer] = low_tail; ++stack_pointer; low = high - high_count + 1; high = high; low_node = high_head; high_node = high_tail; } else { insertion_sort_block( high - high_count + 1, high, high_head ); low = low; high = low + low_count - 1; low_node = low_head; high_node = low_tail; } } else { /* * if fewer than THRESH nodes in the low count, don't bother to * push the stack -- sort the high list. */ if (low_count > THRESH) { low_rline_stack[stack_pointer] = high - high_count + 1; high_rline_stack[stack_pointer] = high; low_node_stack[stack_pointer] = high_head; high_node_stack[stack_pointer] = high_tail; ++stack_pointer; low = low; high = low + low_count - 1; low_node = low_head; high_node = low_tail; } else { insertion_sort_block( low, low + low_count - 1, low_head ); low = high - high_count + 1; high = high; low_node = high_head; high_node = high_tail; } } assert( stack_pointer < 24 ); } insertion_sort_block( low, high, low_node );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -