📄 exmrg2.cpp

📁 数据结构与算法分析（C++）（版第二版）源码
💻 CPP
字号:
// Simple external merge demonstration.
// Second in a series of three programs.
// This version builds initial runs of full memory size, then combines
// the runs with a 2-way merge.

#include <iostream.h>
#include <stdlib.h>
#include <fstream.h>
#include <stdio.h>
#include <string.h>

#include "book.h"
#include "compare.h"

// qsort() returns without sorting any sublist whose (j-i) is <= THRESHOLD.
// With a value of 0 only sublists of at most one element are skipped.
const int THRESHOLD=0;
// Insertion sort over A[0..n-1].
// Each element in turn is moved toward the front, one neighbour swap at a
// time, for as long as Comp::lt reports that it belongs before the element
// immediately to its left.
template <class Elem, class Comp>
void inssort(Elem A[], int n) {
  for (int next = 1; next < n; ++next) {
    int slot = next;               // Current position of the moving record
    while ((slot > 0) && Comp::lt(A[slot], A[slot-1])) {
      swap(A, slot, slot-1);
      --slot;
    }
  }
}

// Return the index of the pivot: the middle position of A[i..j].
// The array itself is not examined.  The midpoint is computed as an offset
// from i rather than as (i+j)/2 so that the intermediate sum cannot
// overflow when both indices are large; for 0 <= i <= j the result is the
// same.
template <class Elem> int findpivot(Elem A[], int i, int j)
  { return i + (j-i)/2; }

// Partition the records strictly between positions l and r around pivot.
// l is advanced before it is first examined and r is decremented before it
// is first examined, so callers pass one position before the first record
// and the position just past the last record to scan.
// Returns the first position of the right-hand partition.
template <class Elem, class Comp>
int partition(Elem A[], int l, int r, Elem& pivot) {
  for (;;) {
    // Advance l to the next record that is not less than the pivot.
    do {
      ++l;
    } while (Comp::lt(A[l], pivot));

    // Retreat r to the next record that is not greater than the pivot,
    // never stepping below index 0.
    while (r != 0) {
      --r;
      if (!Comp::gt(A[r], pivot)) break;
    }

    swap(A, l, r);                // Exchange the out-of-place pair
    if (l >= r) break;            // Bounds have met or crossed
  }
  swap(A, l, r);                  // Undo the final, unnecessary exchange
  return l;
}

// Quicksort array[i..j] using Comp for comparisons.
// Sublists with (j-i) <= THRESHOLD are left as they are.
template <class Elem, class Comp>
void qsort(Elem array[], int i, int j) {
  if ((j-i) > THRESHOLD) {
    // Park the chosen pivot in the last slot while partitioning.
    const int chosen = findpivot(array, i, j);
    swap(array, chosen, j);

    // split is the first position of the right-hand partition.
    const int split = partition<Elem,Comp>(array, i-1, j, array[j]);
    swap(array, split, j);        // Move the pivot to its final position

    qsort<Elem,Comp>(array, i, split-1);
    qsort<Elem,Comp>(array, split+1, j);
  }
}

#define RecsPerBlock 1024  // Number of records/block
int BlocksPerRun;          // Blocks per run.  Determined by file size.
// Number of records/run.  Parenthesized so that the expansion stays a
// single operand when it appears next to /, % or other operators.
#define RecsPerRun (BlocksPerRun*RecsPerBlock)

// Sentinel that getnext() stores in val when the file has no more records.
// The merge loop compares record values against it directly, so a real
// record must never have this value.
#define EMPTY -2

// Define indices for run files.
// These indices get swapped around as necessary to switch input files
// to become output files on alternate passes.
// IN/IN1 and OUT/OUT1 are two names for the same index.
#define IN 0
#define IN1 0
#define IN2 1
#define OUT 2
#define OUT1 2
#define OUT2 3

// This is a simple example of external sorting, with the records being
// nothing more than an integer value.
typedef int Elem;

// Maintain info on the run files -- two input files, two output files.
// Posit indicates the current position in the given file's buffer.
// The two input entries start at RecsPerBlock so that the first getnext()
// call finds the buffer used up and reads a block; the two output entries
// start at 0, i.e. with an empty buffer.
int Posit[4] = {RecsPerBlock, RecsPerBlock, 0, 0};
// Recinblock indicates the number of records now in the current block
//   of the given file (set by getnext() after each read)
int Recinblock[4] = {0, 0, 0, 0};
// Buffers for the four run files, one block each
Elem Array[4][RecsPerBlock];
// File streams for the four run files
fstream FS[4];
// Names of the four run files (at most 29 characters plus terminator)
char Name[4][30];

// Fetch the next record of file fpindex into val.
// Reading is buffered one block at a time: when every record of the
// current block has been handed out, the next block is read from the file.
// Returns true when a record was delivered.  When the read yields no
// complete record, val is set to EMPTY and false is returned.
bool getnext(int fpindex, Elem& val) {
  int& cursor = Posit[fpindex];       // Next unread slot in the buffer
  int& loaded = Recinblock[fpindex];  // Records held by the buffer

  if (cursor >= loaded) {             // Buffer used up: read another block
    FS[fpindex].read(&Array[fpindex], sizeof(Elem)*RecsPerBlock);
    loaded = FS[fpindex].gcount()/sizeof(Elem);
    if (loaded == 0) {
      val = EMPTY;
      return false;
    }
    cursor = 0;
  }

  val = Array[fpindex][cursor];
  ++cursor;
  return true;
}


// Write a record (in val) to the output file fpindex.
// This supports buffered writing to the output file, one block at a time:
// the record is stored in the file's buffer, and a buffer that is already
// full is written to the file first.  Records still in the buffer when
// output ends must be written with myflush().
void putout(int fpindex, Elem val) {
  if (Posit[fpindex] == RecsPerBlock) { // Buffer full: write the block
    FS[fpindex].write(Array[fpindex], sizeof(Elem)*RecsPerBlock);
    Posit[fpindex] = 0;
  }
  Array[fpindex][Posit[fpindex]++] = val;
}


// Flush an output block to disk.
// Writes the Posit[fpindex] records currently buffered for file fpindex
// and marks the buffer empty, so that calling this again without further
// putout() calls does not write the same records a second time.
void myflush(int fpindex) {
  if (Posit[fpindex] > 0) {
    FS[fpindex].write(Array[fpindex], sizeof(Elem)*Posit[fpindex]);
    Posit[fpindex] = 0;
  }
}

// In-memory sort of the n records in array, ordered by intintCompare.
void sort(Elem* array, int n) {
  const int lastindex = n - 1;
  qsort<Elem,intintCompare>(array, 0, lastindex);
  // Cleanup sort: finishes any short sublists that qsort() skipped.
  inssort<Elem,intintCompare>(array, n);
}

// First pass of merge sort.
// Split input into two files.
void pass1(int inindex, int outindex1, int outindex2, int numruns)
{
  // Declare the space for building the initial runs.
  Elem *BigArray = new Elem[RecsPerRun];
  for (int i=0; i<numruns; i++) {
    FS[inindex].read(BigArray, sizeof(Elem)*RecsPerRun);
    if (!FS[inindex]) cout << "Error reading run " << i << endl;

    sort(BigArray, RecsPerRun);

    FS[outindex1].write(BigArray, sizeof(Elem)*RecsPerRun);
    if (!FS[outindex1]) cout << "Error writing run " << i << endl;
  }
  delete [] BigArray;
}


// Do the real work here.
// in1, in2, out1, out2 are indices into FS/Name/Array/Posit.  FS[in1] must
// already be open on the original input; the run files are opened here by
// name.  numruns is the number of initial runs pass1() is asked to build.
// When only one run is left, the file holding it is renamed to outname.
//
// Record values are assumed to be non-negative: -1 is used for "no record
// written to the current output run yet", and EMPTY marks an exhausted
// input file.
void exmergesort(int in1, int in2, int out1, int out2, char* outname,
                 int numruns) {
  Elem val1, val2;
  Elem last = -1;
  bool DONE = false;

  // Open output files
  FS[out1].open(Name[out1], ios::out | ios::binary);
  if (!FS[out1]) cout << "Error opening " << Name[out1] << endl;
  FS[out2].open(Name[out2], ios::out | ios::binary);
  if (!FS[out2]) cout << "Error opening " << Name[out2] << endl;

  // Create the initial run files
  pass1(in1, out1, out2, numruns);
  FS[in1].close();
  FS[out1].close();
  FS[out2].close();

  // Now, merge runs into two output files.
  // Repeat that process until only one run is left.
  // This works by opening the files prior to the pass and closing them
  // after the pass.  This allows the system to track the number of records
  // in the files.  The alternative would be to explicitly keep track of
  // the number of records in each file, and just have the files all be
  // both readable and writable.
  while (!DONE) {
    cout << "Do Pass\n";
    // Last pass's outputs become this pass's inputs, and vice versa.
    swap(in1, out1);
    swap(in2, out2);

    // A stream that reached end of file on an earlier pass still carries
    // its eof/fail state; clear it before reopening, because older
    // libraries do not reset the state in open().
    FS[in1].clear();
    FS[in1].open(Name[in1], ios::in | ios::binary);
    if (!FS[in1]) cout << "Error opening " << Name[in1] << endl;
    FS[in2].clear();
    FS[in2].open(Name[in2], ios::in | ios::binary);
    if (!FS[in2]) cout << "Error opening " << Name[in2] << endl;
    FS[out1].clear();
    FS[out1].open(Name[out1], ios::out | ios::binary);
    if (!FS[out1]) cout << "Error opening " << Name[out1] << endl;
    FS[out2].clear();
    FS[out2].open(Name[out2], ios::out | ios::binary);
    if (!FS[out2]) cout << "Error opening " << Name[out2] << endl;

    Posit[out1] = Posit[out2] = 0;           // Output buffers empty
    Posit[in1] = Posit[in2] = RecsPerBlock;  // Force a read on first getnext
    DONE = true;   // Stays true only if this pass writes a single run
    last = -1;
    getnext(in1, val1);  getnext(in2, val2);

    // Here is where the actual merge takes place
    while ((val1 != EMPTY) || (val2 != EMPTY)) {
      if ((val1 < last) && (val2 < last)) { // At end of these runs
        // Start a new output run in the other output file.
        swap(out1, out2); last = -1; DONE = false;
      }
      if (val1 >= last) {       // Still stuff in run 1
        if (val2 >= last) {     // Still stuff in run 2
          if (val1 < val2) {    // Put out one of the records
            putout(out1, val1); last = val1; getnext(in1, val1);
          } else {
            putout(out1, val2); last = val2; getnext(in2, val2);
          }
        } else {                // Put out from run 1
          putout(out1, val1); last = val1; getnext(in1, val1);
        }
      } else {                  // Put out from run 2
        putout(out1, val2); last = val2; getnext(in2, val2);
      }
    }
    // Done this run, so flush output
    myflush(out1);
    myflush(out2);
    FS[in1].close(); FS[in2].close();
    FS[out1].close(); FS[out2].close();
  }

  // The single remaining run is in Name[out1]; give it the requested name.
  if (rename(Name[out1], outname) != 0)
    cout << "Error renaming " << Name[out1] << " to " << outname << endl;
}


// Main routine.  Get everything ready.
// Arguments: <infile> <outfile> <numblocks>, where numblocks is the size
// of the input file in blocks of RecsPerBlock records.  The run size is
// chosen from numblocks, the input file is opened, the four run-file names
// are derived from the input and output names, and the sort is timed.
int main(int argc, char** argv) {

  if (argc < 4) {
    cout << "Usage: exqsort <infile> <outfile> <numblocks>.\n";
    cout << "Blocksize is " << RecsPerBlock * sizeof(Elem) << " bytes.\n";
    exit(-1);
  }

  // Each run-file name is a given name plus a two-character suffix, and
  // it must fit, with its terminator, in a fixed-size Name slot.
  if ((strlen(argv[1]) + 3 > sizeof(Name[IN1])) ||
      (strlen(argv[2]) + 3 > sizeof(Name[OUT1]))) {
    cout << "File name is too long.\n";
    exit(1);
  }

  int numblocks = atoi(argv[3]);
  if (numblocks <= 4096) BlocksPerRun = 64;
  else if (numblocks <= 65536) BlocksPerRun = 256;
  else {
    cout << "File size is too big.\n";
    exit(1);
  }
  cout << "Allocating " << RecsPerRun*sizeof(Elem)
       << " bytes of working space\n";

  // Only whole runs are processed: numblocks/BlocksPerRun rounds down, so
  // blocks beyond the last complete run are never read.  Say so rather
  // than dropping them silently.
  int leftover = numblocks % BlocksPerRun;
  if (leftover > 0)
    cout << "Warning: the last " << leftover
         << " blocks do not fill a run and will not be sorted.\n";

  // For first pass, need the original input file.  Won't use again
  FS[IN1].open(argv[1], ios::in | ios::binary);
  if (!FS[IN1]) cout << "Error opening " << argv[1] << endl;

  // build the names of the run files
  sprintf(Name[IN1], "%s%s", argv[1], ".1");
  sprintf(Name[IN2], "%s%s", argv[1], ".2");
  sprintf(Name[OUT1], "%s%s", argv[2], ".1");
  sprintf(Name[OUT2], "%s%s", argv[2], ".2");

  // Start timing from here
  Settime();
  exmergesort(IN1, IN2, OUT1, OUT2, argv[2], numblocks/BlocksPerRun);
  cout << "Time is " << Gettime() << "seconds\n";

  return 0;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -