📄 sort.c

📁 linux 下用c++ 开发的一个小型数据库系统
💻 C
字号:
#include <sys/types.h>#include <functional>#include <string.h>#include <iostream>#include <sstream>#include <vector>using namespace std;#include "sort.h"#define MIN(a,b)   ((a) < (b) ? (a) : (b))// These comparison functions are visible only within this// source file. reccmp is the comparison routine (much like// strcmp or memcmp) that accepts integers, floats, and strings.// It returns -1 if p1 is less than p2, +1 if p1 is greater// than p2, or zero otherwise.static int reccmp(char* p1, char* p2, int p1Len, int p2Len, Datatype type){  float diff = 0.0;  switch(type) {  case INTEGER:    int iattr, ifltr;                   // word-alignment problem possible    memcpy(&iattr, p1, sizeof(int));    memcpy(&ifltr, p2, sizeof(int));    diff = iattr - ifltr;    break;  case FLOAT:    float fattr, ffltr;                 // word-alignment problem possible    memcpy(&fattr, p1, sizeof(float));    memcpy(&ffltr, p2, sizeof(float));    diff = fattr - ffltr;    break;  case STRING:    diff = memcmp(p1, p2, MIN(p1Len, p2Len));    break;  }  if (diff < 0)    diff = -1;  else if (diff > 0)    diff = 1;  return (int)diff;}// These three comparison routines are jacketed versions of// reccmp. This is because qsort(3) takes only a function pointer// but no additional parameters. The objects pointed to by p1// and p2 are of type SORTREC which has a pointer to the field// to be compared as well as its length (used for strings).#define SR(p)  ((SORTREC*)p)static int intcmp(const void* p1, const void* p2){  return reccmp(SR(p1)->field, SR(p2)->field,		SR(p1)->length, SR(p2)->length,		INTEGER);}static int floatcmp(const void* p1, const void* p2){  return reccmp(SR(p1)->field, SR(p2)->field,		SR(p1)->length, SR(p2)->length,		FLOAT);}static int stringcmp(const void* p1, const void* p2){  return reccmp(SR(p1)->field, SR(p2)->field,		SR(p1)->length, SR(p2)->length,		STRING);}// Create a sorted temporary file of the source file (fileName).// Sorting is based on attribute that is defined by offset, len,// and type. maxItems is the maximum number of items that a sorted// sub-run can hold (usually derived from amount of memory available).// Status code is returned in variable status.SortedFile::SortedFile(const string & fileName, 		       int offset, int len, Datatype type,		       int maxItems, Status& status)      : fileName(fileName), type(type), offset(offset), 	length(len), maxItems(maxItems){  // Check incoming parameters.  status = OK;  if (offset < 0 || len < 1)    status = BADSORTPARM;  else if (type != STRING && type != INTEGER && type != FLOAT)    status = BADSORTPARM;  else if (type == INTEGER && len != sizeof(int)	   || type == FLOAT && len != sizeof(float))    status = BADSORTPARM;  if (status != OK)    return;  // Must have space for at least 2 items (records) because otherwise  // items cannot be swapped and sorted!  if (maxItems < 2 || !(buffer = new SORTREC [maxItems])) {    status = INSUFMEM;    return;  }      status = sortFile();}// Sort file into sub-runs. The source file is split into runs// which have at most maxItems records each. That many records// are read into memory, sorted using qsort(3), and then written// to a temporary file.Status SortedFile::sortFile(){  Status status;  Record rec;  // Open source file.  // Start an unfiltered sequential scan.  hfs = new HeapFileScan(fileName, status);  if (status != OK) return status;  status = hfs->startScan(0, 0, STRING, NULL, EQ);  if (status != OK) return status;  // As long as the source file has more records, collect up to  // maxItems records into buffer and then dump records into  // temporary file.  do {    for(numItems = 0; numItems < maxItems; numItems++) {      // Fetch next record from source file, check if end of file.      if ((status = hfs->scanNext(buffer[numItems].rid)) == FILEEOF) break;      else if (status != OK) return status;      if ((status = hfs->getRecord(rec)) != OK) return status;      // Create space for holding a copy of the sorting attribute      // only (rest of record is read when temporary file is      // written). Copy sorting attribute from source record and      // store the length of the attribute (reccmp is general-      // purpose and can be shared by multiple instances of      // SortedFile!).      if (!(buffer[numItems].field = new char [length])) return INSUFMEM;      memcpy(buffer[numItems].field, (char *)rec.data + offset, length);      buffer[numItems].length = length;    }        // If at least 1 record in sub-run, sort records and write out    // to temporary file.    if (numItems > 0) {      if ((status = generateRun(numItems)) != OK) return status;      for(int i = 0; i < numItems; i++) delete [] buffer[i].field;    }  } while (numItems > 0);  // Terminate sequential scan on source file and close file.  delete hfs;  // Prepare a sequential scan on each sub-run so that next()  // can fetch next record from each run.  if ((status = startScans()) != OK) return status;  return OK;}// Sort the records in buffer[] (actually, the sorting attribute// plus the associated RID) and then dump records into temporary// file.Status SortedFile::generateRun(int items){  Status status;  // Sort buffer using library function qsort (quick sort). Use  // the appropriate comparison function for integers, floats,  // or strings (qsort can't take type as a parameter).  if (type == INTEGER)    qsort(buffer, items, sizeof(SORTREC), intcmp);  else if (type == FLOAT)    qsort(buffer, items, sizeof(SORTREC), floatcmp);  else    qsort(buffer, items, sizeof(SORTREC), stringcmp);  // If this is the first sub-run, malloc space for a RUN object,  // otherwise realloc more space. Note that on most systems  // realloc(NULL) could be used even when runs == NULL, but  // this doesn't work on all systems.  RUN newRun;  runs.push_back(newRun);  // If failed to create space for an additional run.   RUN & run = runs.back();  // Generate file name for temporary file.  stringstream  outputString;  outputString << fileName << ".sort." << runs.size() << ends;  run.name = outputString.str();#ifdef DEBUGSORT  cout << "%%  Writing " << items << " tuples to file " << run.name       << endl;#endif  // Make sure temporary file does not exist already. We don't  // want to corrupt somebody else's sorted files (on another  // attribute, for example).  if ((status = db.createFile(run.name)) != OK)    return status;                      // file must not exist already  if ((status = db.destroyFile(run.name)) != OK)    return status;                      // delete if successful  // Open a heap file. This will also create the temporary file.  if (!(run.outFile = new InsertFileScan(run.name, status))) return INSUFMEM;  if (status != OK) return status;  // Open input file  hfile = new HeapFile (fileName, status);  if (status != OK) return status;  // For each sort record (attribute plus RID) in the buffer, fetch  // the whole record from the source file and then insert it into  // the temporary file.  // cout << "%%  Writing " << items << " tuples to file " << run.name << endl;  for(int i = 0; i < items; i++) {    SORTREC* rec = &buffer[i];    RID rid;    Record record;    if ((status = hfile->getRecord(rec->rid, record)) != OK) return status;    if ((status = run.outFile->insertRecord(record, rid)) != OK) return status;  }  delete run.outFile;  delete hfile;  return OK;}// Prepare a sequential scan on each sub-run so that next()// can fetch the next record from each run. The valid bit of// each run is marked false to indicate that the (first)// record has not been fetched yet. next() must therefore// fetch it.Status SortedFile::startScans(){  Status status;  vector<RUN>::iterator run;  for(run = runs.begin(); run != runs.end(); run++)    {      run->inFile = new HeapFileScan(run->name, status);      if (status != OK) return status;      status = (run->inFile)->startScan(0, 0, STRING, NULL, EQ);      if (status != OK) return status;      run->valid = false;      run->rid.pageNo = -1;      run->rid.slotNo = -1;    }  return OK;}// Retrieve the next smallest record from the set of sorted sub-runs.// The next record of each sub-run is peeked to find out the// smallest of all. The pointer in the chosen sub-run is then// advanced.Status SortedFile::next(Record & rec){  Status status;  int i=0;  // Empty source file has zero sub-runs and causes  // end of file to be returned.  if (runs.size() <= 0) return FILEEOF;  // Find the run which has the smallest next record. If a run  // has false valid bit, it doesn't have the next record in memory  // yet.  RUN* smallest = NULL;  vector<RUN>::iterator run;  for(run = runs.begin(); run != runs.end(); run++, i++)    {      if (run->valid == false) {          // no record fetched yet for this run?	status = run->inFile->scanNext(run->rid);	if (status == FILEEOF)            // reached end of this run file?	  run->rid.pageNo = -1;           // mark end of file	else if (status != OK)	  return status;	else {                            // if next record exists, fetch it	  if ((status = run->inFile->getRecord(run->rec)) != OK)	    return status;	}	run->valid = true;                // a record is now in memory      }      if (run->rid.pageNo < 0)            // end of run already?	continue;      if (!smallest)                      // select first one as smallest	smallest = &(*run);      else if (reccmp((char *)smallest->rec.data + offset,		      (char *)run->rec.data + offset,		      length, length, type) > 0)	smallest = &(*run);    }    if (!smallest)                        // no next record found?    return FILEEOF;#ifdef DEBUGSORT  cout << "%%  Retrieved smallest from " << smallest->name << endl;#endif  rec = smallest->rec;               // give record pointers to caller  smallest->valid = false;              // must fetch new record next time  return OK;}// Remember a position in the sorted output so that the caller// can later return to this spot. Status SortedFile::setMark(){#ifdef DEBUGSORT  cout << "%%  Setting mark in file" << endl;#endif  vector<RUN>::iterator run;  for(run = runs.begin(); run != runs.end(); run++)  {      (run->inFile)->markScan();      run->mark.pageNo = run->rid.pageNo;      run->mark.slotNo = run->rid.slotNo;  }  return OK;}// Restore sort position by fetching the last marked record// This allows the caller to back up in the sorted sequence // (used in sort-merge join in case of duplicates).Status SortedFile::gotoMark(){#ifdef DEBUGSORT  cout << "%%  Going to a mark in file" << endl;#endif  Status status;  vector<RUN>::iterator run;  for(run = runs.begin(); run != runs.end(); run++)    {      status = (run->inFile)->resetScan();      if (status != OK) return status;      // restore rid info in the run      run->rid.pageNo = run->mark.pageNo;      run->rid.slotNo = run->mark.slotNo;      // Restore file position only if last marked position is      // something else than end of file.      if (run->rid.pageNo >= 0) {	if ((status = run->inFile->getRecord(run->rec)) != OK) return status;      }      // Current record is already in memory so next() must not      // advance in the temporary file.      run->valid = true;    }  return OK;}// Deallocate all space allocated for this sorted file and// delete temporary files.SortedFile::~SortedFile(){  for(unsigned int i = 0; i < runs.size(); i++) {    delete runs[i].inFile;    (void)db.destroyFile(runs[i].name);  }     delete [] buffer;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -