⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bigsort.c

📁 b tree how to operate on b tr
💻 C
📖 第 1 页 / 共 2 页
字号:
/*JS***********************************************************************    Program : BIGSORT*    Language: ANSI-C*    Author  : Joerg Schoen*    Purpose : Sort huge amounts of data by doing a multi-phase*              sorting on temporary files. For a description of the*              algorithm see Section 2.4.4 in*                Niklaus Wirth: "Algorithmen und Datenstrukturen mit*                                Modula-2"**  This implementation automatically recognizes if the input data fits*   completely into memory and then does a in-place sort via quicksort.**************************************************************************/#ifndef lintstatic const char rcsid[] = "$Id: bigsort.c,v 1.12 1998/04/01 17:07:48 joerg Stab joerg $";#endif/*********     INCLUDE                                         *********/#include <jsalloca.h>#include <stddef.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <jsconfig.h>#include <jssubs.h>#ifndef CONFIG_NO_POSIX# include <unistd.h>#endif/*#define DEBUG*//*********     DEFINES                                          *********/#ifndef BSH_File/*  START-DEFINITIONS  */#include <stdio.h>   /*	 For "FILE" definition    *//*  Structure to access a sorted stream  */typedef struct {  /*  0: Memory list ("BSH_Buff"); 1: File access ("BSH_File")  */  int     BSH_Type;  long    BSH_Len,BSH_Count,BSH_Curr;  union {    FILE  *bsh_File;    void **bsh_Buff;  } c;} BigSortHandle;#define BSH_File  c.bsh_File#define BSH_Buff  c.bsh_Buff/*  For variable mode from bigSortOpen  */#define BIGSORT_FDISTINCT       (1<<0) /* remove duplicate entries  *//*  Obtain length of sorted stream  */#define bigSortLength(bsh)      ((bsh)->BSH_Count)/*  Seek or rewind a stream and get current position  */#define bigSortSeek(bsh,pos)    ((bsh)->BSH_Curr = (pos))#define bigSortRewind(bsh)      bigSortSeek(bsh,0)#define bigSortCurrPos(bsh)     ((bsh)->BSH_Curr)/*  Get next set in sequence (sequential access)  */#define bigSortNext(bsh,buff)   bigSortGet(bsh,-1,buff)/*  Same as bigSortNext, but do not advance internal pointer  */#define bigSortPeek(bsh,buff)   bigSortGet(bsh,-2,buff)/*  END-DEFINITIONS  */#endif/*  Align fields for comparison routine  *//*#define CONFIG_BIGSORT_ALIGN*//*  Maximum allocated memory  */#ifndef BIGSORT_MAXMEM# define BIGSORT_MAXMEM   500000#endif/*  Internal structures  */typedef struct {  FILE *SEQ_File;  char *SEQ_Buffer;  long  SEQ_Count;  int   SEQ_NRuns,SEQ_DiffRuns;} Sequence;typedef struct {  long      BSI_Len;  void     *BSI_UPtr;  int     (*BSI_Compare)(void *uptr,const void *a,const void *b);  long      BSI_Count;  Sequence *BSI_Seqs;  int       BSI_NSeqs;  int       BSI_Level;  int       BSI_Curr;  /*  current output sequence  */  void    **BSI_Temp;} BigSort;#define Prototype extern/*********     PROTOTYPES                                       *********/Prototype BigSortHandle *bigSortOpen(BigSortHandle *bsh,long len,				     int (*get)(void *uptr,void *buff,long nr),				     int (*compare)(void *uptr,						    const void *a,						    const void *b),				     void *uPtr,int mode);Prototype int            bigSortClose(BigSortHandle *bsh,int mode);Prototype long           bigSortGet(BigSortHandle *bsh,long nr,void *buff);Prototype long           bigSortFind(BigSortHandle *bsh,const void *cont,				     int (*compare)(void *uptr,const void *a,						    const void *b),void *uPtr,				     void *dest);static int               distRuns(BigSort *pbs,int n,				  int (*get)(void *uptr,void *buff,long nr),				  int mode);static void              bsSift(void **array,int (*compare)(void *uptr,							    const void *a,							    const void *b),				void *uPtr,long l,long r);static void              sortSpecial(void **array,long n,				     int (*compare)(void *uptr,const void *a,						    const void *b),				     void *uPtr);static int               selectRun(BigSort *pbs);static int               mergeRuns(BigSort *pbs,int mode);static FILE             *bsTmpFile(void);Prototype long           BSortMaxMem;Prototype int            BSortNSeqs;Prototype const char    *BSortTempDir;/*********     GLOBAL VARIABLES                                 *********/long BSortMaxMem = BIGSORT_MAXMEM;int BSortNSeqs = 32; /*  at least 3  */const char *BSortTempDir;/* ERROR-DEFINITIONS from bigSortOpen label _ERR_BSORTOPEN ord 18   BSortMaxMem to small*//*JS**********************************************************************   Reads a stream until EOF is signaled and sorts the units of size len*    using the given comparison function. The routine get must return 0*    on success, -1 on error and 1 on EOF. A handle is returned which*    allows direct or sequential access to the sorted units. If no*    comparison routine is given (compare == NULL), the stream is simply*    transferred to a direct-access one.*************************************************************************/BigSortHandle *bigSortOpen(BigSortHandle *bsh,long len,			   int (*get)(void *uptr,void *buff,long nr),			   int (*compare)(void *uptr,const void *a,					  const void *b),void *uPtr,int mode)/************************************************************************/{  long i,j;  BigSort bs;  int nSeqs;  /*  Set up internal structure  */  bs.BSI_Len     = len;  bs.BSI_UPtr    = uPtr;  bs.BSI_Compare = compare;  bs.BSI_NSeqs   = nSeqs = BSortNSeqs;  bs.BSI_Level   = 0;  bs.BSI_Curr    = 0;  bs.BSI_Count   = 0;  bs.BSI_Temp    = NULL;  /* *** 1. Distribute elements  *** */  /*  How much elements may we store in memory (at least 2)?  */  if((i = (BSortMaxMem - nSeqs * (len + sizeof(void *))) /      (len + sizeof(void *))) < 2) {    JSErrNo = _ERR_BSORTOPEN + 0;    goto error2;  }  if((j = distRuns(&bs,i,get,mode)) < 0) goto error2;  if(j > 0) {    /* ***  Simple case: All elements fit in memory, so stop  *** */    if(bsh == NULL && (bsh = (BigSortHandle *)malloc(sizeof(*bsh))) == NULL)      goto error2;    bsh->BSH_Type  = 0;    bsh->BSH_Len   = len;    bsh->BSH_Curr  = 0;    bsh->BSH_Count = bs.BSI_Count;    bsh->BSH_Buff  = bs.BSI_Temp;    return(bsh);  }  /* *** 2. Mix sequences  *** */  if(compare) {    if(mergeRuns(&bs,mode) < 0) goto error;#ifdef DEBUG    if(bs.BSI_Seqs[0].SEQ_Count != bs.BSI_Count)      fprintf(stderr,"ERROR: Different count %ld != %ld!\n",	      bs.BSI_Seqs[0].SEQ_Count,bs.BSI_Count);#endif  }  /* ***  Finished, set up return value  *** */  if(bsh == NULL && (bsh = (BigSortHandle *)malloc(sizeof(*bsh))) == NULL)    goto error;  bsh->BSH_Type  = 1;  bsh->BSH_Len   = len;  bsh->BSH_Curr  = 0;  bsh->BSH_Count = bs.BSI_Count;  bsh->BSH_File  = bs.BSI_Seqs[0].SEQ_File;  /*  Close everything and clean up  */#ifdef CONFIG_BIGSORT_ALIGN  free(bs.BSI_Seqs[0].SEQ_Buffer);#endif  for(i = 1 ; i < bs.BSI_NSeqs ; i++) {    if(bs.BSI_Seqs[i].SEQ_File && fclose(bs.BSI_Seqs[i].SEQ_File)) goto error;#ifdef CONFIG_BIGSORT_ALIGN    free(bs.BSI_Seqs[i].SEQ_Buffer);#endif  }  free(bs.BSI_Seqs);#ifndef CONFIG_BIGSORT_ALIGN  /*  Now its time to free temporary work space from distRuns  */  free(bs.BSI_Temp);#endif  return(bsh);error:  if(bs.BSI_Seqs) {#ifdef CONFIG_BIGSORT_ALIGN    free(bs.BSI_Seqs[0].SEQ_Buffer);#endif    for(i = 1 ; i < bs.BSI_NSeqs ; i++) {      if(bs.BSI_Seqs[i].SEQ_File && fclose(bs.BSI_Seqs[i].SEQ_File))	goto error;#ifdef CONFIG_BIGSORT_ALIGN      free(bs.BSI_Seqs[i].SEQ_Buffer);#endif    }    free(bs.BSI_Seqs);  }#ifndef CONFIG_BIGSORT_ALIGN  free(bs.BSI_Temp);#endiferror2:  return(NULL);}/*JS**********************************************************************   Closes the handle obtained by bigSortOpen. If mode is non-zero, bsh*    itself is not deallocated.*************************************************************************/int bigSortClose(BigSortHandle *bsh,int mode)/************************************************************************/{  if(bsh->BSH_Type == 0) {#ifdef CONFIG_BIGSORT_ALIGN    long i;    for(i = 0 ; i < bsh->BSH_Count ; i++) free(bsh->BSH_Buff[i]);#endif    free(bsh->BSH_Buff);  } else if(fclose(bsh->BSH_File)) {    if(!mode) free(bsh);    return(-1);  }  if(!mode) free(bsh);  return(0);}/*JS**********************************************************************   Provides direct access to the sorted stream. Read set nr (0 .. n-1)*    into buff and return ordinal of it. On error -1 is returned and -2*    on EOF. If nr == -1, the next set in the sequence is returned*    (sequential access). nr == -2 is similar to nr == -1, but the*    current position is not moved.*************************************************************************/long bigSortGet(BigSortHandle *bsh,long nr,void *buff)/************************************************************************/{  long number,len;  /*  Current position  */  number = (nr >= 0) ? nr : bsh->BSH_Curr;  /*  Check for EOF  */  if(number >= bsh->BSH_Count) return(-2);  len = bsh->BSH_Len;  if(bsh->BSH_Type == 0) {    /*  Memory list  */    memcpy(buff,bsh->BSH_Buff[number],len);  } else if((nr >= 0 && fseek(bsh->BSH_File,number * len,SEEK_SET)) ||	    fread(buff,len,1,bsh->BSH_File) != 1 ||	    /*  Go back to current position if required  */	    (nr == -2 && fseek(bsh->BSH_File,number * len,SEEK_SET)))    return(-1);  if(nr != -2) bsh->BSH_Curr = number + 1;  return(number);}/*JS**********************************************************************   Provides direct access to the stream, allowing to search for a*    specific set.*************************************************************************/long bigSortFind(BigSortHandle *bsh,const void *cont,		 int (*compare)(void *uptr,const void *a,const void *b),		 void *uPtr,void *dest)/************************************************************************/{  long l,r;  l = 0;  r = bsh->BSH_Count;  if(bsh->BSH_Type == 1) {    void *temp;    if((temp = malloc(bsh->BSH_Len)) == NULL) return(-1);    /* **  Binary search in file  ** */    while(l < r) {      long m = (l + r) / 2;      if(fseek(bsh->BSH_File,m * bsh->BSH_Len,SEEK_SET) ||	 fread(temp,bsh->BSH_Len,1,bsh->BSH_File) != 1) {	free(temp);	return(-1);      }      if((*compare)(uPtr,temp,cont) < 0)	l = m + 1;      else	r = m;    }    free(temp);    if(dest && (fseek(bsh->BSH_File,l * bsh->BSH_Len,SEEK_SET) ||		fread(dest,bsh->BSH_Len,1,bsh->BSH_File) != 1))      return(-1);    return(l);  }  /* **  Binary search in memory list  ** */  while(l < r) {    long m = (l + r) / 2;    if((*compare)(uPtr,bsh->BSH_Buff[m],cont) < 0)      l = m + 1;    else      r = m;  }  if(dest) memcpy(dest,bsh->BSH_Buff[l],bsh->BSH_Len);  return(l);}/*JS**********************************************************************   Distribute initial data to the sequences, sorting chunks of a fixed*    size using a modified heap sort.*************************************************************************/static int distRuns(BigSort *pbs,int n,		    int (*get)(void *uptr,void *buff,long nr),int mode)/************************************************************************/{  void **a,*tmp;  long i,len,s,r;  len = pbs->BSI_Len;  /*  Set up temporary array  */  if((a = (void **)#ifdef CONFIG_BIGSORT_ALIGN      malloc((n + 1) * sizeof(*a))#else      malloc((n + 1) * (sizeof(*a) + len) + sizeof(*a))#endif      ) == NULL) goto error;#ifndef CONFIG_BIGSORT_ALIGN  /*  Hack: Assuming most strict alignment for type "double" and   *   sizeof(double) == 2 * sizeof(void *), the following   *   calculation makes sure that fields are correctly aligned.   */  a[0] = (void *)&a[(n + 2) & ~1];#endif  /* ***  To recognize small amounts of data more quickly,   * ***   try to read first as much data as possible.   */  for(i = 0 ; i < n ; i++) {    int ret;#ifdef CONFIG_BIGSORT_ALIGN    if((a[i] = (void *)malloc(len)) == NULL) goto error;#else    if(i) a[i] = (char *)(a[i - 1]) + len;#endif    if((ret = (*get)(pbs->BSI_UPtr,a[i],pbs->BSI_Count)) < 0) goto error;    /*  EOF?  */    if(ret > 0) break;    pbs->BSI_Count++;  }  if(i < n) {#ifndef CONFIG_BIGSORT_ALIGN    char *from,*to;    ptrdiff_t diff;#endif    /*  Special case: Everything in memory. Sort a[0 .. i-1]     *   if required and return     */#ifdef CONFIG_BIGSORT_ALIGN    free(a[i]);#endif    if(pbs->BSI_Compare) {      sortSpecial(a,i,pbs->BSI_Compare,pbs->BSI_UPtr);      if((mode & BIGSORT_FDISTINCT) && i > 1) {	/*  Remove duplicate entries  */	for(s = 0, r = 1 ; r < i ; r++) {	  if((*(pbs->BSI_Compare))(pbs->BSI_UPtr,a[s],a[r]) == 0) {#ifdef CONFIG_BIGSORT_ALIGN	    free(a[r]);#endif	    continue;	  }	  s++;	  if(s < r)#ifdef CONFIG_BIGSORT_ALIGN	    a[s] = a[r];#else	    memcpy(a[s],a[r],len);#endif	}	pbs->BSI_Count = ++s;	/*  In case we did not align, we cannot reduce memory by setting	 *   i = s, since the sorting routine shuffled everything around.	 */#ifdef CONFIG_BIGSORT_ALIGN	i = s;#endif      }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -