📄 bigsort.c
字号:
#ifndef CONFIG_BIGSORT_ALIGN from = (char *)&a[(n + 2) & ~1]; to = (char *)&a[(i + 2) & ~1]; if(to != from) memmove(to,from,i * len); /* Minimize memory requirement */ if((pbs->BSI_Temp = (void **)realloc(a,(i + 1) * sizeof(*a) + i * len + sizeof(*a))) == NULL) goto error; /* We do not know if realloc to a smaller size may change the pointer */ diff = ((char *)pbs->BSI_Temp - (char *)a) + (to - from); if(diff != 0) while(--i >= 0) pbs->BSI_Temp[i] = (char *)(pbs->BSI_Temp[i]) + diff;#else if((pbs->BSI_Temp = (void **)realloc(a,i * sizeof(*a))) == NULL) goto error;#endif return(1); } if(pbs->BSI_Compare == NULL) { int ret; /* If no comparison routine given, simply copy everything to * one stream and return */ pbs->BSI_NSeqs = 1; if((pbs->BSI_Seqs = (Sequence *)malloc(sizeof(*(pbs->BSI_Seqs)))) == NULL || (pbs->BSI_Seqs[0].SEQ_File = bsTmpFile()) == NULL) goto error; for(i = 0 ; i < n ; i++) if(fwrite(a[i],len,1,pbs->BSI_Seqs[0].SEQ_File) != 1) goto error; while((ret = (*get)(pbs->BSI_UPtr,a[0],pbs->BSI_Count)) == 0) { (pbs->BSI_Count)++; if(fwrite(a[0],len,1,pbs->BSI_Seqs[0].SEQ_File) != 1) goto error; } if(ret < 0) goto error;#ifdef CONFIG_BIGSORT_ALIGN for(i = 0 ; i < n ; i++) free(a[i]);#endif free(a); return(0); } /* Set up final element */#ifdef CONFIG_BIGSORT_ALIGN if((a[n] = (void *)malloc(len)) == NULL) goto error;#else a[n] = (char *)a[n - 1] + len;#endif /* *** Now initialize all streams *** */ if((pbs->BSI_Seqs = (Sequence *)#ifdef CONFIG_BIGSORT_ALIGN malloc(pbs->BSI_NSeqs * sizeof(*(pbs->BSI_Seqs)))#else malloc(pbs->BSI_NSeqs * (sizeof(*(pbs->BSI_Seqs)) + len) + sizeof(*(pbs->BSI_Seqs)))#endif ) == NULL) goto error;#ifndef CONFIG_BIGSORT_ALIGN pbs->BSI_Seqs[0].SEQ_Buffer = (char *)&pbs->BSI_Seqs[(pbs->BSI_NSeqs + 1) & ~1];#endif for(i = 0 ; i < pbs->BSI_NSeqs ; i++) { pbs->BSI_Seqs[i].SEQ_File = NULL;#ifdef CONFIG_BIGSORT_ALIGN if((pbs->BSI_Seqs[i].SEQ_Buffer = (char *)malloc(len)) == NULL) goto error;#else if(i) pbs->BSI_Seqs[i].SEQ_Buffer = pbs->BSI_Seqs[i - 1].SEQ_Buffer + len;#endif pbs->BSI_Seqs[i].SEQ_NRuns = pbs->BSI_Seqs[i].SEQ_DiffRuns = (i < (pbs->BSI_NSeqs-1)) ? 1 : 0; pbs->BSI_Seqs[i].SEQ_Count = 0; } if(selectRun(pbs) < 0) goto error; /* *** Transfer array into a valid heap *** */ for(i = n / 2 ; i > 0 ; ) bsSift(a,pbs->BSI_Compare,pbs->BSI_UPtr,--i,n); /* *** Now read all elements and distribute them *** */ for(s = r = n ; ; ) {#if 0 printf("DIST %d %d\n",s,r); for(i = 0 ; i < r ; i++)printf(" a[%d]='%s'\n",i,a[i]);#endif /* Write a[0] */ tmp = a[0]; a[0] = pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_Buffer; pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_Buffer = tmp; if(fwrite(tmp,len,1,pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_File) != 1) goto error; (pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_Count)++; /* tmp contains the last written field */ if(r == n) { int ret; if((ret = (*get)(pbs->BSI_UPtr,a[n],pbs->BSI_Count)) < 0) goto error; if(ret > 0) { /* EOF */ r--; } else { (pbs->BSI_Count)++; if((*(pbs->BSI_Compare))(pbs->BSI_UPtr,tmp,a[n]) <= 0) { /* Element belongs to same run, put in lower heap */ tmp = a[0]; a[0] = a[n]; a[n] = tmp; bsSift(a,pbs->BSI_Compare,pbs->BSI_UPtr,0,s); continue; } } } else r--; /* Put element in upper heap */ s--; if(s > 0) { tmp = a[0]; a[0] = a[s]; a[s] = tmp; bsSift(a,pbs->BSI_Compare,pbs->BSI_UPtr,0,s); } if(s < r) { tmp = a[s]; a[s] = a[r]; a[r] = tmp; if(s < n/2) bsSift(a,pbs->BSI_Compare,pbs->BSI_UPtr,s,r); } /* This run finished */ if(s == 0) { /* EOF reached? */ if(r == 0) break; /* Start next run */ if(selectRun(pbs) < 0) goto error; s = r; } }#ifdef CONFIG_BIGSORT_ALIGN for(i = 0 ; i <= n ; i++) free(a[i]); free(a);#else /* Since malloced region a contains buffers of streams, do not free now */ pbs->BSI_Temp = a;#endif return(0);error: return(-1);}/*JS********************************************************************** Sift routine, see a documentation for heapsort for an explanation.*************************************************************************/static void bsSift(void **array,int (*compare)(void *uptr,const void *a, const void *b),void *uPtr, long l,long r)/************************************************************************/{ long j; void *temp; /* Get first element to sink in heap */ temp = array[l]; while((j = (l << 1) + 1) < r) { if(j < (r - 1) && (*compare)(uPtr,array[j],array[j + 1]) > 0) j++; if((*compare)(uPtr,temp,array[j]) <= 0) break; /* Copy lower element to top */ array[l] = array[j]; l = j; } /* Now the first element has reached its place */ array[l] = temp; return;}/*JS********************************************************************** If the stream fits completely into memory, use a quicksort to sort it.*************************************************************************//* Generated automatically with "gensort.h" */#define GENSORT_NAME sortSpecial#define GENSORT_ARGS ,compare,uPtr#define GENSORT_ARGSPROTO ,int (*compare)(void *uptr, \ const void *a,const void *b), \ void *uPtr#define GENSORT_TYPE void *#define GENSORT_KEYTYPE void *#define GENSORT_GETKEY(a) a#define GENSORT_COMPAREKEYS(k1,k2) (*compare)(uPtr,k1,k2) < 0static /* Generated as a static routine */#include <gensort.h>/*JS********************************************************************** Select the next sequence to copy the next run to.*************************************************************************/static int selectRun(BigSort *pbs)/************************************************************************/{ if(pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_DiffRuns < pbs->BSI_Seqs[pbs->BSI_Curr + 1].SEQ_DiffRuns) { (pbs->BSI_Curr)++; } else if(pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_DiffRuns == 0) { int i,nRuns0; (pbs->BSI_Level)++; nRuns0 = pbs->BSI_Seqs[0].SEQ_NRuns; for(i = 0 ; i < (pbs->BSI_NSeqs - 1) ; i++) { int save; save = pbs->BSI_Seqs[i].SEQ_NRuns; pbs->BSI_Seqs[i].SEQ_NRuns = nRuns0 + pbs->BSI_Seqs[i + 1].SEQ_NRuns; pbs->BSI_Seqs[i].SEQ_DiffRuns = pbs->BSI_Seqs[i].SEQ_NRuns - save; } pbs->BSI_Curr = 0; } else pbs->BSI_Curr = 0; (pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_DiffRuns)--; if(pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_File == NULL && (pbs->BSI_Seqs[pbs->BSI_Curr].SEQ_File = bsTmpFile()) == NULL) return(-1); return(0);}/*JS********************************************************************** The I/O intensive sorting is done here: Merge runs on nSeqs-1 input* streams to one output stream until one stream is exhausted. Then* exchange the output stream with the empty input stream and start* over. The number of runs on the input stream (i.e. real ones and* pseudo runs) must fulfill the condition of the Fibonacci number of* a certain order.*************************************************************************/static int mergeRuns(BigSort *pbs,int mode)/************************************************************************/{ int *tr,i,nSeqs; long len; len = pbs->BSI_Len; nSeqs = pbs->BSI_NSeqs; if((tr = (int *)#ifdef CONFIG_USE_ALLOCA alloca(nSeqs * sizeof(*tr))#else malloc(nSeqs * sizeof(*tr))#endif ) == NULL) goto error; /* Initialize reading for input sequences */ for(i = 0 ; i < (nSeqs - 1) ; i++) if(pbs->BSI_Seqs[i].SEQ_Count > 0 && (fseek(pbs->BSI_Seqs[i].SEQ_File,0L,SEEK_SET) || fread(pbs->BSI_Seqs[i].SEQ_Buffer,len,1,pbs->BSI_Seqs[i].SEQ_File) != 1)) goto error; /* Create missing file for merging */ if((pbs->BSI_Seqs[nSeqs - 1].SEQ_File = bsTmpFile()) == NULL) goto error; do { long remRuns; int k,mi; /* *** Mix required number of runs for this level from sequences * *** 0 .. nSeqs-2 to output sequence nSeqs-1. */ /* Clear output sequence */ if(fseek(pbs->BSI_Seqs[nSeqs - 1].SEQ_File,0L,SEEK_SET) == -1#ifndef CONFIG_NO_POSIX /* Truncate the output file to zero length */ || ftruncate(fileno(pbs->BSI_Seqs[nSeqs - 1].SEQ_File),0) == -1#endif ) goto error; pbs->BSI_Seqs[nSeqs - 1].SEQ_DiffRuns = 0; pbs->BSI_Seqs[nSeqs - 1].SEQ_Count = 0;#if 0 fprintf(stderr,"Level %d\n",pbs->BSI_Level); for(k = i = 0 ; i < nSeqs ; i++) { fprintf(stderr," Seq[%d]: a %d d %d n %d\n",1+i, pbs->BSI_Seqs[i].SEQ_NRuns,pbs->BSI_Seqs[i].SEQ_DiffRuns, pbs->BSI_Seqs[i].SEQ_Count); k += pbs->BSI_Seqs[i].SEQ_Count; } fprintf(stderr,"%d on sequences.\n",k);#endif for(remRuns = pbs->BSI_Seqs[nSeqs - 2].SEQ_NRuns ; remRuns > 0 ; remRuns--) { /* Count number of active sequences */ for(k = i = 0 ; i < (nSeqs - 1) ; i++) if(pbs->BSI_Seqs[i].SEQ_DiffRuns > 0) { /* Pseudo run on sequence i */ (pbs->BSI_Seqs[i].SEQ_DiffRuns)--; } else if(pbs->BSI_Seqs[i].SEQ_Count > 0) { tr[k++] = i; } if(k == 0) { /* All sequences have at least one pseudo run, so * record one pseudo run for destination. */ (pbs->BSI_Seqs[nSeqs - 1].SEQ_DiffRuns)++; continue; } /* ** Now copy one run from sequences t[0] .. t[k-1] to nSeqs-1 ** */ do { char *tmp; /* Find minimal element */ for(mi = 0, i = 1 ; i < k ; i++) if((*(pbs->BSI_Compare))(pbs->BSI_UPtr, pbs->BSI_Seqs[tr[i]].SEQ_Buffer, pbs->BSI_Seqs[tr[mi]].SEQ_Buffer) < 0) mi = i; /* Write minimal element from sequence tr[mi] to sequence nSeqs-1 */ tmp = pbs->BSI_Seqs[tr[mi]].SEQ_Buffer; pbs->BSI_Seqs[tr[mi]].SEQ_Buffer = pbs->BSI_Seqs[nSeqs - 1].SEQ_Buffer; pbs->BSI_Seqs[nSeqs - 1].SEQ_Buffer = tmp; if(fwrite(tmp,len,1,pbs->BSI_Seqs[nSeqs - 1].SEQ_File) != 1) goto error; (pbs->BSI_Seqs[nSeqs - 1].SEQ_Count)++; (pbs->BSI_Seqs[tr[mi]].SEQ_Count)--; /* Read next element from input sequence, check for EOF */ if(pbs->BSI_Seqs[tr[mi]].SEQ_Count > 0 && fread(pbs->BSI_Seqs[tr[mi]].SEQ_Buffer,len,1, pbs->BSI_Seqs[tr[mi]].SEQ_File) != 1) goto error; /* Check for end of run */ if(pbs->BSI_Seqs[tr[mi]].SEQ_Count == 0 || (*(pbs->BSI_Compare))(pbs->BSI_UPtr,tmp, pbs->BSI_Seqs[tr[mi]].SEQ_Buffer) > 0) { /* Remove this sequence from input sequences */ tr[mi] = tr[--k]; } } while(k > 0); } /* *** Rotate sequences *** */ { Sequence save; long diff; save = pbs->BSI_Seqs[nSeqs - 1]; diff = pbs->BSI_Seqs[nSeqs - 2].SEQ_NRuns; for(i = nSeqs - 1 ; i > 0 ; i--) { pbs->BSI_Seqs[i] = pbs->BSI_Seqs[i - 1]; pbs->BSI_Seqs[i].SEQ_NRuns -= diff; } pbs->BSI_Seqs[0] = save; pbs->BSI_Seqs[0].SEQ_NRuns = diff; } /* Initialize read on former output sequence */ if(fseek(pbs->BSI_Seqs[0].SEQ_File,0L,SEEK_SET) || fread(pbs->BSI_Seqs[0].SEQ_Buffer,len,1,pbs->BSI_Seqs[0].SEQ_File) != 1) goto error; (pbs->BSI_Level)--; } while(pbs->BSI_Level >= 0); if(mode & BIGSORT_FDISTINCT) { /* Remove duplicate entries on stream */ FILE *fp; char *buff0,*buff1; long s,r,n; fp = pbs->BSI_Seqs[0].SEQ_File; buff0 = pbs->BSI_Seqs[0].SEQ_Buffer; buff1 = pbs->BSI_Seqs[1].SEQ_Buffer; n = pbs->BSI_Count; if(fseek(fp,0L,SEEK_SET) || fread(buff0,len,1,fp) != 1) goto error; for(s = 0, r = 1 ; r < n ; r++) { if(fread(buff1,len,1,fp) != 1) goto error; if((*(pbs->BSI_Compare))(pbs->BSI_UPtr,buff0,buff1) == 0) continue; s++; memcpy(buff0,buff1,len); if(s < r && (fseek(fp,s * len,SEEK_SET) || fwrite(buff0,len,1,fp) != 1 || fseek(fp,(r + 1) * len,SEEK_SET))) goto error; } pbs->BSI_Count = ++s;#ifndef CONFIG_NO_POSIX /* Truncate the output file */ if(ftruncate(fileno(fp),s * len) == -1) goto error;#endif } /* Rewind resulting sequence */ if(fseek(pbs->BSI_Seqs[0].SEQ_File,0L,SEEK_SET)) goto error;#ifndef CONFIG_USE_ALLOCA free(tr);#endif return(0);error:#ifndef CONFIG_USE_ALLOCA if(tr) free(tr);#endif return(-1);}/*JS********************************************************************** Returns a temporary file. May be changed to use a different directory* for temporary files.*************************************************************************/static FILE *bsTmpFile(void)/************************************************************************/{ FILE *fp; if(BSortTempDir) { char name[JS_FILENAME_MAX]; static int Counter;#ifndef CONFIG_NO_POSIX sprintf(name,"%s/bstp%d_%01d",BSortTempDir,(int)getpid(),Counter);#else sprintf(name,"%s/bstmp%02d",BSortTempDir,Counter);#endif Counter++; if((fp = fopen(name,"wb+")) == NULL) return(NULL); if(remove(name) < 0) { fclose(fp); return(NULL); } } else fp = tmpfile(); return(fp);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -