⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 out_sort.c

📁 外部排序程序
💻 C
📖 第 1 页 / 共 3 页
字号:
			(char *)(sourceArray+high*dbfHead.record_length),dbfHead.record_length);
		memcpy((char *)(sourceArray+high*dbfHead.record_length),\
			tmp,dbfHead.record_length);

		quickSort(sourceArray, first, high-1);
		quickSort(sourceArray, high+1, last);
}


/*
功能:调整参数parallel_num,max_mem
      调整依据是在[1..parallel_num]*[1..MERGE_K_MAX]表中选择某点合适的
	  max_mem=adjust1(parallel_num*(SORT_AREA_ROWS*dbfHead.record_length*adjust2(MERGE_K_MAX))
  */
void adjust_par(){
	unsigned long tmp=0;
	int j;

#ifdef UNIX
	int i;
	for(int i=1;i<parallel_num+1;i++){
		for(int j=1;j<MERGE_K_MAX+1;j++){
			tmp=i*(SORT_AREA_ROWS*dbfHead.record_length*j);
			//寻找点parallel_num先merge_k后
			if(tmp>max_mem)
				goto leb1;
		}
	}
leb1:
	parallel_num=i;
#else
		for(j=1;j<MERGE_K_MAX+1;j++){
			tmp=(SORT_AREA_ROWS*dbfHead.record_length*j);
			//寻找点merge_k
			if(tmp>max_mem)
				break;
		}
#endif
	merge_k=j-1;
#ifdef UNIX
	max_mem=parallel_num*SORT_AREA_ROWS*dbfHead.record_length*merge_k;
#else
    max_mem=SORT_AREA_ROWS*dbfHead.record_length*merge_k;
#endif
}

/*
功能:主函数入口
  */
void main(int argc,char * * args){
	int i=0;
	int k=0,file_nums=0;
	int tmp_files=0;              //tmp_files为快速排序段数
	char tmpFile[100]="";         //排序临时文件名字
	char tmppix[10]="";
	FILE * tmpF=NULL;             //生成中间文件
	char * buf_sort=NULL ;        //一个内存排序段
	unsigned int sort_area_rows=0;       //实际读入的文件长度
	DBF_FILE_ALL_FIELD allf;      //所有字段数据,写文件用到。

#ifdef UNIX
	pid_t pid;                   //进程号
	int shmid1=0;                //unix系统中共享内存的标示
	int parallel_now=0;          //现在的子进程数,不包括父进程。
	int parallel_all=0;          //所有的子进程,全部子进程
	parallel_num=DEFAULT_PAR;
	now_process=0;
	int process=0;
#endif

	FILE * src_file=NULL;
	FILE * tar_file=NULL;
	char tar_file_name[200]="";
	char buffer1[BUFFERSIZE];
	//char buffer2[BUFFERSIZE];
    golfileId=0;

    //获取参数
    strcpy(argsg,args[1]);

    if(argc<4 || argc>6){
		printf("ERROR parameters too few or too much!\n");
		useage();
	    exit(1);
	}
	if(argc>4){
        if(getParaValue(args,4,argc-1)==1){
			printf("get parameters vales error\n");
		    useage();
			exit(1);
		}
	}
	if(strcmp(args[1],args[2])==0){
		printf("name of source file can not same with target file name\n");
		exit(1);
	}

	//打开原文件
    if((src_file=openAFile(args[1],"rb"))==NULL){
		printf("can not open source file!");
		exit(1);
	}
	//打开目标文件
    //if((tar_file=openAFile(args[2],"w+b"))==NULL){
	//	printf("can not open target file!");
	//	exit(1);
	//}

	/* 设置文件缓冲区*/
	if( setvbuf(src_file, buffer1, _IOFBF, BUFFERSIZE) != 0){
		printf("Incorrect type or size of buffer for fileIn\n");
		exit(1);
	}
	
	//if( setvbuf(tar_file, buffer2, _IOFBF, BUFFERSIZE) != 0){
	//	printf("Incorrect type or size of buffer for fileOut\n");
	//	exit(1);
	//}

	/* 读dbf头文件信息 */
	fseek(src_file,0,SEEK_SET);
	fread(&dbfHead,sizeof(DBF_FILE_HEAD),1,src_file);

#ifdef HP_UX
	revert_unsigned_short(&dbfHead.head_length);		/* 用于定位到第一条记录*/
	revert_unsigned_long(&dbfHead.record_num);			/* 总的记录数*/
	revert_unsigned_short(&dbfHead.record_length);		/* 每条记录长度*/
#endif

#ifdef PRINT_MSG
	printf("dbfHead.head_length:%d\n",dbfHead.head_length);
	printf("dbfHead.record_num:%ld\n",dbfHead.record_num);
	printf("dbfHead.record_length:%d\n",dbfHead.record_length);
#endif

    //验证原文件的正确性
	if(verifyDbfFile(src_file)==-1){
		printf("source file is destoryed");
		exit(1);
	}
	//验证排序字段是否出现在原文件DBF头中
	fseek(src_file,(int)0x20,SEEK_SET);    //将指针掉向字段记录开始
	
	for(i=0;i<dbfHead.head_length/32-1;i++){
         fread(&dbfField,sizeof(DBF_FILE_FIELD),1,src_file);
		 if(i<(dbfHead.head_length/32-1)){
		    if(strcmp(args[3],dbfField.name)==0)
			    break;                     //最后dbffield是要排序的字段结构
		 }else if ((i==(dbfHead.head_length/32-1))&&strcmp(args[3],dbfField.name)!=0 ){
			 printf("sort column can not find in source file");
			 exit(1);
		 }
		 
	}
#ifdef HP_UX
	revert_unsigned_short(&dbfField.length);
#endif
	//读取全部头信息。
	if((allf.pttr=malloc(dbfHead.head_length-32))==NULL){
		printf("malloc memary to allf error !");
		exit(1);
	}
	fseek(src_file,(int)0x20,SEEK_SET);    //将指针掉向字段记录开始
    if((allf.len=fread(allf.pttr,dbfHead.head_length-32,1,src_file))!=1){
		printf("read the column message error \n");
		exit(1);
	}

	//如果记录长度超过RECORD_MAX则推出
    if(dbfField.length>(RECORD_MAX-1)){
		printf("the sort coloum is %s and length is %d \n",dbfField.name,dbfField.length);
		printf("%s,%d \n","sort column length must less then ",RECORD_MAX-1);
		exit(1);
	}

	//如果比较字符串,长度应该小于最长长度MAX_CMP_CHAR
    if(dbfField.type='C' && dbfField.length>MAX_CMP_CHAR-1){
		printf("%s,%d \n","sort column length must less then ",MAX_CMP_CHAR-1);
		exit(1);
	}
		

    /*检验参数正确性
	  file parameters can not adjust:
	      must: dbfHead.record_num*dbfHead.record_length<=SORT_AREA_ROWS*SORT_FILE_MAX*dbfHead.record_length
      memary parameter can adjust MERAGE_K_MAX:
	      must: max_mem>=min(parallel_num)*(SORT_AREA_ROWS*dbfHead.record_length*min(MERGE_K_MAX=2))
		  final:max_mem=adjust1(parallel_num*(SORT_AREA_ROWS*dbfHead.record_length*adjust2(MERGE_K_MAX))
    */

    if( (double)dbfHead.record_num*dbfHead.record_length>(double)SORT_AREA_ROWS*SORT_FILE_MAX*dbfHead.record_length ){
		printf("The source file is too large!it must less then %f \n", \
			   (double)SORT_AREA_ROWS*SORT_FILE_MAX*dbfHead.record_length);
		exit(1);
	}
    if( max_mem!=0 && max_mem<1*(SORT_AREA_ROWS*dbfHead.record_length*2 )){
		printf("The max memary is too small ");
		exit(1);
#ifdef UNIX
	}else if(max_mem!=0 && max_mem<parallel_num*(SORT_AREA_ROWS*dbfHead.record_length*MERGE_K_MAX)){
#else
    }else if(max_mem!=0 && max_mem<SORT_AREA_ROWS*dbfHead.record_length*MERGE_K_MAX){
#endif
		adjust_par();//调整参数。
	}else{
#ifdef UNIX
		max_mem=parallel_num*(SORT_AREA_ROWS*dbfHead.record_length*MERGE_K_MAX);
#else
        max_mem=SORT_AREA_ROWS*dbfHead.record_length*MERGE_K_MAX;
#endif
		merge_k=MERGE_K_MAX;
	}

	if((dbfHead.record_num%SORT_AREA_ROWS)!=0)
	    file_nums=(int)(dbfHead.record_num/SORT_AREA_ROWS)+1;
	else
		file_nums=(int)(dbfHead.record_num/SORT_AREA_ROWS);

	//分配排序临时文件空间,在Unix中为共享的内存。
#ifdef UNIX
    if((shmid1 = shmget(IPC_PRIVATE, sizeof(SORT_TEMP_FILE)*SORT_FILE_MAX, (SHM_R|SHM_W)))<0){
		printf("shmget error");
		exit (1);
	}
	if((sort_file = (SORT_TEMP_FILE *)shmat(shmid1,0,0)) == (void *)-1){
		printf("shmat error");
		exit(1);
	}
    //快速排序,在Unix系统中使用多进程

	for(k=0;k<parallel_num;k++){
		now_process++;
        process++;

		if(process>=MAX_PROCESS){
			printf("open too many process than %d and exit \n",MAX_PROCESS);
			exit(1);
		}		
        if((pid=fork())<0){
              printf("fork process error \n");
			  if(k!=0){k--;now_process--;}
	    	  continue;
		}else if(pid==0){   //父进程,不负责排序
			  parallel_now++;
			  parallel_all++;
			  tmp_files++;
			  if(parallel_now==parallel_num){
				  for(int v=0;v<parallel_num;v++){
					  wait(NULL);
				  }
				  //重新启动其它进程直道最后完成所有进程。
				  parallel_now=0;
                  if(parallel_all < ((int)(dbfHead.record_length/SORT_AREA_ROWS)+1) ){
                      continue;
				  }
				  //开始父进程其他作业,首先检查文件的完整性,然后才开始合并排序,在并行进程中必须这么做
				  for(k=0;k<tmp_files;k++){
				     if((sort_file[k].f=openAFile(sort_file[k].file,"rb"))!=NULL){
                         if(verifyDbfFile(sort_file[k].f)==-1){
						     printf("the temp quick sort file %s error \n",sort_file[k].file);
							 exit(1);
						 }
						 fclose(sort_file[k].f);
					 }
				  }

			  }
		}else{    
			//子进程作业,子进程首先快速排序。
         	for(tmp_files=0;tmp_files<file_nums;tmp_files++){
		        //临时文件后缀前加数字,这样后缀不变。
#ifdef PRINT_MSG
                printf("tmp_files=%d \n tmppix=%s \n",tmp_files,tmppix);
#endif
                itoa(tmp_files,tmppix,10);
				strcpy(tmpFile,args[1]);
				strcat(tmpFile,tmppix);
				strcat(tmpFile+strlen(args[1])+strlen(tmppix),".dbf");
                if((buf_sort=malloc(SORT_AREA_ROWS*dbfHead.record_length))==NULL){
                    printf("malloc memary in child process error and exit");
                    exit(1);
				}
                fseek(src_file,tmp_files*SORT_AREA_ROWS*dbfHead.record_length+dbfHead.head_length,SEEK_SET);    
                sort_area_rows=fread(buf_sort,dbfHead.record_length,SORT_AREA_ROWS,src_file);
				
				if((tmpF=openAFile(tmpFile,"w+b"))==NULL){
					printf("open files %s failed",tmpFile);
					exit(1);
				}

                quickSort(buf_sort,0,sort_area_rows-1);
                if(writeAFile(buf_sort,sort_area_rows,tmpF,allf,1,0)==-1){
                     printf("write a dbf file %s failed \n ",tmpFile);
                     fclose(tmpF);
                     exit(1);
				}
                fclose(tmpF);
				free(buf_sort);
                sort_file[tmp_files].fileid=tmp_files;
                if(dbfHead.record_num%SORT_AREA_ROWS!=0)
                     sort_file[tmp_files].max=((int)(dbfHead.record_num/SORT_AREA_ROWS)+1);
                else sort_file[tmp_files].max=(int)(dbfHead.record_num/SORT_AREA_ROWS);
                strcpy(sort_file[tmp_files].file,tmpFile);
                sort_file[tmp_files].used=0;
			}
			//成功退出
			exit(0);

		}
	}

#else
	if((sort_file = (SORT_TEMP_FILE *)(malloc(sizeof(SORT_TEMP_FILE)*SORT_FILE_MAX)))==NULL){
		  printf("Assign memary for sort temp file message failed");
		  exit (1);
	}

	//快速排序
	for(tmp_files=0;tmp_files<file_nums;tmp_files++){
		        //临时文件后缀前加数字,这样后缀不变。
                itoa(tmp_files,tmppix,10);
				strcpy(tmpFile,args[1]);
				strcat(tmpFile,tmppix);
				strcat(tmpFile+strlen(args[1])+strlen(tmppix),".dbf");
                if((buf_sort=malloc(SORT_AREA_ROWS*dbfHead.record_length))==NULL){
                    printf("malloc memary in child process error and exit");
                    exit(1);
				}
                fseek(src_file,tmp_files*SORT_AREA_ROWS*dbfHead.record_length+dbfHead.head_length,SEEK_SET);    
                sort_area_rows=fread(buf_sort,dbfHead.record_length,SORT_AREA_ROWS,src_file);
				
				if((tmpF=openAFile(tmpFile,"w+b"))==NULL){
					printf("open files %s failed",tmpFile);
					exit(1);
				}

                quickSort(buf_sort,0,sort_area_rows-1);
                if(writeAFile(buf_sort,sort_area_rows,tmpF,allf,1,0)==-1){
                     printf("write a dbf file %s failed \n ",tmpFile);
                     fclose(tmpF);
                     exit(1);
				}
                fclose(tmpF);
				free(buf_sort);
                sort_file[tmp_files].fileid=tmp_files;
                if(dbfHead.record_num%SORT_AREA_ROWS!=0)
                     sort_file[tmp_files].max=((int)(dbfHead.record_num/SORT_AREA_ROWS)+1);
                else sort_file[tmp_files].max=(int)(dbfHead.record_num/SORT_AREA_ROWS);
                strcpy(sort_file[tmp_files].file,tmpFile);
                sort_file[tmp_files].used=0;
	}
#endif

	//合并排序
    if(merge_sort(sort_file,tar_file_name,merge_k,allf)==-1){
		printf("merge sort error ans exit");
		exit(1);
	}

	//改名
	rename(tar_file_name,args[2]);

    if((tar_file=openAFile(args[2],"rb"))==NULL){
		printf("can not open target file!");
		exit(1);
	}
	//判断文件完整性
	if(verifyDbfFile(tar_file)==-1){
		printf("target file is destoryed");
		exit(1);
	}

	//打印成功标致,退出
	printf("file out sort succeeful");

	fclose(src_file);
	fclose(tar_file);
    free(sort_file);

	exit(0);
}
 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -