⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 audcreindex.cpp

📁 查重
💻 CPP
📖 第 1 页 / 共 2 页
字号:

/* Format one duplicate-check key as a line of the index file.          */
/* Index file format: msisdn+skey1+skey2                                */
/*                                                                      */
/* buf       receives the record (27 characters plus the terminator)    */
/* callDate  call date; the two characters at offset 8 are emitted      */
/* pos       bucket number, printed as 4 digits                         */
/* key1      first key; printed in two parts, split at 10000            */
/* key2      second key, printed as 9 digits                            */
void formatKey(char * buf,char * callDate,int pos,int key1,int key2)
{
	enum { KEY_RECORD_LEN = 27 };

	/* key1 is written around pos: high part first, low part after the date field. */
	const int key1High = key1 / 10000;
	const int key1Low  = key1 % 10000;
	const char * dateTail = callDate + 8;

	sprintf(buf,
	        "%05d%04d %2.2s%04d %09d\n",
	        key1High, pos, dateTail, key1Low, key2);

	/* Cut the record at its nominal width; with in-range values this is */
	/* exactly where sprintf already put the terminator.                 */
	*(buf + KEY_RECORD_LEN) = '\0';
}

/* Find the pIndexName slot that belongs to a call date.                    */
/*                                                                          */
/* Slots are scanned from 0. The first empty slot reached is claimed for    */
/* the date (its first 10 characters are stored and totalIndexFiles is      */
/* incremented); a slot whose stored date matches the first 10 characters   */
/* is reused. indexPath and idxNamePrefix are not used here.                */
/*                                                                          */
/* Returns the slot number, or -1 when all 1280 slots hold other dates.     */
int checkDate(char * callDate,char * indexPath,char * idxNamePrefix)
{
	int slot = 0;

	while (slot < 1280) {
		char * stored = pIndexName[slot].callDate;

		/* An empty slot ends the search: every later slot is empty too, */
		/* so this date has not been seen yet.                           */
		if (stored[0] == '\0') {
			memcpy(stored, callDate, 10);
			stored[10] = '\0';
			totalIndexFiles++;
			return slot;
		}

		if (strncmp(stored, callDate, 10) == 0) {
			return slot;
		}

		slot++;
	}

	return -1;
}

/* At the end of a load cycle, write the in-memory index data to files      */
/* under the temporary directory.                                           */
/* Index file format: msisdn+skey1+skey2 (one formatKey record per key)     */
/*                                                                          */
/* loopTimes      cycle number; the slots from loopTimes*MAXLOADINDEXNUMB   */
/*                up to (not including) the next cycle's first slot are     */
/*                written                                                   */
/* tmpPath        directory that receives the index files                   */
/* idxNamePrefix  text placed in front of the call date in the file name    */
/*                                                                          */
/* Throws char * (a message) when the file name does not fit the path       */
/* buffer or the file cannot be opened for writing.                         */
void writeIndexToBuf(int loopTimes,char * tmpPath,char * idxNamePrefix)
{
	/* The message is thrown by pointer, so it must outlive this stack frame. */
	static char errText[512];
	int  k,i,n;
	int  pathLen;
	char fullPathName[128+1];
	FILE * fpIdx;
	typeMoreKeyList * p;
	typeOneKeyList  * tp;
	char buf[128+1];

	for(k=loopTimes*MAXLOADINDEXNUMB+0;k<loopTimes*MAXLOADINDEXNUMB+MAXLOADINDEXNUMB;k++){
		/* A slot without a call date holds no index. */
		if( strlen(pIndexName[k].callDate) == 0 ) continue;
		p = pIndexName[k].pMoreKeyList;

		/* The suffix is chosen by the first letter of orgfileName. Any other */
		/* letter leaves the name empty, so the fopen below fails and throws. */
		memset(fullPathName,0,sizeof(fullPathName));
		pathLen = 0;
		if ((strncmp(orgfileName,"p",1) == 0) || (strncmp(orgfileName,"m",1) == 0))
			pathLen = snprintf(fullPathName,sizeof(fullPathName),"%s/%s%10.10s.idx01",tmpPath,idxNamePrefix,pIndexName[k].callDate);
		if ((strncmp(orgfileName,"q",1) == 0) || (strncmp(orgfileName,"w",1) == 0))
			pathLen = snprintf(fullPathName,sizeof(fullPathName),"%s/%s%10.10s.idx02",tmpPath,idxNamePrefix,pIndexName[k].callDate);

		/* Refuse a truncated name instead of writing to the wrong file. */
		if( pathLen < 0 || pathLen >= (int)sizeof(fullPathName) ){
			snprintf(errText,sizeof(errText),"Index file name too long for tmp path %s!\n",tmpPath);
			throw (char *)errText;
			}

		if( (fpIdx=fopen(fullPathName,"w")) == NULL){
			snprintf(errText,sizeof(errText),"Can't write index file %s to tmp path!\n",fullPathName);
			throw (char *)errText;
			}

		/* Walk the buckets 0-9999 in order. */
		for(i=0;i<10000;i++){
			/* Keys stored in the fixed-size array; an all-zero entry ends the used part. */
			for(n=0;n<4*ONEDAYONEUSERCDR;n++){
				if((p[i].pKeyArray[n]).key1 == 0 && (p[i].pKeyArray[n]).key2 == 0)
					break;
				memset(buf,0,sizeof(buf));
				formatKey(buf,pIndexName[k].callDate,i,(p[i].pKeyArray[n]).key1,(p[i].pKeyArray[n]).key2);
				fputs(buf,fpIdx);
				}
			/* Keys stored in the dynamically allocated linked list. */
			for(tp=p[i].pKeyList;tp!=NULL;tp=tp->p){
				memset(buf,0,sizeof(buf));
				formatKey(buf,pIndexName[k].callDate,i,tp->key1,tp->key2);
				fputs(buf,fpIdx);
				}
			}
		fclose(fpIdx);
		}
}

/* 删除索引所占空间  */
void delIndexBuf(int loopTimes)
{
int  k;
int  i;

typeMoreKeyList * tp;
typeOneKeyList  * mp;
typeOneKeyList  * lp;

for(k=loopTimes*MAXLOADINDEXNUMB+0;k<loopTimes*MAXLOADINDEXNUMB+MAXLOADINDEXNUMB;k++){
	if( strlen(pIndexName[k].callDate) == 0 ) continue;
	tp = pIndexName[k].pMoreKeyList ;
	for(i=0;i<10000;i++){
		mp=tp[i].pKeyList;
		while(mp!=NULL){
		lp = mp;
		mp = mp->p;
		delete lp;
		lp = NULL;
		}		
  }
	delete [] tp;	
	pIndexName[k].pMoreKeyList = NULL;
	}		
}



/* Duplicate-check one input file.                                          */
/*                                                                          */
/* fullPathName   input file to read                                        */
/* tmpPath        directory receiving <name>.chk (accepted records),        */
/*                <name>.dup (rejected records) and the index files         */
/* indexPath      passed on to checkDate and readIndexFile                  */
/* errorPath      not used in this function                                 */
/* idxNamePrefix  passed on to checkDate, readIndexFile, writeIndexToBuf    */
/*                                                                          */
/* The input is read once per load cycle. In each cycle only the records    */
/* whose call-date slot belongs to that cycle are handled, so that at most  */
/* MAXLOADINDEXNUMB indexes are held in memory at a time.                   */
/*                                                                          */
/* Throws char * (a message) when a file cannot be opened or when the       */
/* call-date table is full.                                                 */
void checkDupFile(char * fullPathName,char * tmpPath,char * indexPath,char * errorPath,char * idxNamePrefix)
{
	char  errFileName[128+1];
	char  tmpFileName[128+1];
	char  fileName[128+1];
	char  oriFileName[24+1];
	char  buf[384+1];
	int   totalRecord;
	int   validRecord;
	char  callDate[14+1];
	char  msisdn[11+1];
	int   loopTimes;
	int   k;
	int   i;
	int   n;
	size_t len;

	char tmpBuf[16+1];
	int  pos;
	char sKey1[9+1];
	char sKey2[9+1];

	clock_t startClock,stopClock;

	FILE  * fpErr;  /* rejected records */
	FILE  * fpTmp;  /* accepted records */
	FILE  * fp;     /* input file       */

	/* Drop the 4-character extension (.lst) from the base name. */
	/* A shorter name is left as it is instead of indexing before the buffer. */
	memset(fileName,0,sizeof(fileName));
	baseName(fileName,fullPathName);
	len = strlen(fileName);
	if( len >= 4 ) fileName[len-4]='\0';
	if( (fp=fopen(fullPathName,"r")) == NULL ){
		writeLog(&pLogBuf,"Can't open %s for read input!\n",fullPathName);
		throw (char *)"Open File Fail!";
		}

	/* A name that does not fit the buffer is treated like a failed open. */
	memset(tmpFileName,0,sizeof(tmpFileName));
	n = snprintf(tmpFileName,sizeof(tmpFileName),"%s/%s.chk",tmpPath,fileName);
	if( n < 0 || n >= (int)sizeof(tmpFileName) || (fpTmp=fopen(tmpFileName,"w")) == NULL ){
		writeLog(&pLogBuf,"Can't open %s for write tmp!\n",tmpFileName);
		fclose(fp);
		throw (char *)"Open File Fail!";
		}

	memset(errFileName,0,sizeof(errFileName));
	n = snprintf(errFileName,sizeof(errFileName),"%s/%s.dup",tmpPath,fileName);
	if( n < 0 || n >= (int)sizeof(errFileName) || (fpErr=fopen(errFileName,"w")) == NULL ){
		writeLog(&pLogBuf,"Can't open %s for write error!\n",errFileName);
		fclose(fpTmp);
		fclose(fp);
		throw (char *)"Open File Fail!";
		}

	/* Reset the counters. */
	totalRecord = 0;
	validRecord = 0;

	startClock = clock();

	/* Reset the per-original-file log records. */
	memset(pLogCkDpBuf,0,1024 * sizeof(typeChkDupLogBuf));
	totalLogBuf = 0;

	/* Reset the call-date table. */
	for(k=0;k<1280;k++){
		memset(pIndexName[k].callDate,0,sizeof(pIndexName[k].callDate));
		pIndexName[k].pMoreKeyList = NULL;
		}
	totalIndexFiles = 0;

	/* The cycle counter starts at 0. Cycles exist for the extreme case:   */
	/* to avoid running out of memory, only a limited number of indexes    */
	/* is loaded per pass over the input.                                  */
	loopTimes = 0;

	do{
		/* Every cycle re-reads the input from the start. */
		fseek(fp,0L,SEEK_SET);
		while(memset(buf,0,sizeof(buf)),fgets(buf,280,fp)!=NULL){
			/* Remove the trailing newline, if there is one. */
			len = strlen(buf);
			if( len > 0 && buf[len-1]=='\n' ) buf[len-1]='\0';
			subStrCpy(oriFileName,buf,86,24);

			memset(callDate,0,sizeof(callDate));
			subStrCpy(callDate,buf,43,14);

			/* NOTE(review): a record whose call-date field is empty makes     */
			/* checkDate count a new date without marking the slot as used --  */
			/* confirm how such records should be handled.                     */
			k = checkDate(callDate,indexPath,idxNamePrefix);

			/* -1 means the call-date table is full. It must not be used as an */
			/* index: -1/MAXLOADINDEXNUMB can evaluate to 0 and match cycle 0. */
			if( k < 0 ){
				writeLog(&pLogBuf,"Too many call dates in %s!\n",fullPathName);
				fclose(fpErr);
				fclose(fpTmp);
				fclose(fp);
				throw (char *)"Too many call dates!";
				}

			/* Not part of the current cycle. */
			if( k/MAXLOADINDEXNUMB != loopTimes ) continue;

			/* First record of this call date: load its index. */
			if( pIndexName[k].pMoreKeyList == NULL) {
				readIndexFile(callDate,indexPath,&(pIndexName[k].pMoreKeyList),idxNamePrefix);
				}
			totalRecord++;

			subStrCpy(msisdn,buf,4,11);
			rtrim(msisdn);

			/* The bucket number is taken from 4 characters of msisdn. */
			subStrCpy(tmpBuf,msisdn,7,4);
			pos = atoi(tmpBuf);

			/* Build key 1: 5 characters of msisdn + 4 characters of the call time. */
			memset(sKey1,0,sizeof(sKey1));
			sprintf(sKey1,"%5.5s%4.4s",msisdn+2,callDate+10);
			sKey1[9]='\0';

			/* Build key 2 from the field starting at buf[19]; for file types   */
			/* m and w the leading prefix decides which part is kept.           */
			memset(sKey2,0,sizeof(sKey2));
			if((strncmp(orgfileName,"m",1) == 0)||(strncmp(orgfileName,"w",1) == 0)){
				if(strncmp(&buf[19],"0000",4) == 0){
					sprintf(sKey2,"00%7.7s",buf+23);
				}
				else if((strncmp(&buf[19],"000",3) == 0)||(strncmp(&buf[19],"010",3) == 0)||
					(strncmp(&buf[19],"02", 2) == 0)){
					sprintf(sKey2,"0%8.8s",buf+22);
				}
				else if(strncmp(&buf[19],"00",2) == 0){
					sprintf(sKey2,"%9.9s",buf+21);
				}
				else if(strncmp(&buf[19],"0",1 ) == 0){
					sprintf(sKey2,"00%7.7s",buf+23);
				}
				else if(strncmp(&buf[19],"13",2) == 0){
					sprintf(sKey2,"%9.9s",buf+21);
				}
				else{
					sprintf(sKey2,"00%7.7s",buf+19);
				}
			}
			else{
				sprintf(sKey2,"%9.9s",buf+21);
			}
			sKey2[9]='\0';

			/* Key 2 must be 9 digits: anything else becomes '0'. */
			for(i=0;i<9;i++){
				if( sKey2[i]>'9'||sKey2[i]<'0' ) sKey2[i] = '0';
				}

			/* A negative result marks the record as a duplicate. */
			if( addToIndexList(pIndexName[k].pMoreKeyList,pos,sKey1,sKey2) < 0){
				/* NOTE(review): the newline was stripped above and is not written */
				/* back here, unlike for accepted records -- confirm whether the   */
				/* .dup file is meant to have one record per line.                 */
				fprintf(fpErr,"E%03d:%s",(-1)*ERROR_DUP_CDR,buf);
				addToLogBuf(oriFileName,1);
				continue;
				}

			addToLogBuf(oriFileName,0);
			fputs(buf,fpTmp);
			fputs("\n",fpTmp);
			validRecord++;
			}

		writeIndexToBuf(loopTimes,tmpPath,idxNamePrefix);
		delIndexBuf(loopTimes);
		loopTimes++;
		} while (loopTimes * MAXLOADINDEXNUMB < totalIndexFiles);

	/* Log: file name, total records, accepted records, rejected records. */
	writeLog(&pLogBuf,"Process File %s,%d,%d,%d\n",fileName,totalRecord,validRecord,(totalRecord-validRecord));
	fclose(fpErr);
	fclose(fpTmp);
	fclose(fp);
	stopClock=clock();
	writeLog(&pLogBuf,"Record %9d Speed %12.02f\n",totalRecord,totalRecord/((float)(stopClock-startClock)/CLOCKS_PER_SEC));

	/* Log the counters kept per original file; the first empty name ends the list. */
	for(i=0;i<1024;i++) {
		if( strlen(pLogCkDpBuf[i].origenName) == 0 ) break;
		writeLog(&pLogBuf,"orifiles,%s,%s,%d,%d,%d\n",fileName,pLogCkDpBuf[i].origenName,
			pLogCkDpBuf[i].totalNum,pLogCkDpBuf[i].curNum,pLogCkDpBuf[i].totalNum-pLogCkDpBuf[i].curNum);
		}
}



/* Program entry point.                                                     */
/*                                                                          */
/* Takes exactly one argument, the configuration file name. After reading   */
/* the configuration and setting up the log, it repeatedly lists the input  */
/* directory and, for each file, runs the duplicate check and then moves    */
/* the input, the .chk, the .dup and the index files to their target        */
/* directories inside a beginTran/commitTran pair.                          */
/*                                                                          */
/* Exits with status 1 on a start-up failure or when a char * message is    */
/* thrown during processing.                                                */
int main(int argc,char * argv[])
{
	/* Configuration parameters and file list. */
	typeCfgParam  pCfgParam[128];
	typeFileList  pFileList[1024];

	/* Program scheduling status: 0 normal, 1 re-initialize data, 2 exit. */
	int   programStatu;
	char  fullPathName[128+1];
	char  tmpBuf[128+1];
	char  fileName[128+1];
	int   k;
	int   n;
	size_t len;

	/* Uniform program name used to match configuration entries and logs. */
	/* It must be filled in, e.g. "rate" for standard rating.             */
	char programName[32+1]="audCreIndex";

	/* Check the command line. */
	if(argc != 2){
		printf("Usage: %s configureFileName \n",argv[0]);
		exit(1);
		}

	/* Open the configuration file named by the first argument. */
	if( openCfgFile(pCfgParam,argv[1]) ){
		printf("Can't open %s for read configure!\n",argv[1]);
		exit(1);
		}

	/* Read the configuration parameters. */
	if( readCfgFile(argv[1],pCfgParam) ){
		printf("Read configure fail!\n");
		exit(1);
		}

	/* Run modes 1 and 3 turn the process into a daemon. */
	if ( runFlag == 1 || runFlag == 3 )
		daemonInit();

	/* Several instances may run side by side, so the index prefix is     */
	/* appended to the program name. The append is bounded by the room    */
	/* left in programName.                                               */
	strncat(programName,idxNamePrefix,sizeof(programName)-strlen(programName)-1);

	/* Without a log the program cannot continue. */
	if( initLogFile(&pLogBuf,logPath,(char *)programName,procID)  ){
		printf("Can't init logBuf %s,error msg: %s \n",logPath,strerror(errno));
		exit(1);
		}

	redoHandler((char *)programName);

	/* Main processing loop. */
	try{
		/* NOTE(review): the loop keeps running while the status is non-zero, */
		/* which does not obviously match the status comment above -- confirm */
		/* against getProgramStatu.                                           */
		while( (programStatu=getProgramStatu((char *)programName,tmpPath)) != 0 ){
			for(k=0;k<1024;k++) memset(pFileList[k].fullPathName,0,sizeof(pFileList[k].fullPathName));
			if((k = getFileList(pFileList,inputPath,filePattern)) <= 0 ){
				if( k < 0 ){		/* the input directory cannot be read: give up */
					sprintf(errMsg,"Can't open dir %s for read!Msg: %s\n",inputPath,strerror(errno));
					throw errMsg;
					}
				else if( k == 0 ){	/* nothing to do: sleep and look again */
					printf("No file found,sleep %d sec...\n",sleepTime);
					sleep(sleepTime);
					continue ;
					}
				}

			/* Process the listed files one after another. */
			while ( memset(fullPathName,0,sizeof(fullPathName)),getFileFromList(fullPathName,pFileList)){
				memset(fileName,0,sizeof(fileName));
				memset(orgfileName,0,sizeof(orgfileName));
				baseName(fileName,fullPathName);
				/* NOTE(review): unbounded copy; the size of orgfileName is not */
				/* visible here -- confirm it can hold any base name.           */
				strcpy(orgfileName,fileName);
				/* Drop the 4-character extension; a shorter name is left as it is. */
				len = strlen(fileName);
				if( len >= 4 ) fileName[len-4]='\0';
				printf("Process File %s ......\n",fullPathName);

				/* Run the duplicate check. */
				checkDupFile(fullPathName,tmpPath,indexPath,errorPath,idxNamePrefix);

				/* Start moving the files. */
				beginTran((char *)programName,fileName);

				if( runFlag == 2 || runFlag == 3 ) remove(fullPathName);
				else if(mvfile(fullPathName,bakPath)){
					throw (char *)"Can't backup file";
				}

				/* A name that does not fit tmpBuf is treated like a failed move. */
				memset(tmpBuf,0,sizeof(tmpBuf));
				n = snprintf(tmpBuf,sizeof(tmpBuf),"%s.chk",fileName);
				if( n < 0 || n >= (int)sizeof(tmpBuf) || mvfile(tmpPath,tmpBuf,outputPath,tmpBuf)){
					throw (char *)"Can't mv chk file";
				}

				memset(tmpBuf,0,sizeof(tmpBuf));
				n = snprintf(tmpBuf,sizeof(tmpBuf),"%s.dup",fileName);
				if( n < 0 || n >= (int)sizeof(tmpBuf) || mvfile(tmpPath,tmpBuf,errorPath,tmpBuf)){
					throw (char *)"Can't mv dup file";
				}

				/* Move the index files, one per call date. */
				for(k=0;k<totalIndexFiles;k++){
					memset(tmpBuf,0,sizeof(tmpBuf));
					n = 0;
					if (( strncmp( orgfileName, "p",1) == 0 ) || ( strncmp( orgfileName, "m",1) == 0 ))
						n = snprintf(tmpBuf,sizeof(tmpBuf),"%s/%s%s.idx01",tmpPath,idxNamePrefix,pIndexName[k].callDate);
					if (( strncmp( orgfileName, "q",1) == 0 ) || ( strncmp( orgfileName, "w",1) == 0 ))
						n = snprintf(tmpBuf,sizeof(tmpBuf),"%s/%s%s.idx02",tmpPath,idxNamePrefix,pIndexName[k].callDate);

					if( n < 0 || n >= (int)sizeof(tmpBuf) || mvfile(tmpBuf,indexPath)){
						throw (char *)"Can't mv index file";
						}
				}

				commitTran((char *)programName);
				}
			}
		}
	catch (char * errMsg ){
		/* A thrown message means the program has to stop: log it and exit. */
		writeLog(&pLogBuf,"%s\n",errMsg);
		closeLogFile(&pLogBuf);
		exit(1);
		}

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -