⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 substr.c

📁 求2个或多个文本间的公共子串
💻 C
字号:
#include "publib.h"
#include "strmatch.h"
#include <dirent.h>
#include <sys/stat.h>


/*
 * Hash the first nKeyLength bytes of arKey into an unsigned long.
 *
 * Each byte is folded in by shifting the running value left four bits and
 * adding the byte. Whenever any of bits 28..31 become set, they are XOR-ed
 * back in 24 bits lower and then cleared.
 *
 * arKey       bytes to hash (need not be NUL-terminated)
 * nKeyLength  number of bytes to read from arKey
 *
 * Returns the hash; 0 for an empty key.
 *
 * NOTE(review): bytes are read through plain `char`, so on platforms where
 * char is signed, bytes >= 0x80 are sign-extended before being added --
 * confirm whether that is intended for non-ASCII keys.
 */
static unsigned long hashpjw(char *arKey, unsigned int nKeyLength)
{
	unsigned long hash = 0;
	unsigned long top = 0;
	unsigned int pos = 0;

	for (pos = 0; pos < nKeyLength; ++pos) {
		hash = (hash << 4) + arKey[pos];

		/* Fold the top nibble of the low 32 bits back into the hash. */
		top = hash & 0xF0000000;
		if (top != 0) {
			hash ^= top >> 24;
			hash ^= top;
		}
	}

	return hash;
}

#define STR_LIST_LEN 23
#define IDX_SIZE 2333

/*
 * Print every node held in a table of STR_LIST_LEN StrList slots.
 *
 * For each slot whose pNodeArray is non-NULL, the first memUse nodes are
 * printed to stdout, one per line, as "<uiBegPos>, <uiSmchLen> ===> <text>",
 * where <text> is the node's pSmch data viewed as bytes.
 *
 * strListCur  table of STR_LIST_LEN entries; a NULL table prints nothing.
 *
 * The text is truncated so that it always fits in the local buffer together
 * with its terminating NUL.
 */
void PrintStrList(StrList * strListCur){
	size_t turn = 0;
	size_t inturn = 0;
	size_t tempSize = 0;
	StrNode * pStrNode = NULL;
	char strShow[255];

	if(NULL == strListCur)
		return;

	for(turn = 0; turn < STR_LIST_LEN; ++turn){
		pStrNode = strListCur[turn].pNodeArray;
		if(NULL == pStrNode)
			continue;

		for(inturn = 0; inturn < strListCur[turn].memUse; ++inturn){
			tempSize = pStrNode[inturn].uiSmchLen * sizeof(smch_t);

			/* Keep the last byte of strShow free for the terminator;
			 * clamping to the full buffer size would write the NUL one
			 * byte past the end of the array. */
			if(tempSize > sizeof(strShow) - 1)
				tempSize = sizeof(strShow) - 1;

			strncpy(strShow, (char *)(pStrNode[inturn].pSmch), tempSize);
			strShow[tempSize] = '\0';
			printf("%u, %u ===> %s\n",pStrNode[inturn].uiBegPos, pStrNode[inturn].uiSmchLen,  strShow);
		}
	}
}


int main(int argc, char * argv[])
{	
	
	char * pPath = "txt/2.txt";

	
	Fileinfo info;

	Fileinfo infoOne, infoTwo;
	
	CharIndex * idxArray[IDX_SIZE]; /* size of 2333 */
	CharIndex * pIndex = NULL;
	size_t turn = 0;
	size_t inturn = 0;
	size_t fileturn = 0;
	char str[3];
	size_t smchlen = 0;


	smch_t * pTranStr = NULL;
	size_t uiTranLen = 0;

	smch_t * pTranStrOne = NULL;
	smch_t * pTranStrTwo = NULL;
	size_t uiTranLenOne = 0;
	size_t uiTranLenTwo = 0;

	size_t * pCharList = NULL;
	size_t uiListSize = 0;

	size_t uiCount = 0;
	StrNode * pStrNode = NULL;

	StrList strList[STR_LIST_LEN];
	char strShow[256];
	size_t tempSize = 0;
	
	
	char * pFoldStr = "/home/ps/wanjm/substr/txt/";
	
	char foldPath[255];
	struct dirent * pDirent = NULL;
	DIR * pDir = NULL;
	struct stat statbuf;

	smch_t ** smListPtr = NULL;
	size_t * smLenPtr = NULL;
	size_t  listUse = 0;
	size_t listLen = 10;
	StrList strListArray[2][STR_LIST_LEN];
	StrList * strListCur = NULL;
	size_t uiConCur = 0;
	size_t outurn = 0;

	
	if(2 != sizeof(unsigned short)){
		printf("unsigned short is not 2 bytes");
		return -1;
	}

	smListPtr = (smch_t **)malloc(listLen * sizeof(smch_t *));
	smLenPtr = (size_t *)malloc(listLen * sizeof(size_t));

	if((NULL == smListPtr) || (NULL == smLenPtr)){
		perror("Failed to allocate memory");
		return 1;
	}

	strncpy(foldPath, pFoldStr, strlen(pFoldStr));
	foldPath[strlen(pFoldStr)] = 0;
	
	/**********************
	 *
	 * Many files 
	 *
	 * *******************/
	if(NULL == (pDir = opendir(foldPath))){
		perror("Failed to open directory");
		return 1;
	}

	uiTranLenOne = -1;
	uiTranLenTwo = -1;
	
	while(NULL != (pDirent = readdir(pDir))){
		
		strncpy(foldPath, pFoldStr, strlen(pFoldStr));
	        foldPath[strlen(pFoldStr)] = 0;
		strncat(foldPath, pDirent->d_name, strlen(pDirent->d_name));
		
		if(-1 == stat(foldPath, &statbuf))
		       return -1;
		if(! S_ISREG(statbuf.st_mode))
			continue;
		
		if((-1 != GetFileInfo(foldPath, &info)) &&\
		(-1 != TransferString(info.pStr, info.nLen, &pTranStr, &uiTranLen))){
			
			/***************
			printf("<%s %u> = %s\n", foldPath, uiTranLen, (char *)pTranStr);
			*****************/

			free(info.pStr);

			uiTranLen /= sizeof(smch_t);
						
			if((-1 != uiTranLenOne) && (-1 != uiTranLenTwo)){
				if(uiTranLen < uiTranLenOne){
					
					smListPtr[listUse] = pTranStrOne;
					smLenPtr[listUse] = uiTranLenOne;
					++listUse;
					
					if(uiTranLen < uiTranLenTwo){
						
						pTranStrOne = pTranStrTwo;
						uiTranLenOne = uiTranLenTwo;
						
						pTranStrTwo = pTranStr;
						uiTranLenTwo = uiTranLen;	
					}else{
						pTranStrOne = pTranStr;
						uiTranLenOne = uiTranLen;
					}
					
				
				}else{
					smListPtr[listUse] = pTranStr;
					smLenPtr[listUse] = uiTranLen;
					++listUse;
				}
			}

			if(listUse >= listLen){
				listLen *= 2;
				if( (NULL == (smListPtr = (smch_t **)realloc(smListPtr, listLen * sizeof(smch_t *)))) ||\
				(NULL == (smLenPtr = (size_t *)realloc(smLenPtr, listLen * sizeof(size_t)))) ){
					perror("Failed to allocate memory");
					return 1;				 
				 }
				
			}
		
			if(-1 == uiTranLenOne){
				pTranStrOne = pTranStr;
				uiTranLenOne = uiTranLen;
			}else if(-1 == uiTranLenTwo){
				
				if(uiTranLen > uiTranLenOne){
					pTranStrTwo = pTranStrOne;
					uiTranLenTwo = uiTranLenOne;
					
					pTranStrOne = pTranStr;
					uiTranLenOne = uiTranLen;
				}else{
					pTranStrTwo = pTranStr;
					uiTranLenTwo = uiTranLen;
				}
			}
			
		}		
		
	}
	while( (-1 == closedir(pDir)) && (EINTR == errno) );	

	/*********
	printf("%u => %s\n", uiTranLenOne, (char *)pTranStrOne);
	printf("%u => %s\n", uiTranLenTwo, (char *)pTranStrTwo);
	for(turn = 0; turn < listUse; ++turn){
		printf("%u => %s\n", smLenPtr[turn], (char *)(smListPtr[turn]));
	}

	 ********/

	for(turn = 0; turn < 2; ++turn){
		memset((char *)strListArray[turn], 0, STR_LIST_LEN * sizeof(StrList *));
	}


	uiConCur = 0;
	strListCur = strListArray[uiConCur];
	if(-1 != strmatch(pTranStrOne, uiTranLenOne, pTranStrTwo, 0, uiTranLenTwo, strListCur, STR_LIST_LEN)){
		
		for(fileturn = 0; fileturn < listUse; ++fileturn){

			/*******			 
			PrintStrList(strListCur);
	                printf("%u => %s\n", smLenPtr[fileturn], (char *)(smListPtr[fileturn]));
			********/
				
			for(turn = 0; turn < STR_LIST_LEN; ++turn){
				if(NULL != (pStrNode = strListCur[turn].pNodeArray)){
					for(outurn = 0; outurn < strListCur[turn].memUse; ++outurn){

						/***********
						printf("<ptr = %u, off = %u, len = %u>\n", (char *)pStrNode[outurn].pSmch,\ 
						pStrNode[outurn].uiBegPos, pStrNode[outurn].uiSmchLen);
						************/
						
						if(-1 == strmatch(smListPtr[fileturn], smLenPtr[fileturn],\ 
						pStrNode[outurn].pSmch, pStrNode[outurn].uiBegPos, \
						pStrNode[outurn].uiSmchLen, strListArray[1 - uiConCur], STR_LIST_LEN)){
							perror("Failed to strmatch");
							return -1;
						}
					}					       
					free(pStrNode);
					memset( &(strListCur[turn]), 0, sizeof(StrList));
				}
			}
			uiConCur = 1 - uiConCur;
			strListCur = strListArray[uiConCur];
		}
	}else{
		printf("Failed to match files\n");
		return -1;
	}
	
	PrintStrList(strListArray[uiConCur]);
	
	return 0;
        
	
	/*******************
	 *
	 * Two files
	 * 
	 * *****************/	
	if( (-1 != GetFileInfo("txt/8.txt", &infoOne)) && (-1 != GetFileInfo("txt/3.txt", &infoTwo))){
		if(-1 != TransferString(infoOne.pStr, infoOne.nLen, &pTranStrOne, &uiTranLenOne) && \
		(-1 != TransferString(infoTwo.pStr, infoTwo.nLen, &pTranStrTwo, &uiTranLenTwo))){
			smchlen = sizeof(smch_t);
			if(-1 != strmatch(pTranStrOne, uiTranLenOne/smchlen, pTranStrTwo, 0, uiTranLenTwo/smchlen, strList, STR_LIST_LEN)){
				for(turn = 0; turn < STR_LIST_LEN; ++turn){
					if(NULL != (pStrNode = strList[turn].pNodeArray)){
						for(inturn = 0; inturn < strList[turn].memUse; ++inturn){
							tempSize = pStrNode[inturn].uiSmchLen * sizeof(smch_t);
							if(254 < tempSize)
								tempSize = 255;
							strncpy(strShow, (char *)(pStrNode[inturn].pSmch),tempSize);
							strShow[tempSize] = '\0';
							printf("%u ===> %s\n",turn,  strShow);							
						}
					}
				}	
			}else{
				printf("no\n");
			}
		}		
	}

	
	return 0;


	
	/***********************
	 *
	 * funcition test
	 * 
	 * *********************/
	
		
	if(-1 != GetFileInfo(pPath, &info)){
		printf("%d, =>%s\n", info.nLen, info.pStr);
                               
              	if(-1 != TransferString(info.pStr, info.nLen, &pTranStr, &uiTranLen)){
			printf("%u (%u) => %s\n", uiTranLen, strlen((char *)pTranStr), (char *)pTranStr);

		        smchlen = uiTranLen / sizeof(smch_t);
			
			
			if(-1 != IndexCharForStr(pTranStr, smchlen, idxArray, IDX_SIZE)){
				for(turn = 0; turn < IDX_SIZE; ++turn){
					if(NULL != idxArray[turn])
						printf("\n");

					for(pIndex = idxArray[turn]; NULL != pIndex; pIndex = pIndex->pNext){
						
								
						
						
						*((smch_t *)str) = pIndex->smch;
						str[2] = '\0';
						printf("[%u] %s => %u ", turn, str, pIndex->memUse);
						
						if(260 == pIndex->memUse)
							printf(" loc = %u", turn);
						else
						
						for(inturn = 0; inturn < pIndex->memUse; ++inturn){
							printf("%u ", pIndex->location[inturn]);
							++uiCount;
						}

						
												
						/*  printf("\n"); */
					}
						
				}
			}

			printf("uiCount => %u\n", uiCount);
			
			
			printf("\n\n GetCharPosList \n\n");
			if(-1 != GetCharPosList(pTranStr[24], idxArray, IDX_SIZE, &pCharList, &uiListSize)){
				*((smch_t *)str) = pTranStr[24];
				str[2] = '\0';
				printf("%s => ", str);
				for(inturn = 0; inturn < uiListSize; ++inturn)
					printf(" %u ", pCharList[inturn]);
							
			}
			printf("\n");		
			
			
		}
                
	}else{
		printf("no\n");
	}

	return 0;
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -