⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 strmatch.c

📁 求2个或多个文本间的公共子串
💻 C
字号:
#include "strmatch.h"#include "publib.h"static unsigned long StrHash(char *arKey, unsigned int nKeyLength){	unsigned long h = 0, g;	char *arEnd = arKey + nKeyLength;		while (arKey < arEnd) {		h = (h << 4) + *arKey++;		if ((g = (h & 0xF0000000))) {			h = h ^ (g >> 24);			h = h ^ g;		}	}	return h;}void printMatrix(smch_t * loStr, smch_t * shStr, size_t ** pMatrix, const size_t row, const size_t col){	size_t outurn = 0;	size_t inturn = 0;	size_t NUM = 50;	size_t BEG = 0;	char str[3];		printf("     ");        for(outurn = BEG; outurn < min(col, NUM); ++outurn){		printf("%.2u", outurn);	}	printf("\n");		printf("     ");	for(outurn = BEG; outurn < min(row, NUM); ++outurn){		*((smch_t *)str) = shStr[outurn];		str[2] = 0;		printf("%s", str);	}	printf("\n");				for(outurn = BEG; outurn < min(row, NUM); ++outurn){		*((smch_t *)str) = loStr[outurn];		str[2] = 0;		printf("%u %s ", outurn, str);				for(inturn = BEG; inturn < min(col, NUM); ++inturn){			printf("%u ", pMatrix[outurn][inturn]);		}		printf("\n");	}	printf("\n\n");}#ifndef INCRE_SIZE#define INCRE_SIZE 30#endifint AddCommonStr(const size_t begPos, const size_t strLen, smch_t * strShort, StrList * pStrList, const size_t listSize){		size_t modPos = 0;	StrNode * pNodeCur = NULL; 	size_t curPos = 0;	unsigned long ulHashVal = 0;	ulHashVal = StrHash((char *)(&(strShort[begPos])), strLen * sizeof(smch_t) );		modPos = ulHashVal % listSize;	pNodeCur = pStrList[modPos].pNodeArray;	/**************	if(18 == modPos){		printf("we got it");	}	**************/		if(NULL == pNodeCur){		if(NULL == (pNodeCur = (StrNode *)malloc(INCRE_SIZE * sizeof(StrNode)))){			perror("Failed to allocate memory");			return -1;		}		memset((char *)pNodeCur, 0, INCRE_SIZE * sizeof(StrNode));		pStrList[modPos].pNodeArray = pNodeCur;		pStrList[modPos].memLen = INCRE_SIZE;		pStrList[modPos].memUse = 0;				pNodeCur->pSmch = &(strShort[begPos]);		pNodeCur->uiBegPos = begPos;		pNodeCur->uiSmchLen = strLen;		pStrList[modPos].memUse += 1;	}else{		for(curPos = 0; (curPos < pStrList[modPos].memUse) &&\		((pNodeCur[curPos].uiSmchLen != strLen) ||\		(0 != strncmp((char *)((pNodeCur + curPos)->pSmch), \		(char *)(strShort + begPos), strLen * sizeof(smch_t)))); ++curPos);				if(curPos >= pStrList[modPos].memUse){			(pNodeCur + curPos)->pSmch = &(strShort[begPos]);			(pNodeCur + curPos)->uiBegPos = begPos;			(pNodeCur + curPos)->uiSmchLen = strLen;						pStrList[modPos].memUse += 1;		}				if(pStrList[modPos].memUse >= pStrList[modPos].memLen){			pStrList[modPos].memLen += INCRE_SIZE;						if(NULL == (pStrList[modPos].pNodeArray = (StrNode *)realloc(\			pStrList[modPos].pNodeArray, \			pStrList[modPos].memLen * sizeof(StrNode) ))){				perror("Failed to allocate memory");				return -1;			}		}																																									                                        }		return 0;}int strmatch(smch_t * strLong, const size_t nLongLen, smch_t * strShortOffset, const size_t offset, const size_t nShortLen, StrList * pStrList, const size_t listSize){	smch_t * strShort = strShortOffset;	size_t shturn = 0;	size_t loturn = 0;	size_t listPos = 0;		size_t uiLeftTop = 0;	size_t ** pMatchMatrix = NULL;	const size_t IDX_SIZE = min(2333, max(15, nShortLen / 3));	CharIndex * idxArray[IDX_SIZE]; /* size of 2333 */	size_t * pCharList = NULL;	size_t uiListSize = 0;		size_t temp = 0;	int * markList[2];		size_t uiIdx = 0;		size_t uiOldEnd = 0;	size_t uiOldCur = 0;	size_t uiNewCur = 0;	size_t uiShPos = 0;	size_t uiTempNum = 0;	int HasChar = -1;	/*************	printf(">>>>>>>>%s", (char *)strLong);	printf("\n>>>>>>>>%s\n", (char *)strShort);	**************/		if( (0 == nShortLen) || (0 == nLongLen))		return 0;	if( (NULL == strShortOffset) || (NULL == strLong) || (NULL == pStrList) || (0 == listSize))	        return -1;			temp = nLongLen * sizeof(size_t *);	if(NULL == (pMatchMatrix = (size_t **)malloc(temp))){		perror("Failed to allocate memory");		return -1;	}	memset(pMatchMatrix, 0, temp);		for(loturn = 0; loturn < nLongLen; ++loturn){		temp = nShortLen * sizeof(size_t);		pMatchMatrix[loturn] = (size_t *)malloc(temp);		if(NULL == pMatchMatrix[loturn]){			perror("Failed to allocate memory");			return -1;		}		memset(pMatchMatrix[loturn], 0, temp);	}	if(-1 == IndexCharForStr(strShort, nShortLen, idxArray, IDX_SIZE)){		perror("Failed to create Index");		return -1;	}	if( (NULL == (markList[0] = (int *)malloc(nShortLen * sizeof(int)))) || \		(NULL == (markList[1] = (int *)malloc(nShortLen * sizeof(int))))){		perror("Failed to allocate memory");		return -1;	}	/*********	memset((char *)markList[0], 0, nShortLen * sizeof(int));	memset((char *)markList[1], 0, nShortLen * sizeof(int));	*********/		for(loturn = 0; loturn < nLongLen; ++loturn){		if(strLong[loturn] == strShort[0]){			pMatchMatrix[loturn][0] = 1;		}	}	for(shturn = 0; shturn < nShortLen; ++shturn){		if(strLong[0] == strShort[shturn]){			pMatchMatrix[0][shturn] = 1;		}	}		uiIdx = 0;	uiOldEnd = 0;	uiOldCur = 0;	uiNewCur = 0;	for(loturn = 1; loturn < nLongLen; ++loturn){			/********		printMatrix(strLong, strShort, pMatchMatrix, nLongLen, nShortLen);		printf("\n <loturn %u>before\n", loturn);		for(temp = 0; temp < uiOldEnd; ++temp){			printf(" %u => %u ", markList[uiIdx][temp], pMatchMatrix[loturn - 1][markList[uiIdx][temp]]);		}		printf("\n");		***************/							if(-1 != (HasChar = GetCharPosList(strLong[loturn], idxArray, IDX_SIZE, &pCharList, &uiListSize))){		       			/**********			printf("\nloca => ");			for(listPos = 0; listPos < uiListSize; ++listPos)				printf("%u ", pCharList[listPos]);			printf("\n");			**********/						for(listPos = 0; listPos < uiListSize; ++listPos){							if(0 == (shturn = pCharList[listPos]))					continue;							/*******					printf("=> %d ", shturn);				*******/					if(nShortLen <= shturn){						perror("overflow");						return -1;					}					if(-1 == (uiLeftTop = pMatchMatrix[loturn - 1][shturn - 1])){						perror("size_t overflow");						return -1;					}										if(strLong[loturn] == strShort[shturn]){						pMatchMatrix[loturn][shturn] = uiLeftTop + 1;										if(shturn < (nShortLen - 1)){							markList[1 - uiIdx][uiNewCur] = shturn;							++uiNewCur;						}else{							AddCommonStr(shturn - uiLeftTop + offset, uiLeftTop + 1, \							strShort - offset, pStrList, listSize);						}																									/***********						printf(" <add> %u <%u, %u, %u>\n", shturn, loturn, shturn, uiLeftTop + 1);											for(; (uiOldCur < uiOldEnd) && \						((uiShPos = markList[uiIdx][uiOldCur]) < (shturn - 1)); ++uiOldCur){							if(2 <= (uiTempNum = pMatchMatrix[loturn - 1][uiShPos])){								AddCommonStr(uiShPos - uiTempNum + 1, uiTempNum, \								strShort - offset * sizeof(smch_t), pStrList, listSize);							}						}						**********/												for(uiOldCur = 0; uiOldCur < uiOldEnd; ++uiOldCur){							if((shturn - 1) == markList[uiIdx][uiOldCur]){								markList[uiIdx][uiOldCur] = -1;							}						}				}																	}					}				/*********************		printf("\nafter\n");		for(temp = 0; temp < uiOldEnd; ++temp){			printf(" %d => %u ", markList[uiIdx][temp], pMatchMatrix[loturn - 1][markList[uiIdx][temp]]);		}		printf("\n");		*******************/				for(uiOldCur = 0; uiOldCur < uiOldEnd; ++uiOldCur){		       	if((-1 != (uiShPos = markList[uiIdx][uiOldCur])) &&\			(2 <= (uiTempNum = pMatchMatrix[loturn - 1][uiShPos]))){				AddCommonStr(uiShPos - uiTempNum + 1 + offset, uiTempNum, \				strShort - offset, pStrList, listSize);			}		}				if(-1 != HasChar){			uiOldEnd = uiNewCur;		}else{			uiOldEnd = 0;		}				uiNewCur = 0;		uiOldCur = 0;		uiIdx = 1 - uiIdx;				}		/***************		if( ((loturn == nLongLen) || (loturn == nShortLen)) && (0 != uiOldEnd)){			if((-1 != (uiShPos = markList[uiIdx][uiOldCur])) &&\                (2 <= (uiTempNum = pMatchMatrix[loturn - 1][uiShPos]))){			AddCommonStr(uiShPos - uiTempNum + 1, uiTempNum, strShort,\			nShortLen, pStrList, listSize);		}	}	**********************/	for(loturn = 0; loturn < nLongLen; ++loturn)		free(pMatchMatrix[loturn]);	free(pMatchMatrix);	free(markList[0]);	free(markList[1]);	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -