📄 regex.c

📁 uClinux下用的数据库
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
** Copyright (c) 1995-2001  Hughes Technologies Pty Ltd.  All rights
** reserved.  
**
** Terms under which this software may be used or copied are
** provided in the  specific license associated with this product.
**
** Hughes Technologies disclaims all warranties with regard to this 
** software, including all implied warranties of merchantability and 
** fitness, in no event shall Hughes Technologies be liable for any 
** special, indirect or consequential damages or any damages whatsoever 
** resulting from loss of use, data or profits, whether in an action of 
** contract, negligence or other tortious action, arising out of or in 
** connection with the use or performance of this software.
**
**
** $Id: regex.c,v 1.7 2002/06/29 04:09:01 bambi Exp $
**
*/

/*
** Module	: main : regex
** Purpose	: Implementation of SQL regular expressions
** Exports	: 
** Depends Upon	: 
*/

/*
** Using basic ANSI SQL regular expression specification,
** see if an expression matches some data.  The expression
** is nul terminated and the data is terminated by the
** length dlen which is enforced by the use of msqlStringLength.
** (See charMatch.)  It would really be nice if mSQL kept
** track of the actual data lengths in the db (and of literals).
** In the meantime, using msqlStringLength is a win for
** expressions with multiple % wildcards, especially if the data is
** at least of moderate length.  
It does slow down expressions** without %'s or matches to very short data.*//**************************************************************************** STANDARD INCLUDES**************************************************************************/#include <common/config.h>#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#ifdef HAVE_UNISTD_H#  include <unistd.h>#endif#ifdef HAVE_STRING_H#  include <string.h>#endif#include <common/portability.h>/**************************************************************************** MODULE SPECIFIC INCLUDES**************************************************************************/#include <ctype.h>#include <common/msql_defs.h>#include <common/debug/debug.h>#include <msqld/index/index.h>#include <msqld/includes/errmsg.h>#include <msqld/includes/msqld.h>#include <msqld/regexp/regexp.h>#include <libmsql/msql.h>#include "regex.h"#include "varchar.h"/**************************************************************************** GLOBAL VARIABLES**************************************************************************/extern char errMsg[];/**************************************************************************** PRIVATE ROUTINES**************************************************************************/static char *getNextChar(type, data, last)	void	*data;	char	*last;{	if (type == CHAR_TYPE)	{		char	*lastPtr = (char *)last;		if (lastPtr == NULL)		{			return((char *)data);		}		else		{			lastPtr++;			return(lastPtr);		}	}	if (type == TEXT_TYPE)	{		return(varcharGetNext(data));	}	/*	** Should never get here	*/	return(NULL);}/**************************************************************************** PUBLIC ROUTINES**************************************************************************//* ** regexStringLength -- determine the actual string length of mSQL data string.** dptr -- character data to determine length.** maxLen -- maximum possible length. 
(Shorter if a nul character occurs.)** returns an integer that is the actual number of characters (not**   including a nul, if there is one.*/int regexStringLength(dptr, maxLen )	char 	*dptr;	int 	maxLen;{	int 	len;	len = 0;	while ((maxLen > 0) && (*dptr != '\0')) 	{		len++;		maxLen--;		dptr++;	}	return len;}/* ** likeTest -- (simple) ANSI SQL regular expression matcher.** dptr -- character data to be matched.  May or may not be nul terminated.** eptr -- regular expression to match. Must be nul terminated.** dlen -- length of the data pointed to by dptr.  Determines data end when**     it is not nul terminated.** returns true (non-zero) or false (0) for match or no match, respectively.*/int likeTest(data, eptr, dlen, ignoreCase, dataType)	void 	*data;	char 	*eptr;	int  	dlen;	char	ignoreCase,		dataType;{	char 	eval, 		*dptr;	void	*cursor;	eval = *eptr;	dptr = getNextChar(dataType,data,NULL);	while (eval != '\0') 	{ 		if (eval == '%') 		{ 			/*			** ignore 0 or more characters      			** try to find the next character that must match      			** collapse special characters: %'s and _'s 			*/			eptr++;			eval = *eptr;			while (eval != '\0') 			{				if (eval == '%') 				{ 					/* any number of %'s same as one */					eptr++;					eval = *eptr;					continue;				}				if (eval == '_') 				{ 					/* consume one char for each _ */					if (dlen == 0) 					{	    					/* data ended : no match. */	    					return 0;	  				}	  				eptr++;	  				eval = *eptr;					dptr = getNextChar(dataType,data,dptr);	  				dlen--;	  				continue;				}				break; 			}			/*			** special case of ending with enough characters			** for wild cards. 			*/      			if (eval == '\0') 			{				return 1;      			}			if (eval == '\\') 			{				/* 					** backslash escapes everything including 				** itself, so skip it always. 				
*/				eptr++;				eval = *eptr;				if (eval == '\0')				{					/*					** end with match (skipping an 					** ending backslash) 					*/	  				return 1;				}			}			/*	 		** At this point, we have a non-special character			** to look for in data, then we recursively match			** remainder of expression at points where that			** character exists in the data.			**			** note: leave eval alone, but eptr positioned for			** recursion. 			*/			eptr++;			while (dlen != 0) 			{				while (dlen != 0) 				{					if (ignoreCase)					{						if(toupper(*dptr) == 							toupper(eval))						{							break;						}					}					else					{						if(*dptr == eval)						{							break;						}					}					dptr = getNextChar(dataType,data,dptr);					dlen--;				}				if (dlen == 0) 				{ 					/* there is no eval in data */					return 0;				}				/*				** we matched eval and need to check 				** rest of exp and data 				*/				if (dataType == TEXT_TYPE)				{					cursor = varcharDupCursor(data);					if (likeTest(cursor, eptr, dlen -1 ,						ignoreCase, dataType)) 					{						/* match */						free(cursor);						return 1;					} 					dptr = getNextChar(dataType,data,dptr);					dlen--;				}				else				{					dptr = getNextChar(dataType,data,dptr);					dlen--;					if (likeTest(dptr, eptr, dlen ,						ignoreCase, dataType)) 					{						/* match */						return 1;					} 				}				/*				** else just try to see if this position				** is an eval 				*/			} /* end try to match rest */			/* There is something after % and it didn't matched */			return 0;		} /* end if % */		if (eval ==  '_') 		{ 			/* try to consume one char */      			if (dlen == 0) 			{				/* data ended first: no match. */				return 0;			}			eptr++;			eval = *eptr;			dptr++;			dlen--;			continue;		} 		if (eval == '\\')		{      			/* 			** backslash escapes everything including self, so 			** skip it always. 			
*/			eptr++;			eval = *eptr;			if (eval == '\0') 			{				/*				** if dlen == 0 then end with match 				** (skipping an ending backslash) else 				** expr ended before data				*/				return (dlen == 0);			}      			/* Fall through */		}		if (dlen == 0) 		{			/* data ends before expr */			return 0;		}		if (ignoreCase)		{			if (toupper(eval) != toupper(*dptr))				return(0);		}		else		{			if (eval != *dptr)				return(0);		}		/* This character matched, consume it and continue */		eptr++;		eval = *eptr;		dptr = getNextChar(dataType,data,dptr);		dlen--;	}	/* if we matched everything, return true. */	return (dlen == 0);}/* RNS * Non-ANSI, full-functioned regular expressions courtesy of * Henry Spencer. *//* * RLIKE_DATA_MAXLEN -- the maximum length of a string that the * static data buffer can hold in order to use it rather than malloc. * 1024 is big enough for full pathnames on most UNIX boxes. */#define RLIKE_DATA_MAXLEN 1024/* * rLikeBuffer -- a static character buffer that should be faster * than using malloc.  If RLIKE_DATA_MAXLEN is too small, then * malloc is used. */static char rLikeBuffer[RLIKE_DATA_MAXLEN + 1];/* * RLIKE_NUM_REGEXPS -- the number of regular expression compilations * to cache (i.e., the RLIKE cache size).  RLIKE_NUM_REGEXPS must be * at least 1, but can be as large as needed.  Normally, a small number * like 5 is probably about right.  However, if you have an application * or set of applications that repeatedly use the same set of expressions,
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -