📄 regex.c
字号:
/*
** Copyright (c) 1995-2001 Hughes Technologies Pty Ltd. All rights
** reserved.
**
** Terms under which this software may be used or copied are
** provided in the specific license associated with this product.
**
** Hughes Technologies disclaims all warranties with regard to this
** software, including all implied warranties of merchantability and
** fitness, in no event shall Hughes Technologies be liable for any
** special, indirect or consequential damages or any damages whatsoever
** resulting from loss of use, data or profits, whether in an action of
** contract, negligence or other tortious action, arising out of or in
** connection with the use or performance of this software.
**
**
** $Id: regex.c,v 1.7 2002/06/29 04:09:01 bambi Exp $
**
*/

/*
** Module	: main : regex
** Purpose	: Implementation of SQL regular expressions
** Exports	:
** Depends Upon	:
*/

/*
** Using basic ANSI SQL regular expression specification,
** see if an expression matches some data. The expression
** is nul terminated and the data is terminated by the
** length dlen which is enforced by the use of msqlStringLength.
** (See charMatch.) It would really be nice if mSQL kept
** track of the actual data lengths in the db (and of literals).
** In the meantime, using msqlStringLength is a win for
** expressions with multiple % wildcards, especially if the data is
** at least of moderate length. It does slow down expressions
** without %'s or matches to very short data.
*/

/**************************************************************************
** STANDARD INCLUDES
**************************************************************************/

#include <common/config.h>

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif

#include <common/portability.h>

/**************************************************************************
** MODULE SPECIFIC INCLUDES
**************************************************************************/

#include <ctype.h>
#include <common/msql_defs.h>
#include <common/debug/debug.h>
#include <msqld/index/index.h>
#include <msqld/includes/errmsg.h>
#include <msqld/includes/msqld.h>
#include <msqld/regexp/regexp.h>
#include <libmsql/msql.h>
#include "regex.h"
#include "varchar.h"

/**************************************************************************
** GLOBAL VARIABLES
**************************************************************************/

extern char errMsg[];

/**************************************************************************
** PRIVATE ROUTINES
**************************************************************************/

/*
** getNextChar -- step to the next character of the data being matched.
**	type -- CHAR_TYPE or TEXT_TYPE; selects how the data is walked.
**	data -- start of the data (CHAR_TYPE) or the handle that is passed
**		to varcharGetNext() (TEXT_TYPE).
**	last -- the pointer returned by the previous call, or NULL on the
**		first call.  Only consulted for CHAR_TYPE.
** returns: for CHAR_TYPE, data itself on the first call and last + 1
** afterwards; for TEXT_TYPE, whatever varcharGetNext(data) returns;
** NULL for any other type.
*/
static char *getNextChar(type, data, last)
	int	type;
	void	*data;
	char	*last;
{
	if (type == CHAR_TYPE)
	{
		char	*lastPtr = (char *)last;

		if (lastPtr == NULL)
		{
			/* first call: start at the beginning of the data */
			return((char *)data);
		}
		else
		{
			lastPtr++;
			return(lastPtr);
		}
	}
	if (type == TEXT_TYPE)
	{
		/* position is tracked through data, so last is not used */
		return(varcharGetNext(data));
	}

	/*
	** Should never get here
	*/
	return(NULL);
}

/**************************************************************************
** PUBLIC ROUTINES
**************************************************************************/

/*
** regexStringLength -- determine the actual string length of mSQL data string.
**	dptr -- character data to determine length.
**	maxLen -- maximum possible length. (Shorter if a nul character occurs.)
** returns an integer that is the actual number of characters (not
** including a nul, if there is one).  A maxLen of zero or less yields 0
** without reading dptr.
*/
int regexStringLength(dptr, maxLen )
	char	*dptr;
	int	maxLen;
{
	int	len;

	len = 0;
	while ((maxLen > 0) && (*dptr != '\0'))
	{
		len++;
		maxLen--;
		dptr++;
	}
	return len;
}

/*
** likeTest -- (simple) ANSI SQL regular expression matcher.
**	data -- data to be matched; walked with getNextChar() according
**		to dataType.  May or may not be nul terminated.
**	eptr -- regular expression to match. Must be nul terminated.
**		'%' matches zero or more characters, '_' matches exactly
**		one character and '\' makes the following character
**		literal.
**	dlen -- length of the data. Determines data end when
**		it is not nul terminated.
**	ignoreCase -- non-zero to compare literal characters through
**		toupper().
**	dataType -- CHAR_TYPE or TEXT_TYPE, passed on to getNextChar().
** returns true (non-zero) or false (0) for match or no match, respectively.
*/
int likeTest(data, eptr, dlen, ignoreCase, dataType)
	void	*data;
	char	*eptr;
	int	dlen;
	char	ignoreCase,
		dataType;
{
	char	eval,
		*dptr;
	void	*cursor;
	int	matched;

	eval = *eptr;
	dptr = getNextChar(dataType,data,NULL);
	while (eval != '\0')
	{
		if (eval == '%')
		{
			/*
			** ignore 0 or more characters
			** try to find the next character that must match
			** collapse special characters: %'s and _'s
			*/
			eptr++;
			eval = *eptr;
			while (eval != '\0')
			{
				if (eval == '%')
				{
					/* any number of %'s same as one */
					eptr++;
					eval = *eptr;
					continue;
				}
				if (eval == '_')
				{
					/* consume one char for each _ */
					if (dlen == 0)
					{
						/* data ended : no match. */
						return 0;
					}
					eptr++;
					eval = *eptr;
					dptr = getNextChar(dataType,data,dptr);
					dlen--;
					continue;
				}
				break;
			}

			/*
			** special case of ending with enough characters
			** for wild cards.
			*/
			if (eval == '\0')
			{
				return 1;
			}

			if (eval == '\\')
			{
				/*
				** backslash escapes everything including
				** itself, so skip it always.
				*/
				eptr++;
				eval = *eptr;
				if (eval == '\0')
				{
					/*
					** end with match (skipping an
					** ending backslash)
					*/
					return 1;
				}
			}

			/*
			** At this point, we have a non-special character
			** to look for in data, then we recursively match
			** remainder of expression at points where that
			** character exists in the data.
			**
			** note: leave eval alone, but eptr positioned for
			** recursion.
			*/
			eptr++;
			while (dlen != 0)
			{
				/* scan forward to the next occurrence of eval */
				while (dlen != 0)
				{
					if (ignoreCase)
					{
						/*
						** <ctype.h> functions need a
						** value representable as
						** unsigned char
						*/
						if (toupper((unsigned char)*dptr) ==
						    toupper((unsigned char)eval))
						{
							break;
						}
					}
					else
					{
						if (*dptr == eval)
						{
							break;
						}
					}
					dptr = getNextChar(dataType,data,dptr);
					dlen--;
				}
				if (dlen == 0)
				{
					/* there is no eval in data */
					return 0;
				}

				/*
				** we matched eval and need to check
				** rest of exp and data
				*/
				if (dataType == TEXT_TYPE)
				{
					/*
					** Recurse on a duplicate cursor so
					** that a failed attempt leaves data
					** where it is.  The duplicate is
					** released on both outcomes; it used
					** to be freed only on a match, which
					** leaked it on every failed attempt.
					** (Assumes varcharDupCursor() hands
					** back memory the caller must free(),
					** as the original match path did.)
					*/
					cursor = varcharDupCursor(data);
					matched = likeTest(cursor, eptr,
						dlen - 1, ignoreCase, dataType);
					free(cursor);
					if (matched)
					{
						/* match */
						return 1;
					}
					dptr = getNextChar(dataType,data,dptr);
					dlen--;
				}
				else
				{
					dptr = getNextChar(dataType,data,dptr);
					dlen--;
					if (likeTest(dptr, eptr, dlen,
						ignoreCase, dataType))
					{
						/* match */
						return 1;
					}
				}
				/*
				** else just try to see if this position
				** is an eval
				*/
			} /* end try to match rest */

			/* There is something after % and it didn't match */
			return 0;
		} /* end if % */

		if (eval == '_')
		{
			/* try to consume one char */
			if (dlen == 0)
			{
				/* data ended first: no match. */
				return 0;
			}
			eptr++;
			eval = *eptr;
			/*
			** Step with getNextChar() like every other advance
			** in this routine.  This branch used a bare dptr++,
			** which is the same thing for CHAR_TYPE but does not
			** go through varcharGetNext() for TEXT_TYPE.
			*/
			dptr = getNextChar(dataType,data,dptr);
			dlen--;
			continue;
		}

		if (eval == '\\')
		{
			/*
			** backslash escapes everything including self, so
			** skip it always.
			*/
			eptr++;
			eval = *eptr;
			if (eval == '\0')
			{
				/*
				** if dlen == 0 then end with match
				** (skipping an ending backslash) else
				** expr ended before data
				*/
				return (dlen == 0);
			}
			/* Fall through */
		}

		if (dlen == 0)
		{
			/* data ends before expr */
			return 0;
		}

		if (ignoreCase)
		{
			if (toupper((unsigned char)eval) !=
			    toupper((unsigned char)*dptr))
			{
				return(0);
			}
		}
		else
		{
			if (eval != *dptr)
			{
				return(0);
			}
		}

		/* This character matched, consume it and continue */
		eptr++;
		eval = *eptr;
		dptr = getNextChar(dataType,data,dptr);
		dlen--;
	}

	/* if we matched everything, return true. */
	return (dlen == 0);
}

/* RNS
 * Non-ANSI, full-functioned regular expressions courtesy of
 * Henry Spencer.
 */

/*
 * RLIKE_DATA_MAXLEN -- the maximum length of a string that the
 * static data buffer can hold in order to use it rather than malloc.
 * 1024 is big enough for full pathnames on most UNIX boxes.
 */
#define RLIKE_DATA_MAXLEN 1024

/*
 * rLikeBuffer -- a static character buffer that should be faster
 * than using malloc. If RLIKE_DATA_MAXLEN is too small, then
 * malloc is used.
 */
static char rLikeBuffer[RLIKE_DATA_MAXLEN + 1];

/*
 * RLIKE_NUM_REGEXPS -- the number of regular expression compilations
 * to cache (i.e., the RLIKE cache size). RLIKE_NUM_REGEXPS must be
 * at least 1, but can be as large as needed. Normally, a small number
 * like 5 is probably about right. However, if you have an application
 * or set of applications that repeatedly use the same set of expressions,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -