⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tclregexp.c

📁 tcl是工具命令语言
💻 C
📖 第 1 页 / 共 2 页
字号:
/*  * tclRegexp.c -- * *	This file contains the public interfaces to the Tcl regular *	expression mechanism. * * Copyright (c) 1998 by Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * * RCS: @(#) $Id: tclRegexp.c,v 1.14 2002/01/17 03:03:12 dgp Exp $ */#include "tclInt.h"#include "tclPort.h"#include "tclRegexp.h"/* *---------------------------------------------------------------------- * The routines in this file use Henry Spencer's regular expression * package contained in the following additional source files: * *	regc_color.c	regc_cvec.c	regc_lex.c *	regc_nfa.c	regcomp.c	regcustom.h *	rege_dfa.c	regerror.c	regerrs.h *	regex.h		regexec.c	regfree.c *	regfronts.c	regguts.h * * Copyright (c) 1998 Henry Spencer.  All rights reserved. *  * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results.  The author * thanks all of them.  *  * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. *  * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. *  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * *** NOTE: this code has been altered slightly for use in Tcl: *** * *** 1. Names have been changed, e.g. from re_comp to		 *** * ***    TclRegComp, to avoid clashes with other 		 *** * ***    regexp implementations used by applications. 		 *** *//* * Thread local storage used to maintain a per-thread cache of compiled * regular expressions. */#define NUM_REGEXPS 30typedef struct ThreadSpecificData {    int initialized;		/* Set to 1 when the module is initialized. */    char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled				 * regular expression patterns.	 NULL				 * means that this slot isn't used.				 * Malloc-ed. */    int patLengths[NUM_REGEXPS];/* Number of non-null characters in				 * corresponding entry in patterns.				 * -1 means entry isn't used. */    struct TclRegexp *regexps[NUM_REGEXPS];				/* Compiled forms of above strings.  Also				 * malloc-ed, or NULL if not in use yet. */} ThreadSpecificData;static Tcl_ThreadDataKey dataKey;/* * Declarations for functions used only in this file. */static TclRegexp *	CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp,			    CONST char *pattern, int length, int flags));static void		DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr,			    Tcl_Obj *copyPtr));static void		FinalizeRegexp _ANSI_ARGS_((ClientData clientData));static void		FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));static void		FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));static int		RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp,			    Tcl_RegExp re, CONST Tcl_UniChar *uniString,			    int numChars, int nmatches, int flags));static int		SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp,			    Tcl_Obj *objPtr));/* * The regular expression Tcl object type.  This serves as a cache * of the compiled form of the regular expression. */Tcl_ObjType tclRegexpType = {    "regexp",				/* name */    FreeRegexpInternalRep,		/* freeIntRepProc */    DupRegexpInternalRep,		/* dupIntRepProc */    NULL,				/* updateStringProc */    SetRegexpFromAny			/* setFromAnyProc */};/* *---------------------------------------------------------------------- * * Tcl_RegExpCompile -- * *	Compile a regular expression into a form suitable for fast *	matching.  This procedure is DEPRECATED in favor of the *	object version of the command. * * Results: *	The return value is a pointer to the compiled form of string, *	suitable for passing to Tcl_RegExpExec.  This compiled form *	is only valid up until the next call to this procedure, so *	don't keep these around for a long time!  If an error occurred *	while compiling the pattern, then NULL is returned and an error *	message is left in the interp's result. * * Side effects: *	Updates the cache of compiled regexps. * *---------------------------------------------------------------------- */Tcl_RegExpTcl_RegExpCompile(interp, string)    Tcl_Interp *interp;		/* For use in error reporting and				 * to access the interp regexp cache. */    CONST char *string;		/* String for which to produce				 * compiled regular expression. */{    return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string),	    REG_ADVANCED);}/* *---------------------------------------------------------------------- * * Tcl_RegExpExec -- * *	Execute the regular expression matcher using a compiled form *	of a regular expression and save information about any match *	that is found. * * Results: *	If an error occurs during the matching operation then -1 *	is returned and the interp's result contains an error message. *	Otherwise the return value is 1 if a matching range is *	found and 0 if there is no matching range. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_RegExpExec(interp, re, string, start)    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */    Tcl_RegExp re;		/* Compiled regular expression;  must have				 * been returned by previous call to				 * Tcl_GetRegExpFromObj. */    CONST char *string;		/* String against which to match re. */    CONST char *start;		/* If string is part of a larger string,				 * this identifies beginning of larger				 * string, so that "^" won't match. */{    int flags, result, numChars;    TclRegexp *regexp = (TclRegexp *)re;    Tcl_DString ds;    CONST Tcl_UniChar *ustr;    /*     * If the starting point is offset from the beginning of the buffer,     * then we need to tell the regexp engine not to match "^".     */    if (string > start) {	flags = REG_NOTBOL;    } else {	flags = 0;    }    /*     * Remember the string for use by Tcl_RegExpRange().     */    regexp->string = string;    regexp->objPtr = NULL;    /*     * Convert the string to Unicode and perform the match.     */    Tcl_DStringInit(&ds);    ustr = Tcl_UtfToUniCharDString(string, -1, &ds);    numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar);    result = RegExpExecUniChar(interp, re, ustr, numChars,	    -1 /* nmatches */, flags);    Tcl_DStringFree(&ds);    return result;}/* *--------------------------------------------------------------------------- * * Tcl_RegExpRange -- * *	Returns pointers describing the range of a regular expression match, *	or one of the subranges within the match. * * Results: *	The variables at *startPtr and *endPtr are modified to hold the *	addresses of the endpoints of the range given by index.  If the *	specified range doesn't exist then NULLs are returned. * * Side effects: *	None. * *--------------------------------------------------------------------------- */voidTcl_RegExpRange(re, index, startPtr, endPtr)    Tcl_RegExp re;		/* Compiled regular expression that has				 * been passed to Tcl_RegExpExec. */    int index;			/* 0 means give the range of the entire				 * match, > 0 means give the range of				 * a matching subrange. */    CONST char **startPtr;	/* Store address of first character in				 * (sub-) range here. */    CONST char **endPtr;	/* Store address of character just after last				 * in (sub-) range here. */{    TclRegexp *regexpPtr = (TclRegexp *) re;    CONST char *string;    if ((size_t) index > regexpPtr->re.re_nsub) {	*startPtr = *endPtr = NULL;    } else if (regexpPtr->matches[index].rm_so < 0) {	*startPtr = *endPtr = NULL;    } else {	if (regexpPtr->objPtr) {	    string = Tcl_GetString(regexpPtr->objPtr);	} else {	    string = regexpPtr->string;	}	*startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so);	*endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo);    }}/* *--------------------------------------------------------------------------- * * RegExpExecUniChar -- * *	Execute the regular expression matcher using a compiled form of a *	regular expression and save information about any match that is *	found. * * Results: *	If an error occurs during the matching operation then -1 is *	returned and an error message is left in interp's result. *	Otherwise the return value is 1 if a matching range was found or *	0 if there was no matching range. * * Side effects: *	None. * *---------------------------------------------------------------------- */static intRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags)    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */    Tcl_RegExp re;		/* Compiled regular expression; returned by				 * a previous call to Tcl_GetRegExpFromObj */    CONST Tcl_UniChar *wString;	/* String against which to match re. */    int numChars;		/* Length of Tcl_UniChar string (must				 * be >= 0). */    int nmatches;		/* How many subexpression matches (counting				 * the whole match as subexpression 0) are				 * of interest.  -1 means "don't know". */    int flags;			/* Regular expression flags. */{    int status;    TclRegexp *regexpPtr = (TclRegexp *) re;    size_t last = regexpPtr->re.re_nsub + 1;    size_t nm = last;    if (nmatches >= 0 && (size_t) nmatches < nm) {	nm = (size_t) nmatches;    }    status = TclReExec(&regexpPtr->re, wString, (size_t) numChars,	    &regexpPtr->details, nm, regexpPtr->matches, flags);    /*     * Check for errors.     */    if (status != REG_OKAY) {	if (status == REG_NOMATCH) {	    return 0;	}	if (interp != NULL) {	    TclRegError(interp, "error while matching regular expression: ",		    status);	}	return -1;    }    return 1;}/* *--------------------------------------------------------------------------- * * TclRegExpRangeUniChar -- * *	Returns pointers describing the range of a regular expression match, *	or one of the subranges within the match, or the hypothetical range *	represented by the rm_extend field of the rm_detail_t. * * Results: *	The variables at *startPtr and *endPtr are modified to hold the *	offsets of the endpoints of the range given by index.  If the *	specified range doesn't exist then -1s are supplied. * * Side effects: *	None. * *--------------------------------------------------------------------------- */voidTclRegExpRangeUniChar(re, index, startPtr, endPtr)    Tcl_RegExp re;		/* Compiled regular expression that has				 * been passed to Tcl_RegExpExec. */    int index;			/* 0 means give the range of the entire				 * match, > 0 means give the range of				 * a matching subrange, -1 means the				 * range of the rm_extend field. */    int *startPtr;		/* Store address of first character in				 * (sub-) range here. */    int *endPtr;		/* Store address of character just after last				 * in (sub-) range here. */{    TclRegexp *regexpPtr = (TclRegexp *) re;    if ((regexpPtr->flags&REG_EXPECT) && index == -1) {	*startPtr = regexpPtr->details.rm_extend.rm_so;	*endPtr = regexpPtr->details.rm_extend.rm_eo;    } else if ((size_t) index > regexpPtr->re.re_nsub) {	*startPtr = -1;	*endPtr = -1;    } else {	*startPtr = regexpPtr->matches[index].rm_so;	*endPtr = regexpPtr->matches[index].rm_eo;    }}/* *---------------------------------------------------------------------- * * Tcl_RegExpMatch -- * *	See if a string matches a regular expression. * * Results: *	If an error occurs during the matching operation then -1 *	is returned and the interp's result contains an error message. *	Otherwise the return value is 1 if "string" matches "pattern" *	and 0 otherwise. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_RegExpMatch(interp, string, pattern)    Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */    CONST char *string;		/* String. */    CONST char *pattern;	/* Regular expression to match against				 * string. */{    Tcl_RegExp re;    re = Tcl_RegExpCompile(interp, pattern);    if (re == NULL) {	return -1;    }    return Tcl_RegExpExec(interp, re, string, string);}/* *---------------------------------------------------------------------- * * Tcl_RegExpExecObj -- * *	Execute a precompiled regexp against the given object. * * Results: *	If an error occurs during the matching operation then -1 *	is returned and the interp's result contains an error message. *	Otherwise the return value is 1 if "string" matches "pattern" *	and 0 otherwise. * * Side effects: *	Converts the object to a Unicode object. * *---------------------------------------------------------------------- */intTcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags)    Tcl_Interp *interp;		/* Interpreter to use for error reporting. */    Tcl_RegExp re;		/* Compiled regular expression;  must have				 * been returned by previous call to				 * Tcl_GetRegExpFromObj. */    Tcl_Obj *objPtr;		/* String against which to match re. */    int offset;			/* Character index that marks where matching				 * should begin. */    int nmatches;		/* How many subexpression matches (counting				 * the whole match as subexpression 0) are				 * of interest.  -1 means all of them. */    int flags;			/* Regular expression execution flags. */{    TclRegexp *regexpPtr = (TclRegexp *) re;    Tcl_UniChar *udata;    int length;    /*     * Save the target object so we can extract strings from it later.     */    regexpPtr->string = NULL;    regexpPtr->objPtr = objPtr;    udata = Tcl_GetUnicodeFromObj(objPtr, &length);    if (offset > length) {	offset = length;    }    udata += offset;    length -= offset;        return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);}/* *---------------------------------------------------------------------- * * Tcl_RegExpMatchObj -- * *	See if an object matches a regular expression. * * Results: *	If an error occurs during the matching operation then -1 *	is returned and the interp's result contains an error message. *	Otherwise the return value is 1 if "string" matches "pattern" *	and 0 otherwise. * * Side effects: *	Changes the internal rep of the pattern and string objects. * *---------------------------------------------------------------------- */intTcl_RegExpMatchObj(interp, stringObj, patternObj)    Tcl_Interp *interp;		/* Used for error reporting. May be NULL. */    Tcl_Obj *stringObj;		/* Object containing the String to search. */    Tcl_Obj *patternObj;	/* Regular expression to match against				 * string. */{    Tcl_RegExp re;    re = Tcl_GetRegExpFromObj(interp, patternObj,	    TCL_REG_ADVANCED | TCL_REG_NOSUB);    if (re == NULL) {	return -1;    }    return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */,	    0 /* nmatches */, 0 /* flags */);}/* *---------------------------------------------------------------------- * * Tcl_RegExpGetInfo -- * *	Retrieve information about the current match. * * Results: *	None. *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -