lregex.c

来自「ultraEdit的Ctag标签工具的实现源代码」· C语言代码 · 共 697 行 · 第 1/2 页
697 行
/*
*   $Id: lregex.c,v 1.6 2003/07/11 01:21:53 darren Exp $
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for applying regular expression matching.
*
*   The code for utilizing the GNU regex package with regard to processing the
*   regex option and checking for regex matches was adapted from routines in
*   GNU etags.
*/

/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */

#include <string.h>

#ifdef HAVE_REGCOMP
# include <ctype.h>
# include <stddef.h>
# ifdef HAVE_SYS_TYPES_H
#  include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
# endif
# include "regex.h"
#endif

#include "debug.h"
#include "entry.h"
#include "parse.h"
#include "read.h"
#include "routines.h"

#ifdef HAVE_REGEX

/*
*   MACROS
*/

/* Back-references \0 through \9 */
#define BACK_REFERENCE_COUNT 10

#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
# define POSIX_REGEX
#endif

#define REGEX_NAME "Regex"

/*
*   DATA DECLARATIONS
*/
#if defined (POSIX_REGEX)

/* Kind information attached to a tag-type regex pattern. */
struct sKind {
    boolean enabled;	/* makeRegexTag() emits a tag only when this is true */
    char letter;	/* copied into the `kind' field of the tag entry */
    char* name;		/* copied into the `kindName' field of the tag entry */
    char* description;	/* descriptive text; not read in the visible code */
};

/* Tells which member of the union inside regexPattern is in use. */
enum pType { PTRN_TAG, PTRN_CALLBACK };

/* One registered pattern: a compiled regex plus what to do on a match. */
typedef struct {
    regex_t *pattern;		/* compiled regex; released by clearPatternSet() */
    enum pType type;		/* selects the active member of `u' */
    union {
	/* used when type == PTRN_TAG */
	struct {
	    char *name_pattern;	/* released by clearPatternSet() */
	    struct sKind kind;	/* kind assigned to tags made from this pattern */
	} tag;
	/* used when type == PTRN_CALLBACK */
	struct {
	    regexCallback function;
	} callback;
    } u;
} regexPattern;

#endif

/* The patterns registered for a single language. */
typedef struct {
    regexPattern *patterns;	/* array of `count' entries; NULL while empty */
    unsigned int count;		/* number of entries in `patterns' */
} patternSet;

/*
*   DATA DEFINITIONS
*/

/* NOTE(review): not referenced in the visible part of this file; presumably
 * flags the regex support as unusable -- confirm against its users.
 */
static boolean regexBroken = FALSE;

/* Array of pattern sets, indexed by language; holds SetUpper + 1 entries */
static patternSet* Sets = NULL;
static int SetUpper = -1;	/* upper language index in list (inclusive); -1 while `Sets' is empty */

/*
*   FUNCTION DEFINITIONS
*/

static void clearPatternSet (const langType language)
{
    if (language < SetUpper)
    {
	patternSet* const set = Sets + language;
	unsigned int i;
	for (i = 0  ;  i < set->count  ;  ++i)
	{
#if defined (POSIX_REGEX)
	    regfree (set->patterns [i].pattern);
#endif
	    eFree (set->patterns [i].pattern);
	    set->patterns [i].pattern = NULL;

	    if (set->patterns [i].type == PTRN_TAG)
	    {
		eFree (set->patterns [i].u.tag.name_pattern);
		set->patterns [i].u.tag.name_pattern = NULL;
	    }
	}
	if (set->patterns != NULL)
	    eFree (set->patterns);
	set->patterns = NULL;
	set->count = 0;
    }
}

/*
*   Regex pseudo-parser
*/

/*  Makes a tag entry named `name' carrying the letter and name of `kind'.
 *  Nothing is emitted when the kind is disabled.
 */
static void makeRegexTag (
	const vString* const name, const struct sKind* const kind)
{
    /* Must be verified before `kind' is dereferenced below. */
    Assert (kind != NULL);
    if (kind->enabled)
    {
	tagEntryInfo e;
	Assert (name != NULL  &&  vStringLength (name) > 0);
	initTagEntry (&e, vStringValue (name));
	e.kind     = kind->letter;
	e.kindName = kind->name;
	makeTagEntry (&e);
    }
}

/*
*   Regex pattern definition
*/

/* Take a string like "/blah/" and turn it into "blah", where the separator
 * is whatever the first character of `name' is. Copying stops at the first
 * unquoted separator, or at the end of the string if there is none.
 * A backslash quotes the character following it: a quoted separator yields
 * the separator itself, "\t" yields a Tab character, and any other quoted
 * character is copied together with its backslash. A backslash that is the
 * very last character of the string is dropped.
 * Works in place and null terminates the extracted string at `name'.
 * Returns a pointer to the terminating separator, or to the terminating
 * null character when no unquoted separator was found. An empty string is
 * returned unchanged.
 */
static char* scanSeparators (char* name)
{
    const char sep = name [0];
    char *copyto = name;

    /* An empty string has no separator; scanning would start at name [1],
     * which lies beyond the terminating null character.
     */
    if (sep == '\0')
	return name;

    for (++name ; *name != '\0' ; ++name)
    {
	if (*name == '\\')
	{
	    ++name;		/* step onto the quoted character */
	    if (*name == '\0')
		break;		/* trailing backslash: nothing to quote */
	    if (*name == sep)
		*copyto++ = sep;
	    else if (*name == 't')
		*copyto++ = '\t';
	    else
	    {
		/* Something else is quoted, so preserve the quote. */
		*copyto++ = '\\';
		*copyto++ = *name;
	    }
	}
	else if (*name == sep)
	    break;
	else
	    *copyto++ = *name;
    }
    *copyto = '\0';
    return name;
}

/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
 * character is whatever the first character of `regexp' is), by breaking it
 * up into null terminated strings, removing the separators, and expanding
 * '\t' into tabs. When complete, `regexp' points to the line matching
 * pattern, a pointer to the name matching pattern is written to `name', a
 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
 * to the trailing flags is written to `flags'. If the pattern is not in the
 * correct format, a warning is issued and a false value is returned; in that
 * case `kinds' and `flags' are left untouched.
 */
static boolean parseTagRegex (
	char* const regexp, char** const name,
	char** const kinds, char** const flags)
{
    boolean result = FALSE;
    /* Kept as a plain char so that the comparisons against the scanned text
     * below use the same signedness on both sides, even for a separator
     * outside the ASCII range.
     */
    const char separator = regexp [0];

    /* An empty string has no separator to scan past. */
    *name = (separator == '\0') ? regexp : scanSeparators (regexp);
    if (*regexp == '\0')
	error (WARNING, "empty regexp");
    else if (**name != separator)
	error (WARNING, "%s: incomplete regexp", regexp);
    else
    {
	char* const third = scanSeparators (*name);
	if (**name == '\0')
	    error (WARNING, "%s: regexp missing name pattern", regexp);
	/* only a non-empty name has a last character to inspect */
	else if ((*name) [strlen (*name) - 1] == '\\')
	    error (WARNING, "error in name pattern: \"%s\"", *name);
	if (*third != separator)
	    error (WARNING, "%s: regexp missing final separator", regexp);
	else
	{
	    char* const fourth = scanSeparators (third);
	    if (*fourth == separator)
	    {
		/* four fields: the third holds the kinds, the fourth the flags */
		*kinds = third;
		scanSeparators (fourth);
		*flags = fourth;
	    }
	    else
	    {
		/* three fields: no kinds given, the third holds the flags */
		*flags = third;
		*kinds = NULL;
	    }
	    result = TRUE;
	}
    }
    return result;
}

static void addCompiledTagPattern (
	const langType language, regex_t* const pattern,
	char* const name, const char kind, char* const kindName,
	char *const description)
{
    patternSet* set;
    regexPattern *ptrn;
    if (language > SetUpper)
    {
	int i;
	Sets = xRealloc (Sets, (language + 1), patternSet);
	for (i = SetUpper + 1  ;  i <= language  ;  ++i)
	{
	    Sets [i].patterns = NULL;
	    Sets [i].count = 0;
	}
	SetUpper = language;
    }
    set = Sets + language;
    set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
    ptrn = &set->patterns [set->count];
    set->count += 1;

    ptrn->pattern = pattern;
    ptrn->type    = PTRN_TAG;
    ptrn->u.tag.name_pattern = name;
    ptrn->u.tag.kind.enabled = TRUE;
    ptrn->u.tag.kind.letter  = kind;
    ptrn->u.tag.kind.name    = kindName;
    ptrn->u.tag.kind.description = description;
}

static void addCompiledCallbackPattern (
	const langType language, regex_t* const pattern,
	const regexCallback callback)
{
    patternSet* set;
    regexPattern *ptrn;
    if (language > SetUpper)
    {
	int i;
	Sets = xRealloc (Sets, (language + 1), patternSet);
	for (i = SetUpper + 1  ;  i <= language  ;  ++i)
	{
	    Sets [i].patterns = NULL;
	    Sets [i].count = 0;
	}
	SetUpper = language;
    }
    set = Sets + language;
    set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
    ptrn = &set->patterns [set->count];
    set->count += 1;

    ptrn->pattern = pattern;
    ptrn->type    = PTRN_CALLBACK;
    ptrn->u.callback.function = callback;
}

#if defined (POSIX_REGEX)

static regex_t* compileRegex (const char* const regexp, const char* const flags)
{
    int cflags = REG_EXTENDED | REG_NEWLINE;
    regex_t *result = NULL;
    int errcode;
    int i;
    for (i = 0  ; flags != NULL  &&  flags [i] != '\0'  ;  ++i)
    {
	switch ((int) flags [i])
	{
	    case 'b': cflags &= ~REG_EXTENDED; break;
	    case 'e': cflags |= REG_EXTENDED;  break;
	    case 'i': cflags |= REG_ICASE;     break;
	    default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
	}
    }
    result = xMalloc (1, regex_t);
    errcode = regcomp (result, regexp, cflags);
    if (errcode != 0)
    {
	char errmsg[256];
	regerror (errcode, result, errmsg, 256);
	error (WARNING, "%s", errmsg);
	regfree (result);
	eFree (result);
	result = NULL;
    }
    return result;
}

#endif

static void parseKinds (
	const char* const kinds, char* const kind, char** const kindName,
	char **description)
{
    *kind = '\0';
    *kindName = NULL;
    *description = NULL;
    if (kinds == NULL  ||  kinds [0] == '\0')
    {
	*kind = 'r';
	*kindName = eStrdup ("regex");
    }
    else if (kinds [0] != '\0')
lregex.c - 源码说明

本页面展示了「ultraEdit的Ctag标签工具的实现源代码」中的 lregex.c 源码文件，采用 C语言编程语言编写，共 697 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ultraEdit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?