regc_locale.c

来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C语言 代码 · 共 1,179 行 · 第 1/4 页

C
1,179
字号
	    index = i;
	    break;
	}
    }
    if (index == -1) {
	ERR(REG_ECTYPE);
	return NULL;
    }
    
    /*
     * Now compute the character class contents.
     */

    switch((enum classes) index) {
    case CC_PRINT:
    case CC_ALNUM:
	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
		addchr(cv, alphaCharTable[i]);
	    }
	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
		addrange(cv, alphaRangeTable[i].start,
			alphaRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
		addrange(cv, digitRangeTable[i].start,
			digitRangeTable[i].end);
	    }
	}
	break;
    case CC_ALPHA:
	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
		addrange(cv, alphaRangeTable[i].start,
			alphaRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
		addchr(cv, alphaCharTable[i]);
	    }
	}
	break;
    case CC_ASCII:
	cv = getcvec(v, 0, 1, 0);
	if (cv) {
	    addrange(cv, 0, 0x7f);
	}
	break;
    case CC_BLANK:
	cv = getcvec(v, 2, 0, 0);
	addchr(cv, '\t');
	addchr(cv, ' ');
	break;
    case CC_CNTRL:
	cv = getcvec(v, 0, 2, 0);
	addrange(cv, 0x0, 0x1f);
	addrange(cv, 0x7f, 0x9f);
	break;
    case CC_DIGIT:
	cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
	if (cv) {	
	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
		addrange(cv, digitRangeTable[i].start,
			digitRangeTable[i].end);
	    }
	}
	break;
    case CC_PUNCT:
	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
		addrange(cv, punctRangeTable[i].start,
			punctRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
		addchr(cv, punctCharTable[i]);
	    }
	}
	break;
    case CC_XDIGIT:
	/*
	 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
	 * idea how to define the digits 'a' through 'f' in
	 * non-western locales.  The concept is quite possibly non
	 * portable, or only used in contexts where the characters
	 * used would be the western ones anyway!  Whatever is
	 * actually the case, the number of ranges is fixed (until
	 * someone comes up with a better arrangement!)
	 */
	cv = getcvec(v, 0, 3, 0);
	if (cv) {	
	    addrange(cv, '0', '9');
	    addrange(cv, 'a', 'f');
	    addrange(cv, 'A', 'F');
	}
	break;
    case CC_SPACE:
	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
		addrange(cv, spaceRangeTable[i].start,
			spaceRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
		addchr(cv, spaceCharTable[i]);
	    }
	}
	break;
    case CC_LOWER:
	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
		addrange(cv, lowerRangeTable[i].start,
			lowerRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
		addchr(cv, lowerCharTable[i]);
	    }
	}
	break;
    case CC_UPPER:
	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
		addrange(cv, upperRangeTable[i].start,
			upperRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
		addchr(cv, upperCharTable[i]);
	    }
	}
	break;
    case CC_GRAPH:
	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
	if (cv) {
	    for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
		addrange(cv, graphRangeTable[i].start,
			graphRangeTable[i].end);
	    }
	    for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
		addchr(cv, graphCharTable[i]);
	    }
	}
	break;
    }
    if (cv == NULL) {
	ERR(REG_ESPACE);
    }
    return cv;
}

#else   /* wxUSE_UNICODE */

/*
 * cclass - build the cvec for a named character class.
 *
 * This is the variant compiled in the #else branch of the wxUSE_UNICODE
 * conditional.  The name [startp, endp) is looked up in the cclasses
 * table; an unknown name reports REG_ECTYPE and yields NULL.  When
 * "cases" is nonzero, the names "lower" and "upper" are looked up as
 * "alpha" instead.
 *
 * Members come from the table entry's fixed "chars" string when the entry
 * has no isfunc predicate, or when setlocale(LC_CTYPE, NULL) returns NULL
 * or "C".  Otherwise every code 0..255 accepted by the isfunc predicate is
 * a member.  A NULL from getcvec reports REG_ESPACE and yields NULL.
 */
static struct cvec *
cclass(v, startp, endp, cases)
struct vars *v;
chr *startp;                    /* where the name starts */
chr *endp;                      /* just past the end of the name */
int cases;                      /* case-independent? */
{
    size_t namelen = endp - startp;
    chr *name = startp;
    struct cclass *entry;
    struct cvec *cv;
    const char *locale;
    char *member;
    char accepted[256];
    int naccepted;
    int code;
    int k;

    /* case-independent matching folds "lower" and "upper" into "alpha" */
    if (cases && namelen == 5) {
        if (wxStrncmp(_T("lower"), name, 5) == 0 ||
            wxStrncmp(_T("upper"), name, 5) == 0) {
            name = _T("alpha");
        }
    }

    /* walk the table until the name matches or the NULL sentinel is hit */
    entry = cclasses;
    while (entry->name != NULL) {
        if (wxStrlen_(entry->name) == namelen &&
            wxStrncmp(entry->name, name, namelen) == 0) {
            break;
        }
        entry++;
    }
    if (entry->name == NULL) {
        ERR(REG_ECTYPE);
        return NULL;
    }

    locale = setlocale(LC_CTYPE, NULL);

    if (entry->isfunc && locale != NULL && strcmp(locale, "C") != 0) {
        /* ask the predicate about every single-byte code */
        naccepted = 0;
        code = 0;
        while (code < 256) {
            if (entry->isfunc(code)) {
                accepted[naccepted++] = code;
            }
            code++;
        }

        /* size the vector to exactly the accepted codes */
        cv = getcvec(v, naccepted, 0, 0);
        if (cv == NULL) {
            ERR(REG_ESPACE);
            return NULL;
        }

        for (k = 0; k < naccepted; k++) {
            addchr(cv, accepted[k]);
        }
    } else {
        /* no predicate, or no usable locale: use the fixed member list */
        cv = getcvec(v, (int)strlen(entry->chars), 0, 0);
        if (cv == NULL) {
            ERR(REG_ESPACE);
            return NULL;
        }

        member = entry->chars;
        while (*member != '\0') {
            addchr(cv, (chr)*member);
            member++;
        }
    }

    return cv;
}

#endif  /* !wxUSE_UNICODE */


/*
 - allcases - supply cvec for all case counterparts of a chr (including itself)
 * This is a shortcut, preferably an efficient one, for simple characters;
 * messy cases are done via range().
 ^ static struct cvec *allcases(struct vars *, pchr);
 */
static struct cvec *
allcases(v, pc)
    struct vars *v;			/* context */
    pchr pc;				/* character to get case equivs of */
{
    struct cvec *cv;
    chr c = (chr)pc;
    chr lc, uc, tc;

    lc = Tcl_UniCharToLower((chr)c);
    uc = Tcl_UniCharToUpper((chr)c);
    tc = Tcl_UniCharToTitle((chr)c);

    if (tc != uc) {
	cv = getcvec(v, 3, 0, 0);
	addchr(cv, tc);
    } else {
	cv = getcvec(v, 2, 0, 0);
    }
    addchr(cv, lc);
    if (lc != uc) {
	addchr(cv, uc);
    }
    return cv;
}

/*
 - cmp - chr-substring compare
 * Used for backreferences, so it should be cheap.  Only equal/unequal is
 * reported.  The length is exact and is counted in chrs; embedded NULs do
 * not end the comparison.
 ^ static int cmp(CONST chr *, CONST chr *, size_t);
 */
static int				/* 0 for equal, nonzero for unequal */
cmp(x, y, len)
    CONST chr *x, *y;			/* strings to compare */
    size_t len;				/* exact length of comparison */
{
    size_t nbytes = len * sizeof(chr);	/* memcmp counts bytes, not chrs */

    return memcmp(VS(x), VS(y), nbytes);
}

/*
 - casecmp - case-independent chr-substring compare
 * REG_ICASE backrefs need this.  It should preferably be efficient.
 * Note that it does not need to report anything except equal/unequal.
 * Note also that the length is exact, and the comparison should not
 * stop at embedded NULs!
 ^ static int casecmp(CONST chr *, CONST chr *, size_t);
 */
static int				/* 0 for equal, nonzero for unequal */
casecmp(x, y, len)
    CONST chr *x, *y;			/* strings to compare */
    size_t len;				/* exact length of comparison */
{
    for (; len > 0; len--, x++, y++) {
	if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
	    return 1;
	}
    }
    return 0;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?