📄 regc_locale.c
字号:
- before - is celt x before celt y, for purposes of range legality? ^ static int before(celt, celt); */static int /* predicate */before(x, y) celt x, y; /* collating elements */{ /* trivial because no MCCEs */ if (x < y) { return 1; } return 0;}/* - eclass - supply cvec for an equivalence class * Must include case counterparts on request. ^ static struct cvec *eclass(struct vars *, celt, int); */static struct cvec *eclass(v, c, cases) struct vars *v; /* context */ celt c; /* Collating element representing * the equivalence class. */ int cases; /* all cases? */{ struct cvec *cv; /* crude fake equivalence class for testing */ if ((v->cflags®_FAKE) && c == 'x') { cv = getcvec(v, 4, 0, 0); addchr(cv, (chr)'x'); addchr(cv, (chr)'y'); if (cases) { addchr(cv, (chr)'X'); addchr(cv, (chr)'Y'); } return cv; } /* otherwise, none */ if (cases) { return allcases(v, c); } cv = getcvec(v, 1, 0, 0); assert(cv != NULL); addchr(cv, (chr)c); return cv;}/* - cclass - supply cvec for a character class * Must include case counterparts on request. ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); */static struct cvec *cclass(v, startp, endp, cases) struct vars *v; /* context */ chr *startp; /* where the name starts */ chr *endp; /* just past the end of the name */ int cases; /* case-independent? */{ size_t len; struct cvec *cv = NULL; Tcl_DString ds; CONST char *np; char **namePtr; int i, index; /* * The following arrays define the valid character class names. */ static char *classNames[] = { "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit", NULL }; enum classes { CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT }; /* * Extract the class name */ len = endp - startp; Tcl_DStringInit(&ds); np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); /* * Remap lower and upper to alpha if the match is case insensitive. */ if (cases && len == 5 && (strncmp("lower", np, 5) == 0 || strncmp("upper", np, 5) == 0)) { np = "alpha"; } /* * Map the name to the corresponding enumerated value. */ index = -1; for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { index = i; break; } } Tcl_DStringInit(&ds); if (index == -1) { ERR(REG_ECTYPE); return NULL; } /* * Now compute the character class contents. */ switch((enum classes) index) { case CC_PRINT: case CC_ALNUM: cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); if (cv) { for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); } for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, alphaRangeTable[i].end); } for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); } } break; case CC_ALPHA: cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0); if (cv) { for (i=0 ; i<NUM_ALPHA_RANGE ; i++) { addrange(cv, alphaRangeTable[i].start, alphaRangeTable[i].end); } for (i=0 ; i<NUM_ALPHA_CHAR ; i++) { addchr(cv, alphaCharTable[i]); } } break; case CC_ASCII: cv = getcvec(v, 0, 1, 0); if (cv) { addrange(cv, 0, 0x7f); } break; case CC_BLANK: cv = getcvec(v, 2, 0, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: cv = getcvec(v, 0, 2, 0); addrange(cv, 0x0, 0x1f); addrange(cv, 0x7f, 0x9f); break; case CC_DIGIT: cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0); if (cv) { for (i=0 ; i<NUM_DIGIT_RANGE ; i++) { addrange(cv, digitRangeTable[i].start, digitRangeTable[i].end); } } break; case CC_PUNCT: cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0); if (cv) { for (i=0 ; i<NUM_PUNCT_RANGE ; i++) { addrange(cv, punctRangeTable[i].start, punctRangeTable[i].end); } for (i=0 ; i<NUM_PUNCT_CHAR ; i++) { addchr(cv, punctCharTable[i]); } } break; case CC_XDIGIT: /* * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no * idea how to define the digits 'a' through 'f' in * non-western locales. The concept is quite possibly non * portable, or only used in contextx where the characters * used would be the western ones anyway! Whatever is * actually the case, the number of ranges is fixed (until * someone comes up with a better arrangement!) */ cv = getcvec(v, 0, 3, 0); if (cv) { addrange(cv, '0', '9'); addrange(cv, 'a', 'f'); addrange(cv, 'A', 'F'); } break; case CC_SPACE: cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0); if (cv) { for (i=0 ; i<NUM_SPACE_RANGE ; i++) { addrange(cv, spaceRangeTable[i].start, spaceRangeTable[i].end); } for (i=0 ; i<NUM_SPACE_CHAR ; i++) { addchr(cv, spaceCharTable[i]); } } break; case CC_LOWER: cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0); if (cv) { for (i=0 ; i<NUM_LOWER_RANGE ; i++) { addrange(cv, lowerRangeTable[i].start, lowerRangeTable[i].end); } for (i=0 ; i<NUM_LOWER_CHAR ; i++) { addchr(cv, lowerCharTable[i]); } } break; case CC_UPPER: cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0); if (cv) { for (i=0 ; i<NUM_UPPER_RANGE ; i++) { addrange(cv, upperRangeTable[i].start, upperRangeTable[i].end); } for (i=0 ; i<NUM_UPPER_CHAR ; i++) { addchr(cv, upperCharTable[i]); } } break; case CC_GRAPH: cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0); if (cv) { for (i=0 ; i<NUM_GRAPH_RANGE ; i++) { addrange(cv, graphRangeTable[i].start, graphRangeTable[i].end); } for (i=0 ; i<NUM_GRAPH_CHAR ; i++) { addchr(cv, graphCharTable[i]); } } break; } if (cv == NULL) { ERR(REG_ESPACE); } return cv;}/* - allcases - supply cvec for all case counterparts of a chr (including itself) * This is a shortcut, preferably an efficient one, for simple characters; * messy cases are done via range(). ^ static struct cvec *allcases(struct vars *, pchr); */static struct cvec *allcases(v, pc) struct vars *v; /* context */ pchr pc; /* character to get case equivs of */{ struct cvec *cv; chr c = (chr)pc; chr lc, uc, tc; lc = Tcl_UniCharToLower((chr)c); uc = Tcl_UniCharToUpper((chr)c); tc = Tcl_UniCharToTitle((chr)c); if (tc != uc) { cv = getcvec(v, 3, 0, 0); addchr(cv, tc); } else { cv = getcvec(v, 2, 0, 0); } addchr(cv, lc); if (lc != uc) { addchr(cv, uc); } return cv;}/* - cmp - chr-substring compare * Backrefs need this. It should preferably be efficient. * Note that it does not need to report anything except equal/unequal. * Note also that the length is exact, and the comparison should not * stop at embedded NULs! ^ static int cmp(CONST chr *, CONST chr *, size_t); */static int /* 0 for equal, nonzero for unequal */cmp(x, y, len) CONST chr *x, *y; /* strings to compare */ size_t len; /* exact length of comparison */{ return memcmp(VS(x), VS(y), len*sizeof(chr));}/* - casecmp - case-independent chr-substring compare * REG_ICASE backrefs need this. It should preferably be efficient. * Note that it does not need to report anything except equal/unequal. * Note also that the length is exact, and the comparison should not * stop at embedded NULs! ^ static int casecmp(CONST chr *, CONST chr *, size_t); */static int /* 0 for equal, nonzero for unequal */casecmp(x, y, len) CONST chr *x, *y; /* strings to compare */ size_t len; /* exact length of comparison */{ for (; len > 0; len--, x++, y++) { if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { return 1; } } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -