📄 slregexp.c
字号:
/* ed style regular expressions */

/* Copyright (c) 1992, 1999, 2001, 2002, 2003 John E. Davis
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Perl Artistic License.
 */

#include "slinclud.h"

#include "slang.h"
#include "_slang.h"

/* Bitmap helpers: `b' is a byte array used as a bit set indexed by `n'. */
#define SET_BIT(b, n) b[(unsigned int) (n) >> 3] |= 1 << ((unsigned int) (n) % 8)
#define TEST_BIT(b, n) (b[(unsigned int)(n) >> 3] & (1 << ((unsigned int) (n) % 8)))

/* Base opcodes of the compiled pattern.  The matcher below reads one
 * opcode byte at a time; a zero byte ends the pattern.
 */
#define LITERAL 1
#define RANGE 2                        /* [...] */
#define ANY 3                          /* . */
#define BOL 4                          /* ^ */
#define EOL 5                          /* $ */
#define NTH_MATCH 6                    /* \1 \2 ... \9 */
#define OPAREN 7                       /* \( */
#define CPAREN 0x8                     /* \) */
#define ANY_DIGIT 0x9                  /* \d */
#define BOW 0xA                        /* \< */
#define EOW 0xB                        /* \> */
#if 0
#define NOT_LITERAL 0xC                /* \~ */
#endif

/* Quantifier flags, OR'ed onto a base opcode. */
#define STAR 0x80                      /* * */
#define LEAST_ONCE 0x40                /* + */
#define MAYBE_ONCE 0x20                /* ? */
#define MANY 0x10                      /* {n,m} */

/* The rest are additions */
/* Opcodes that switch case sensitivity on or off while matching. */
#define YES_CASE (STAR | BOL)
#define NO_CASE (STAR | EOL)

/* Both macros depend on a variable named `cs' being in scope at the point
 * of use: non-zero means case sensitive, so the character is used as is.
 */
#define UPPERCASE(x) (cs ? (x) : UPPER_CASE(x))
#define LOWERCASE(x) (cs ? (x) : LOWER_CASE(x))

/* Table indexed by byte value; a non-zero entry marks a word character.
 * NOTE(review): the code that fills this table is not in the part of the
 * file reviewed here.
 */
static unsigned char Word_Chars[256];
#define IS_WORD_CHAR(x) Word_Chars[(unsigned int) (x)]

#if 0
static int ctx->open_paren_number;
static char Closed_Paren_Matches[10];
static SLRegexp_Type *This_Reg;
static unsigned char *This_Str;
#endif

/* Per-call matcher state. */
typedef struct
{
   SLRegexp_Type *reg;                 /* compiled pattern; receives match offsets */
   unsigned char *str;                 /* start of the subject buffer */
   unsigned int len;                   /* length of the subject buffer */
   char closed_paren_matches[10];      /* [n] != 0 once group n has been closed */
   int open_paren_number;              /* number of \( seen so far on this attempt */
}
Re_Context_Type;

/* Match the text captured by group `n' again at `str'.
 *
 * Returns a pointer just past the repeated text, `str' itself when the
 * group captured an empty string, or NULL when the group index is out of
 * range, the group has not been closed, fewer bytes than the capture
 * remain before `estr', or the bytes differ.
 *
 * The comparison is an exact byte comparison: it ignores the current
 * case-sensitivity setting (a limitation inherited from the original
 * code).
 */
static unsigned char *do_nth_match (Re_Context_Type *ctx, int n, unsigned char *str, unsigned char *estr)
{
   unsigned char *bpos;

   /* The group number comes straight from the compiled pattern; refuse
    * anything that would index outside closed_paren_matches.
    */
   if ((n < 0) || (n >= (int) sizeof (ctx->closed_paren_matches)))
     return NULL;

   if (ctx->closed_paren_matches[n] == 0)
     return NULL;

   bpos = ctx->reg->beg_matches[n] + ctx->str;
   n = ctx->reg->end_matches[n];       /* end_matches holds the capture length */
   if (n == 0) return str;
   if (n > (int) (estr - str)) return NULL;

   /* The subject is length-delimited and may contain NUL bytes, so compare
    * raw bytes; strncmp would stop at the first NUL and report a match.
    */
   if (0 != memcmp (str, bpos, (unsigned int) n))
     return NULL;

   str += n;
   return str;
}

/* Try to match the compiled pattern `buf' against the text beginning
 * exactly at `str' (no searching).
 *
 *   ctx   matcher state; group bookkeeping in ctx and in ctx->reg
 *         (beg_matches / end_matches) is updated as groups open and close
 *   str   position in the subject at which the match must start
 *   estr  one past the last byte of the subject
 *   buf   compiled pattern, terminated by a zero opcode
 *   cs    non-zero for case-sensitive comparison
 *
 * Returns pointer to the end of the matched text, or NULL if the pattern
 * does not match at `str'.
 *
 * Operand layout as consumed below: LITERAL and NTH_MATCH are followed by
 * one byte, RANGE by a 32 byte bitmap, and the MANY forms by two further
 * bytes holding the minimum and maximum repeat counts.
 *
 * Quantified items first consume as much as they can, then jump to
 * match_rest, which tries the remainder of the pattern recursively and
 * gives back one byte at a time on failure.
 */
static unsigned char *regexp_looking_at (Re_Context_Type *ctx, register unsigned char *str, unsigned char *estr, unsigned char *buf, register int cs)
{
   register unsigned char p, p1;
   unsigned char *save_str, *tmpstr;
   int n, n0, n1;
   int save_num_open;
   char save_closed_matches[10];

   p = *buf++;

   while (p != 0)
     {
        switch ((unsigned char) p)
          {
           case BOW:
             if ((str != ctx->str)
                 && ((str >= estr)
                     || IS_WORD_CHAR(*(str - 1))
                     || (0 == IS_WORD_CHAR(*str)))) return NULL;
             break;

           case EOW:
             if ((str < estr)
                 && IS_WORD_CHAR (*str)) return NULL;
             break;

           case YES_CASE: cs = 1; break;
           case NO_CASE: cs = 0; break;

           case OPAREN:
             ctx->open_paren_number++;
             ctx->reg->beg_matches[ctx->open_paren_number] = (int) (str - ctx->str);
             break;

           case CPAREN:
             /* Close the most recently opened group that is still open. */
             n = ctx->open_paren_number;
             while (n > 0)
               {
                  if (ctx->closed_paren_matches[n] != 0)
                    {
                       n--;
                       continue;
                    }
                  ctx->closed_paren_matches[n] = 1;
                  ctx->reg->end_matches[n] = (unsigned int) (str - (ctx->str + ctx->reg->beg_matches[n]));
                  break;
               }
             break;
#ifdef NOT_LITERAL
           case NOT_LITERAL:
             if ((str >= estr) || (*buf == UPPERCASE(*str))) return (NULL);
             str++; buf++;
             break;

           case MAYBE_ONCE | NOT_LITERAL:
             save_str = str;
             if ((str < estr) && (*buf != UPPERCASE(*str))) str++;
             buf++;
             goto match_rest;

           case NOT_LITERAL | LEAST_ONCE:   /* match at least once */
             if ((str >= estr) || (UPPERCASE(*str) == UPPERCASE(*buf))) return (NULL);
             str++;
             /* fall through */
           case STAR | NOT_LITERAL:
             save_str = str; p1 = *buf;
             while ((str < estr) && (UPPERCASE(*str) != p1)) str++;
             buf++;
             goto match_rest;

             /* this type consists of the expression + two bytes that
              * determine number of matches to perform */
           case MANY | NOT_LITERAL:
             p1 = *buf; buf++;
             n = n0 = (int) (unsigned char) *buf++;
             /* minimum number to match--- could be 0 */
             n1 = (int) (unsigned char) *buf++;
             /* maximum number to match */

             while (n && (str < estr) && (p1 != UPPERCASE(*str)))
               {
                  n--;
                  str++;
               }
             if (n) return (NULL);

             save_str = str;
             n = n1 - n0;
             while (n && (str < estr) && (p1 != UPPERCASE(*str)))
               {
                  n--;
                  str++;
               }
             goto match_rest;
#endif                                 /* NOT_LITERAL */

           case LITERAL:
             if ((str >= estr) || (*buf != UPPERCASE(*str))) return (NULL);
             str++; buf++;
             break;

           case MAYBE_ONCE | LITERAL:
             save_str = str;
             if ((str < estr) && (*buf == UPPERCASE(*str))) str++;
             buf++;
             goto match_rest;

           case LITERAL | LEAST_ONCE:   /* match at least once */
             if ((str >= estr) || (UPPERCASE(*str) != UPPERCASE(*buf))) return (NULL);
             str++;
             /* fall through */
           case STAR | LITERAL:
             save_str = str; p1 = *buf;
             while ((str < estr) && (UPPERCASE(*str) == p1)) str++;
             buf++;
             goto match_rest;

             /* this type consists of the expression + two bytes that
              * determine number of matches to perform */
           case MANY | LITERAL:
             p1 = *buf; buf++;
             n = n0 = (int) (unsigned char) *buf++;
             /* minimum number to match--- could be 0 */
             n1 = (int) (unsigned char) *buf++;
             /* maximum number to match */

             while (n && (str < estr) && (p1 == UPPERCASE(*str)))
               {
                  n--;
                  str++;
               }
             if (n) return (NULL);

             /* NOTE(review): if n1 < n0 the count below is negative and the
              * loop is bounded only by the input; possibly this is how an
              * open-ended {n,} is encoded -- confirm against the compiler.
              */
             save_str = str;
             n = n1 - n0;
             while (n && (str < estr) && (p1 == UPPERCASE(*str)))
               {
                  n--;
                  str++;
               }
             goto match_rest;

           case NTH_MATCH:
             if ((str = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)) == NULL) return(NULL);
             buf++;
             break;

           case MAYBE_ONCE | NTH_MATCH:
             save_str = str;
             tmpstr = do_nth_match (ctx, (int) (unsigned char) *buf, str, estr);
             buf++;
             if (tmpstr != NULL)
               {
                  str = tmpstr;
                  goto match_rest;
               }
             /* Zero occurrences: move on to the next opcode.  Leaving the
              * switch fetches it; a bare `continue' here would re-dispatch
              * this same opcode with the following pattern byte misread as
              * a group number.
              */
             break;

           case LEAST_ONCE | NTH_MATCH:
             if ((str = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)) == NULL) return(NULL);
             /* fall through */
           case STAR | NTH_MATCH:
             save_str = str;
             while (NULL != (tmpstr = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)))
               {
                  /* An empty capture matches without advancing; stop or
                   * this loop would never terminate.
                   */
                  if (tmpstr == str) break;
                  str = tmpstr;
               }
             buf++;
             goto match_rest;

           case MANY | NTH_MATCH:
             return(NULL);
             /* needs done */

           case RANGE:
             if (str >= estr) return (NULL);
             if (TEST_BIT(buf, UPPERCASE(*str)) == 0) return (NULL);
             buf += 32; str++;
             break;

           case MAYBE_ONCE | RANGE:
             save_str = str;
             if ((str < estr) && TEST_BIT(buf, UPPERCASE(*str))) str++;
             buf += 32;
             goto match_rest;

           case LEAST_ONCE | RANGE:
             if ((str >= estr) || (0 == TEST_BIT(buf, UPPERCASE(*str)))) return NULL;
             str++;
             /* fall through */
           case STAR | RANGE:
             save_str = str;
             while ((str < estr) && TEST_BIT(buf, UPPERCASE(*str))) str++;
             buf += 32;
             goto match_rest;

             /* The first 32 bytes correspond to the range and the two
              * following bytes indicate the min and max number of matches.
              */
           case MANY | RANGE:
             /* minimum number to match--- could be 0 */
             n = n0 = (int) (unsigned char) *(buf + 32);
             /* maximum number to match */
             n1 = (int) (unsigned char) *(buf + 33);

             while (n && (str < estr) && (TEST_BIT(buf, UPPERCASE(*str))))
               {
                  n--;
                  str++;
               }
             if (n) return (NULL);
             save_str = str;
             n = n1 - n0;
             while (n && (str < estr) && (TEST_BIT(buf, UPPERCASE(*str))))
               {
                  n--;
                  str++;
               }
             buf += 34;                /* 32 + 2 */
             goto match_rest;

           case ANY_DIGIT:
             if ((str >= estr) || (*str > '9') || (*str < '0')) return (NULL);
             str++;
             break;

           case MAYBE_ONCE | ANY_DIGIT:
             save_str = str;
             /* Consume one digit if present.  The test used to be inverted
              * and consumed a NON-digit instead.
              */
             if ((str < estr) && ((*str <= '9') && (*str >= '0'))) str++;
             goto match_rest;

           case LEAST_ONCE | ANY_DIGIT:
             if ((str >= estr) || ((*str > '9') || (*str < '0'))) return NULL;
             str++;
             /* fall through */
           case STAR | ANY_DIGIT:
             save_str = str;
             while ((str < estr) && ((*str <= '9') && (*str >= '0'))) str++;
             goto match_rest;

           case MANY | ANY_DIGIT:
             /* needs finished */
             return (NULL);

           case ANY:
             if ((str >= estr) || (*str == '\n')) return (NULL);
             str++;
             break;

           case MAYBE_ONCE | ANY:
             save_str = str;
             if ((str < estr) && (*str != '\n')) str++;
             goto match_rest;

           case LEAST_ONCE | ANY:
             if ((str >= estr) || (*str == '\n')) return (NULL);
             str++;
             /* fall through */
           case STAR | ANY:
             save_str = str;
             while ((str < estr) && (*str != '\n')) str++;
             goto match_rest;

           case MANY | ANY:
             return (NULL);
             /* needs finished */

           case EOL:
             if ((str >= estr) || (*str == '\n')) return (str);
             return(NULL);

           default:
             return (NULL);
          }

        p = *buf++;
        continue;

        match_rest:
        /* Nothing optional was consumed: just carry on with the pattern. */
        if (save_str == str)
          {
             p = *buf++;
             continue;
          }

        /* Try the rest of the pattern at every position from the greedy
         * end `str' back down to `save_str', restoring the group state
         * after each failed attempt.
         */
        SLMEMCPY(save_closed_matches, ctx->closed_paren_matches, sizeof(save_closed_matches));
        save_num_open = ctx->open_paren_number;
        while (1)
          {
             tmpstr = regexp_looking_at (ctx, str, estr, buf, cs);
             if (tmpstr != NULL) return(tmpstr);

             SLMEMCPY(ctx->closed_paren_matches, save_closed_matches, sizeof(ctx->closed_paren_matches));
             ctx->open_paren_number = save_num_open;

             /* Stop before stepping below save_str so that no pointer in
              * front of the subject buffer is ever formed.
              */
             if (str == save_str) break;
             str--;
          }
        return NULL;
     }

   if ((p != 0) && (p != EOL)) return (NULL); else return (str);
}

/* Record the overall match (slot 0) in `r' and invalidate unmatched groups.
 *
 * When `str' is NULL (no match) slot 0 is set to offset -1 / length 0 and
 * every group is treated as unmatched.  Otherwise slot 0 receives the
 * offset of `str' within the subject and the length `epos - str'.  In both
 * cases each group 1..9 that was never closed is reset to -1 / 0.
 */
static void
fixup_beg_end_matches (Re_Context_Type *ctx, SLRegexp_Type *r, unsigned char *str, unsigned char *epos)
{
   int i;

   if (str == NULL)
     {
        r->beg_matches[0] = -1;
        r->end_matches[0] = 0;
        SLMEMSET(ctx->closed_paren_matches, 0, sizeof(ctx->closed_paren_matches));
     }
   else
     {
        r->beg_matches[0] = (int) (str - ctx->str);
        r->end_matches[0] = (unsigned int) (epos - str);
     }

   for (i = 1; i < 10; i++)
     {
        if (ctx->closed_paren_matches [i] == 0)
          {
             r->beg_matches[i] = -1;
             r->end_matches[i] = 0;
          }
     }
}

/* Zero the context and bind it to a compiled pattern and a subject buffer. */
static void init_re_context (Re_Context_Type *ctx, SLRegexp_Type *reg,
                             unsigned char *str, unsigned int len)
{
   memset ((char *) ctx, 0, sizeof (Re_Context_Type));
   ctx->reg = reg;
   ctx->str = str;
   ctx->len = len;
}

/* NOTE(review): only the beginning of this function is present in the text
 * that was reviewed; it is reproduced unchanged apart from layout.
 */
unsigned char *SLang_regexp_match(unsigned char *str, unsigned int len, SLRegexp_Type *reg)
{
   register unsigned char c = 0, *estr = str + len;
   int cs = reg->case_sensitive, lit = 0;
   unsigned char *buf = reg->buf, *epos = NULL;
   Re_Context_Type ctx_buf;

   if (reg->min_length > len) return NULL;

   init_re_context (&ctx_buf, reg, str, len);

   if (*buf == BOL)
     {
        if (NULL == (epos = regexp_looking_at (&ctx_buf, str, estr, buf + 1, cs)))
          str = NULL;

        fixup_beg_end_matches (&ctx_buf, reg, str, epos);
        return str;
     }

   if (*buf == NO_CASE)
     {
        buf++; cs = 0;
     }

   if (*buf == YES_CASE)
     {
        buf++; cs = 1;
     }

   if (*buf == LITERAL)
     {
        lit = 1;
        c = *(buf + 1);
     }
   else if ((*buf == OPAREN) && (*(buf + 1) == LITERAL))
     {
        lit = 1;
        c = *(buf + 2);
     }

   while (1)
     {
        ctx_buf.open_paren_number = 0;
        memset (ctx_buf.closed_paren_matches, 0, sizeof(ctx_buf.closed_paren_matches));
        /* take care of leading chars */
        if (lit)
          {
             while ((str < estr) && (c != UPPERCASE(*str))) str++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -