regex.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,201 行 · 第 1/3 页

C
1,201
字号
                                return(0);
                        break;
                case EOL:
                        if (*reginput != '\0')
                                return(0);
                        break;
                case ANY:
                        if (*reginput == '\0')
                                return(0);
                        reginput++;
                        break;
                case EXACTLY: {
                                register int len;
                                register char *opnd;

                                opnd = OPERAND(scan);
                                /* Inline the first character, for speed. */
                                if (*opnd != *reginput)
                                        return(0);
                                len = strlen(opnd);
                                if (len > 1 && strncmp(opnd, reginput, len) != 0)
                                        return(0);
                                reginput += len;
                        }
                        break;
                case ANYOF:
                        if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
                                return(0);
                        reginput++;
                        break;
                case ANYBUT:
                        if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
                                return(0);
                        reginput++;
                        break;
                case NOTHING:
                        break;
                case BACK:
                        break;
                case OPEN+1:
                case OPEN+2:
                case OPEN+3:
                case OPEN+4:
                case OPEN+5:
                case OPEN+6:
                case OPEN+7:
                case OPEN+8:
                case OPEN+9: {
                                register int no;
                                register char *save;

                                no = OP(scan) - OPEN;
                                save = reginput;

                                if (regmatch(next)) {
                                        /*
                                         * Don't set startp if some later
                                         * invocation of the same parentheses
                                         * already has.
                                         */
                                        if (regstartp[no] == NULL)
                                                regstartp[no] = save;
                                        return(1);
                                } else
                                        return(0);
                        }
                        break;
                case CLOSE+1:
                case CLOSE+2:
                case CLOSE+3:
                case CLOSE+4:
                case CLOSE+5:
                case CLOSE+6:
                case CLOSE+7:
                case CLOSE+8:
                case CLOSE+9: {
                                register int no;
                                register char *save;

                                no = OP(scan) - CLOSE;
                                save = reginput;

                                if (regmatch(next)) {
                                        /*
                                         * Don't set endp if some later
                                         * invocation of the same parentheses
                                         * already has.
                                         */
                                        if (regendp[no] == NULL)
                                                regendp[no] = save;
                                        return(1);
                                } else
                                        return(0);
                        }
                        break;
                case BRANCH: {
                                register char *save;

                                if (OP(next) != BRANCH)         /* No choice. */
                                        next = OPERAND(scan);   /* Avoid recursion. */
                                else {
                                        do {
                                                save = reginput;
                                                if (regmatch(OPERAND(scan)))
                                                        return(1);
                                                reginput = save;
                                                scan = regnext(scan);
                                        } while (scan != NULL && OP(scan) == BRANCH);
                                        return(0);
                                        /* NOTREACHED */
                                }
                        }
                        break;
                case STAR:
                case PLUS: {
                                register char nextch;
                                register int no;
                                register char *save;
                                register int min;

                                /*
                                 * Lookahead to avoid useless match attempts
                                 * when we know what character comes next.
                                 */
                                nextch = '\0';
                                if (OP(next) == EXACTLY)
                                        nextch = *OPERAND(next);
                                min = (OP(scan) == STAR) ? 0 : 1;
                                save = reginput;
                                no = regrepeat(OPERAND(scan));
                                while (no >= min) {
                                        /* If it could work, try it. */
                                        if (nextch == '\0' || *reginput == nextch)
                                                if (regmatch(next))
                                                        return(1);
                                        /* Couldn't or didn't -- back up. */
                                        no--;
                                        reginput = save + no;
                                }
                                return(0);
                        }
                        break;
                case END:
                        return(1);      /* Success! */
                        break;
                default:
                        regerror("memory corruption");
                        return(0);
                        break;
                }

                scan = next;
        }

        /*
         * We get here only if there's trouble -- normally "case END" is
         * the terminating point.
         */
        regerror("corrupted pointers");
        return(0);
}

/*
 - regrepeat - repeatedly match something simple, report how many
 *
 * Starting at the global reginput, consumes as many consecutive characters
 * as the single node p accepts, leaves reginput just past the last
 * character consumed, and returns how many were consumed.  Only ANY,
 * EXACTLY, ANYOF and ANYBUT nodes are handled; any other node type is
 * reported through regerror() and counts as zero matches.
 */
static int
regrepeat(p)
char *p;
{
        char *strchr();
        register int matched = 0;
        register char *cur = reginput;
        register char *arg = OPERAND(p);

        switch (OP(p)) {
        case ANY:
                /* Everything up to the terminating NUL matches. */
                matched = strlen(cur);
                cur += matched;
                break;
        case EXACTLY:
                /* Only the first operand character is compared. */
                for (; *arg == *cur; cur++)
                        matched++;
                break;
        case ANYOF:
                /* Stop at end of input or at a character not in the set. */
                for (; *cur != '\0' && strchr(arg, *cur) != NULL; cur++)
                        matched++;
                break;
        case ANYBUT:
                /* Stop at end of input or at a character in the set. */
                for (; *cur != '\0' && strchr(arg, *cur) == NULL; cur++)
                        matched++;
                break;
        default:                /* Oh dear.  Called inappropriately. */
                regerror("internal foulup");
                matched = 0;    /* Best compromise. */
                break;
        }

        reginput = cur;
        return(matched);
}

/*
 - regnext - dig the "next" pointer out of a node
 *
 * Returns the node linked after p, or NULL when p is the regdummy
 * placeholder or when its stored offset is zero.  The offset is applied
 * backwards for BACK nodes and forwards for every other node type.
 */
static char *
regnext(p)
register char *p;
{
        register int dist;

        /* The dummy node has no program behind it; never read its offset. */
        if (p == &regdummy)
                return(NULL);

        dist = NEXT(p);
        if (dist == 0)
                return(NULL);

        return((OP(p) == BACK) ? (p - dist) : (p + dist));
}

#ifdef DEBUG

STATIC char *regprop();

/*
 - regdump - dump a regexp onto stdout in vaguely comprehensible form
 *
 * Walks r->program starting at its second byte and prints one line per
 * node: the node's offset, its name as produced by regprop(), the offset
 * of its successor in parentheses ("(0)" when regnext() reports none),
 * and, for ANYOF/ANYBUT/EXACTLY nodes, the operand text.  The walk stops
 * after the END node has been printed; the regstart, reganch and regmust
 * header fields that are set are then printed on a final line.
 */
void
regdump(r)
regexp *r;
{
        register char *s;
        register char op = EXACTLY;     /* Arbitrary non-END op. */
        register char *next;

        s = r->program + 1;
        while (op != END) {     /* While that wasn't END last time... */
                op = OP(s);
                /*
                 * A pointer difference has type ptrdiff_t, but %d requires
                 * an int argument, so convert explicitly.
                 */
                printf("%2d%s", (int)(s - r->program), regprop(s));     /* Where, what. */
                next = regnext(s);
                if (next == NULL)               /* Next ptr. */
                        printf("(0)");
                else
                        printf("(%d)", (int)(next - r->program));
                /*
                 * Step over the node header (3 bytes here; presumably the
                 * opcode plus the next-offset bytes -- confirm against the
                 * OP()/NEXT() macros).
                 */
                s += 3;
                if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
                        /* Literal string, where present. */
                        while (*s != '\0') {
                                putchar(*s);
                                s++;
                        }
                        s++;    /* Skip the operand's terminating NUL. */
                }
                putchar('\n');
        }

        /* Header fields of interest. */
        if (r->regstart != '\0')
                printf("start `%c' ", r->regstart);
        if (r->reganch)
                printf("anchored ");
        if (r->regmust != NULL)
                printf("must have \"%s\"", r->regmust);
        printf("\n");
}

/*
 - regprop - printable representation of opcode
 *
 * op points at a program node.  Returns a string of the form ":NAME"
 * (for example ":BOL", ":OPEN3") describing the node's opcode.  The
 * result lives in a static buffer that is overwritten by every call.
 * An unrecognized opcode is reported through regerror() and yields the
 * bare string ":".
 */
static char *
regprop(op)
char *op;
{
        /*
         * Start as NULL so the default branch below, which assigns no
         * name, does not leave p indeterminate for the final test.
         */
        register char *p = NULL;
        static char buf[50];

        (void) strcpy(buf, ":");

        switch (OP(op)) {
        case BOL:
                p = "BOL";
                break;
        case EOL:
                p = "EOL";
                break;
        case ANY:
                p = "ANY";
                break;
        case ANYOF:
                p = "ANYOF";
                break;
        case ANYBUT:
                p = "ANYBUT";
                break;
        case BRANCH:
                p = "BRANCH";
                break;
        case EXACTLY:
                p = "EXACTLY";
                break;
        case NOTHING:
                p = "NOTHING";
                break;
        case BACK:
                p = "BACK";
                break;
        case END:
                p = "END";
                break;
        case OPEN+1:
        case OPEN+2:
        case OPEN+3:
        case OPEN+4:
        case OPEN+5:
        case OPEN+6:
        case OPEN+7:
        case OPEN+8:
        case OPEN+9:
                /* Numbered names are formatted straight into buf. */
                sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
                p = NULL;
                break;
        case CLOSE+1:
        case CLOSE+2:
        case CLOSE+3:
        case CLOSE+4:
        case CLOSE+5:
        case CLOSE+6:
        case CLOSE+7:
        case CLOSE+8:
        case CLOSE+9:
                sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
                p = NULL;
                break;
        case STAR:
                p = "STAR";
                break;
        case PLUS:
                p = "PLUS";
                break;
        default:
                regerror("corrupted opcode");
                p = NULL;       /* Nothing to append; buf stays ":". */
                break;
        }
        if (p != NULL)
                (void) strcat(buf, p);
        return(buf);
}
#endif

/*
 * The following is provided for those people who do not have strcspn() in
 * their C libraries.  They should get off their butts and do something
 * about it; at least one public-domain implementation of those (highly
 * useful) string routines has been published on Usenet.
 */
#ifdef STRCSPN
/*
 * strcspn - find length of initial segment of s1 consisting entirely
 * of characters not from s2
 *
 * Returns the index of the first character of s1 that also occurs in s2,
 * or the length of s1 when no character of s1 occurs in s2.
 */

static int
strcspn(s1, s2)
char *s1;
char *s2;
{
        register int n = 0;
        register char *reject;

        while (s1[n] != '\0') {
                /* Compare the current character with every one in s2. */
                reject = s2;
                while (*reject != '\0') {
                        if (*reject == s1[n])
                                return(n);
                        reject++;
                }
                n++;
        }
        return(n);
}
#endif

/*
 - regerror - report a regexp error
 *
 * Writes the text "regerror: ", the message s and a newline to stderr.
 */
void regerror(char *s)
{
    FILE *out = stderr;

    (void) fprintf(out, "regerror: %s\n", s);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?