⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.c

📁 CMX990 demonstration board (DE9901)
💻 C
📖 第 1 页 / 共 3 页
字号:
  int flags;
  
  ret = regatom(&flags);
  if (ret == NULL)
    return(NULL);
  
  op = *regparse;
  if (!ISMULT(op)) {
    *flagp = flags;
    return(ret);
  }
  
  if (!(flags&HASWIDTH) && op != '?')
    FAIL("*+ operand could be empty");
  *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
  
  if (op == '*' && (flags&SIMPLE))
    reginsert(STAR, ret);
  else if (op == '*') {
    /* Emit x* as (x&|), where & means "self". */
    reginsert(BRANCH, ret);			/* Either x */
    regoptail(ret, regnode(BACK));		/* and loop */
    regoptail(ret, ret);			/* back */
    regtail(ret, regnode(BRANCH));		/* or */
    regtail(ret, regnode(NOTHING));		/* null. */
  } else if (op == '+' && (flags&SIMPLE))
    reginsert(PLUS, ret);
  else if (op == '+') {
    /* Emit x+ as x(&|), where & means "self". */
    next = regnode(BRANCH);			/* Either */
    regtail(ret, next);
    regtail(regnode(BACK), ret);		/* loop back */
    regtail(next, regnode(BRANCH));		/* or */
    regtail(ret, regnode(NOTHING));		/* null. */
  } else if (op == '?') {
    /* Emit x? as (x|) */
    reginsert(BRANCH, ret);			/* Either x */
    regtail(ret, regnode(BRANCH));		/* or */
    next = regnode(NOTHING);		/* null. */
    regtail(ret, next);
    regoptail(ret, next);
  }
  regparse++;
  if (ISMULT(*regparse))
    FAIL("nested *?+");
  
  return(ret);
}

/*
 - regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static char *regatom(int *flagp) {
  register char *ret;
  int flags;

  *flagp = WORST;		/* Tentatively. */
  
  switch (*regparse++) {
  case '^':
    ret = regnode(BOL);
    break;
  case '$':
    ret = regnode(EOL);
    break;
  case '.':
    ret = regnode(ANY);
    *flagp |= HASWIDTH|SIMPLE;
    break;
  case '[': {
    register int clss;
    register int classend;
    
    if (*regparse == '^') {	/* Complement of range. */
      ret = regnode(ANYBUT);
      regparse++;
    } else
      ret = regnode(ANYOF);
    if (*regparse == ']' || *regparse == '-')
      regc(*regparse++);
    while (*regparse != '\0' && *regparse != ']') {
      if (*regparse == '-') {
        regparse++;
        if (*regparse == ']' || *regparse == '\0')
          regc('-');
        else {
          clss = UCHARAT(regparse-2)+1;
          classend = UCHARAT(regparse);
          if (clss > classend+1)
            FAIL("invalid [] range");
          for (; clss <= classend; clss++)
            regc(clss);
          regparse++;
        }
      } else
        regc(*regparse++);
    }
    regc('\0');
    if (*regparse != ']')
      FAIL("unmatched []");
    regparse++;
    *flagp |= HASWIDTH|SIMPLE;
  }
    break;
  case '(':
    ret = reg(1, &flags);
    if (ret == NULL)
      return(NULL);
    *flagp |= flags&(HASWIDTH|SPSTART);
    break;
  case '\0':
  case '|':
  case ')':
    FAIL("internal urp");	/* Supposed to be caught earlier. */
    /* NOTREACHED */
    break;
  case '?':
  case '+':
  case '*':
    FAIL("?+* follows nothing");
    /* NOTREACHED */
    break;
  case '\\':
    if (*regparse == '\0')
      FAIL("trailing \\");
    ret = regnode(EXACTLY);
    regc(*regparse++);
    regc('\0');
    *flagp |= HASWIDTH|SIMPLE;
    break;
  default: {
    register int len;
    register char ender;
    
    regparse--;
    len = strcspn(regparse, META);
    if (len <= 0)
      FAIL("internal disaster");
    ender = *(regparse+len);
    if (len > 1 && ISMULT(ender))
      len--;		/* Back off clear of ?+* operand. */
    *flagp |= HASWIDTH;
    if (len == 1)
      *flagp |= SIMPLE;
    ret = regnode(EXACTLY);
    while (len > 0) {
      regc(*regparse++);
      len--;
    }
    regc('\0');
  }
    break;
  }
  
  return(ret);
}

/*
 - regnode - emit a node
 */
static char *regnode(char op) {
  register char *ret;
  register char *ptr;
  
  ret = regcode;
  if (ret == &regdummy) {
    regsize += 3;
    return(ret);
  }
  
  ptr = ret;
  *ptr++ = op;
  *ptr++ = '\0';		/* Null "next" pointer. */
  *ptr++ = '\0';
  regcode = ptr;
  
  return(ret);
}

/*
 - regc - emit (if appropriate) a byte of code
 */
static void regc(char b) {
  if (regcode != &regdummy)
    *regcode++ = b;
  else
    regsize++;
}

/*
 - reginsert - insert an operator in front of already-emitted operand
 *
 * Means relocating the operand.
 */
static void reginsert(char op, char *opnd) {
  register char *src;
  register char *dst;
  register char *place;
  
  if (regcode == &regdummy) {
    regsize += 3;
    return;
  }
  
  src = regcode;
  regcode += 3;
  dst = regcode;
  while (src > opnd)
    *--dst = *--src;
  
  place = opnd;		/* Op node, where operand used to be. */
  *place++ = op;
  *place++ = '\0';
  *place++ = '\0';
}

/*
 - regtail - set the next-pointer at the end of a node chain
 */
static void regtail(char *p, char *val) {
  register char *scan;
  register char *temp;
  register int offset;
  
  if (p == &regdummy)
    return;
  
  /* Find last node. */
  scan = p;
  for (;;) {
    temp = regnext(scan);
    if (temp == NULL)
      break;
    scan = temp;
  }
  
  if (OP(scan) == BACK)
    offset = scan - val;
  else
    offset = val - scan;
  *(scan+1) = (offset>>8)&0377;
  *(scan+2) = offset&0377;
}

/*
 - regoptail - regtail on operand of first argument; nop if operandless
 */
static void regoptail(char *p, char *val) {
  /* "Operandless" and "op != BRANCH" are synonymous in practice. */
  if (p == NULL || p == &regdummy || OP(p) != BRANCH)
    return;
  regtail(OPERAND(p), val);
}

/*
 * regexec and friends
 */

/*
 * Global work variables for regexec().
 */
static char *reginput;		/* String-input pointer. */
static char *regbol;		/* Beginning of input, for ^ check. */
static char **regstartp;	/* Pointer to startp array. */
static char **regendp;		/* Ditto for endp. */

/*
 * Forwards.
 */
STATIC int regtry(regexp *prog, char *string);
STATIC int regmatch(char *prog);
STATIC int regrepeat(char *p);

#ifdef DEBUG
int regnarrate = 0;
void regdump(regexp *r);
STATIC char *regprop(char *op);
#endif

/*
 - regexec - match a regexp against a string
 */
int regexec(regexp *prog, char *string) {
  register char *s;
  
  /* Be paranoid... */
  if (prog == NULL || string == NULL) {
    regerror("NULL parameter");
    return(0);
  }
  
  /* Check validity of program. */
  if (UCHARAT(prog->program) != MAGIC) {
    regerror("corrupted program");
    return(0);
  }
  
  /* If there is a "must appear" string, look for it. */
  if (prog->regmust != NULL) {
    s = string;
    while ((s = strchr(s, prog->regmust[0])) != NULL) {
      if (strncmp(s, prog->regmust, prog->regmlen) == 0)
        break;	/* Found it. */
      s++;
    }
    if (s == NULL)	/* Not present. */
      return(0);
  }
  
  /* Mark beginning of line for ^ . */
  regbol = string;
  
  /* Simplest case:  anchored match need be tried only once. */
  if (prog->reganch)
    return(regtry(prog, string));
  
  /* Messy cases:  unanchored match. */
  s = string;
  if (prog->regstart != '\0')
    /* We know what char it must start with. */
    while ((s = strchr(s, prog->regstart)) != NULL) {
      if (regtry(prog, s))
        return(1);
      s++;
    }
  else
    /* We don't -- general case. */
    do {
      if (regtry(prog, s))
        return(1);
    } while (*s++ != '\0');
  
  /* Failure. */
  return(0);
}

/*
 - regtry - try match at specific point
 */
static int			/* 0 failure, 1 success */
regtry(regexp *prog, char *string) {
  register int i;
  register char **sp;
  register char **ep;
  
  reginput = string;
  regstartp = prog->startp;
  regendp = prog->endp;
  
  sp = prog->startp;
  ep = prog->endp;
  for (i = NSUBEXP; i > 0; i--) {
    *sp++ = NULL;
    *ep++ = NULL;
  }
  if (regmatch(prog->program + 1)) {
    prog->startp[0] = string;
    prog->endp[0] = reginput;
    return(1);
  } else
    return(0);
}

/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 */
static int			/* 0 failure, 1 success */
regmatch(char *prog) {
  register char *scan;	/* Current node. */
  char *next;		/* Next node. */
  
  scan = prog;
#ifdef DEBUG
  if (scan != NULL && regnarrate)
    fprintf(stderr, "%s(\n", regprop(scan));
#endif
  while (scan != NULL) {
#ifdef DEBUG
    if (regnarrate)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -