⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regularexp.c

📁 nedit 是一款linux下的开发源码的功能强大的编辑器
💻 C
📖 第 1 页 / 共 5 页
字号:
 * Process main body of regex or process a parenthesized "thing".       * *                                                                      * * Caller must absorb opening parenthesis.                              * *                                                                      * * Combining parenthesis handling with the base level of regular        * * expression is a trifle forced, but the need to tie the tails of the  * * branches to what follows makes it hard to avoid.                     * *----------------------------------------------------------------------*/static unsigned char * chunk (int paren, int *flag_param,                               len_range *range_param) {   register unsigned char *ret_val = NULL;   register unsigned char *this_branch;   register unsigned char *ender = NULL;   register          int   this_paren = 0;                     int   flags_local, first = 1, zero_width, i;                     int   old_sensitive = Is_Case_Insensitive;                     int   old_newline   = Match_Newline;		     len_range range_local;		     int   look_only = 0;            unsigned char *emit_look_behind_bounds = NULL;	                *flag_param = HAS_WIDTH;  /* Tentatively. */   range_param->lower = 0;   /* Idem */   range_param->upper = 0;   /* Make an OPEN node, if parenthesized. */   if (paren == PAREN) {      if (Total_Paren >= NSUBEXP) {         sprintf (Error_Text, "number of ()'s > %d", (int) NSUBEXP);         REG_FAIL (Error_Text);      }      this_paren = Total_Paren; Total_Paren++;      ret_val    = emit_node (OPEN + this_paren);   } else if (paren == POS_AHEAD_OPEN || paren == NEG_AHEAD_OPEN) {      *flag_param = WORST;  /* Look ahead is zero width. */      look_only   = 1;      ret_val     = emit_node (paren);   } else if (paren == POS_BEHIND_OPEN || paren == NEG_BEHIND_OPEN) {      *flag_param = WORST;  /* Look behind is zero width. */      look_only   = 1;      /* We'll overwrite the zero length later on, so we save the ptr */      ret_val 	  = emit_special (paren, 0, 0);      emit_look_behind_bounds = ret_val + NODE_SIZE;   } else if (paren == INSENSITIVE) {      Is_Case_Insensitive = 1;   } else if (paren == SENSITIVE) {      Is_Case_Insensitive = 0;   } else if (paren == NEWLINE) {      Match_Newline = 1;   } else if (paren == NO_NEWLINE) {      Match_Newline = 0;   }   /* Pick up the branches, linking them together. */   do {      this_branch = alternative (&flags_local, &range_local);      if (this_branch == NULL) return (NULL);      if (first) {         first = 0;	 *range_param = range_local;         if (ret_val == NULL) ret_val = this_branch;      } else if (range_param->lower >= 0) {	  if (range_local.lower >= 0) {             if (range_local.lower < range_param->lower)                range_param->lower = range_local.lower;             if (range_local.upper > range_param->upper)                range_param->upper = range_local.upper;          } else {	      range_param->lower = -1; /* Branches have different lengths */	      range_param->upper = -1;	  }      }      tail (ret_val, this_branch);  /* Connect BRANCH -> BRANCH. */      /* If any alternative could be zero width, consider the whole         parenthisized thing to be zero width. */      if (!(flags_local & HAS_WIDTH)) *flag_param &= ~HAS_WIDTH;      /* Are there more alternatives to process? */      if (*Reg_Parse != '|') break;      Reg_Parse++;   } while (1);   /* Make a closing node, and hook it on the end. */   if (paren == PAREN) {      ender = emit_node (CLOSE + this_paren);   } else if (paren == NO_PAREN) {      ender = emit_node (END);   } else if (paren == POS_AHEAD_OPEN || paren == NEG_AHEAD_OPEN) {      ender = emit_node (LOOK_AHEAD_CLOSE);   } else if (paren == POS_BEHIND_OPEN || paren == NEG_BEHIND_OPEN) {      ender = emit_node (LOOK_BEHIND_CLOSE);   } else {      ender = emit_node (NOTHING);   }   tail (ret_val, ender);   /* Hook the tails of the branch alternatives to the closing node. */   for (this_branch = ret_val; this_branch != NULL; ) {      branch_tail (this_branch, NODE_SIZE, ender);      this_branch = next_ptr (this_branch);   }   /* Check for proper termination. */   if (paren != NO_PAREN && *Reg_Parse++ != ')') {      REG_FAIL ("missing right parenthesis \')\'");   } else if (paren == NO_PAREN && *Reg_Parse != '\0') {      if (*Reg_Parse == ')') {         REG_FAIL ("missing left parenthesis \'(\'");      } else {         REG_FAIL ("junk on end");  /* "Can't happen" - NOTREACHED */      }   }   /* Check whether look behind has a fixed size */   if (emit_look_behind_bounds) {       if (range_param->lower < 0) {	   REG_FAIL ("look-behind does not have a bounded size");       }        if (range_param->upper > 65535L) {	   REG_FAIL ("max. look-behind size is too large (>65535)")       }        if (Code_Emit_Ptr != &Compute_Size) {          *emit_look_behind_bounds++ = PUT_OFFSET_L (range_param->lower);          *emit_look_behind_bounds++ = PUT_OFFSET_R (range_param->lower);          *emit_look_behind_bounds++ = PUT_OFFSET_L (range_param->upper);          *emit_look_behind_bounds   = PUT_OFFSET_R (range_param->upper);       }   }   /* For look ahead/behind, the length must be set to zero again */   if (look_only) {       range_param->lower = 0;       range_param->upper = 0;   }   zero_width = 0;   /* Set a bit in Closed_Parens to let future calls to function `back_ref'      know that we have closed this set of parentheses. */   if (paren == PAREN && this_paren <= (int)sizeof (Closed_Parens) * CHAR_BIT) {      SET_BIT (Closed_Parens, this_paren);      /* Determine if a parenthesized expression is modified by a quantifier         that can have zero width. */      if (*(Reg_Parse) == '?' || *(Reg_Parse) == '*') {         zero_width++;      } else if (*(Reg_Parse) == '{' && Brace_Char == '{') {         if (*(Reg_Parse + 1) == ',' || *(Reg_Parse + 1) == '}') {            zero_width++;         } else if (*(Reg_Parse + 1) == '0') {            i = 2;            while (*(Reg_Parse + i) == '0') i++;            if (*(Reg_Parse + i) == ',') zero_width++;         }      }   }   /* If this set of parentheses is known to never match the empty string, set      a bit in Paren_Has_Width to let future calls to function back_ref know      that this set of parentheses has non-zero width.  This will allow star      (*) or question (?) quantifiers to be aplied to a back-reference that      refers to this set of parentheses. */   if ((*flag_param & HAS_WIDTH)  &&        paren == PAREN            &&        !zero_width               &&        this_paren <= (int)(sizeof (Paren_Has_Width) * CHAR_BIT)) {      SET_BIT (Paren_Has_Width, this_paren);   }   Is_Case_Insensitive = old_sensitive;   Match_Newline       = old_newline;   return (ret_val);}/*----------------------------------------------------------------------* * alternative * * Processes one alternative of an '|' operator.  Connects the NEXT * pointers of each regex atom together sequentialy. *----------------------------------------------------------------------*/static unsigned char * alternative (int *flag_param, len_range *range_param) {   register unsigned char *ret_val;   register unsigned char *chain;   register unsigned char *latest;                     int   flags_local;		     len_range range_local;   *flag_param = WORST;  /* Tentatively. */   range_param->lower = 0; /* Idem */   range_param->upper = 0;   ret_val = emit_node (BRANCH);   chain   = NULL;   /* Loop until we hit the start of the next alternative, the end of this set      of alternatives (end of parentheses), or the end of the regex. */   while (*Reg_Parse != '|' && *Reg_Parse != ')' && *Reg_Parse != '\0') {      latest = piece (&flags_local, &range_local);      if (latest == NULL) return (NULL); /* Something went wrong. */      *flag_param |= flags_local & HAS_WIDTH;      if (range_local.lower < 0) {	  /* Not a fixed length */	  range_param->lower = -1;	  range_param->upper = -1;      } else if (range_param->lower >= 0) {	  range_param->lower += range_local.lower;	  range_param->upper += range_local.upper;      }      if (chain != NULL) { /* Connect the regex atoms together sequentialy. */         tail (chain, latest);      }      chain = latest;   }   if (chain == NULL) {  /* Loop ran zero times. */      (void) emit_node (NOTHING);   }   return (ret_val);}/*----------------------------------------------------------------------* * piece - something followed by possible '*', '+', '?', or "{m,n}" * * Note that the branching code sequences used for the general cases of * *, +. ?, and {m,n} are somewhat optimized:  they use the same * NOTHING node as both the endmarker for their branch list and the * body of the last branch. It might seem that this node could be * dispensed with entirely, but the endmarker role is not redundant. *----------------------------------------------------------------------*/static unsigned char * piece (int *flag_param, len_range *range_param) {   register unsigned char *ret_val;   register unsigned char *next;   register unsigned char  op_code;            unsigned long  min_max [2] = {REG_ZERO, REG_INFINITY};            int            flags_local, i, brace_present = 0;            int            lazy = 0, comma_present = 0;            int            digit_present [2] = {0,0};	    len_range      range_local;   ret_val = atom (&flags_local, &range_local);   if (ret_val == NULL) return (NULL);  /* Something went wrong. */   op_code = *Reg_Parse;   if (!IS_QUANTIFIER (op_code)) {      *flag_param = flags_local;      *range_param = range_local;      return (ret_val);   } else if (op_code == '{') { /* {n,m} quantifier present */      brace_present++;      Reg_Parse++;      /* This code will allow specifying a counting range in any of the         following forms:         {m,n}  between m and n.         {,n}   same as {0,n} or between 0 and infinity.         {m,}   same as {m,0} or between m and infinity.         {m}    same as {m,m} or exactly m.         {,}    same as {0,0} or between 0 and infinity or just '*'.         {}     same as {0,0} or between 0 and infinity or just '*'.         Note that specifying a max of zero, {m,0} is not allowed in the regex         itself, but it is implemented internally that way to support '*', '+',         and {min,} constructs and signals an unlimited number. */      for (i = 0; i < 2; i++) {         /* Look for digits of number and convert as we go.  The numeric maximum            value for max and min of 65,535 is due to using 2 bytes to store            each value in the compiled regex code. */         while (isdigit (*Reg_Parse)) {            /* (6553 * 10 + 6) > 65535 (16 bit max) */            if ((min_max [i] == 6553UL && (*Reg_Parse - '0') <= 5) ||                (min_max [i] <= 6552UL)) {               min_max [i] = (min_max [i] * 10UL) +                             (unsigned long) (*Reg_Parse - '0');               Reg_Parse++;               digit_present [i]++;            } else {               if (i == 0) {                  sprintf (Error_Text, "min operand of {%lu%c,???} > 65535",

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -