📄 preprocess.c
字号:
/* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */
/*
 * preprocess.c -- rewrite the user's pattern into the internal symbol form.
 *
 * Original design notes:
 *   - substitute metacharacters with special symbols
 *   - if the pattern is a regular expression, set the flag REGEX
 *   - if REGEX and MULTIPAT, report an error message
 *   - -w is only for a single-word pattern (WORDBOUND & MULTIWORD is an error)
 *   - process the start-of-line and end-of-line symbols
 *   - process the -w (WORDBOUND) option: add a special symbol at the
 *     beginning and end of the pattern
 *   - the -d option is processed before this routine; the delimiter
 *     pattern is in D_pattern (it needs to end with "; ")
 *   - with '-t' (suggestion: how about -B) the pattern is passed to sgrep
 *     and does not come here; in that case, is -d ignored, or not necessary?
 *   - upon return, Pattern contains the pattern to be processed by maskgen
 *     and D_pattern contains the transformed D_pattern
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "agrep.h"

extern int SIMPLEPATTERN, WHOLELINE, REGEX, RE_ERR, DELIMITER, TAIL, WORDBOUND;
extern int HEAD;
extern CHAR Progname[];
extern int D_length;
extern int table[WORD][WORD];

/*
 * init() is defined outside this file.  The original code reached it through
 * an implicit declaration (i.e. "int init()"); that assumption is now spelled
 * out.  NOTE(review): replace with the real prototype once confirmed.
 */
extern int init();

/*
 * preprocess - translate D_pattern and Pattern in place.
 *
 * D_pattern: NUL-terminated delimiter pattern.  Its last two characters are
 *            skipped (the notes above say it ends with "; ").  On return it
 *            holds the delimiter with '\' escapes resolved and '<' / '>'
 *            dropped.  The global D_length is set to that length plus one.
 * Pattern:   NUL-terminated search pattern.  On return it holds the
 *            translated pattern.  The translated text can be longer than the
 *            input (delimiter prefix, ANDPAT, word/line boundary markers,
 *            NOCARE head/tail).  NOTE(review): the capacity of the caller's
 *            buffer is not visible here and cannot be checked.
 *
 * Side effects on globals: may set REGEX, RE_ERR, HEAD, TAIL to ON and
 * writes D_length.  When REGEX is set, init() is called with a plain-text
 * version of the expression and `table`.
 *
 * Every error is reported on stderr and ends the process with exit(2).
 * Returns 0.  (The original definition was implicit-int with a bare
 * "return;", so no caller could rely on a return value.)
 */
int preprocess(CHAR *D_pattern, CHAR *Pattern)
{
    /* 3 boundary symbols in front + 3 behind + terminating NUL */
    enum { AUGMENT_EXTRA = 7 };

    CHAR temp[Maxline];
    CHAR old_D_pat[MaxDelimit];
    CHAR *r_pat;                    /* plain-text regular expression for init() */
    CHAR *old_pat = Pattern;        /* remembers the start of the caller's buffer */
    size_t delim_len, pat_len;
    int i, j = 0, rp = 0, m, t, num_pos;
    int d_end;
    int ANDON = 0;
    int IN_RANGE = 0;

    /* An unescaped '|' or '*' makes this a regular expression. */
    pat_len = strlen((char *)Pattern);
    m = (int)pat_len;
    for (i = 0; i < m; i++) {
        if (Pattern[i] == '\\')
            i++;
        else if (Pattern[i] == '|' || Pattern[i] == '*')
            REGEX = ON;
    }

    /* temp must hold D_pattern + markers + Pattern + markers + NUL. */
    delim_len = strlen((char *)D_pattern);
    if (delim_len > sizeof temp - AUGMENT_EXTRA ||
        pat_len > sizeof temp - AUGMENT_EXTRA - delim_len) {
        fprintf(stderr, "%s: pattern too long\n", Progname);
        exit(2);
    }

    strcpy((char *)temp, (char *)D_pattern);
    d_end = t = (int)delim_len;     /* size of D_pattern, including "; " */

    if (WHOLELINE) {
        temp[t++] = LANGLE;
        temp[t++] = NNLINE;
        temp[t++] = RANGLE;
        temp[t] = '\0';
        strcat((char *)temp, (char *)Pattern);
        m = (int)strlen((char *)temp);
        temp[m++] = LANGLE;
        temp[m++] = '\n';
        temp[m++] = RANGLE;
        temp[m] = '\0';
    } else {
        if (WORDBOUND) {
            temp[t++] = LANGLE;
            temp[t++] = WORDB;
            temp[t++] = RANGLE;
            temp[t] = '\0';
        }
        strcat((char *)temp, (char *)Pattern);
        m = (int)strlen((char *)temp);
        if (WORDBOUND) {
            temp[m++] = LANGLE;
            temp[m++] = WORDB;
            temp[m++] = RANGLE;
        }
        temp[m] = '\0';
    }
    /* temp now contains the augmented pattern and m is its size. */

    /*
     * Worst case for r_pat: ".(" in front, two bytes per pattern character
     * ('#' expands to ".*"), ")." behind, and the terminating NUL.
     */
    r_pat = malloc(2 * (size_t)(m - d_end) + 5);
    if (r_pat == NULL) {
        fprintf(stderr, "%s: malloc failure\n", Progname);
        exit(2);
    }

    /* ---- delimiter part: everything except the trailing two characters ---- */
    D_length = 0;
    for (i = 0; i < d_end - 2; i++) {
        int keep_in_delim = 1;      /* '<' and '>' are not copied to D_pattern */
        CHAR sym;

        switch (temp[i]) {
        case '\\':
            i++;                    /* take the escaped character literally */
            sym = temp[i];
            break;
        case '<':
            sym = LANGLE;
            keep_in_delim = 0;
            break;
        case '>':
            sym = RANGLE;
            keep_in_delim = 0;
            break;
        case '^':
        case '$':
            sym = '\n';             /* line anchors become a newline in Pattern */
            break;
        default:
            sym = temp[i];
            break;
        }

        Pattern[j++] = sym;
        if (keep_in_delim) {
            /* Stop before old_D_pat overflows; leave room for the NUL. */
            if (D_length >= (int)sizeof old_D_pat - 1) {
                fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
                exit(2);
            }
            old_D_pat[D_length++] = temp[i];
        }
    }
    if (D_length > MAXDELIM) {
        fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
        exit(2);
    }

    Pattern[j++] = ANDPAT;
    old_D_pat[D_length] = '\0';
    strcpy((char *)D_pattern, (char *)old_D_pat);
    D_length++;     /* NOTE(review): presumably counts the ANDPAT separator -- confirm */
    Pattern[j] = '\0';

    /* ---- pattern part ---- */
    rp = 0;
    if (REGEX) {
        /* if REGEX: always put ".(" in front */
        r_pat[rp++] = '.';
        r_pat[rp++] = '(';
        Pattern[j++] = NOCARE;
        HEAD = ON;
    }

    for (i = d_end; i < m; i++) {
        switch (temp[i]) {
        case '\\':
            i++;
            Pattern[j++] = temp[i];
            r_pat[rp++] = 'o';      /* the symbol doesn't matter */
            break;
        case '#':
            if (REGEX) {
                Pattern[j++] = NOCARE;
                r_pat[rp++] = '.';
                r_pat[rp++] = '*';
                break;
            }
            Pattern[j++] = WILDCD;
            break;
        case '(':
            Pattern[j++] = LPARENT;
            r_pat[rp++] = '(';
            break;
        case ')':
            Pattern[j++] = RPARENT;
            r_pat[rp++] = ')';
            break;
        case '[':
            Pattern[j++] = LRANGE;
            r_pat[rp++] = '[';
            IN_RANGE = ON;
            break;
        case ']':
            Pattern[j++] = RRANGE;
            r_pat[rp++] = ']';
            IN_RANGE = OFF;
            break;
        case '<':
            Pattern[j++] = LANGLE;
            break;
        case '>':
            Pattern[j++] = RANGLE;
            break;
        case '^':
            /* "[^" negates a range; the i > 0 guard avoids reading temp[-1] */
            if (i > 0 && temp[i - 1] == '[')
                Pattern[j++] = NOTSYM;
            else
                Pattern[j++] = '\n';
            r_pat[rp++] = '^';
            break;
        case '$':
            Pattern[j++] = '\n';
            r_pat[rp++] = '$';
            break;
        case '.':
            Pattern[j++] = NOCARE;
            r_pat[rp++] = '.';
            break;
        case '*':
            Pattern[j++] = STAR;
            r_pat[rp++] = '*';
            break;
        case '|':
            Pattern[j++] = ORSYM;
            r_pat[rp++] = '|';
            break;
        case ',':
            Pattern[j++] = ORPAT;
            RE_ERR = ON;
            break;
        case ';':
            if (ANDON)
                RE_ERR = ON;        /* a second ';' is flagged as an error */
            Pattern[j++] = ANDPAT;
            ANDON = ON;
            break;
        case '-':
            if (IN_RANGE) {
                Pattern[j++] = HYPHEN;
                r_pat[rp++] = '-';
            } else {
                Pattern[j++] = temp[i];
                r_pat[rp++] = temp[i];
            }
            break;
        case NNLINE:
            Pattern[j++] = temp[i];
            r_pat[rp++] = 'N';
            break;
        default:
            Pattern[j++] = temp[i];
            r_pat[rp++] = temp[i];
            break;
        }
    }

    if (REGEX) {
        /* append ")." at the end of the regular expression */
        r_pat[rp++] = ')';
        r_pat[rp++] = '.';
        Pattern[j++] = NOCARE;
        TAIL = ON;
    }
    Pattern[j] = '\0';
    m = j;
    r_pat[rp] = '\0';

    if (REGEX) {
        if (DELIMITER || WORDBOUND) {
            fprintf(stderr, "%s: -d or -w option is not supported for this pattern\n", Progname);
            exit(2);
        }
        if (RE_ERR) {
            fprintf(stderr, "%s: illegal regular expression\n", Progname);
            exit(2);
        }

        /* Skip the delimiter prefix: advance to the first NOCARE ('.'). */
        while (*Pattern != NOCARE && m-- > 0)
            Pattern++;

        num_pos = init(r_pat, table);
        if (num_pos <= 0) {
            fprintf(stderr, "%s: illegal regular expression\n", Progname);
            exit(2);
        }
        if (num_pos > 30) {
            fprintf(stderr, "%s: regular expression too long\n", Progname);
            exit(2);
        }

        /*
         * Do the real change to the Pattern to be returned.  Source and
         * destination lie in the same buffer, so memmove is required;
         * strcpy on overlapping objects is undefined behaviour.
         */
        memmove(old_pat, Pattern, strlen((char *)Pattern) + 1);

        /*
         * NOTE(review): r_pat is deliberately not freed on this path because
         * it cannot be seen from here whether init() keeps a pointer to it.
         * Free it here once that is confirmed.
         */
        return 0;
    }

    /* Non-regex path: r_pat was never handed to anyone, so release it. */
    free(r_pat);
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -