📄 pcregrep.c
字号:
static char *end_of_line(char *p, char *endptr, int *lenptr){switch(endlinetype) { default: /* Just in case */ case EL_LF: while (p < endptr && *p != '\n') p++; if (p < endptr) { *lenptr = 1; return p + 1; } *lenptr = 0; return endptr; case EL_CR: while (p < endptr && *p != '\r') p++; if (p < endptr) { *lenptr = 1; return p + 1; } *lenptr = 0; return endptr; case EL_CRLF: for (;;) { while (p < endptr && *p != '\r') p++; if (++p >= endptr) { *lenptr = 0; return endptr; } if (*p == '\n') { *lenptr = 2; return p + 1; } } break; case EL_ANY: while (p < endptr) { int extra = 0; register int c = *((unsigned char *)p); if (utf8 && c >= 0xc0) { int gcii, gcss; extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ gcss = 6*extra; c = (c & utf8_table3[extra]) << gcss; for (gcii = 1; gcii <= extra; gcii++) { gcss -= 6; c |= (p[gcii] & 0x3f) << gcss; } } p += 1 + extra; switch (c) { case 0x0a: /* LF */ case 0x0b: /* VT */ case 0x0c: /* FF */ *lenptr = 1; return p; case 0x0d: /* CR */ if (p < endptr && *p == 0x0a) { *lenptr = 2; p++; } else *lenptr = 1; return p; case 0x85: /* NEL */ *lenptr = utf8? 2 : 1; return p; case 0x2028: /* LS */ case 0x2029: /* PS */ *lenptr = 3; return p; default: break; } } /* End of loop for ANY case */ *lenptr = 0; /* Must have hit the end */ return endptr; } /* End of overall switch */}/************************************************** Find start of previous line **************************************************//* This is called when looking back for before lines to print.Arguments: p start of the subsequent line startptr start of available dataReturns: pointer to the start of the previous line*/static char *previous_line(char *p, char *startptr){switch(endlinetype) { default: /* Just in case */ case EL_LF: p--; while (p > startptr && p[-1] != '\n') p--; return p; case EL_CR: p--; while (p > startptr && p[-1] != '\n') p--; return p; case EL_CRLF: for (;;) { p -= 2; while (p > startptr && p[-1] != '\n') p--; if (p <= startptr + 1 || p[-2] == '\r') return p; } return p; /* But control should never get here */ case EL_ANY: if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; if (utf8) while ((*p & 0xc0) == 0x80) p--; while (p > startptr) { register int c; char *pp = p - 1; if (utf8) { int extra = 0; while ((*pp & 0xc0) == 0x80) pp--; c = *((unsigned char *)pp); if (c >= 0xc0) { int gcii, gcss; extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ gcss = 6*extra; c = (c & utf8_table3[extra]) << gcss; for (gcii = 1; gcii <= extra; gcii++) { gcss -= 6; c |= (pp[gcii] & 0x3f) << gcss; } } } else c = *((unsigned char *)pp); switch (c) { case 0x0a: /* LF */ case 0x0b: /* VT */ case 0x0c: /* FF */ case 0x0d: /* CR */ case 0x85: /* NEL */ case 0x2028: /* LS */ case 0x2029: /* PS */ return p; default: break; } p = pp; /* Back one character */ } /* End of loop for ANY case */ return startptr; /* Hit start of data */ } /* End of overall switch */}/************************************************** Print the previous "after" lines **************************************************//* This is called if we are about to lose said lines because of buffer filling,and at the end of the file. The data in the line is written using fwrite() sothat a binary zero does not terminate it.Arguments: lastmatchnumber the number of the last matching line, plus one lastmatchrestart where we restarted after the last match endptr end of available data printname filename for printingReturns: nothing*/static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr, char *printname){if (after_context > 0 && lastmatchnumber > 0) { int count = 0; while (lastmatchrestart < endptr && count++ < after_context) { int ellength; char *pp = lastmatchrestart; if (printname != NULL) fprintf(stdout, "%s-", printname); if (number) fprintf(stdout, "%d-", lastmatchnumber++); pp = end_of_line(pp, endptr, &ellength); fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); lastmatchrestart = pp; } hyphenpending = TRUE; }}/************************************************** Grep an individual file **************************************************//* This is called from grep_or_recurse() below. It uses a buffer that is threetimes the value of MBUFTHIRD. The matching point is never allowed to stray intothe top third of the buffer, thus keeping more of the file available forcontext printing or for multiline scanning. For large files, the pointer willbe in the middle third most of the time, so the bottom third is available for"before" context printing.Arguments: in the fopened FILE stream printname the file name if it is to be printed for each match or NULL if the file name is not to be printed it cannot be NULL if filenames[_nomatch]_only is setReturns: 0 if there was at least one match 1 otherwise (no matches)*/static intpcregrep(FILE *in, char *printname){int rc = 1;int linenumber = 1;int lastmatchnumber = 0;int count = 0;int offsets[99];char *lastmatchrestart = NULL;char buffer[3*MBUFTHIRD];char *ptr = buffer;char *endptr;size_t bufflength;BOOL endhyphenpending = FALSE;/* Do the first read into the start of the buffer and set up the pointer toend of what we have. */bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);endptr = buffer + bufflength;/* Loop while the current pointer is not at the end of the file. For largefiles, endptr will be at the end of the buffer when we are in the middle of thefile, but ptr will never get there, because as soon as it gets over 2/3 of theway, the buffer is shifted left and re-filled. */while (ptr < endptr) { int i, endlinelength; int mrc = 0; BOOL match = FALSE; char *t = ptr; size_t length, linelength; /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of the next line. After matching, we always advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so that any match is constrained to be in the first line. */ t = end_of_line(t, endptr, &endlinelength); linelength = t - ptr - endlinelength; length = multiline? endptr - ptr : linelength; /* Extra processing for Jeffrey Friedl's debugging. */#ifdef JFRIEDL_DEBUG if (jfriedl_XT || jfriedl_XR) { #include <sys/time.h> #include <time.h> struct timeval start_time, end_time; struct timezone dummy; if (jfriedl_XT) { unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); const char *orig = ptr; ptr = malloc(newlen + 1); if (!ptr) { printf("out of memory"); exit(2); } endptr = ptr; strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); for (i = 0; i < jfriedl_XT; i++) { strncpy(endptr, orig, length); endptr += length; } strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); length = newlen; } if (gettimeofday(&start_time, &dummy) != 0) perror("bad gettimeofday"); for (i = 0; i < jfriedl_XR; i++) match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0); if (gettimeofday(&end_time, &dummy) != 0) perror("bad gettimeofday"); double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) - (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); return 0; }#endif /* Run through all the patterns until one matches. Note that we don't include the final newline in the subject string. */ for (i = 0; i < pattern_count; i++) { mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0, offsets, 99); if (mrc >= 0) { match = TRUE; break; } if (mrc != PCRE_ERROR_NOMATCH) { fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc); if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); fprintf(stderr, "this line:\n"); fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */ fprintf(stderr, "\n"); if (error_count == 0 && (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT)) { fprintf(stderr, "pcregrep: error %d means that a resource limit " "was exceeded\n", mrc); fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); } if (error_count++ > 20) { fprintf(stderr, "pcregrep: too many errors - abandoned\n"); exit(2); } match = invert; /* No more matching; don't show the line again */ break; } } /* If it's a match or a not-match (as required), do what's wanted. */ if (match != invert) { BOOL hyphenprinted = FALSE; /* We've failed if we want a file that doesn't have any matches. */ if (filenames == FN_NOMATCH_ONLY) return 1; /* Just count if just counting is wanted. */ if (count_only) count++; /* If all we want is a file name, there is no need to scan any more lines in the file. */ else if (filenames == FN_ONLY) { fprintf(stdout, "%s\n", printname); return 0; } /* Likewise, if all we want is a yes/no answer. */ else if (quiet) return 0; /* The --only-matching option prints just the substring that matched, and does not pring any context. */ else if (only_matching) { if (printname != NULL) fprintf(stdout, "%s:", printname); if (number) fprintf(stdout, "%d:", linenumber); fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); fprintf(stdout, "\n"); } /* This is the default case when none of the above options is set. We print the matching lines(s), possibly preceded and/or followed by other lines of context. */ else { /* See if there is a requirement to print some "after" lines from a previous match. We never print any overlaps. */ if (after_context > 0 && lastmatchnumber > 0) { int ellength; int linecount = 0; char *p = lastmatchrestart; while (p < ptr && linecount < after_context) { p = end_of_line(p, ptr, &ellength); linecount++; } /* It is important to advance lastmatchrestart during this printing so that it interacts correctly with any "before" printing below. Print each line's data using fwrite() in case there are binary zeroes. */ while (lastmatchrestart < p) { char *pp = lastmatchrestart; if (printname != NULL) fprintf(stdout, "%s-", printname); if (number) fprintf(stdout, "%d-", lastmatchnumber++); pp = end_of_line(pp, endptr, &ellength); fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); lastmatchrestart = pp; } if (lastmatchrestart != ptr) hyphenpending = TRUE; } /* If there were non-contiguous lines printed above, insert hyphens. */ if (hyphenpending) { fprintf(stdout, "--\n"); hyphenpending = FALSE; hyphenprinted = TRUE; } /* See if there is a requirement to print some "before" lines for this match. Again, don't print overlaps. */ if (before_context > 0) { int linecount = 0; char *p = ptr; while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && linecount < before_context) { linecount++; p = previous_line(p, buffer); } if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) fprintf(stdout, "--\n"); while (p < ptr) { int ellength; char *pp = p; if (printname != NULL) fprintf(stdout, "%s-", printname); if (number) fprintf(stdout, "%d-", linenumber - linecount--); pp = end_of_line(pp, endptr, &ellength); fwrite(p, 1, pp - p, stdout); p = pp; } } /* Now print the matching line(s); ensure we set hyphenpending at the end of the file if any context lines are being output. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -