📄 sgrep.c
字号:
CHARTYPE *curtextbegin; CHARTYPE *curtextend;#if MEASURE_TIMES struct timeval initt, finalt;#endif if(SILENT) return 0; if (TCOMPRESSED == ON) { if (!DELIMITER) { curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text + *i, text, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { if (!DELIMITER) { curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text + *i, text, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, D_pattern, D_length, OUTTAIL); } } if (TCOMPRESSED == ON) {#if MEASURE_TIMES gettimeofday(&initt, NULL);#endif /*MEASURE_TIMES*/ if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text + *i - curtextbegin + m, EASYSEARCH)) { num_of_matched --; return 0; }#if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif /*MEASURE_TIMES*/ } textbegin = curtextend; /*(curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */ oldi = *i; *i += textbegin - (text + *i); if(COUNT) return 0; if (INVERSE) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } *lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(*lastout, 1, curtextbegin-*lastout, agrep_finalfp); else { if (curtextbegin - *lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, *lastout, curtextbegin-*lastout); agrep_outpointer += (curtextbegin - *lastout); } *lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ return 0; } if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (CurrentFileName[outindex] != '\0'); outindex++) { agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex]; } if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, 
"%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (s[outindex] != '\0'); outindex++) { agrep_outbuffer[agrep_outpointer+outindex] = s[outindex]; } if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d= ", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (s[outindex] != '\0'); outindex++) { agrep_outbuffer[agrep_outpointer+outindex] = s[outindex]; } if (s[outindex] != '\0') { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } PRINTED = 1; } if (PRINTOFFSET) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text + oldi-curtextbegin), curtextend-curtextbegin); else { char s[32]; int outindex; sprintf(s, "@%d{%d} ", CurrentByteOffset - (text + oldi-curtextbegin), curtextend-curtextbegin); for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (s[outindex] != '\0'); outindex ++) { agrep_outbuffer[agrep_outpointer+outindex] = s[outindex]; } if (s[outindex] != '\0') { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } PRINTED = 1; } if (PRINTRECORD) { if (TCOMPRESSED == ON) {#if MEASURE_TIMES gettimeofday(&initt, NULL);#endif /*MEASURE_TIMES*/ if (agrep_finalfp != NULL) { newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH); } else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, 
EASYSEARCH)) > 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } }#if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend - curtextbegin); agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } return 0;}/*
 * prep_bm - fill the SHIFT[] table and shift_1 (both defined outside this
 * function) for a pattern, looking symbols up through the TR[] table.
 *
 *   Pattern  pattern bytes; indices 0..m-1 are read.
 *   m        pattern length (old-style parameter declared without a type).
 *
 * Returns nothing; the results are left in SHIFT[] and shift_1.
 * When DEBUG is defined, the SHIFT[] entries for 'a'..'z' and 'A'..'Z' are
 * printed with printf() at the end.
 *
 * NOTE(review): Pattern[m-1] is read unconditionally, so m >= 1 appears to be
 * assumed -- confirm against the callers.
 */static voidprep_bm(Pattern, m) unsigned char *Pattern;register m;{ int i; unsigned hash; unsigned char lastc;
/* Start every one of the MAXSYM table entries at the full pattern length m. */
for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;
/* Walk the pattern from its last character to its first.  An entry is
   overwritten only while it is still >= m-1, so (for m > 1) a symbol that
   already received a smaller value from a position further right keeps it.
   The value stored is m-1-i, the distance from position i to the end of the
   pattern; the last character therefore gets 0. */
for (i = m-1; i>=0; i--) { hash = TR[Pattern[i]]; if((int)(SHIFT[hash]) >= (int)(m - 1)) SHIFT[hash] = m-1-i; } shift_1 = m-1; /* shift_1 records the previous occurrence of the last character of the pattern. When we match this last character but do not have a match, we can shift until we reach the next occurrence from the right. 
*/ lastc = TR[Pattern[m-1]];
/* Scan leftwards from position m-2 for a character that translates to the
   same symbol as the last one; on the first hit store its distance from the
   end and set i = -1 so the loop terminates.  If there is no hit, shift_1
   keeps the default m-1 assigned above. */
for (i= m-2; i>=0; i--) { if(TR[Pattern[i]] == lastc ) { shift_1 = m-1 - i; i = -1; } } if(shift_1 == 0) shift_1 = 1; /* can never happen - Udi 11/7/94 */
/* NOTE(review): by the code above shift_1 is 0 here exactly when m == 1 (the
   default m-1 is 0 and the scan loop is not entered); the remark above
   presumably assumes 1-character patterns never reach this function --
   confirm. */
/* When NOUPPER is set, every i in 0..MAXSYM-1 for which isupper(i) holds
   gets the SHIFT value of tolower(i). */
if(NOUPPER) for(i=0; i<MAXSYM; i++) { if (isupper(i)) SHIFT[i] = SHIFT[tolower(i)]; /* SHIFT[i] = SHIFT[i + 'a' - 'A']; */ }#ifdef DEBUG for(i='a'; i<='z'; i++) printf("%c: %d", i, SHIFT[i]); printf("\n"); for(i='A'; i<='Z'; i++) printf("%c: %d", i, SHIFT[i]); printf("\n");#endif}/* monkey uses two characters for delta_1 shifting */CHARTYPE SHIFT_2[MAX_SHIFT_2];intmonkey( pat, m, text, textend ) register int m ; register CHARTYPE *text, *textend, *pat;{ int PRINTED = 0; register unsigned hash; register CHARTYPE shift; register int m1, j; CHARTYPE *textbegin = text; CHARTYPE *textstart; int newlen; CHARTYPE *curtextbegin; CHARTYPE *curtextend;#if MEASURE_TIMES struct timeval initt, finalt;#endif CHARTYPE *lastout = text; m1 = m - 1; text = text+m1; CurrentByteOffset += m1; while (text < textend) { textstart = text; hash = TR[*text]; hash = (hash << 3) + TR[*(text-1)]; shift = SHIFT_2[hash]; while(shift) { text = text + shift; hash = (TR[*text] << 3) + TR[*(text-1)]; shift = SHIFT_2[hash]; } CurrentByteOffset += text - textstart; j = 0; while(TR[pat[m1 - j]] == TR[*(text - j)]) { if(++j == m) break; } if (j == m ) { if(text > textend) return 0; /* Udi: used to be >= for some reason */ /* added by Udi 11/7/94 */ if(WORDBOUND) { /* if(isalnum(*(unsigned char *)(text+1))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */ if((text+1 <= textend) && isalnum(*(unsigned char *)(text+1)) && isalnum(*(unsigned char *)text)) { goto CONT; /* as if there was no match */ } /* if(isalnum(*(unsigned char *)(text-m))) goto CONT; --> fixed by SHIOZAKI Takehiko <takehi-s@ascii.co.jp> */ if((textbegin <= (text-m)) && isalnum(*(unsigned char *)(text-m)) && isalnum(*(unsigned char *)(text-m+1))) { goto CONT; /* as if there was no match */ } /* changed by Udi 11/7/94 to avoid having to set 
TR[] to W_delim */ } if (TCOMPRESSED == ON) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL); } } if (TCOMPRESSED == ON) {#if MEASURE_TIMES gettimeofday(&initt, NULL);#endif /*MEASURE_TIMES*/ if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH)) goto CONT; /* as if there was no match */#if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif /*MEASURE_TIMES*/ } textbegin = curtextend; /*(curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */ num_of_matched++; if(FILENAMEONLY) return 0; if (!COUNT) { if (!INVERSE) { if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (CurrentFileName[outindex] != '\0'); outindex++) { agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex]; } if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (s[outindex] != '\0'); outindex++) { agrep_outbuffer[agrep_outpointer+outindex] = s[outindex]; } if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -