📄 sgrep.c
字号:
} PRINTED = 1; } if (PRINTOFFSET) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin); else { char s[32]; int outindex; sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin); for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) && (s[outindex] != '\0'); outindex ++) { agrep_outbuffer[agrep_outpointer+outindex] = s[outindex]; } if (s[outindex] != '\0') { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } PRINTED = 1; } CurrentByteOffset += textbegin - text; text = textbegin; if (PRINTRECORD) { if (TCOMPRESSED == ON) {#if MEASURE_TIMES gettimeofday(&initt, NULL);#endif /*MEASURE_TIMES*/ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } }#if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin); agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } } else { /* INVERSE */ if (!SILENT) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp); else { if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout); agrep_outpointer += (curtextbegin - lastout); } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ } /* !SILENT */ } /* INVERSE */ } else { /* COUNT */ CurrentByteOffset += textbegin - text; text = textbegin; } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */CONT: if (m == 1) shift = 0; else shift = 1; /* ZZZZZZZZZZZZZZZZ check it out later */ } else shift = d1;WCONT: ; } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0;}/* initmask() initializes the mask table for the pattern */ /* endposition is a mask for the endposition of the pattern *//* endposition will contain k mask bits if the pattern contains k fragments */static voidinitmask(pattern, Mask, m, D, endposition)CHARTYPE *pattern; unsigned *Mask; register int m, D; unsigned *endposition;{ register unsigned Bit1, c; register int i, j, frag_num; /* Bit1 = 1 << 31;*/ /* the first bit of Bit1 is 1, others 0. */ Bit1 = (unsigned)0x80000000; frag_num = D+1; *endposition = 0; for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i); *endposition = *endposition >> (m - frag_num); for(i = 0; i < m; i++) if (pattern[i] == '^' || pattern[i] == '$') { pattern[i] = '\n'; } for(i = 0; i < MAXSYM; i++) Mask[i] = ~0; for(i = 0; i < m; i++) /* initialize the mask table */ { c = pattern[i]; for ( j = 0; j < m; j++) if( c == pattern[j] ) Mask[c] = Mask[c] & ~( Bit1 >> j ) ; }}static voidprep(Pattern, M, D) /* preprocessing for partitioning_bm */CHARTYPE *Pattern; /* can be fine-tuned to choose a better partition */register int M, D;{ register int i, j, k, p, shift; register unsigned m; unsigned hash, b_size = 3; m = M/(D+1); p = M - m*(D+1); for (i = 0; i < MAXSYM; i++) SHIFT[i] = m; for (i = M-1; i>=p ; i--) { shift = (M-1-i)%m; hash = Pattern[i]; if((int)(SHIFT[hash]) > (int)(shift)) SHIFT[hash] = shift; }#ifdef DEBUG for(i=0; i<M; i++) printf(" %d,", SHIFT[Pattern[i]]); printf("\n");#endif shift_1 = m; for(i=0; i<D+1; i++) { j = M-1 - m*i; for(k=1; k<m; k++) { for(p=0; p<D+1; p++) if(Pattern[j-k] == Pattern[M-1-m*p]) if(k < shift_1) shift_1 = k; } }#ifdef DEBUG printf("\nshift_1 = %d", shift_1);#endif if(shift_1 == 0) shift_1 = 1; for(i=0; i<MAXMEMBER; i++) MEMBER[i] = 0; if (m < 3) b_size = m; for(i=0; i<D+1; i++) { j = M-1 - m*i; hash = 0; for(k=0; k<b_size; k++) { hash = (hash << 2) + Pattern[j-k]; }#ifdef DEBUG printf(" hash = %d,", hash);#endif MEMBER[hash] = 1; }}intagrep( pat, M, text, textend, D, oldpat, oldM) int M, D, oldM; register CHARTYPE *text, *textend, *pat, *oldpat;{ register int i; register int m = M/(D+1); register CHARTYPE *textbegin; CHARTYPE *textstart; register int shift, HASH; int j=0, k, d1; int n, cdx; int Candidate[MaxCan][2], round, lastend=0; unsigned R1[MaxError+1], R2[MaxError+1]; register unsigned int r1, endpos, c; unsigned currentpos; unsigned Bit1; unsigned r_newline; int oldbyteoffset; CHARTYPE *lastout = text; int newlen; Candidate[0][0] = Candidate[0][1] = 0; d1 = shift_1; cdx = 0; if(m < 3) r1 = m; else r1 = 3; textbegin = text; shift = m-1; while (text < textend) { textstart = text; shift = SHIFT[*(text += shift)]; while(shift) { shift = SHIFT[*(text += shift)]; shift = SHIFT[*(text += shift)]; } CurrentByteOffset += text - textstart; j = 1; HASH = *text; while(j < r1) { HASH = (HASH << 2) + *(text-j); j++; } if (MEMBER[HASH]) { i = text - textbegin; if((i - M - D - 10) > Candidate[cdx][1]) { Candidate[++cdx][0] = i-M-D-2; Candidate[cdx][1] = i+M+D; } else Candidate[cdx][1] = i+M+D; shift = d1; } else shift = d1; } CurrentByteOffset += (textbegin - text); text = textbegin; n = textend - textbegin; r_newline = '\n'; /* for those candidate areas, find the D-error matches */ if(Candidate[1][0] < 0) Candidate[1][0] = 0; endpos = endposition; /* the mask table and the endposition */ /* Bit1 = (1 << 31); */ Bit1 = (unsigned)0x80000000; oldbyteoffset = CurrentByteOffset; for(round = 0; round <= cdx; round++) { i = Candidate[round][0] ; if(Candidate[round][1] > n) Candidate[round][1] = n; if(i < 0) i = 0; CurrentByteOffset = oldbyteoffset+i; R1[0] = R2[0] = ~0; R1[1] = R2[1] = ~Bit1; for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1]; while (i < Candidate[round][1]) { c = text[i++]; CurrentByteOffset ++; if(c == r_newline) { for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 ); } r1 = Mask[c]; R1[0] = (R2[0] >> 1) | r1; for(k=1; k<=D; k++) R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1); if((R1[D] & endpos) == 0) { num_of_matched++; if(FILENAMEONLY) return 0; currentpos = i; if(i <= lastend) { CurrentByteOffset += lastend - i; i = lastend; } else { int oldcurrentpos = currentpos; if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1; CurrentByteOffset += currentpos - oldcurrentpos; i = currentpos; } lastend = i; for(k=0; k<=D; k++) R1[k] = R2[k] = ~0; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } /* copying the code to save a few instructions. you need to understand the shift-or algorithm to figure this one... */ c = text[i++]; CurrentByteOffset ++; if(c == r_newline) { for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 ); } r1 = Mask[c]; R2[0] = (R1[0] >> 1) | r1; for(k = 1; k <= D; k++) R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1); if((R2[D] & endpos) == 0) { currentpos = i; num_of_matched++; if(FILENAMEONLY) return 0; if(i <= lastend) { CurrentByteOffset += lastend - i; i = lastend; } else { int oldcurrentpos = currentpos; if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1; CurrentByteOffset += currentpos - oldcurrentpos; i = currentpos; } lastend = i; for(k=0; k<=D; k++) R1[k] = R2[k] = ~0; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } } } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0;}/* Don't update CurrentByteOffset here: done by caller */ints_output(text, i, textbegin, textend, lastout, pat, m, oldpat, oldm) int *i; /* in, out */int m, oldm; CHARTYPE *text, *textbegin, *textend, *pat, *oldpat;CHARTYPE **lastout; /* in, out */{ int PRINTED = 0; int newlen; int oldi;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -