📄 sgrep.c
字号:
agrep_outpointer += outindex;
}
NEW_FILE = OFF;
PRINTED = 1;
}
if(BYTECOUNT) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
else {
char s[32];
int outindex;
sprintf(s, "%d=", CurrentByteOffset);
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(s[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
}
if (s[outindex] != '\0') {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += outindex;
}
PRINTED = 1;
}
if (PRINTOFFSET) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
else {
char s[32];
int outindex;
sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(s[outindex] != '\0'); outindex ++) {
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
}
if (s[outindex] != '\0') {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += outindex;
}
PRINTED = 1;
}
CurrentByteOffset += textbegin - text;
text = textbegin;
if (PRINTRECORD) {
if (TCOMPRESSED == ON) {
#if MEASURE_TIMES
gettimeofday(&initt, NULL);
#endif /*MEASURE_TIMES*/
if (agrep_finalfp != NULL)
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
else {
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += newlen;
}
}
#if MEASURE_TIMES
gettimeofday(&finalt, NULL);
OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif /*MEASURE_TIMES*/
}
else {
if (agrep_finalfp != NULL) {
fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
}
else {
if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
agrep_outpointer += curtextend - curtextbegin;
}
}
}
else if (PRINTED) {
if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
else agrep_outbuffer[agrep_outpointer ++] = '\n';
PRINTED = 0;
}
}
else { /* INVERSE */
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
if (agrep_finalfp != NULL)
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
else {
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
if (newlen + agrep_outpointer >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += newlen;
}
}
lastout=textbegin;
CurrentByteOffset += textbegin - text;
text = textbegin;
}
else { /* NOT TCOMPRESSED */
if (agrep_finalfp != NULL)
fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
else {
if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
agrep_outpointer += (curtextbegin - lastout);
}
lastout=textbegin;
CurrentByteOffset += textbegin - text;
text = textbegin;
} /* TCOMPRESSED */
} /* INVERSE */
}
else { /* COUNT */
CurrentByteOffset += textbegin - text;
text = textbegin;
}
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
/* CONT: was here TG */
/* shift = 1; [del] [TG] */
shift = SHIFT[*text]; /* [new] [TG] */
CONT: /* now here to restart the skip-loop */
#ifdef _WIN32
;
#endif
}
else shift = d1;
}
if (INVERSE && !COUNT && (lastout <= textend)) {
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
if (agrep_finalfp != NULL)
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
else {
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
if (newlen + agrep_outpointer >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += newlen;
}
}
}
else { /* NOT TCOMPRESSED */
if (agrep_finalfp != NULL)
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
else {
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
agrep_outpointer += (textend - lastout + 1);
}
} /* TCOMPRESSED */
}
return 0;
}
/* initmask() initializes the mask table for the pattern */
/* endposition is a mask for the endposition of the pattern */
/* endposition will contain k mask bits if the pattern contains k fragments */
static void initmask(pattern, Mask, m, D, endposition)
CHARTYPE *pattern;
unsigned *Mask;
register int m, D;
unsigned *endposition;
{
register unsigned Bit1, c;
register int i, j, frag_num;
/* Bit1 = 1 << 31;*/ /* the first bit of Bit1 is 1, others 0. */
Bit1 = (unsigned)0x80000000;
frag_num = D+1;
*endposition = 0;
for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i);
*endposition = *endposition >> (m - frag_num);
for(i = 0; i < m; i++)
if (pattern[i] == '^' || pattern[i] == '$') {
pattern[i] = '\n';
}
for(i = 0; i < MAXSYM; i++) Mask[i] = ~0;
for(i = 0; i < m; i++) /* initialize the mask table */
{
c = pattern[i];
for ( j = 0; j < m; j++)
if( c == pattern[j] )
Mask[c] = Mask[c] & ~( Bit1 >> j ) ;
}
}
static void
prep(Pattern, M, D) /* preprocessing for partitioning_bm */
CHARTYPE *Pattern; /* can be fine-tuned to choose a better partition */
register int M, D;
{
register int i, j, k, p, shift;
register unsigned m;
unsigned hash, b_size = 3;
m = M/(D+1);
p = M - m*(D+1);
for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;
for (i = M-1; i>=p ; i--) {
shift = (M-1-i)%m;
hash = Pattern[i];
if((int)(SHIFT[hash]) > (int)(shift)) SHIFT[hash] = shift;
}
#ifdef DEBUG
for(i=0; i<M; i++) printf(" %d,", SHIFT[Pattern[i]]);
printf("\n");
#endif
shift_1 = m;
for(i=0; i<D+1; i++) {
j = M-1 - m*i;
for(k=1; k<m; k++) {
for(p=0; p<D+1; p++)
if(Pattern[j-k] == Pattern[M-1-m*p])
if(k < shift_1) shift_1 = k;
}
}
#ifdef DEBUG
printf("\nshift_1 = %d", shift_1);
#endif
if(shift_1 == 0) shift_1 = 1;
for(i=0; i<MAXMEMBER; i++) MEMBER[i] = 0;
if (m < 3) b_size = m;
for(i=0; i<D+1; i++) {
j = M-1 - m*i;
hash = 0;
for(k=0; k<b_size; k++) {
hash = (hash << 2) + Pattern[j-k];
}
#ifdef DEBUG
printf(" hash = %d,", hash);
#endif
MEMBER[hash] = 1;
}
}
int
agrep( pat, M, text, textend, D, oldpat, oldM)
int M, D, oldM;
register CHARTYPE *text, *textend, *pat, *oldpat;
{
register int i;
register int m = M/(D+1);
register CHARTYPE *textbegin;
CHARTYPE *textstart;
register int shift, HASH;
int j=0, k, d1;
int n, cdx;
int Candidate[MaxCan][2], round, lastend=0;
unsigned R1[MaxError+1], R2[MaxError+1];
register unsigned int r1, endpos, c;
unsigned currentpos;
unsigned Bit1;
unsigned r_newline;
int oldbyteoffset;
CHARTYPE *lastout = text;
int newlen;
Candidate[0][0] = Candidate[0][1] = 0;
d1 = shift_1;
cdx = 0;
if(m < 3) r1 = m;
else r1 = 3;
textbegin = text;
shift = m-1;
while (text < textend) {
textstart = text;
shift = SHIFT[*(text += shift)];
while(shift) {
shift = SHIFT[*(text += shift)];
shift = SHIFT[*(text += shift)];
}
CurrentByteOffset += text - textstart;
j = 1;
HASH = *text;
while(j < r1) {
HASH = (HASH << 2) + *(text-j);
j++;
}
if (MEMBER[HASH]) {
i = text - textbegin;
if((i - M - D - 10) > Candidate[cdx][1]) {
Candidate[++cdx][0] = i-M-D-2;
Candidate[cdx][1] = i+M+D;
}
else Candidate[cdx][1] = i+M+D;
shift = d1;
}
else shift = d1;
}
CurrentByteOffset += (textbegin - text);
text = textbegin;
n = textend - textbegin;
r_newline = '\n';
/* for those candidate areas, find the D-error matches */
if(Candidate[1][0] < 0) Candidate[1][0] = 0;
endpos = endposition; /* the mask table and the endposition */
/* Bit1 = (1 << 31); */
Bit1 = (unsigned)0x80000000;
oldbyteoffset = CurrentByteOffset;
for(round = 0; round <= cdx; round++)
{
i = Candidate[round][0] ;
if(Candidate[round][1] > n) Candidate[round][1] = n;
if(i < 0) i = 0;
CurrentByteOffset = oldbyteoffset+i;
R1[0] = R2[0] = ~0;
R1[1] = R2[1] = ~Bit1;
for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1];
while (i < Candidate[round][1])
{
c = text[i++];
CurrentByteOffset ++;
if(c == r_newline) {
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
}
r1 = Mask[c];
R1[0] = (R2[0] >> 1) | r1;
for(k=1; k<=D; k++)
R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
if((R1[D] & endpos) == 0) {
num_of_matched++;
if(FILENAMEONLY) return 0;
currentpos = i;
if(i <= lastend) {
CurrentByteOffset += lastend - i;
i = lastend;
}
else {
int oldcurrentpos = currentpos;
if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
CurrentByteOffset += currentpos - oldcurrentpos;
i = currentpos;
}
lastend = i;
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
}
/* copying the code to save a few instructions.
you need to understand the shift-or algorithm
to figure this one... */
c = text[i++];
CurrentByteOffset ++;
if(c == r_newline) {
for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
}
r1 = Mask[c];
R2[0] = (R1[0] >> 1) | r1;
for(k = 1; k <= D; k++)
R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
if((R2[D] & endpos) == 0) {
currentpos = i;
num_of_matched++;
if(FILENAMEONLY) return 0;
if(i <= lastend) {
CurrentByteOffset += lastend - i;
i = lastend;
}
else {
int oldcurrentpos = currentpos;
if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
CurrentByteOffset += currentpos - oldcurrentpos;
i = currentpos;
}
lastend = i;
for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */
}
}
}
if (INVERSE && !COUNT && (lastout <= textend)) {
if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
if (agrep_finalfp != NULL)
newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
else {
if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
if (newlen + agrep_outpointer >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += newlen;
}
}
}
else { /* NOT TCOMPRESSED */
if (agrep_finalfp != NULL)
fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
else {
if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -