📄 newmgrep.c
字号:
* 4. Process other variables/functions (pat_spool, tr, tr1, pat_len, accumulate, SHIFT1, f_prep, f_prep1, pat_indices) appropriately.
*/
/*
 * tc_prepf: prepare the multi-pattern search tables for searching
 * compressed text (the "tc_" counterparts of the normal tables).
 *
 * buf    - the patterns, one per line ('\n' separated)
 * length - number of valid bytes in buf
 *
 * Each pattern is copied into a freshly allocated spool (tc_pat_spool),
 * NUL-terminated, and, when quick_tcompress() returns a positive length,
 * replaced in place by its compressed form.  Pattern i (1-based) is then
 * reachable through tc_patt[i] with length tc_pat_len[i].  After that the
 * translation tables (tc_tr, tc_tr1), the minimum pattern length (tc_p_size),
 * the tc_LONG / tc_SHORT flags, the default shift table (tc_SHIFT1) and the
 * hash table (tc_HASH) are initialized, and tc_f_prep(), tc_accumulate() and
 * tc_f_prep1() are run over all patterns.
 *
 * Returns 0 on success and -1 on failure (pattern buffer too large, or the
 * spool could not be allocated).
 *
 * NOTE(review): p is not checked against the capacity of tc_patt[], which is
 * declared outside this block -- confirm that the size test below is enough.
 */
int
tc_prepf(buf, length)
unsigned char *buf;
int length;
{
	int i, p=1;
	uchar *pat_ptr;
	unsigned Mask = 31;
	int tc_length;
	unsigned char tc_buf[MAXPAT * 2]; /* maximum length of the compressed pattern */
	static struct timeval initt, finalt;

	if (length*2 > MAXPATFILE + 2*max_num) {
		fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2);
		return -1;
	}

	/* free(NULL) is a no-op, so no guard is needed */
	free(tc_pat_spool);
	pat_ptr = tc_pat_spool = (unsigned char *)malloc(length*2 + MAXPAT);
	if (tc_pat_spool == NULL) {
		/* previously unchecked: the copy loop below would write through NULL */
		fprintf(stderr, "%s: malloc failure in tc_prepf\n", Progname);
		return -1;
	}

#if MEASURE_TIMES
	gettimeofday(&initt, NULL);
#endif /*MEASURE_TIMES*/

	i=0; p=1;
	while(i < length) {
		tc_patt[p] = pat_ptr;
		/*
		 * Copy one line.  The scan is bounded by length so that a last
		 * pattern without a trailing newline cannot read past buf.
		 */
		while (i < length && buf[i] != '\n') *pat_ptr++ = buf[i++];
		if (i < length) i++;	/* step over the newline */
		*pat_ptr++ = 0;
		if ((tc_length = quick_tcompress(FREQ_FILE, HASH_FILE, tc_patt[p], strlen(tc_patt[p]), tc_buf, MAXPAT * 2 - 8, TC_EASYSEARCH)) > 0) {
			/* replace the plain pattern by its compressed form, in place */
			memcpy(tc_patt[p], tc_buf, tc_length);
			tc_patt[p][tc_length] = '\0';
			pat_ptr = tc_patt[p] + tc_length + 1;	/* character after '\0' */
		}
		p++;
	}

	/*
	 * Boundary safety zone: 19 non-zero bytes after the last pattern.
	 * The pointer has to advance, otherwise only a single byte is written.
	 */
	for(i=1; i<20; i++) *pat_ptr++ = i;

	/* Ignore all other options: it is automatically W_DELIM */
	for(i=0; i< MAXSYM; i++) tc_tr[i] = i;
	for(i=0; i< MAXSYM; i++) tc_tr1[i] = tc_tr[i]&Mask;

	tc_num_pat = p-1;
	tc_p_size = MAXPAT;
	/* iterate over the compressed pattern set (tc_num_pat), not num_pat */
	for(i=1; i<=tc_num_pat; i++) {
		p = strlen(tc_patt[i]);
		tc_pat_len[i] = p;
#ifdef debug
		printf("prepf(): tc_patt[%d]=%s, tc_pat_len[%d]=%d\n", i, tc_patt[i], i, tc_pat_len[i]);
#endif
		if(p!=0 && p < tc_p_size) tc_p_size = p;	/* MIN */
	}
	if(tc_p_size == 0) { /* cannot happen NOW */
		fprintf(stderr, "%s: the pattern file is empty\n", Progname);
		if (!EXITONERROR) {
			errno = AGREP_ERROR;
			return -1;
		}
		else exit(2);
	}

	if(length > 400 && tc_p_size > 2) tc_LONG = 1;
	if(tc_p_size == 1) tc_SHORT = 1;
	/* the default shift depends on the tc_ flag that was just set, not on LONG */
	for(i=0; i<MAXMEMBER1; i++) tc_SHIFT1[i] = tc_p_size - 1 - tc_LONG;
	for(i=0; i<MAXHASH; i++) {
		tc_HASH[i] = 0;
	}
	for(i=1; i<=tc_num_pat; i++) tc_f_prep(i, tc_patt[i]);
	tc_accumulate();
	memset(tc_pat_indices, '\0', sizeof(int) * (tc_num_pat + 1));
	for(i=1; i<=tc_num_pat; i++) tc_f_prep1(i, tc_patt[i]);

#if MEASURE_TIMES
	gettimeofday(&finalt, NULL);
	INFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif /*MEASURE_TIMES*/
	return 0;
}
#endif /*DOTCOMPRESSED*/
/*
 * mgrep: run the multi-pattern search over one input.
 *
 * fd - descriptor handed to alloc_buf()/fill_buf()/free_buf().  When
 *      AGREP_POINTER is enabled and fd == -1, the text is taken from
 *      agrep_inbuffer (agrep_inlen bytes) instead.
 *
 * In the descriptor case the input is processed in chunks of up to
 * 2*BLOCKSIZE bytes placed at text+MAXLINE.  Each chunk is cut back to the
 * last newline (or record delimiter when DELIMITER is set), searched with
 * m_short()/monkey1() (or the tc_ variants when TCOMPRESSED is set), and the
 * unsearched tail ("residue") is copied in front of text+MAXLINE so that it
 * is searched together with the next chunk.
 *
 * Returns 0 on normal completion, including the early exits for
 * FILENAMEONLY and for LIMITOUTPUT/LIMITPERFILE; returns -1 when a search
 * routine returns -1 inside the main loop or when the file name does not fit
 * into agrep_outbuffer.
 */
int
mgrep(fd)
int fd;
{
register char r_newline = '\n';
unsigned char *text;
register int buf_end, num_read, start, end, residue = 0;
int oldCurrentByteOffset;
int first_time = 1;
#if AGREP_POINTER
if (fd != -1) {
#endif /*AGREP_POINTER*/
alloc_buf(fd, &text, 2*BLOCKSIZE+MAXLINE);
/* sentinel newline just before the position where the first chunk is placed */
text[MAXLINE-1] = '\n'; /* initial case */
start = MAXLINE;
/* presumably fill_buf() returns the number of bytes stored at text+MAXLINE -- TODO confirm */
while( (num_read = fill_buf(fd, text+MAXLINE, 2*BLOCKSIZE)) > 0)
{
/* index of the last byte of this chunk */
buf_end = end = MAXLINE + num_read -1 ;
oldCurrentByteOffset = CurrentByteOffset;
if (first_time) {
/* first chunk only: when compressed search is requested, test the start of the data and skip SIGNATURE_LEN bytes if the test succeeds */
if ((TCOMPRESSED == ON) && tuncompressible(text+MAXLINE, num_read)) {
EASYSEARCH = text[MAXLINE+SIGNATURE_LEN-1];
start += SIGNATURE_LEN;
CurrentByteOffset += SIGNATURE_LEN;
if (!EASYSEARCH) {
fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
}
}
else TCOMPRESSED = OFF;
first_time = 0;
}
if (!DELIMITER) {
/* move end back to the last newline of the chunk (but not below MAXLINE) */
while(text[end] != r_newline && end > MAXLINE) end--;
text[start-1] = r_newline;
}
else {
unsigned char *newbuf = text + end + 1;
newbuf = backward_delimiter(newbuf, text+MAXLINE, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */
/* result too close to the start of the chunk: fall back to searching the whole chunk */
if (newbuf < text+MAXLINE+D_length) newbuf = text + end + 1;
end = newbuf - text - 1;
/* TG 22.10.97 Check bounds before memcpy-ing */
/* printf("text %x start %i D_length %i D_pattern %i residue %i\n",text,start,D_length,D_pattern,residue); */
if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
/* NOTE(review): residue still holds the value computed for the previous chunk here, and this write lands inside the data to be searched -- confirm this is intended */
memcpy(text+start+residue, D_pattern, D_length);
/* original code was: memcpy(text+start-D_length, D_pattern, D_length); */
}
/* number of bytes from text[end] through text[buf_end], i.e. the part carried to the next round */
residue = buf_end - end + 1 ;
if(INVERSE && COUNT) countline(text+MAXLINE, num_read);
/* MGREP_PROCESS */
if (TCOMPRESSED) { /* separate functions since separate globals => too many if-statements within a single function makes it slow */
/* when DOTCOMPRESSED is not defined, nothing is searched in this branch */
#if DOTCOMPRESSED
if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
else { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
#endif /*DOTCOMPRESSED*/
}
else {
if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
else { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
}
/* file-name-only mode: on the first match, emit the name once and stop reading this input */
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%s\n", CurrentFileName);
else {
/* no output stream: append the name to agrep_outbuffer, bounded by agrep_outlen */
int outindex;
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(CurrentFileName[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
}
/* name truncated, or no room left for the trailing newline */
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
OUTPUT_OVERFLOW;
free_buf(fd, text);
return -1;
}
else {
agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
}
agrep_outpointer += outindex;
}
free_buf(fd, text);
NEW_FILE = OFF;
return 0;
}
CurrentByteOffset = oldCurrentByteOffset + end - start + 1;
/* place the residue so that it ends right before text+MAXLINE, where the next chunk is read */
start = MAXLINE - residue;
/* NOTE(review): when residue > MAXLINE, start is clamped to 1 but residue bytes are still copied below, so the copy extends past text+MAXLINE -- confirm */
if(start < 0) {
start = 1;
}
/* NOTE(review): strncpy stops at the first NUL byte in the source and zero-fills the remainder; source and destination may also overlap for a large residue -- memmove may have been intended, confirm */
strncpy(text+start, text+end, residue);
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
free_buf(fd, text);
return 0; /* done */
}
} /* end of while(num_read = ... */
/* input exhausted: frame the remaining residue with newlines/delimiters and search it once more */
if (!DELIMITER) {
text[start-1] = '\n';
text[start+residue] = '\n';
}
else {
if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
memcpy(text+start+residue, D_pattern, D_length);
}
end = start + residue;
if(residue > 1) {
/* NOTE(review): unlike the main loop, the return values of the search routines are ignored here */
if (TCOMPRESSED) {
#if DOTCOMPRESSED
if(tc_SHORT) tc_m_short(text, start, end);
else tc_monkey1(text, start, end);
#endif /*DOTCOMPRESSED*/
}
else {
if(SHORT) m_short(text, start, end);
else monkey1(text, start, end);
}
/* same file-name-only handling as inside the loop */
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%s\n", CurrentFileName);
else {
int outindex;
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(CurrentFileName[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
}
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
OUTPUT_OVERFLOW;
free_buf(fd, text);
return -1;
}
else {
agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
}
agrep_outpointer += outindex;
}
free_buf(fd, text);
NEW_FILE = OFF;
return 0;
}
}
free_buf(fd, text);
return (0);
#if AGREP_POINTER
}
else {
/* fd == -1: search the caller-supplied buffer in a single pass, without copying */
text = (unsigned char *)agrep_inbuffer;
num_read = agrep_inlen;
start = 0;
buf_end = end = num_read - 1;
oldCurrentByteOffset = CurrentByteOffset;
if (first_time) {
/* NOTE(review): start is 0 in this branch, yet the signature test and the EASYSEARCH byte are still taken at text+MAXLINE as in the descriptor branch -- looks copied, confirm */
if ((TCOMPRESSED == ON) && tuncompressible(text+MAXLINE, num_read)) {
EASYSEARCH = text[MAXLINE+SIGNATURE_LEN-1];
start += SIGNATURE_LEN;
CurrentByteOffset += SIGNATURE_LEN;
if (!EASYSEARCH) {
fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
}
}
else TCOMPRESSED = OFF;
first_time = 0;
}
if (!DELIMITER)
/* move end back to the last newline (but not below index 1) */
while(text[end] != r_newline && end > 1) end--;
else {
unsigned char *newbuf = text + end + 1;
newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */
if (newbuf < text+D_length) newbuf = text + end + 1;
end = newbuf - text - 1;
}
/* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end] */
if (INVERSE && COUNT) countline(text, num_read);
/* An exact copy of the above MGREP_PROCESS */
/* NOTE(review): the free_buf(fd, text) calls below receive fd == -1 and the caller's buffer; presumably free_buf() does nothing in that case -- verify */
if (TCOMPRESSED) { /* separate functions since separate globals => too many if-statements within a single function makes it slow */
#if DOTCOMPRESSED
if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
else { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
#endif /*DOTCOMPRESSED*/
}
else {
if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
else { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
}
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%s\n", CurrentFileName);
else {
int outindex;
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(CurrentFileName[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
}
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
OUTPUT_OVERFLOW;
free_buf(fd, text);
return -1;
}
else {
agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
}
agrep_outpointer += outindex;
}
free_buf(fd, text);
NEW_FILE = OFF;
return 0;
}
return 0;
}
#endif /*AGREP_POINTER*/
/* NOTE(review): every path above has already returned, so this statistics output is not reached */
#ifdef perf_check
fprintf(stderr,"Shifted %d times; shift=0 %d times; hash was = %d times\n",cshift, cshift0, chash);
return 0;
#endif
} /* end mgrep */
/*
 * countline: add the number of '\n' bytes found in the first len bytes
 * of text to the global total_line.  Nothing is counted when len <= 0.
 */
static void
countline(text, len)
unsigned char *text; int len;
{
	unsigned char *cursor = text;
	int remaining = len;

	while (remaining-- > 0) {
		if (*cursor++ == '\n') {
			total_line++;
		}
	}
}
/* Stuff that always needs to be printed whenever there is a match in all functions in this file */
int
print_options(pat_index, text, curtextbegin, curtextend)
int pat_index;
unsigned char *text, *curtextbegin, *curtextend;
{
int PRINTED = 0;
if(FNAME && (NEW_FILE || !POST_FILTER)) {
char nextchar = (POST_FILTER == ON)?'\n':' ';
char *prevstring = (POST_FILTER == ON)?"\n":"";
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%s%s:%c", prevstring, CurrentFileName, nextchar);
else {
int outindex;
if (prevstring[0] != '\0') {
if(agrep_outpointer + 1 >= agrep_outlen) {
OUTPUT_OVERFLOW;
return -1;
}
else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
}
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(CurrentFileName[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
}
if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+2>=agrep_outlen)) {
OUTPUT_OVERFLOW;
return -1;
}
else {
agrep_outbuffer[agrep_outpointer+outindex++] = ':';
agrep_outbuffer[agrep_outpointer+outindex++] = nextchar;
}
agrep_outpointer += outindex;
}
NEW_FILE = OFF;
PRINTED = 1;
}
if (PRINTPATTERN) {
if (agrep_finalfp != NULL)
fprintf(agrep_finalfp, "%d- ", pat_index);
else {
char s[32];
int outindex;
sprintf(s, "%d- ", pat_index);
for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
(s[outindex] != '\0'); outindex++) {
agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
}
if (s[outindex] != '\0') {
OUTPUT_OVERFLOW;
return -1;
}
agrep_outpointer += outindex;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -