⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 agrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
 	AGREP	APPROXIMATE PATTERN - GREP.
	
	Copyright (c) 1994-1997 Sun Wu, Udi Manber, Burra Gopal, Tom Gries (for OS/2)
	All Rights Reserved.

[fix]	3.35	TG 11.12.97	agrep.c: -f function did not work because of a simple
				problem in the commandline parser: the patternfile preprocessing
				prepf() was done before definition of the codepage. Oops.
[fix]	3.34			newmgrep.c: input buffer was used before its start address
[chg]	3.33	TG 02.03.97	when no target filename(s) were given:
				AGREP displays an error message now
				instead of reading from stdin.
				Solves the following problem:
				When one uses "AGREP <needle> *" and there are
				no files in that subdirectory,
				the 3.32 has waited for stdin (=haystack, target).
[new]				verbose option -V5 dumps codepage table
[chg]				compiled with emx 0.9c

[chg]		TG 18.02.97     new homepage, see AGREPHLP.C.
				I keep the same version number.

[chg]   3.32    TG 15.01.97     new links, helppage revised
[new]	3.31	TG 16.12.96	new option -i0
[fix]	3.29	TG 14.10.96	see version.h
[new]	3.21	TG 07.10.96	Verbose option -V
[new]	3.20	TG 06.10.96	multi-codepage support
				now implemented CP437 and CP850
[new]	3.19	TG 05.10.96	environment variable AGREPOPTS
[new]	3.14	TG 25.09.96	new option -i#:
				let letters match letters, digits match digits,
				others match others
[new]	3.12			dynamic metasymbol assignment (in preparation)
[chg]				returncode = total number of hits
[new]	3.11	TG 23.09.96	option -ia maps all ISO characters to nearest ASCII
[chg]	3.10	TG 22.09.96	handling and check for metasymbols in searchstring
[fix]	3.09	TG 22.09.96	BITAP.C: type CHAR instead of type char
[new]				AGREP.C: Grand Total
[fix]	3.08 	TG 16.09.96
	
		- A dummy printf("") statement is now used to avoid the
		  following memory(?) problem:

		  now and then, when using the delimiter option and
		  on big files and piping AGREP's output to a file..
		  
		  Example: AGREP -d "/AN " -i "next;block" infile > outfile
		  
		  ..AGREP has crashed with SYS3175 (access violation).
		  

		- provisionally un-commenting the diagnostic error message
		  for OUTPUT_OVERFLOW (in AGREP.H)

[chg]   3.07 [TG]
	
		- improved performance for CP850 upper to lower translation
		  (now using a look-up table)
		  
		- improved help screens
		
[chg]	3.04 R.M. Thomas [MT] & Th. Gries [TG] July/August 1996:
	
		- conditional compiling using the __EMX__ flag
		- EMX: use of _wildcard() function to expand wildcards
		- using constants AGREP_VERSION, AGREP_DATE in agrep.h

[ini]	3.x bgopal: (1993-1994)
 
 		- Added a library interface and removed some bugs.
 	   	- Also selectively modified many routines to work
		  with our text-compression algo.
*/

/* definition of an environment variable whose value is prepended
   to the value of the actual command line.		[TG]
   
   Example:
   SET AGREPOPTS=-i	to let AGREP search case-insensitive
*/

/* Table of the 16 metasymbols.
   NOTE(review): defined ahead of the #include lines, so it cannot rely on
   anything declared in agrep.h; presumably filled by the "dynamic metasymbol
   assignment" mentioned in the 3.12 changelog entry -- confirm. */
unsigned char metasymb[16];			/* we define 16 metasymbols */
#include <errno.h>

#include "agrep.h"
#include "version.h"
#include "codepage.h"
#include "checkfil.h"

/* NOTE(review): presumably receives the value of the AGREPOPTS environment
   variable described in the comment ahead of the #include lines -- confirm
   against the command-line parser, which is not visible here. */
char	AGREPOPT_STR[MAX_LINE_LEN];

/* Codepage state; all of it is defined in another translation unit. */
extern char	CODEPAGE_STR[MAX_LINE_LEN];	/* holds the selected codepage identifier */
extern int	CODEPAGE;			/* corresponding number */
extern char	CP_MAPPING;			/* -i option = case(in)sensitive search ?
						   is one of 'i', 'a', '#' or 0	*/

/* One table of CPSIZE entries per supported codepage. */
extern struct CODEPAGE_struct CP[CODEPAGES][CPSIZE];
/* 256-entry byte table. NOTE(review): presumably the upper-to-lower look-up
   table introduced in 3.07 (see changelog) -- confirm in codepage.c. */
extern unsigned char LUT[256];

/* Diagnostic hook: expands to nothing, so PRINT(...) statements compile away. */
#define PRINT(s)

/*
 * errno is deliberately NOT declared here any more.  ISO C only guarantees
 * that <errno.h> provides errno (usually as a macro that expands to a
 * thread-local lvalue); a hand-written "extern int errno;" is undefined
 * behaviour and fails to compile or link on such C libraries.  The
 * declaration now comes from <errno.h>, included at the top of this file.
 *
 * environ is still declared by hand for every compiler except Borland C,
 * exactly as before.
 */
#ifndef __BORLANDC__
extern char **environ;
#endif
int pattern_index;		/* index in argv where the pattern is */

/* Flags that adapt agrep's behaviour when it runs inside glimpse. */
int glimpse_isserver=0;		/* so that there is no user interaction */
int glimpse_call = 0;		/* So that usage message is not printed twice */
int glimpse_clientdied=0;	/* to quit search if glimpseserver's client dies;
				   cleared again by initial_value() */

/* Input side: either a file descriptor or an in-memory buffer. */
int  agrep_initialfd;		/* Where does input come from? File/Memory? */
CHAR *agrep_inbuffer;
int  agrep_inlen;
int  agrep_inpointer;		/* reset to 0 by initial_value() */

/* Output side: either a FILE stream or an in-memory buffer. */
FILE *agrep_finalfp;		/* Where does output go to? File/Memory? */
CHAR *agrep_outbuffer;
int  agrep_outlen;		/* reset to 0 by initial_value() */
int  agrep_outpointer;		/* reset to 0 by initial_value() */

int  execfd;	   /* used by exec called within agrep_search, set in agrep_init */
int  multifd = -1; /* fd for multipattern search, used by the same exec and also
		      set in agrep_init; -1 means "not open" -- initial_value()
		      closes it when it is not -1 and stores -1 again */
extern char *pat_spool;	/* freed and set to NULL by initial_value() */

#if	DOTCOMPRESSED
extern char *tc_pat_spool;	/* freed and set to NULL by initial_value() */
#endif	/* DOTCOMPRESSED */

/* Both are released/reset by initial_value(), and by reinit_value() when a
   boolean parse tree (AParse) was in use. */
char *multibuf=NULL; 	/* buffer to put the multiple patterns in */
int  multilen = 0; 	/* length of the multibuf: not the #of multi-patterns! */

/* Search tables.  reinit_value() clears or rebuilds all of them (except
   `buffer`) before every search. */
extern int pos_cnt;	/* to re-initialize it to 0 for reg-exp search */
unsigned Mask[MAXSYM];	/* every entry set to 0 by reinit_value() */
unsigned Init1, NO_ERR_MASK, Init[MaxError];
unsigned Bit[WORD+1];	/* reinit_value() stores 1 in Bit[WORD] and doubles towards
			   lower indices down to Bit[1]; Bit[0] is not written there */
CHAR buffer[BlockSize+Maxline+1];	/* should not be used anywhere: 10/18/93 */
unsigned Next[MaxNext], Next1[MaxNext];
unsigned wildmask, endposition, D_endpos; 

/* Limits and option flags.  initial_value() assigns the defaults noted
   below; a limit of 0 means "no limit" (see initial_value()). */
int  LIMITOUTPUT;	/* maximum number of matches we are going to allow */
int  LIMITPERFILE;	/* maximum number of matches per file we are going to allow */
int  LIMITTOTALFILE;	/* maximum number of files we are going to allow */
int  EXITONERROR;	/* return -1 or exit on error? */
/* REGEX, FASTREGEX and RE_ERR are zeroed by reinit_value(); the rest of this
   line is zeroed by initial_value(). */
int  REGEX, FASTREGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;
/* HEAD and TAIL are switched ON and AND is zeroed by reinit_value();
   I, S and DD default to 1, the others to 0, in initial_value(). */
int  COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP; 
int  NOOUTPUTZERO;
/* Match counters: the two *num_of_matched counters are also zeroed by
   reinit_value_partial(). */
int  Num_Pat, PSIZE, prev_num_of_matched, num_of_matched, files_matched;
int  SILENT, NOPROMPT, BESTMATCH, NOUPPER, ISO2ASCII;
/* NOMATCH and FIRSTOUTPUT default to ON; FIRST_IN_RE is set ON by
   reinit_value_partial(). */
int  NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;
/* D_length is restored from original_D_length by reinit_value();
   tc_D_length defaults to 1 and the other two lengths to 2. */
int  WORDBOUND, DELIMITER, D_length, tc_D_length, original_D_length;
int  EATFIRST, OUTTAIL;
int  BYTECOUNT;			/* defaults to OFF */
int  PRINTOFFSET;		/* defaults to OFF */
int  PRINTRECORD;		/* defaults to ON */
int  VERBOSE=1;			/* Verbose default: AGREP shows the Grand Total;
				   not touched by initial_value() */
int  FILEOUT;
int  DNA;
int  APPROX;

/* Where multiple patterns come from.  All zeroed/emptied by initial_value();
   PAT_BUFFER is additionally zeroed by reinit_value() when a boolean parse
   tree was in use. */
int  PAT_FILE;			/* multiple patterns from a given file */
char PAT_FILE_NAME[MAX_LINE_LEN];
int  PAT_BUFFER;		/* multiple patterns from a given buffer */
int  CONSTANT;
int  RECURSIVE;

int  total_line;	/* used in mgrep */
int  D;			/* zeroed by initial_value() */
int  M;			/* zeroed by reinit_value() before every search */
int  TCOMPRESSED;
int  EASYSEARCH;	/* 1 used only for compressed files: LITTLE/BIG;
			   initial_value() sets it to 1 */
int  ALWAYSFILENAME = OFF;
int  POST_FILTER = OFF;
int  NEW_FILE = OFF;	/* only when post-filter is used */
int  PRINTFILENUMBER = OFF;
int  PRINTPATTERN = OFF;
int  MULTI_OUTPUT = OFF; /* should mgrep print the matched line multiple times for each matched pattern or just once? */
/* The flags above are invisible to the user and used only by glimpse: -l
   cannot be used since it is incompatible with stdin, and -A is used for the
   index search (done next). */

/* State for complicated boolean patterns.  reinit_value() destroys the tree
   when AComplexBoolean is set, frees each terminal's leaf value and resets
   anum_terminals to 0. */

int  AComplexBoolean = 0;
ParseTree *AParse = NULL;
int anum_terminals = 0;		/* number of entries of aterminals[] in use */
ParseTree aterminals[MAXNUM_PAT];
char amatched_terminals[MAXNUM_PAT];

#if	MEASURE_TIMES
					/* timing variables; zeroed by initial_value() */
int OUTFILTER_ms;
int FILTERALGO_ms;
int INFILTER_ms;
#endif					/*MEASURE_TIMES*/

CHAR **Textfiles = NULL;		/* array of filenames to be searched */
int Numfiles = 0;			/* indicates how many files in Textfiles */
int copied_from_argv = 0;		/* were filenames copied from argv (should I free 'em)? */
CHAR old_D_pat[MaxDelimit * 2] = "\n";  /* to hold original D_pattern;
					   restored from original_old_D_pat by reinit_value() */
CHAR original_old_D_pat[MaxDelimit * 2] = "\n";
CHAR Pattern[MAXPAT], OldPattern[MAXPAT];	/* set in agrep_init (see initial_value()) */
CHAR CurrentFileName[MAX_LINE_LEN];
int SetCurrentFileName = 0;		/* dirty glimpse trick to make filters work: output seems to come from another file */
int CurrentByteOffset;
int SetCurrentByteOffset = 0;
CHAR Progname[MAXNAME]; 

/* string which delimits records -- defaults to newline.
   Note that the stored default is the four bytes "\n; " plus terminator while
   initial_value() sets D_length to 2.
   NOTE(review): the relation between the stored string and D_length is not
   visible in this part of the file -- confirm before changing either. */
CHAR D_pattern[MaxDelimit * 2] = "\n; ";

CHAR tc_D_pattern[MaxDelimit * 2] = "\n";
/* Pristine copy; reinit_value() copies it back into D_pattern. */
CHAR original_D_pattern[MaxDelimit * 2] = "\n; ";
char COMP_DIR[MAX_LINE_LEN];
char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN];	/* interfacing with tcompress */

int  NOFILENAME,  			/* Boolean flag, set for -h option */
     FILENAMEONLY;			/* Boolean flag, set for -l option */
     
extern int init();
int table[WORD][WORD];			/* zeroed entry by entry in reinit_value() */
CHAR *agrep_saved_pattern = NULL;	/* to prevent multiple prepfs for each boolean search: crd@hplb.hpl.hp.com;
					   freed and reset to NULL by initial_value() */

/*
 * Windows build only: pull in the directory and low-level I/O headers and
 * forward-declare the functions this file uses.  The declarations are
 * old-style (no parameter lists), so the compiler does not check arguments.
 * The trailing comment on each line names the source file that, according to
 * the original author, holds the definition.
 */
#ifdef _WIN32
#include <direct.h>
#include <io.h>
int  agrep_usage();       /* agrep.c */
int  exec();              /* agrep.c */
int  exponen();           /* agrep.c */
int  r_output();          /* agrep.c */
int  file_out();          /* agrep.c */
void agrep_online_help(); /* agrephlp.c */
int  bitap();             /* bitap.c */
int  fill_buf();          /* bitap.c */
int  check_file();        /* checkfil.c */
int  checksg();           /* checksg.c */
int  get_current_codepage(); /* codepage.c */
int  compat();            /* compat.c */
int  maskgen();           /* maskgen.c */
int  mgrep();             /* newmgrep.c */
int  prepf();             /* newmgrep.c */
int  preprocess();        /* preproce.c */
void destroy_tree();      /* putils.c; called from reinit_value() */
int  recursive();         /* recursiv.c */
int  sgrep();             /* sgrep.c */
int  initialize_common(); /* dummyfil.c */
int  tuncompressible_filename();   /* dummyfil.c */
int  quick_tcompress();   /* dummyfil.c */
#endif

/* Called when multipattern search and pattern has not changed */

/*
 * Lightweight reset for a multipattern search whose pattern is unchanged:
 * only the two match counters, errno and the FIRST_IN_RE flag go back to
 * their starting values.  Everything else is left as it is.
 */
void reinit_value_partial()
{
	prev_num_of_matched = 0;
	num_of_matched = 0;
	FIRST_IN_RE = ON;
	errno = 0;
}

/* This must be called before every agrep_search to reset agrep globals */
void reinit_value()
{
        int i, j;

	/* Added on 7th Oct 1994 */
	
	if (AParse) {
		if (AComplexBoolean) destroy_tree(AParse);
		AComplexBoolean = 0;
		AParse = 0;
		PAT_BUFFER = 0;
		if (multibuf != NULL) free(multibuf);	/* this was allocated for arbit booleans, not multipattern search */
		multibuf = NULL;
		multilen = 0;
		/* Cannot free multifd here since that is always allocated for multipattern search */
	}
	for (i=0; i<anum_terminals; i++) {
		free(aterminals[i].data.leaf.value);
		memset(&aterminals[i], '\0', sizeof(ParseTree));
	}
	anum_terminals = 0;

        Bit[WORD] = 1;
        for (i = WORD - 1; i > 0  ; i--)  Bit[i] = Bit[i+1] << 1;
        for (i=0; i< MAXSYM; i++) Mask[i] = 0;

        /* bg: new things added on Mar 13 94 */
	
        Init1 = 0;
        NO_ERR_MASK = 0;
        memset(Init, '\0', MaxError * sizeof(unsigned));
        memset(Next, '\0', MaxNext * sizeof(unsigned));
        memset(Next1, '\0', MaxNext * sizeof(unsigned));
        wildmask = endposition = D_endpos = 0;
        for (i=0; i<WORD; i++)
                for (j=0; j<WORD; j++)
                        table[i][j] = 0;

        strcpy(D_pattern, original_D_pattern);
        D_length = original_D_length;
        strcpy(old_D_pat, original_old_D_pat);

	/* Changed on Dec 26th: bg */
	FASTREGEX = REGEX = 0;
	HEAD = TAIL = ON;	/* were off initially */
	RE_ERR = 0;
	AND = 0;
	M = 0;
	pos_cnt = 0;	/* added 31 Jan 95 */

	reinit_value_partial();
}

/* This must be called before every agrep_init to reset agrep options */

void initial_value()
{
	SetCurrentFileName = 0;	/* 16/9/94 */
	SetCurrentByteOffset = 0;	/* 23/9/94 */

	/* courtesy: crd@hplb.hpl.hp.com */
	
	if (agrep_saved_pattern) {
		free(agrep_saved_pattern);
		agrep_saved_pattern= NULL;
	}
	
	/* bg: new stuff on 17/Feb/94 */
	
	if (multifd != -1) close(multifd);
	multifd = -1;
	if (multibuf != NULL) free(multibuf);
	multibuf = NULL;
	multilen = 0;
	if (pat_spool != NULL) free(pat_spool);
	pat_spool = NULL;

#if	DOTCOMPRESSED
	if (tc_pat_spool != NULL) free(tc_pat_spool);
	tc_pat_spool = NULL;
#endif	/* DOTCOMPRESSED */

	LIMITOUTPUT = 0;	/* means infinity = current semantics */
	LIMITPERFILE = 0;	/* means infinity = current semantics */
	LIMITTOTALFILE = 0;	/* means infinity = current semantics */
	EASYSEARCH = 1;
	DNA = APPROX = PAT_FILE = PAT_BUFFER = CONSTANT = total_line = D = TCOMPRESSED = 0;
	PAT_FILE_NAME[0] = '\0';
	EXITONERROR = NOFILENAME = FILENAMEONLY = FILEOUT = ALWAYSFILENAME = NEW_FILE = POST_FILTER = 0;

        original_old_D_pat[0] = old_D_pat[0] = '\n';
        original_old_D_pat[1] = old_D_pat[1] = '\0';
        original_D_pattern[0] = D_pattern[0] = '\n';
        original_D_pattern[1] = D_pattern[1] = ';';
        original_D_pattern[2] = D_pattern[2] = ' ';
        original_D_pattern[3] = D_pattern[3] = '\0';

	strcpy(tc_D_pattern, "\n");
	tc_D_length = 1;

	/* the functions agrep_init and agrep_search take care of Textfiles and Numfiles */
	agrep_inpointer = 0;
	agrep_outpointer = 0;
	agrep_outlen = 0;

#if	MEASURE_TIMES
	OUTFILTER_ms = FILTERALGO_ms = INFILTER_ms = 0;
#endif	/*MEASURE_TIMES*/

	MULTI_OUTPUT = 0;
	PRINTPATTERN = 0;
	PRINTFILENUMBER = 0;
	JUMP = FNAME = BESTMATCH = NOPROMPT = NOUPPER = ISO2ASCII = 0 ;
	RECURSIVE = 0;
	COUNT = LINENUM = WHOLELINE = SGREP = 0;
	NOOUTPUTZERO = 0;
	EATFIRST = INVERSE = TRUNCATE = OUTTAIL = 0; 
	NOMATCH = FIRSTOUTPUT = ON;	/* were off initally */
	I = DD = S = 1;	/* were off initially */
	original_D_length = D_length = 2;	/* was 0 initially */
	SILENT = Num_Pat = PSIZE = SIMPLEPATTERN = prev_num_of_matched = num_of_matched = files_matched = 0;
	WORDBOUND = DELIMITER = 0;

	COMP_DIR[0] = '\0';
	FREQ_FILE[0] = '\0';
	HASH_FILE[0] = '\0';
	STRING_FILE[0] = '\0';
	BYTECOUNT = OFF;
	PRINTOFFSET = OFF;
	PRINTRECORD = ON;

	glimpse_clientdied = 0;	/* added 15th Feb 95 */

	/* Pattern, OldPattern, execfd, Numfiles are set in agrep_init: so no need to initialize */
	reinit_value();
}

void compute_next(M, Next, Next1)
int M; 
unsigned *Next, *Next1;
{
	int i, j=0, n,  k, temp;
	int mid, pp;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -