⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sed.c

📁 GNU Sed GNU Sed GNU Sed
💻 C
📖 第 1 页 / 共 3 页
字号:
/*  GNU SED, a batch stream editor.
    Copyright (C) 1989-1991 Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */


/* TimF@microsoft.com:	7-Aug-92  Port to Microsoft's Windows NT (tm) */

#ifdef	WINDOWSNT
#include	<errno.h>
#include	<windows.h>
#endif


#ifdef __STDC__
#define VOID void
#else
#define VOID char
#endif

#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <regex.h>
#include <getopt.h>
#if defined(STDC_HEADERS)
#include <stdlib.h>
#endif
#if defined(USG) || defined(STDC_HEADERS)
#include <string.h>
#include <memory.h>
#define bcopy(s, d, n) (memcpy((d), (s), (n)))
#else
#include <strings.h>
VOID *memchr();
#endif


/* Version banner; main prints it on stderr for the -V / --version option. */
char *version_string = "GNU sed version 1.08";

/* Struct vector is used to describe a chunk of a sed program.  There is one
   vector for the main program, and one for each { } pair.
   compile_program creates a vector when it is handed a null pointer and
   appends one sed_cmd per command it reads. */
struct vector {
	/* Array of compiled commands.  Allocated with room for MORE_CMDS
	   entries and grown by MORE_CMDS entries whenever it fills up. */
	struct sed_cmd *v;
	/* Number of entries of 'v' currently in use. */
	int v_length;
	/* Number of entries 'v' has room for. */
	int v_allocated;
	/* Both links are zeroed when compile_program creates a vector.
	   NOTE(review): presumably 'up_one' is the enclosing { } vector and
	   'next_one' chains vectors together -- confirm against the code that
	   sets them. */
	struct vector *up_one;
	struct vector *next_one;
};


/* Goto structure is used to hold both GOTO's and labels.  There are two
   separate lists, one of goto's, called 'jumps', and one of labels, called
   'labels'.  Once every script has been compiled, main looks up each entry
   of 'jumps' by name in 'labels' and stores the matching label in the
   command the jump entry refers to.

   The V element points to the descriptor for the program-chunk in which the
   goto/label was encountered.
   The V_INDEX element says which element of that vector actually IS the
   goto/label.  The first element of the vector is zero.
   The NAME element is the null-terminated name of the label.  main does not
   require a match for a jump whose name is empty.
   NEXT is the next goto/label in the list. */

struct sed_label {
	struct vector *v;
	int v_index;
	char *name;
	struct sed_label *next;
};

/* ADDR_TYPE says which kind of address a struct addr holds:
   ADDR_NULL  (zero)  for a null address,
   ADDR_NUM   (one)   if addr_number is valid,
   ADDR_REGEX (two)   if addr_regex is valid,
   ADDR_LAST  (three) if the address is '$'.

   Other values are undefined.
 */

#define ADDR_NULL	0
#define ADDR_NUM	1
#define ADDR_REGEX	2
#define ADDR_LAST	3
	
/* One address of a command.  Only the member selected by addr_type is
   meaningful. */
struct addr {
	/* One of the ADDR_* values above. */
	int	addr_type;
	/* Compiled pattern; valid when addr_type is ADDR_REGEX. */
	struct re_pattern_buffer *addr_regex;
	/* Numeric address; valid when addr_type is ADDR_NUM. */
	int	addr_number;
};


/* Aflags:  If the low order bit is set, a1 has been
   matched; apply this command until a2 matches.
   If the next bit is set, apply this command to all
   lines that DON'T match the address(es).
 */

#define A1_MATCHED_BIT	01
#define ADDR_BANG_BIT	02

/* One compiled sed command.  compile_program fills one of these in for
   every command it reads; which member of the union 'x' is meaningful
   depends on the command character stored in 'cmd'. */
struct sed_cmd {
	/* a1 is the first (or only) address; a2 is the address that follows
	   a ',' in the script.  Both start out with addr_type zero. */
	struct addr a1,a2;
	/* Combination of A1_MATCHED_BIT and ADDR_BANG_BIT.  ADDR_BANG_BIT is
	   set when the command is preceded by '!'. */
	int aflags;

	/* The command character; zero until the command has been parsed. */
	char cmd;

	union {
		/* This structure is used for a, i, and c commands */
		struct {
			char *text;
			int text_len;
		} cmd_txt;

		/* This is used for b and t commands */
		struct sed_cmd *label;

		/* This for r and w commands */
		FILE *io_file;

		/* This for the hairy s command */
		/* For the flags var:
		   low order bit means the 'g' option was given,
		   next bit means the 'p' option was given,
		   and the next bit means a 'w' option was given,
		      and wio_file contains the file to write to.
		   NOTE(review): S_NUM_BIT is not described by the original
		   comment; presumably it marks a numeric flag whose value is
		   kept in 'numb' -- confirm against the s-command parser. */

#define S_GLOBAL_BIT	01
#define S_PRINT_BIT	02
#define S_WRITE_BIT	04
#define S_NUM_BIT	010

		struct {
			struct re_pattern_buffer *regx;
			char *replacement;
			int replace_length;
			int flags;
			int numb;
			FILE *wio_file;
		} cmd_regex;

		/* This for the y command */
		unsigned char *translate;

		/* For { and } */
		struct vector *sub;
		/* Resolved branch target: after all scripts are compiled, main
		   stores here the label matching each entry of the 'jumps'
		   list (a null pointer when no label matched). */
		struct sed_label *jump;
	} x;
};

/* Sed operates a line at a time.  A struct line is a growable text buffer;
   main gives each of the global lines ('line', 'hold' and 'append') an
   initial 50-byte buffer with length zero. */
struct line {
	char *text;		/* Pointer to line allocated by malloc. */
	int length;		/* Length of text. */
	int alloc;		/* Allocated space for text. */
};

/* This structure holds information about files opened by the 'r', 'w',
   and 's///w' commands.  In particular, it holds the FILE pointer to
   use, the file's name, and a flag that is non-zero if the file is being
   read instead of written.  The table has room for NUM_FPS files. */

#define NUM_FPS	32
struct {
	FILE *phile;	/* Stream to use for the file. */
	char *name;	/* Name of the file. */
	int readit;	/* Non-zero if the file is read rather than written. */
} file_ptrs[NUM_FPS];


/* P_ wraps the parameter list of every prototype below.  When __STDC__ is
   defined it expands to the list itself, giving a full prototype; otherwise
   it expands to an empty pair of parentheses, giving an old-style
   declaration.  Hence the doubled parentheses at each use. */
#if defined(__STDC__)
# define P_(s) s
#else
# define P_(s) ()
#endif

/* Helper routines.  NOTE(review): judging by their names these cover error
   reporting, checked stdio/allocation wrappers (the ck_ prefix) and a
   growable byte buffer -- confirm against their definitions. */
void panic P_((char *str, ...));
char *__fp_name P_((FILE *fp));
FILE *ck_fopen P_((char *name, char *mode));
void ck_fwrite P_((char *ptr, int size, int nmemb, FILE *stream));
void ck_fclose P_((FILE *stream));
VOID *ck_malloc P_((int size));
VOID *ck_realloc P_((VOID *ptr, int size));
char *ck_strdup P_((char *str));
VOID *init_buffer P_((void));
void flush_buffer P_((VOID *bb));
int size_buffer P_((VOID *b));
void add_buffer P_((VOID *bb, char *p, int n));
void add1_buffer P_((VOID *bb, int ch));
char *get_buffer P_((VOID *bb));

/* Routines of sed itself: script compilation (compile_*, bad_prog, inchar,
   savchar, setup_jump) followed by the routines that read input and run the
   compiled program. */
void compile_string P_((char *str));
void compile_file P_((char *str));
struct vector *compile_program P_((struct vector *vector));
void bad_prog P_((char *why));
int inchar P_((void));
void savchar P_((int ch));
int compile_address P_((struct addr *addr));
void compile_regex P_((int slash));
struct sed_label *setup_jump P_((struct sed_label *list, struct sed_cmd *cmd, struct vector *vec));
FILE *compile_filename P_((int readit));
void read_file P_((char *name));
void execute_program P_((struct vector *vec));
int match_address P_((struct addr *addr));
int read_pattern_space P_((void));
void append_pattern_space P_((void));
void line_copy P_((struct line *from, struct line *to));
void line_append P_((struct line *from, struct line *to));
void str_append P_((struct line *to, char *string, int length));
void usage P_((void));

/* Set to argv[0] by main.  Only declared here (extern). */
extern char *myname;

/* If set, don't write out the line unless explicitly told to.  Set by the
   -n, --quiet and --silent options, and by a script file whose first two
   characters are "#n". */
int no_default_output = 0;

/* Current input line # */
int input_line_number = 0;

/* Are we on the last input file? */
int last_input_file = 0;

/* Have we hit EOF on the last input file?  This is used to decide if we
   have hit the '$' address yet. */
int input_EOF = 0;

/* Non-zero if a quit command has been executed.  main stops reading
   further input files once this is set. */
int quit_cmd = 0;

/* Have we done any replacements lately?  This is used by the 't' command. */
int replaced = 0;

/* How many '{'s are we executing at the moment */
int program_depth = 0;

/* The complete compiled SED program that we are going to run */
struct vector *the_program = 0;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts. */
struct sed_label *jumps = 0;
struct sed_label *labels = 0;

/* The 'current' input line. */
struct line line;

/* An input line that's been stored for later use by the program */
struct line hold;

/* A 'line' to append to the current line when it comes time to write it out */
struct line append;


/* When we're reading a script command from a string, 'prog_start' and
   'prog_end' point to the beginning and end of the string.  This
   would allow us to compile script strings that contain nulls, except
   that script strings are only read from the command line, which is
   null-terminated */
char *prog_start;
char *prog_end;

/* When we're reading a script command from a string, 'prog_cur' points
   to the current character in the string */
char *prog_cur;

/* This is the name of the current script file.
   It is used for error messages. */
char *prog_name;

/* This is the current script file.  If it is zero, we are reading
   from a string stored in 'prog_start' instead.  If both 'prog_file'
   and 'prog_start' are zero, we're in trouble! */
FILE *prog_file;

/* This is the number of the current script line that we're compiling.  It is
   used to give out useful and informative error messages.  compile_file
   restarts it at 1 and compile_string at 0. */
int prog_line = 1;

/* This is the file pointer that we're currently reading data from.  It may
   be stdin */
FILE *input_file;

/* If this variable is non-zero at exit, one or more of the input
   files couldn't be opened.  main then exits with status 2. */

int bad_input = 0;

/* 'an empty regular expression is equivalent to the last regular
   expression read' so we have to keep track of the last regex used.
   Here's where we store a pointer to it (it is only malloc()'d once) */
struct re_pattern_buffer *last_regex;

/* Various error messages we may want to print.  They are passed to
   bad_prog() by the script compiler. */
static char ONE_ADDR[] = "Command only uses one address";
static char NO_ADDR[] = "Command doesn't take any addresses";
static char LINE_JUNK[] = "Extra characters after command";
static char BAD_EOF[] = "Unexpected End-of-file";
static char NO_REGEX[] = "No previous regular expression";

/* Long-option table handed to getopt_long() in main.  Each entry gives the
   option's name, whether it takes an argument (1) or not (0), a null flag
   pointer, and the value getopt_long() returns for it.  That value is the
   character main's switch already handles for the equivalent short option,
   so "--quiet" and "--silent" both behave like -n.  The all-zero entry
   terminates the table. */
static struct option longopts[] =
{
  {"expression", 1, NULL, 'e'},
  {"file", 1, NULL, 'f'},
  {"quiet", 0, NULL, 'n'},
  {"silent", 0, NULL, 'n'},
  {"version", 0, NULL, 'V'},
  {NULL, 0, NULL, 0}
};

/* The main program: parses the arguments, compiles the script(s), resolves
   jumps to labels, sets up the line buffers and then runs the program over
   each input file.

   Options:
     -n, --quiet, --silent   don't print lines unless told to
     -e, --expression TEXT   add TEXT to the script
     -f, --file FILE         add the contents of FILE to the script
     -V, --version           print the version on stderr (processing
                             continues afterwards)
   Any other option calls usage().  When neither -e nor -f was given, the
   first non-option argument is taken as the script.  The remaining
   arguments are input files; with none, "-" is read.

   Script fragments are added to the program in the order they appear on
   the command line.  Consecutive -e arguments are still joined (one per
   line) before being compiled, so a single command may be spread over
   several -e options.

   Exits with status 2 if 'bad_input' was set while reading the input
   files, otherwise with status 0. */
int
main(argc,argv)
int argc;
char **argv;
{
	int opt;
	char *e_strings = NULL;
	int compiled = 0;
	struct sed_label *go,*lbl;

	myname=argv[0];
	/* getopt_long() reports the end of the options by returning -1. */
	while((opt=getopt_long(argc,argv,"ne:f:V", longopts, (int *) 0))
	      != -1) {
		switch(opt) {
		case 'n':
			no_default_output = 1;
			break;
		case 'e':
			/* The +2 leaves room for the newline appended below
			   and for the terminating null. */
			if(e_strings == NULL) {
				e_strings=ck_malloc(strlen(optarg)+2);
				strcpy(e_strings,optarg);
			} else {
				e_strings=ck_realloc(e_strings,strlen(e_strings)+strlen(optarg)+2);
				strcat(e_strings,optarg);
			}
			strcat(e_strings,"\n");
			compiled = 1;
			break;
		case 'f':
			/* Compile the -e text collected so far before the
			   file; otherwise every -f script would end up in
			   front of every -e script, whatever their order on
			   the command line. */
			if(e_strings) {
				compile_string(e_strings);
				free(e_strings);
				e_strings = NULL;
			}
			compile_file(optarg);
			compiled = 1;
			break;
		case 'V':
			fprintf(stderr, "%s\n", version_string);
			break;
		default:
			usage();
		}
	}
	/* Compile whatever -e text followed the last -f (or all of it, if
	   there was no -f). */
	if(e_strings) {
		compile_string(e_strings);
		free(e_strings);
		e_strings = NULL;
	}
	if(!compiled) {
		if (optind == argc)
			usage();
		compile_string(argv[optind++]);
	}

	/* Backpatch: point every jump at the label carrying the same name.
	   A jump with an empty name needs no label and gets a null
	   pointer. */
	for(go=jumps;go;go=go->next) {
		for(lbl=labels;lbl;lbl=lbl->next)
			if(!strcmp(lbl->name,go->name))
				break;
		if(*go->name && !lbl)
			panic("Can't find label for jump to '%s'",go->name);
		go->v->v[go->v_index].x.jump=lbl;
	}

	line.length=0;
	line.alloc=50;
	line.text=ck_malloc(50);

	append.length=0;
	append.alloc=50;
	append.text=ck_malloc(50);

	hold.length=0;
	hold.alloc=50;
	hold.text=ck_malloc(50);

	if(argc<=optind) {
		last_input_file++;
		read_file("-");
	} else while(optind<argc) {
		/* Flag the final file before reading it. */
		if(optind==argc-1)
			last_input_file++;
		read_file(argv[optind]);
		optind++;
		if(quit_cmd)
			break;
	}
	if(bad_input)
		exit(2);
	exit(0);
	return 0;	/* Not reached. */
}

/* Compile the sed commands held in 'str', a null-terminated string taken
   from the command line, and add them to the end of 'the_program'. */
void
compile_string(str)
char *str;
{
	/* A zero 'prog_file' means the script is read from a string. */
	prog_file = 0;
	prog_line = 0;

	/* The string runs from 'prog_start' up to its terminating null;
	   reading starts at the first character. */
	prog_start = str;
	prog_cur = str;
	prog_end = prog_start + strlen(prog_start);

	the_program = compile_program(the_program);
}

/* 'str' is the name of a file containing sed commands.  Read them in
   and add them to the end of 'the_program'.  The name "-" means standard
   input; any other name is opened for reading with ck_fopen().

   If the file starts with '#', its first line is a comment and is skipped
   here.  If the character right after that '#' is 'n', default output is
   turned off as well. */
void
compile_file(str)
char *str;
{
	int ch;

	/* All three string pointers are zero while reading from a file. */
	prog_start=prog_cur=prog_end=0;
	prog_name=str;
	prog_line=1;
	if(str[0]=='-' && str[1]=='\0')
		prog_file=stdin;
	else
		prog_file=ck_fopen(str,"r");
	ch=getc(prog_file);
	if(ch=='#') {
		ch=getc(prog_file);
		if(ch=='n')
			no_default_output++;
		while(ch!=EOF && ch!='\n')
			ch=getc(prog_file);
		/* The first line was consumed with getc() right here, so
		   count it; otherwise 'prog_line' would still say 1 while
		   line 2 is being compiled and error messages would name
		   the wrong line. */
		if(ch=='\n')
			prog_line++;
	} else if(ch!=EOF)
		ungetc(ch,prog_file);	/* Not a comment: hand it back. */
	the_program=compile_program(the_program);
}

#define MORE_CMDS 40

/* Read a program (or a subprogram within '{' '}' pairs) in and store
   the compiled form in *'vector'  Return a pointer to the new vector.  */
struct vector *
compile_program(vector)
struct vector *vector;
{
	struct sed_cmd *cur_cmd;
	int	ch;
	int	slash;
	VOID	*b;
	unsigned char	*string;
	int	num;

	if(!vector) {
		vector=(struct vector *)ck_malloc(sizeof(struct vector));
		vector->v=(struct sed_cmd *)ck_malloc(MORE_CMDS*sizeof(struct sed_cmd));
		vector->v_allocated=MORE_CMDS;
		vector->v_length=0;
		vector->up_one = 0;
		vector->next_one = 0;
	}
	for(;;) {
	skip_comment:
		do ch=inchar();
		while(ch!=EOF && (isblank(ch) || ch=='\n' || ch==';'));
		if(ch==EOF)
			break;
		savchar(ch);

		if(vector->v_length==vector->v_allocated) {
			vector->v=(struct sed_cmd *)ck_realloc((VOID *)vector->v,(vector->v_length+MORE_CMDS)*sizeof(struct sed_cmd));
			vector->v_allocated+=MORE_CMDS;
		}
		cur_cmd=vector->v+vector->v_length;
		vector->v_length++;

		cur_cmd->a1.addr_type=0;
		cur_cmd->a2.addr_type=0;
		cur_cmd->aflags=0;
		cur_cmd->cmd=0;

		if(compile_address(&(cur_cmd->a1))) {
			ch=inchar();
			if(ch==',') {
				do ch=inchar();
				while(ch!=EOF && isblank(ch));
				savchar(ch);
				if(compile_address(&(cur_cmd->a2)))
					;
				else
					bad_prog("Unexpected ','");
			} else
				savchar(ch);
		}
		ch=inchar();
		if(ch==EOF)
			break;
 new_cmd:
		switch(ch) {
		case '#':
			if(cur_cmd->a1.addr_type!=0)
				bad_prog(NO_ADDR);
			do ch=inchar();
			while(ch!=EOF && ch!='\n');
			vector->v_length--;
			goto skip_comment;
		case '!':
			if(cur_cmd->aflags & ADDR_BANG_BIT)
				bad_prog("Multiple '!'s");
			cur_cmd->aflags|= ADDR_BANG_BIT;
			do ch=inchar();
			while(ch!=EOF && isblank(ch));
			if(ch==EOF)
				bad_prog(BAD_EOF);
#if 0
			savchar(ch);
#endif
			goto new_cmd;
		case 'a':
		case 'i':
			if(cur_cmd->a2.addr_type!=0)
				bad_prog(ONE_ADDR);
			/* Fall Through */
		case 'c':
			cur_cmd->cmd=ch;
			if(inchar()!='\\' || inchar()!='\n')
				bad_prog(LINE_JUNK);
			b=init_buffer();
			while((ch=inchar())!=EOF && ch!='\n') {
				if(ch=='\\')
					ch=inchar();
				add1_buffer(b,ch);
			}
			if(ch!=EOF)
				add1_buffer(b,ch);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -