sed.c

来自「手机嵌入式Linux下可用的busybox源码」· C语言代码 · 共 867 行 · 第 1/2 页
867 行
/*
 * sed.c - very minimalist version of sed
 *
 * Copyright (C) 1999,2000 by Lineo, inc. and Mark Whitley
 * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org> 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

/*
	Supported features and commands in this version of sed:

	 - comments ('#')
	 - address matching: num|/matchstr/[,num|/matchstr/|$]command
	 - commands: (p)rint, (d)elete, (s)ubstitute (with g, I & p flags)
	 - edit commands: (a)ppend, (i)nsert, (c)hange
	 - file commands: (r)ead
	 - backreferences in substitution expressions (\1, \2...\9)

	 (Note: Specifying an address (range) to match is *optional*; commands
	 default to the whole pattern space if no specific address match was
	 requested.)

	Unsupported features:

	 - transliteration (y/source-chars/dest-chars/) (use 'tr')
	 - no pattern space hold space storing / swapping (x, etc.)
	 - no labels / branching (: label, b, t, and friends)
	 - and lots, lots more.
*/

#include <stdio.h>
#include <unistd.h> /* for getopt() */
#include <regex.h>
#include <string.h> /* for strdup() */
#include <errno.h>
#include <ctype.h> /* for isspace() */
#include <stdlib.h>
#include "busybox.h"

/* externs */
extern void xregcomp(regex_t *preg, const char *regex, int cflags);
extern int optind; /* in unistd.h */
extern char *optarg; /* ditto */

/* options */
/* NOTE(review): nothing in the visible part of this file reads or writes
 * be_quiet; presumably it is set while parsing the command line -- confirm. */
static int be_quiet = 0;

/*
 * One parsed sed command: an optional address (or address range), the
 * command letter, and the arguments that belong to that command.  The
 * parse_*() functions and get_address() in this file fill in the fields.
 */
struct sed_cmd {

	/* GENERAL FIELDS */
	/* parse_subst_cmd() sets this to the character following the 's';
	 * get_address() forces it to '/' while it scans and then restores it */
	char delimiter;	    /* The delimiter used to separate regexps */

	/* address storage */
	/* a line-number address and a regex address are stored separately;
	 * get_address() fills in exactly one of the two per address */
	int beg_line; /* 'sed 1p'   0 == no beginning line, apply commands to all lines */
	int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */
	regex_t *beg_match; /* sed -e '/match/cmd' */
	regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */

	/* the command */
	char cmd; /* p,d,s (add more at your leisure :-) */

	/* SUBSTITUTION COMMAND SPECIFIC FIELDS */

	/* sed -e 's/sub_match/replace/' */
	regex_t *sub_match; /* compiled match expression (heap allocated) */
	char *replace; /* replacement text, copied verbatim by parse_subst_cmd() */
	unsigned int num_backrefs:4; /* how many back references (\1..\9) */
			/* Note:  GNU/POSIX sed does not save more than nine backrefs, so
			 * we only use 4 bits to hold the number */
	unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */
	unsigned int sub_p:2; /* sed -e 's/foo/bar/p' (print substitution) */

	/* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */
	char *editline; /* heap-allocated text built by parse_edit_cmd() */

	/* FILE COMMAND (r) SPECIFIC FIELDS */
	char *filename; /* heap-allocated name extracted by parse_file_cmd() */
};

/* globals */
static struct sed_cmd *sed_cmds = NULL; /* growable array holding a sequence of sed cmds */
static int ncmds = 0; /* number of sed commands */

/*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this?
*/const char * const semicolon_whitespace = "; \n\r\t\v\0";#ifdef BB_FEATURE_CLEAN_UPstatic void destroy_cmd_strs(){	if (sed_cmds == NULL)		return;	/* destroy all the elements in the array */	while (--ncmds >= 0) {		if (sed_cmds[ncmds].beg_match) {			regfree(sed_cmds[ncmds].beg_match);			free(sed_cmds[ncmds].beg_match);		}		if (sed_cmds[ncmds].end_match) {			regfree(sed_cmds[ncmds].end_match);			free(sed_cmds[ncmds].end_match);		}		if (sed_cmds[ncmds].sub_match) {			regfree(sed_cmds[ncmds].sub_match);			free(sed_cmds[ncmds].sub_match);		}		if (sed_cmds[ncmds].replace)			free(sed_cmds[ncmds].replace);	}	/* destroy the array */	free(sed_cmds);	sed_cmds = NULL;}#endif/* * index_of_next_unescaped_regexp_delim - walks left to right through a string * beginning at a specified index and returns the index of the next regular * expression delimiter (typically a forward * slash ('/')) not preceeded by  * a backslash ('\'). */static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx){	int bracket = -1;	int escaped = 0;	for ( ; str[idx]; idx++) {		if (bracket != -1) {			if (str[idx] == ']' && !(bracket == idx - 1 ||									 (bracket == idx - 2 && str[idx-1] == '^')))				bracket = -1;		} else if (escaped)			escaped = 0;		else if (str[idx] == '\\')			escaped = 1;		else if (str[idx] == '[')			bracket = idx;		else if (str[idx] == sed_cmd->delimiter)			return idx;	}	/* if we make it to here, we've hit the end of the string */	return -1;}/* * returns the index in the string just past where the address ends. 
*/static int get_address(struct sed_cmd *sed_cmd, const char *str, int *linenum, regex_t **regex){	char *my_str = strdup(str);	int idx = 0;	char olddelimiter;	olddelimiter = sed_cmd->delimiter;	sed_cmd->delimiter = '/';	if (isdigit(my_str[idx])) {		do {			idx++;		} while (isdigit(my_str[idx]));		my_str[idx] = 0;		*linenum = atoi(my_str);	}	else if (my_str[idx] == '$') {		*linenum = -1;		idx++;	}	else if (my_str[idx] == '/') {		idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx);		if (idx == -1)			error_msg_and_die("unterminated match expression");		my_str[idx] = '\0';		*regex = (regex_t *)xmalloc(sizeof(regex_t));		xregcomp(*regex, my_str+1, REG_NEWLINE);		idx++; /* so it points to the next character after the last '/' */	}	else {		error_msg("get_address: no address found in string\n"				"\t(you probably didn't check the string you passed me)");		idx = -1;	}	free(my_str);	sed_cmd->delimiter = olddelimiter;	return idx;}static int parse_subst_cmd(struct sed_cmd * const sed_cmd, const char *substr){	int oldidx, cflags = REG_NEWLINE;	char *match;	int idx = 0;	int j;	/*	 * the string that gets passed to this function should look like this:	 *    s/match/replace/gIp	 *    ||     |        |||	 *    mandatory       optional	 *	 *    (all three of the '/' slashes are mandatory)	 */	/* verify that the 's' is followed by something.  That something	 * (typically a 'slash') is now our regexp delimiter... 
*/	if (!substr[++idx])		error_msg_and_die("bad format in substitution expression");	else	    sed_cmd->delimiter=substr[idx];	/* save the match string */	oldidx = idx+1;	idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);	if (idx == -1)		error_msg_and_die("bad format in substitution expression");	match = xstrndup(substr + oldidx, idx - oldidx);	/* determine the number of back references in the match string */	/* Note: we compute this here rather than in the do_subst_command()	 * function to save processor time, at the expense of a little more memory	 * (4 bits) per sed_cmd */		/* sed_cmd->num_backrefs = 0; */ /* XXX: not needed? --apparently not */ 	for (j = 0; match[j]; j++) {		/* GNU/POSIX sed does not save more than nine backrefs */		if (match[j] == '\\' && match[j+1] == '(' && sed_cmd->num_backrefs <= 9)			sed_cmd->num_backrefs++;	}	/* save the replacement string */	oldidx = idx+1;	idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);	if (idx == -1)		error_msg_and_die("bad format in substitution expression");	sed_cmd->replace = xstrndup(substr + oldidx, idx - oldidx);	/* process the flags */	while (substr[++idx]) {		switch (substr[idx]) {			case 'g':				sed_cmd->sub_g = 1;				break;			case 'I':				cflags |= REG_ICASE;				break;			case 'p':				sed_cmd->sub_p = 1;				break;			default:				/* any whitespace or semicolon trailing after a s/// is ok */				if (strchr("; \t\v\n\r", substr[idx]))					goto out;				/* else */				error_msg_and_die("bad option in substitution expression");		}	}out:		/* compile the match string into a regex */	sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));	xregcomp(sed_cmd->sub_match, match, cflags);	free(match);	return idx;}#if 0static void move_back(char *str, int offset){    memmove(str, str + offset, strlen(str + offset) + 1);}#endifstatic int parse_edit_cmd(struct sed_cmd *sed_cmd, const char *editstr){	int idx = 0;	int slashes_eaten = 0;	char *ptr; /* shorthand */	/*	 * the string that gets 
passed to this function should look like this:	 *	 *    need one of these 	 *    |	 *    |    this backslash (immediately following the edit command) is mandatory	 *    |    |	 *    [aic]\	 *    TEXT1\	 *    TEXT2\	 *    TEXTN	 *	 * as soon as we hit a TEXT line that has no trailing '\', we're done.	 * this means a command like:	 *	 * i\	 * INSERTME	 *	 * is a-ok.	 *	 */	if (editstr[1] != '\\' && (editstr[2] != '\n' || editstr[2] != '\r'))		error_msg_and_die("bad format in edit expression");	/* store the edit line text */	/* make editline big enough to accomodate the extra '\n' we will tack on	 * to the end */	sed_cmd->editline = xmalloc(strlen(&editstr[3]) + 2);	strcpy(sed_cmd->editline, &editstr[3]);	ptr = sed_cmd->editline;	/* now we need to go through * and: s/\\[\r\n]$/\n/g on the edit line */	while (ptr[idx]) {		while (ptr[idx] != '\\' || (ptr[idx+1] != '\n' && ptr[idx+1] != '\r')) {			idx++;			if (!ptr[idx]) {				goto out;			}		}		/* move the newline over the '\' before it (effectively eats the '\') */		memmove(&ptr[idx], &ptr[idx+1], strlen(&ptr[idx+1]));		ptr[strlen(ptr)-1] = 0;		slashes_eaten++;		/* substitue \r for \n if needed */		if (ptr[idx] == '\r')			ptr[idx] = '\n';	}out:	/* this accounts for discrepancies between the modified string and the	 * original string passed in to this function */	idx += slashes_eaten;	/* figure out if we need to add a newline */	if (ptr[idx-1] != '\n') {		ptr[idx] = '\n';		idx++;	}	/* terminate string */	ptr[idx]= 0;	/* adjust for opening 2 chars [aic]\ */	idx += 2;	return idx;}static int parse_file_cmd(struct sed_cmd *sed_cmd, const char *filecmdstr){	int idx = 0;	int filenamelen = 0;	/*	 * the string that gets passed to this function should look like this:	 *    '[ ]filename'	 *      |  |	 *      |  a filename	 *      |	 *     optional whitespace	 *   re: the file to be read, the GNU manual says the following: "Note that	 *   if filename cannot be read, it is treated as if it were an empty file,	 *   without any error 
indication." Thus, all of the following commands are	 *   perfectly leagal:	 *	 *   sed -e '1r noexist'	 *   sed -e '1r ;'	 *   sed -e '1r'	 */	/* the file command may be followed by whitespace; move past it. */	while (isspace(filecmdstr[++idx]))		{ ; }			/* the first non-whitespace we get is a filename. the filename ends when we	 * hit a normal sed command terminator or end of string */	filenamelen = strcspn(&filecmdstr[idx], "; \n\r\t\v\0");	sed_cmd->filename = xmalloc(filenamelen + 1);	safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1);	return idx + filenamelen;}static char *parse_cmd_str(struct sed_cmd * const sed_cmd, const char *const cmdstr){	int idx = 0;	/* parse the command	 * format is: [addr][,addr]cmd	 *            |----||-----||-|	 *            part1 part2  part3	 */	/* first part (if present) is an address: either a number or a /regex/ */	if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')		idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);	/* second part (if present) will begin with a comma */	if (cmdstr[idx] == ',') {		idx++;		idx += get_address(sed_cmd, &cmdstr[idx], &sed_cmd->end_line, &sed_cmd->end_match);	}	/* last part (mandatory) will be a command */	if (cmdstr[idx] == '\0')		error_msg_and_die("missing command");	sed_cmd->cmd = cmdstr[idx];
sed.c - 源码说明

本页面展示了「手机嵌入式Linux下可用的busybox源码」中的 sed.c 源码文件，该文件采用 C 语言编写，共 867 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与busybox相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?