ascmagic.c

来自「sleuthit-2.09 一个磁盘的工具集」· C语言代码 · 共 716 行 · 第 1/2 页
716 行
/* * Copyright (c) Ian F. Darwin 1986-1995. * Software written by Ian F. Darwin and others; * maintained 1995-present by Christos Zoulas and others. *  * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *    notice immediately at the beginning of the file, without modification, *    this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the distribution. *   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. *//* * ASCII magic -- file types that we know based on keywords * that can appear anywhere in the file. * * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000, * to handle character codes other than ASCII on a unified basis. * * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit * international characters, now subsumed into this file. 
*/#include "file.h"#include "magic.h"#include <stdio.h>#include <string.h>#include <memory.h>#include <ctype.h>#include <stdlib.h>#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#include "names.h"#ifndef	lintFILE_RCSID("@(#)$File: ascmagic.c,v 1.50 2007/03/15 14:51:00 christos Exp $")#endif	/* lint */typedef unsigned long unichar;#define MAXLINELEN 300	/* longest sane line length */#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \		  || (x) == 0x85 || (x) == '\f')private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *);private int looks_unicode(const unsigned char *, size_t, unichar *, size_t *);private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);private void from_ebcdic(const unsigned char *, size_t, unsigned char *);private int ascmatch(const unsigned char *, const unichar *, size_t);protected intfile_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes){	size_t i;	unsigned char *nbuf = NULL;	unichar *ubuf = NULL;		size_t ulen;	struct names *p;	int rv = -1;	const char *code = NULL;	const char *code_mime = NULL;	const char *type = NULL;	const char *subtype = NULL;	const char *subtype_mime = NULL;	int has_escapes = 0;	int has_backspace = 0;	int seen_cr = 0;	int n_crlf = 0;	int n_lf = 0;	int n_cr = 0;	int n_nel = 0;	size_t last_line_end = (size_t)-1;	int has_long_lines = 0;	/*	 * Undo the NUL-termination kindly provided by process()	 * but leave at least one byte to look at	 */	while (nbytes > 1 && buf[nbytes - 1] == '\0')		nbytes--;	if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL)		goto done;	if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL)		goto done;	/*	 * Then try to determine whether it's any character code we can	 * identify.  
Each of these tests, if it succeeds, will leave	 * the text converted into one-unichar-per-character Unicode in	 * ubuf, and the number of characters converted in ulen.	 */	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {		code = "ASCII";		code_mime = "us-ascii";		type = "text";	} else if (looks_utf8(buf, nbytes, ubuf, &ulen)) {		code = "UTF-8 Unicode";		code_mime = "utf-8";		type = "text";	} else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen)) != 0) {		if (i == 1)			code = "Little-endian UTF-16 Unicode";		else			code = "Big-endian UTF-16 Unicode";		type = "character data";		code_mime = "utf-16";    /* is this defined? */	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {		code = "ISO-8859";		type = "text";		code_mime = "iso-8859-1"; 	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {		code = "Non-ISO extended-ASCII";		type = "text";		code_mime = "unknown";	} else {		from_ebcdic(buf, nbytes, nbuf);		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {			code = "EBCDIC";			type = "character data";			code_mime = "ebcdic";		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {			code = "International EBCDIC";			type = "character data";			code_mime = "ebcdic";		} else {			rv = 0;			goto done;  /* doesn't look like text at all */		}	}	if (nbytes <= 1) {		rv = 0;		goto done;	}	/*	 * for troff, look for . + letter + letter or .\";	 * this must be done to disambiguate tar archives' ./file	 * and other trash from real troff input.	 *	 * I believe Plan 9 troff allows non-ASCII characters in the names	 * of macros, so this test might possibly fail on such a file.	 
*/	if ((ms->flags & MAGIC_NO_CHECK_TROFF) == 0 && *ubuf == '.') {		unichar *tp = ubuf + 1;		while (ISSPC(*tp))			++tp;	/* skip leading whitespace */		if ((tp[0] == '\\' && tp[1] == '\"') ||		    (isascii((unsigned char)tp[0]) &&		     isalnum((unsigned char)tp[0]) &&		     isascii((unsigned char)tp[1]) &&		     isalnum((unsigned char)tp[1]) &&		     ISSPC(tp[2]))) {			subtype_mime = "text/troff";			subtype = "troff or preprocessor input";			goto subtype_identified;		}	}	if ((ms->flags & MAGIC_NO_CHECK_FORTRAN) == 0 &&	    (*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) {		subtype_mime = "text/fortran";		subtype = "fortran program";		goto subtype_identified;	}	/* look for tokens from names.h - this is expensive! */	if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)		goto subtype_identified;	i = 0;	while (i < ulen) {		size_t end;		/*		 * skip past any leading space		 */		while (i < ulen && ISSPC(ubuf[i]))			i++;		if (i >= ulen)			break;		/*		 * find the next whitespace		 */		for (end = i + 1; end < nbytes; end++)			if (ISSPC(ubuf[end]))				break;		/*		 * compare the word thus isolated against the token list		 */		for (p = names; p < names + NNAMES; p++) {			if (ascmatch((const unsigned char *)p->name, ubuf + i,			    end - i)) {				subtype = types[p->type].human;				subtype_mime = types[p->type].mime;				goto subtype_identified;			}		}		i = end;	}subtype_identified:	/*	 * Now try to discover other details about the file.	 */	for (i = 0; i < ulen; i++) {		if (ubuf[i] == '\n') {			if (seen_cr)				n_crlf++;			else				n_lf++;			last_line_end = i;		} else if (seen_cr)			n_cr++;		seen_cr = (ubuf[i] == '\r');		if (seen_cr)			last_line_end = i;		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */			n_nel++;			last_line_end = i;		}		/* If this line is _longer_ than MAXLINELEN, remember it. 
*/		if (i > last_line_end + MAXLINELEN)			has_long_lines = 1;		if (ubuf[i] == '\033')			has_escapes = 1;		if (ubuf[i] == '\b')			has_backspace = 1;	}	/* Beware, if the data has been truncated, the final CR could have	   been followed by a LF.  If we have HOWMANY bytes, it indicates	   that the data might have been truncated, probably even before	   this function was called. */	if (seen_cr && nbytes < HOWMANY)		n_cr++;	if ((ms->flags & MAGIC_MIME)) {		if (subtype_mime) {			if (file_printf(ms, subtype_mime) == -1)				goto done;		} else {			if (file_printf(ms, "text/plain") == -1)				goto done;		}		if (code_mime) {			if (file_printf(ms, "; charset=") == -1)				goto done;			if (file_printf(ms, code_mime) == -1)				goto done;		}	} else {		if (file_printf(ms, code) == -1)			goto done;		if (subtype) {			if (file_printf(ms, " ") == -1)				goto done;			if (file_printf(ms, subtype) == -1)				goto done;		}		if (file_printf(ms, " ") == -1)			goto done;		if (file_printf(ms, type) == -1)			goto done;		if (has_long_lines)			if (file_printf(ms, ", with very long lines") == -1)				goto done;		/*		 * Only report line terminators if we find one other than LF,		 * or if we find none at all.		 
*/		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {			if (file_printf(ms, ", with") == -1)				goto done;			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0)			{				if (file_printf(ms, " no") == -1)					goto done;			} else {				if (n_crlf) {					if (file_printf(ms, " CRLF") == -1)						goto done;					if (n_cr || n_lf || n_nel)						if (file_printf(ms, ",") == -1)							goto done;				}				if (n_cr) {					if (file_printf(ms, " CR") == -1)						goto done;					if (n_lf || n_nel)						if (file_printf(ms, ",") == -1)							goto done;				}				if (n_lf) {					if (file_printf(ms, " LF") == -1)						goto done;					if (n_nel)						if (file_printf(ms, ",") == -1)							goto done;				}				if (n_nel)					if (file_printf(ms, " NEL") == -1)						goto done;			}			if (file_printf(ms, " line terminators") == -1)				goto done;		}		if (has_escapes)			if (file_printf(ms, ", with escape sequences") == -1)				goto done;		if (has_backspace)			if (file_printf(ms, ", with overstriking") == -1)				goto done;	}
ascmagic.c - 源码说明

本页面展示了「sleuthit-2.09 一个磁盘的工具集」中的 ascmagic.c 源码文件，采用 C 语言编写，共 716 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与sleuthit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?