📄 binascii.c

📁 python s60 1.4.5版本的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* Portions Copyright (c) 2005 Nokia Corporation */
/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**     	each line encodes 45 bytes (except possibly the last)
**	First char encodes (binary) length, rest data
**	each char encodes 6 bits, as follows:
**	binary: 01234567 abcdefgh ijklmnop
**	ascii:  012345 67abcd efghij klmnop
**	ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**	short binary data is zero-extended (so the bits are always in the
**	right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**	File starts with introductory text, real data starts and ends
**	with colons.
**	Data consists of three similar parts: info, datafork, resourcefork.
**	Each part is protected (at the end) with a 16-bit crc
**	The binary data is run-length encoded, and then ascii-fied:
**	binary: 01234567 abcdefgh ijklmnop
**	ascii:  012345 67abcd efghij klmnop
**	ASCII encoding is table-driven, see the code.
**	Short binary data results in the runt ascii-byte being output with
**	the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**	Programs that encode binary data in ASCII are written in
**	such a style that they are as unreadable as possible. Devices used
**	include unnecessary global variables, burying important tables
**	in unrelated sourcefiles, putting functions in include files,
**	using seemingly-descriptive variable names for different purposes,
**	calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/


#include "Python.h"

// static PyObject *Error;
// static PyObject *Incomplete;

/*
** hqx lookup table, ascii->binary.
*/

#define RUNCHAR 0x90

#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/	FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/	FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/	0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/	0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/	0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/	0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/	0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/	0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/	0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/	0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/	0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
	FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};

static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";

static const signed char table_a2b_base64[] = {
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)

static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";



static const unsigned short crctab_hqx[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
	0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
	0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
	0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
	0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
	0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
	0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
	0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
	0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
	0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
	0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
	0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
	0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
	0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
	0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
	0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
	0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
	0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
	0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
	0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
	0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
	0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
	0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};

static const char doc_a2b_uu[] = "(ascii) -> bin. Decode a line of uuencoded data";

static PyObject *
binascii_a2b_uu(PyObject *self, PyObject *args)
{
	unsigned char *ascii_data, *bin_data;
	int leftbits = 0;
	unsigned char this_ch;
	unsigned int leftchar = 0;
	PyObject *rv;
	int ascii_len, bin_len;

	if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
		return NULL;

	/* First byte: binary data length (in bytes) */
	bin_len = (*ascii_data++ - ' ') & 077;
	ascii_len--;

	/* Allocate the buffer */
	if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
		return NULL;
	bin_data = (unsigned char *)PyString_AsString(rv);

	for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
		this_ch = *ascii_data;
		if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
			/*
			** Whitespace. Assume some spaces got eaten at
			** end-of-line. (We check this later)
			*/
			this_ch = 0;
	        } else {
			/* Check the character for legality
			** The 64 in stead of the expected 63 is because
			** there are a few uuencodes out there that use
			** '`' as zero instead of space.
			*/
			if ( this_ch < ' ' || this_ch > (' ' + 64)) {
				PyErr_SetString((PYTHON_GLOBALS->binascii_Error), "Illegal char");
				Py_DECREF(rv);
				return NULL;
			}
			this_ch = (this_ch - ' ') & 077;
		}
		/*
		** Shift it in on the low end, and see if there's
		** a byte ready for output.
		*/
		leftchar = (leftchar << 6) | (this_ch);
		leftbits += 6;
		if ( leftbits >= 8 ) {
			leftbits -= 8;
			*bin_data++ = (leftchar >> leftbits) & 0xff;
			leftchar &= ((1 << leftbits) - 1);
			bin_len--;
		}
	}
	/*
	** Finally, check that if there's anything left on the line
	** that it's whitespace only.
	*/
	while( ascii_len-- > 0 ) {
		this_ch = *ascii_data++;
		/* Extra '`' may be written as padding in some cases */
		if ( this_ch != ' ' && this_ch != ' '+64 &&
		     this_ch != '\n' && this_ch != '\r' ) {
			PyErr_SetString((PYTHON_GLOBALS->binascii_Error), "Trailing garbage");
			Py_DECREF(rv);
			return NULL;
		}
	}
	return rv;
}

static const char doc_b2a_uu[] = "(bin) -> ascii. Uuencode line of data";

static PyObject *
binascii_b2a_uu(PyObject *self, PyObject *args)
{
	unsigned char *ascii_data, *bin_data;
	int leftbits = 0;
	unsigned char this_ch;
	unsigned int leftchar = 0;
	PyObject *rv;
	int bin_len;

	if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
		return NULL;
	if ( bin_len > 45 ) {
		/* The 45 is a limit that appears in all uuencode's */
		PyErr_SetString((PYTHON_GLOBALS->binascii_Error), "At most 45 bytes at once");
		return NULL;
	}

	/* We're lazy and allocate to much (fixed up later) */
	if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2)) == NULL )
		return NULL;
	ascii_data = (unsigned char *)PyString_AsString(rv);

	/* Store the length */
	*ascii_data++ = ' ' + (bin_len & 077);

	for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
		/* Shift the data (or padding) into our buffer */
		if ( bin_len > 0 )	/* Data */
			leftchar = (leftchar << 8) | *bin_data;
		else			/* Padding */
			leftchar <<= 8;
		leftbits += 8;

		/* See if there are 6-bit groups ready */
		while ( leftbits >= 6 ) {
			this_ch = (leftchar >> (leftbits-6)) & 0x3f;
			leftbits -= 6;
			*ascii_data++ = this_ch + ' ';
		}
	}
	*ascii_data++ = '\n';	/* Append a courtesy newline */

	_PyString_Resize(&rv, (ascii_data -
			       (unsigned char *)PyString_AsString(rv)));
	return rv;
}


static int
binascii_find_valid(unsigned char *s, int slen, int num)
{
	/* Finds & returns the (num+1)th
	** valid character for base64, or -1 if none.
	*/

	int ret = -1;
	unsigned char c, b64val;

	while ((slen > 0) && (ret == -1)) {
		c = *s;
		b64val = table_a2b_base64[c & 0x7f];
		if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
			if (num == 0)
				ret = *s;
			num--;
		}

		s++;
		slen--;
	}
	return ret;
}

static const char doc_a2b_base64[] = "(ascii) -> bin. Decode a line of base64 data";

static PyObject *
binascii_a2b_base64(PyObject *self, PyObject *args)
{
	unsigned char *ascii_data, *bin_data;
	int leftbits = 0;
	unsigned char this_ch;
	unsigned int leftchar = 0;
	PyObject *rv;
	int ascii_len, bin_len;
	int quad_pos = 0;

	if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
		return NULL;

	bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */

	/* Allocate the buffer */
	if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
		return NULL;
	bin_data = (unsigned char *)PyString_AsString(rv);
	bin_len = 0;

	for( ; ascii_len > 0; ascii_len--, ascii_data++) {
		this_ch = *ascii_data;

		if (this_ch > 0x7f ||
		    this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
			continue;

		/* Check for pad sequences and ignore
		** the invalid ones.
		*/
		if (this_ch == BASE64_PAD) {
			if ( (quad_pos < 2) ||
			     ((quad_pos == 2) &&
			      (binascii_find_valid(ascii_data, ascii_len, 1)
			       != BASE64_PAD)) )
			{
				continue;
			}
			else {
				/* A pad sequence means no more input.
				** We've already interpreted the data
				** from the quad at this point.
				*/
				leftbits = 0;
				break;
			}
		}

		this_ch = table_a2b_base64[*ascii_data];
		if ( this_ch == (unsigned char) -1 )
			continue;

		/*
		** Shift it in on the low end, and see if there's
		** a byte ready for output.
		*/
		quad_pos = (quad_pos + 1) & 0x03;
		leftchar = (leftchar << 6) | (this_ch);
		leftbits += 6;

		if ( leftbits >= 8 ) {
			leftbits -= 8;
			*bin_data++ = (leftchar >> leftbits) & 0xff;
			bin_len++;
			leftchar &= ((1 << leftbits) - 1);
		}
 	}

	if (leftbits != 0) {
		PyErr_SetString((PYTHON_GLOBALS->binascii_Error), "Incorrect padding");
		Py_DECREF(rv);
		return NULL;
	}

	/* and set string size correctly */
	if (bin_len > 0)
		_PyString_Resize(&rv, bin_len);
	return rv;
}

static const char doc_b2a_base64[] = "(bin) -> ascii. Base64-code line of data";

static PyObject *
binascii_b2a_base64(PyObject *self, PyObject *args)
{
	unsigned char *ascii_data, *bin_data;
	int leftbits = 0;
	unsigned char this_ch;
	unsigned int leftchar = 0;
	PyObject *rv;
	int bin_len;

	if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
		return NULL;
	if ( bin_len > BASE64_MAXBIN ) {
		PyErr_SetString((PYTHON_GLOBALS->binascii_Error), "Too much data for base64 line");
		return NULL;
	}

	/* We're lazy and allocate too much (fixed up later).
	   "+3" leaves room for up to two pad characters and a trailing
	   newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
	if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
		return NULL;
	ascii_data = (unsigned char *)PyString_AsString(rv);

	for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -