📄 codecs.java
字号:
unicode.append((char) ch); break; case 3: ch1 = str.charAt(i+1); char ch2 = str.charAt(i+2); if ((ch1 & 0xc0) != 0x80 || (ch2 & 0xc0) != 0x80) { codecs.decoding_error("utf-8", unicode, errors, "invalid data"); i++; continue; } ch = ((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f); if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000)) { codecs.decoding_error("utf-8", unicode, errors, "illegal encoding"); i++; continue; } else unicode.append((char) ch); break; case 4: ch1 = str.charAt(i+1); ch2 = str.charAt(i+2); char ch3 = str.charAt(i+3); if ((ch1 & 0xc0) != 0x80 || (ch2 & 0xc0) != 0x80 || (ch3 & 0xc0) != 0x80) { codecs.decoding_error("utf-8", unicode, errors, "invalid data"); i++; continue; } ch = ((ch & 0x7) << 18) + ((ch1 & 0x3f) << 12) + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f); /* validate and convert to UTF-16 */ if ((ch < 0x10000) || /* minimum value allowed for 4 byte encoding */ (ch > 0x10ffff)) { /* maximum value allowed for UTF-16 */ codecs.decoding_error("utf-8", unicode, errors, "illegal encoding"); i++; continue; } /* compute and append the two surrogates: */ /* translate from 10000..10FFFF to 0..FFFF */ ch -= 0x10000; /* high surrogate = top 10 bits added to D800 */ unicode.append((char) (0xD800 + (ch >> 10))); /* low surrogate = bottom 10 bits added to DC00 */ unicode.append((char) (0xDC00 + (ch & ~0xFC00))); break; default: /* Other sizes are only needed for UCS-4 */ codecs.decoding_error("utf-8", unicode, errors, "unsupported Unicode code range"); i++; } i += n; } return unicode.toString(); }

/**
 * Encodes a UTF-16 string as UTF-8.
 *
 * The result is returned as a String in which every char holds one output
 * byte (a value in the range 0..255).
 *
 * A high surrogate (D800..DBFF) immediately followed by a low surrogate
 * (DC00..DFFF) is combined into one code point and written as a 4-byte
 * sequence. Any other surrogate code unit (a high surrogate at the end of
 * the string or not followed by a low surrogate, or a low surrogate on its
 * own) is written as an ordinary 3-byte sequence, so every emitted sequence
 * starts with a lead byte.
 *
 * @param str    the text to encode
 * @param errors error handling mode; not consulted by this method
 * @return the UTF-8 bytes, one per char
 */
public static String PyUnicode_EncodeUTF8(String str, String errors) {
    int size = str.length();
    StringBuffer v = new StringBuffer(size * 3);

    for (int i = 0; i < size; ) {
        int ch = str.charAt(i++);

        if (ch < 0x80) {
            /* 1 byte: plain ASCII */
            v.append((char) ch);
            continue;
        }
        if (ch < 0x0800) {
            /* 2 bytes */
            v.append((char) (0xc0 | (ch >> 6)));
            v.append((char) (0x80 | (ch & 0x3f)));
            continue;
        }

        /* Only a high surrogate can start a pair; i already points at the
           next code unit because of the post-increment above. */
        if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
            int ch2 = str.charAt(i);
            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                /* combine the two values into a code point >= 0x10000 */
                ch = (((ch - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000;
                i++;
                /* 4 bytes */
                v.append((char) (0xf0 | (ch >> 18)));
                v.append((char) (0x80 | ((ch >> 12) & 0x3f)));
                v.append((char) (0x80 | ((ch >> 6) & 0x3f)));
                v.append((char) (0x80 | (ch & 0x3f)));
                continue;
            }
        }

        /* 3 bytes: every other BMP code unit, including unpaired surrogates */
        v.append((char) (0xe0 | (ch >> 12)));
        v.append((char) (0x80 | ((ch >> 6) & 0x3f)));
        v.append((char) (0x80 | (ch & 0x3f)));
    }
    return v.toString();
}


/* --- 7-bit ASCII Codec -------------------------------------------- */

/**
 * Decodes the first {@code size} chars of {@code str} as 7-bit ASCII.
 *
 * Chars below 128 are copied unchanged. Every other char is reported to
 * decoding_error with the supplied {@code errors} mode and is not copied.
 *
 * @param str    the input, one char per byte
 * @param size   number of leading chars of {@code str} to decode
 * @param errors error handling mode passed to decoding_error
 * @return the decoded text
 */
public static String PyUnicode_DecodeASCII(String str, int size, String errors) {
    StringBuffer v = new StringBuffer(size);

    for (int i = 0; i < size; i++) {
        char ch = str.charAt(i);
        if (ch < 128) {
            v.append(ch);
        } else {
            decoding_error("ascii", v, errors, "ordinal not in range(128)");
        }
    }
    return v.toString();
}

/**
 * Encodes the first {@code size} chars of {@code str} as 7-bit ASCII.
 *
 * Chars below 128 are copied unchanged. Every other char is reported to
 * encoding_error with the supplied {@code errors} mode and is not copied.
 *
 * @param str    the text to encode
 * @param size   number of leading chars of {@code str} to encode
 * @param errors error handling mode passed to encoding_error
 * @return the encoded text
 */
public static String PyUnicode_EncodeASCII(String str, int size, String errors) {
    StringBuffer v = new StringBuffer(size);

    for (int i = 0; i < size; i++) {
        char ch = str.charAt(i);
        if (ch < 128) {
            v.append(ch);
        } else {
            encoding_error("ascii", v, errors, "ordinal not in range(128)");
        }
    }
    return v.toString();
}


/* --- RawUnicodeEscape Codec ---------------------------------------- */

/* Upper-case hex digits, indexed by nibble value. */
private static final char[] hexdigit = "0123456789ABCDEF".toCharArray();

// The modified flag is used by cPickle.
/**
 * Encodes a string with the raw-unicode-escape scheme.
 *
 * Chars with an ordinal of 256 or more are written as a backslash, the
 * letter u and four upper-case hex digits. When {@code modifed} is true,
 * newline and backslash are escaped the same way. All other chars are
 * copied unchanged.
 *
 * @param str     the text to encode
 * @param errors  error handling mode; not consulted by this method
 * @param modifed when true, also escape newline and backslash
 * @return the escaped text
 */
public static String PyUnicode_EncodeRawUnicodeEscape(String str, String errors,
                                                      boolean modifed) {
    int size = str.length();
    StringBuffer v = new StringBuffer(size);

    for (int i = 0; i < size; i++) {
        char ch = str.charAt(i);
        boolean escape = ch >= 256 || (modifed && (ch == '\n' || ch == '\\'));
        if (!escape) {
            v.append(ch);
            continue;
        }
        v.append("\\u");
        v.append(hexdigit[(ch >>> 12) & 0xF]);
        v.append(hexdigit[(ch >>> 8) & 0xF]);
        v.append(hexdigit[(ch >>> 4) & 0xF]);
        v.append(hexdigit[ch & 0xF]);
    }
    return v.toString();
}

/**
 * Decodes text written in the raw-unicode-escape scheme.
 *
 * A run of backslashes is copied as-is. If the run has odd length and is
 * followed by the letter u, the last backslash, the u and the next four hex
 * digits are replaced by the char with that ordinal. Every other char is
 * copied unchanged.
 *
 * If fewer than four hex digits follow the u (because of a non-hex char or
 * the end of the input), the problem is reported to decoding_error with the
 * supplied {@code errors} mode. When that call returns, nothing further is
 * appended for the broken escape and decoding resumes at the first char
 * that was not a hex digit, so no valid input is skipped.
 *
 * @param str    the escaped text
 * @param errors error handling mode passed to decoding_error
 * @return the decoded text
 */
public static String PyUnicode_DecodeRawUnicodeEscape(String str, String errors) {
    int size = str.length();
    StringBuffer v = new StringBuffer(size);

    for (int i = 0; i < size; ) {
        char ch = str.charAt(i);

        /* Non-escape characters are interpreted as Unicode ordinals */
        if (ch != '\\') {
            v.append(ch);
            i++;
            continue;
        }

        /* Escapes are only interpreted iff the number of leading
           backslashes is odd */
        int bs = i;
        while (i < size) {
            ch = str.charAt(i);
            if (ch != '\\') {
                break;
            }
            v.append(ch);
            i++;
        }
        if (((i - bs) & 1) == 0 || i >= size || ch != 'u') {
            continue;
        }

        /* Drop the backslash that introduces the escape and step over 'u'. */
        v.setLength(v.length() - 1);
        i++;

        /* Read up to 4 hex digits, never looking past the end of the input. */
        int x = 0;
        int digits = 0;
        while (digits < 4 && i + digits < size) {
            int d = Character.digit(str.charAt(i + digits), 16);
            if (d == -1) {
                break;
            }
            x = (x << 4) | d;
            digits++;
        }
        i += digits;

        if (digits < 4) {
            codecs.decoding_error("unicode escape", v, errors, "truncated \\uXXXX");
            continue;
        }
        v.append((char) x);
    }
    return v.toString();
}


/* --- Utility methods -------------------------------------------- */

/**
 * Applies the requested error handling mode to an encoding problem.
 *
 * A null mode or "strict" throws Py.UnicodeError, "ignore" does nothing,
 * "replace" appends '?' to {@code dest}, and any other mode throws
 * Py.ValueError. Modes are compared by value, so the strings need not be
 * interned.
 *
 * @param type    codec name used in the error message
 * @param dest    output buffer that receives the replacement char
 * @param errors  error handling mode, may be null
 * @param details description of the problem used in the error message
 */
public static void encoding_error(String type, StringBuffer dest,
                                  String errors, String details) {
    if (errors == null || "strict".equals(errors)) {
        throw Py.UnicodeError(type + " encoding error: " + details);
    }
    if ("ignore".equals(errors)) {
        return;
    }
    if ("replace".equals(errors)) {
        dest.append('?');
        return;
    }
    throw Py.ValueError(type + " encoding error; " +
                        "unknown error handling code: " + errors);
}

/**
 * Applies the requested error handling mode to a decoding problem.
 *
 * A null mode or "strict" throws Py.UnicodeError, "ignore" does nothing,
 * "replace" appends Py_UNICODE_REPLACEMENT_CHARACTER to {@code dest} when
 * {@code dest} is not null, and any other mode throws Py.ValueError. Modes
 * are compared by value, so the strings need not be interned.
 *
 * @param type    codec name used in the error message
 * @param dest    output buffer that receives the replacement char, may be null
 * @param errors  error handling mode, may be null
 * @param details description of the problem used in the error message
 */
public static void decoding_error(String type, StringBuffer dest,
                                  String errors, String details) {
    if (errors == null || "strict".equals(errors)) {
        throw Py.UnicodeError(type + " decoding error: " + details);
    }
    if ("ignore".equals(errors)) {
        return;
    }
    if ("replace".equals(errors)) {
        if (dest != null) {
            dest.append(Py_UNICODE_REPLACEMENT_CHARACTER);
        }
        return;
    }
    throw Py.ValueError(type + " decoding error; " +
                        "unknown error handling code: " + errors);
}

} /* closes the enclosing class, which is opened before this excerpt */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -