📄 codecs.java
字号:
/*
 * Copyright 2000 Finn Bock
 *
 * This program contains material copyrighted by:
 * Copyright (c) Corporation for National Research Initiatives.
 * Originally written by Marc-Andre Lemburg (mal@lemburg.com).
 */
package org.python.core;

/**
 * Contains the implementation of the builtin codecs.
 * @since Jython 2.0
 */
public class codecs {
    private static char Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;

    /** Registered codec search functions, consulted in registration order. */
    private static PyList searchPath = new PyList();

    /** Cache of successful lookups, keyed by the normalized encoding name. */
    private static PyStringMap searchCache = new PyStringMap();

    private static String default_encoding = "ascii";

    /**
     * Returns the name of the encoding used when callers pass no explicit
     * encoding to {@link #encode} or {@link #decode}.
     */
    public static String getDefaultEncoding() {
        return default_encoding;
    }

    /**
     * Sets the default encoding.
     *
     * The name is first passed to {@link #lookup}, so an encoding that
     * cannot be resolved raises before the default is changed.
     *
     * @param encoding the new default encoding name
     */
    public static void setDefaultEncoding(String encoding) {
        lookup(encoding);
        default_encoding = encoding;
    }

    /**
     * Appends a codec search function to the search path.
     *
     * @param search_function a callable; a TypeError is raised otherwise
     */
    public static void register(PyObject search_function) {
        if (!search_function.isCallable()) {
            throw Py.TypeError("argument must be callable");
        }
        searchPath.append(search_function);
    }

    /**
     * Resolves an encoding name to its codec tuple.
     *
     * The name is normalized, looked up in the cache and, on a miss, offered
     * to each registered search function in turn. A search function returns
     * <code>None</code> to decline, otherwise it must return a 4-tuple.
     * Within this class, item 0 is used as the encoder and item 1 as the
     * decoder.
     *
     * @param encoding the encoding name as given by the caller
     * @return the 4-tuple produced by the first search function that
     *         recognized the name
     */
    public static PyTuple lookup(String encoding) {
        import_encodings();
        PyString v = new PyString(normalizestring(encoding));
        PyObject result = searchCache.__finditem__(v);
        if (result != null) {
            return (PyTuple)result;
        }

        if (searchPath.__len__() == 0) {
            throw new PyException(Py.LookupError,
                   "no codec search functions registered: " +
                   "can't find encoding");
        }

        PyObject iter = searchPath.__iter__();
        PyObject func = null;
        while ((func = iter.__iternext__()) != null) {
            result = func.__call__(v);
            if (result == Py.None) {
                continue;
            }
            if (!(result instanceof PyTuple) || result.__len__() != 4) {
                throw Py.TypeError("codec search functions must "+
                                   "return 4-tuples");
            }
            break;
        }
        // func is null only when the iterator ran dry, i.e. every search
        // function declined the name.
        if (func == null) {
            throw new PyException(Py.LookupError,
                                  "unknown encoding " + encoding);
        }
        searchCache.__setitem__(v, result);
        return (PyTuple)result;
    }

    /**
     * Normalizes an encoding name: lower-cases it and replaces spaces with
     * hyphens.
     *
     * Lower-casing uses a fixed locale. With the JVM default locale a
     * Turkish or Azeri environment maps 'I' to a dotless 'i' (U+0131), so
     * names such as "ISO-8859-1" or "ASCII" would no longer match.
     */
    private static String normalizestring(String string) {
        return string.toLowerCase(java.util.Locale.ENGLISH).replace(' ', '-');
    }

    private static boolean import_encodings_called = false;

    /**
     * Imports the <code>encodings</code> package the first time it is called.
     *
     * An ImportError is ignored; any other exception is rethrown. The flag
     * is set before the attempt, so a failed import is not retried.
     */
    private static void import_encodings() {
        if (!import_encodings_called) {
            import_encodings_called = true;
            try {
                __builtin__.__import__("encodings");
            } catch (PyException exc) {
                if (exc.type != Py.ImportError) {
                    throw exc;
                }
            }
        }
    }

    /**
     * Decodes <code>v</code> using the named encoding.
     *
     * @param v        the string to decode
     * @param encoding the encoding name, or null for the default encoding
     * @param errors   the error handling scheme, or null to call the decoder
     *                 without one
     * @return item 0 of the decoder's result, converted with
     *         <code>__str__()</code>
     */
    public static PyString decode(PyString v, String encoding,
                                  String errors)
    {
        if (encoding == null) {
            encoding = getDefaultEncoding();
        } else {
            encoding = normalizestring(encoding);
        }

        // NOTE(review): interning presumably lets downstream code compare
        // the scheme by identity; the consumers are not in this part of the
        // file -- confirm before removing.
        if (errors != null) {
            errors = errors.intern();
        }

        /* Shortcut for the common default encoding. Only ASCII is handled
           directly; UTF-8 and Latin-1 go through the codec registry. */
        if (encoding.equals("ascii")) {
            return new PyString(PyUnicode_DecodeASCII(v.toString(),
                                                      v.__len__(), errors));
        }

        /* Decode via the codec registry */
        PyObject decoder = getDecoder(encoding);
        PyObject result = null;
        if (errors != null) {
            result = decoder.__call__(v, new PyString(errors));
        } else {
            result = decoder.__call__(v);
        }

        if (!(result instanceof PyTuple) || result.__len__() != 2) {
            throw Py.TypeError("decoder must return a tuple " +
                               "(object,integer)");
        }
        return result.__getitem__(0).__str__();
    }

    /** Returns item 1 of the codec tuple for <code>encoding</code>. */
    private static PyObject getDecoder(String encoding) {
        PyObject codecs = lookup(encoding);
        return codecs.__getitem__(1);
    }

    /**
     * Encodes <code>v</code> using the named encoding.
     *
     * @param v        the string to encode
     * @param encoding the encoding name, or null for the default encoding
     * @param errors   the error handling scheme, or null to call the encoder
     *                 without one
     * @return item 0 of the encoder's result, converted with
     *         <code>__str__()</code>
     */
    public static PyString encode(PyString v, String encoding,
                                  String errors)
    {
        if (encoding == null) {
            encoding = getDefaultEncoding();
        } else {
            encoding = normalizestring(encoding);
        }

        // NOTE(review): interning presumably lets downstream code compare
        // the scheme by identity; the consumers are not in this part of the
        // file -- confirm before removing.
        if (errors != null) {
            errors = errors.intern();
        }

        /* Shortcut for the common default encoding. Only ASCII is handled
           directly; UTF-8 and Latin-1 go through the codec registry. */
        if (encoding.equals("ascii")) {
            return new PyString(PyUnicode_EncodeASCII(v.toString(),
                                                      v.__len__(), errors));
        }

        /* Encode via the codec registry */
        PyObject encoder = getEncoder(encoding);
        PyObject result = null;
        if (errors != null) {
            result = encoder.__call__(v, new PyString(errors));
        } else {
            result = encoder.__call__(v);
        }

        if (!(result instanceof PyTuple) || result.__len__() != 2) {
            throw Py.TypeError("encoder must return a tuple " +
                               "(object,integer)");
        }
        return result.__getitem__(0).__str__();
    }

    /** Returns item 0 of the codec tuple for <code>encoding</code>. */
    private static PyObject getEncoder(String encoding) {
        PyObject codecs = lookup(encoding);
        return codecs.__getitem__(0);
    }

    /* --- UTF-8 Codec ---------------------------------------------------- */

    private static byte utf8_code_length[] = {
        /* Map UTF-8 encoded prefix byte to sequence length.  zero means
           illegal prefix.  see RFC 2279 for details */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
    };

    /**
     * Decodes UTF-8 data held in a string, one char per byte value.
     *
     * Chars above 0xFF, unknown prefix bytes, truncated sequences, bad
     * continuation bytes and overlong two-byte forms are each reported
     * through <code>decoding_error</code> (declared elsewhere in this
     * class), after which scanning resumes at the next char.
     *
     * @param str    the UTF-8 data
     * @param errors the error handling scheme passed to
     *               <code>decoding_error</code>
     */
    public static String PyUnicode_DecodeUTF8(String str, String errors) {
        int size = str.length();
        StringBuffer unicode = new StringBuffer(size);

        /* Unpack UTF-8 encoded data */
        for (int i = 0; i < size; ) {
            int ch = str.charAt(i);
            if (ch > 0xFF) {
                codecs.decoding_error("utf-8", unicode, errors,
                                      "ordinal not in range(255)");
                i++;
                continue;
            }

            // Plain 7-bit character: copied through unchanged.
            if (ch < 0x80) {
                unicode.append((char) ch);
                i++;
                continue;
            }

            int n = utf8_code_length[ch];

            if (i + n > size) {
                codecs.decoding_error("utf-8", unicode, errors,
                                      "unexpected end of data");
                i++;
                continue;
            }

            switch (n) {

            case 0:
                codecs.decoding_error("utf-8", unicode, errors,
                                      "unexpected code byte");
                i++;
                continue;

            case 1:
                // Length 1 is only listed for bytes below 0x80, which were
                // already handled above.
                codecs.decoding_error("utf-8", unicode, errors,
                                      "internal error");
                i++;
                continue;

            case 2:
                char ch1 = str.charAt(i+1);
                if ((ch1 & 0xc0) != 0x80) {
                    codecs.decoding_error("utf-8", unicode, errors,
                                          "invalid data");
                    i++;
                    continue;
                }
                ch = ((ch & 0x1f) << 6) + (ch1 & 0x3f);
                // A two-byte form that decodes below 0x80 is overlong.
                if (ch < 0x80) {
                    codecs.decoding_error("utf-8", unicode, errors,
                                          "illegal encoding");
                    i++;
                    continue;
                } else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -