utf8encoding.cs
来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 1,015 行 · 第 1/2 页
CS
1,015 行
leftSoFar = 1;
                leftSize = 4;
            }
            else if((ch & (uint)0xFC) == (uint)0xF8)
            {
                // Five-byte UTF-8 character.
                leftBits = (ch & (uint)0x03);
                leftSoFar = 1;
                leftSize = 5;
            }
            else if((ch & (uint)0xFE) == (uint)0xFC)
            {
                // Six-byte UTF-8 character.  The lead byte pattern is
                // 1111110x, so only 0xFC and 0xFD qualify.  0xFE and 0xFF
                // are never valid lead bytes and must fall through to the
                // invalid-start branch below.
                leftBits = (ch & (uint)0x01);
                leftSoFar = 1;
                leftSize = 6;
            }
            else
            {
                // Invalid UTF-8 start character.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
            }
        }
        else
        {
            // Process an extra byte in a multi-byte sequence.
            if((ch & (uint)0xC0) == (uint)0x80)
            {
                leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
                if(++leftSoFar >= leftSize)
                {
                    // We have a complete character now.
                    if(leftBits < (uint)0x10000)
                    {
                        // U+FEFF is not counted, matching the decoder
                        // which does not emit it.
                        if(leftBits != (uint)0xFEFF)
                        {
                            ++length;
                        }
                    }
                    else if(leftBits < (uint)0x110000)
                    {
                        // Needs a surrogate pair in the output.
                        length += 2;
                    }
                    else if(throwOnInvalid)
                    {
                        throw new ArgumentException
                            (_("Arg_InvalidUTF8"), "bytes");
                    }
                    leftSize = 0;
                }
            }
            else
            {
                // Invalid UTF-8 sequence: clear and restart.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
                leftSize = 0;

                // Back up so the offending byte is re-examined as a
                // potential start byte on the next iteration.
                --index;
                ++count;
            }
        }
    }
    if(flush && leftSize != 0 && throwOnInvalid)
    {
        // We had left-over bytes that didn't make up
        // a complete UTF-8 character sequence.
        throw new ArgumentException
            (_("Arg_InvalidUTF8"), "bytes");
    }

    // Return the final length to the caller.
    return length;
}

// Get the number of characters needed to decode a byte buffer.
// Counting starts with no pending partial sequence and with
// flushing enabled.
public override int GetCharCount(byte[] bytes, int index, int count)
{
    return InternalGetCharCount(bytes, index, count, 0, 0,
                                throwOnInvalid, true);
}

// Get the characters that result from decoding a byte buffer.
//
// Decodes "byteCount" bytes of "bytes", starting at "byteIndex", into
// "chars" starting at "charIndex", and returns the number of chars
// written.
//
// "leftOverBits" and "leftOverCount" carry a partially decoded
// sequence between calls.  "leftOverCount" packs two 4-bit values:
// the low nibble is the number of bytes consumed so far and the next
// nibble is the total length of the sequence (0 when nothing is
// pending).  Both are written back only when the method returns
// normally.
//
// Throws ArgumentNullException for a null array,
// ArgumentOutOfRangeException for an index or count outside its
// array, and ArgumentException when "chars" is too small.  When
// "throwOnInvalid" is true, ArgumentException is also thrown for
// malformed input, including an incomplete trailing sequence if
// "flush" is set.  When it is false, malformed input is skipped.
private static int InternalGetChars(byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex, ref uint leftOverBits,
                                    ref uint leftOverCount,
                                    bool throwOnInvalid, bool flush)
{
    // Validate the parameters.
    if(bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    if(chars == null)
    {
        throw new ArgumentNullException("chars");
    }
    if(byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException
            ("byteIndex", _("ArgRange_Array"));
    }
    if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
    {
        throw new ArgumentOutOfRangeException
            ("byteCount", _("ArgRange_Array"));
    }
    if(charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException
            ("charIndex", _("ArgRange_Array"));
    }

    // Convert the bytes into the output buffer.
    uint ch;
    int length = chars.Length;
    int posn = charIndex;
    uint leftBits = leftOverBits;
    uint leftSoFar = (leftOverCount & (uint)0x0F);
    uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
    while(byteCount > 0)
    {
        // Fetch the next character from the byte buffer.
        ch = (uint)(bytes[byteIndex++]);
        --byteCount;
        if(leftSize == 0)
        {
            // Process a UTF-8 start character.
            if(ch < (uint)0x0080)
            {
                // Single-byte UTF-8 character.
                if(posn >= length)
                {
                    throw new ArgumentException
                        (_("Arg_InsufficientSpace"), "chars");
                }
                chars[posn++] = (char)ch;
            }
            else if((ch & (uint)0xE0) == (uint)0xC0)
            {
                // Double-byte UTF-8 character.
                leftBits = (ch & (uint)0x1F);
                leftSoFar = 1;
                leftSize = 2;
            }
            else if((ch & (uint)0xF0) == (uint)0xE0)
            {
                // Three-byte UTF-8 character.
                leftBits = (ch & (uint)0x0F);
                leftSoFar = 1;
                leftSize = 3;
            }
            else if((ch & (uint)0xF8) == (uint)0xF0)
            {
                // Four-byte UTF-8 character.
                leftBits = (ch & (uint)0x07);
                leftSoFar = 1;
                leftSize = 4;
            }
            else if((ch & (uint)0xFC) == (uint)0xF8)
            {
                // Five-byte UTF-8 character.
                leftBits = (ch & (uint)0x03);
                leftSoFar = 1;
                leftSize = 5;
            }
            else if((ch & (uint)0xFE) == (uint)0xFC)
            {
                // Six-byte UTF-8 character.  The lead byte pattern is
                // 1111110x, so only 0xFC and 0xFD qualify.  0xFE and 0xFF
                // are never valid lead bytes and must fall through to the
                // invalid-start branch below.
                leftBits = (ch & (uint)0x01);
                leftSoFar = 1;
                leftSize = 6;
            }
            else
            {
                // Invalid UTF-8 start character.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
            }
        }
        else
        {
            // Process an extra byte in a multi-byte sequence.
            if((ch & (uint)0xC0) == (uint)0x80)
            {
                leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
                if(++leftSoFar >= leftSize)
                {
                    // We have a complete character now.
                    if(leftBits < (uint)0x10000)
                    {
                        // U+FEFF is dropped from the output rather
                        // than stored.
                        if(leftBits != (uint)0xFEFF)
                        {
                            if(posn >= length)
                            {
                                throw new ArgumentException
                                    (_("Arg_InsufficientSpace"), "chars");
                            }
                            chars[posn++] = (char)leftBits;
                        }
                    }
                    else if(leftBits < (uint)0x110000)
                    {
                        // Emit the code point as a UTF-16 surrogate pair.
                        if((posn + 2) > length)
                        {
                            throw new ArgumentException
                                (_("Arg_InsufficientSpace"), "chars");
                        }
                        leftBits -= (uint)0x10000;
                        chars[posn++] = (char)((leftBits >> 10) +
                                               (uint)0xD800);
                        chars[posn++] = (char)((leftBits & (uint)0x3FF) +
                                               (uint)0xDC00);
                    }
                    else if(throwOnInvalid)
                    {
                        throw new ArgumentException
                            (_("Arg_InvalidUTF8"), "bytes");
                    }
                    leftSize = 0;
                }
            }
            else
            {
                // Invalid UTF-8 sequence: clear and restart.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
                leftSize = 0;

                // Back up so the offending byte is re-examined as a
                // potential start byte on the next iteration.
                --byteIndex;
                ++byteCount;
            }
        }
    }
    if(flush && leftSize != 0 && throwOnInvalid)
    {
        // We had left-over bytes that didn't make up
        // a complete UTF-8 character sequence.
        throw new ArgumentException
            (_("Arg_InvalidUTF8"), "bytes");
    }

    // Save the partial-sequence state for the next call.
    leftOverBits = leftBits;
    leftOverCount = (leftSoFar | (leftSize << 4));

    // Return the final length to the caller.
    return posn - charIndex;
}

// Get the characters that result from decoding a byte buffer.
// Decoding starts with no pending partial sequence and with
// flushing enabled; the state left behind is discarded.
public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                             char[] chars, int charIndex)
{
    uint leftOverBits = 0;
    uint leftOverCount = 0;
    return InternalGetChars(bytes, byteIndex, byteCount, chars,
                            charIndex, ref leftOverBits,
                            ref leftOverCount, throwOnInvalid, true);
}

// Get the maximum number of bytes needed to encode a
// specified number of characters.
//
// Throws ArgumentOutOfRangeException if "charCount" is negative or
// so large that the result does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    if(charCount < 0)
    {
        throw new ArgumentOutOfRangeException
            ("charCount", _("ArgRange_NonNegative"));
    }

    // Without this guard the multiplication below silently wraps
    // and hands the caller a meaningless (possibly negative) size.
    if(charCount > (int.MaxValue / 4))
    {
        throw new ArgumentOutOfRangeException("charCount");
    }
    return charCount * 4;
}

// Get the maximum number of characters needed to decode a
// specified number of bytes.
public override int GetMaxCharCount(int byteCount)
{
    if(byteCount >= 0)
    {
        // The bound used here is one character per input byte.
        return byteCount;
    }
    throw new ArgumentOutOfRangeException
        ("byteCount", _("ArgRange_NonNegative"));
}

// Create a decoder that honours this instance's "throwOnInvalid"
// setting.
public override Decoder GetDecoder()
{
    Decoder decoder = new UTF8Decoder(throwOnInvalid);
    return decoder;
}

// Create a new UTF-8 encoder.
public override Encoder GetEncoder()
{
    Encoder encoder = new UTF8Encoder();
    return encoder;
}

// Get the UTF-8 preamble: the three bytes EF BB BF when this
// instance was configured to emit an identifier, otherwise an
// empty array.  A fresh array is returned on every call.
public override byte[] GetPreamble()
{
    if(!emitIdentifier)
    {
        return new byte [0];
    }
    return new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
}

// Determine if this object is equal to another.  Two instances are
// equal when the other object is a UTF8Encoding with the same code
// page, identifier setting and invalid-input setting.
public override bool Equals(Object value)
{
    UTF8Encoding other = (value as UTF8Encoding);
    if(other == null)
    {
        return false;
    }
    if(codePage != other.codePage)
    {
        return false;
    }
    if(emitIdentifier != other.emitIdentifier)
    {
        return false;
    }
    return (throwOnInvalid == other.throwOnInvalid);
}

// Get the hash code for this object; defers to the base class.
public override int GetHashCode()
{
    return base.GetHashCode();
}

#if !ECMA_COMPAT

// Mail body name for this encoding.
internal override String InternalBodyName
{
    get
    {
        return "utf-8";
    }
}

// Human-readable name for this encoding.
internal override String InternalEncodingName
{
    get
    {
        return "Unicode (UTF-8)";
    }
}

// Mail agent header name for this encoding.
internal override String InternalHeaderName
{
    get
    {
        return "utf-8";
    }
}

// Whether this encoding can be displayed in a Web browser.
internal override bool InternalIsBrowserDisplay
{
    get
    {
        return true;
    }
}

// Whether this encoding can be saved from a Web browser.
internal override bool InternalIsBrowserSave
{
    get
    {
        return true;
    }
}

// Whether this encoding can be displayed in a mail/news agent.
internal override bool InternalIsMailNewsDisplay
{
    get
    {
        return true;
    }
}

// Determine if this encoding can be saved from a mail/news agent.
internal override bool InternalIsMailNewsSave
{
    get
    {
        return true;
    }
}

// IANA-preferred Web name for this encoding.
internal override String InternalWebName
{
    get
    {
        return "utf-8";
    }
}

// Windows code page represented by this object.
internal override int InternalWindowsCodePage
{
    get
    {
        return UnicodeEncoding.UNICODE_CODE_PAGE;
    }
}

#endif // !ECMA_COMPAT

// UTF-8 decoder implementation.  Keeps the partially decoded
// sequence between calls so that a character split across two
// buffers can still be decoded.
[Serializable]
private sealed class UTF8Decoder : Decoder
{
    private bool throwOnInvalid;
    private uint leftOverBits;
    private uint leftOverCount;

    // Constructor: start with no pending partial sequence.
    public UTF8Decoder(bool throwOnInvalid)
    {
        this.throwOnInvalid = throwOnInvalid;
        this.leftOverBits = 0;
        this.leftOverCount = 0;
    }

    // Count the characters that decoding would produce.  The saved
    // state is passed by value, so counting does not disturb it.
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        return InternalGetCharCount
            (bytes, index, count,
             this.leftOverBits, this.leftOverCount,
             this.throwOnInvalid, false);
    }

    // Decode the bytes, updating the saved partial-sequence state.
    public override int GetChars(byte[] bytes, int byteIndex,
                                 int byteCount, char[] chars,
                                 int charIndex)
    {
        return InternalGetChars
            (bytes, byteIndex, byteCount, chars, charIndex,
             ref this.leftOverBits, ref this.leftOverCount,
             this.throwOnInvalid, false);
    }

} // class UTF8Decoder

// UTF-8 encoder implementation.  Carries a single "leftOver" value
// from one call to the next.
[Serializable]
private sealed class UTF8Encoder : Encoder
{
    private uint leftOver;

    // Constructor: start with nothing left over.
    public UTF8Encoder()
    {
        this.leftOver = 0;
    }

    // Count the bytes that encoding would produce.  The saved state
    // is passed by value, so counting does not disturb it.
    public override int GetByteCount(char[] chars, int index,
                                     int count, bool flush)
    {
        return InternalGetByteCount
            (chars, index, count, this.leftOver, flush);
    }

    // Encode the characters, updating the saved state.
    // NOTE(review): the fifth parameter is named "byteCount" but is
    // presumably the starting index into "bytes", as in the base
    // Encoder.GetBytes contract - confirm against InternalGetBytes
    // before renaming.
    public override int GetBytes(char[] chars, int charIndex,
                                 int charCount, byte[] bytes,
                                 int byteCount, bool flush)
    {
        return InternalGetBytes
            (chars, charIndex, charCount, bytes, byteCount,
             ref this.leftOver, flush);
    }

} // class UTF8Encoder

}; // class UTF8Encoding

}; // namespace System.Text
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?