utf8encoding.cs

来自「没的没的没的没的没的没的没的没的没的没的没的没的没的没」· CS 代码 · 共 1,015 行 · 第 1/2 页
1,015 行
							leftSoFar = 1;							leftSize = 4;						}						else if((ch & (uint)0xFC) == (uint)0xF8)						{							// Five-byte UTF-8 character.							leftBits = (ch & (uint)0x03);							leftSoFar = 1;							leftSize = 5;						}						else if((ch & (uint)0xFC) == (uint)0xFC)						{							// Six-byte UTF-8 character.							leftBits = (ch & (uint)0x03);							leftSoFar = 1;							leftSize = 6;						}						else						{							// Invalid UTF-8 start character.							if(throwOnInvalid)							{								throw new ArgumentException									(_("Arg_InvalidUTF8"), "bytes");							}						}					}					else					{						// Process an extra byte in a multi-byte sequence.						if((ch & (uint)0xC0) == (uint)0x80)						{							leftBits = ((leftBits << 6) | (ch & (uint)0x3F));							if(++leftSoFar >= leftSize)							{								// We have a complete character now.								if(leftBits < (uint)0x10000)								{									if(leftBits != (uint)0xFEFF)									{										++length;									}								}								else if(leftBits < (uint)0x110000)								{									length += 2;								}								else if(throwOnInvalid)								{									throw new ArgumentException										(_("Arg_InvalidUTF8"), "bytes");								}								leftSize = 0;							}						}						else						{							// Invalid UTF-8 sequence: clear and restart.							if(throwOnInvalid)							{								throw new ArgumentException									(_("Arg_InvalidUTF8"), "bytes");							}							leftSize = 0;							--index;							++count;						}					}				}				if(flush && leftSize != 0 && throwOnInvalid)				{					// We had left-over bytes that didn't make up					// a complete UTF-8 character sequence.					throw new ArgumentException						(_("Arg_InvalidUTF8"), "bytes");				}				// Return the final length to the caller.				return length;			}	// Get the number of characters needed to decode a byte buffer.	
public override int GetCharCount(byte[] bytes, int index, int count)
{
	// Stateless count: start with no carried-over sequence state and
	// treat the buffer as complete (flush = true).
	int total = InternalGetCharCount(bytes, index, count, 0, 0,
									 throwOnInvalid, true);
	return total;
}

// Decode UTF-8 bytes into UTF-16 characters.
//
// bytes, byteIndex, byteCount	- the input range to decode.
// chars, charIndex				- the output buffer and the first slot to fill.
// leftOverBits					- in/out: payload bits gathered so far for a
//								  multi-byte sequence that is still open.
// leftOverCount				- in/out: packed sequence state; the low 4 bits
//								  hold the number of bytes consumed so far and
//								  the next 4 bits hold the expected length.
// throwOnInvalid				- throw ArgumentException on malformed input
//								  instead of silently dropping it.
// flush						- when true (together with throwOnInvalid), a
//								  sequence still open at the end of the input
//								  is reported as an error.
//
// Returns the number of characters written to "chars".  The two "ref"
// state values are only written back when the method returns normally.
//
// NOTE(review): no check is made here for overlong encodings or for
// code points in the surrogate range; confirm whether that is intended.
private static int InternalGetChars(byte[] bytes, int byteIndex,
									int byteCount, char[] chars,
									int charIndex, ref uint leftOverBits,
									ref uint leftOverCount,
									bool throwOnInvalid, bool flush)
{
	// Argument checks, in the order callers may observe them.
	if(bytes == null)
	{
		throw new ArgumentNullException("bytes");
	}
	if(chars == null)
	{
		throw new ArgumentNullException("chars");
	}
	if(byteIndex < 0 || byteIndex > bytes.Length)
	{
		throw new ArgumentOutOfRangeException
			("byteIndex", _("ArgRange_Array"));
	}
	if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
	{
		throw new ArgumentOutOfRangeException
			("byteCount", _("ArgRange_Array"));
	}
	if(charIndex < 0 || charIndex > chars.Length)
	{
		throw new ArgumentOutOfRangeException
			("charIndex", _("ArgRange_Array"));
	}

	// Unpack the carried-over state into working locals.
	int capacity = chars.Length;
	int outPos = charIndex;
	uint pending = leftOverBits;
	uint seen = (leftOverCount & 0x0Fu);
	uint expected = ((leftOverCount >> 4) & 0x0Fu);

	// The range was validated above, so this sum cannot overflow.
	int inPos = byteIndex;
	int inEnd = byteIndex + byteCount;

	while(inPos < inEnd)
	{
		uint b = (uint)(bytes[inPos++]);

		if(expected == 0)
		{
			// No sequence is open: this byte must start a character.
			if(b < 0x0080u)
			{
				// Plain single-byte character.
				if(outPos >= capacity)
				{
					throw new ArgumentException
						(_("Arg_InsufficientSpace"), "chars");
				}
				chars[outPos++] = (char)b;
				continue;
			}

			// Classify the lead byte: sequence length and payload mask.
			uint size;
			uint mask;
			if((b & 0xE0u) == 0xC0u)
			{
				size = 2;
				mask = 0x1Fu;
			}
			else if((b & 0xF0u) == 0xE0u)
			{
				size = 3;
				mask = 0x0Fu;
			}
			else if((b & 0xF8u) == 0xF0u)
			{
				size = 4;
				mask = 0x07u;
			}
			else if((b & 0xFCu) == 0xF8u)
			{
				size = 5;
				mask = 0x03u;
			}
			else if((b & 0xFCu) == 0xFCu)
			{
				// NOTE(review): this test also matches 0xFE and 0xFF.
				size = 6;
				mask = 0x03u;
			}
			else
			{
				// Not a valid lead byte.
				size = 0;
				mask = 0;
			}

			if(size != 0)
			{
				pending = (b & mask);
				seen = 1;
				expected = size;
			}
			else if(throwOnInvalid)
			{
				throw new ArgumentException
					(_("Arg_InvalidUTF8"), "bytes");
			}
			continue;
		}

		// A sequence is open: this byte must be a continuation byte.
		if((b & 0xC0u) != 0x80u)
		{
			if(throwOnInvalid)
			{
				throw new ArgumentException
					(_("Arg_InvalidUTF8"), "bytes");
			}

			// Abandon the open sequence and look at this same byte
			// again, this time as a potential lead byte.
			expected = 0;
			--inPos;
			continue;
		}

		pending = ((pending << 6) | (b & 0x3Fu));
		++seen;
		if(seen < expected)
		{
			// Still waiting for more continuation bytes.
			continue;
		}

		// The sequence is complete: emit the decoded value.
		if(pending < 0x10000u)
		{
			// U+FEFF is dropped rather than written to the output.
			if(pending != 0xFEFFu)
			{
				if(outPos >= capacity)
				{
					throw new ArgumentException
						(_("Arg_InsufficientSpace"),
						 "chars");
				}
				chars[outPos++] = (char)pending;
			}
		}
		else if(pending < 0x110000u)
		{
			// Needs a surrogate pair, i.e. two output slots.
			if((outPos + 2) > capacity)
			{
				throw new ArgumentException
					(_("Arg_InsufficientSpace"),
					 "chars");
			}
			pending -= 0x10000u;
			chars[outPos++] = (char)((pending >> 10) + 0xD800u);
			chars[outPos++] = (char)((pending & 0x3FFu) + 0xDC00u);
		}
		else if(throwOnInvalid)
		{
			// Value is beyond the range that can be represented.
			throw new ArgumentException
				(_("Arg_InvalidUTF8"), "bytes");
		}
		expected = 0;
	}

	if(flush && expected != 0 && throwOnInvalid)
	{
		// The input ended in the middle of a multi-byte sequence.
		throw new ArgumentException
			(_("Arg_InvalidUTF8"), "bytes");
	}

	// Re-pack the state for the next call and report how much was written.
	leftOverBits = pending;
	leftOverCount = (seen | (expected << 4));
	return outPos - charIndex;
}

// Get the characters that result from decoding a byte buffer.
public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
							 char[] chars, int charIndex)
{
	// Stateless decode: begin with empty sequence state and pass
	// flush = true so the buffer is treated as complete.
	uint leftOverBits = 0;
	uint leftOverCount = 0;
	return InternalGetChars(bytes, byteIndex, byteCount,
							chars, charIndex, ref leftOverBits,
							ref leftOverCount, throwOnInvalid,
							true);
}

// Get the maximum number of bytes needed to encode a
// specified number of characters.
//
// Throws ArgumentOutOfRangeException if "charCount" is negative, or if
// the result (charCount * 4) would not fit in an "int".
public override int GetMaxByteCount(int charCount)
{
	if(charCount < 0)
	{
		throw new ArgumentOutOfRangeException
			("charCount", _("ArgRange_NonNegative"));
	}
	if(charCount > (int.MaxValue / 4))
	{
		// Without this guard the multiplication below would wrap
		// around and hand the caller a negative or too-small size.
		throw new ArgumentOutOfRangeException("charCount");
	}
	return charCount * 4;
}

// Get the maximum number of characters needed to decode a
// specified number of bytes.
//
// Throws ArgumentOutOfRangeException if "byteCount" is negative.
public override int GetMaxCharCount(int byteCount)
{
	if(byteCount < 0)
	{
		throw new ArgumentOutOfRangeException
			("byteCount", _("ArgRange_NonNegative"));
	}
	return byteCount;
}

// Get a UTF8-specific decoder that is attached to this instance.
// The decoder is created with this encoding's "throwOnInvalid" setting.
public override Decoder GetDecoder()
{
	return new UTF8Decoder(throwOnInvalid);
}

// Get a UTF8-specific encoder that is attached to this instance.
public override Encoder GetEncoder()
{
	return new UTF8Encoder();
}

// Get the UTF8 preamble: the bytes EF BB BF when "emitIdentifier"
// is set, otherwise an empty array.  A new array is returned each time.
public override byte[] GetPreamble()
{
	if(emitIdentifier)
	{
		return new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
	}
	return new byte [0];
}

// Determine if this object is equal to another.  Two UTF8Encoding
// objects are equal when their code page, "emitIdentifier" and
// "throwOnInvalid" settings all match.
public override bool Equals(Object value)
{
	UTF8Encoding enc = (value as UTF8Encoding);
	if(enc == null)
	{
		// Null, or not a UTF8Encoding at all.
		return false;
	}
	return (codePage == enc.codePage &&
			emitIdentifier == enc.emitIdentifier &&
			throwOnInvalid == enc.throwOnInvalid);
}

// Get the hash code for this object (delegates to the base class).
public override int GetHashCode()
{
	return base.GetHashCode();
}

#if !ECMA_COMPAT

// Get the mail body name for this encoding.
internal override String InternalBodyName
{
	get { return "utf-8"; }
}

// Human-readable name for this encoding.
internal override String InternalEncodingName
{
	get { return "Unicode (UTF-8)"; }
}

// Name to use in mail agent headers for this encoding.
internal override String InternalHeaderName
{
	get { return "utf-8"; }
}

// Whether a Web browser can display this encoding.
internal override bool InternalIsBrowserDisplay
{
	get { return true; }
}

// Whether a Web browser can save in this encoding.
internal override bool InternalIsBrowserSave
{
	get { return true; }
}

// Whether a mail/news agent can display this encoding.
internal override bool InternalIsMailNewsDisplay
{
	get { return true; }
}

// Whether a mail/news agent can save in this encoding.
internal override bool InternalIsMailNewsSave
{
	get { return true; }
}

// IANA-preferred Web name for this encoding.
internal override String InternalWebName
{
	get { return "utf-8"; }
}

// Windows code page reported for this object.
internal override int InternalWindowsCodePage
{
	get { return UnicodeEncoding.UNICODE_CODE_PAGE; }
}

#endif // !ECMA_COMPAT

// Decoder that carries partial multi-byte sequence state between calls.
[Serializable]
private sealed class UTF8Decoder : Decoder
{
	private bool throwOnInvalid;
	private uint leftOverBits;
	private uint leftOverCount;

	// Create a decoder with no pending sequence state.
	public UTF8Decoder(bool throwOnInvalid)
	{
		this.throwOnInvalid = throwOnInvalid;
		this.leftOverBits = 0;
		this.leftOverCount = 0;
	}

	// Count the characters a decode would produce.  The saved state is
	// passed by value, so counting does not modify this decoder; the
	// final argument (flush) is false.
	public override int GetCharCount(byte[] bytes, int index, int count)
	{
		return InternalGetCharCount(bytes, index, count,
									this.leftOverBits,
									this.leftOverCount,
									this.throwOnInvalid, false);
	}

	// Decode bytes into characters.  The saved state is passed by
	// reference so an unfinished sequence can continue on the next
	// call; the final argument (flush) is false.
	public override int GetChars(byte[] bytes, int byteIndex,
								 int byteCount, char[] chars,
								 int charIndex)
	{
		return InternalGetChars(bytes, byteIndex, byteCount,
								chars, charIndex,
								ref this.leftOverBits,
								ref this.leftOverCount,
								this.throwOnInvalid, false);
	}

} // class UTF8Decoder

// Encoder that carries a single "leftOver" state value between calls.
[Serializable]
private sealed class UTF8Encoder : Encoder
{
	private uint leftOver;

	// Create an encoder with no pending state.
	public UTF8Encoder()
	{
		this.leftOver = 0;
	}

	// Count the bytes an encode would produce.  The saved state is
	// passed by value, so counting does not modify this encoder.
	public override int GetByteCount(char[] chars, int index,
									 int count, bool flush)
	{
		return InternalGetByteCount
			(chars, index, count, this.leftOver, flush);
	}

	// Encode characters into bytes, updating the saved state.
	// NOTE(review): "byteCount" is forwarded in the argument slot right
	// after "bytes"; presumably it is the starting offset within
	// "bytes" rather than a length -- confirm against InternalGetBytes.
	public override int GetBytes(char[] chars, int charIndex,
								 int charCount, byte[] bytes,
								 int byteCount, bool flush)
	{
		return InternalGetBytes
			(chars, charIndex, charCount, bytes, byteCount,
			 ref this.leftOver, flush);
	}

} // class UTF8Encoder

}; // class UTF8Encoding

}; // namespace System.Text
utf8encoding.cs - 源码说明

本页面展示了「没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的没的」中的 utf8encoding.cs 源码文件，采用 CS 编程语言编写，共 1,015 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?