utf8encoding.cs

来自「没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没的 没」· CS 代码 · 共 1,015 行 · 第 1/2 页

CS
1,015
字号
							leftSoFar = 1;							leftSize = 4;						}						else if((ch & (uint)0xFC) == (uint)0xF8)						{							// Five-byte UTF-8 character.							leftBits = (ch & (uint)0x03);							leftSoFar = 1;							leftSize = 5;						}						else if((ch & (uint)0xFC) == (uint)0xFC)						{							// Six-byte UTF-8 character.							leftBits = (ch & (uint)0x03);							leftSoFar = 1;							leftSize = 6;						}						else						{							// Invalid UTF-8 start character.							if(throwOnInvalid)							{								throw new ArgumentException									(_("Arg_InvalidUTF8"), "bytes");							}						}					}					else					{						// Process an extra byte in a multi-byte sequence.						if((ch & (uint)0xC0) == (uint)0x80)						{							leftBits = ((leftBits << 6) | (ch & (uint)0x3F));							if(++leftSoFar >= leftSize)							{								// We have a complete character now.								if(leftBits < (uint)0x10000)								{									if(leftBits != (uint)0xFEFF)									{										++length;									}								}								else if(leftBits < (uint)0x110000)								{									length += 2;								}								else if(throwOnInvalid)								{									throw new ArgumentException										(_("Arg_InvalidUTF8"), "bytes");								}								leftSize = 0;							}						}						else						{							// Invalid UTF-8 sequence: clear and restart.							if(throwOnInvalid)							{								throw new ArgumentException									(_("Arg_InvalidUTF8"), "bytes");							}							leftSize = 0;							--index;							++count;						}					}				}				if(flush && leftSize != 0 && throwOnInvalid)				{					// We had left-over bytes that didn't make up					// a complete UTF-8 character sequence.					throw new ArgumentException						(_("Arg_InvalidUTF8"), "bytes");				}				// Return the final length to the caller.				return length;			}	// Get the number of characters needed to decode a byte buffer.	
public override int GetCharCount(byte[] bytes, int index, int count)
{
    // One-shot count: starts with no carried-over decoder state (the two
    // zero arguments).  The final "true" is presumably the flush flag of
    // InternalGetCharCount -- confirm against its declaration.
    return InternalGetCharCount(bytes, index, count, 0, 0,
                                throwOnInvalid, true);
}

// Get the characters that result from decoding a byte buffer.
//
// Decodes "byteCount" bytes of "bytes", starting at "byteIndex", into
// "chars" starting at "charIndex", and returns the number of chars written.
//
// "leftOverBits" and "leftOverCount" carry a partially decoded multi-byte
// sequence between calls.  "leftOverBits" holds the payload bits gathered
// so far.  "leftOverCount" packs two 4-bit fields: the low nibble is the
// number of bytes of the sequence consumed so far, and the next nibble is
// the total length of the sequence (0 when no sequence is in progress).
// Both are written back only when the method returns normally.
//
// If "throwOnInvalid" is true, malformed input raises ArgumentException;
// otherwise malformed bytes are skipped.  If "flush" is also true, an
// incomplete sequence at the end of the input is treated as malformed.
//
// Throws ArgumentNullException for a null array, ArgumentOutOfRangeException
// for an index/count outside the arrays, and ArgumentException when "chars"
// is too small for the decoded output.
private static int InternalGetChars(byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex, ref uint leftOverBits,
                                    ref uint leftOverCount,
                                    bool throwOnInvalid, bool flush)
{
    // Validate the parameters.
    if(bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    if(chars == null)
    {
        throw new ArgumentNullException("chars");
    }
    if(byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException
            ("byteIndex", _("ArgRange_Array"));
    }
    if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
    {
        throw new ArgumentOutOfRangeException
            ("byteCount", _("ArgRange_Array"));
    }
    if(charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException
            ("charIndex", _("ArgRange_Array"));
    }

    // Convert the bytes into the output buffer.
    uint ch;
    int length = chars.Length;
    int posn = charIndex;
    uint leftBits = leftOverBits;
    uint leftSoFar = (leftOverCount & (uint)0x0F);
    uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
    while(byteCount > 0)
    {
        // Fetch the next character from the byte buffer.
        ch = (uint)(bytes[byteIndex++]);
        --byteCount;
        if(leftSize == 0)
        {
            // Process a UTF-8 start character.
            if(ch < (uint)0x0080)
            {
                // Single-byte UTF-8 character.
                if(posn >= length)
                {
                    throw new ArgumentException
                        (_("Arg_InsufficientSpace"), "chars");
                }
                chars[posn++] = (char)ch;
            }
            else if((ch & (uint)0xE0) == (uint)0xC0)
            {
                // Double-byte UTF-8 character.
                leftBits = (ch & (uint)0x1F);
                leftSoFar = 1;
                leftSize = 2;
            }
            else if((ch & (uint)0xF0) == (uint)0xE0)
            {
                // Three-byte UTF-8 character.
                leftBits = (ch & (uint)0x0F);
                leftSoFar = 1;
                leftSize = 3;
            }
            else if((ch & (uint)0xF8) == (uint)0xF0)
            {
                // Four-byte UTF-8 character.
                leftBits = (ch & (uint)0x07);
                leftSoFar = 1;
                leftSize = 4;
            }
            else if((ch & (uint)0xFC) == (uint)0xF8)
            {
                // Five-byte UTF-8 character.
                leftBits = (ch & (uint)0x03);
                leftSoFar = 1;
                leftSize = 5;
            }
            else if((ch & (uint)0xFE) == (uint)0xFC)
            {
                // Six-byte UTF-8 character (lead byte 0xFC or 0xFD only).
                // The mask is 0xFE rather than 0xFC so that 0xFE and 0xFF,
                // which are never valid UTF-8 octets, fall through to the
                // invalid-start branch instead of opening a sequence.
                leftBits = (ch & (uint)0x01);
                leftSoFar = 1;
                leftSize = 6;
            }
            else
            {
                // Invalid UTF-8 start character: a stray continuation
                // byte (0x80-0xBF) or 0xFE/0xFF.  Skipped unless the
                // caller asked for an exception.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
            }
        }
        else
        {
            // Process an extra byte in a multi-byte sequence.
            if((ch & (uint)0xC0) == (uint)0x80)
            {
                leftBits = ((leftBits << 6) | (ch & (uint)0x3F));
                if(++leftSoFar >= leftSize)
                {
                    // We have a complete character now.
                    if(leftBits < (uint)0x10000)
                    {
                        // U+FEFF is dropped wherever it occurs in the
                        // input, not just at the start.
                        if(leftBits != (uint)0xFEFF)
                        {
                            if(posn >= length)
                            {
                                throw new ArgumentException
                                    (_("Arg_InsufficientSpace"),
                                     "chars");
                            }
                            chars[posn++] = (char)leftBits;
                        }
                    }
                    else if(leftBits < (uint)0x110000)
                    {
                        // Supplementary code point: emit a UTF-16
                        // surrogate pair (high, then low).
                        if((posn + 2) > length)
                        {
                            throw new ArgumentException
                                (_("Arg_InsufficientSpace"),
                                 "chars");
                        }
                        leftBits -= (uint)0x10000;
                        chars[posn++] = (char)((leftBits >> 10) +
                                               (uint)0xD800);
                        chars[posn++] =
                            (char)((leftBits & (uint)0x3FF) +
                                   (uint)0xDC00);
                    }
                    else if(throwOnInvalid)
                    {
                        // Value is beyond U+10FFFF.  Without
                        // throwOnInvalid it is silently discarded.
                        throw new ArgumentException
                            (_("Arg_InvalidUTF8"), "bytes");
                    }
                    leftSize = 0;
                }
            }
            else
            {
                // Invalid UTF-8 sequence: clear and restart.
                if(throwOnInvalid)
                {
                    throw new ArgumentException
                        (_("Arg_InvalidUTF8"), "bytes");
                }
                // Abandon the partial sequence and push the current
                // byte back so it is re-examined as a start character.
                leftSize = 0;
                --byteIndex;
                ++byteCount;
            }
        }
    }
    if(flush && leftSize != 0 && throwOnInvalid)
    {
        // We had left-over bytes that didn't make up
        // a complete UTF-8 character sequence.
        throw new ArgumentException
            (_("Arg_InvalidUTF8"), "bytes");
    }

    // Save the partial-sequence state for the next call.
    leftOverBits = leftBits;
    leftOverCount = (leftSoFar | (leftSize << 4));

    // Return the final length to the caller.
    return posn - charIndex;
}

// Get the characters that result from decoding a byte buffer.
public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                             char[] chars, int charIndex)
{
    // One-shot decode: start with no carried-over state and pass
    // flush = true, so an incomplete trailing sequence is reported
    // when throwOnInvalid is set.  The state locals are discarded.
    uint leftOverBits = 0;
    uint leftOverCount = 0;
    return InternalGetChars(bytes, byteIndex, byteCount,
                            chars, charIndex, ref leftOverBits,
                            ref leftOverCount, throwOnInvalid,
                            true);
}

// Get the maximum number of bytes needed to encode a
// specified number of characters.
//
// Returns charCount * 4.  Throws ArgumentOutOfRangeException if
// "charCount" is negative, or if it is so large that the result would
// not fit in an int (previously the multiplication could silently wrap).
public override int GetMaxByteCount(int charCount)
{
    if(charCount < 0)
    {
        throw new ArgumentOutOfRangeException
            ("charCount", _("ArgRange_NonNegative"));
    }
    if(charCount > (int.MaxValue / 4))
    {
        // charCount * 4 would overflow Int32.
        throw new ArgumentOutOfRangeException("charCount");
    }
    return charCount * 4;
}

// Get the maximum number of characters needed to decode a
// specified number of bytes.
//
// Returns "byteCount" unchanged.  Throws ArgumentOutOfRangeException
// if "byteCount" is negative.
public override int GetMaxCharCount(int byteCount)
{
    if(byteCount < 0)
    {
        throw new ArgumentOutOfRangeException
            ("byteCount", _("ArgRange_NonNegative"));
    }
    return byteCount;
}

// Get a UTF8-specific decoder that is attached to this instance.
// The decoder is created with this encoding's throwOnInvalid setting.
public override Decoder GetDecoder()
{
    return new UTF8Decoder(throwOnInvalid);
}

// Get a UTF8-specific encoder that is attached to this instance.
public override Encoder GetEncoder()
{
    return new UTF8Encoder();
}

// Get the UTF8 preamble: the three bytes EF BB BF when emitIdentifier
// is set, otherwise an empty array.  A new array is returned each call.
public override byte[] GetPreamble()
{
    if(emitIdentifier)
    {
        byte[] pre = new byte [3];
        pre[0] = (byte)0xEF;
        pre[1] = (byte)0xBB;
        pre[2] = (byte)0xBF;
        return pre;
    }
    else
    {
        return new byte [0];
    }
}

// Determine if this object is equal to another.  Two UTF8Encoding
// objects are equal when their code page, emitIdentifier and
// throwOnInvalid settings all match; anything else compares unequal.
public override bool Equals(Object value)
{
    UTF8Encoding enc = (value as UTF8Encoding);
    if(enc != null)
    {
        return (codePage == enc.codePage &&
                emitIdentifier == enc.emitIdentifier &&
                throwOnInvalid == enc.throwOnInvalid);
    }
    else
    {
        return false;
    }
}

// Get the hash code for this object (delegates to the base class).
public override int GetHashCode()
{
    return base.GetHashCode();
}

#if !ECMA_COMPAT

// Get the mail body name for this encoding.
internal override String InternalBodyName
{
    get { return "utf-8"; }
}

// Human-readable display name of this encoding.
internal override String InternalEncodingName
{
    get { return "Unicode (UTF-8)"; }
}

// Name used for this encoding in mail agent headers.
internal override String InternalHeaderName
{
    get { return "utf-8"; }
}

// Whether a Web browser can display this encoding.
internal override bool InternalIsBrowserDisplay
{
    get { return true; }
}

// Whether a Web browser can save in this encoding.
internal override bool InternalIsBrowserSave
{
    get { return true; }
}

// Whether a mail/news agent can display this encoding.
internal override bool InternalIsMailNewsDisplay
{
    get { return true; }
}

// Whether a mail/news agent can save in this encoding.
internal override bool InternalIsMailNewsSave
{
    get { return true; }
}

// IANA-preferred Web name of this encoding.
internal override String InternalWebName
{
    get { return "utf-8"; }
}

// Windows code page reported for this encoding.
internal override int InternalWindowsCodePage
{
    get { return UnicodeEncoding.UNICODE_CODE_PAGE; }
}

#endif // !ECMA_COMPAT

// Streaming UTF-8 decoder.  It keeps the state of a partially decoded
// multi-byte sequence in leftOverBits/leftOverCount so that a sequence
// split across two GetChars calls can be completed by the second call.
[Serializable]
private sealed class UTF8Decoder : Decoder
{
    private bool throwOnInvalid;
    private uint leftOverBits;
    private uint leftOverCount;

    // Create a decoder with no pending partial sequence.
    public UTF8Decoder(bool throwOnInvalid)
    {
        leftOverBits = 0;
        leftOverCount = 0;
        this.throwOnInvalid = throwOnInvalid;
    }

    // Count the chars that decoding would produce, starting from the
    // current pending state.  The state is passed by value here, so
    // counting does not change this decoder.
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        return InternalGetCharCount(
            bytes, index, count,
            leftOverBits, leftOverCount,
            throwOnInvalid, false);
    }

    // Decode bytes into chars.  The pending state is passed by
    // reference and updated, and flush is false, so an incomplete
    // sequence at the end of the input is kept for the next call.
    public override int GetChars(byte[] bytes, int byteIndex,
                                 int byteCount, char[] chars,
                                 int charIndex)
    {
        return InternalGetChars(
            bytes, byteIndex, byteCount,
            chars, charIndex,
            ref leftOverBits, ref leftOverCount,
            throwOnInvalid, false);
    }

} // class UTF8Decoder

// Streaming UTF-8 encoder.  "leftOver" is state handed to the shared
// InternalGetByteCount/InternalGetBytes helpers between calls.
[Serializable]
private sealed class UTF8Encoder : Encoder
{
    private uint leftOver;

    // Create an encoder with no pending state.
    public UTF8Encoder()
    {
        leftOver = 0;
    }

    // Count the bytes that encoding would produce.  The pending state
    // is passed by value, so counting does not change this encoder.
    public override int GetByteCount(char[] chars, int index,
                                     int count, bool flush)
    {
        return InternalGetByteCount(chars, index, count, leftOver, flush);
    }

    // Encode chars into bytes, updating the pending state.
    // NOTE(review): the parameter named "byteCount" is forwarded in the
    // position right after "bytes"; presumably it is the starting index
    // into "bytes" rather than a count -- confirm against InternalGetBytes.
    public override int GetBytes(char[] chars, int charIndex,
                                 int charCount, byte[] bytes,
                                 int byteCount, bool flush)
    {
        return InternalGetBytes(
            chars, charIndex, charCount, bytes, byteCount,
            ref leftOver, flush);
    }

} // class UTF8Encoder

}; // class UTF8Encoding

}; // namespace System.Text

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?