📄 utf8encoding.cls
字号:
End Select
Else
' if we get in here then we must be on bytes 2-4 in a byte sequence,
' so they all will look like 10xxxxxx.
If (b And &HC0) <> &H80 Then Call ByteStreamError(ByteIndex)
Ch = Ch * &H40
Ch = Ch + (b And &H3F)
BytesLeft = BytesLeft - 1
Select Case BytesInSequence
Case 2 ' 110xxxxx 10xxxxxx - char from &h80 to &h7ff
If Ch = 0 Then Call ByteStreamError(ByteIndex)
If BytesLeft = 0 Then
If putChars Then
If CharIndex > maxCharIndex Then Call SmallBufferError("Chars")
Chars(CharIndex) = Ch
CharIndex = CharIndex + 1
End If
BytesInSequence = 0
CharCount = CharCount + 1
End If
Case 3 ' 1110xxxx 10xxxxxx 10xxxxxx - char from &h800 to &hffff, including &hd800-&hdfff for bad surrogates
If Ch = 0 Then Call ByteStreamError(ByteIndex)
Select Case BytesLeft
Case 0
If putChars Then
If CharIndex > maxCharIndex Then Call SmallBufferError("Chars")
Chars(CharIndex) = AsWord(Ch)
CharIndex = CharIndex + 1
End If
BytesInSequence = 0
CharCount = CharCount + 1
Case 1 ' check for shortest form conformity
If Ch < &H20 Then Call ByteStreamError(ByteIndex)
End Select
Case 4 ' 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx - used for surrogate pairs
If BytesLeft = 0 Then
Dim hs As Long
Dim ls As Long
ls = (Ch And &H3FF) + &HDC00&
hs = (Ch \ &H400) + &HD7C0&
If ls < &HDC00& Or ls > &HDFFF& Then Call ByteStreamError(ByteIndex)
If hs < &HD800& Or hs > &HDBFF& Then Call ByteStreamError(ByteIndex)
If putChars Then
If CharIndex + 1 > maxCharIndex Then Call SmallBufferError("Chars")
Chars(CharIndex + 1) = AsWord(ls) ' AsWord is used to shove the highest bit in
Chars(CharIndex) = AsWord(hs) ' without causing an overflow. The char will
CharIndex = CharIndex + 2 ' be negative afterwards.
End If
CharCount = CharCount + 2
BytesInSequence = 0
Else
' check for shortest form conformity
If Ch = 0 Then Call ByteStreamError(ByteIndex)
End If
Case Else
Call ByteStreamError(ByteIndex)
BytesInSequence = 0
BytesLeft = 0
End Select
End If
ByteIndex = ByteIndex + 1
Loop
If Not Decoder Is Nothing Then
Decoder.BytesLeft = BytesLeft
If BytesLeft = 0 Then
Decoder.Char = 0
Else
Decoder.Char = Ch
End If
Decoder.BytesInSequence = BytesInSequence
End If
InternalGetChars = CharCount
End Function
Friend Function InternalGetByteCount(ByRef Chars() As Integer, ByVal Index As Long, ByVal Count As Long, ByRef Encoder As UTF8Encoder) As Long
Dim Ret As Long
Dim HasHighSurrogate As Boolean
If Index < LBound(Chars) Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_LBound), "Index", Index)
If Count < 0 Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_NeedNonNegNum), "Count", Count)
If Index + Count - 1 > UBound(Chars) Then _
Throw Cor.NewArgumentException(Environment.GetResourceString(Argument_InvalidCountOffset), "Count")
If Not Encoder Is Nothing Then HasHighSurrogate = (Encoder.HighSurrogateChar <> 0)
Do While Count > 0
Select Case Chars(Index)
Case 0 To &H7F ' ascii value
If HasHighSurrogate Then
If mThrowException Then Throw Cor.NewArgumentException("An incorrect low surrogate was found immediately after a high surrogate.", "Chars")
End If
Ret = Ret + 1
Case &H80 To &H7FF ' 2 byte UTF-8 encoding
If HasHighSurrogate Then
If mThrowException Then Throw Cor.NewArgumentException("An incorrect low surrogate was found immediately after a high surrogate.", "Chars")
End If
Ret = Ret + 2
Case &HD800 To &HDBFF ' high surrogate value
If HasHighSurrogate Then
If mThrowException Then Throw Cor.NewArgumentException("Second high surrogate in a row was found.", "Chars")
Else
HasHighSurrogate = True
End If
Ret = Ret + 3
Case &HDC00 To &HDFFF ' low surrogate value
If HasHighSurrogate Then
Ret = Ret + 1
HasHighSurrogate = False
Else
If mThrowException Then Throw Cor.NewArgumentException("Low surrogate was found without a precedding high surrogate.", "Chars")
Ret = Ret + 3
End If
Case Else
Ret = Ret + 3
End Select
Index = Index + 1
Count = Count - 1
Loop
If HasHighSurrogate Then
If Not Encoder Is Nothing Then
If Encoder.Flush Then
If mThrowException Then Throw Cor.NewArgumentException("A high surrogate character was not followed by a low surrogate character.", "Chars")
End If
Else
If mThrowException Then Throw Cor.NewArgumentException("Missing Low Surrogage.", "Chars")
End If
End If
InternalGetByteCount = Ret
End Function
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Private helpers
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Private Sub SmallBufferError(ByVal ParamName As String)
Throw Cor.NewArgumentException(Environment.GetResourceString(Argument_SmallConversionBuffer), ParamName)
End Sub
Private Function EncodeSurrogate(ByVal Char As Long, ByRef Bytes() As Byte, ByVal Index As Long) As Long
Bytes(Index) = &HE0 Or ((Char \ &H1000&) And &HF)
Bytes(Index + 1) = &H80 Or ((Char \ &H40&) And &H3F)
Bytes(Index + 2) = &H80 Or (Char And &H3F)
EncodeSurrogate = Index + 3
End Function
Private Sub ByteStreamError(ByVal Index As Long)
If Not mThrowException Then Exit Sub
Throw Cor.NewArgumentException(cString.Format("An invalid byte was found at index {0}.", Index), "Bytes")
End Sub
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Class Events
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Private Sub Class_Initialize()
With mCharsSA
.cbElements = 2
.cDims = 1
.cElements = &H7FFFFFFF
End With
End Sub
Private Sub Class_ReadProperties(PropBag As PropertyBag)
With PropBag
mThrowException = .ReadProperty("ThrowException", True)
mShouldEmitUTF8Identifier = .ReadProperty("EmitBOM", True)
End With
End Sub
Private Sub Class_Terminate()
SAPtr(mChars) = 0
End Sub
Private Sub Class_WriteProperties(PropBag As PropertyBag)
With PropBag
Call .WriteProperty("ThrowException", mThrowException)
Call .WriteProperty("EmitBOM", mShouldEmitUTF8Identifier)
End With
End Sub
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' IObject Interface
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Private Function IObject_Equals(Value As Variant) As Boolean
IObject_Equals = Equals(Value)
End Function
Private Function IObject_GetHashcode() As Long
IObject_GetHashcode = GetHashCode
End Function
Private Function IObject_ToString() As String
IObject_ToString = ToString
End Function
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Encoding Interface
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Private Property Get Encoding_BodyName() As String
Encoding_BodyName = BodyName
End Property
Private Property Get Encoding_CodePage() As Long
Encoding_CodePage = CodePage
End Property
Private Property Get Encoding_EncodingName() As String
Encoding_EncodingName = EncodingName
End Property
Private Function Encoding_Equals(Value As Variant) As Boolean
Encoding_Equals = Equals(Value)
End Function
Private Function Encoding_GetByteCount(Value As Variant, Optional Index As Variant, Optional Count As Variant) As Long
Encoding_GetByteCount = GetByteCount(Value, Index, Count)
End Function
Private Function Encoding_GetBytes(Source As Variant, Optional Index As Variant, Optional Count As Variant) As Byte()
Encoding_GetBytes = GetBytes(Source, Index, Count)
End Function
Private Function Encoding_GetBytesEx(Source As Variant, ByVal CharIndex As Long, ByVal CharCount As Long, Bytes() As Byte, ByVal ByteIndex As Long) As Long
Encoding_GetBytesEx = GetBytesEx(Source, CharIndex, CharCount, Bytes, ByteIndex)
End Function
Private Function Encoding_GetCharCount(Bytes() As Byte, Optional Index As Variant, Optional Count As Variant) As Long
Encoding_GetCharCount = GetCharCount(Bytes, Index, Count)
End Function
Private Function Encoding_GetChars(Bytes() As Byte, Optional Index As Variant, Optional Count As Variant) As Integer()
Encoding_GetChars = GetChars(Bytes, Index, Count)
End Function
Private Function Encoding_GetCharsEx(Bytes() As Byte, ByVal ByteIndex As Long, ByVal ByteCount As Long, Chars() As Integer, ByVal CharIndex As Long) As Long
Encoding_GetCharsEx = GetCharsEx(Bytes, ByteIndex, ByteCount, Chars, CharIndex)
End Function
Private Function Encoding_GetDecoder() As Decoder
Set Encoding_GetDecoder = GetDecoder
End Function
Private Function Encoding_GetEncoder() As Encoder
Set Encoding_GetEncoder = GetEncoder
End Function
Private Function Encoding_GetHashCode() As Long
Encoding_GetHashCode = GetHashCode
End Function
Private Function Encoding_GetMaxByteCount(ByVal CharCount As Long) As Long
Encoding_GetMaxByteCount = GetMaxByteCount(CharCount)
End Function
Private Function Encoding_GetMaxCharCount(ByVal ByteCount As Long) As Long
Encoding_GetMaxCharCount = GetMaxCharCount(ByteCount)
End Function
Private Function Encoding_GetPreamble() As Byte()
Encoding_GetPreamble = GetPreamble
End Function
Private Function Encoding_GetString(Bytes() As Byte, Optional Index As Variant, Optional Count As Variant) As String
Encoding_GetString = GetString(Bytes, Index, Count)
End Function
Private Property Get Encoding_HeaderName() As String
Encoding_HeaderName = HeaderName
End Property
Private Property Get Encoding_IsBrowserDisplay() As Boolean
Encoding_IsBrowserDisplay = IsBrowserDisplay
End Property
Private Property Get Encoding_IsBrowserSave() As Boolean
Encoding_IsBrowserSave = IsBrowserSave
End Property
Private Property Get Encoding_IsMailNewsDisplay() As Boolean
Encoding_IsMailNewsDisplay = IsMailNewsDisplay
End Property
Private Property Get Encoding_IsMailNewsSave() As Boolean
Encoding_IsMailNewsSave = IsMailNewsSave
End Property
Private Function Encoding_ToString() As String
Encoding_ToString = ToString
End Function
Private Property Get Encoding_WebName() As String
Encoding_WebName = WebName
End Property
Private Property Get Encoding_WindowsCodePage() As Long
Encoding_WindowsCodePage = WindowsCodePage
End Property
Private Property Get Encoding_IsSingleByte() As Boolean
Encoding_IsSingleByte = IsSingleByte
End Property
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -