📄 utf8encoding.cls
字号:
'
' @param Bytes The set of bytes to be decoded into a string.
' @param Index The index of the first byte to be decoded.
' @param Count The number of bytes to be used in the decoding.
' @return A string containing the decoded set of bytes.
'
Public Function GetString(ByRef Bytes() As Byte, Optional ByRef Index As Variant, Optional ByRef Count As Variant) As String
Dim ElemIndex As Long
Dim ElemCount As Long
Dim Result As Long
Result = GetOptionalArrayRange(SAPtr(Bytes), Index, ElemIndex, Count, ElemCount)
If Result <> NO_ERROR Then Call ThrowArrayRangeException(Result, "Bytes", ElemIndex, "Index", ElemCount, "Count", IsMissing(Index))
Dim Size As Long
Size = GetMaxCharCount(ElemCount)
Dim Ret As String
Ret = SysAllocStringLen(0, Size)
With mCharsSA
.pvData = StrPtr(Ret)
.lLbound = 0
.cElements = Size
End With
SAPtr(mChars) = VarPtr(mCharsSA)
Size = InternalGetChars(Bytes, ElemIndex, ElemCount, mChars, 0, Nothing)
GetString = Left$(Ret, Size)
End Function
''
' Returns an array of bytes that represents this encoding.
'
' @return A byte array containg the bytes to be used to identify this encoding type.
' @remarks <p>If the encoding was created setting the <i>ShouldEmitUTF8Identifier</i> paramter
' to true, then this will return the UTF-8 identifer &HFFEF already encoded into the 3
' bytes. The bytes are &HEF, &HBB, &HBF. They will decode to &HFFEF. Since &HFF and &HFE
' are not valid in a UTF-8 encoding, these will unique and can be used to identify the
' encoding used in the stream.</p>
' If <i>ShouldEmitUTF8Identifier</i> was set to False, then an empty array will be returned.
' The array will not be null. It will have a UBound of -1.
'
Public Function GetPreamble() As Byte()
If mShouldEmitUTF8Identifier Then
GetPreamble = Cor.NewBytes(&HEF, &HBB, &HBF)
Else
GetPreamble = Cor.NewBytes() ' creates a 0-length array instead of a null array.
End If
End Function
''
' Returns the maximum number of bytes that can be created from a specific number of characters.
'
' @param CharCount The number of characters to be encoded.
' @return The maximum number of bytes that can be generated from the specified number of characters.
'
Public Function GetMaxByteCount(ByVal CharCount As Long) As Long
If CharCount < 0 Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_NeedNonNegNum), "CharCount", CharCount)
GetMaxByteCount = CharCount * 4
End Function
''
' Returns the maximum number of characters than can be decoded from the number of bytes specified.
'
' @param ByteCount The number of bytes to be decoded.
' @return The maximum number of characters that can be decoded from the specified number of bytes.
'
Public Function GetMaxCharCount(ByVal ByteCount As Long) As Long
If ByteCount < 0 Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_NeedNonNegNum), "ByteCount", ByteCount)
GetMaxCharCount = ByteCount
End Function
''
' Returns an encoder that maintains state.
'
' @return The stateful encoder.
' @remarks Unlike UTF8Encoding, the encoder maintains state between encoding calls. This allows
' for a set of characters to be split up but still be encoded as a single set of characters. This
' will only be effective when encoding UTF-16 surrogate pairs of characters. Otherwise, a single
' characters is all that is required to be encodable. the UTF-16 surrogate pairs can be split to
' the last character of one call and the first character of the next and the encoding will
' continue as if a single call was made.
'
Public Function GetEncoder() As Encoder
Dim Ret As New UTF8Encoder
Call Ret.Init(Me)
Set GetEncoder = Ret
End Function
''
' Returns a decoder that maintains state.
'
' @return The stateful decoder.
' @remarks Unlike UTF8Encoding, the decoder maintains state between decoding calls. this allows
' for a sequence of bytes to not have to be in the same call in order to be decoded to the
' representive character. Other than the standard ASCII set of characters (0-127), all other
' characters are represented by 2-4 bytes. Using this decoder allows for some of those bytes to be
' at the end of one call and the rest be at the beginning of the next call. The decoder will
' continue decoding as if all the required bytes were in the same call.
'
Public Function GetDecoder() As Decoder
Dim Ret As New UTF8Decoder
Call Ret.Init(Me)
Set GetDecoder = Ret
End Function
''
' Returns a string representation of this object instance.
'
' @return String representing this instance.
Public Function ToString() As String
ToString = Object.ToString(Me, App)
End Function
''
' Returns a boolean indicating if the value and this object
' instance are the same instance.
'
' @param value The value to compare equality to.
' @return Boolean indicating equality.
Public Function Equals(ByRef Value As Variant) As Boolean
If IsObject(Value) Then
If Value Is Nothing Then Exit Function
If TypeOf Value Is UTF8Encoding Then
Dim en As UTF8Encoding
Set en = Value
Equals = (mShouldEmitUTF8Identifier = en.EmitUTF8Identifier)
End If
End If
End Function
''
' Returns a pseudo-unique number identifying this instance.
'
' @return Pseudo-unique number identifying this instance.
Public Function GetHashCode() As Long
GetHashCode = CODE_PAGE Or CLng(IIf(mShouldEmitUTF8Identifier, &H80000000, 0)) Or CLng(IIf(mThrowException, &H40000000, 0))
End Function
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Friend Interface
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Friend Sub Init(ByVal ShouldEmitUTF8Identifier As Long, ByVal ShouldThrowException As Boolean)
mShouldEmitUTF8Identifier = ShouldEmitUTF8Identifier
mThrowException = ShouldThrowException
End Sub
Friend Property Get EmitUTF8Identifier() As Boolean
EmitUTF8Identifier = mShouldEmitUTF8Identifier
End Property
Friend Function InternalGetBytes(ByRef Chars() As Integer, ByVal CharIndex As Long, ByVal CharCount As Long, ByRef Bytes() As Byte, ByVal ByteIndex As Long, ByRef Encoder As UTF8Encoder) As Long
Const HIGH_OFFSET As Long = &HD800&
Const LOW_OFFSET As Long = &HDC00&
Dim uch As Long
Dim maxindex As Long
Dim MaxByteIndex As Long
Dim StartIndex As Long
Dim HighSurrogateChar As Long
If cArray.IsNull(Bytes) Then _
Throw Cor.NewArgumentNullException(Environment.GetResourceString(ArgumentNull_Array), "Byte")
If ByteIndex < LBound(Bytes) Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_LBound), "ByteIndex", ByteIndex)
If Not Encoder Is Nothing Then HighSurrogateChar = Encoder.HighSurrogateChar
StartIndex = ByteIndex
MaxByteIndex = UBound(Bytes) + 1
maxindex = CharIndex + CharCount
Do While CharIndex < maxindex
AsWord(uch) = Chars(CharIndex)
'If CharIndex = UBound(Chars) Then Stop
If HighSurrogateChar <> 0 Then
Select Case uch
Case &HDC00& To &HDFFF&
' we have a surrogate pair.
Dim pair As Long
If ByteIndex + 4 > MaxByteIndex Then Call SmallBufferError("Bytes")
pair = ((HighSurrogateChar - HIGH_OFFSET) * &H400) + (uch - LOW_OFFSET) + &H10000
Bytes(ByteIndex + 3) = &H80 Or (pair And &H3F)
Bytes(ByteIndex + 2) = &H80 Or ((pair \ &H40&) And &H3F)
Bytes(ByteIndex + 1) = &H80 Or ((pair \ &H1000&) And &H3F)
Bytes(ByteIndex) = &HF0 Or (pair \ &H40000)
ByteIndex = ByteIndex + 4
HighSurrogateChar = 0
Case &HD800& To &HDBFF&
' we have two high surrogates in a row
If mThrowException Then Throw Cor.NewArgumentException("A high surrogate was followed by a second high surrogate.", "Chars")
If ByteIndex + 3 > MaxByteIndex Then Call SmallBufferError("Bytes")
ByteIndex = EncodeSurrogate(HighSurrogateChar, Bytes, ByteIndex)
HighSurrogateChar = uch
Case Else
If mThrowException Then Throw Cor.NewArgumentException("A high surrogate was not followed by a low surrogate.", "Chars")
If ByteIndex + 3 >= MaxByteIndex Then Call SmallBufferError("Bytes")
ByteIndex = EncodeSurrogate(HighSurrogateChar, Bytes, ByteIndex)
CharIndex = CharIndex - 1
HighSurrogateChar = 0
End Select
Else
Select Case uch
Case Is < &H80 ' ascii
If ByteIndex + 1 > MaxByteIndex Then Call SmallBufferError("Bytes")
Bytes(ByteIndex) = uch
ByteIndex = ByteIndex + 1
Case Is < &H800
If ByteIndex + 2 > MaxByteIndex Then Call SmallBufferError("Bytes")
Bytes(ByteIndex) = &HC0 Or (uch \ 64)
Bytes(ByteIndex + 1) = &H80 Or (uch And &H3F)
ByteIndex = ByteIndex + 2
Case &HD800& To &HDBFF& ' high surrogate
HighSurrogateChar = uch
Case &HDC00& To &HDFFF& ' low surrogate
If mThrowException Then Throw Cor.NewArgumentException("A low surrogate was not preceeded by a high surrogate.", "Chars")
If ByteIndex + 3 > MaxByteIndex Then Call SmallBufferError("Bytes")
ByteIndex = EncodeSurrogate(uch, Bytes, ByteIndex)
Case Else
If ByteIndex + 3 > MaxByteIndex Then Call SmallBufferError("Bytes")
ByteIndex = EncodeSurrogate(uch, Bytes, ByteIndex)
End Select
End If
CharIndex = CharIndex + 1
Loop
If Not Encoder Is Nothing Then
If Not Encoder.Flush Then
Encoder.HighSurrogateChar = HighSurrogateChar
Else
If mThrowException And (HighSurrogateChar <> 0) Then Throw Cor.NewArgumentException("A high surrogate was not followed by a low surrogate.", "Chars")
Encoder.HighSurrogateChar = 0
End If
ElseIf HighSurrogateChar <> 0 Then
If mThrowException Then Throw Cor.NewArgumentException("A high surrogate was not followed by a low surrogate.", "Chars")
ByteIndex = EncodeSurrogate(HighSurrogateChar, Bytes, ByteIndex)
End If
InternalGetBytes = ByteIndex - StartIndex
End Function
Friend Function InternalGetChars(ByRef Bytes() As Byte, ByVal ByteIndex As Long, ByVal ByteCount As Long, ByRef Chars() As Integer, ByVal CharIndex As Long, ByRef Decoder As UTF8Decoder) As Long
Dim BytesInSequence As Long
Dim b As Byte
Dim Ch As Long
Dim CharCount As Long
Dim BytesLeft As Long
Dim putChars As Boolean
Dim maxindex As Long
Dim maxCharIndex As Long
putChars = Not cArray.IsNull(Chars)
If putChars Then
maxCharIndex = UBound(Chars)
If CharIndex < LBound(Chars) Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_LBound), "CharIndex", CharIndex)
If CharIndex > maxCharIndex Then _
Throw Cor.NewArgumentOutOfRangeException(Environment.GetResourceString(ArgumentOutOfRange_UBound))
End If
If Not Decoder Is Nothing Then
BytesLeft = Decoder.BytesLeft
Ch = Decoder.Char
BytesInSequence = Decoder.BytesInSequence
End If
maxindex = ByteIndex + ByteCount
Do While ByteIndex < maxindex
b = Bytes(ByteIndex)
' only ascii an character will be 1 byte. 0xxxxxxx format
If b < &H80 Then
' we were expecting more bytes to complete the sequence.
If BytesInSequence > 0 Then Call ByteStreamError(ByteIndex)
If putChars Then
If CharIndex > maxCharIndex Then Call SmallBufferError("Chars")
Chars(CharIndex) = b
CharIndex = CharIndex + 1
End If
BytesLeft = 0
BytesInSequence = 0
CharCount = CharCount + 1
ElseIf BytesInSequence = 0 Then
Ch = b
' we count off the number of bits set starting from
' the highest bit. No more than 4 should be found.
' This will tell us how many bytes need to be put together
' to form the output character, or surrogate pair.
Do While (b And &H80)
BytesInSequence = BytesInSequence + 1
b = (b And &H7F) * 2
Loop
BytesLeft = BytesInSequence - 1
Select Case BytesInSequence
Case 2 ' 110xxxxx format
Ch = Ch And &H1F
If Ch < 2 Then Call ByteStreamError(ByteIndex)
Case 3 ' 1110xxxx format
Ch = Ch And &HF
Case 4 ' 11110xxx format
Ch = Ch And &H7
If Ch <> 0 Then
If Ch <> &H4 Then Call ByteStreamError(ByteIndex)
End If
Case Else
Call ByteStreamError(ByteIndex)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -