⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gptextstream.pas

📁 OmniXML源码
💻 PAS
📖 第 1 页 / 共 3 页
字号:
  wc : word;

  procedure AddByte(b: byte);
  begin
    pch^ := char(b);
    Inc(pch);
  end; { AddByte }

begin { WideCharBufToUTF8Buf }
  pwc := @unicodeBuf;
  pch := @utf8Buf;
  for iwc := 1 to uniByteCount div SizeOf(WideChar) do begin
    wc := Ord(pwc^);
    Inc(pwc);
    if (wc >= $0001) and (wc <= $007F) then begin
      AddByte(wc AND $7F);
    end
    else if (wc >= $0080) and (wc <= $07FF) then begin
      AddByte($C0 OR ((wc SHR 6) AND $1F));
      AddByte($80 OR (wc AND $3F));
    end
    else begin // (wc >= $0800) and (wc <= $FFFF)
      AddByte($E0 OR ((wc SHR 12) AND $0F));
      AddByte($80 OR ((wc SHR 6) AND $3F));
      AddByte($80 OR (wc AND $3F));
    end;
  end; //for
  Result := integer(pch)-integer(@utf8Buf);
end; { WideCharBufToUTF8Buf }

{:Converts UTF-8 encoded buffer into WideChars. Target buffer must be
  pre-allocated and large enough (at most utfByteCount number of WideChars will
  be generated).                                                                 <br>
  RFC 2279 (http://www.ietf.org/rfc/rfc2279.txt) describes the conversion:       <br>
  $00..$7F => $0000..$007F                                                       <br>
  110[bit10..bit6] 10[bit5..bit0] => $0080..$07FF                                <br>
  1110[bit15..bit12] 10[bit11..bit6] 10[bit5..bit0] => $0800..$FFFF
  @param   utf8Buf      UTF-8 encoded buffer.
  @param   utfByteCount Size of utf8Buf, in bytes.
  @param   unicodeBuf   Pre-allocated buffer for WideChars.
  @param   leftUTF8     Number of bytes left in utf8Buf after conversion (0, 1,
                        or 2).
  @returns Number of bytes used in unicodeBuf buffer.
  @since   2.01
}
function UTF8BufToWideCharBuf(const utf8Buf; utfByteCount: integer;
 var unicodeBuf; var leftUTF8: integer): integer;
var
  c1 : byte;
  c2 : byte;
  ch : byte;
  pch: PChar;
  pwc: PWideChar;
begin
  pch := @utf8Buf;
  pwc := @unicodeBuf;
  leftUTF8 := utfByteCount;
  while leftUTF8 > 0 do begin
    ch := byte(pch^);
    Inc(pch);
    if (ch AND $80) = 0 then begin // 1-byte code
      word(pwc^) := ch;
      Inc(pwc);
      Dec(leftUTF8);
    end
    else if (ch AND $E0) = $C0 then begin // 2-byte code
      if leftUTF8 < 2 then
        break;
      c1 := byte(pch^);
      Inc(pch);
      word(pwc^) := (word(ch AND $1F) SHL 6) OR (c1 AND $3F);
      Inc(pwc);
      Dec(leftUTF8,2);
    end
    else begin // 3-byte code
      if leftUTF8 < 3 then
        break;
      c1 := byte(pch^);
      Inc(pch);
      c2 := byte(pch^);
      Inc(pch);
      word(pwc^) :=
        (word(ch AND $0F) SHL 12) OR
        (word(c1 AND $3F) SHL 6) OR
        (c2 AND $3F);
      Inc(pwc);
      Dec(leftUTF8,3);
    end;
  end; //while
  Result := integer(pwc)-integer(@unicodeBuf);
end; { UTF8BufToWideCharBuf }

{:Returns default Ansi codepage for LangID or 'defCP' in case of error (LangID
  does not specify valid language ID).
  @param   LangID Language ID.
  @param   defCP  Default value that is to be returned if LangID doesn't specify
                  a valid language ID.
  @returns Default Ansi codepage for LangID or 'defCP' in case of error.
}
function GetDefaultAnsiCodepage (LangID: LCID; defCP: integer): word;
var
  p: array [0..255] of char;
begin
  if GetLocaleInfo (LangID, 4100, p, High (p)) > 0 then
    Result := StrToIntDef(p,defCP)
  else
    Result := defCP;
end; { GetDefaultAnsiCodepage }

{ TGpTextStream }

{:Allocates buffer for 8/16/8 bit conversions. If requested size is small
  enough, returns pre-allocated buffer, otherwise allocates new buffer.
  @param   size Requested size in bytes.
  @returns Pointer to buffer.
}
function TGpTextStream.AllocBuffer(size: integer): pointer;
begin
  if size <= CtsSmallBufSize then
    Result := tsSmallBuf
  else
    GetMem(Result,size);
end; { TGpTextStream.AllocBuffer }

{:Initializes stream and opens it in required access mode.
  @param   dataStream  Wrapped (physical) stream used for data access.
  @param   access      Required access mode.
  @param   openFlags   Open flags (used when access mode is accReset).
  @param   createFlags Create flags (used when access mode is accRewrite or
                       tsaccAppend).
  @param   codePage    Code page to be used for 8/16/8 bit conversions. If set
                       to 0, current default code page will be used.
}
constructor TGpTextStream.Create(dataStream: TStream;
  access: TGpTSAccess; createFlags: TGpTSCreateFlags; codePage: word);
begin
  inherited Create(dataStream);
  if (tscfUnicode in createFlags) and (codePage <> CP_UTF8) then
    codePage := CP_UNICODE;
  tsAccess := access;
  tsCreateFlags := createFlags;
  SetCodepage(codePage);
  GetMem(tsSmallBuf,CtsSmallBufSize);
  PrepareStream;
end; { TGpTextStream.Create }

{:Cleanup. 
}
destructor TGpTextStream.Destroy;
begin
  FreeMem(tsSmallBuf);
  tsReadlnBuf.Free;
  tsReadlnBuf := nil;
  inherited Destroy;
end; { TGpTextStream.Destroy }

{:Frees buffer for 8/16/8 bit conversions. If pre-allocated buffer is passed,
  nothing will be done.
  @param   buffer Conversion buffer.
}
procedure TGpTextStream.FreeBuffer(var buffer: pointer);
begin
  if buffer <> tsSmallBuf then begin
    FreeMem(buffer);
    buffer := nil;
  end;
end; { TGpTextStream.FreeBuffer }

{:Checks if stream is 16-bit Unicode.
  @returns True if stream is 16-bit Unicode.
  @since   2.01
}
function TGpTextStream.GetWindowsError: DWORD;
begin
  if tsWindowsError <> 0 then
    Result := tsWindowsError
  else
    Result := 0;
end; { TGpTextStream.GetWindowsError }

{:Checks if stream contains 16-bit characters.
  @returns True if stream contains 16-bit characters.
}
function TGpTextStream.Is16bit: boolean;
begin
  Result := IsUnicode and (Codepage <> CP_UTF8);
end; { TGpTextStream.Is16bit }

{:Checks if stream is Unicode (UCS-2 or UTF-8 encoding).
  @returns True if stream is Unicode.
}
function TGpTextStream.IsUnicode: boolean;
begin
  Result := (tscfUnicode in tsCreateFlags);
end; { TGpTextStream.IsUnicode }

{:Prepares stream for read or write operation.
  @raises EGpTextStream if caller tries to rewrite or append 'reverse'
          Unicode stream.
}
procedure TGpTextStream.PrepareStream;
var
  marker : WideChar;
  marker3: Char;
begin
  case tsAccess of
    tsaccRead:
      begin
        tsCreateFlags := [];
        if WrappedStream.Size >= SizeOf(WideChar) then begin
          WrappedStream.Read(marker,SizeOf(WideChar));
          if marker = CUnicodeNormal then begin
            tsCreateFlags := tsCreateFlags + [tscfUnicode];
            Codepage := CP_UNICODE;
          end
          else if marker = CUnicodeReversed then begin
            tsCreateFlags := tsCreateFlags + [tscfUnicode,tscfReverseByteOrder];
            Codepage := CP_UNICODE;
          end
          else if (marker = CUTF8BOM12) and (WrappedStream.Size >= 3) then begin
            WrappedStream.Read(marker3,SizeOf(Char));
            if marker3 = CUTF8BOM3 then begin
              tsCreateFlags := tsCreateFlags + [tscfUnicode];
              Codepage := CP_UTF8;
            end;
          end;
          if not IsUnicode then
            WrappedStream.Position := 0;
        end;
        if (not IsUnicode) and ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then
          tsCreateFlags := [tscfUnicode];
      end; //tsaccRead
    tsaccWrite:
      begin
        if ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then
          tsCreateFlags := tsCreateFlags + [tscfUnicode];
        if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then
          raise EGpTextStream.CreateFmtHelp(sCannotWriteReversedUnicodeStream,[StreamName],hcTFCannotWriteReversed);
        WrappedStream.Size := 0;
        if IsUnicode then begin
          if Codepage <> CP_UTF8 then
            WrappedStream.Write(CUnicodeNormal,SizeOf(WideChar))
          else if tscfWriteUTF8BOM in tsCreateFlags then begin
            WrappedStream.Write(CUTF8BOM12,SizeOf(WideChar));
            WrappedStream.Write(CUTF8BOM3,SizeOf(Char));
          end;
        end;
      end; //tsaccWrite
    tsaccReadWrite:
      begin
        if ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then
          tsCreateFlags := tsCreateFlags + [tscfUnicode];
        if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then
          raise EGpTextStream.CreateFmtHelp(sCannotAppendReversedUnicodeStream,[StreamName],hcTFCannotAppendReversed);
        if (WrappedStream.Size = 0) and IsUnicode then begin
          if Codepage <> CP_UTF8 then
            WrappedStream.Write(CUnicodeNormal,SizeOf(WideChar))
          else if tscfWriteUTF8BOM in tsCreateFlags then begin
            WrappedStream.Write(CUTF8BOM12,SizeOf(WideChar));
            WrappedStream.Write(CUTF8BOM3,SizeOf(Char));
          end;
        end;
      end; //tsaccReadWrite
    tsaccAppend:
      begin
        tsCreateFlags := [];
        if WrappedStream.Size >= SizeOf(WideChar) then begin
          WrappedStream.Position := 0;
          WrappedStream.Read(marker,SizeOf(WideChar));
          if marker = CUnicodeNormal then begin
            tsCreateFlags := tsCreateFlags + [tscfUnicode];
            Codepage := CP_UNICODE;
          end
          else if marker = CUnicodeReversed then begin
            tsCreateFlags := tsCreateFlags + [tscfUnicode,tscfReverseByteOrder];
            Codepage := CP_UNICODE;
          end
          else if (marker = CUTF8BOM12) and (WrappedStream.Size >= 3) then begin
            WrappedStream.Read(marker3,SizeOf(Char));
            if marker3 = CUTF8BOM3 then begin
              tsCreateFlags := tsCreateFlags + [tscfUnicode];
              Codepage := CP_UTF8;
            end;
          end;
          WrappedStream.Position := WrappedStream.Size;
        end
        else if (WrappedStream.Size = 0) and IsUnicode then begin
          if Codepage <> CP_UTF8 then
            WrappedStream.Write(CUnicodeNormal,SizeOf(WideChar))
          else if tscfWriteUTF8BOM in tsCreateFlags then begin
            WrappedStream.Write(CUTF8BOM12,SizeOf(WideChar));
            WrappedStream.Write(CUTF8BOM3,SizeOf(Char));
          end;
        end;
        if (not IsUnicode) and ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then
          tsCreateFlags := tsCreateFlags + [tscfUnicode];
        if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then
          raise EGpTextStream.CreateFmtHelp(sCannotAppendReversedUnicodeStream,[StreamName],hcTFCannotAppendReversed);
      end; //tsaccAppend
  end; //case
end; { TGpTextStream.PrepareStream }

{:Reads 'count' number of bytes from stream. 'Count' must be an even number as
  data is always returned in Unicode format (two bytes per character). If stream
  is 8-bit, data is converted to Unicode according to code page specified in
  constructor.
  @param   buffer Buffer for read data.
  @param   count  Number of bytes to be read.
  @returns Number of bytes actually read.
  @raises  EGpTextStream if 'count' is odd.
  @raises  EGpTextStream if conversion from 8-bit to Unicode failes.
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -