⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qimport3html.pas

📁 Advanced Data Import Component Suite for Borland Delphi and C++ Builder allows you to import your da
💻 PAS
字号:
unit QImport3HTML;

{$I QImport3VerCtrl.Inc}

interface

{$IFDEF HTML}
{$IFDEF VCL6}

uses Classes, QImport3, QImport3StrTypes, IniFiles;

type
  THTMLCell = class(TCollectionItem)
  private
    FText: qiString;
  public
    property Text: qiString read FText write FText;
  end;

  THTMLCellList = class(TCollection)
  private
    function GetItem(Index: Integer): THTMLCell;
    procedure SetItem(Index: Integer; const Value: THTMLCell);
  public
    function Add: THTMLCell;
    property Items[Index: Integer]: THTMLCell read GetItem write SetItem; default;
  end;

  THTMLRow = class(TCollectionItem)
  private
    FCells: THTMLCellList;
  public
    constructor Create(Collection: TCollection); override;
    destructor Destroy; override;
    property Cells: THTMLCellList read FCells;
  end;

  THTMLRowList = class(TCollection)
  private
    function GetItem(Index: Integer): THTMLRow;
    procedure SetItem(Index: Integer; const Value: THTMLRow);
  public
    function Add: THTMLRow;
    property Items[Index: Integer]: THTMLRow read GetItem write SetItem; default;
  end;

  THTMLTable = class(TCollectionItem)
  private
    FRows: THTMLRowList;
    procedure SetRows(const Value: THTMLRowList);
  public
    constructor Create(Collection: TCollection); override;
    destructor Destroy; override;
    property Rows: THTMLRowList read FRows write SetRows;
  end;

  THTMLTableList = class(TCollection)
  private
    function GetItem(Index: Integer): THTMLTable;
    procedure SetItem(Index: Integer; const Value: THTMLTable);
  public
    function Add: THTMLTable;
    property Items[Index: Integer]: THTMLTable read GetItem write SetItem; default;
  end;

  THTMLFile = class
  private
    FFileName: string;
    FLoaded: Boolean;
    FData: AnsiString;
    FPosition: Integer;
    FTableList: THTMLTableList;
{$IFDEF QI_UNICODE}
    FNeedUTF8Decode: Boolean;
{$ENDIF}
  public
    constructor Create;
    destructor Destroy; override;
    procedure Open;
    procedure Close;
    procedure Load;
    procedure Clear;
    property FileName: string read FFileName write FFileName;
    property TableList: THTMLTableList read FTableList;
    property NeedUTF8Decode: Boolean read FNeedUTF8Decode
      default False;
  end;

  TQImport3HTML = class(TQImport3)
  private
    FHTML: THTMLFile;
    FCounter: Integer;
    FTableNumber: Integer;
    FExternalHTMLFile: Boolean;
{$IFDEF QI_UNICODE}
    FNeedUTF8Decode: Boolean;
{$ENDIF}
    procedure SetTableNumber(const Value: Integer);
{$IFDEF QI_UNICODE}
  private
    property NeedUTF8Decode: Boolean read FNeedUTF8Decode
      default False;
{$ENDIF}
  protected
    procedure BeforeImport; override;
    procedure StartImport; override;
    function CheckCondition: Boolean; override;
    function Skip: Boolean; override;
    procedure ChangeCondition; override;
    procedure FinishImport; override;
    procedure AfterImport; override;
    procedure FillImportRow; override;
    function ImportData: TQImportResult; override;
    procedure DoLoadConfiguration(IniFile: TIniFile); override;
    procedure DoSaveConfiguration(IniFile: TIniFile); override;
    property HTML: THTMLFile read FHTML write FHTML;
  public
    constructor Create(AOwner: TComponent); override;
  published
    property FileName;
    property SkipFirstRows default 0;
    property TableNumber: Integer read FTableNumber
      write SetTableNumber default 0;
  end;

  TDefault = class(TObject);

function SafeUtf8DecodeEx(const Source: AnsiString; RaiseOnError: Boolean = False): WideString;

{$ENDIF}
{$ENDIF}

implementation

{$IFDEF HTML}
{$IFDEF VCL6}

uses
  {$IFDEF VCL6} Variants, {$ENDIF}
  {$IFDEF VCL9} Windows, {$ENDIF}
  EmsWideStrUtils, SysUtils, QImport3Common, Math, ActiveX, MSHTML;

const
  sFileNameNotDefined = 'File name is not defined';
  sFileNotFound = 'File %s not found';
  ErrorSymbol: AnsiChar = '?';

procedure ParseHTML(HTMLFile: THTMLFile; const HTML: AnsiString);
var
  HTMLTable: IHTMLTable;
  HTMLRow: IHTMLTableRow;
  HTMLCell: IHTMLElement;
  HTMLElem: IHTMLElement;
  HTMLDocument: IHTMLDocument2;
  i, j, k: Integer;

  procedure LoadInterface;
  var
    v: Variant;
  begin
    v := VarArrayCreate([0, 0], varVariant);
    v[0] := HTML;
    HTMLDocument.close;
    HTMLDocument.write(PSafeArray(TVarData(v).VArray));
  end;

begin
  if Assigned(HTMLFile) then
  begin
    HTMLDocument := CoHTMLDocument.Create as IHTMLDocument2;
    try
      LoadInterface;
      for i := 0 to HTMLDocument.all.length - 1 do
      begin
        HTMLDocument.all.item(i, 0).QueryInterface(IHTMLElement, HTMLElem);
        if QIUpperCase(HTMLElem.tagName) = 'TABLE' then
        begin
          HTMLFile.TableList.Add;
          HTMLDocument.all.item(i, 0).QueryInterface(IHTMLTable, HTMLTable);
          for j := 0 to HTMLTable.rows.length - 1 do
          begin
            HTMLTable.rows.item(j, 0).QueryInterface(IHTMLTableRow, HTMLRow);
            HTMLFile.TableList.Items[HTMLFile.TableList.Count - 1].Rows.Add;
            for k := 0 to HTMLRow.cells.length - 1 do
            begin
              HTMLRow.cells.item(k, 0).QueryInterface(IHTMLElement, HTMLCell);
              HTMLFile.TableList.Items[HTMLFile.TableList.Count - 1].Rows[j].Cells.Add;
              HTMLFile.TableList.Items[HTMLFile.TableList.Count - 1].Rows[j].Cells[k].Text := HTMLCell.innerText;
            end;
          end;
        end;
      end;
    finally
      HTMLDocument := nil;
    end;
  end;
end;

function SafeUtf8DecodeEx(const Source: AnsiString; RaiseOnError: Boolean = False): WideString;
var
  I, J: Integer;
  wc, c, c2: Cardinal;

  procedure Error(Code: Cardinal; Length: Integer);
  begin
    if RaiseOnError then
      raise Exception.Create('0x'+IntToHex(Code, Length*2));
    Result[J] := WideChar(ErrorSymbol);
    Inc(I);
    Inc(J);
  end;

begin
  SetLength(Result, Length(Source));
  I := 1;
  J := 1;
  while I <= Length(Source) do
  begin
    wc := Ord(Source[I]);
    if wc >= $80 then
    begin
      if (wc < $C0) or (wc >= $F0) then
      begin;
        Error(wc,1);
        continue;
      end;
      if I+1 > Length(Source) then
        break;
      c := Ord(Source[I+1]);
      if (c and $C0) <> $80 then
      begin
        Error((wc shl 8) + c, 2);
        continue;
      end;
      if wc < $E0 then
      begin
        wc := ((wc and $1F) shl 6) + (c and $3F);
        Inc(I, 2);
      end
      else
      begin
        if I+2 > Length(Source) then
          break;
        c2 := Ord(Source[I+2]);
        if (c2 and $C0) <> $80 then
        begin
          Error((wc shl 16) + (c shl 8) + c2, 3);
          continue;
        end;
        wc := ((wc and $F) shl 12) + ((c and $3F) shl 6) + (c2 and $3F);
        Inc(I, 3);
      end;
    end
    else
      Inc(I);
    Result[J] := WideChar(wc);
    Inc(J);
  end;
  SetLength(Result, J - 1);
end;

{ THTMLCellList }

function THTMLCellList.Add: THTMLCell;
begin
  Result := THTMLCell(inherited Add)
end;

function THTMLCellList.GetItem(Index: Integer): THTMLCell;
begin
  Result := THTMLCell(inherited Items[Index]);
end;

procedure THTMLCellList.SetItem(Index: Integer; const Value: THTMLCell);
begin
  inherited Items[Index] := Value;
end;

{ THTMLRow }

constructor THTMLRow.Create(Collection: TCollection);
begin
  inherited;
  FCells := THTMLCellList.Create(THTMLCell);
end;

destructor THTMLRow.Destroy;
begin
  FCells.Free;
  inherited;
end;

{ THTMLRowList }

function THTMLRowList.Add: THTMLRow;
begin
  Result := THTMLRow(inherited Add)
end;

function THTMLRowList.GetItem(Index: Integer): THTMLRow;
begin
  Result := THTMLRow(inherited Items[Index]);
end;

procedure THTMLRowList.SetItem(Index: Integer; const Value: THTMLRow);
begin
  inherited Items[Index] := Value;
end;

{ THTMLTableList }

function THTMLTableList.Add: THTMLTable;
begin
  Result := THTMLTable(inherited Add)
end;

function THTMLTableList.GetItem(Index: Integer): THTMLTable;
begin
  Result := THTMLTable(inherited Items[Index]);
end;

procedure THTMLTableList.SetItem(Index: Integer; const Value: THTMLTable);
begin
  inherited Items[Index] := Value;
end;

{ THTMLTable }

constructor THTMLTable.Create(Collection: TCollection);
begin
  inherited;
  FRows := THTMLRowList.Create(THTMLRow);
end;

destructor THTMLTable.Destroy;
begin
  FRows.Free;
  inherited;
end;

procedure THTMLTable.SetRows(const Value: THTMLRowList);
begin
  FRows.Assign(Value);
end;

{ THTMLFile }

constructor THTMLFile.Create;
begin
  inherited;
  FLoaded := False;
  FTableList := THTMLTableList.Create(THTMLTable);
{$IFDEF QI_UNICODE}
  FNeedUTF8Decode := False;
{$ENDIF}
end;

destructor THTMLFile.Destroy;
begin
  FTableList.Free;
  inherited;
end;

procedure THTMLFile.Open;
var
  Stream: TFileStream;
begin
  if FFileName = EmptyStr then
    raise Exception.Create(sFileNameNotDefined);
  if not FileExists(FFileName) then
    raise Exception.CreateFmt(sFileNotFound, [FFileName]);
  Stream := TFileStream.Create(FFileName, fmOpenRead or fmShareDenyWrite);
  try
    Stream.Position := 0;
    SetLength(FData, Stream.Size);
    Stream.Read(FData[1], Stream.Size);
{$IFDEF QI_UNICODE}
    FNeedUTF8Decode := Pos({$IFDEF VCL12}AnsiString{$ENDIF}('charset=UTF-8'), FData) > 0;
{$ENDIF}
    FPosition := 1;
  finally
    Stream.Free;
  end;
end;

procedure THTMLFile.Close;
begin

end;

procedure THTMLFile.Load;
begin
  Open;
  try
    ParseHTML(Self, FData);
    FLoaded := True;
  finally
    Close;
  end;
end;

procedure THTMLFile.Clear;
begin
  TableList.Clear;
  FLoaded := False;
end;

{ TQImport3HTML }

constructor TQImport3HTML.Create(AOwner: TComponent);
begin
  inherited;
  SkipFirstRows := 0;
  FTableNumber := 0;
{$IFDEF QI_UNICODE}
  FNeedUTF8Decode := False;
{$ENDIF}
end;

procedure TQImport3HTML.SetTableNumber(const Value: integer);
begin
  FTableNumber := Value;
end;

procedure TQImport3HTML.BeforeImport;
begin
  FExternalHTMLFile := Assigned(FHTML);
  if not FExternalHTMLFile then
  begin
    FHTML := THTMLFile.Create;
    FHTML.FileName := FileName;
    FHTML.Load;
{$IFDEF QI_UNICODE}
    FNeedUTF8Decode := FHTML.NeedUTF8Decode;
{$ENDIF}
  end;
  inherited;
end;

procedure TQImport3HTML.StartImport;
begin
  FCounter := 0;
  FTotalRecCount := 0;
  if Assigned(FHTML) and (FTableNumber > 0) then
    FTotalRecCount := FHTML.FTableList[Pred(FTableNumber)].Rows.Count;
end;

function TQImport3HTML.CheckCondition: Boolean;
begin
  Result := FCounter < FTotalRecCount;
end;

function TQImport3HTML.Skip: Boolean;
begin
  Result := (SkipFirstRows > 0) and (FCounter < SkipFirstRows);
end;

procedure TQImport3HTML.ChangeCondition;
begin
  Inc(FCounter);
end;

procedure TQImport3HTML.FinishImport;
begin
  if not Canceled and not IsCSV then
  begin
    if CommitAfterDone then
      DoNeedCommit
    else if (CommitRecCount > 0) and ((ImportedRecs + ErrorRecs) mod CommitRecCount > 0) then
      DoNeedCommit;
  end;
end;

procedure TQImport3HTML.AfterImport;
begin
  if not FExternalHTMLFile and Assigned(FHTML) then
  begin
    FHTML.Free;
    FHTML := nil;
  end;
  inherited;
end;

procedure TQImport3HTML.FillImportRow;
var
  j, k: Integer;
  stValue: qiString;
  p: Pointer;
  mapValue: qiString;
begin
  FImportRow.ClearValues;
  for j := 0 to FImportRow.Count - 1 do
  begin
    if FImportRow.MapNameIdxHash.Search(FImportRow[j].Name, p) then
    begin
      k := Integer(p);
      stValue := '';
{$IFDEF VCL7}
      mapValue := Map.ValueFromIndex[k];
{$ELSE}
      mapValue := Map.Values[FImportRow[j].Name];
{$ENDIF}
      if FHTML.TableList[Pred(FTableNumber)].Rows[FCounter].Cells.Count >= StrToInt(mapValue) then
        stValue := FHTML.TableList[Pred(FTableNumber)].Rows[FCounter].Cells[Pred(StrToInt(mapValue))].Text;
      if stValue <> '' then
        while ord(stValue[1]) = 1042 do
          stValue := Copy(stValue, 3, Length(stValue) - 2);
{$IFDEF QI_UNICODE}
      if NeedUTF8Decode then
        stValue := SafeUtf8DecodeEx( AnsiString(stValue));
{$ENDIF}
      FImportRow.SetValue(Map.Names[k], stValue, False);
    end;
    DoUserDataFormat(FImportRow[j]);
  end;
end;

function TQImport3HTML.ImportData: TQImportResult;
begin
  Result := qirOk;
  try
    try
      if Canceled  and not CanContinue then
      begin
        Result := qirBreak;
        Exit;
      end;
      DataManipulation;
    except
      on E:Exception do
      begin
        try
          DestinationCancel;
        except
        end;
        DoImportError(E);
        Result := qirContinue;
        Exit;
      end;
    end;
  finally
    if (not IsCSV) and (CommitRecCount > 0) and not CommitAfterDone and
       (
        ((ImportedRecs + ErrorRecs) > 0)
        and ((ImportedRecs + ErrorRecs) mod CommitRecCount = 0)
       )
    then
      DoNeedCommit;
    if (ImportRecCount > 0) and
       ((ImportedRecs + ErrorRecs) mod ImportRecCount = 0) then
      Result := qirBreak;
  end;
end;

procedure TQImport3HTML.DoLoadConfiguration(IniFile: TIniFile);
begin
  inherited;
  with IniFile do
  begin
    SkipFirstRows := ReadInteger(HTML_OPTIONS, HTML_SKIP_LINES, SkipFirstRows);
  end;
end;

procedure TQImport3HTML.DoSaveConfiguration(IniFile: TIniFile);
begin
  inherited;
  with IniFile do
  begin
    WriteInteger(HTML_OPTIONS, HTML_SKIP_LINES, SkipFirstRows);
  end;
end;

{$ENDIF}
{$ENDIF}
end.


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -