⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 upaserwordthread.pas

📁 这是一个从指定网页格式分离单词的小程序
💻 PAS
字号:
{-----------------------------------------------------------------------------
 Unit Name: uPaserWordThread
 Author:    Piao
 Date:      2005-3-16 1:08:17
 Purpose:   分析单词线程
 History:
-----------------------------------------------------------------------------}
unit uPaserWordThread;

interface

uses
  Classes, Windows, PerlRegEx, ActiveX, ADODB, SysUtils;

type
  TPaserWordThread = class(TThread)
    FWordClassID: string;             //单词类别ID
    FsFileName: string;               //要解析的文件路径
    FProHandle: HWND;                 //处理消息的Handle
    FTaskID: integer;                 //调用线程分配的ID
    FSource: TStringList;             //要解析的内容
    FTarget: TStringList;             //解析后的内容
    FPerlRegEx: TPerlRegEx;           //正则表达式解析组件
    FADOCon: TADOConnection;          //连接数据库组件
    FProADOQ: TADOQuery;              //添加指定数据
    FErrorMsg: integer;               //-1 未处理 0 正确  1 链接数据库出错  2 加载文件出错 3 线程被中断

  private
    { Private declarations }
    function InitADOConn: boolean;    //初始化连接
    procedure ParserOver;
  protected
    procedure Execute; override;

  public
    constructor Create(mdbLinkStr, sFileName, WordClassID:String;
      ProHandle: HWND; TaskID: integer; CreateSuspended: boolean);
    destructor Destroy; override;

  end;

implementation

uses uConst, uPublic;


{ TPaserWordThread }

constructor TPaserWordThread.Create(mdbLinkStr, sFileName, WordClassID: String;
  ProHandle: HWND; TaskID: integer; CreateSuspended: boolean);
begin
  inherited Create(CreateSuspended);

  FWordClassID := WordClassID;     //单词类别ID
  FsFileName := sFileName;         //要解析的文件路径
  FProHandle := ProHandle;         //处理消息的Handle
  FTaskID := TaskID;               //调用线程分配的ID

  Activex.CoInitialize(nil);
  FSource := TStringList.Create;
  FTarget := TStringList.Create;
  FPerlRegEx := TPerlRegEx.Create(nil);
  FADOCon := TADOConnection.Create(nil);
  FADOCon.ConnectionString := mdbLinkStr;
  FADOCon.LoginPrompt := False;
  FProADOQ := TADOQuery.Create(nil);
  FProADOQ.Connection := FADOCon;

  FErrorMsg := -1; //标识为没有处理过的类型

end;

destructor TPaserWordThread.Destroy;
begin
  FSource.Free;
  FTarget.Free;
  FPerlRegEx.Free;
  FProADOQ.Free;
  FADOCon.Free;
  Activex.CoUninitialize;
  Synchronize(ParserOver);
  inherited Destroy;
end;

procedure TPaserWordThread.Execute;
var i, Count, CurLen: integer;
    MatchEnglish,MatchChinese: array of string;
    tmpS: string;
    procedure InsertToTable;
    begin
      Count := Length(MatchEnglish);
      if Count = Length(MatchChinese) then
      begin{中英文对应,将数据插入数据库中}
        i := 0;
//        tmpS := StringReplace(MatchEnglish[i],'''','''''',[rfReplaceAll]);
//        MatchEnglish[i] := tmpS;
//        tmpS := StringReplace(MatchChinese[i],'''','''''',[rfReplaceAll]);
//        MatchChinese[i] := tmpS;
        while (i < Count) and (not Terminated) do
        begin{将单词和中文分别插入TChinese、TEnglish表中}
          if GetCountCondition('FEnglish','TEnglish',
            'FEnglish="' + MatchEnglish[i] + '"',
            FProADOQ) = 0 then
          begin//如果没找到英文单词则插入
            ExecADOQ('insert into TEnglish(FWordClassID,FEnglish) values("' +
              FWordClassID + '","' + MatchEnglish[i] + '")',FProADOQ);
          end;//if

          if OpenADOQ('select FID from TEnglish where FEnglish="' +
            MatchEnglish[i] + '"', FProADOQ) then
          if FProADOQ.RecordCount = 1 then
          begin//找到才插入中文
            tmpS := FProADOQ.Fields[0].AsString;//得到插入英文的ID
            if GetCountCondition('FChinese','TChinese',
            'FChinese="' + MatchChinese[i] + '" and FEnglishID="' + tmpS + '"',
            FProADOQ) = 0 then
            begin//对应单词没有重复的翻译时添加
              ExecADOQ('insert into TChinese(FEnglishID,FChinese) values("' +
                tmpS + '","' + MatchChinese[i] + '")',FProADOQ);
            end;//if
          end;//if
          inc(i);
          Sleep(0);
        end;//while
      end;//if

    end;
begin
  { Place thread code here }
  FreeOnTerminate := True;              //当线程结束时自动释放
  if not InitADOConn then
  begin
    FErrorMsg := 1; //标识为连接数据库出错
    Exit;
  end;//if
  if not Terminated then
  begin
    try
      FSource.Clear;
      FSource.LoadFromFile(FsFileName);
    except
      // SendMessage(FProHandle,WM_SendErrorMsg,FTaskID,0);//发送处理失败消息
      FErrorMsg := 2;//标识为加载文件失败
      Exit;
    end;//try
  end;//if


  FErrorMsg := 3;
//  //处理Test04.html类型“Barina ||  巴里纳(澳大利亚通用-霍尔登公司)”
//  FTarget.Clear;
//  FPerlRegEx.Options := [preMultiLine];
//  FPerlRegEx.Subject := FSource.Text;
//
//  FPerlRegEx.RegEx := SRegExOne_One;
//  if FPerlRegEx.Match then
//  repeat{得到中英文}
//    tmpS := StringReplace(FPerlRegEx.MatchedExpression,#13#10,'',[rfReplaceAll]);
//    if tmpS <> '' then
//    FTarget.Add(tmpS);
//    Sleep(0);
//  until (not Terminated) and (not FPerlRegEx.MatchAgain);
//
//  CurLen := FTarget.Count;
//  if CurLen > 0 then
//  begin
//    SetLength(MatchEnglish, CurLen);
//    SetLength(MatchChinese, CurLen);
//
//    FPerlRegEx.RegEx := SRegExOne_Two;
//    FPerlRegEx.Subject := FTarget.Text;
//    i := 0;
//    if FPerlRegEx.Match then
//    repeat{得到中文}
//      tmpS := Trim(FPerlRegEx.MatchedExpression);
//      if tmpS <> '' then
//      begin
//        if i >= CurLen then
//        begin
//          i := 0;
//          Break;
//        end;//if
//
//        MatchChinese[i] := tmpS;
//        inc(i);
//      end;//if
//      Sleep(0);
//    until (not Terminated) and (not FPerlRegEx.MatchAgain);
//    SetLength(MatchChinese, i);
//
//    if i > 0 then
//    begin
//      FPerlRegEx.RegEx := SRegExOne_Three;
//      FPerlRegEx.Subject := FTarget.Text;
//      i := 0;
//      if FPerlRegEx.Match then
//      repeat{得到英文}
//        tmpS := Trim(FPerlRegEx.MatchedExpression);
//        if tmpS <> '' then
//        begin
//          if i >= CurLen then
//          begin
//            i := 0;
//            Break;
//          end;//if
//          MatchEnglish[i] := tmpS;
//          inc(i);
//        end;//if
//        Sleep(0);
//      until (not Terminated) and (not FPerlRegEx.MatchAgain);
//      SetLength(MatchEnglish, i);
//
//      InsertToTable;//将数据插入数据表中
//    end;//if i > 0
//  end;//if CurLen > 0
//
//  {Add 2005-03-16 by Piao }
//  {处理 Test03.html类型“1. 棉织物:COTTON FABRIC ”}
//  FTarget.Clear;
//  FPerlRegEx.Options := [preMultiLine];
//  FPerlRegEx.Subject := FSource.Text;
//
//  FPerlRegEx.RegEx := SRegExTwo_One;
//  if FPerlRegEx.Match then
//  repeat{得到中英文}
//    tmpS := StringReplace(FPerlRegEx.MatchedExpression,#13#10,'',[rfReplaceAll]);
//    if tmpS <> '' then
//    FTarget.Add(tmpS);
//    Sleep(0);
//  until (not Terminated) and (not FPerlRegEx.MatchAgain);
//
//  CurLen := FTarget.Count;
//
//  if CurLen > 0 then
//  begin
//    SetLength(MatchEnglish, CurLen);
//    SetLength(MatchChinese, CurLen);
//
//    FPerlRegEx.RegEx := SRegExTwo_Two;
//    FPerlRegEx.Subject := FTarget.Text;
//    i := 0;
//    if FPerlRegEx.Match then
//    repeat{得到中文}
//      tmpS := Trim(FPerlRegEx.MatchedExpression);
//      if tmpS <> '' then
//      begin
//        if i >= CurLen then
//        begin
//          i := 0;
//          Break;
//        end;//if
//        MatchChinese[i] := tmpS;
//        inc(i);
//      end;//if
//      Sleep(0);
//    until (not Terminated) and (not FPerlRegEx.MatchAgain);
//    SetLength(MatchChinese, i);
//
//    if i > 0 then
//    begin
//      FPerlRegEx.RegEx := SRegExTwo_Three;
//      FPerlRegEx.Subject := FTarget.Text;
//      i := 0;
//      if FPerlRegEx.Match then
//      repeat{得到英文}
//        tmpS := Trim(FPerlRegEx.MatchedExpression);
//        if tmpS <> '' then
//        begin
//          if i >= CurLen then
//          begin
//            i := 0;
//            Break;
//          end;//if
//          if Length(tmpS) > 3 then
//          begin//确保不会出错
//            tmpS := Copy(tmpS,3,Length(tmpS) - 2);//去掉开始的":"字符
//            MatchEnglish[i] := tmpS;
//            inc(i);
//          end;
//        end;//if
//        Sleep(0);
//      until (not Terminated) and (not FPerlRegEx.MatchAgain);
//      SetLength(MatchEnglish, i);
//
//      InsertToTable;//将数据插入数据表中
//    end;//if i > 0
//  end;//if CurLen > 0

  {Add 2005-03-16 by Piao }
  {New001.html类型“烤漆区     Coating Area”}
  FTarget.Clear;
  FPerlRegEx.Options := [preMultiLine];
  FPerlRegEx.Subject := FSource.Text;

  FPerlRegEx.RegEx := SRegExThree_One;
  if FPerlRegEx.Match then
  repeat{得到中英文}
    tmpS := StringReplace(FPerlRegEx.MatchedExpression,#13#10,'',[rfReplaceAll]);
    if tmpS <> '' then
    FTarget.Add(tmpS);
    Sleep(0);
  until (not Terminated) and (not FPerlRegEx.MatchAgain);

  CurLen := FTarget.Count;
  if CurLen > 0 then
  begin
    SetLength(MatchEnglish, CurLen);
    SetLength(MatchChinese, CurLen);

    FPerlRegEx.RegEx := SRegExThree_Two;
    FPerlRegEx.Subject := FTarget.Text;
    i := 0;
    if FPerlRegEx.Match then
    repeat{得到中文}
      tmpS := Trim(FPerlRegEx.MatchedExpression);
      if tmpS <> '' then
      begin
        if i >= CurLen then
        begin
          i := 0;
          Break;
        end;//if
        MatchChinese[i] := tmpS;
        inc(i);
      end;//if
      Sleep(0);
    until (not Terminated) and (not FPerlRegEx.MatchAgain);
    SetLength(MatchChinese, i);
    if i > 0 then
    begin
      FPerlRegEx.RegEx := SRegExThree_Three;
      FPerlRegEx.Subject := FTarget.Text;
      i := 0;
      if FPerlRegEx.Match then
      repeat{得到英文}
        tmpS := Trim(FPerlRegEx.MatchedExpression);
        if tmpS <> '' then
        begin
          if i >= CurLen then
          begin
            i := 0;
            Break;
          end;//if
          MatchEnglish[i] := tmpS;
          inc(i);
        end;//if
        Sleep(0);
      until (not Terminated) and (not FPerlRegEx.MatchAgain);
      SetLength(MatchEnglish, i);

      InsertToTable;//将数据插入数据表中
    end;//if i > 0
  end;//if CurLen > 0

  {Add 2005-03-17 by Piao }
  {New002.html类型“to pay 付款,支付,偿还 ”}
  FTarget.Clear;
  FPerlRegEx.Options := [preMultiLine];
  FPerlRegEx.Subject := FSource.Text;

  FPerlRegEx.RegEx := SRegExFour_One;
  if FPerlRegEx.Match then
  repeat{得到中英文}
    tmpS := StringReplace(FPerlRegEx.MatchedExpression,#13#10,'',[rfReplaceAll]);
    if tmpS <> '' then
    FTarget.Add(tmpS);
    Sleep(0);
  until (not Terminated) and (not FPerlRegEx.MatchAgain);

  CurLen := FTarget.Count;
  if CurLen > 0 then
  begin
    SetLength(MatchEnglish, CurLen);
    SetLength(MatchChinese, CurLen);

    FPerlRegEx.RegEx := SRegExFour_Two;
    FPerlRegEx.Subject := FTarget.Text;
    i := 0;
    if FPerlRegEx.Match then
    repeat{得到中文}
      tmpS := Trim(FPerlRegEx.MatchedExpression);
      if tmpS <> '' then
      begin
        if i >= CurLen then
        begin
          i := 0;
          Break;
        end;//if
        MatchChinese[i] := tmpS;
        inc(i);
      end;//if
      Sleep(0);
    until (not Terminated) and (not FPerlRegEx.MatchAgain);
    SetLength(MatchChinese, i);
    if i > 0 then
    begin
      FPerlRegEx.RegEx := SRegExFour_Three;
      FPerlRegEx.Subject := FTarget.Text;
      i := 0;
      if FPerlRegEx.Match then
      repeat{得到英文}
        tmpS := Trim(FPerlRegEx.MatchedExpression);
        if tmpS <> '' then
        begin
          if i >= CurLen then
          begin
            i := 0;
            Break;
          end;//if
          MatchEnglish[i] := tmpS;
          inc(i);
        end;//if
        Sleep(0);
      until (not Terminated) and (not FPerlRegEx.MatchAgain);
      SetLength(MatchEnglish, i);

      InsertToTable;//将数据插入数据表中
    end;//if i > 0
  end;//if CurLen > 0

  PostMessage(FProHandle,WM_CheckTmpURLList,0,0);//发送更新相似页面消息
  if not Terminated then
  FErrorMsg := 0; //标识为正确分析完毕
end;

function TPaserWordThread.InitADOConn: boolean;
begin
  Result := True;
  try
    FADOCon.Close;
    FADOCon.Open;
  except
    Result := False;
  end;//try
end;

procedure TPaserWordThread.ParserOver;
begin
  ProOnPaserWordOver(FTaskID,FErrorMsg);
end;

end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -