⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 udocuments.~pas

📁 用于中文分词的算法。包括逆向分词和反向分词
💻 ~PAS
字号:
unit UDocuments;

{
////////////////////////////////////////
   文档类-----
   只实现了目录方式的文档类,
   数据方式的文档类可以根据自己设计的数据库结构实现
////////////////////////////////////////
}
interface
uses Classes,AdoDB,SysUtils;
Type
   TAbstractDocs = Class
   public
     //加载文档列表
     Function LoadDocs():boolean;virtual; abstract;
     //下一个文档
     procedure NextDoc(); virtual; abstract;
     //取得ID
     Function GetID():Integer; virtual; abstract;
     //取得文本
     Function GetText():String; Virtual; abstract;
     //是否达到最后一个文档
     Function EndDoc():Boolean;virtual; abstract;
     //取得文档的总数两
     Function GetDocCount():integer;virtual; abstract;
     //取的索引的列表
     //Function GetIndexList():TList;virtual; abstract;
     //把文件设置为第一个
     Procedure FirstDoc();virtual;abstract;
   end;

   //目录方式
   PFileIndex = ^TFileIndex;
   TFileIndex = record
     FID : integer;
     FFileName : string[255];
   end;
   TDirDocs = class(TAbstractDocs)
   private
     //目录
     FDir : string;
     //过滤方式
     FFilter : string;
     //文件名集合
     FFiles : TList;
     //指示器游标
     FCursor : integer;
   private
     FID : integer;
     FText : string;
   private
     procedure Clear();
     procedure ClearEx(list:TList);
     procedure UpdateFileList(SourceList :TList);
     Function MaxID():integer;
     Function SearchDir():TList;
     procedure SaveFiles(FileName : string);
     procedure LoadFiles(FileName : string);
   public
     Function LoadDocs():boolean;override;
     Procedure NextDoc();override;
     Function GetID():Integer;override;
     Function GetText():String;override;
     Function EndDoc():Boolean;override;
     Function GetDocCount():integer;override;
     //Function GetIndexList():TList;override;
     Procedure FirstDoc();override;
   public
     Constructor Create(Path : string;Filter : string);
     Destructor Destroy();override;
   end;

   //数据库
   TDBDocs = class(TAbstractDocs)
   private
     //连接字符串
     FConnectString : string;
     //数据表名称
     FTableName : string;
     //连接数据库组件
     FConnection : TAdoConnection;
     //查询组件
     FQuery : TAdoQuery;
     FIDFieldName,FTextFieldName : string;
   private
     FID : integer;
     FText : string;
   private
     //连接数据库
     Function ConnDB():Boolean;
     //生成Sql语言
     Function MakeSql():String;
     //查询
     Function Query():boolean;
   public
     Function LoadDocs():boolean;override;
     Procedure NextDoc();override;
     Function GetID():Integer;override;
     Function GetText():String;override;
     Function EndDoc():Boolean;override;
     Function GetDocCount():integer;override;
     //Function GetIndexList():TList;override;
     Procedure FirstDoc();override;
   public
     Constructor Create(connectionString : string;TableName : string;IndexFieldName:String;TextFieldName:String);
     Destructor Destroy();override;
   end;

implementation

{ TDirDocs }

procedure TDirDocs.Clear;
begin
  ClearEx(FFiles);
end;

procedure TDirDocs.ClearEx(list: TList);
var
  Index : integer;
begin
  For Index :=0 to list.Count-1 do
  begin
    Dispose(PFileIndex(list[Index]));
  end;
  list.Clear();
end;

constructor TDirDocs.Create(Path, Filter: string);
begin
  if(Path[Length(Path)]='\')then
    FDir := Path
  else
    FDir := Path+'\';
  FFilter := Filter;
  FFiles := TList.Create ;
end;

destructor TDirDocs.Destroy;
begin
  Clear();
  FFiles.Free;
  inherited;
end;

function TDirDocs.EndDoc: Boolean;
begin
  Result := false;
  if (FCursor <0)or(FCursor >= FFiles.Count) then
    Result := true;
end;

procedure TDirDocs.FirstDoc;
begin
  inherited;
  FCursor := 0;
end;

function TDirDocs.GetDocCount: integer;
begin
  result := FFiles.Count ;
end;

function TDirDocs.GetID: Integer;
begin
  Result := PFileIndex(FFiles[FCursor]).FID;
end;
{
function TDirDocs.GetIndexList: TList;
begin

end;
}
function TDirDocs.GetText: String;
var
  Text : String;
  TmpText : String;
  FileName : string;
  F : TextFile;
begin
  FileName := PFileIndex(FFiles[FCursor]).FFileName ;
  if(FileExists(self.FDir+FileName)) then
  begin
    AssignFile(F,self.FDir+FileName);
    try
      ReSet(F);
      Text := '';
      TmpText := '';
      while not Eof(F) do
      begin
        Readln(F,tmpText);
        Text := Text + TmpText;
      end;
      Result := Text;
    finally
      CloseFile(F);
    end;
  end
  else
    Result := '';
end;

function TDirDocs.LoadDocs: boolean;
var
  fileList : TList;
begin
  //首先搜索目录,然后加载索引文件,然后进行对比更新
  fileList := SearchDir();
  if FileExists(FDir+'File.DIX') then
  begin
    LoadFiles(FDir+'File.DIX');
    UpdateFileList(fileList);
    ClearEx(fileList);
  end
  else
  begin
    FFiles.Assign(FileList);
    FileList.Clear();
  end;
  //保存最新的文件列表索引
  SaveFiles(FDir+'File.DIX');
  //把指针放到第一个文档上
  Self.FirstDoc;
end;

procedure TDirDocs.LoadFiles(FileName: string);
var
  Stream : TStream;
  Count : integer;
  FileIndex : TFileIndex;
  P : PFileIndex;
  Index : integer;
begin
  Stream := TFileStream.Create(FileName,fmOpenRead);
  try
    Clear();
    Stream.Read(count,SizeOf(Index));
    For Index :=0 to count -1 do
    begin     
      Stream.Read(FileIndex,SizeOf(TFileIndex));
      new(P);
      P.FID := FileIndex.FID ;
      P.FFileName := FileIndex.FFileName;
      FFiles.Add(P); 
    end;
  Finally
    Stream.Free;
  end;
end;

function TDirDocs.MaxID: integer;
var
  ID : integer;
  index : integer;
begin
  ID := 0;
  For Index := 0 to FFiles.Count -1 do
  begin
    if ID < PFileIndex(FFiles[Index]).FID then
    begin
      ID := PFileIndex(FFiles[Index]).FID;
    end;
  end;
  result:=ID;
end;

procedure TDirDocs.NextDoc;
begin
  inherited;
  inc(FCursor);
end;

procedure TDirDocs.SaveFiles(FileName: string);
var
  Stream : TStream;
  Count : integer;
  P : PFileIndex;
  Index : integer;
begin
  Stream := TFileStream.Create(FileName,fmCreate);
  try
    Count := FFiles.Count ;
    Stream.Write(Count,SizeOf(Index)); 
    For Index :=0 to count -1 do
    begin
      P := PFileIndex(FFiles[Index])  ;
      Stream.Write(P^,SizeOf(TFileIndex));
    end;
  Finally
    Stream.Free;
  end;
end;

Function TDirDocs.SearchDir():TList;
var
  sr: TSearchRec;
  ID : Integer;
  P : PFileIndex;
begin
  ID := 0;
  Result := TList.Create();
  if FindFirst(FDir+FFilter,faAnyFile,Sr) =0 then
  begin
    repeat
      new(P);
      P.FID := ID;
      P.FFileName := sr.Name;
      Result.Add(P);
      Inc(ID);
    until FindNext(sr) <> 0;
    FindClose(sr);
  end;
end;

procedure TDirDocs.UpdateFileList(SourceList: TList);
var
  I,J : integer;
  FilesCount : integer;
  P : PFileIndex;
  MID : integer;
begin
  FilesCount := FFiles.Count;
  MID := MaxID;
  //往列表中添加新的文件
  For I:=0 to SourceList.Count -1 do
  begin
    For J:=0 to FilesCount -1 do
    begin
      //比较时只是比较文件名,不对路径比较
      if LowerCase(PFileIndex(FFiles[J]).FFileName) = LowerCase(PFileIndex(SourceList[I]).FFileName) then
      begin
        break;
      end;
    end;
    if J >= FilesCount then
    begin
      Inc(MID);
      new(P);
      P.FID := MID;
      P.FFileName := PFileIndex(SourceList[I]).FFileName;
    end;
  end;
  //从列表中删除没用的文件名
  For I := FilesCount -1 downto 0 do
  begin
    For J:=0 to SourceList.Count -1 do
    begin
      //比较时只是比较文件名,不对路径比较
      if LowerCase(PFileIndex(FFiles[I]).FFileName) = LowerCase(PFileIndex(SourceList[J]).FFileName) then
      begin
        break;
      end;
      if J >= SourceList.Count then
      begin
        P := PFileIndex(FFiles[I]);
        Dispose(P);
        FFiles.Delete(I); 
      end;
    end;  
  end;
end;

{ TDBDocs }

function TDBDocs.ConnDB: Boolean;
begin
  try
    FConnection.ConnectionString := FConnectString;
    FConnection.Connected := true;
    FQuery.Connection := FConnection;
    Result := true;
  Except
    Result := false;
  end;
end;

constructor TDBDocs.Create(connectionString, TableName, IndexFieldName,
  TextFieldName: String);
begin
  FConnectString := connectionstring;
  FTableName := TableName;
  FIDFieldName := IndexFieldName;
  FTextFieldName := TextFieldName;
  FConnection := TAdoConnection.Create(nil);
  FQuery := TAdoQuery.Create(nil); 
end;

destructor TDBDocs.Destroy;
begin
  FQuery.Free;
  FConnection.Free;
  inherited;
end;

function TDBDocs.EndDoc: Boolean;
begin
  result := FQuery.Eof;
end;

procedure TDBDocs.FirstDoc;
begin
  inherited;
  FQuery.First;
end;

function TDBDocs.GetDocCount: integer;
begin
  Result := FQuery.RecordCount;
end;

function TDBDocs.GetID: Integer;
begin
  Result := FQuery.Fields[0].AsInteger ;
end;

function TDBDocs.GetText: String;
begin
  Result := FQuery.Fields[1].AsString ;
end;

function TDBDocs.LoadDocs: boolean;
begin
  Result := false;
  if ConnDB() then
  begin
    if Query() then
    begin
      FQuery.First;
      result := not FQuery.IsEmpty; 
    end;
  end;
end;

function TDBDocs.MakeSql: String;
begin
   Result := 'SELECT '+FIDFieldName+','+FTextFieldName+' FROM '+FTableName;
end;

procedure TDBDocs.NextDoc;
begin
  inherited;
  FQuery.Next;
end;

function TDBDocs.Query: boolean;
begin
  FQuery.Close;
  FQuery.SQL.Clear;
  FQuery.SQL.Text := MakeSql();
  try
    FQuery.Open;
    Result := true;
  except
    Result := false;
  end;
end;

end.
 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -