⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 udic.pas

📁 用于中文分词的算法。包括逆向分词和反向分词
💻 PAS
📖 第 1 页 / 共 2 页
字号:
    begin
       P := Vector(I);
       FVectors.Delete(I);
       Dispose(P);
    end; 
  end;
end;

// Returns the document ID whose tf() value is the largest among the stored
// vectors, or -1 when there are no vectors or no tf() value exceeds zero.
function TVSM.MaxDocID: integer;
var
  BestID : integer;
  BestCnt : integer;
  CurID : integer;
  CurCnt : integer;
  Slot : integer;
begin
  BestID := -1;
  BestCnt := 0;
  for Slot := 0 to Count - 1 do
  begin
    CurID := Vector(Slot).FID;
    // The current leader cannot beat itself, so skip the tf() lookup for it.
    if CurID = BestID then
      Continue;
    CurCnt := tf(CurID);
    // Strict comparison: on a tie the document found first keeps the lead.
    if CurCnt > BestCnt then
    begin
      BestCnt := CurCnt;
      BestID := CurID;
    end;
  end;
  Result := BestID;
end;

{ TAbstractDic }

// Adds Word to the dictionary by delegating to AddNew.
procedure TAbstractDic.Add(Word: string);
begin
  AddNew(Word);
end;

// Appends word to the FKeyWords list.
procedure TAbstractDic.AddNew(word: string);
begin
  Self.FKeyWords.Add(word);
end;

// Creates the dictionary with an empty string list for the key words.
constructor TAbstractDic.Create;
begin
  Self.FKeyWords := TStringList.Create;
end;

// Frees the key-word list and then runs the inherited destructor.
destructor TAbstractDic.Destroy;
begin
  FKeyWords.Free;
  // Chain to the ancestor so that its own clean-up is not skipped.
  inherited;
end;

// Returns True when AWord is present in the key-word list, False otherwise.
function TAbstractDic.IsWords(AWord: String): Boolean;
begin
  // IndexOf yields a negative value when the word is not found.
  Result := FKeyWords.IndexOf(AWord) >= 0;
end;
// Switches the key-word list to sorted mode.
procedure TAbstractDic.Sort;
var
  Words : TStringList;
begin
  // Unchecked cast; FKeyWords is created as a TStringList in Create.
  Words := TStringList(FKeyWords);
  Words.Sorted := True;
end;

{ TFileDic }

// Creates a file-backed dictionary and remembers DicFile as the file that
// LoadDic reads the words from.
constructor TFileDic.Create(DicFile: string);
begin
  inherited Create;
  Self.FDicFile := DicFile;
end;

// Loads the dictionary from the text file FDicFile: every line is added as
// one word and the list is sorted afterwards. Does nothing when the file
// does not exist.
procedure TFileDic.LoadDic;
var
  F : TextFile;
  tmpStr : String;
begin
  inherited;
  if FileExists(FDicFile) then
  begin
    AssignFile(F,FDicFile) ;
    // Open outside the try block: if Reset fails the file is not open, and
    // calling CloseFile on it would raise a second error hiding the first.
    Reset(F);
    try
      While not Eof(F) do
      begin
        readln(F,tmpStr);
        AddNew(tmpStr);
      end;
      Sort();
    finally
      // Reached only after a successful Reset, so the handle is valid here.
      CloseFile(F);
    end;
  end;
end;

{ TDBDic }

// Opens the ADO connection using FConnectionString and attaches FQuery to it.
// Returns True on success and False when any of the steps raises.
function TDBDic.ConnDB: Boolean;
begin
  try
    FConnect.ConnectionString := FConnectionString;
    FConnect.Connected := True;
    FQuery.Connection := FConnect;
    Result := True;
  except
    // Every failure is reported through the return value only.
    Result := False;
  end;
end;

// Creates a database-backed dictionary. The connection string, table name
// and field name are only stored here; the ADO connection and query objects
// are created without an owner.
// NOTE(review): no code in view frees FConnect / FQuery - confirm that the
// class declares a destructor elsewhere.
constructor TDBDic.Create(ConnectionString, TableName, FieldName: string);
begin
  inherited Create;
  FConnect := TAdoConnection.Create(nil);
  FQuery := TAdoQuery.Create(nil);
  FConnectionString := ConnectionString;
  FTableName := TableName;
  FFieldName := FieldName;
end;

// Loads the dictionary from the database: connects, runs the query and adds
// the first field of every returned row as a word. Stops silently when the
// connection or the query fails, or when the result set is empty.
procedure TDBDic.LoadDic;
begin
  inherited;
  if not ConnDB() then
    Exit;
  if not Query() then
    Exit;
  if FQuery.IsEmpty then
    Exit;
  while not FQuery.Eof do
  begin
    AddNew(FQuery.Fields[0].AsString);
    FQuery.Next;
  end;
end;

// Builds the SELECT statement that reads FFieldName from FTableName.
// Both names are inserted into the SQL text verbatim.
function TDBDic.MakeSql: String;
begin
  Result := Format('SELECT %s FROM %s', [FFieldName, FTableName]);
end;

// Runs the statement produced by MakeSql on FQuery.
// Returns True when the query opens and False when opening raises.
function TDBDic.Query: boolean;
begin
  // Reset the query object before assigning the new statement.
  FQuery.Close;
  FQuery.SQL.Clear;
  FQuery.SQL.Text := MakeSql();
  try
    FQuery.Open;
    Result := True;
  except
    // The failure is reported through the return value only.
    Result := False;
  end;
end;

{ TParams }

// Stores tf for the document FID. An existing entry for FID has its tf
// overwritten; otherwise a new entry is allocated and appended to FItems.
procedure TParams.Addtf(FID, tf: integer);
var
  Slot : integer;
  Entry : PParam;
begin
  Slot := IndexOf(FID);
  if Slot = -1 then
  begin
    // No entry for this document yet: allocate one and register it.
    New(Entry);
    Entry^.FID := FID;
    FItems.Add(Entry);
  end
  else
    Entry := PParam(FItems[Slot]);
  Entry^.tf := tf;
end;

// Releases every entry held in FItems and then empties the list.
procedure TParams.Clear;
var
  Slot : integer;
  Entry : PParam;
begin
  for Slot := 0 to FItems.Count - 1 do
  begin
    // The list stores untyped pointers; cast back before disposing.
    Entry := PParam(FItems[Slot]);
    Dispose(Entry);
  end;
  FItems.Clear;
end;

// Returns the number of entries stored in FItems.
function TParams.Count: integer;
begin
  Result := Self.FItems.Count;
end;

// Creates the parameter set with an empty entry list.
constructor TParams.Create;
begin
  Self.FItems := TList.Create;
end;

// Disposes all entries, frees the list and runs the inherited destructor.
destructor TParams.Destroy;
begin
  // Entries must be disposed before the list that references them is freed.
  Self.Clear;
  FItems.Free;
  inherited Destroy;
end;

// Returns the number of entries in FItems (one entry per document ID, as
// Addtf never stores the same FID twice).
function TParams.df: Integer;
begin
  Result := Self.FItems.Count;
end;

// Returns the document ID of the entry at position Index in FItems.
// Index is passed to the list unchecked.
function TParams.DocID(Index: integer): Integer;
var
  Entry : PParam;
begin
  Entry := PParam(FItems[Index]);
  Result := Entry^.FID;
end;

// Returns the position of the first entry whose document ID equals FID,
// or -1 when no entry matches. The search is a linear scan of FItems.
function TParams.IndexOf(FID: integer): integer;
var
  Slot : integer;
begin
  Result := -1;
  for Slot := 0 to FItems.Count - 1 do
    if PParam(FItems[Slot])^.FID = FID then
    begin
      Result := Slot;
      Exit;
    end;
end;

// Returns the tf value stored for document DocID, or -1 when the document
// has no entry (IndexOf yields -1, which tf_Index maps to -1).
function TParams.tf_ID(DocID: integer): Integer;
begin
  Result := tf_Index(IndexOf(DocID));
end;

// Returns the tf value of the entry at position Index, or -1 when Index is
// outside the range of FItems.
function TParams.tf_Index(Index: integer): integer;
begin
  if (Index < 0) or (Index >= FItems.Count) then
    Result := -1
  else
    Result := PParam(FItems[Index])^.tf;
end;

{ TVSMList }

// Records an occurrence of Word given by ID and Pos. A word already in the
// list gets the pair added to its TVSM through AddVector; a new word is
// appended together with a TVSM created from the pair.
procedure TVSMList.Add(Word: string; ID, Pos: integer);
var
  Slot : integer;
begin
  Slot := FKeyWords.IndexOf(Word);
  if Slot >= 0 then
    TVSM(FKeyWords.Objects[Slot]).AddVector(ID, Pos)
  else
    FKeyWords.AddObject(Word, TVSM.Create(ID, Pos));
end;

// Registers Word with a TVSM created by the parameterless constructor.
// A word that is already in the list is left untouched.
procedure TVSMList.Add(Word: string);
begin
  if FKeyWords.IndexOf(Word) >= 0 then
    Exit;
  FKeyWords.AddObject(Word, TVSM.Create());
end;

// Frees the object attached to every word and then empties the word list.
procedure TVSMList.Clear;
var
  Slot : integer;
begin
  for Slot := 0 to FKeyWords.Count - 1 do
    FKeyWords.Objects[Slot].Free;
  FKeyWords.Clear;
end;

// Returns the number of words held in the list.
function TVSMList.Count: integer;
begin
  Result := Self.FKeyWords.Count;
end;

// Creates the list with an empty string list for the words.
constructor TVSMList.Create;
begin
  Self.FKeyWords := TStringList.Create;
end;

// Frees all attached objects, the word list itself and then runs the
// inherited destructor.
destructor TVSMList.Destroy;
begin
  // The attached objects must go before the list that references them.
  Self.Clear;
  FKeyWords.Free;
  inherited Destroy;
end;

// Returns the word stored at position index. The index is passed to the
// string list unchecked.
function TVSMList.GetWords(index: integer): String;
begin
  Result := FKeyWords.Strings[index];
end;

// Replaces the list content with the data read from stream. The layout
// mirrors SaveToStream: an integer entry count, then for each entry a
// fixed-size TWordType buffer followed by the data of its TVSM.
// Raises the stream's read error when the stream ends prematurely.
procedure TVSMList.LoadFromStream(stream: TStream);
var
  EntryCount : integer;
  RawWord : TWordType;
  Key : string;
  Slot : integer;
  i : integer;
begin
  // ReadBuffer (unlike Read) fails loudly on a short read, so a truncated
  // stream cannot leave EntryCount or RawWord holding garbage.
  stream.ReadBuffer(EntryCount, SizeOf(integer));
  Clear();
  for i := 0 to EntryCount - 1 do
  begin
    stream.ReadBuffer(RawWord, SizeOf(TWordType));
    Key := RawWord;
    Self.Add(Key);
    // Add skips words that are already present, so the loop counter is not
    // a reliable position; look the word up to find the object to load.
    Slot := FKeyWords.IndexOf(Key);
    TVSM(FKeyWords.Objects[Slot]).LoadFromStream(stream);
  end;
end;

// Loads the list from the file FileName by opening it read-only and passing
// the stream to LoadFromStream. The stream is freed even when loading fails.
procedure TVSMList.ReadVector(FileName: string);
var
  Source : TFileStream;
begin
  Source := TFileStream.Create(FileName, fmOpenRead);
  try
    LoadFromStream(Source);
  finally
    Source.Free;
  end;
end;

// Writes the list to stream: first the number of words as an integer, then
// for every word a fixed-size TWordType buffer followed by the data written
// by the word's TVSM.
procedure TVSMList.SaveToStream(stream: TStream);
var
  Total : integer;
  Buffer : TWordType;
  Slot : integer;
begin
  Total := FKeyWords.Count;
  stream.Write(Total, SizeOf(Total));
  for Slot := 0 to Total - 1 do
  begin
    // NOTE(review): the whole buffer is written regardless of the word's
    // length; presumably TWordType is a short-string type, in which case the
    // bytes past the text are not initialised here - confirm.
    Buffer := FKeyWords[Slot];
    stream.Write(Buffer, SizeOf(Buffer));
    TVSM(FKeyWords.Objects[Slot]).SaveToStream(stream);
  end;
end;

// Saves the list to the file FileName, creating or overwriting it, by
// passing a file stream to SaveToStream. The stream is freed even when
// saving fails.
procedure TVSMList.SaveVector(FileName: string);
var
  Target : TFileStream;
begin
  Target := TFileStream.Create(FileName, fmCreate);
  try
    SaveToStream(Target);
  finally
    Target.Free;
  end;
end;

// Returns the TVSM attached to the word at position index, or nil when
// index is negative. No upper bound is checked before the list is accessed.
function TVSMList.VSM(index: integer): TVSM;
begin
  Result := nil;
  if index >= 0 then
    Result := TVSM(FKeyWords.Objects[index]);
end;

// Returns the TVSM attached to Word, or nil when the word is not in the
// list (the index overload maps the negative IndexOf result to nil).
function TVSMList.VSM(Word: string): TVSM;
var
  Slot : integer;
begin
  Slot := FKeyWords.IndexOf(Word);
  Result := VSM(Slot);
end;

end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -