📄 udic.~pas
字号:
unit UDic;
{
////////////////////////////////
字典类----用于保存关键词各种参数,以及向量
////////////////////////////////
}
interface
uses SysUtils,Classes,ADODB;
const
//最大的词长度
MAX_WORD_LENGTH = 10;
type
//词类型
TWordType = string[MAX_WORD_LENGTH];
//向量的记录
PVector = ^TVector;
TVector = record
FID : integer;
FPos : integer;
end;
//权重的参数
PParam = ^TParam;
TParam = Record
FID : integer;
tf : integer;
end;
//计算权重的参数
TParams = class
private
FItems : TList;
private
procedure Clear();
Function IndexOf(FID : integer):integer;
public
//根据文档号去除词频
Function tf_ID(DocID : integer):Integer;
//根据索引号取词频
Function tf_Index(Index : integer):integer;
//添加词频
Procedure Addtf(FID : integer;tf : integer);
//取得文档的频率
Function df():Integer;
//取得参数的数量
Function Count():integer;
//根据索引取出文档号
Function DocID(Index : integer):Integer;
public
Constructor Create();
Destructor Destroy();override;
end;
//向量空间
TVSM = Class
private
FVectors : TList;
FParams : TParams;
private
procedure Clear();
procedure SaveToStream(stream : TStream);
procedure LoadFromStream(stream : TStream);
//返回词在某个文档中的频数
Function tf(id : integer):integer;
//返回包含这个词的文档数
Function df():integer;
//通过文档号和位置求索引
Function IndexOf(ID,Pos : integer):integer;
public
//procedure AddVector(vector:TVector);overload;
procedure AddVector(id,pos : integer);overload;
Function Vector(Index : integer):PVector;
Function Count():integer;
//计算返回参数
Function CalcParam():TParams;
public
Constructor Create();overload;
Constructor Create(ID : integer; Pos : integer);overload;
Destructor Destroy(); override;
end;
//向量空间列表
TVSMList = Class
private
FKeyWords : TStrings;
private
procedure Clear();
procedure SaveToStream(stream : TStream);
procedure LoadFromStream(stream : TStream);
public
//添加一个向量
procedure Add(Word : string;ID,Pos : integer);overload;
procedure Add(Word : string);overload;
//保存向量到文件
procedure SaveVector(FileName:string);
//从文件重读取向量
procedure ReadVector(FileName:string);
//
Function Count():integer;
//取一个向量
Function VSM(index : integer):TVSM;
//取一个关键字
Function GetWords(index : integer):String;
public
Constructor Create();
Destructor Destroy(); override;
end;
//抽象的字典
TAbstractDic = class
private
FKeyWords : TStrings;
//FVSMList : TVSMList;
protected
//添加一个新词
procedure AddNew(word : string);
//添加一个词
procedure Add(Word : string);
//
Procedure Sort();
public
//加载文档列表
procedure LoadDic();virtual;abstract;
public
//测试字符串是否是一个词
Function IsWords(AWord:String):Boolean;virtual;
//public
// property VSMList : TVSMList Read FVSMList ;//Write FVSMList;
public
Constructor Create();
Destructor Destroy(); override;
end;
//文件字典
TFileDic = Class(TAbstractDic)
private
FDicFile : string;
public
//加载字典
procedure LoadDic();override;
public
Constructor Create(DicFile:string);
end;
//数据库字典
TDBDic = class(TAbstractDic)
private
FConnectionString : string;
FTableName : string;
FFieldName : string;
FConnect : TAdoConnection;
FQuery : TADoQuery;
private
//连接数据库
Function ConnDB():Boolean;
//生成Sql语言
Function MakeSql():String;
//查询
Function Query():boolean;
private
procedure LoadDic();override;
public
constructor Create(ConnectionString : string;TableName : string;FieldName :string);
end;
implementation
uses UCalcWeight;
{ TVSM }
procedure TVSM.AddVector(id, pos: integer);
var
PV : PVector;
begin
if IndexOf(ID,pos)=-1 then
begin
new(PV);
PV.FID := id;
PV.FPos := pos;
FVectors.Add(PV);
end;
end;
{procedure TVSM.AddVector(vector: TVector);
begin
AddVector(vector.FID,vector.FPos)
end;
}
function TVSM.CalcParam: TParams;
var
Index : integer;
begin
FParams.Clear();
For Index := 0 to FVectors.Count-1 do
begin
FParams.Addtf(PVector(FVectors[Index]).FID,0);
end;
For Index := 0 to FParams.Count -1 do
begin
FParams.Addtf(FParams.DocID(Index),tf(FParams.DocID(Index)));
end;
result := FParams;
end;
procedure TVSM.Clear;
var
i : integer;
begin
For i := 0 to FVectors.Count -1 do
begin
Dispose(PVector(FVectors[i]));
end;
FVectors.Clear();
end;
constructor TVSM.Create;
begin
FVectors := TList.Create;
FParams := TParams.Create();
end;
function TVSM.Count: integer;
begin
result := Self.FVectors.Count ;
end;
constructor TVSM.Create(ID, Pos: integer);
begin
FVectors := TList.Create;
FParams := TParams.Create();
Self.AddVector(ID,Pos);
end;
destructor TVSM.Destroy;
begin
FParams.Free;
Clear();
FVectors.Free;
inherited;
end;
function TVSM.df: integer;
var
i : integer;
DocIDs : TList;
TmpID :integer;
begin
DocIDs := TList.Create ;
For i := 0 to FVectors.Count -1 do
begin
TmpID := PVector(FVectors).FID;
if DocIDs.IndexOf(Pointer(TmpID)) < 0 then
begin
DocIDs.Add(Pointer(TmpID));
end;
end;
Result := DocIDs.Count ;
DocIDs.Free;
end;
procedure TVSM.LoadFromStream(stream: TStream);
var
Count : integer;
i : integer;
ID : integer;
Pos : integer;
begin
Stream.Read(Count,SizeOf(integer));
Clear();
For i:=0 to Count-1 do
begin
Stream.Read(ID,SizeOf(integer));
Stream.Read(Pos, SizeOf(integer));
AddVector(ID,Pos);
end;
end;
procedure TVSM.SaveToStream(stream: TStream);
var
Count : integer;
i : integer;
ID : integer;
Pos : integer;
begin
Count := FVectors.Count ;
stream.Write(Count,sizeOf(integer));
for i:=0 to Count-1 do
begin
ID := PVector(FVectors[i]).FID ;
Pos := PVector(FVectors[i]).FPos;
stream.Write(ID,SizeOf(integer));
Stream.Write(Pos,SizeOf(integer));
end;
end;
function TVSM.tf(id: integer): integer;
var
i : integer;
begin
Result := 0;
For i:=0 to FVectors.Count -1 do
begin
if(id = PVector(FVectors[i]).FID) then
begin
Inc(Result);
end;
end;
end;
function TVSM.Vector(Index: integer): PVector;
begin
result := PVector(FVectors[index]);
end;
function TVSM.IndexOf(ID, Pos: integer): integer;
var
Index : integer;
begin
Result := -1;
For Index :=0 to FVectors.Count -1 do
begin
if(ID = PVector(FVectors[index]).FID)and(Pos = PVector(FVectors[index]).FPos)then
begin
Result := Index;
break;
end;
end;
end;
{ TAbstractDic }
procedure TAbstractDic.Add(Word: string);
begin
begin
AddNew(Word);
end;
end;
procedure TAbstractDic.AddNew(word: string);
begin
FKeyWords.Add(word);
end;
constructor TAbstractDic.Create;
begin
FKeyWords := TStringList.Create();
end;
destructor TAbstractDic.Destroy;
begin
FKeyWords.Free;
end;
function TAbstractDic.IsWords(AWord: String): Boolean;
var
Index : Integer;
begin
Result := false;
Index := FKeyWords.IndexOf(AWord);
if Index >=0 then
begin
//TVSM(FKeyWords.Objects[Index]).AddVector(ID,Pos);
Result := true;
end;
end;
procedure TAbstractDic.Sort;
begin
TStringList(FKeyWords).Sorted := true;
end;
{ TFileDic }
constructor TFileDic.Create(DicFile: string);
begin
Inherited Create();
FDicFile := DicFile;
end;
procedure TFileDic.LoadDic;
var
F : TextFile;
tmpStr : String;
begin
inherited;
if FileExists(FDicFile) then
begin
AssignFile(F,FDicFile) ;
try
Reset(F);
While not Eof(F) do
begin
readln(F,tmpStr);
AddNew(tmpStr);
end;
Sort();
finally
CloseFile(F);
end;
end;
end;
{ TDBDic }
function TDBDic.ConnDB: Boolean;
begin
try
FConnect.ConnectionString := FConnectionString;
FConnect.Connected := true;
FQuery.Connection := FConnect;
Result := true;
Except
Result := false;
end;
end;
constructor TDBDic.Create(ConnectionString, TableName, FieldName: string);
begin
Inherited Create();
FConnectionString := ConnectionString;
FTableName := TableName;
FFieldName := FieldName;
FConnect := TAdoConnection.Create(nil);
FQuery := TAdoQuery.Create(nil);
end;
procedure TDBDic.LoadDic;
begin
inherited;
if ConnDB() then
begin
if Query() then
begin
if not FQuery.IsEmpty then
begin
While not FQuery.Eof do
begin
self.AddNew(FQuery.Fields[0].asString);
FQuery.Next;
end;
end;
end;
end;
end;
function TDBDic.MakeSql: String;
begin
Result := 'SELECT '+FFieldName+' FROM '+FTableName;
end;
function TDBDic.Query: boolean;
begin
FQuery.Close;
FQuery.SQL.Clear;
FQuery.SQL.Text := MakeSql();
try
FQuery.Open;
Result := true;
except
Result := false;
end;
end;
{ TParams }
procedure TParams.Addtf(FID, tf: integer);
var
Index : integer;
P : PParam;
begin
Index := IndexOf(FID);
if Index <> -1 then
begin
P := PParam(FItems[Index]);
P.tf := tf;
end
else
begin
new(P);
P.FID := FID;
p.tf := tf;
FItems.Add(P);
end;
end;
procedure TParams.Clear;
var
Index : integer;
P : PParam;
begin
For Index :=0 to FItems.Count-1 do
begin
P := PParam(FItems[Index]);
Dispose(P);
end;
FItems.Clear();
end;
function TParams.Count: integer;
begin
result := FItems.Count ;
end;
constructor TParams.Create;
begin
FItems := TList.Create();
end;
destructor TParams.Destroy;
begin
Clear();
FItems.Free;
inherited;
end;
function TParams.df: Integer;
begin
result := FItems.Count;
end;
function TParams.DocID(Index: integer): Integer;
begin
Result := PParam(FItems[Index]).FID ;
end;
function TParams.IndexOf(FID: integer): integer;
var
P : PParam;
I : integer;
begin
Result := -1;
For I:=0 to FItems.Count-1 do
begin
P := PParam(FItems[I]);
if (P.FID = FID) then
begin
Result := I;
break;
end;
end;
end;
function TParams.tf_ID(DocID: integer): Integer;
var
Index : integer;
begin
Index := IndexOf(DocID);
result := tf_Index(Index);
end;
function TParams.tf_Index(Index: integer): integer;
begin
result := -1;
if (Index >= 0) and (Index < FItems.Count) then
begin
Result := PParam(FItems[Index]).tf ;
end;
end;
{ TVSMList }
procedure TVSMList.Add(Word: string; ID, Pos: integer);
var
Index : integer;
begin
Index := FKeyWords.IndexOf(Word);
if(Index < 0)then
begin
FKeyWords.AddObject(Word,TVSM.Create(ID,Pos));
end
else
begin
TVSM(FKeyWords.Objects[Index]).AddVector(ID,Pos);
end;
end;
procedure TVSMList.Add(Word: string);
begin
if(FKeyWords.IndexOf(Word)<0) then
begin
FKeyWords.AddObject(Word,TVSM.Create());
end;
end;
procedure TVSMList.Clear;
var
i : integer;
begin
For i:=0 to FKeyWords.Count -1 do
begin
TVSM(FKeyWords.Objects[i]).Free;
end;
FKeyWords.Clear();
end;
function TVSMList.Count: integer;
begin
result := FKeyWords.Count;
end;
constructor TVSMList.Create;
begin
FKeyWords := TStringList.Create();
end;
destructor TVSMList.Destroy;
begin
Clear();
FKeyWords.Free;
inherited;
end;
function TVSMList.GetWords(index: integer): String;
begin
result := FKeyWords[index];
end;
procedure TVSMList.LoadFromStream(stream: TStream);
var
Count : integer;
WordLen : integer;
Word : TWordType;
i : integer;
begin
Stream.Read(Count,sizeof(integer));
Clear();
for i:=0 to Count-1 do
begin
Stream.Read(Word,SizeOf(TWordType));
//Stream.Read(Word,WordLen);
Self.Add(Word);
TVSM(FKeyWords.Objects[i]).LoadFromStream(Stream);
end;
end;
procedure TVSMList.ReadVector(FileName: string);
var
stream : TStream;
begin
stream := TFileStream.Create(FileName,fmOpenRead);
try
self.LoadFromStream(stream);
finally
Stream.Free;
end;
end;
procedure TVSMList.SaveToStream(stream: TStream);
var
Count : integer;
WordLen : integer;
Word : TWordType;
i : integer;
begin
Count := FKeyWords.Count;
Stream.Write(Count,SizeOf(integer));
For i:=0 to Count-1 do
begin
//wordLen := Length(FKeyWords[i]);
Word := FKeyWords[i];
Stream.Write(Word,SizeOf(TWordType));
TVSM(FKeyWords.Objects[i]).SaveToStream(stream);
end;
end;
procedure TVSMList.SaveVector(FileName: string);
var
Stream : TStream;
begin
Stream := TFileStream.Create(FileName,fmCreate);
try
SaveToStream(Stream);
finally
stream.Free;
end;
end;
function TVSMList.VSM(index: integer): TVSM;
begin
result := TVSM(FKeyWords.Objects[index]);
end;
end.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -