⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mainfu.pas

📁 用Delphi7实现了ID3算法
💻 PAS
字号:
{
 Description:ID3算法的简单实现,使用ClientDataset作为内存表
 Author   : Oneloong
 Datetime     : 2008.03.08 
 Test data : WeatherInfo_EN.txt

}
unit MainFU;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, ExtCtrls, Grids, StdCtrls, ComCtrls,math, DB, DBClient, DBGrids;

const
  NodeInfo = 'AttributeName: %s, Count: %d, Entropy: %8.3f, Gain: %8.3f';
type
  PSubSetInfo = ^TSubSetInfo;
  TSubSetInfo = record
    attriName : string;
    count     : integer;
    entropy   : double;
    gain      : double;
    Splited   : boolean ;
end;

//属性信息
type
  PAttriInfo = ^TAttriInfo;
  TAttriInfo = record
    Val : TStringList;//值的列表
    Splited : boolean;//以分裂
end;

//值信息
type
  PValInfo = ^TValInfo;
  TValInfo = record
    count : integer;//值的次数
    ConditionCount : integer;//条件次数
end;


type
  TMainF = class(TForm)
    Panel1: TPanel;
    Panel2: TPanel;
    Panel3: TPanel;
    Panel4: TPanel;
    Splitter1: TSplitter;
    btnLoadDB: TButton;
    Button2: TButton;
    PageControl1: TPageControl;
    TabSheet1: TTabSheet;
    TabSheet2: TTabSheet;
    TV: TTreeView;
    Memo: TMemo;
    ledtNodeInfo: TLabeledEdit;
    ClientDataSet: TClientDataSet;
    DBGrid1: TDBGrid;
    DataSource: TDataSource;
    Label1: TLabel;
    cbbSA: TComboBox;
    procedure btnLoadDBClick(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure Button2Click(Sender: TObject);
    procedure TVChange(Sender: TObject; Node: TTreeNode);
  private
    FAttributesList: TStringList;
    FOrgCDS: TClientDataSet;
    FtempCDS: TClientDataSet;
    FSplitedAttriList: TStringList;
    procedure SetAttributesList(const Value: TStringList);
    function isLeafNode(A1, V1: string;cds : TClientDataSet;  var LeafCaption : string): boolean;
    procedure SetOrgCDS(const Value: TClientDataSet);
    procedure SettempCDS(const Value: TClientDataSet);
    procedure SetSplitedAttriList(const Value: TStringList);
    procedure CopyClientData(Src: TClientDataSet;  Des : TClientDataSet);
  private
    FDBName: string;
    FDBInfoList: TStringList;
    FRecList: Tlist;
    FInitState: double;
    SubTreeView : TTreeView;
    CurrentNode : TTreeNode;
    //属性列表
    property AttributesList : TStringList read FAttributesList write SetAttributesList;
    property SplitedAttriList :TStringList read FSplitedAttriList write SetSplitedAttriList;
    function LoadDB: boolean;
    function GetNodeInfo(attrName : string;Count :integer; Entropy :double; Gain :double) : string;
    procedure SetDBName(const Value: string);
    function FindNodeByText(aText : string) : TTreeNode;
  private
    { Private declarations }

    property DBName : string read FDBName write SetDBName;
    property OrgCDS : TClientDataSet read FOrgCDS write SetOrgCDS;
    property tempCDS : TClientDataSet read FtempCDS write SettempCDS;
    procedure ParseDB(aDBName : string);//解析数据
    procedure ClearAttriList();
    function getValues(attri : string;cds : TClientDataSet) : TStringList;

    function getEntropy(attri : string;cds : TClientDataSet) : double;//计算熵
    
    //Gain(S, A) = Entropy(S) -   ((|S v| / |S|)  *  Entropy(S v))
    function getGain(
                S,//information gain of example set S on attribute A 
                A : string;
                cds : TClientDataSet): double;//信息增益
    function getMaxGain(S : string; cds : TClientDataSet) : string;
    //创建决策树
    procedure CreateDecisionTree(A, V: string; cds : TClientDataSet;ParNode : TTreeNode);
  public
    { Public declarations }
  end;

var
  MainF: TMainF;

implementation



{$R *.dfm}

function TMainF.LoadDB : boolean;
var
  OpenDlg : TOpenDialog;
begin
  result := False;
  OpenDlg := TOpenDialog.Create(self);
  try
    if OpenDlg.Execute then
    begin
      DBName := OpenDlg.FileName;
      result := True;
    end
    else
    begin

    end;
  finally
    OpenDlg.Free;
  end;

end;



procedure TMainF.btnLoadDBClick(Sender: TObject);
begin
  if LoadDB then
  begin
    tv.Items.BeginUpdate;
    ParseDB(DBName) ;
    tv.Items.EndUpdate;
  end;
  tempCDS.Active := true;
end;

procedure TMainF.ParseDB(aDBName : string);
var
  i, j : integer;
  tmpStr : string;
  AttributeName : string;//属性名
  tmpStrList, attristrList : TStringList;
  aField : TStringField;
  pAttInfo : PAttriInfo;
begin
  tmpStrList := TstringList.Create;
  attristrList := TstringList.Create;
  try
    tmpStrList.LoadFromFile(aDBName);
    //取得属性值
    AttributesList.CommaText := tmpStrList.Strings[0];
    cbbSA.Items.CommaText := AttributesList.CommaText;
    ClientDataSet.Active := False;
    ClientDataSet.FieldDefs.Clear;
    //创建内存表
    for i := 0 to AttributesList.Count -1 do
    begin
      aField := TStringField.Create(Self);
      
      with aField do
      begin
        FieldName := Trim(AttributesList.Strings[i]);
        Name := FieldName;
        Size := 10;
        Index := i//按创建的字段顺序累加
      end;
      aField.DataSet := ClientDataSet;
    end;
    ClientDataSet.CreateDataSet;
    tempCDS.Active := False;
    tempCDS.FieldDefs.Clear;
    tempCDS.FieldDefs := ClientDataSet.FieldDefs;

    for i := 1 to tmpStrList.Count -1 do
    begin
      attristrList.CommaText := tmpStrList.Strings[i];
      ClientDataSet.Append;
      for j := 0 to attristrList.Count -1 do
        ClientDataSet.Fields.Fields[j].AsString  := attristrList.Strings[j];
      ClientDataSet.Post;
    end;
    ClientDataSet.Active := true;
    tempCDS.Data := ClientDataSet.Data;
    OrgCDS.Data := ClientDataSet.Data;

  finally
    tmpStrList.Free;
    attriStrList.Free;
  end;
end;



procedure TMainF.FormCreate(Sender: TObject);
begin
  AttributesList := TStringList.Create;
  SplitedAttriList := TStringList.Create;
  OrgCDS := TClientDataSet.Create(self);
  tempCDS := TClientDataSet.Create(self);

end;

procedure TMainF.FormDestroy(Sender: TObject);
begin
  SubTreeView.Free;
  AttributesList.Free;
  SplitedAttriList.Free;
  OrgCDS.Free;
  tempCDS.Free;
end;

function TMainF.GetNodeInfo(attrName: string; Count: integer; Entropy,
  Gain: double): string;
begin
  result := Format(NodeInfo,[attrName,Count,Entropy,Gain]);
end;



procedure TMainF.SetDBName(const Value: string);
begin
  FDBName := Value;
end;


procedure TMainF.Button2Click(Sender: TObject);
var
  i : integer;
begin
  tv.Items.Clear;
  SplitedAttriList.Clear;
  clientDataset.Filter := '';
  ClientDataSet.Filtered := False;
  clientdataset.DisableControls;
  for i := 0 to AttributesList.Count - 1 do
  begin
    if AttributesList.Strings[i] = cbbSA.Text then
      continue;
    CreateDecisionTree(AttributesList.Strings[i],'',clientDataSet,nil);
  end;
  clientdataset.Filtered := False;
  clientdataset.EnableControls;
end;

function TMainF.FindNodeByText(aText: string): TTreeNode;
var
  i : integer;
begin
  result := nil;
  for i := 0 to SubTreeView.Items.Count - 1 do
  begin
    if SubTreeView.Items.Item[i].Text <> aText then
      continue
    else
    begin
      result := SubTreeView.Items.Item[i];
      break;
    end;
  end;
end;


procedure TMainF.TVChange(Sender: TObject; Node: TTreeNode);
begin
  if Node.Data = nil then
    exit;
  ledtNodeInfo.Text := GetNodeInfo(
        PSubSetInfo(Node.Data).attriName,
        PSubSetInfo(Node.Data).count,
        PSubSetInfo(Node.Data).entropy,
        PSubSetInfo(Node.Data).gain
  );

end;



procedure TMainF.SetAttributesList(const Value: TStringList);
begin
  FAttributesList := Value;
end;




function TMainF.getEntropy(attri: string; cds: TClientDataSet) : double;
var
  i, j, atrIndex : integer;
  vList : TStringList;
  pv : PValInfo;
  tEntropy : double;
begin
  vList := TStringList.Create;
  tEntropy := 0.0;
  try
    //各值出现的次数
    cds.First;
    for i := 0 to cds.RecordCount - 1 do
    begin
      atrIndex := vList.IndexOf(cds.FieldByName(attri).AsString);
      if atrIndex <> -1 then
      begin
        PValInfo(vList.Objects[atrIndex]).count :=
          PValInfo(vList.Objects[atrIndex]).count + 1;

      end
      else
      begin
        pv := new(PValInfo);
        pv.count := 1;
        vList.AddObject(cds.FieldByName(attri).AsString,TObject(pv));
      end;
      cds.Next;
    end;

    //计算熵
    for i := 0 to vList.Count - 1 do
    begin
      tEntropy := tEntropy -((PValInfo(vList.Objects[i]).count/cds.RecordCount))*log2(PValInfo(vList.Objects[i]).count/cds.RecordCount)

    end;
    result := tEntropy;
  finally
    for i := 0 to vList.Count - 1 do
    begin
      if vList.Objects[i] <> nil then
        dispose(PValInfo(vList.Objects[i]));
    end;
    vList.Free;
  end;
end;


procedure TMainF.ClearAttriList;
begin

end;

function TMainF.isLeafNode(A1, V1: string;cds : TClientDataSet; var LeafCaption : string) : boolean;
var
  sFilter : string;
  i : integer;
  tStr : string;
begin
  result := true;
  sFilter := A1+'='+quotedstr(v1);//+' and '+A2+'='+quotedstr(v2);
  cds.Filtered := false;
  cds.Filter := sFilter;
  cds.Filtered := true;
  cds.First;
  tStr := cds.FieldByName(cbbsa.Text).AsString;
  LeafCaption := tStr;
  for i := 0 to cds.RecordCount -1 do
  begin
    if cds.FieldByName(cbbsa.Text).AsString <> tStr then
    begin
      result := False;
      break;
    end;
    cds.Next;
  end;
end;

procedure TmainF.CopyClientData(Src : TClientDataSet; Des : TClientDataSet);
var
  i, j : integer;
begin
  tempCDS.Filtered := False;
  for i := 0 to tempCDS.RecordCount -1  do
  begin
    tempCDS.First;
    tempCDS.Delete;

  end;
  for i := 0 to src.RecordCount -1 do
  begin
    tempCDS.Append;
    for j := 0 to src.FieldCount -1 do
      tempCDS.Fields.Fields[j].AsVariant := Src.Fields.Fields[j].AsVariant;
    tempCDS.Post;
    src.Next;
  end;

end;
procedure TMainF.CreateDecisionTree(A, V: string; cds: TClientDataSet;ParNode : TTreeNode);
var
  splitedAttribute : string;
  tNode : TTreeNode;
  i, j : integer;
  vList : TStringList;
  sLeafCaption,sFilter, tStr : string;
begin
  //根据最大信息增益得到分裂属性
  splitedAttribute := getMaxGain(cbbsa.Text,cds);
  if splitedAttribute = '' then
    exit;
  //加入分裂属性列表
  if SplitedAttriList.IndexOf(splitedAttribute) = -1 then
    SplitedAttriList.Add(splitedAttribute);
  if A = splitedAttribute then
    tStr := A
  else
    tStr := A+'----'+splitedAttribute;

  tNode := TV.Items.AddChild(ParNode,tStr);

  vList := TStringlist.Create;

  try
    vList := getValues(splitedAttribute,cds);
    for i := 0 to vList.Count - 1 do
    begin

      if isLeafNode(splitedAttribute, vList.Strings[i], cds, sLeafCaption) then
      begin
        //叶节点
        TV.Items.AddChild(tNode,vList.Strings[i]+'-----'+sLeafCaption);
      end
      else
      begin
        sFilter := splitedAttribute+'='+quotedstr(vList.Strings[i]);
        ClientDataSet.Filtered := False;
        ClientDataSet.Filter := sFilter;
        ClientDataSet.Filtered := True;
        CopyClientData(ClientDataSet,tempCDS);
        CreateDecisionTree(vList.Strings[i],'',tempCDS,tNode);
      end;
    end;


  finally
    vList.Free;
  end;


end;



  {*

  *}
function TMainF.getGain(S, A: string; cds : TClientDataSet): double;
var
  i, j, atrIndex, recCount : integer;
  vList,SList : TStringList;
  pv : PValInfo;
  strFilter : string;
  d1, d2 : double;
begin
  result := 0.0;
  d1 := 0.0;
  d2 := 0.0;
  vList := TStringList.Create;
  sList := TStringList.Create;
  try
    //A各值出现的次数
    cds.First;
    for i := 0 to cds.RecordCount - 1 do
    begin
      atrIndex := vList.IndexOf(cds.FieldByName(A).AsString);
      if atrIndex <> -1 then
      begin
        PValInfo(vList.Objects[atrIndex]).count :=
          PValInfo(vList.Objects[atrIndex]).count + 1;
      end
      else
      begin
        pv := new(PValInfo);
        pv.count := 1;
        vList.AddObject(cds.FieldByName(A).AsString,TObject(pv));
      end;
      cds.Next;
    end;
    //S各值出现的次数
    cds.First;
    for i := 0 to cds.RecordCount - 1 do
    begin
      atrIndex := sList.IndexOf(cds.FieldByName(S).AsString);
      if atrIndex <> -1 then
      begin
        PValInfo(sList.Objects[atrIndex]).count :=
          PValInfo(sList.Objects[atrIndex]).count + 1;
      end
      else
      begin
        pv := new(PValInfo);
        pv.count := 1;
        sList.AddObject(cds.FieldByName(S).AsString,TObject(pv));
      end;
      cds.Next;
    end;

    recCount := cds.RecordCount;

    for i := 0 to vList.Count - 1 do
    begin
        strFilter := A+' = '+ Quotedstr(vList.Strings[i]) ;
        cds.Filtered := false;
        cds.Filter := strFilter;
        cds.Filtered := true;
        d1 := d1 -   (PValInfo(vList.Objects[i]).count/recCount)*getEntropy(S,cds);
    end;
    cds.Filtered := false;
    result := getEntropy(S,cds) + d1;
  finally
    for i := 0 to vList.Count - 1 do
    begin
      if vList.Objects[i] <> nil then
        dispose(PValInfo(vList.Objects[i]));
    end;
    for i := 0 to sList.Count - 1 do
    begin
      if sList.Objects[i] <> nil then
        dispose(PValInfo(sList.Objects[i]));
    end;
    vList.Free;
    sList.Free;
  end;
  cds.Filtered := false;
end;
  {*

  *}
function TMainF.getMaxGain(S: string; cds : TClientDataSet): string;
var
  i, j : integer;
  tmpStr : string;
  tmpD,tmpGain : Double;
begin
  tmpD := 0.0;
  tmpGain := 0.0;
  for i := 0 to AttributesList.Count - 1 do
  begin
    if cbbsa.Text = AttributesList.Strings[i] then
      continue;
    if SplitedAttriList.IndexOf(AttributesList.Strings[i]) <> -1  then
      continue;
    tmpGain := getGain(S,AttributesList.Strings[i],cds);
    if tmpGain > tmpD then
    begin
      tmpStr := AttributesList.Strings[i];
      tmpD := tmpGain;
    end;
  end;
  result := tmpStr;
end;


function TMainF.getValues(attri : string;cds : TClientDataSet) : TStringList;
var
  i, atrIndex : integer;
begin
  cds.First;
  result := TStringList.Create;
  for i := 0 to cds.RecordCount - 1 do
  begin
    atrIndex := result.IndexOf(cds.FieldByName(attri).AsString);
    if atrIndex = -1 then
    begin
      result.Add(cds.FieldByName(attri).AsString);
    end;
    cds.Next;
  end;
end;

procedure TMainF.SetOrgCDS(const Value: TClientDataSet);
begin
  FOrgCDS := Value;
end;

procedure TMainF.SettempCDS(const Value: TClientDataSet);
begin
  FtempCDS := Value;
end;

procedure TMainF.SetSplitedAttriList(const Value: TStringList);
begin
  FSplitedAttriList := Value;
end;

end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -