⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uct--delphi.txt

📁 当今电脑围棋的最新最强的算法 用Delphi实现
💻 TXT
字号:
电脑围棋的算法UCT语言delphi实现
经过几天的努力,终于实现了uct代码,因为pascal是最优美易懂,适合表达算法的语言,特奉上,有错请大家拍砖,刚入门的爱好者可以修改完善。如有建议,请不吝指教!

unit _XGoUCT;
interface
uses SysUtils, Classes, windows, _XGoBase;
var
  _UCTMaxSimulation: integer = 100;     //这个数越大,深入的层越多!
  _UCTMaxTime: cardinal = 5000;         //一次着棋的毫秒数
  _UCTkomi: double = 2.5;
type
  PNode = ^TNode;
  TNode = record
    move: TVertex;
    wins: double;
    visits: double;
    child: array of PNode;
    bestNode: PNode;
  end;
type
  TUCT = class(TXGoBase)
  private
    UCTk: double;                       //UCT常数
    UCTActivePlayer: shortint;          //当前次序
    UCTweight: array[0..18, 0..18] of integer;
    function fCreateChildNodes(const n: PNode): integer; //建立子节点,返回数量
    procedure fFreeNodes(const n: PNode); //释放树技
    procedure fSetBestNode(const n: PNode); //设置最好的节点
    function fPlayRandom: integer;      //自由布局
    function fUCTSelect(const n: PNode): PNode; //uct选择
    function fUCTSimulation(const n: PNode): integer; //模似
    function fUCTSearch(const p: Tplayer; const count: integer): TVertex; //搜索
  public
    constructor Create;
    destructor Destroy; override;
    function isEye(const i, j: integer; const p: Tplayer): boolean;
    function getWeight(const x, y: integer): integer;
    function fCalculate: double;
    function fgetEmptyVertexs: TVertexs; //获取被压缩的空点
    function getEvaluate: double;       //评估,返回黑正白负
    function PlayMove(const m: TVertex): boolean;
    function genMove(const p: Tplayer): TVertex;
    procedure showboard;
  end;
implementation
//==============================================================================
procedure TUCT.showboard;
var
  i, j, b: integer;
  c, s, os: string;
const
  topbottom: string = '   A B C D E F G H J K L M N O P Q R S T';
begin
  os := '';
  b := boardsize;
  writeln(copy(topbottom, 1, 4 + (b - 1) * 2));
  for i := 0 to b - 1 do
  begin
    s := '';
    for j := 0 to b - 1 do
    begin
      case Vertexs[i, j] of
        BLACK: if getmoveLast = char(i) + char(j) then
            s := s + '◆'
          else
            s := s + '●';
        WHITE: if getmoveLast = char(i) + char(j) then
            s := s + '◇'
          else
            s := s + '○';
      else
        if (i = 0) then
          if (j = 0) then
            c := '┏'
          else if (j > 0) and (j < b - 1) then
            c := '┯'
          else if (j = b - 1) then
            c := '┓';
        if (i > 0) and (i < b - 1) then
          if (j = 0) then
            c := '┠'
          else if (j > 0) and (j < b - 1) then
            c := '┼'
          else if (j = b - 1) then
            c := '┨';
        if (i = b - 1) then
          if (j = 0) then
            c := '┗'
          else if (j > 0) and (j < b - 1) then
            c := '┷'
          else if (j = b - 1) then
            c := '┛';
        case b of
          19: if ((i = 3) and ((j = 3) or (j = b div 2) or (j = b - 4)))
            or ((i = b div 2) and ((j = 3) or (j = b - 4)))
              or ((i = b - 4) and ((j = 3) or (j = b div 2) or (j = b - 4)))
                then
              c := '╋';
          13: if ((i = 3) and ((j = 3) or (j = b - 4)))
            or ((i = b - 4) and ((j = 3) or (j = b - 4))) then
              c := '╋';
        end;
        s := s + c;
      end;                              //case
    end;
    s := format('%0:2s', [inttostr(b - i)]) + s + inttostr(b - i);
    writeln(s);
  end;
  writeln(copy(topbottom, 1, 4 + (b - 1) * 2));
end;
function TUCT.getWeight(const x, y: integer): integer;
begin
  Result := UCTweight[x, y];
end;
//==============================================================================
constructor TUCT.Create;
begin
  inherited create;
  UCTk := 1;
  UCTActivePlayer := BLACK;
  fillchar(UCTweight, sizeof(UCTweight), 0);
end;
destructor TUCT.Destroy;
begin
  inherited Destroy;
end;
//------------------------------------------------------------------------------
function TUCT.isEye(const i, j: integer; const p: Tplayer): boolean;
var
  b: boolean;
  op: Tplayer;
begin
  if p = BLACK then
    op := WHITE
  else
    op := BLACK;
  if (i - 1) in [0..BoardSize - 1] then //这一步很重要@~@
    b := Vertexs[i - 1, j] = p
  else
    b := true;
  if (i + 1) in [0..BoardSize - 1] then
    b := b and (Vertexs[i + 1, j] = p);
  if (j - 1) in [0..BoardSize - 1] then
    b := b and (Vertexs[i, j - 1] = p);
  if (j + 1) in [0..BoardSize - 1] then
    b := b and (Vertexs[i, j + 1] = p);
  {  if ((i - 1) in [0..BoardSize - 1]) and ((j - 1) in [0..BoardSize - 1]) then
      b := b and (Vertexs[i - 1, j - 1] <> op);
    if ((i + 1) in [0..BoardSize - 1]) and ((j - 1) in [0..BoardSize - 1]) then
      b := b and (Vertexs[i + 1, j - 1] <> op);
    if ((i - 1) in [0..BoardSize - 1]) and ((j + 1) in [0..BoardSize - 1]) then
      b := b and (Vertexs[i - 1, j + 1] <> op);
    if ((i + 1) in [0..BoardSize - 1]) and ((j + 1) in [0..BoardSize - 1]) then
      b := b and (Vertexs[i + 1, j + 1] <> op);
   }result := b;
end;
function TUCT.fCalculate: double;
//计算胜负,黑为正,白为负,盘面应当为黑里无白,白里无黑的终结场面
var
  x, y: integer;
  c: integer;
begin
  c := 0;
  for x := 0 to BoardSize - 1 do
    for y := 0 to BoardSize - 1 do
      case Vertexs[x, y] of
        BLACK: inc(c);
        WHITE: dec(c);
      else
        if isEye(x, y, BLACK) then
          inc(c)
        else
          dec(c);                       //判断这个目归属,非黑即白
      end;
  result := c - _uctkomi;
end;
function TUCT.getEvaluate: double;
//评估胜负,黑为正,白为负
var
  p, v, x, y, m, n, r, i, j, sc: integer;
begin
  sc := 0;
  fillchar(UCTweight, SizeOf(UCTweight), 0);
  for x := 0 to boardsize - 1 do
    for y := 0 to boardsize - 1 do
      if Vertexs[x, y] <> EMPTY then
      begin
        p := 1 - Vertexs[x, y] shl 1;
        for m := -3 to 3 do
        begin
          r := 3 - abs(m);
          for n := -r to r do
          begin
            v := 1 shl (3 - abs(m) - abs(n));
            i := x + m;
            j := y + n;
            if (i in [0..boardsize - 1]) and (j in [0..boardsize - 1]) then
              UCTweight[i, j] := UCTweight[i, j] + v * p;
          end;
        end;
        sc := sc + UCTweight[x, y];
      end;
  result := sc - _uctkomi;
end;
function TUCT.fCreateChildNodes(const n: PNode): integer;
//在父节点下面建立很多个子节点,要选择地做点吧,要不还不overflow?!!!@_@
var
  i: integer;
  s: string;
  Count: integer;
begin
  result := 0;
  s := fgetEmptyVertexs;
  Count := length(s) div 2;
  setlength(n.child, Count);
  for i := 0 to Count - 1 do
  begin
    new(n.child);
    n.child.move := s[i * 2 + 1] + s[i * 2 + 2];
    n.child.wins := 0;
    n.child.visits := 0;
    n.child.bestNode := nil;
  end;
  result := Count;
end;
procedure TUCT.fFreeNodes(const n: PNode);
var
  i: integer;
begin
  if n <> nil then
  begin
    for i := 0 to length(n.child) - 1 do
      fFreeNodes(n.child);
    setlength(n.child, 0);
    dispose(n);
  end;
end;
function TUCT.fgetEmptyVertexs: TVertexs;
//得到可供下子的位置,此函数极为重要,必须大大地降低执行时间
var
  x, y: integer;
  ms: TVertexs;
begin
  ms := '';
  for x := 0 to BoardSize - 1 do
    for y := 0 to BoardSize - 1 do
      if Vertexs[x, y] = EMPTY then
        ms := ms + xyToVertex(x, y);
  result := ms;
end;
procedure TUCT.fSetBestNode(const n: PNode);
//设置本节点的bestNode值为本节点的子节点中胜率最大的节点,相同时返回第一个
var
  i: integer;
  best: PNode;
  winrate, bestwinrate: double;
begin
  best := nil;
  bestwinrate := -1;
  for i := 0 to length(n.child) - 1 do
    if n.Child.visits > 0 then
    begin
      winrate := n.Child.wins / n.Child.visits;
      if winrate > bestwinrate then
      begin
        bestwinrate := winrate;
        best := n.Child;
      end;
    end;
  n.bestNode := best;
end;
function TUCT.PlayMove(const m: TVertex): boolean;
var
  x, y: integer;
begin
  result := false;
  x := byte(m[1]);
  y := byte(m[2]);
  if not isEye(x, y, UCTActivePlayer) then //不能自杀,只漏某种特别的情况
    if Play(x, y, UCTActivePlayer) then //并不一定始终能下,可能有劫的情况
    begin
      if UCTActivePlayer = BLACK then   //如果成功,设置为另一方
        UCTActivePlayer := WHITE
      else
        UCTActivePlayer := BLACK;
      result := true;
    end;
end;
function TUCT.fUCTSelect(const n: PNode): PNode;
//重要的过程,遍历所有n的子节点并找出uct值最大的那个节点
var
  i: integer;
  winrate, uct, uctvalue, bestuctvalue: double;
begin
  result := nil;
  bestuctvalue := 0;
  for i := 0 to length(n.child) - 1 do
  begin
    if n.Child.visits > 0 then
    begin
      winrate := n.Child.wins / n.Child.visits;
      uct := UCTk * sqrt(ln(n.visits) / (5 * n.Child.visits));
      uctvalue := winrate + uct;
    end
    else
      uctvalue := 10000 + Random(1000); //等于无穷大,优先选择未下过的步
    if uctvalue > bestuctvalue then
    begin                               // get max uctvalue of all Children
      bestuctvalue := uctvalue;
      Result := n.Child;
    end;
  end;                                  //end of for
end;                                    //end of fUCTSelect
function TUCT.fPlayRandom: integer;
//随机下棋并返回是否胜!这个函数极为重要,必须提高执行效率
var
  i: integer;
  d: double;
  s, vs: TVertexs;
  p: TPlayer;
  jie: Tstringlist;
label
  labelCaculate;
begin
  randomize;
  jie := Tstringlist.Create;
  p := UCTActivePlayer;                 //获得当前轮到方
  vs := fGetEmptyVertexs;               //取得可下子的位置,肯定不为空,因为总有眼
  repeat
    if GetKeyState(VK_ESCAPE) < 0 then
      showboard;                        //debug
    i := random(length(vs) div 2);
    s := copy(vs, i * 2 + 1, 2);
    if PlayMove(s) then
    begin
      vs := fGetEmptyVertexs;           //取得可下子的位置,速度重要!
      jie.Add(s);                       //记录最后几次着棋
      if jie.Count > 4 then             //循环队列
        jie.Delete(0);
    end
    else
      delete(vs, i * 2 + 1, 2);         //将不能下的点删除
    if jie.Count > 3 then
      if (jie[0] = jie[2]) and (jie[1] = jie[3]) then //判断连环劫
      begin
        d := getEvaluate;               //在这种情况下,只能评估
        goto labelCaculate;             //直接计算结果
      end;
  until vs = '';                        //执行直到一方pass
  vs := fGetEmptyVertexs;               //让另一方去填死那些对方不能下子的空
  for i := 0 to length(vs) div 2 - 1 do //方便计算结果
  begin
    if UCTActivePlayer = BLACK then
      UCTActivePlayer := WHITE
    else
      UCTActivePlayer := BLACK;
    PlayMove(copy(vs, i * 2 + 1, 2));
  end;
  d := fCalculate;
  labelCaculate:                        //进行计算结果
  if ((d > 0) and (p = BLACK))          //判断哪方胜
  or ((d < 0) and (p = WHITE)) then
    result := 1
  else
    result := 0;
  jie.Free;
end;
function TUCT.fUCTSimulation(const n: PNode): integer;
//不停地按uct选定的规则摸拟走棋
var
  UCTnode: PNode;
  UCTresult: integer;
begin
  UCTresult := 0;
  if n.visits = 0 then                  //该节点未被访问过
    UCTresult := 1 - fPlayRandom        //评估本次模拟的结果
  else                                  //n.visits > 0
  begin
    if length(n.child) = 0 then         //无子节点  1
      fCreateChildNodes(n);             //建立子节点,绝不会无一个
    UCTnode := fUCTSelect(n);           //UCT选择一个,绝不会为空
    if PlayMove(UCTnode.move) then      //更新,考虑自杀和劫的情况
      UCTresult := 1 - fUCTSimulation(UCTnode)
  end;
  n.visits := n.visits + 1;
  n.wins := n.wins + UCTresult;         //wins,存储自己节点的胜率
  if length(n.child) > 0 then
    fSetBestNode(n);                    //在n的子节点中找出胜率最大的
  result := UCTresult;
end;
function TUCT.fUCTSearch(const p: Tplayer; const count: integer): TVertex;
var
  c: integer;
  t: cardinal;
  sl: Tstringlist;
  noderoot: PNode;
  v: array[0..18, 0..18] of shortint;
begin
  result := '';                         //空即为pass
  new(noderoot);                        //生成
  noderoot.move := '';
  noderoot.wins := 0;
  noderoot.visits := 0;
  noderoot.bestNode := nil;
  sl := Tstringlist.Create;
  sl.Assign(MoveList);                  //保存现场
  move(Vertexs, v, sizeof(Vertexs));
  UCTActivePlayer := p;                 //设置颜色
  t := getTickcount;                    //限时
  for c := 0 to count - 1 do            //模拟操作i次
  begin
    fUCTSimulation(noderoot);
    move(v, Vertexs, sizeof(Vertexs));
    MoveList.Assign(sl);                //恢复现场
    UCTActivePlayer := p;               //恢复颜色
    if getTickcount - t > _UCTMaxtime then
      break;
  end;
  if noderoot.bestNode <> nil then
    Result := noderoot.bestNode.move;   //胜率最高的一个
  sl.Free;
  fFreeNodes(noderoot);                 //释放树
end;
function TUCT.genMove(const p: Tplayer): TVertex;
begin
  result := fUCTSearch(p, _UCTMaxSimulation);
end;
end.
//==============================================================================

[ 本帖最后由 netxing 于 2008-4-1 18:40 编辑 ]

发表于 2008-3-18 15:04  只看该作者 





⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -