import MCTS as rvs
import tkinter as tk
import time
import tkinter.messagebox
# Seconds spent on each computer move; ReversiBoard.AI_move appends one entry
# per move and ReversiBoard.showResult prints the sum.
total = []
class ReversiBoard(tk.Canvas):
    """Canvas that draws the Reversi board and runs the human-vs-MCTS game.

    The human plays ``rvs.PLAYER_NUM`` and the computer ``rvs.COMPUTER_NUM``.
    A left click places a stone for the human; the computer's reply is
    scheduled with ``after`` so the canvas can repaint first. When neither
    side can move the result is shown and the next click starts a new game.
    """

    cell_size = 54  # side length of one board cell, in pixels
    margin = 20  # gap between the canvas edge and the grid, in pixels
    board = rvs.getInitialBoard()  # class-level default; __init__ gives each instance its own board
    validBoard = True  # False once the game has ended; the next click restarts
    isPayerTurn = True  # True while the human is allowed to move
    step = []  # canvas item ids of the stones currently drawn

    def __init__(self, master):
        """Create the canvas, draw the grid and show the starting position."""
        cwidth = rvs.BOARD_SIZE * self.cell_size + 2 * self.margin  # canvas side length
        super().__init__(master, bd=1, bg='#e4c8a9', width=cwidth, height=cwidth, cursor="hand2")
        # Per-instance state, so instances never share the mutable class defaults.
        self.board = rvs.getInitialBoard()
        self.step = []
        self.bind("<1>", self.put_stones)  # left mouse button places a stone
        # Draw the checkerboard background.
        for i in range(rvs.BOARD_SIZE):
            for j in range(rvs.BOARD_SIZE):
                bcolor = "#c1914f" if (i + j) % 2 == 0 else "#cba470"
                x0 = i * self.cell_size + self.margin
                y0 = j * self.cell_size + self.margin
                self.create_rectangle(x0, y0, x0 + self.cell_size, y0 + self.cell_size, fill=bcolor)
        self.refresh(rvs.PLAYER_NUM)
        if not self.isPayerTurn:
            # Computer moves first: swap the stone values and let it play.
            rvs.PLAYER_NUM = 1
            rvs.COMPUTER_NUM = -1
            self.AI_move()

    def put_stones(self, event):
        """Handle a left click: restart a finished game or place the human's stone."""
        if not self.validBoard:
            # The previous game is over: reset everything and wait for the next click.
            self.validBoard = True
            self.board = rvs.getInitialBoard()
            self.isPayerTurn = True
            rvs.PATHROOT = []  # the old search tree describes the finished game
            del total[:]  # timing statistics are per game
            self.refresh(rvs.PLAYER_NUM)
            return
        if not self.isPayerTurn:
            # The computer is still to move; ignore the click.
            return
        x = self.canvasx(event.x)
        y = self.canvasy(event.y)
        # Floor division keeps clicks in the left/top margin from mapping to cell 0.
        i = int((x - self.margin) // self.cell_size)
        j = int((y - self.margin) // self.cell_size)
        if not (0 <= i < rvs.BOARD_SIZE and 0 <= j < rvs.BOARD_SIZE):
            return  # click outside the grid
        if self.board[i][j] != 0 or not rvs.isok(self.board, rvs.PLAYER_NUM, i, j):
            return
        rvs.updateBoard(self.board, rvs.PLAYER_NUM, i, j)
        rvs.updatePathRoot(i, j)
        self.refresh(rvs.COMPUTER_NUM)
        self.isPayerTurn = False
        # Pass the bound method itself; calling it here would run the search
        # before the canvas had a chance to show the human's stone.
        self.after(100, self.AI_move)

    def AI_move(self):
        """Let the computer move until the human can move again or the game ends."""
        while rvs.possible_positions(self.board, rvs.COMPUTER_NUM):
            start = time.time()
            stone_pos = rvs.mctsNextPosition(self.board, 0.7, rvs.COMPUTER_NUM)
            one_time = time.time() - start
            print("落子位置", stone_pos)
            print("总落子时间为", format(one_time, '.4f'), "s")
            total.append(one_time)
            rvs.updateBoard(self.board, rvs.COMPUTER_NUM, stone_pos[0], stone_pos[1])
            rvs.updatePathRoot(stone_pos[0], stone_pos[1])
            if rvs.possible_positions(self.board, rvs.PLAYER_NUM):
                break  # the human has a reply; otherwise the computer moves again
        self.refresh(rvs.PLAYER_NUM)
        if rvs.possible_positions(self.board, rvs.PLAYER_NUM):
            self.isPayerTurn = True
        else:
            # The loop only ends without a human reply when the computer has
            # no move either, so the game is over.
            self.validBoard = False
            self.showResult()

    def showResult(self):
        """Show a win/draw/loss dialog and print the total computer thinking time."""
        player_stone, mcts_stone = rvs.countTile(self.board, rvs.PLAYER_NUM)
        if player_stone > mcts_stone:
            tkinter.messagebox.showinfo('游戏结束', "你获胜了")
        elif player_stone == mcts_stone:
            tkinter.messagebox.showinfo('游戏结束', "平局")
        else:
            tkinter.messagebox.showinfo('游戏结束', "你失败了")
        print("ai整局用时", sum(total))

    def refresh(self, tile):
        """Redraw all stones; when ``tile`` is the human's value, also mark legal moves."""
        self.delete("probale")
        # Remove the stones drawn by the previous refresh so canvas items do
        # not pile up and a restarted game does not show old stones.
        for item_id in self.step:
            self.delete(item_id)
        self.step = []
        for i in range(rvs.BOARD_SIZE):
            for j in range(rvs.BOARD_SIZE):
                value = self.board[i][j]
                if value == rvs.WHITE_NUM:
                    bcolor = "#ffffff"
                elif value == rvs.BLACK_NUM:
                    bcolor = "#000000"
                else:
                    continue  # empty cell
                x0 = i * self.cell_size + self.margin
                y0 = j * self.cell_size + self.margin
                self.step.append(
                    self.create_oval(x0 + 8, y0 + 8, x0 + self.cell_size - 8, y0 + self.cell_size - 8,
                                     fill=bcolor, width=0))
        if tile == rvs.PLAYER_NUM:
            # Small markers on every cell where the human may play.
            bcolor = "#ffcc33"
            for pos in rvs.possible_positions(self.board, tile):
                x0 = pos[0] * self.cell_size + self.margin
                y0 = pos[1] * self.cell_size + self.margin
                self.create_oval(x0 + 18, y0 + 18, x0 + self.cell_size - 18, y0 + self.cell_size - 18,
                                 fill=bcolor, width=0, tags="probale")
class Reversi(tk.Frame):
    """Frame that fills the application window and hosts one ReversiBoard."""

    def __init__(self, master=None):
        """Create the frame, set the window title and embed the board canvas."""
        super().__init__(master, bg="#51150b")
        self.master.title("黑白棋")
        # ReversiBoard is the custom canvas defined in this file.
        board_widget = ReversiBoard(self)
        self.f_board = board_widget
        board_widget.pack(padx=20, pady=20)
# Script entry point: build the window, lay it out and run the Tk event loop.
if __name__ == '__main__':
    app = Reversi()
    app.pack()
    app.mainloop()
import random
import math
import time
import copy
BOARD_SIZE = 8 # number of rows and of columns on the board
PLAYER_NUM = -1 # value that marks the human player's stones in a board
COMPUTER_NUM = 1 # value that marks the computer's stones in a board
MAX_THINK_TIME = 5 # upper bound on the computer's thinking time, compared against time.time() differences
# The eight neighbour offsets (dx, dy) scanned when checking or applying a move.
direction = [[0, 1], [1, 1], [1, 0], [1, -1], [0, -1], [-1, -1], [-1, 0], [-1, 1]]
BLACK_NUM = -1 # value that represents a black stone
WHITE_NUM = 1 # value that represents a white stone
PATHROOT = [] # search tree kept between moves: list of (position, playouts, reward, children) tuples
def getInitialBoard():
    """Return the starting position as a dict of dicts, ``board[i][j]``.

    Every cell is 0 (empty) except the four centre cells, which hold two
    ``WHITE_NUM`` stones on one diagonal and two ``BLACK_NUM`` stones on the
    other.
    """
    board = {i: {j: 0 for j in range(BOARD_SIZE)} for i in range(BOARD_SIZE)}
    # Integer division keeps the centre indices ints; true division would
    # yield floats that only address the right cells because 3.0 and 3 hash
    # alike, and would create stray keys for an odd BOARD_SIZE.
    high = BOARD_SIZE // 2
    low = high - 1
    board[low][low] = WHITE_NUM
    board[high][high] = WHITE_NUM
    board[low][high] = BLACK_NUM
    board[high][low] = BLACK_NUM
    return board
# Count the stones of both sides.
def countTile(board, tile):
    """Return ``(own, opposing)``: cells equal to ``tile`` and cells equal to ``-tile``."""
    own = 0
    opposing = 0
    for row in range(0, BOARD_SIZE):
        for col in range(0, BOARD_SIZE):
            cell = board[row][col]
            if cell == tile:
                own += 1
            elif cell == -tile:
                opposing += 1
    return own, opposing
def possible_positions(board, tile):
    """Return the list of ``(i, j)`` cells where ``tile`` may legally be placed.

    Cells are reported in row-major order; occupied cells are skipped before
    ``isok`` is consulted.
    """
    return [
        (row, col)
        for row in range(0, BOARD_SIZE)
        for col in range(0, BOARD_SIZE)
        if board[row][col] == 0 and isok(board, tile, row, col)
    ]
def isOnBoard(x, y):
    """Return True when both coordinates lie in ``0 .. BOARD_SIZE - 1``."""
    # Derive the upper bound from BOARD_SIZE instead of a literal 7 so the
    # check stays consistent with the rest of the module.
    last = BOARD_SIZE - 1
    return 0 <= x <= last and 0 <= y <= last
def isok(board, tile, i, j):
    """Return True when placing ``tile`` on the empty cell ``(i, j)`` flips at least one stone.

    A direction qualifies when the neighbouring cell holds ``-tile`` and the
    unbroken run of ``-tile`` cells that follows ends, still on the board, in
    a cell holding ``tile``.
    """
    opponent = -tile
    if board[i][j] != 0:
        return False
    for dx, dy in direction:
        x, y = i + dx, j + dy
        # The adjacent cell must exist and belong to the opponent.
        if not (isOnBoard(x, y) and board[x][y] == opponent):
            continue
        # Skip over the run of opposing stones.
        while isOnBoard(x, y) and board[x][y] == opponent:
            x += dx
            y += dy
        # The run must be closed by one of our own stones.
        if isOnBoard(x, y) and board[x][y] == tile:
            return True
    return False
# Apply a move in place and report how many stones it flipped.
def updateBoard(board, tile, i, j):
    """Place ``tile`` at ``(i, j)``, flip every captured run, return the flip count.

    The stone is written to ``(i, j)`` even when nothing is flipped; this
    function does not check legality itself.
    """
    opponent = -tile
    captured = []  # cells to be turned over
    for dx, dy in direction:
        run = []
        x, y = i + dx, j + dy
        # Collect the unbroken run of opposing stones in this direction.
        while isOnBoard(x, y) and board[x][y] == opponent:
            run.append([x, y])
            x += dx
            y += dy
        # The run counts only when it is closed by one of our own stones.
        if run and isOnBoard(x, y) and board[x][y] == tile:
            captured.extend(run)
    board[i][j] = tile
    for x, y in captured:
        board[x][y] = tile
    return len(captured)
def updatePathRoot(i, j):
    """Advance the stored search tree past the move ``(i, j)``.

    ``PATHROOT`` is replaced by the children of the root node whose position
    is ``(i, j)``. When no root node matches, the tree is cleared: keeping it
    would leave a tree that describes a different position, whereas an empty
    tree is rebuilt by ``mctsNextPosition`` on its next call.
    """
    global PATHROOT
    for position, _playouts, _reward, children in PATHROOT:
        if i == position[0] and j == position[1]:
            PATHROOT = children
            break
    else:
        PATHROOT = []
# Monte Carlo tree search
def mctsNextPosition(board, ucb_c, playerNum):
    """Choose the next move for ``playerNum`` with Monte Carlo tree search.

    The search tree lives in the module-level ``PATHROOT`` list and is reused
    between calls. Every node is a tuple ``(position, playouts, reward,
    children)``: the move, the number of random games recorded for it, how
    many of those ended with ``playerNum`` holding more stones, and the list
    of child nodes.

    Args:
        board: current position; only deep copies of it are modified.
        ucb_c: exploration constant of the UCB formula.
        playerNum: board value of the side to move.

    Returns:
        The position of the root child with the highest win rate, or
        ``(0, 0)`` when no root child has recorded playouts.
    """
    global PATHROOT

    playouts_per_node = 20  # random games played for every newly created node
    max_iterations = 30  # select/expand/simulate/back-propagate rounds per call
    max_playout_moves = 120  # safety cap on the length of one random game

    def ucb(node_tuple, t):
        """Return the UCB score of a node whose parent has ``t`` playouts."""
        name, nplayout, reward, childrens = node_tuple
        # Guard against division by zero and log(0).
        if nplayout == 0:
            nplayout = 1
        if t == 0:
            t = 1
        return (reward / nplayout) + ucb_c * math.sqrt(math.log(t) / nplayout)

    def find_playout(tep_board, tile, depth=0):
        """Play random moves on ``tep_board`` (in place) and report whether ``playerNum`` wins."""

        def eval_board(final_board):
            own, other = countTile(final_board, playerNum)
            return own > other

        while depth < max_playout_moves:
            moves = possible_positions(tep_board, tile)
            if not moves:
                # The side to move must pass; the game ends when both must.
                tile = -tile
                moves = possible_positions(tep_board, tile)
                if not moves:
                    return eval_board(tep_board)
            move = random.choice(moves)
            updateBoard(tep_board, tile, move[0], move[1])
            tile = -tile
            depth += 1
        return eval_board(tep_board)

    def expand(tep_board, tile):
        """Return fresh, unvisited child nodes for every legal move of ``tile``."""
        return [(move, 0, 0, []) for move in possible_positions(tep_board, tile)]

    def simulate(children, parent_board, mover):
        """Run the initial playouts for each child of ``parent_board``.

        Each child's tuple is rewritten in place with ``playouts_per_node``
        playouts and its win count; the summed win count is returned.
        """
        wins_total = 0
        for index, (position, _playouts, _reward, grandchildren) in enumerate(children):
            child_board = copy.deepcopy(parent_board)
            updateBoard(child_board, mover, position[0], position[1])
            wins = 0
            for _ in range(playouts_per_node):
                # find_playout mutates its board, so every game needs a copy.
                if find_playout(copy.deepcopy(child_board), -mover):
                    wins += 1
            children[index] = (position, playouts_per_node, wins, grandchildren)
            wins_total += wins
        return wins_total

    def find_path(root):
        """Descend from ``root`` to a leaf and return ``[(siblings, index), ...]``.

        Levels alternate between picking the highest UCB score (the searching
        side) and the lowest (its opponent); ties are broken at random.
        NOTE(review): the opponent level minimises the full UCB score,
        exploration bonus included — confirm that is the intended policy.
        """
        lineage = []
        siblings = root
        parent_playout = sum(node[1] for node in siblings)
        maximizing = True
        while siblings:
            scores = [ucb(node, parent_playout) for node in siblings]
            target = max(scores) if maximizing else min(scores)
            ties = [idx for idx, score in enumerate(scores) if score == target]
            chosen = random.choice(ties)
            lineage.append((siblings, chosen))
            _position, parent_playout, _reward, siblings = siblings[chosen]
            maximizing = not maximizing
        return lineage

    if len(PATHROOT) == 0:
        # No reusable tree: create the root's children and give each its
        # initial statistics so the UCB formula has data to work with.
        PATHROOT = expand(board, playerNum)
        simulate(PATHROOT, board, playerNum)

    start_time = time.time()
    select_time = 0  # time spent in selection
    expand_time = 0  # time spent in expansion
    simulation_time = 0  # time spent in simulation
    back_time = 0  # time spent in back-propagation
    simulation_count = 0
    looptime = 0
    for _ in range(max_iterations):
        looptime += 1
        if (time.time() - start_time) >= MAX_THINK_TIME:
            break

        # Selection
        phase_start = time.time()
        lineage = find_path(PATHROOT)
        select_time += time.time() - phase_start
        if not lineage:
            break  # the root has no children, so there is nothing to search

        # Replay the selected moves on a copy of the real board.
        current_board = copy.deepcopy(board)
        tile = playerNum
        for siblings, idx in lineage:
            position = siblings[idx][0]
            updateBoard(current_board, tile, position[0], position[1])
            tile = -tile

        # Expansion: find_path always stops at a node without children.
        phase_start = time.time()
        new_children = expand(current_board, tile)
        expand_time += time.time() - phase_start

        # Simulation
        phase_start = time.time()
        new_wins = simulate(new_children, current_board, tile)
        new_playouts = len(new_children) * playouts_per_node
        simulation_count += new_playouts
        simulation_time += time.time() - phase_start

        # Attach the children while keeping the leaf's own statistics intact.
        leaf_siblings, leaf_idx = lineage[-1]
        position, playouts, reward, _old_children = leaf_siblings[leaf_idx]
        leaf_siblings[leaf_idx] = (position, playouts, reward, new_children)

        # Back-propagation: credit every node on the path with the new games.
        if new_playouts != 0:
            phase_start = time.time()
            for siblings, idx in lineage:
                position, playouts, reward, children = siblings[idx]
                siblings[idx] = (position, playouts + new_playouts, reward + new_wins, children)
            back_time += time.time() - phase_start

    # Pick the root move with the best observed win rate.
    max_avg_reward = -1
    mt_result = (0, 0)
    for position, playouts, reward, _children in PATHROOT:
        if playouts > 0 and reward / playouts > max_avg_reward:
            mt_result = position
            max_avg_reward = reward / playouts
    print("选择阶段用时" + str(select_time))
    print("扩展阶段用时" + str(expand_time))
    print("循环次数为" + str(looptime))
    print("模拟次数为" + str(simulation_count))
    print("模拟阶段用时" + str(simulation_time))
    print("回溯阶段用时" + str(back_time))
    return mt_result