# 11.14 黑白棋 (Reversi / Othello)

import MCTS as rvs
import tkinter as tk
import time
import tkinter.messagebox

# Thinking time of each computer move; AI_move appends to it and showResult
# prints the sum.
total = []


class ReversiBoard(tk.Canvas):
    """Canvas widget that draws the Reversi board and runs one game on it.

    The human moves by clicking a cell; the computer answers through the
    MCTS search in ``rvs``.  When neither side can move, the result is shown
    and the next click starts a new game.
    """

    # Geometry of the board drawing
    cell_size = 54  # side length of one cell
    margin = 20  # blank border around the grid
    validBoard = True  # False once the game is over (the next click restarts)
    isPayerTurn = True  # True while the human may move; also "human moves first"

    def __init__(self, master):
        """Create the canvas, draw the grid and, if needed, let the AI open."""
        cwidth = rvs.BOARD_SIZE * self.cell_size + 2 * self.margin  # canvas side length
        super().__init__(master, bd=1, bg='#e4c8a9', width=cwidth, height=cwidth, cursor="hand2")
        # Per-instance state: kept off the class so boards never share it.
        self.board = rvs.getInitialBoard()  # current position
        self.step = []  # canvas item ids recorded for the current game
        self.bind("<1>", self.put_stones)  # left mouse button places a stone

        # Draw the checkered grid
        for i in range(rvs.BOARD_SIZE):
            for j in range(rvs.BOARD_SIZE):
                bcolor = "#c1914f" if (i + j) % 2 == 0 else "#cba470"
                x0 = i * self.cell_size + self.margin
                y0 = j * self.cell_size + self.margin
                self.create_rectangle(x0, y0, x0 + self.cell_size, y0 + self.cell_size, fill=bcolor)

        if self.isPayerTurn:
            self.refresh(rvs.PLAYER_NUM)  # show the stones and the human's options
        else:
            # The computer opens: swap the two sides' board values first so
            # that the drawing and the search agree on who is who.
            rvs.PLAYER_NUM = 1
            rvs.COMPUTER_NUM = -1
            self.refresh(rvs.COMPUTER_NUM)
            self.AI_move()

    def put_stones(self, event):
        """Handle a left click: restart a finished game or play the human's move."""
        if not self.validBoard:
            # The previous game is over: set up a fresh one.
            self.validBoard = True
            self.board = rvs.getInitialBoard()
            self.isPayerTurn = True
            rvs.PATHROOT = []  # the old search tree describes the old game
            total.clear()  # timing statistics are per game

            # Remove recorded canvas items
            for numid in self.step:
                self.delete(numid)
            self.step = []
            self.refresh(rvs.PLAYER_NUM)
            return

        # Ignore clicks while the computer is to move
        if not self.isPayerTurn:
            return

        # Translate the click position into board indices.  Floor division
        # keeps clicks in the margin off the board instead of rounding them
        # onto row/column 0.
        x = self.canvasx(event.x)
        y = self.canvasy(event.y)
        i = int((x - self.margin) // self.cell_size)
        j = int((y - self.margin) // self.cell_size)
        if not (0 <= i < rvs.BOARD_SIZE and 0 <= j < rvs.BOARD_SIZE):
            return
        if self.board[i][j] != 0 or not rvs.isok(self.board, rvs.PLAYER_NUM, i, j):
            return

        rvs.updateBoard(self.board, rvs.PLAYER_NUM, i, j)
        rvs.updatePathRoot(i, j)
        self.step = []
        self.refresh(rvs.COMPUTER_NUM)
        self.isPayerTurn = False
        # Pass the method itself (not its result) so the canvas can repaint
        # the human's move before the search starts.
        self.after(100, self.AI_move)

    def AI_move(self):
        """Let the computer move until the human can move again or the game ends."""
        # The computer keeps moving while it has a move and the human has none.
        while rvs.possible_positions(self.board, rvs.COMPUTER_NUM):
            start = time.time()
            # Ask the MCTS search for the move
            stone_pos = rvs.mctsNextPosition(self.board, 0.7, rvs.COMPUTER_NUM)
            end = time.time()
            one_time = end - start
            print("落子位置", stone_pos)
            print("总落子时间为", format(one_time, '.4f'), "s")
            total.append(one_time)
            rvs.updateBoard(self.board, rvs.COMPUTER_NUM, stone_pos[0], stone_pos[1])
            rvs.updatePathRoot(stone_pos[0], stone_pos[1])  # advance the search tree
            self.refresh(rvs.PLAYER_NUM)
            self.update_idletasks()  # repaint between consecutive computer moves

            if rvs.possible_positions(self.board, rvs.PLAYER_NUM):
                break

        # Hand the turn back; redraw so the hints also appear when the
        # computer had to pass without moving.
        self.isPayerTurn = True
        self.refresh(rvs.PLAYER_NUM)
        if self._game_over():
            self.validBoard = False
            self.showResult()

    def _game_over(self):
        """Return True when neither side has a legal move left."""
        return not (rvs.possible_positions(self.board, rvs.PLAYER_NUM)
                    or rvs.possible_positions(self.board, rvs.COMPUTER_NUM))

    def showResult(self):
        """Show the outcome from the human's point of view and print the AI's total time."""
        player_stone, mcts_stone = rvs.countTile(self.board, rvs.PLAYER_NUM)

        if player_stone > mcts_stone:
            tkinter.messagebox.showinfo('游戏结束', "你获胜了")
        elif player_stone == mcts_stone:
            tkinter.messagebox.showinfo('游戏结束', "平局")
        else:
            tkinter.messagebox.showinfo('游戏结束', "你失败了")
        print("ai整局用时", sum(total))

    def refresh(self, tile):
        """Redraw all stones; when ``tile`` is the human's value also mark its legal moves."""
        # Drop what the previous redraw created so items do not pile up and
        # stones of a finished game do not stay visible.
        self.delete("probale")
        self.delete("stone")
        for i in range(rvs.BOARD_SIZE):
            for j in range(rvs.BOARD_SIZE):
                value = self.board[i][j]
                if value == 0:
                    continue
                x0 = i * self.cell_size + self.margin
                y0 = j * self.cell_size + self.margin
                bcolor = "#000000" if value == rvs.BLACK_NUM else "#ffffff"
                self.create_oval(x0 + 8, y0 + 8, x0 + self.cell_size - 8, y0 + self.cell_size - 8,
                                 fill=bcolor, width=0, tags="stone")
        if tile == rvs.PLAYER_NUM:
            probale = rvs.possible_positions(self.board, tile)  # cells the human may play
            bcolor = "#ffcc33"
            for pos in probale:
                x0 = pos[0] * self.cell_size + self.margin
                y0 = pos[1] * self.cell_size + self.margin
                self.create_oval(x0 + 18, y0 + 18, x0 + self.cell_size - 18, y0 + self.cell_size - 18, fill=bcolor,
                                 width=0, tags="probale")


class Reversi(tk.Frame):
    """Frame for the whole window; it titles the window and hosts the board."""

    def __init__(self, master=None):
        super().__init__(master, bg="#51150b")
        self.master.title("黑白棋")
        # ReversiBoard is the custom board widget; it is placed inside this frame
        board_widget = ReversiBoard(self)
        board_widget.pack(padx=20, pady=20)
        self.f_board = board_widget


# Script entry point: build the window frame, place it and run the Tk event loop.
if __name__ == '__main__':
    app = Reversi()
    app.pack()
    app.mainloop()

  

import random
import math
import time
import copy

BOARD_SIZE = 8  # number of rows and of columns of the board
PLAYER_NUM = -1  # board value that stands for the human player
COMPUTER_NUM = 1  # board value that stands for the computer
MAX_THINK_TIME = 5  # upper bound on the computer's thinking time, in seconds
# The eight neighbour offsets scanned around a cell
direction = [
    [0, 1], [1, 1], [1, 0], [1, -1],
    [0, -1], [-1, -1], [-1, 0], [-1, 1],
]
BLACK_NUM = -1  # board value of a black stone
WHITE_NUM = 1  # board value of a white stone
PATHROOT = []  # search tree: the child nodes of the current position


def getInitialBoard():
    """Return a new board in the starting position.

    The board is a dict of dicts indexed ``board[i][j]`` with ``0`` for an
    empty cell.  The four centre cells hold two white stones on one diagonal
    and two black stones on the other.
    """
    board = {i: {j: 0 for j in range(BOARD_SIZE)} for i in range(BOARD_SIZE)}

    # Floor division keeps the indices integers; true division would index
    # the board with floats.
    upper = BOARD_SIZE // 2
    lower = upper - 1

    board[lower][lower] = WHITE_NUM
    board[upper][upper] = WHITE_NUM

    board[lower][upper] = BLACK_NUM
    board[upper][lower] = BLACK_NUM

    return board


# Count the stones of both sides
def countTile(board, tile):
    """Return ``(stones of tile, stones of -tile)`` found on the board."""
    cells = [board[row][col] for row in range(BOARD_SIZE) for col in range(BOARD_SIZE)]
    own = cells.count(tile)
    # A cell already counted for ``tile`` is never counted for the other side.
    other = sum(1 for value in cells if value != tile and value == -tile)
    return own, other


def possible_positions(board, tile):
    """Return the list of ``(i, j)`` cells where ``tile`` may legally be placed."""
    return [
        (row, col)
        for row in range(BOARD_SIZE)
        for col in range(BOARD_SIZE)
        if board[row][col] == 0 and isok(board, tile, row, col)
    ]


def isOnBoard(x, y):
    """Return True when ``(x, y)`` lies inside the board.

    The bound comes from ``BOARD_SIZE`` so that it stays in step with the
    board built by the rest of the module.
    """
    return 0 <= x < BOARD_SIZE and 0 <= y < BOARD_SIZE


def isok(board, tile, i, j):
    """Return True when placing ``tile`` on cell ``(i, j)`` is a legal move.

    The cell must be empty and, in at least one of the eight directions, a
    run of one or more opposing stones must be closed by a stone of ``tile``.
    """
    if board[i][j] != 0:
        return False
    opponent = -tile
    for dx, dy in direction:
        x, y = i + dx, j + dy
        # The direct neighbour has to be an opposing stone
        if not (isOnBoard(x, y) and board[x][y] == opponent):
            continue
        # Walk past the run of opposing stones (or off the board)
        while isOnBoard(x, y) and board[x][y] == opponent:
            x += dx
            y += dy
        # The run only counts if an own stone closes it
        if isOnBoard(x, y) and board[x][y] == tile:
            return True
    return False


# Place a stone and flip every stone it captures
def updateBoard(board, tile, i, j):
    """Put ``tile`` on ``(i, j)``, flip the captured stones, return how many flipped.

    The board is modified in place.  The stone is placed even when nothing is
    captured; legality is not checked here.
    """
    opponent = -tile
    captured = []  # cells whose stones get flipped
    for dx, dy in direction:
        run = []  # opposing stones met in this direction
        x, y = i + dx, j + dy
        while isOnBoard(x, y) and board[x][y] == opponent:
            run.append([x, y])
            x += dx
            y += dy
        # Only a run closed by an own stone is captured
        if run and isOnBoard(x, y) and board[x][y] == tile:
            captured.extend(run)
    # Place the stone, then flip
    board[i][j] = tile
    for x, y in captured:
        board[x][y] = tile
    return len(captured)


def updatePathRoot(i, j):
    """Advance the search tree ``PATHROOT`` past the move played on ``(i, j)``.

    The children of the node for that move become the new root list.  When
    the tree holds no node for the move, it no longer describes the position
    on the board, so it is emptied; ``mctsNextPosition`` rebuilds an empty
    tree from the actual board.
    """
    global PATHROOT
    for position, _playouts, _wins, children in PATHROOT:
        if i == position[0] and j == position[1]:
            PATHROOT = children
            break
    else:
        # Move not in the tree: keeping the old nodes would make the search
        # answer for a different position.
        PATHROOT = []


# Monte Carlo tree search
def mctsNextPosition(board, ucb_c, playerNum):
    """Choose the next move for ``playerNum`` with Monte Carlo tree search.

    The tree is kept in the module-level ``PATHROOT`` list and reused between
    calls.  Every node is a tuple ``(position, playouts, wins, children)``;
    ``wins`` always counts playouts won by ``playerNum``.  Levels of the tree
    alternate between ``playerNum`` (picks the highest UCB value) and its
    opponent (picks the lowest).

    Args:
        board: current position; it is deep-copied and never modified.
        ucb_c: exploration constant of the UCB formula.
        playerNum: board value of the side to move.

    Returns:
        The position of the root child with the best win rate, or ``(0, 0)``
        when the root has no children.
    """
    global PATHROOT  # the search tree

    playouts_per_node = 20  # random games played for each newly created node
    max_playout_moves = 120  # cap on the number of moves in one random game
    max_iterations = 30  # cap on select/expand/simulate/backpropagate rounds

    def ucb(node_tuple, t):
        """Return the UCB value of a node; ``t`` is the parent's playout count."""
        _, nplayout, reward, _ = node_tuple
        # Guard against division by zero and log(0)
        if nplayout == 0:
            nplayout = 1
        if t == 0:
            t = 1
        return (reward / nplayout) + ucb_c * math.sqrt(math.log(t) / nplayout)

    def eval_board(tep_board):
        """Return True when ``playerNum`` owns more stones than its opponent."""
        own, other = countTile(tep_board, playerNum)
        return own > other

    def find_playout(tep_board, tile, depth=0):
        """Play random moves on ``tep_board`` (in place) and return eval_board of the result."""
        while depth < max_playout_moves:
            turn_positions = possible_positions(tep_board, tile)
            if not turn_positions:
                # No move: the turn passes to the other side
                tile = -tile
                turn_positions = possible_positions(tep_board, tile)
                if not turn_positions:
                    break  # neither side can move: the game is over
            move = random.choice(turn_positions)
            updateBoard(tep_board, tile, move[0], move[1])
            tile = -tile
            depth += 1
        return eval_board(tep_board)

    def simulate(start_board, tile):
        """Run ``playouts_per_node`` random games from ``start_board``; return the wins."""
        wins = 0
        for _ in range(playouts_per_node):
            if find_playout(copy.deepcopy(start_board), tile):
                wins += 1
        return wins

    def expand(tep_board, tile):
        """Return one fresh, unvisited node per legal move of ``tile``."""
        return [(position, 0, 0, []) for position in possible_positions(tep_board, tile)]

    def find_path(root):
        """Descend from ``root`` to a node without children; return the positions passed."""
        current_path = []
        child = root
        parent_playout = sum(node[1] for node in child)  # playouts seen by the root
        isMCTSTurn = True

        while child:
            values = [ucb(node, parent_playout) for node in child]
            # playerNum maximises, the opponent minimises; taking max/min
            # directly needs no sentinel that a value could exceed.
            best = max(values) if isMCTSTurn else min(values)
            candidates = [idx for idx, value in enumerate(values) if value == best]
            # Ties are broken at random
            position, t_playout, _, t_childrens = child[random.choice(candidates)]
            current_path.append(position)
            parent_playout = t_playout
            child = t_childrens
            isMCTSTurn = not isMCTSTurn

        return current_path

    if not PATHROOT:
        # Empty tree: create the root children and give each its first
        # playouts so that UCB never divides by zero.
        PATHROOT = expand(board, playerNum)
        for index, (position, _, _, children) in enumerate(PATHROOT):
            node_board = copy.deepcopy(board)
            updateBoard(node_board, playerNum, position[0], position[1])
            wins = simulate(node_board, -playerNum)
            # Record the number of playouts actually run, so that
            # wins / playouts is a true rate.
            PATHROOT[index] = (position, playouts_per_node, wins, children)

    # Timing, also used to stop an overlong search
    start_time = time.time()
    slectTime = 0  # time spent in selection
    expendTime = 0  # time spent in expansion
    simulationTime = 0  # time spent in simulation
    BackTime = 0  # time spent in backpropagation

    simulationTimes = 0
    looptime = 0

    for _ in range(max_iterations):
        looptime += 1
        if (time.time() - start_time) >= MAX_THINK_TIME:
            break

        # Selection
        tempStartTime = time.time()
        current_path = find_path(PATHROOT)
        slectTime += time.time() - tempStartTime
        if not current_path:
            break  # the root has no children: nothing to search

        # Replay the path on a copy of the board and remember, for every
        # node on it, the list that holds the node and its index there.
        current_board = copy.deepcopy(board)
        tile = playerNum
        trail = []
        siblings = PATHROOT
        for position in current_path:
            updateBoard(current_board, tile, position[0], position[1])
            tile = -tile
            idx = next(k for k, node in enumerate(siblings) if node[0] == position)
            trail.append((siblings, idx))
            siblings = siblings[idx][3]

        # find_path stops at the first node without children, so the last
        # node of the path is the leaf to expand.
        leaf_list, leaf_idx = trail[-1]
        leaf_position, leaf_playout, leaf_reward, _ = leaf_list[leaf_idx]

        # Expansion
        tempStartTime = time.time()
        new_children = expand(current_board, tile)
        expendTime += time.time() - tempStartTime

        # Simulation
        tempStartTime = time.time()
        rewardSum = 0  # wins gathered in this round
        if new_children:
            for index, (child_position, _, _, grand_children) in enumerate(new_children):
                child_board = copy.deepcopy(current_board)
                updateBoard(child_board, tile, child_position[0], child_position[1])
                wins = simulate(child_board, -tile)
                simulationTimes += playouts_per_node
                rewardSum += wins
                new_children[index] = (child_position, playouts_per_node, wins, grand_children)
            randomTime = len(new_children) * playouts_per_node  # playouts run in this round
            # The leaf keeps its own statistics; only its children change here.
            leaf_list[leaf_idx] = (leaf_position, leaf_playout, leaf_reward, new_children)
        else:
            # ``tile`` has no move at the leaf, so there is nothing to
            # expand.  Evaluate the leaf itself (playouts handle passes), so
            # its statistics still change and the search does not keep
            # selecting it with the same values.
            rewardSum = simulate(current_board, tile)
            simulationTimes += playouts_per_node
            randomTime = playouts_per_node
        simulationTime += time.time() - tempStartTime

        # Backpropagation: add the round's playouts and wins to every node
        # on the path.
        tempStartTime = time.time()
        for node_list, idx in trail:
            position, t_playout, reward, t_childrens = node_list[idx]
            node_list[idx] = (position, t_playout + randomTime, reward + rewardSum, t_childrens)
        BackTime += time.time() - tempStartTime

    # Pick the root child with the best win rate (the first one on ties)
    max_avg_reward = -1
    mt_result = (0, 0)
    for position, t_playout, reward, _ in PATHROOT:
        if (t_playout > 0) and (reward / t_playout > max_avg_reward):
            mt_result = position
            max_avg_reward = reward / t_playout
    print("选择阶段用时" + str(slectTime))
    print("扩展阶段用时" + str(expendTime))
    print("循环次数为" + str(looptime))
    print("模拟次数为" + str(simulationTimes))
    print("模拟阶段用时" + str(simulationTime))
    print("回溯阶段用时" + str(BackTime))

    return mt_result

  

# posted @ 2025-01-02 15:49  jais  阅读(35)  评论(0)    收藏  举报