由于学习这个算法,所以大部分的代码来自github上别人的代码
顺便写点注释
他的源码有bug
checkGameOver全都改了
顺便给minimax函数加点东西
他的源码
输入:
2
1 1
2 1
就会有些问题,最主要出在他的minimax结束判断上
然后你就永远赢不了
import time
import numpy as np
import sys
from copy import copy
rows = 3
cols = 3
board = np.zeros((rows, cols))
# 0 ->blank
# 1 --> 'x'
# 2-> 'o'
inf = 9999999999
neg_inf = -9999999999
def printBoard():
sys.stdout.write(' ')
for i in range(1, rows + 1):
sys.stdout.write(' ' + str(i) + ' ')
print('')
for i in range(0, rows):
for j in range(0, cols):
if j == 0:
sys.stdout.write(str(i + 1))
if board[i, j] == 0:
sys.stdout.write(' _ ')
elif board[i, j] == 1:
sys.stdout.write(' X ')
else:
sys.stdout.write(' O ')
print('')
heuristicTable = np.zeros((rows + 1, cols + 1)) # 估值
numberOfWinningPositions = rows + cols + 2
for index in range(0, rows + 1):
heuristicTable[index, 0] = 10 ** index # **两个乘号就是乘方
heuristicTable[0, index] = -10 ** index
winningArray = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8], [0, 4, 8], [2, 4, 6]])
def utilityOfState(state):
stateCopy = copy(state.ravel()) # 将多维数组降位一维
heuristic = 0
for i in range(0, numberOfWinningPositions):
maxp = 0
minp = 0
for j in range(0, rows):
if stateCopy[winningArray[i, j]] == 2:
maxp += 1
elif stateCopy[winningArray[i, j]] == 1:
minp += 1
heuristic += heuristicTable[maxp][minp]
return heuristic
def minimax(state, alpha, beta, maximizing, depth, maxp, minp): # alpha-beta剪枝 搜索深度 maxplayer minplayer
if depth == 0: # 搜索深度到头
return utilityOfState(state), state
if checkGameOver(state) == 1:
return utilityOfState(state), state
rowsLeft, columnsLeft = np.where(state == 0) # 空位
returnState = copy(state)
if rowsLeft.shape[0] == 0: # 没有空位
return utilityOfState(state), returnState
if maximizing == True: # Alpha剪枝
utility = neg_inf
for i in range(0, rowsLeft.shape[0]):
nextState = copy(state)
nextState[rowsLeft[i], columnsLeft[i]] = maxp
Nutility, Nstate = minimax(nextState, alpha, beta, False, depth - 1, maxp, minp)
# 找打下一节点的min的max
if Nutility > utility:
utility = Nutility
returnState = copy(nextState)
if utility > alpha:
alpha = utility
# beta
if alpha >= beta: # beta min想让max的值最小,但此时max值大于min的期望值->肯定不会走这条,
# print('pruned')
break # 剪值
return utility, returnState
else:
utility = inf
for i in range(0, rowsLeft.shape[0]):
nextState = copy(state)
nextState[rowsLeft[i], columnsLeft[i]] = minp
Nutility, Nstate = minimax(nextState, alpha, beta, True, depth - 1, maxp, minp)
if Nutility < utility:
utility = Nutility
returnState = copy(nextState)
if utility < beta:
beta = utility
if alpha >= beta:
# print('pruned')
break
return utility, returnState
def checkGameOver(state):
stateCopy = copy(state.ravel())
for i in range(0, len(winningArray)):
cnt = 0
for j in range(0, rows):
if stateCopy[winningArray[i, j]] == stateCopy[winningArray[i, 0]]:
cnt += 1
if cnt == 3 and stateCopy[winningArray[i, 0]] != 0:
return 1
return -1
def main():
num = int(input('先手1/后手2'))
global board # global是Python中的全局变量关键字
if num == 1:
printBoard()
for turn in range(0, rows * cols):
if (turn + num) % 2 == 1: # make the player go first, and make the user player as 'X'
r, c = [int(x) for x in input('输入操作').split(' ')]
while board[r - 1, c - 1] != 0:
print('此位置已经有棋子')
r, c = [int(x) for x in input('重新输入操作').split(' ')]
board[r - 1, c - 1] = 1
printBoard()
value = checkGameOver(board)
if value == 1:
print('玩家获胜')
sys.exit()
print('\n')
else:
print("电脑操作")
time.sleep(0.5)
state = copy(board)
value, nextState = minimax(state, neg_inf, inf, True, 2, 2, 1)
board = copy(nextState)
printBoard()
print('\n')
value = checkGameOver(board)
if value == 1:
print('电脑获胜')
sys.exit()
print('平局')
main()