Minimax Algorithm in Game Theory in C++
Minimax Algorithm in Game Theory
极小化极大(minimax)算法适用于零和博弈,粗略的解释就是若有两个玩家进行游戏,如果其中一方得到利益那么另一方就会失去利益,游戏利益的总和为0(某些情况下为常数)。
Minimax is a kind of backtracking algorithm that is used in decision making and game theory to find the optimal move for a player, assuming that your opponent also plays optimally. It is widely used in two player turn-based games such as Tic-Tac-Toe, Backgammon, Mancala, Chess, etc.
In Minimax the two players are called maximizer and minimizer. The maximizer tries to get the highest score possible while the minimizer tries to do the opposite and get the lowest score possible.
A simple C++ program to find maximum score that maximizing player can get.
博弈树
从根节点出发
maximizer: 令最终到达的叶子节点数值最大
minimizer: 令最终到达的叶子节点数值最小
// A simple C++ program to find
// maximum score that
// maximizing player can get.
#include<iostream>
#include<algorithm>
using namespace std;
// Evaluates a complete binary game tree whose leaf values live in scores[].
//   depth     - level of the current node (the root is level 0).
//   nodeIndex - index of the current node within its level; once depth
//               reaches h it is used directly as an index into scores[].
//   isMax     - true when the maximizer moves at this node, false when
//               the minimizer does.
//   scores    - values of the leaves of the game tree.
//   h         - level at which the leaves sit (height of the tree).
// Returns the value of the current node when the two sides alternate and
// each picks the child that is best for itself.
int minimax(int depth, int nodeIndex, bool isMax,
            int scores[], int h)
{
    // A leaf has no children: its stored score is the answer.
    if (depth == h)
        return scores[nodeIndex];

    // Node k has children 2k and 2k + 1 on the next level, where the
    // other side is to move.
    const int leftValue = minimax(depth + 1, nodeIndex * 2, !isMax, scores, h);
    const int rightValue = minimax(depth + 1, nodeIndex * 2 + 1, !isMax, scores, h);

    // The maximizer keeps the larger child value, the minimizer the smaller.
    if (isMax)
        return (leftValue < rightValue) ? rightValue : leftValue;
    return (rightValue < leftValue) ? rightValue : leftValue;
}
// Integer base-2 logarithm: returns floor(log2(n)) for n >= 1.
// Zero and negative inputs have no logarithm; they yield 0 so that the
// recursion always terminates (n / 2 never reaches 1 when n <= 0).
int log2(int n)
{
    return (n <= 1) ? 0 : 1 + log2(n / 2);
}
// Driver code: evaluates a sample game tree with the maximizer at the root.
int main()
{
    // Leaf scores of the game tree; their count must be a power of 2.
    int scores[] = { 3, 5, 2, 9, 12, 5, 23, 23 };
    const int leafCount = sizeof(scores) / sizeof(scores[0]);

    // The leaves sit log2(leafCount) levels below the root.
    const int treeHeight = log2(leafCount);

    cout << "The optimal value is : "
         << minimax(0, 0, true, scores, treeHeight) << endl;
    return 0;
}
minimax递归函数返回值的另一种写法(此处isMax为整数:+1表示maximizer,-1表示minimizer):
// Sign-based variant of minimax over a complete binary game tree.
//   depth     - level of the current node (the root is level 0).
//   nodeIndex - index of the current node within its level.
//   isMax     - side to move, encoded as a sign: pass +1 for the maximizer
//               and -1 for the minimizer.
//   scores    - values of the leaves of the game tree.
//   h         - level at which the leaves sit.
// Multiplying by isMax turns the minimizer's "smallest value" into a
// "largest value" problem, so one max serves both sides.
int minimax(int depth, int nodeIndex, int isMax,
            int scores[], int h)
{
    // Leaf reached: return its stored score unchanged.
    if (depth == h)
        return scores[nodeIndex];

    // Child values seen from the side to move (negated for the minimizer).
    const int leftSigned = isMax * minimax(depth + 1, nodeIndex * 2, -isMax, scores, h);
    const int rightSigned = isMax * minimax(depth + 1, nodeIndex * 2 + 1, -isMax, scores, h);

    const int bestSigned = (leftSigned < rightSigned) ? rightSigned : leftSigned;

    // Undo the sign flip so the caller receives a plain score.
    return bestSigned * isMax;
}
Output:
The optimal value is : 12
Minmax for Tic-Tac-Toe
Implementation of Tic-Tac-Toe game
井字棋,圈叉游戏
For this scenario let us consider X as the maximizer and O as the minimizer.
一共9格,故选择加10。
- If X wins on the board we give it a positive value of +10.
- If O wins on the board we give it a negative value of -10.
- If no one has won or the game results in a draw then we give a value of +0.
Finding the Best Move (pseudocode):
function minimax(board, depth, isMaximizingPlayer):
if current board state is a terminal state :
return value of the board
if isMaximizingPlayer :
bestVal = -INFINITY
for each move in board :
value = minimax(board, depth+1, false)
bestVal = max( bestVal, value)
return bestVal
else :
bestVal = +INFINITY
for each move in board :
value = minimax(board, depth+1, true)
bestVal = min( bestVal, value)
return bestVal
递归深度depth是下棋的步数,一共9格,最多9步。
目标是以最少的步数获得胜利,或在失败的结果下,使得步数最多:
if maximizer has won:
    return WIN_SCORE - depth
else if minimizer has won:
    return LOSE_SCORE + depth
C++实现:
// C++ program to find the next optimal move for
// a player
#include<iostream>
#include<algorithm>
using namespace std;
// Board coordinates of a single move.
struct Move
{
    int row; // row index of the cell
    int col; // column index of the cell
};
// Marks placed on the board: 'player' is the side whose wins score +10
// (the maximizer) and 'opponent' the side whose wins score -10.
char player = 'x';
char opponent = 'o';
// Reports whether the board still contains at least one empty cell
// (marked '_'); false means no further move can be played.
bool isMovesLeft(char board[3][3])
{
    // Walk the nine cells in row-major order and stop at the first gap.
    int cell = 0;
    while (cell < 9 && board[cell / 3][cell % 3] != '_')
        ++cell;
    return cell < 9;
}
// Static evaluation of a tic-tac-toe position
// (background article: http://goo.gl/sJgv68).
// Returns +10 when 'player' owns a complete line, -10 when 'opponent'
// does, and 0 when no line is completed by either mark.
int evaluate(char b[3][3])
{
    // The eight winning lines as {row, col} triples, checked in this
    // order: rows, columns, main diagonal, anti-diagonal.
    static const int lines[8][3][2] = {
        { { 0, 0 }, { 0, 1 }, { 0, 2 } },
        { { 1, 0 }, { 1, 1 }, { 1, 2 } },
        { { 2, 0 }, { 2, 1 }, { 2, 2 } },
        { { 0, 0 }, { 1, 0 }, { 2, 0 } },
        { { 0, 1 }, { 1, 1 }, { 2, 1 } },
        { { 0, 2 }, { 1, 2 }, { 2, 2 } },
        { { 0, 0 }, { 1, 1 }, { 2, 2 } },
        { { 0, 2 }, { 1, 1 }, { 2, 0 } }
    };

    for (int k = 0; k < 8; ++k)
    {
        const char first = b[lines[k][0][0]][lines[k][0][1]];
        const char second = b[lines[k][1][0]][lines[k][1][1]];
        const char third = b[lines[k][2][0]][lines[k][2][1]];

        // Only a line holding three identical cells can decide the game.
        if (first != second || second != third)
            continue;

        if (first == player)
            return +10;
        if (first == opponent)
            return -10;
        // Three identical cells of any other kind (e.g. empty): keep looking.
    }

    // Nobody has completed a line.
    return 0;
}
// Minimax search over every continuation of a tic-tac-toe position.
//   board - position to search; candidate moves are written into it and
//           reset to '_' again before the function returns.
//   depth - number of moves already played below the search root.
//   isMax - true when the maximizer ('player') is to move, false when the
//           minimizer ('opponent') is.
// Returns 10 - depth when the maximizer has won, -10 + depth when the
// minimizer has won and 0 for a draw. Folding depth into the score makes
// a quicker win worth more than a slower one and a later loss less bad
// than an earlier one, so the search prefers the shortest path to victory.
int minimax(char board[3][3], int depth, bool isMax)
{
    const int score = evaluate(board);

    // Maximizer has already won: sooner is better.
    if (score == 10)
        return score - depth;

    // Minimizer has already won: later is less bad for the maximizer.
    if (score == -10)
        return score + depth;

    // No winner and no empty cell left: the game is a tie.
    if (!isMovesLeft(board))
        return 0;

    // The side to move decides which mark is placed and whether the best
    // value is the largest or the smallest one found.
    const char mark = isMax ? player : opponent;
    int best = isMax ? -1000 : 1000;

    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            if (board[i][j] != '_')
                continue;

            // Try the move, evaluate the reply, then undo the move.
            board[i][j] = mark;
            const int value = minimax(board, depth + 1, !isMax);
            board[i][j] = '_';

            best = isMax ? std::max(best, value) : std::min(best, value);
        }
    }
    return best;
}
// Picks the move for 'player' with the highest minimax value.
// Every empty cell is tried in row-major order; on equal values the first
// cell found is kept. Prints the value of the chosen move and returns its
// coordinates, or {-1, -1} when the board has no empty cell.
Move findBestMove(char board[3][3])
{
    Move bestMove;
    bestMove.row = -1;
    bestMove.col = -1;
    int bestVal = -1000;

    for (int cell = 0; cell < 9; ++cell)
    {
        const int r = cell / 3;
        const int c = cell % 3;

        // Occupied cells are not candidates.
        if (board[r][c] != '_')
            continue;

        // Play the move, let minimax answer for the opponent, then undo it.
        board[r][c] = player;
        const int moveVal = minimax(board, 0, false);
        board[r][c] = '_';

        // Keep the move only if it is strictly better than the best so far.
        if (moveVal > bestVal)
        {
            bestVal = moveVal;
            bestMove.row = r;
            bestMove.col = c;
        }
    }

    printf("The value of the best Move is : %d\n\n",
           bestVal);
    return bestMove;
}
// Driver code
int main()
{
char board[3][3] =
{
{ 'x', 'o', 'x' },
{ 'o', 'o', 'x' },
{ '_', '_', '_' }
};
Move bestMove = findBestMove(board);
printf("The Optimal Move is :\n");
printf("ROW: %d COL: %d\n\n", bestMove.row,
bestMove.col);
return 0;
}
Implementation of Tic-Tac-Toe game:
// A C++ Program to play tic-tac-toe
#include <algorithm> // std::random_shuffle
#include <cstdio> // printf, fprintf
#include <cstdlib> // std::rand, std::srand
#include <ctime> // std::time
#include <iostream>
#include <random> // std::mt19937
using namespace std;
// Identifiers of the two participants; these are the values carried by
// the whoseTurn parameter/variable.
#define COMPUTER 1
#define HUMAN 2
#define SIDE 3 // Side length of the square board (SIDE x SIDE cells)
// Marks written into the board: the computer plays 'O'
// and the human plays 'X'.
#define COMPUTERMOVE 'O'
#define HUMANMOVE 'X'
// Prints the top-left 3x3 cells of the board as a grid, with a separator
// line between rows and a blank line after the last row.
void showBoard(char board[][SIDE])
{
    const int rowsShown = 3;

    printf("\n\n");
    for (int r = 0; r < rowsShown; ++r)
    {
        printf("\t\t\t %c | %c | %c \n", board[r][0],
               board[r][1], board[r][2]);

        // Separator between rows; the final row is followed by a blank line.
        if (r + 1 < rowsShown)
            printf("\t\t\t--------------\n");
        else
            printf("\n");
    }
}
// Prints the title, the numbering of the nine cells (1..9, row by row)
// and a divider line.
void showInstructions()
{
    // The fixed text of the instruction screen, one output chunk per entry.
    static const char* const chunks[] = {
        "\t\t\t Tic-Tac-Toe\n\n",
        "Choose a cell numbered from 1 to 9 as below"
        " and play\n\n",
        "\t\t\t 1 | 2 | 3 \n",
        "\t\t\t--------------\n",
        "\t\t\t 4 | 5 | 6 \n",
        "\t\t\t--------------\n",
        "\t\t\t 7 | 8 | 9 \n\n",
        "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n\n"
    };
    const int chunkCount = sizeof(chunks) / sizeof(chunks[0]);

    for (int i = 0; i < chunkCount; ++i)
        printf("%s", chunks[i]);
}
// Prepares a new game.
//   board - receives an empty board: every cell is set to ' '.
//   moves - receives the cell numbers 0 .. SIDE*SIDE-1 in random order;
//           this is the sequence in which cells will be played.
// The order is produced by std::shuffle with a Mersenne Twister seeded
// from the current time, so each run gets a different configuration.
// (std::random_shuffle, used previously, was removed in C++17.)
void initialise(char board[][SIDE], int moves[])
{
    const unsigned int seed = static_cast<unsigned int>(time(nullptr));

    // Keep seeding rand() as before in case other code relies on it.
    srand(seed);

    // Initially the board is empty.
    for (int i = 0; i < SIDE; i++)
    {
        for (int j = 0; j < SIDE; j++)
            board[i][j] = ' ';
    }

    // Fill the moves with the cell numbers in ascending order ...
    for (int i = 0; i < SIDE * SIDE; i++)
        moves[i] = i;

    // ... and randomise their order.
    std::mt19937 engine(seed);
    std::shuffle(moves, moves + SIDE * SIDE, engine);
}
// Announces the winner: COMPUTER when whoseTurn equals COMPUTER,
// HUMAN for any other value.
void declareWinner(int whoseTurn)
{
    if (whoseTurn != COMPUTER)
    {
        printf("HUMAN has won\n");
        return;
    }
    printf("COMPUTER has won\n");
}
// Returns true when some row holds the same non-blank mark in its
// first three cells.
bool rowCrossed(char board[][SIDE])
{
    for (int row = 0; row < SIDE; ++row)
    {
        const char mark = board[row][0];

        // A row that starts with a blank cannot be a completed line.
        if (mark == ' ')
            continue;

        if (mark == board[row][1] && board[row][1] == board[row][2])
            return true;
    }
    return false;
}
// Returns true when some column holds the same non-blank mark in its
// first three cells.
bool columnCrossed(char board[][SIDE])
{
    for (int col = 0; col < SIDE; ++col)
    {
        const char mark = board[0][col];

        // A column that starts with a blank cannot be a completed line.
        if (mark == ' ')
            continue;

        if (mark == board[1][col] && board[1][col] == board[2][col])
            return true;
    }
    return false;
}
// Returns true when either diagonal of the 3x3 board holds three
// identical non-blank marks.
bool diagonalCrossed(char board[][SIDE])
{
    // Both diagonals pass through the centre, so a blank centre rules
    // out a completed diagonal.
    const char centre = board[1][1];
    if (centre == ' ')
        return false;

    const bool mainDiagonal = (board[0][0] == centre && centre == board[2][2]);
    const bool antiDiagonal = (board[0][2] == centre && centre == board[2][0]);
    return mainDiagonal || antiDiagonal;
}
// Returns true as soon as a row, a column or a diagonal is completed by
// one mark; otherwise false. The checks run in that order and stop at
// the first hit.
bool gameOver(char board[][SIDE])
{
    if (rowCrossed(board))
        return true;
    if (columnCrossed(board))
        return true;
    return diagonalCrossed(board);
}
// Plays one complete game of Tic-Tac-Toe and prints its progress.
//   whoseTurn - side that moves first: COMPUTER or HUMAN.
// Both sides take their cells from the shuffled list filled in by
// initialise(), one entry per turn. The game ends when a line is
// completed or all SIDE*SIDE cells are used; the result (winner or draw)
// is printed at the end.
// Any other value of whoseTurn is rejected with a message on stderr:
// the turn loop only advances for COMPUTER or HUMAN and would otherwise
// spin forever.
void playTicTacToe(int whoseTurn)
{
    if (whoseTurn != COMPUTER && whoseTurn != HUMAN)
    {
        fprintf(stderr, "Invalid starting player: %d\n", whoseTurn);
        return;
    }

    // A 3*3 Tic-Tac-Toe board and the order in which its cells are played.
    char board[SIDE][SIDE];
    int moves[SIDE * SIDE];

    initialise(board, moves);
    showInstructions();

    int moveIndex = 0;

    // Keep playing till somebody has won or every cell is taken.
    while (!gameOver(board) && moveIndex != SIDE * SIDE)
    {
        const bool computerTurn = (whoseTurn == COMPUTER);
        const char mark = computerTurn ? COMPUTERMOVE : HUMANMOVE;

        // Convert the linear cell number into board coordinates.
        const int cell = moves[moveIndex];
        board[cell / SIDE][cell % SIDE] = mark;

        printf("%s has put a %c in cell %d\n",
               computerTurn ? "COMPUTER" : "HUMAN", mark, cell + 1);
        showBoard(board);

        moveIndex++;
        whoseTurn = computerTurn ? HUMAN : COMPUTER;
    }

    // The loop ends either with a completed line or with a full board.
    if (!gameOver(board))
    {
        printf("It's a draw\n");
    }
    else
    {
        // whoseTurn was already handed to the next side after the winning
        // move, so the winner is the other one.
        declareWinner(whoseTurn == COMPUTER ? HUMAN : COMPUTER);
    }
}
// Driver program: plays a single game in which the computer moves first.
int main()
{
    const int firstPlayer = COMPUTER;
    playTicTacToe(firstPlayer);
    return 0;
}
Alpha-Beta Pruning
α-β剪枝
- Alpha is the best value that the maximizer currently can guarantee at that level or above.
- Beta is the best value that the minimizer currently can guarantee at that level or above.
Pseudocode:
function minimax(node, depth, isMaximizingPlayer, alpha, beta):
if node is a leaf node :
return value of the node
if isMaximizingPlayer :
bestVal = -INFINITY
for each child node :
value = minimax(child, depth+1, false, alpha, beta)
bestVal = max( bestVal, value)
alpha = max( alpha, bestVal)
if beta <= alpha:
break
return bestVal
else :
bestVal = +INFINITY
for each child node :
value = minimax(child, depth+1, true, alpha, beta)
bestVal = min( bestVal, value)
beta = min( beta, bestVal)
if beta <= alpha:
break
return bestVal
// Calling the function for the first time.
minimax(0, 0, true, -INFINITY, +INFINITY)
// C++ program to demonstrate
// working of Alpha-Beta Pruning
#include<iostream>
#include<algorithm>
using namespace std;
// Bounds of the initial search window: Alpha starts at MIN
// and Beta starts at MAX.
const int MAX = 1000;
const int MIN = -MAX;
// Minimax with Alpha-Beta pruning over a complete binary game tree.
//   depth            - level of the current node (the root is level 0).
//   nodeIndex        - index of the current node within its level; at the
//                      leaf level it indexes values[] directly.
//   maximizingPlayer - true when the maximizer moves at this node.
//   values           - values of the leaves of the game tree.
//   alpha            - best value the maximizer can already guarantee on
//                      the path from the root to this node.
//   beta             - best value the minimizer can already guarantee on
//                      the path from the root to this node.
//   maxDepth         - level at which the leaves sit; defaults to 3, the
//                      height that used to be hard-coded.
// Returns the best value found for the side to move. The running best is
// seeded from the first child instead of a fixed +/-1000 sentinel, so leaf
// values outside that range are not clamped.
int minimax(int depth, int nodeIndex,
            bool maximizingPlayer,
            int values[], int alpha,
            int beta, int maxDepth = 3)
{
    // Terminating condition: a leaf node is reached.
    if (depth >= maxDepth)
        return values[nodeIndex];

    int best = 0;

    // Recur for the left (i == 0) and right (i == 1) child.
    for (int i = 0; i < 2; i++)
    {
        const int val = minimax(depth + 1, nodeIndex * 2 + i,
                                !maximizingPlayer, values, alpha, beta,
                                maxDepth);

        if (maximizingPlayer)
        {
            // The maximizer raises alpha to the best value seen so far.
            best = (i == 0) ? val : std::max(best, val);
            alpha = std::max(alpha, best);
        }
        else
        {
            // The minimizer lowers beta to the best value seen so far.
            best = (i == 0) ? val : std::min(best, val);
            beta = std::min(beta, best);
        }

        // Alpha-Beta pruning: once beta <= alpha, the opposing side
        // already has an alternative higher up the tree that is at least
        // as good for it, so it will never let play reach this node and
        // the remaining children cannot change the result.
        if (beta <= alpha)
            break;
    }
    return best;
}
// Driver code: searches a sample tree of height 3 with the maximizer at
// the root, starting from the full [MIN, MAX] window.
int main()
{
    int values[8] = { 3, 5, 6, 9, 1, 2, 0, -1 };
    const int optimal = minimax(0, 0, true, values, MIN, MAX);
    cout << "The optimal value is : " << optimal;
    return 0;
}
Zobrist Hashing
Zobrist哈希