Negamax的实现似乎不能用于tic-tac-toe

我已经实现了Negamax,因为它可以在wikipedia上find,其中包括alpha / beta修剪。

但是,似乎有利于失败,这应该是一个无效的结果。

游戏是Tic-Tac-Toe,我已经抽象了大部分游戏,因此在algorithm中发现一个错误应该是相当容易的。

下面是code, nextMovenegamax或者evaluate可能是包含错误的函数:

 #include <list> #include <climits> #include <iostream> //#define DEBUG 1 using namespace std; struct Move { int row, col; Move(int row, int col) : row(row), col(col) { } Move(const Move& m) { row = m.row; col = m.col; } }; struct Board { char player; char opponent; char board[3][3]; Board() { } void read(istream& stream) { stream >> player; opponent = player == 'X' ? 'O' : 'X'; for(int row = 0; row < 3; row++) { for(int col = 0; col < 3; col++) { char playa; stream >> playa; board[row][col] = playa == '_' ? 0 : playa == player ? 1 : -1; } } } void print(ostream& stream) { for(int row = 0; row < 3; row++) { for(int col = 0; col < 3; col++) { switch(board[row][col]) { case -1: stream << opponent; break; case 0: stream << '_'; break; case 1: stream << player; break; } } stream << endl; } } void do_move(const Move& move, int player) { board[move.row][move.col] = player; } void undo_move(const Move& move) { board[move.row][move.col] = 0; } bool isWon() { if (board[0][0] != 0) { if (board[0][0] == board[0][1] && board[0][1] == board[0][2]) return true; if (board[0][0] == board[1][0] && board[1][0] == board[2][0]) return true; } if (board[2][2] != 0) { if (board[2][0] == board[2][1] && board[2][1] == board[2][2]) return true; if (board[0][2] == board[1][2] && board[1][2] == board[2][2]) return true; } if (board[1][1] != 0) { if (board[0][1] == board[1][1] && board[1][1] == board[2][1]) return true; if (board[1][0] == board[1][1] && board[1][1] == board[1][2]) return true; if (board[0][0] == board[1][1] && board[1][1] == board[2][2]) return true; if (board[0][2] == board [1][1] && board[1][1] == board[2][0]) return true; } return false; } list<Move> getMoves() { list<Move> moveList; for(int row = 0; row < 3; row++) for(int col = 0; col < 3; col++) if (board[row][col] == 0) moveList.push_back(Move(row, col)); return moveList; } }; ostream& operator<< (ostream& stream, Board& board) { board.print(stream); return stream; } istream& operator>> (istream& stream, Board& board) { board.read(stream); return stream; } int evaluate(Board& board) { int score = board.isWon() ? 100 : 0; for(int row = 0; row < 3; row++) for(int col = 0; col < 3; col++) if (board.board[row][col] == 0) score += 1; return score; } int negamax(Board& board, int depth, int player, int alpha, int beta) { if (board.isWon() || depth <= 0) { #if DEBUG > 1 cout << "Found winner board at depth " << depth << endl; cout << board << endl; #endif return player * evaluate(board); } list<Move> allMoves = board.getMoves(); if (allMoves.size() == 0) return player * evaluate(board); for(list<Move>::iterator it = allMoves.begin(); it != allMoves.end(); it++) { board.do_move(*it, -player); int val = -negamax(board, depth - 1, -player, -beta, -alpha); board.undo_move(*it); if (val >= beta) return val; if (val > alpha) alpha = val; } return alpha; } void nextMove(Board& board) { list<Move> allMoves = board.getMoves(); Move* bestMove = NULL; int bestScore = INT_MIN; for(list<Move>::iterator it = allMoves.begin(); it != allMoves.end(); it++) { board.do_move(*it, 1); int score = -negamax(board, 100, 1, INT_MIN + 1, INT_MAX); board.undo_move(*it); #if DEBUG cout << it->row << ' ' << it->col << " = " << score << endl; #endif if (score > bestScore) { bestMove = &*it; bestScore = score; } } if (!bestMove) return; cout << bestMove->row << ' ' << bestMove->col << endl; #if DEBUG board.do_move(*bestMove, 1); cout << board; #endif } int main() { Board board; cin >> board; #if DEBUG cout << "Starting board:" << endl; cout << board; #endif nextMove(board); return 0; } 

给出这个input:

 O X__ ___ ___ 

该algorithmselect放置一块在0,1,导致一个保证的损失,做这个陷阱(没有什么可以做到赢或结束平局):

 XO_ X__ ___ 

也许这与评估function有关系? 如果是这样,我怎么解决它?

问题解决了!

evaluate职能不是从玩家角度评估董事会,而是从全球角度来评估董事会。 一旦我解决这个问题,negamaxalgorithm就像一个魅力!