#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
# include <unistd.h>
#endif
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnBook() is used to update the book database when a game ends for any *
* reason. It uses the global "learn_value" variable and updates the book *
* based on the moves played and the value that was "learned". *
* *
* The global learn_value has two possible sources. If a game ends with a *
* real result (win, lose or draw) then learn_value will be set to a *
* number in the interval [-300, 300] depending on the result. If there is *
* no result (the operator exits the program prior to reaching a conclusion *
* via quit, end or ^C) then we will use the values from the first few *
* searches after leaving book to compute a learn_value (see LearnValue() *
* comments later in this file). *
* *
*******************************************************************************
*/
void LearnBook() {
int nplies = 0, thisply = 0;
unsigned char buf32[4];
int i, j, cluster;
float book_learn[64], t_learn_value;
/*
************************************************************
* *
* If there is no book file, or if book learning has *
* been disabled, there is nothing to update. Otherwise *
* we turn learning off (so the book is updated exactly *
* once) and fold the learned score into the book *
* database below. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning)
return;
learning = 0;
Print(128, "Updating book database\n");
/*
************************************************************
* *
* Now we build a vector of book learning results. Every *
* book move played at or after the last point where an *
* alternative existed receives 100% of the learned *
* score. Moving back toward the root of the book tree, *
* the fraction of the score drops by 1/nplies (nplies = *
* the number of plies where an alternative existed) each *
* time we pass another point with alternatives, so moves *
* nearer the root receive proportionally less credit. *
* *
************************************************************
*/
t_learn_value = ((float) learn_value) / 100.0;
for (i = 0; i < 64; i++)
if (learn_nmoves[i] > 1)
nplies++;
nplies = Max(nplies, 1);
for (i = 0; i < 64; i++) {
if (learn_nmoves[i] > 1)
thisply++;
book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
}
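/*
 ************************************************************
 * *
 * Worked example, derived from the loop above: with *
 * learn_value = +300 (t_learn_value = 3.0) and with *
 * alternatives available at plies 2 and 5 only (nplies = *
 * 2), plies 0-1 receive 0.0, plies 2-4 receive 1.5, and *
 * plies 5 and beyond receive the full 3.0, so the move *
 * at the last branch point carries the whole score. *
 * *
 ************************************************************
 */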
/*
************************************************************
* *
* Now find the appropriate cluster, find the key we were *
* passed, and update the resulting learn value. *
* *
************************************************************
*/
for (i = 0; i < 64 && learn_seekto[i]; i++) {
if (learn_seekto[i] > 0) {
fseek(book_file, learn_seekto[i], SEEK_SET);
fread(buf32, 4, 1, book_file);
cluster = BookIn32(buf32);
BookClusterIn(book_file, cluster, book_buffer);
for (j = 0; j < cluster; j++)
if (!(learn_key[i] ^ book_buffer[j].position))
break;
if (j >= cluster)
return;
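/* the first learned result replaces the (near-zero) stored value;
   subsequent results are averaged with what is already in the book */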
if (fabs(book_buffer[j].learn) < 0.0001)
book_buffer[j].learn = book_learn[i];
else
book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
BookClusterOut(book_file, cluster, book_buffer);
fflush(book_file);
}
}
}
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnFunction() is called to compute the adjustment value added to the *
* learn counter in the opening book. It takes three pieces of information *
* into consideration to do this: the search value, the search depth that *
* produced this value, and the rating difference (Crafty-opponent) so that *
* positive numbers mean Crafty is expected to win and negative numbers *
* mean that Crafty is expected to lose. *
* *
*******************************************************************************
*/
int LearnFunction(int sv, int search_depth, int rating_difference,
int trusted_value) {
static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
0.15, 0.2, 0.25, 0.3, 0.35
};
static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
.006, .003, .001
};
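/* note: rating_mult_ut[] supplies only ten initializers, so element
   [10] (an opponent rated 1000 or more points below Crafty) is
   implicitly 0.0 under C initialization rules, which zeroes the
   adjustment for untrusted values in that case */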
float multiplier;
int sd, rd;
sd = Max(Min(search_depth - 10, 19), 0);
rd = Max(Min(rating_difference / 200, 5), -5) + 5;
if (trusted_value)
multiplier = rating_mult_t[rd] * sd;
else
multiplier = rating_mult_ut[rd] * sd;
sv = Max(Min(sv, 600), -600);
return ((int) (sv * multiplier));
}
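/*
 ************************************************************
 * *
 * Worked example: LearnFunction(200, 15, 0, 1) clamps to *
 * sd = 5 and rd = 5, so the multiplier is *
 * rating_mult_t[5] * sd = 0.1 * 5 = 0.5 and the result *
 * is (int) (200 * 0.5) = 100. The same inputs with *
 * trusted_value = 0 use rating_mult_ut[5] = .025, giving *
 * a multiplier of 0.125 and a result of 25. *
 * *
 ************************************************************
 */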
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnValue() is used to monitor the scores over the first N moves out of *
* book. After these moves have been played, the evaluations are then used *
* to decide whether the last book move played was a reasonable choice or *
* not. (N is set by the #define LEARN_INTERVAL definition.) *
* *
* This procedure does not directly update the book. Rather, it sets the *
* global learn_value variable to represent the goodness or badness of the *
* position where we left the opening book. This will be used later to *
* update the book in the event the game ends without any sort of actual *
* result. In a normal situation, we will base our learning on the result *
* of the game, win-lose-draw. But it is possible that the game ends before *
* the final result is known. In this case, we will use the score from the *
* learn_value we compute here so that we learn _something_ from playing a *
* game fragment. *
* *
* There are three cases to be handled. (1) If the evaluation is bad right *
* out of book, or it drops enough to be considered a bad line, then the *
* book move will have its "learn" value reduced to discourage playing this *
* move again. (2) If the evaluation is even after N moves, then the learn *
* value will be increased, but by a relatively modest amount, so that a few *
* even results will offset one bad result. (3) If the evaluation is very *
* good after N moves, the learn value will be increased by a large amount *
* so that this move will be favored the next time the game is played. *
* *
*******************************************************************************
*/
void LearnValue(int search_value, int search_depth) {
int i;
int interval;
int best_eval = -999999, best_eval_p = 0;
int worst_eval = 999999, worst_eval_p = 0;
int best_after_worst_eval = -999999, worst_after_best_eval = 999999;
/*
************************************************************
* *
* If we have not been "out of book" for N moves, all *
* we need to do is take the search evaluation for the *
* search just completed and tuck it away in the book *
* learning array (book_learn_eval[]) for use later. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning || learn_value != 0)
return;
if (moves_out_of_book <= LEARN_INTERVAL) {
if (moves_out_of_book) {
book_learn_eval[moves_out_of_book - 1] = search_value;
book_learn_depth[moves_out_of_book - 1] = search_depth;
}
}
/*
************************************************************
* *
* Check the evaluations we've seen so far. If they are *
* within reason (+/- 1/3 of a pawn or so) we simply keep *
* playing and leave the book alone. If the eval is much *
* better or worse, we need to update the learning data. *
* *
************************************************************
*/
else if (moves_out_of_book == LEARN_INTERVAL + 1) {
if (moves_out_of_book < 1)
return;
Print(128, "LearnBook() executed\n");
interval = Min(LEARN_INTERVAL, moves_out_of_book);
if (interval < 2)
return;
for (i = 0; i < interval; i++) {
if (book_learn_eval[i] > best_eval) {
best_eval = book_learn_eval[i];
best_eval_p = i;
}
if (book_learn_eval[i] < worst_eval) {
worst_eval = book_learn_eval[i];
worst_eval_p = i;
}
}
if (best_eval_p < interval - 1) {
for (i = best_eval_p; i < interval; i++)
if (book_learn_eval[i] < worst_after_best_eval)
worst_after_best_eval = book_learn_eval[i];
} else
worst_after_best_eval = book_learn_eval[interval - 1];
if (worst_eval_p < interval - 1) {
for (i = worst_eval_p; i < interval; i++)
if (book_learn_eval[i] > best_after_worst_eval)
best_after_worst_eval = book_learn_eval[i];
} else
best_after_worst_eval = book_learn_eval[interval - 1];
#if defined(DEBUG)
Print(128, "Learning analysis ...\n");
Print(128, "worst=%d best=%d baw=%d wab=%d\n", worst_eval, best_eval,
best_after_worst_eval, worst_after_best_eval);
for (i = 0; i < interval; i++)
Print(128, "%d(%d) ", book_learn_eval[i], book_learn_depth[i]);
Print(128, "\n");
#endif
/*
************************************************************
* *
* We now have the best eval for the first N moves out *
* of book, the worst eval for the first N moves out of *
* book, and the worst eval that follows the best eval. *
* This will be used to recognize the following cases of *
* results that follow a book move: *
* *
************************************************************
*/
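/*
 ************************************************************
 * *
 * Worked example: with interval = 5 and evals of 40, *
 * 55, 60, 58, 62, the worst (40) comes first and the *
 * best (62) follows it, so best_after_worst_eval = 62 = *
 * best_eval and case (1) below applies. With evals of *
 * 60, 20, 15, 18, 10, the best (60) comes first and the *
 * worst (10) follows it, so worst_after_best_eval = 10 = *
 * worst_eval and case (2) applies. *
 * *
 ************************************************************
 */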
/*
************************************************************
* *
* (1) The best score is very good, and it doesn't drop *
* after following the game further. This case detects *
* those moves in book that are "good" and should be *
* played whenever possible, while avoiding the sound *
* gambits that leave us ahead in material for a short *
* while until the score starts to drop as the gambit *
* begins to show its effect. *
* *
************************************************************
*/
if (best_eval == best_after_worst_eval) {
learn_value = best_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (2) The worst score is bad, and doesn't improve any *
* after the worst point, indicating that the book move *
* chosen was "bad" and should be avoided in the future. *
* *
************************************************************
*/
else if (worst_eval == worst_after_best_eval) {
learn_value = worst_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (3) Things seem even out of book and remain that way *
* for N moves. We will just average the N scores and *
* use that as an approximation. *
* *
************************************************************
*/
else {
learn_value = 0;
search_depth = 0;
for (i = 0; i < interval; i++) {
learn_value += book_learn_eval[i];
search_depth += book_learn_depth[i];
}
learn_value /= interval;
search_depth /= interval;
}
learn_value =
LearnFunction(learn_value, search_depth,
crafty_rating - opponent_rating, learn_value < 0);
}
}
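/*
 ************************************************************
 * *
 * Summary of the learning flow in this file: while the *
 * game is within LEARN_INTERVAL moves of leaving book, *
 * LearnValue() records each root search score and depth. *
 * One move later it reduces them to a single learn_value *
 * via LearnFunction(). When the game ends, LearnBook() *
 * distributes that value (or the value derived from the *
 * actual game result) across the book moves played and *
 * writes the updated cluster entries back to book_file. *
 * *
 ************************************************************
 */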