1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
# An opening book needs to be built as follows:
# - install "pgn-extract" and "polyglot"
# - collect many relevant games into a single PGN file "input.pgn"
# - remove the variant games (960, atomic...) from the file
# - fix the errors reported by: pgn-extract -s -r input.pgn
# - create a file as "filter.txt":
# WhiteElo >= "1800"
# BlackElo >= "1800"
# - extract the best games: pgn-extract -tfilter.txt --notags --nocomments --nonags --novars -bl20 --plylimit 26 -s -owip.pgn input.pgn
# - create the opening book: polyglot make-book -min-game 10 -pgn wip.pgn -bin pychess_book.bin
# - merge the books (if needed): polyglot merge-book -in1 book1.bin -in2 book2.bin -out book.bin
#
# The opening book does not contain the names. They are stored in the separate file "eco.db" by
# running the current script. If a name refers to a position that is not part of the opening book,
# it cannot be displayed. If it refers to a shared position, the name is selected according to some
# priority rules. The source ECO file must also be sorted by ECO (at least) to be able to load the
# other languages.
#
# The current book supports 99.5% of the ECO names written in English.
import sys
import os
import sqlite3
from pychess.Savers.pgn import load
from pychess.System.protoopen import protoopen
from pychess.System.prefix import addDataPrefix
from pychess.Utils.eco import ECO_MAIN_LANG, ECO_LANGS
from pychess.Variants.fischerandom import FischerandomBoard
# The names are written to the SQLite file "eco.db" located through addDataPrefix()
path = os.path.join(addDataPrefix("eco.db"))
conn = sqlite3.connect(path)

if __name__ == "__main__":
    print("Creating the database")
    c = conn.cursor()

    # The table is always rebuilt from scratch, then indexed on hkey
    for ddl in (
        "drop table if exists openings",
        "create table openings (hash text, hkey integer, mainline integer, endline integer, eco text, lang text, opening text, variation text, fen text)",
        "create index if not exists openings_index on openings (hkey)",
    ):
        c.execute(ddl)
def feed(pgnfile, lang):
    """Load the openings of one ECO PGN file into the ``openings`` table.

    Each record of the file describes one opening: the ``ECO`` tag holds the
    code, the ``White`` tag the opening name and the ``Black`` tag the
    variation name. Every position reached by the moves is stored together
    with its hash and its FEN.

    A record without any move only translates the name of an ECO code. Its
    positions are copied from the main line already stored for
    ``ECO_MAIN_LANG``, so that language has to be fed first. Such records are
    ignored when ``lang`` is ``ECO_MAIN_LANG`` itself.

    The function uses the module-level cursor ``c`` and connection ``conn``,
    and commits once all the rows of the file are written.

    Args:
        pgnfile: Path of the ECO file to read. Nothing is done when it is not
            an existing file.
        lang: Language code stored with each inserted row.

    Returns:
        None.
    """
    # Check the existence of the file
    if not os.path.isfile(pgnfile):
        return

    # Load the ECO file first
    print(" - Parsing")
    cf = load(protoopen(pgnfile))
    cf.limit = 5000
    cf.init_tag_database()
    records, _ = cf.get_records()

    # Cache the content
    entries = []
    ply_max = 0
    old_eco = ""
    for rec in records:
        model = cf.loadToModel(rec)
        eco = "" if rec["ECO"] is None else rec["ECO"]
        entry = {
            "h": [],  # Hashes
            "f": "",  # Final hash of the line
            "n": [],  # FENs
            # Main line = shortest sequence of moves for the ECO code.
            # The 'EN' ECO file is specially crafted
            "m": old_eco != eco,
            "e": eco,  # ECO
            "o": "" if rec["White"] is None else rec["White"],  # Opening
            "v": "" if rec["Black"] is None else rec["Black"],  # Variation
            "p": len(model.moves),  # Number of plies
        }
        ply_max = max(ply_max, entry["p"])

        if entry["p"] == 0:
            # No move means that we are translating the name of the ECO code,
            # so we need to find all the related positions from another language
            if lang == ECO_MAIN_LANG:
                continue
            c.execute(
                "select hash, endline, fen from openings where eco=? and lang=? and mainline=1",
                (eco, ECO_MAIN_LANG),
            )
            for row_hash, endline, fen in c.fetchall():
                entry["h"].append(row_hash)
                if endline == int(True):
                    entry["f"] = row_hash
                entry["n"].append(fen)
        else:
            # Find the Polyglot hash for each position of the opening
            for ply in range(entry["p"]):
                nextboard = model.getBoardAtPly(ply, 0).board.next
                h = hex(nextboard.hash)[2:]
                entry["h"].append(h)
                entry["f"] = h  # The last assignment wins: hash of the final position
                entry["n"].append(nextboard.asFen())
        entries.append(entry)
        old_eco = entry["e"]
    print(" - Max ply : %d" % ply_max)

    # Process all the data in reverse order
    for depth in reversed(range(ply_max + 1)):
        sys.stdout.write("\r - Loading into the database (%d remaining) " % depth)
        sys.stdout.flush()
        # Long lines are overwritten by short lines
        for entry in reversed(entries):
            if entry["p"] != depth:
                continue
            # "h" and "n" are always filled together, so they can be walked in pairs
            for h, fen in zip(entry["h"], entry["n"]):
                hkey = int(h[-2:], 16)  # Last two hex digits of the hash
                c.execute(
                    "select endline from openings where hash=? and hkey=? and lang=?",
                    (h, hkey, lang),
                )
                r = c.fetchone()
                # A position that already ends a line keeps its current name
                if r is not None and r[0] == int(True):
                    continue
                c.execute(
                    "delete from openings where hash=? and hkey=? and lang=?",
                    (h, hkey, lang),
                )
                c.execute(
                    "insert into openings (hash, hkey, mainline, endline, eco, lang, opening, variation, fen) values (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        h,
                        hkey,
                        int(entry["m"]),
                        int(h == entry["f"]),
                        entry["e"],
                        lang,
                        entry["o"],
                        entry["v"],
                        fen,
                    ),
                )
    conn.commit()
    print("\n - Processed %d openings" % len(entries))
# Several eco lists contain only eco+name pairs
# We use the base ECO line positions from EN/eco.pgn
# English is first in ECO_LANGS for that reason
if __name__ == "__main__":  # Only fill the database when run as a script
    for lang in ECO_LANGS:
        print("Processing %s" % lang.upper())
        feed("lang/%s/eco.pgn" % lang, lang)

    # Start positions for Chess960
    print("Processing Chess960")
    chess960 = FischerandomBoard()
    for i in range(960):
        c.execute(
            "insert into openings (mainline, endline, eco, lang, opening, fen) values (?, '1', '960', ?, ?, ?)",
            (
                # Only index 518 is flagged as main line
                # (presumably the standard chess setup - TODO confirm)
                "1" if i == 518 else "0",
                ECO_MAIN_LANG,
                "Chess%.3d" % (i + 1),
                chess960.getFrcFen(i + 1),
            ),
        )

    # Closing a sqlite3 connection does not commit the pending transaction:
    # without this commit the Chess960 rows inserted above would be discarded
    conn.commit()
    conn.close()
|