generate.sh: improve portability and performance, use randomness using a seed - chess-puzzles - chess puzzle book generator
(HTM) git clone git://git.codemadness.org/chess-puzzles
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 667fa14261d797a2c04938992a4efa5061e558a2
(DIR) parent b9e10f90912e4d6c82e4a4738a2fcdbd77b0d6db
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 21 Dec 2023 18:14:27 +0100
generate.sh: improve portability and performance, use randomness using a seed
- Replace shuf with a custom shuffling using awk and sort -R.
- Use a seed for the randomness, so the selection is random but deterministic.
Allows regenerating the same output (at least on the same machine).
- Generating the puzzles is faster, tested on a machine: 10s to 3.5s
- Show an error message when the CSV database file doesn't exist yet.
Diffstat:
M generate.sh | 92 +++++++++++++++++++------------
1 file changed, 58 insertions(+), 34 deletions(-)
---
(DIR) diff --git a/generate.sh b/generate.sh
@@ -1,12 +1,47 @@
#!/bin/sh
fenbin="./fen"
+db="lichess_db_puzzle.csv"
+# abort early with a hint on stderr when the puzzle database file is missing.
+if ! test -f "$db"; then
+ printf 'File "%s" not found, run `make db` to update it\n' "$db" >&2
+ exit 1
+fi
index="puzzles/index.html"
rm -rf puzzles
mkdir -p puzzles/solutions
solutions="$(mktemp)"
+seedfile="$(mktemp)"
+seed=20231221 # must be an integer value
+# seed file for the random sort: makes the order deterministic on the same system.
+# the seed data written to the file must be sufficiently long.
+echo "${seed}_chess_puzzles" > "$seedfile"
+
+# shuffle(file, amount): print at most "amount" randomly picked lines of file.
+shuffle() {
+ f="$1"
+ total="$2"
+ nlines="$(wc -l < "$f")"
+ nlines="$((nlines + 0))" # normalize the wc output to a plain integer
+ results="$(mktemp)"
+ # reads the globals $seed (awk srand) and $seedfile (sort -R): repeatable output.
+# generate list of lines to use. Not perfectly random but good enough.
+LC_ALL=C awk -v "seed=$seed" -v "nlines=$nlines" -v "total=$total" '
+BEGIN {
+ srand(seed);
+ for (i = 0; i < total; i++)
+ sel[int(rand() * nlines) + 1] = 1; # NR counts from 1: pick from 1..nlines
+}
+(NR in sel) { # membership test: does not create an array entry for each line
+ print $0;
+}' "$f" > "$results"
+ # the picks can repeat, so fewer than "amount" lines may have been selected.
+ # now we have less results we can use the slow sort -R.
+ sort -R --random-source "$seedfile" "$results"
+ rm -f "$results"
+}
cat > "$index" <<!
<!DOCTYPE html>
@@ -38,42 +73,30 @@ footer {
!
# shuffle, some sort of order and point system based on rating of puzzle.
-db="lichess_db_puzzle.csv"
count=1
-(grep 'mateIn1' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn2' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn3' < "$db" | shuf -n 100 | sed 10q
-grep 'mateIn4' < "$db" | shuf -n 100 | sed 10q
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 { print $0 }' "$db" | shuf -n 100 | sed 5q
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 { print $0 }' "$db" | shuf -n 100 | sed 3q
-LC_ALL=C awk -F ',' '(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 { print $0 }' "$db" | shuf -n 100 | sed 2q
-) |
-LC_ALL=C awk -F ',' '
-{
- points="1 point"; # default
-}
-(" " $8 " ") ~ / mateIn2 / {
- points="2 points";
-}
-(" " $8 " ") ~ / mateIn3 / {
- points="3 points";
-}
-(" " $8 " ") ~ / mateIn4 / {
- points="4 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) < 2000 {
- points="5 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2000 {
- points="7 points";
-}
-(" " $8 " ") ~ / mateIn5 / && int($4) >= 2700 {
- points="10 points";
-}
-{
- print $0 "," points;
-}' | \
+groupsdir="$(mktemp -d)"
+test "$groupsdir" = "" && exit 1
+# one file per mate theme; grep matches the theme name anywhere in the line.
+grep 'mateIn1' "$db" > "$groupsdir/matein1.csv"
+grep 'mateIn2' "$db" > "$groupsdir/matein2.csv"
+grep 'mateIn3' "$db" > "$groupsdir/matein3.csv"
+grep 'mateIn4' "$db" > "$groupsdir/matein4.csv"
+grep 'mateIn5' "$db" > "$groupsdir/matein5.csv"
+LC_ALL=C awk -F ',' 'int($4) < 2000 { print $0 }' "$groupsdir/matein5.csv" > "$groupsdir/matein5_lt_2000.csv"
+LC_ALL=C awk -F ',' 'int($4) >= 2000 { print $0 }' "$groupsdir/matein5.csv" > "$groupsdir/matein5_ge_2000.csv"
+LC_ALL=C awk -F ',' 'int($4) >= 2700 { print $0 }' "$groupsdir/matein5.csv" > "$groupsdir/matein5_ge_2700.csv"
+# field 4 is presumably the puzzle rating; the >= 2000 file also holds all >= 2700 lines.
+( # per group: pick up to 100 random lines, keep the first N, append the points.
+shuffle "$groupsdir/matein1.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",1 point" }'
+shuffle "$groupsdir/matein2.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",2 points" }'
+shuffle "$groupsdir/matein3.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",3 points" }'
+shuffle "$groupsdir/matein4.csv" 100 | sed 10q | LC_ALL=C awk '{ print $0 ",4 points" }'
+shuffle "$groupsdir/matein5_lt_2000.csv" 100 | sed 5q | LC_ALL=C awk '{ print $0 ",5 points" }'
+shuffle "$groupsdir/matein5_ge_2000.csv" 100 | sed 3q | LC_ALL=C awk '{ print $0 ",7 points" }'
+shuffle "$groupsdir/matein5_ge_2700.csv" 100 | sed 2q | LC_ALL=C awk '{ print $0 ",10 points" }'
+rm -rf "$groupsdir" # all groups have been read, remove the temporary group files
+) | \
while read -r line; do
i="$count"
fen=$(printf '%s' "$line" | cut -f 2 -d ',')
@@ -196,3 +219,4 @@ cat >> "$index" <<!
!
rm -f "$solutions"
+rm -f "$seedfile" # seed data for sort -R, not needed anymore