From 25918b5b666d1c0cbc50c3489b86407aa8500041 Mon Sep 17 00:00:00 2001 From: Seth Freiberg Date: Fri, 20 Mar 2026 07:36:01 -0400 Subject: [PATCH] Self-play: 50 rounds, 0.1s sleep, max GPU utilization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumped from 20 rounds/tier to 50. Reduced sleeps from 0.5s/1s to 0.1s. GPUs should run near 100% — Ollama queues requests internally. mortdecai-sites container (CT 650) created on pve112. Landing page live at mortdec.ai. Co-Authored-By: Claude Opus 4.6 (1M context) --- training/scripts/overnight_selfplay.sh | 2 +- training/scripts/self_play.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/training/scripts/overnight_selfplay.sh b/training/scripts/overnight_selfplay.sh index 78e05a1..6ea3ba0 100755 --- a/training/scripts/overnight_selfplay.sh +++ b/training/scripts/overnight_selfplay.sh @@ -8,7 +8,7 @@ MODEL="${1:-mortdecai-v4}" RCON_HOST="${2:-192.168.0.244}" RCON_PORT="${3:-25578}" -ROUNDS_PER_TIER=20 +ROUNDS_PER_TIER=50 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl" diff --git a/training/scripts/self_play.py b/training/scripts/self_play.py index 688f3d5..7754bb1 100644 --- a/training/scripts/self_play.py +++ b/training/scripts/self_play.py @@ -620,7 +620,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - time.sleep(0.5) + time.sleep(0.1) elif tier == 2: # Self-critique: model generates prompt + response, RCON validates @@ -654,7 +654,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - time.sleep(1) + time.sleep(0.1) elif tier == 3: # Adversarial: generate prompts in Session A, respond in fresh Session B @@ -703,7 +703,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - 
time.sleep(1) + time.sleep(0.1) stats["rounds"] += 1