From 25918b5b666d1c0cbc50c3489b86407aa8500041 Mon Sep 17 00:00:00 2001
From: Seth Freiberg <seth@sethfreiberg.com>
Date: Fri, 20 Mar 2026 07:36:01 -0400
Subject: [PATCH] Self-play: 50 rounds, 0.1s sleep, max GPU utilization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumped ROUNDS_PER_TIER from 20 to 50. Reduced the per-round sleeps
in self_play.py (previously 0.5s or 1s) to 0.1s.
GPUs should run near 100% — Ollama queues requests internally.
mortdecai-sites container (CT 650) created on pve112.
Landing page live at mortdec.ai.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 training/scripts/overnight_selfplay.sh | 2 +-
 training/scripts/self_play.py          | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/training/scripts/overnight_selfplay.sh b/training/scripts/overnight_selfplay.sh
index 78e05a1..6ea3ba0 100755
--- a/training/scripts/overnight_selfplay.sh
+++ b/training/scripts/overnight_selfplay.sh
@@ -8,7 +8,7 @@
 MODEL="${1:-mortdecai-v4}"
 RCON_HOST="${2:-192.168.0.244}"
 RCON_PORT="${3:-25578}"
-ROUNDS_PER_TIER=20
+ROUNDS_PER_TIER=50
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"
 
diff --git a/training/scripts/self_play.py b/training/scripts/self_play.py
index 688f3d5..7754bb1 100644
--- a/training/scripts/self_play.py
+++ b/training/scripts/self_play.py
@@ -620,7 +620,7 @@ def main():
                 examples = trace_to_training(trace)
                 all_examples.extend(examples)
                 stats["training_examples"] += len(examples)
-                time.sleep(0.5)
+                time.sleep(0.1)
 
         elif tier == 2:
             # Self-critique: model generates prompt + response, RCON validates
@@ -654,7 +654,7 @@ def main():
                 examples = trace_to_training(trace)
                 all_examples.extend(examples)
                 stats["training_examples"] += len(examples)
-                time.sleep(1)
+                time.sleep(0.1)
 
         elif tier == 3:
             # Adversarial: generate prompts in Session A, respond in fresh Session B
@@ -703,7 +703,7 @@ def main():
                 examples = trace_to_training(trace)
                 all_examples.extend(examples)
                 stats["training_examples"] += len(examples)
-                time.sleep(1)
+                time.sleep(0.1)
 
         stats["rounds"] += 1