diff --git a/training/scripts/overnight_selfplay.sh b/training/scripts/overnight_selfplay.sh index 78e05a1..6ea3ba0 100755 --- a/training/scripts/overnight_selfplay.sh +++ b/training/scripts/overnight_selfplay.sh @@ -8,7 +8,7 @@ MODEL="${1:-mortdecai-v4}" RCON_HOST="${2:-192.168.0.244}" RCON_PORT="${3:-25578}" -ROUNDS_PER_TIER=20 +ROUNDS_PER_TIER=50 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl" diff --git a/training/scripts/self_play.py b/training/scripts/self_play.py index 688f3d5..7754bb1 100644 --- a/training/scripts/self_play.py +++ b/training/scripts/self_play.py @@ -620,7 +620,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - time.sleep(0.5) + time.sleep(0.1) elif tier == 2: # Self-critique: model generates prompt + response, RCON validates @@ -654,7 +654,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - time.sleep(1) + time.sleep(0.1) elif tier == 3: # Adversarial: generate prompts in Session A, respond in fresh Session B @@ -703,7 +703,7 @@ def main(): examples = trace_to_training(trace) all_examples.extend(examples) stats["training_examples"] += len(examples) - time.sleep(1) + time.sleep(0.1) stats["rounds"] += 1