Parallel 3-GPU self-play: all tiers run simultaneously

Each cycle runs all three tiers at the same time on different GPUs:
- Tier 1 (drills) on GPU A
- Tier 2 (self-critique) on GPU B
- Tier 3 (adversarial) on GPU C

GPU assignments rotate each cycle for even wear. 3x throughput vs sequential. RCON handles concurrent commands.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 00:55:24 -04:00
parent de14f4a1c8
commit 3580d350b4
1 changed file with 52 additions and 34 deletions
@@ -1,59 +1,77 @@
 #!/bin/bash
-# overnight_selfplay.sh — Run 3-tier self-play overnight on dev server
-# Cycles: Tier 1 (1hr) → Tier 2 (1hr) → Tier 3 (1hr) → repeat
+# overnight_selfplay.sh — Run 3-tier self-play overnight, all three GPUs in parallel
+# Each GPU runs a different tier simultaneously, then they rotate.
 #
 # Usage: nohup ./overnight_selfplay.sh > /var/log/selfplay_overnight.log 2>&1 &
-#
-# Kill with: pkill -f overnight_selfplay
+# Kill with: pkill -f overnight_selfplay ; pkill -f self_play.py

 MODEL="${1:-mortdecai-v4}"
 RCON_HOST="${2:-192.168.0.244}"
 RCON_PORT="${3:-25578}"
-ROUNDS_PER_TIER=20  # ~20 rounds per hour at ~3min/round
+ROUNDS_PER_TIER=20  # passed to self_play.py as --rounds for every tier run
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"  # --output for all three tiers; line-counted for the per-cycle totals. NOTE(review): three concurrent writers share this file — confirm self_play.py appends safely

-# Load balance between three Ollama instances / GPUs
-OLLAMA_URLS=("http://192.168.0.141:11434" "http://192.168.0.141:11435" "http://192.168.0.179:11434")
-# 141:11434 = RTX 3090 Ti (24GB)
-# 141:11435 = RTX 2080 Ti (11GB)
-# 179:11434 = RTX 4000 (16GB)
-URL_IDX=0
+# Three GPUs
+GPU1="http://192.168.0.141:11434"  # RTX 3090 Ti
+GPU2="http://192.168.0.141:11435"  # RTX 2080 Ti
+GPU3="http://192.168.0.179:11434"  # RTX 4000

-next_ollama() {
-    OLLAMA_URL="${OLLAMA_URLS[$URL_IDX]}"
-    URL_IDX=$(( (URL_IDX + 1) % ${#OLLAMA_URLS[@]} ))
-}
-
-echo "=== Mortdecai Overnight Self-Play ==="
+echo "=== Mortdecai Overnight Self-Play (3 GPU Parallel) ==="
 echo "Model: $MODEL"
-echo "Ollama instances: ${OLLAMA_URLS[*]}"
+echo "GPU1: $GPU1 (RTX 3090 Ti)"
+echo "GPU2: $GPU2 (RTX 2080 Ti)"
+echo "GPU3: $GPU3 (RTX 4000)"
 echo "RCON: $RCON_HOST:$RCON_PORT"
 echo "Rounds per tier: $ROUNDS_PER_TIER"
-echo "Cycle: Tier 1 (drills) → Tier 2 (self-critique) → Tier 3 (adversarial) → repeat"
+echo "All three tiers run simultaneously, rotating GPUs each cycle"
 echo "Started: $(date)"
 echo "============================================"

+run_tier() {  # usage: run_tier TIER GPU_URL — run one self-play tier against one Ollama endpoint, tagging its output
+    local tier=$1  # tier number (1-3); also the index into tier_names
+    local gpu=$2   # Ollama base URL handed to self_play.py as --ollama-url
+    local tier_names=("" "Drills" "Self-Critique" "Adversarial")  # slot 0 left empty so tier N maps to element N
+    echo "[Tier $tier - ${tier_names[$tier]}] Starting on $gpu at $(date)"
+    python3 "$SCRIPT_DIR/self_play.py" \
+        --model "$MODEL" \
+        --ollama-url "$gpu" \
+        --rcon-host "$RCON_HOST" \
+        --rcon-port "$RCON_PORT" \
+        --tier "$tier" \
+        --rounds "$ROUNDS_PER_TIER" \
+        --output "$OUTPUT" 2>&1 | while IFS= read -r line || [[ -n "$line" ]]; do  # raw read keeps backslashes and indentation; the || test relays a final line with no trailing newline
+            echo "[Tier $tier] $line"
+        done
+    echo "[Tier $tier - ${tier_names[$tier]}] Done at $(date)"
+}
+
 CYCLE=0
 while true; do
    CYCLE=$((CYCLE + 1))
    echo ""
    echo "=== CYCLE $CYCLE — $(date) ==="
+    echo "Examples so far: $( { wc -l < "$OUTPUT"; } 2>/dev/null || echo 0)"  # brace group so 2>/dev/null is active before the < redirection can fail on a missing file

-    for TIER in 1 2 3; do
-        next_ollama
-        TIER_NAMES=("" "Command Drills" "Self-Critique" "Adversarial")
-        echo "--- Tier $TIER: ${TIER_NAMES[$TIER]} (using $OLLAMA_URL) ---"
-        python3 "$SCRIPT_DIR/self_play.py" \
-            --model "$MODEL" \
-            --ollama-url "$OLLAMA_URL" \
-            --rcon-host "$RCON_HOST" \
-            --rcon-port "$RCON_PORT" \
-            --tier "$TIER" \
-            --rounds "$ROUNDS_PER_TIER" \
-            --output "$SCRIPT_DIR/../../data/processed/self_play.jsonl"
-        echo "Tier $TIER done: $(date)"
-    done
+    # Rotate GPU assignments each cycle
+    case $((CYCLE % 3)) in  # CYCLE starts at 1, so the first cycle takes arm 1
+        0) T1_GPU=$GPU1; T2_GPU=$GPU2; T3_GPU=$GPU3 ;;
+        1) T1_GPU=$GPU2; T2_GPU=$GPU3; T3_GPU=$GPU1 ;;
+        2) T1_GPU=$GPU3; T2_GPU=$GPU1; T3_GPU=$GPU2 ;;
+    esac
+
+    # Run all three tiers in parallel
+    run_tier 1 "$T1_GPU" &
+    PID1=$!
+    run_tier 2 "$T2_GPU" &
+    PID2=$!
+    run_tier 3 "$T3_GPU" &
+    PID3=$!
+
+    # Wait for all three to finish
+    wait "$PID1" "$PID2" "$PID3"  # barrier: the next cycle starts only after the slowest tier returns

    echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
-    echo "Self-play data: $(wc -l < "$SCRIPT_DIR/../../data/processed/self_play.jsonl" 2>/dev/null || echo 0) examples"
+    echo "Total examples: $( { wc -l < "$OUTPUT"; } 2>/dev/null || echo 0)"  # same stderr-safe count as at cycle start
+    echo ""
 done