Mortdecai/training/scripts/overnight_selfplay.sh

#!/bin/bash
# overnight_selfplay.sh — Run 3-tier self-play overnight, all three GPUs in parallel
# Each GPU runs a different tier simultaneously, then they rotate.
#
# Usage: nohup ./overnight_selfplay.sh [MODEL [RCON_HOST [RCON_PORT]]] > /var/log/selfplay_overnight.log 2>&1 &
# Kill with: pkill -f overnight_selfplay ; pkill -f self_play.py

# Positional arguments (all optional, each falls back to the default shown):
#   $1 - model tag passed to self_play.py --model    (default: mortdecai:0.4.0)
#   $2 - host passed to self_play.py --rcon-host     (default: 192.168.0.244)
#   $3 - port passed to self_play.py --rcon-port     (default: 25578)
#   $4 - rounds each tier runs per cycle (--rounds)  (default: 50)
MODEL="${1:-mortdecai:0.4.0}"
RCON_HOST="${2:-192.168.0.244}"
RCON_PORT="${3:-25578}"
ROUNDS_PER_TIER="${4:-50}"

# Reject a bad round count up front; otherwise every cycle of the endless
# main loop would be started with the same invalid value.
if ! [[ "$ROUNDS_PER_TIER" =~ ^[1-9][0-9]*$ ]]; then
    echo "error: rounds per tier must be a positive integer, got '$ROUNDS_PER_TIER'" >&2
    exit 2
fi

# Absolute directory containing this script. CDPATH is cleared so `cd` cannot
# print a directory name into the captured output, and `--` protects against
# paths that begin with a dash. Abort instead of continuing with an empty
# SCRIPT_DIR, which would silently point OUTPUT at the filesystem root.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" || {
    echo "error: cannot resolve script directory from '$0'" >&2
    exit 1
}

# JSONL file handed to every self_play.py invocation via --output.
OUTPUT="$SCRIPT_DIR/../../data/processed/self_play.jsonl"

# Three GPUs (each URL is passed to self_play.py as --ollama-url)
GPU1="http://192.168.0.141:11434"  # RTX 3090 Ti
GPU2="http://192.168.0.141:11435"  # RTX 2080 Ti
GPU3="http://192.168.0.179:11434"  # RTX 4000

# Startup banner: one printf call, one argument per output line, so the
# whole run configuration lands in the log as a single contiguous block.
printf '%s\n' \
    "=== Mortdecai Overnight Self-Play (3 GPU Parallel) ===" \
    "Model: $MODEL" \
    "GPU1: $GPU1 (RTX 3090 Ti)" \
    "GPU2: $GPU2 (RTX 2080 Ti)" \
    "GPU3: $GPU3 (RTX 4000)" \
    "RCON: $RCON_HOST:$RCON_PORT" \
    "Rounds per tier: $ROUNDS_PER_TIER" \
    "All three tiers run simultaneously, rotating GPUs each cycle" \
    "Started: $(date)" \
    "============================================"

#######################################
# Run self_play.py for one tier against one endpoint and relay its combined
# stdout/stderr to stdout, prefixing every line with "[Tier N]".
# Globals (read): SCRIPT_DIR, MODEL, RCON_HOST, RCON_PORT, ROUNDS_PER_TIER,
#                 OUTPUT
# Arguments:
#   $1 - tier number (1 = Drills, 2 = Self-Critique, 3 = Adversarial)
#   $2 - URL passed to self_play.py as --ollama-url
# Outputs:   start line, prefixed child output, then a Done/Failed line
# Returns:   the exit status of self_play.py
#######################################
run_tier() {
    local tier=$1
    local gpu=$2
    local tier_names=("" "Drills" "Self-Critique" "Adversarial")
    local line status

    echo "[Tier $tier - ${tier_names[$tier]}] Starting on $gpu at $(date)"

    # PYTHONUNBUFFERED: Python block-buffers stdout when it is a pipe, which
    # would delay log lines by minutes; force them through as they are written.
    # IFS= / -r keep leading whitespace and backslashes intact, and the
    # `|| [[ -n ... ]]` clause still emits a final line lacking a newline.
    PYTHONUNBUFFERED=1 python3 "$SCRIPT_DIR/self_play.py" \
        --model "$MODEL" \
        --ollama-url "$gpu" \
        --rcon-host "$RCON_HOST" \
        --rcon-port "$RCON_PORT" \
        --tier "$tier" \
        --rounds "$ROUNDS_PER_TIER" \
        --output "$OUTPUT" 2>&1 | while IFS= read -r line || [[ -n "$line" ]]; do
            printf '%s\n' "[Tier $tier] $line"
        done
    # Must be read immediately: the pipeline's own status is that of the
    # while loop, which hides a crash of the Python process.
    status=${PIPESTATUS[0]}

    if (( status == 0 )); then
        echo "[Tier $tier - ${tier_names[$tier]}] Done at $(date)"
    else
        echo "[Tier $tier - ${tier_names[$tier]}] Failed (exit $status) at $(date)"
    fi
    return "$status"
}

# Print the number of lines currently in $OUTPUT, or 0 when the file is
# missing or unreadable. The readability check comes first because a failed
# `< file` redirection reports its error before any later `2>/dev/null` on
# the same command takes effect.
count_examples() {
    local n=0
    if [[ -r "$OUTPUT" ]]; then
        n=$(wc -l < "$OUTPUT" 2>/dev/null) || n=0
    fi
    echo "${n:-0}"
}

# A cycle that ends faster than MIN_CYCLE_SECONDS is taken to mean the tiers
# are failing at startup; pause RETRY_DELAY_SECONDS so the endless loop does
# not spin at full speed and flood the log.
MIN_CYCLE_SECONDS=10
RETRY_DELAY_SECONDS=60

# Main loop: runs until the script is killed.
CYCLE=0
while true; do
    CYCLE=$((CYCLE + 1))
    cycle_start=$SECONDS
    echo ""
    echo "=== CYCLE $CYCLE — $(date) ==="
    echo "Examples so far: $(count_examples)"

    # Rotate GPU assignments each cycle (CYCLE starts at 1, so the first
    # cycle uses the "1" arm).
    case $((CYCLE % 3)) in
        0) T1_GPU=$GPU1; T2_GPU=$GPU2; T3_GPU=$GPU3 ;;
        1) T1_GPU=$GPU2; T2_GPU=$GPU3; T3_GPU=$GPU1 ;;
        2) T1_GPU=$GPU3; T2_GPU=$GPU1; T3_GPU=$GPU2 ;;
    esac

    # Run all three tiers in parallel
    run_tier 1 "$T1_GPU" &
    PID1=$!
    run_tier 2 "$T2_GPU" &
    PID2=$!
    run_tier 3 "$T3_GPU" &
    PID3=$!

    # Wait for all three to finish
    wait "$PID1" "$PID2" "$PID3"

    echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
    echo "Total examples: $(count_examples)"
    echo ""

    if (( SECONDS - cycle_start < MIN_CYCLE_SECONDS )); then
        echo "Cycle $CYCLE finished in under ${MIN_CYCLE_SECONDS}s; tiers are probably failing at startup. Sleeping ${RETRY_DELAY_SECONDS}s before retrying."
        sleep "$RETRY_DELAY_SECONDS"
    fi
done