Files
Mortdecai/training/scripts/overnight_selfplay.sh
T
Seth de14f4a1c8 3-GPU overnight self-play: 3090 Ti + 2080 Ti + RTX 4000
Round-robin load balancing across three Ollama instances:
- 141:11434 (RTX 3090 Ti 24GB)
- 141:11435 (RTX 2080 Ti 11GB) — new second instance
- 179:11434 (RTX 4000 16GB)

Each tier cycles to a different GPU. 3x throughput overnight.
Cycles: Tier 1 drills → Tier 2 self-critique → Tier 3 adversarial → repeat

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 00:54:29 -04:00

60 lines
2.0 KiB
Bash
Executable File

#!/bin/bash
# overnight_selfplay.sh — Run 3-tier self-play overnight on dev server
# Cycles: Tier 1 (1hr) → Tier 2 (1hr) → Tier 3 (1hr) → repeat
#
# Usage: nohup ./overnight_selfplay.sh > /var/log/selfplay_overnight.log 2>&1 &
#
# Kill with: pkill -f overnight_selfplay
# --- Configuration ------------------------------------------------------
# Optional positional arguments:
#   $1  value forwarded to self_play.py as --model      (default: mortdecai-v4)
#   $2  value forwarded to self_play.py as --rcon-host  (default: 192.168.0.244)
#   $3  value forwarded to self_play.py as --rcon-port  (default: 25578)
MODEL="${1:-mortdecai-v4}"
RCON_HOST="${2:-192.168.0.244}"
RCON_PORT="${3:-25578}"
ROUNDS_PER_TIER=20 # ~20 rounds per hour at ~3min/round

# Absolute directory containing this script; self_play.py and the output
# file are located relative to it.
#   - CDPATH is cleared because `cd` echoes the target directory to stdout
#     when it resolves through CDPATH, which would corrupt the captured path.
#   - `--` protects against a script path that begins with a dash.
#   - Abort if resolution fails: continuing with an empty SCRIPT_DIR would
#     make every later path point at the filesystem root.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)" || {
  echo "ERROR: cannot resolve script directory from '$0'" >&2
  exit 1
}

# Load balance between three Ollama instances / GPUs
OLLAMA_URLS=(
  "http://192.168.0.141:11434"
  "http://192.168.0.141:11435"
  "http://192.168.0.179:11434"
)
# 141:11434 = RTX 3090 Ti (24GB)
# 141:11435 = RTX 2080 Ti (11GB)
# 179:11434 = RTX 4000 (16GB)

# Index into OLLAMA_URLS of the instance the next tier will use.
URL_IDX=0
#######################################
# Select the next Ollama endpoint in round-robin order.
# Globals:
#   OLLAMA_URLS (read)    - list of endpoint URLs
#   URL_IDX     (updated) - advanced by one, wrapping to 0 after the last entry
#   OLLAMA_URL  (written) - endpoint chosen for this call
# Arguments: none
#######################################
next_ollama() {
  local -r pool_size=${#OLLAMA_URLS[@]}
  # Hand out the current slot first, then move the cursor forward.
  OLLAMA_URL=${OLLAMA_URLS[URL_IDX]}
  URL_IDX=$(( (URL_IDX + 1) % pool_size ))
}
# Startup banner: record the effective settings and start time at the top of
# the log. A single printf emits one line per argument.
printf '%s\n' \
  "=== Mortdecai Overnight Self-Play ===" \
  "Model: $MODEL" \
  "Ollama instances: ${OLLAMA_URLS[*]}" \
  "RCON: $RCON_HOST:$RCON_PORT" \
  "Rounds per tier: $ROUNDS_PER_TIER" \
  "Cycle: Tier 1 (drills) → Tier 2 (self-critique) → Tier 3 (adversarial) → repeat" \
  "Started: $(date)" \
  "============================================"
# --- Main loop ------------------------------------------------------------
# Runs tiers 1 -> 2 -> 3 back to back, forever, until the script is killed.
# Each tier is one foreground invocation of self_play.py, so tiers execute
# one at a time. next_ollama is called once per tier; because there are three
# tiers and three entries in OLLAMA_URLS, the rotation is back at the first
# entry at the start of every cycle, so a given tier always lands on the same
# instance.

# Human-readable tier labels, indexed by tier number (slot 0 is unused).
TIER_NAMES=("" "Command Drills" "Self-Critique" "Adversarial")
# Every tier appends to the same JSONL file.
OUTPUT_FILE="$SCRIPT_DIR/../../data/processed/self_play.jsonl"
# Pause after a failed tier so a persistent failure (e.g. an unreachable
# host) does not turn the endless loop into a busy loop that floods the log.
FAIL_BACKOFF_SECS=30

CYCLE=0
while true; do
  CYCLE=$((CYCLE + 1))
  echo ""
  echo "=== CYCLE $CYCLE — $(date) ==="
  for TIER in 1 2 3; do
    next_ollama
    echo "--- Tier $TIER: ${TIER_NAMES[$TIER]} (using $OLLAMA_URL) ---"
    if python3 "$SCRIPT_DIR/self_play.py" \
      --model "$MODEL" \
      --ollama-url "$OLLAMA_URL" \
      --rcon-host "$RCON_HOST" \
      --rcon-port "$RCON_PORT" \
      --tier "$TIER" \
      --rounds "$ROUNDS_PER_TIER" \
      --output "$OUTPUT_FILE"; then
      echo "Tier $TIER done: $(date)"
    else
      TIER_RC=$?
      echo "Tier $TIER FAILED (exit $TIER_RC): $(date)" >&2
      sleep "$FAIL_BACKOFF_SECS"
    fi
  done
  echo "=== CYCLE $CYCLE COMPLETE — $(date) ==="
  # Report 0 until the output file exists. Testing readability first avoids
  # the shell's own "No such file" message, which a trailing 2>/dev/null
  # cannot hide because the input redirection is processed before it.
  EXAMPLE_COUNT=0
  if [[ -r "$OUTPUT_FILE" ]]; then
    EXAMPLE_COUNT=$(wc -l < "$OUTPUT_FILE")
  fi
  echo "Self-play data: $EXAMPLE_COUNT examples"
done