Swarm bots, RCON validation, Haiku distillation complete

Swarm bots (ingame/swarm_bots.js):
- 10 survival bots with generated names (SwiftWolf, DarkWolf, etc.)
- All bots wander, take damage, auto-respawn, pray when hurt
- Gemini + Dolphin(5%) + Multilingual(3%) prompt generation
- 20-60s interaction interval per bot

Distillation results:
- 222 sudo examples via Haiku ($0.28)
- 122 god examples via Haiku ($0.37) — with God Soul personality
- Total: 344 distilled, $0.65 spent of $5 budget
- RCON validation: 74.7% fully valid, 30 real errors out of ~1000 commands

validate_distilled.py:
- Executes distilled commands on live server via RCON
- Distinguishes real errors from benign (no player online)
- Tags each example with validation status

Dev server switched to Claude Haiku via Anthropic API:
- llm_provider: anthropic with $5 budget cap
- Auto-fallback to Ollama when budget exhausted
- Cost tracking with logging

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-18 19:18:19 -04:00
parent 961f53ea7d
commit 65ee146043
5 changed files with 1224 additions and 2 deletions
+235
View File
@@ -0,0 +1,235 @@
#!/usr/bin/env python3
"""
validate_distilled.py — Execute distilled Claude responses on a live server via RCON.
Takes distilled.jsonl, executes each example's commands on the dev server,
captures RCON results, and writes validated training pairs to output.
This creates the strongest training signal: input → Claude's commands → actual server result.
Usage:
python3 training/scripts/validate_distilled.py # run all
python3 training/scripts/validate_distilled.py --dry-run # preview
python3 training/scripts/validate_distilled.py --max 10 # first 10 only
python3 training/scripts/validate_distilled.py --rcon-host 192.168.0.244 --rcon-port 25578
"""
import argparse
import json
import re
import sys
import time
from pathlib import Path
from mcrcon import MCRcon
# Repository root, three directory levels above this script.
ROOT = Path(__file__).resolve().parent.parent.parent
DISTILLED = ROOT.joinpath("data", "processed", "distilled.jsonl")
OUTPUT = ROOT.joinpath("data", "processed", "validated_distilled.jsonl")

# Case-insensitive fragments of RCON responses that indicate the server
# rejected the command itself (bad syntax, unknown item, bad argument, ...).
RCON_ERRORS = [
    re.compile(fragment, re.I)
    for fragment in (
        r'Unknown or incomplete command',
        r'Incorrect argument',
        r'Expected .+ at position',
        r'Unknown item',
        r'Unknown item component',
        r'Invalid or unknown',
        r"Can't find element",
        r'Expected whitespace',
    )
]

# Case-insensitive fragments of responses that look like failures but are
# expected on an idle server (no player online, no entity, unloaded chunks).
BENIGN_ERRORS = [
    re.compile(fragment, re.I)
    for fragment in (
        r'No player was found',
        r'No entity was found',
        r'That position is not loaded',
    )
]
def is_real_error(result: str) -> bool:
    """Report whether an RCON response is a genuine syntax/command error.

    The response counts as a real error only when it matches at least one
    pattern in RCON_ERRORS and none of the patterns in BENIGN_ERRORS.
    """
    if not any(pattern.search(result) for pattern in RCON_ERRORS):
        return False
    # A benign marker anywhere in the response overrides the error match.
    return not any(pattern.search(result) for pattern in BENIGN_ERRORS)
def is_benign_error(result: str) -> bool:
    """Report whether an RCON response matches any BENIGN_ERRORS pattern.

    These are failures that would be expected to succeed with a player online.
    """
    return any(pattern.search(result) is not None for pattern in BENIGN_ERRORS)
def execute_commands(commands: list, rcon_host: str, rcon_port: int, rcon_pass: str) -> list:
    """Run every command over a single RCON connection and record each outcome.

    Returns a list of dicts, one per attempted command, with the keys
    ``command``, ``result`` (response or exception text, cut to 200
    characters), ``success``, ``benign_error`` and ``real_error``.
    A command that raises is recorded as a real error. If the connection
    itself raises, one ``"(connection failed)"`` record is added to whatever
    was collected up to that point.
    """

    def outcome(command, text, real_err, benign):
        # Single place that fixes the record layout and the 200-char cut.
        return {
            "command": command,
            "result": text[:200],
            "success": not real_err,
            "benign_error": benign,
            "real_error": real_err,
        }

    collected = []
    try:
        with MCRcon(rcon_host, rcon_pass, port=rcon_port) as rcon:
            for cmd in commands:
                try:
                    reply = rcon.command(cmd)
                    collected.append(
                        outcome(cmd, reply, is_real_error(reply), is_benign_error(reply))
                    )
                    # Short pause between consecutive commands.
                    time.sleep(0.2)
                except Exception as exc:
                    collected.append(outcome(cmd, str(exc), True, False))
    except Exception as exc:
        collected.append(outcome("(connection failed)", str(exc), True, False))
    return collected
def _commands_of(example: dict) -> list:
    """Return the example's command list; missing or null fields yield []."""
    return (example.get("output") or {}).get("commands") or []


def _user_message_of(example: dict) -> str:
    """Return the example's full user message; missing or null fields yield ''."""
    return (example.get("input") or {}).get("user_message") or ""


def main():
    """Validate every distilled example against a live server and write the results.

    Reads the JSONL file given by --input, executes each example's commands
    through execute_commands(), attaches an ``rcon_validation`` dict to the
    example (status ``valid`` / ``partial`` / ``invalid`` / ``no_commands``
    plus per-command results and counts), writes all examples to --output as
    JSONL, and prints a summary. With --dry-run only the command count is
    printed. Exits with status 1 when the initial RCON connectivity check fails.
    """
    parser = argparse.ArgumentParser(description="Validate distilled responses via RCON")
    parser.add_argument("--input", default=str(DISTILLED))
    parser.add_argument("--output", default=str(OUTPUT))
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    # NOTE(review): a default credential lives in source; consider supplying it
    # via --rcon-pass or the environment instead.
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--max", type=int, default=0, help="Max examples to process (0=all)")
    parser.add_argument("--dry-run", action="store_true")
    # The reset is on by default, so a store_true flag alone could never turn
    # it off; the paired --no-reset-between flag makes it controllable while
    # keeping --reset-between accepted.
    parser.add_argument("--reset-between", dest="reset_between", action="store_true",
                        default=True, help="Clear effects between examples (default)")
    parser.add_argument("--no-reset-between", dest="reset_between", action="store_false",
                        help="Do not clear effects between examples")
    args = parser.parse_args()

    # Explicit UTF-8: the output is written with ensure_ascii=False, so both
    # files must not depend on the platform's locale encoding.
    with open(args.input, encoding="utf-8") as f:
        examples = [json.loads(line) for line in f if line.strip()]
    if args.max > 0:
        examples = examples[:args.max]

    print(f"Validating {len(examples)} distilled examples")
    print(f"RCON: {args.rcon_host}:{args.rcon_port}")
    print(f"Output: {args.output}")

    if args.dry_run:
        total_cmds = sum(len(_commands_of(ex)) for ex in examples)
        print(f"\n[DRY RUN] Would execute {total_cmds} commands across {len(examples)} examples")
        return

    # Test RCON
    try:
        with MCRcon(args.rcon_host, args.rcon_pass, port=args.rcon_port) as rcon:
            print(f"RCON OK: {rcon.command('list')}")
    except Exception as e:
        print(f"RCON FAILED: {e}")
        sys.exit(1)

    validated = []
    stats = {"total": 0, "all_success": 0, "partial": 0, "all_fail": 0, "no_cmds": 0,
             "real_errors": 0, "benign_errors": 0}

    for i, ex in enumerate(examples, 1):
        commands = _commands_of(ex)
        full_msg = _user_message_of(ex)
        # Only the progress line uses the 50-character preview; mode detection
        # looks at the whole message so a late "pray" is not missed.
        msg = full_msg[:50]
        mode = "god" if "pray" in full_msg.lower() or ex.get("source") == "prayer_log" else "sudo"
        stats["total"] += 1

        if not commands:
            stats["no_cmds"] += 1
            # Still valid — refusal or info-only response
            ex["rcon_validation"] = {"status": "no_commands", "results": []}
            validated.append(ex)
            print(f" [{i}/{len(examples)}] ({mode}) {msg:50} [no cmds — kept]")
            continue

        # Execute
        results = execute_commands(commands, args.rcon_host, args.rcon_port, args.rcon_pass)
        real_errors = sum(1 for r in results if r["real_error"])
        benign = sum(1 for r in results if r["benign_error"])
        successes = sum(1 for r in results if r["success"])
        stats["real_errors"] += real_errors
        stats["benign_errors"] += benign

        if real_errors == 0:
            stats["all_success"] += 1
            status = "valid"
        elif real_errors < len(results):
            stats["partial"] += 1
            status = "partial"
        else:
            stats["all_fail"] += 1
            status = "invalid"

        # Tag the example with validation results
        ex["rcon_validation"] = {
            "status": status,
            "results": results,
            "real_errors": real_errors,
            "benign_errors": benign,
            "successes": successes,
        }
        validated.append(ex)

        flag = ""
        if real_errors > 0:
            flag = f" [FAIL:{real_errors}]"
            # Show first real error, with a separator between command and response
            first_bad = next(r for r in results if r["real_error"])
            flag += f" {first_bad['command'][:30]} -> {first_bad['result'][:40]}"
        elif benign > 0:
            flag = f" [benign:{benign}]"
        print(f" [{i}/{len(examples)}] ({mode}) {msg:50} [{successes}/{len(results)} ok]{flag}")

        # Reset effects between examples
        if args.reset_between and mode == "god":
            try:
                with MCRcon(args.rcon_host, args.rcon_pass, port=args.rcon_port) as rcon:
                    rcon.command("effect clear @a")
            except Exception as e:
                # Best-effort cleanup: report and carry on, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                print(f" [warn] effect reset failed: {e}")
            time.sleep(0.5)
        time.sleep(0.3)

    # Write output; create the target directory first so a finished run is
    # not lost to a missing folder.
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        for ex in validated:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")

    print(f"\n{'='*60}")
    print("Validation complete")
    print(f" Total: {stats['total']}")
    print(f" All valid: {stats['all_success']} ({stats['all_success']/max(stats['total'],1)*100:.1f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" All failed: {stats['all_fail']}")
    print(f" No commands: {stats['no_cmds']}")
    print(f" Real errors: {stats['real_errors']}")
    print(f" Benign errors: {stats['benign_errors']} (would work with player online)")
    print(f" Output: {args.output}")


if __name__ == "__main__":
    main()