5b28002001
Major changes from this session:

Training:
- 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL
- 7,256 merged training examples (up from 3,183)
- New training data: failure modes (85), midloop messaging (27), prompt injection defense (29), personality (32), gold from quarantine bank (232), new tool examples (30), Claude's own experience (10)
- All training data RCON-validated at 100% pass rate
- Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56%

Oracle Bot (Mind's Eye):
- Invisible spectator bot (mineflayer) streams world state via WebSocket
- HTML5 Canvas frontend at mind.mortdec.ai
- Real-time tool trace visualization with expandable entries
- Streaming model tokens during inference
- Gateway integration: fire-and-forget POST /trace on every tool call

Reinforcement Learning:
- Gymnasium environment wrapping mineflayer bot (minecraft_env.py)
- PPO training via Stable Baselines3 (10K param policy network)
- Behavioral cloning pretraining (97.5% accuracy on expert policy)
- Infinite training loop with auto-restart and checkpoint resume
- Bot learns combat, survival, navigation from raw experience

Bot Army:
- 8-soldier marching formation with autonomous combat
- Combat bots using mineflayer-pvp, pathfinder, armor-manager
- Multilingual prayer bots via translategemma:27b (18 languages)
- Frame-based AI architecture: LLM planner + reactive micro-scripts

Infrastructure:
- Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser)
- Billing gateway now tracks all LAN traffic (LAN auto-auth)
- Gateway fallback for empty god-mode responses
- Updated mortdec.ai landing page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
263 lines
8.9 KiB
Python
263 lines
8.9 KiB
Python
#!/usr/bin/env python3
"""
rcon_test_training.py — Test training data commands against live dev RCON.

Extracts all commands from the specified training files, replaces placeholder
player names with an actual online player, and tests each command via RCON.

Usage:
    python3 training/scripts/rcon_test_training.py
    python3 training/scripts/rcon_test_training.py --files data/raw/failure_mode_training.jsonl
    python3 training/scripts/rcon_test_training.py --fix    # Fix bad commands in-place

Note: the --fix flag is parsed, but this script contains no logic that
rewrites training files, so the flag currently has no effect.
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from mcrcon import MCRcon
|
|
|
|
# Base directory that the relative training-file paths below are joined onto:
# two levels above the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent.parent

# Training files tested when --files is not given (paths relative to ROOT).
DEFAULT_FILES = [
    "data/raw/failure_mode_training.jsonl",
    "data/raw/midloop_messaging_training.jsonl",
    "data/raw/prompt_injection_defense_training.jsonl",
    "data/raw/personality_training.jsonl",
    "data/raw/gold_from_bank_training.jsonl",
    "data/raw/new_tool_training.jsonl",
    "data/processed/filtered_audit.jsonl",
]
|
|
|
|
# Connection settings for the dev server's RCON endpoint.
# NOTE(review): the password is hard-coded in source; consider loading it
# from the environment or a local config file instead.
RCON_HOST = "192.168.0.244"
RCON_PORT = 25578
RCON_PASS = "REDACTED_RCON"
|
|
|
|
# Placeholder player names that appear in the training data. They are swapped
# for a real online player before a command is sent to the server.
TRAINING_PLAYERS = {
    "slingshooter08",
    "SwiftWolf",
    "DarkWolf",
    "BraveWolf",
    "WildWolf",
    "StoneWolf",
    "CraftMaster99",
    "EndermanSlayer",
    "DiamondKing",
    "RedstoneWiz",
    "NetherWalker",
    "FrostByte",
    "PrayBot_0",
    "PrayBot_1",
    "PrayBot_2",
    "xX_HackerZ_Xx",
    "TotallyAdmin",
}
|
|
|
|
# A command is only sent to the server when it starts with one of these
# prefixes (commands that are safe to test and won't cause damage).
SAFE_PREFIXES = [
    "give ",
    "effect ",
    "time set",
    "weather ",
    "gamemode ",
    "gamerule ",
    "difficulty ",
    "tp ",
]

# Substrings that disqualify a command outright, even on the dev server.
NEVER_RUN = [
    "kill @a",
    "kill @e[type=minecraft:player",
    "ban ",
    "deop ",
    "op ",
    "stop",
    "kick ",
    "fill ",      # Might alter world
    "setblock ",  # Might alter world
    "worldborder ",
]
|
|
|
|
|
|
def get_online_players(mcr):
    """Return the names of the players currently online on the dev server.

    Sends ``list`` through ``mcr.command`` and parses the reply. Only the text
    after the first colon is scanned, so words and numbers from the header
    sentence (for example "max" in "of a max of 20", or a three-digit player
    cap) are never mistaken for player names.

    Args:
        mcr: Object exposing ``command(str) -> str`` (an open RCON connection).

    Returns:
        A list of unique names in the order they appear in the reply, so that
        the first element is the same on every run. Empty when the reply
        contains no colon or lists no names.
    """
    resp = mcr.command("list") or ""

    # Reply shapes seen: "…players online: A, B" and "§6default§r: A, B".
    # In both, names only ever follow a colon.
    _, colon, roster = resp.partition(":")
    if not colon:
        return []

    # Header words and group labels that can still show up after the first
    # colon (e.g. a second "§6builder§r:" group line).
    not_names = {
        "out", "of", "maximum", "players", "online",
        "There", "are", "builder", "default",
    }

    # The optional prefix swallows §-style formatting codes glued to a name.
    tokens = re.findall(r'(?:§[0-9a-fk-or])*(\w+)', roster)
    names = [tok for tok in tokens if len(tok) > 2 and tok not in not_names]

    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(names))
|
|
|
|
|
|
def extract_commands_from_record(rec):
    """Extract every command found in the assistant turns of a training record.

    Two sources are scanned in each assistant message whose ``content`` is a
    string:

    1. ``"command": "<text>"`` pairs anywhere in the content. Escaped quotes
       inside the value are honoured and JSON escapes are decoded, so a
       command is not cut off at its first ``\\"``. Commands starting with
       ``tellraw`` are skipped here.
    2. A top-level ``"commands"`` list when the whole content parses as a JSON
       object; only its string items are taken.

    Args:
        rec: One decoded JSONL record; expected to be a dict with a
            ``"messages"`` list, anything else yields an empty result.

    Returns:
        A list of command strings in the order they were found.
    """
    if not isinstance(rec, dict):
        return []
    messages = rec.get("messages")
    if not isinstance(messages, (list, tuple)):
        return []

    commands = []
    for msg in messages:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue

        content = msg.get("content")
        if not isinstance(content, str):
            # Covers "content": null as well as list/dict content; a regex
            # search on those would raise.
            continue

        # Source 1: "command": "..." pairs; the value may hold \" escapes.
        for match in re.finditer(r'"command"\s*:\s*"((?:[^"\\]|\\.)+)"', content):
            raw = match.group(1)
            try:
                cmd = json.loads(f'"{raw}"')
            except ValueError:
                # Not a valid JSON string body (e.g. stray control character);
                # fall back to the text exactly as captured.
                cmd = raw
            if not cmd.startswith("tellraw"):  # tellraw has nested JSON
                commands.append(cmd)

        # Source 2: the content itself is a JSON object with a commands list.
        try:
            parsed = json.loads(content)
        except ValueError:
            continue
        if isinstance(parsed, dict):
            listed = parsed.get("commands")
            # Require a real list: iterating a bare string would yield
            # single characters.
            if isinstance(listed, list):
                commands.extend(item for item in listed if isinstance(item, str))

    return commands
|
|
|
|
|
|
def substitute_player(cmd, online_players, training_players=None):
    """Point a training command at a player who is actually online.

    Every ``@p`` and every whole-word occurrence of a training player name is
    replaced with the first entry of ``online_players``. All names present in
    the command are replaced, not just the first one found, and a name is only
    matched as a complete token (``PrayBot_1`` does not match inside
    ``PrayBot_10``).

    Args:
        cmd: The command text from the training data.
        online_players: Names of players currently online; the first one is
            used as the replacement target.
        training_players: Optional iterable of placeholder names to replace.
            Defaults to the module-level ``TRAINING_PLAYERS``.

    Returns:
        ``(new_cmd, replaced)`` where ``replaced`` is True when at least one
        training player name was substituted. Replacing only ``@p`` leaves the
        flag False. With no online players the command is returned untouched.
    """
    if not online_players:
        return cmd, False

    target = online_players[0]  # Use first online player

    # Replace @p with the actual player (more reliable for RCON testing).
    cmd = cmd.replace("@p", target)

    names = TRAINING_PLAYERS if training_players is None else training_players
    if not names:
        return cmd, False

    # Longest-first ordering keeps the alternation deterministic; the
    # lookarounds restrict matches to whole tokens.
    ordered = sorted(names, key=lambda name: (-len(name), name))
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(name) for name in ordered) + r")(?!\w)"
    )

    # A callable replacement keeps backslashes in the target literal.
    cmd, hits = pattern.subn(lambda _match: target, cmd)
    return cmd, hits > 0
|
|
|
|
|
|
def is_safe(cmd):
    """Decide whether a command may be sent to the dev server.

    A command qualifies only when it contains none of the ``NEVER_RUN``
    substrings and begins with one of the ``SAFE_PREFIXES``.

    Args:
        cmd: The command text to check.

    Returns:
        True when the command passes both checks, False otherwise.
    """
    # The deny-list is checked first: one forbidden substring anywhere rules
    # the command out regardless of its prefix.
    if any(blocked in cmd for blocked in NEVER_RUN):
        return False
    return cmd.startswith(tuple(SAFE_PREFIXES))
|
|
|
|
|
|
def test_command(mcr, cmd):
    """Run one command through RCON and classify the reply.

    Args:
        mcr: Object exposing ``command(str)`` (an open RCON connection).
        cmd: The command text to send.

    Returns:
        ``(ok, response)``. ``ok`` is False when the lower-cased reply
        contains one of the known error phrases, or when sending raised an
        exception; in the latter case ``response`` is the exception text.
    """
    error_markers = (
        "unknown command", "incorrect argument", "expected",
        "invalid", "no entity was found", "unknown or incomplete",
    )
    try:
        resp = mcr.command(cmd)
        lowered = resp.lower()
        rejected = any(marker in lowered for marker in error_markers)
    except Exception as exc:
        # Best-effort by design: any failure while talking to the server is
        # reported as a failed command instead of aborting the run.
        return False, str(exc)

    if rejected:
        return False, resp
    return True, resp
|
|
|
|
|
|
def _load_file_commands(path):
    """Return ``(line_number, command)`` pairs extracted from one JSONL file.

    Line numbers are 1-based so they match what an editor shows. Blank lines
    and lines that are not valid JSON are skipped.
    """
    pairs = []
    with open(path, encoding="utf-8") as handle:
        for line_num, line in enumerate(handle, start=1):
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            pairs.extend((line_num, cmd) for cmd in extract_commands_from_record(rec))
    return pairs


def _dedupe_commands(file_commands):
    """Keep the first of any commands that differ only in the training player named."""
    names = sorted(TRAINING_PLAYERS, key=len, reverse=True)
    # Compiled once per call; names are escaped so they are matched literally.
    pattern = re.compile("|".join(re.escape(name) for name in names)) if names else None

    seen = set()
    unique = []
    for line_num, cmd in file_commands:
        # Normalize for dedup: every training name collapses to "@p".
        norm = pattern.sub("@p", cmd) if pattern else cmd
        if norm not in seen:
            seen.add(norm)
            unique.append((line_num, cmd))
    return unique


def main():
    """Test the commands found in training files against the dev RCON server.

    Parses the command line, connects to the server, and for each file:
    extracts the commands, de-duplicates them, keeps at most
    ``--max-per-file`` of them, substitutes an online player, skips anything
    ``is_safe`` rejects, and sends the rest through ``test_command``. A
    per-file line and a final summary with all failures are printed.

    Reported line numbers are 1-based. ``--fix`` is accepted for
    compatibility but no fix logic exists; a warning is printed to stderr
    when it is passed.
    """
    parser = argparse.ArgumentParser(description="RCON-test training data commands")
    parser.add_argument("--files", nargs="*", help="Specific files to test")
    parser.add_argument("--fix", action="store_true", help="Fix bad commands in-place")
    parser.add_argument("--max-per-file", type=int, default=50, help="Max commands to test per file")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    if args.fix:
        # Say so explicitly rather than silently ignoring the flag.
        print("WARNING: --fix is not implemented; running in report-only mode.", file=sys.stderr)

    files = args.files or DEFAULT_FILES

    total_tested = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0
    failures_by_file = {}

    print("Connecting to dev RCON...")
    with MCRcon(RCON_HOST, RCON_PASS, port=RCON_PORT) as mcr:
        online = get_online_players(mcr)
        print(f"Online players: {online}")

        if not online:
            print("WARNING: No players online. Player-targeted commands will fail.")

        for filepath in files:
            path = ROOT / filepath
            if not path.exists():
                print(f"\n SKIP (not found): {filepath}")
                continue

            # Deduplicate and limit
            unique_cmds = _dedupe_commands(_load_file_commands(path))
            test_cmds = unique_cmds[:args.max_per_file]

            file_pass = 0
            file_fail = 0
            file_skip = 0
            file_failures = []

            for line_num, original_cmd in test_cmds:
                cmd, _ = substitute_player(original_cmd, online)

                if not is_safe(cmd):
                    file_skip += 1
                    total_skipped += 1
                    if args.verbose:
                        print(f" SKIP (unsafe): {cmd[:80]}")
                    continue

                ok, resp = test_command(mcr, cmd)
                total_tested += 1

                if ok:
                    file_pass += 1
                    total_passed += 1
                    if args.verbose:
                        print(f" PASS: {cmd[:60]} → {resp[:40]}")
                else:
                    file_fail += 1
                    total_failed += 1
                    file_failures.append((line_num, original_cmd, cmd, resp))
                    if args.verbose:
                        print(f" FAIL: {cmd[:60]} → {resp[:60]}")

            failures_by_file[filepath] = file_failures

            status = "✓" if file_fail == 0 else "✗"
            print(f"\n {status} {Path(filepath).name}: {file_pass} pass, {file_fail} fail, {file_skip} skip (of {len(unique_cmds)} unique commands)")

            # In verbose mode each failure was already printed as it happened.
            if file_failures and not args.verbose:
                for ln, orig, _tested, resp in file_failures[:5]:
                    print(f" L{ln}: {orig[:60]}")
                    print(f" → {resp[:80]}")
                if len(file_failures) > 5:
                    print(f" ... and {len(file_failures) - 5} more failures")

    print(f"\n{'='*60}")
    print(f"TOTAL: {total_tested} tested, {total_passed} passed, {total_failed} failed, {total_skipped} skipped")
    if total_tested > 0:
        print(f"Pass rate: {total_passed/total_tested*100:.1f}%")

    # Summary of all failures
    if total_failed > 0:
        print(f"\nAll failures by file:")
        for filepath, failures in failures_by_file.items():
            if failures:
                print(f"\n {Path(filepath).name} ({len(failures)} failures):")
                for ln, orig, _tested, resp in failures:
                    print(f" L{ln}: {orig[:70]}")
                    print(f" RCON: {resp[:80]}")


if __name__ == "__main__":
    main()
|