0.6.0 training session: Oracle Bot, RL combat, Mind's Eye, multilingual pipeline
Major changes from this session: Training: - 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL - 7,256 merged training examples (up from 3,183) - New training data: failure modes (85), midloop messaging (27), prompt injection defense (29), personality (32), gold from quarantine bank (232), new tool examples (30), claude's own experience (10) - All training data RCON-validated at 100% pass rate - Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56% Oracle Bot (Mind's Eye): - Invisible spectator bot (mineflayer) streams world state via WebSocket - HTML5 Canvas frontend at mind.mortdec.ai - Real-time tool trace visualization with expandable entries - Streaming model tokens during inference - Gateway integration: fire-and-forget POST /trace on every tool call Reinforcement Learning: - Gymnasium environment wrapping mineflayer bot (minecraft_env.py) - PPO training via Stable Baselines3 (10K param policy network) - Behavioral cloning pretraining (97.5% accuracy on expert policy) - Infinite training loop with auto-restart and checkpoint resume - Bot learns combat, survival, navigation from raw experience Bot Army: - 8-soldier marching formation with autonomous combat - Combat bots using mineflayer-pvp, pathfinder, armor-manager - Multilingual prayer bots via translategemma:27b (18 languages) - Frame-based AI architecture: LLM planner + reactive micro-scripts Infrastructure: - Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser) - Billing gateway now tracks all LAN traffic (LAN auto-auth) - Gateway fallback for empty god-mode responses - Updated mortdec.ai landing page Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
rcon_test_training.py — Test training data commands against live dev RCON.
|
||||
|
||||
Extracts all commands from specified training files, replaces placeholder
|
||||
player names with actual online players, and tests each via RCON.
|
||||
|
||||
Usage:
|
||||
python3 training/scripts/rcon_test_training.py
|
||||
python3 training/scripts/rcon_test_training.py --files data/raw/failure_mode_training.jsonl
|
||||
python3 training/scripts/rcon_test_training.py --fix # Fix bad commands in-place
|
||||
"""
|
||||
|
||||
import argparse
import json
import os
import re
import sys
from pathlib import Path

from mcrcon import MCRcon
|
||||
|
||||
# Directory two levels above the directory that contains this script; the
# relative paths in DEFAULT_FILES (and --files) are resolved against it.
ROOT = Path(__file__).resolve().parent.parent.parent

# Training files tested when --files is not given, in processing order.
DEFAULT_FILES = [
    "data/raw/failure_mode_training.jsonl",
    "data/raw/midloop_messaging_training.jsonl",
    "data/raw/prompt_injection_defense_training.jsonl",
    "data/raw/personality_training.jsonl",
    "data/raw/gold_from_bank_training.jsonl",
    "data/raw/new_tool_training.jsonl",
    "data/processed/filtered_audit.jsonl",
]

# Dev-server RCON connection settings.  Each value can be overridden through
# an environment variable of the same name so that the host and password do
# not have to be edited into (or committed with) this file.
RCON_HOST = os.environ.get("RCON_HOST", "192.168.0.244")
RCON_PORT = int(os.environ.get("RCON_PORT", "25578"))
RCON_PASS = os.environ.get("RCON_PASS", "REDACTED_RCON")

# Player names used in training data that need substitution
TRAINING_PLAYERS = {
    "slingshooter08", "SwiftWolf", "DarkWolf", "BraveWolf", "WildWolf",
    "StoneWolf", "CraftMaster99", "EndermanSlayer", "DiamondKing",
    "RedstoneWiz", "NetherWalker", "FrostByte", "PrayBot_0", "PrayBot_1",
    "PrayBot_2", "xX_HackerZ_Xx", "TotallyAdmin",
}

# Commands that are safe to test (won't cause damage); matched as prefixes.
SAFE_PREFIXES = [
    "give ", "effect ", "time set", "weather ", "gamemode ",
    "gamerule ", "difficulty ", "tp ",
]

# Commands to NEVER run even on dev; matched as substrings of the command.
NEVER_RUN = [
    "kill @a", "kill @e[type=minecraft:player",
    "ban ", "deop ", "op ", "stop", "kick ",
    "fill ", "setblock ",  # Might alter world
    "worldborder ",
]
|
||||
|
||||
|
||||
def get_online_players(mcr):
    """Return the names of the players reported by the server's ``list`` command.

    Args:
        mcr: Connected RCON client exposing ``command(str) -> str``.

    Returns:
        Player names in the order they first appear in the response, without
        duplicates.  The order is deterministic, so callers that pick the
        first entry as a substitution target get the same player every run.
    """
    resp = mcr.command("list")

    # Drop "§x" formatting codes so they never glue onto a neighbouring word.
    plain = re.sub(r"§.", "", resp)

    # Words of the header sentence ("There are 2 of a max of 20 players
    # online" / "... out of maximum ...") and the group labels seen in
    # responses such as "default: Player1, Player2".
    noise = {
        "out", "of", "max", "maximum", "players", "online",
        "There", "are", "builder", "default",
    }

    players = []
    for name in re.findall(r"\w+", plain):
        # Short tokens and bare numbers are player counts, not names.
        if len(name) <= 2 or name.isdigit() or name in noise:
            continue
        if name not in players:
            players.append(name)
    return players
|
||||
|
||||
|
||||
# Matches a JSON-style  "command": "..."  field.  The value may contain
# backslash escapes (e.g. \" inside NBT/JSON arguments) without ending early.
_COMMAND_FIELD_RE = re.compile(r'"command"\s*:\s*"((?:[^"\\]|\\.)+)"')


def extract_commands_from_record(rec):
    """Extract all commands from a training record.

    Only assistant messages are inspected.  Two sources are collected, in
    this order for each message:

    1. every ``"command": "..."`` field found in the message text (tool-call
       blocks), except values starting with ``tellraw``;
    2. the string entries of a top-level ``"commands"`` list when the whole
       message content parses as a JSON object.

    Args:
        rec: One decoded JSONL record; anything that is not a dict with a
            ``"messages"`` list yields no commands.

    Returns:
        List of command strings (duplicates are kept).
    """
    commands = []
    if not isinstance(rec, dict):
        return commands
    messages = rec.get("messages")
    if not isinstance(messages, (list, tuple)):
        return commands

    for msg in messages:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        content = msg.get("content")
        # Tool-call style messages may carry "content": null or a list of
        # parts; there is no text to scan in that case.
        if not isinstance(content, str):
            continue

        # From tool_call blocks with rcon.execute
        for m in _COMMAND_FIELD_RE.finditer(content):
            cmd = m.group(1)
            if "\\" in cmd:
                # Undo the JSON string escaping so the command is what the
                # server would actually receive.
                try:
                    cmd = json.loads(f'"{cmd}"', strict=False)
                except json.JSONDecodeError:
                    pass  # not valid JSON escaping; keep the raw text
            if not cmd.startswith("tellraw"):  # tellraw has nested JSON
                commands.append(cmd)

        # From JSON response commands arrays
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            continue  # plain-text reply, nothing more to collect
        if isinstance(parsed, dict):
            listed = parsed.get("commands")
            if isinstance(listed, list):
                commands.extend(c for c in listed if isinstance(c, str))

    return commands
|
||||
|
||||
|
||||
def substitute_player(cmd, online_players):
    """Replace placeholder players in *cmd* with a player who is online.

    The first entry of *online_players* is used as the target.  A bare
    ``@p`` selector and every whole-word occurrence of a name from
    ``TRAINING_PLAYERS`` are replaced by it.

    Args:
        cmd: Command text taken from the training data.
        online_players: Names of players currently online; may be empty.

    Returns:
        ``(command, was_subbed)`` where ``was_subbed`` is True when at least
        one substitution was made.  With no online players the command is
        returned unchanged together with False.
    """
    if not online_players:
        return cmd, False

    target = online_players[0]  # Use first online player

    # Replace @p with actual player (more reliable for RCON testing).  A
    # selector with arguments ("@p[distance=..5]") is left alone: swapping
    # only the "@p" part would produce the invalid "Name[distance=..5]".
    cmd, selector_hits = re.subn(r"@p(?![\[\w])", lambda _m: target, cmd)

    # Replace every known training player name, whole words only, longest
    # name first so the result does not depend on set iteration order.
    names = sorted(TRAINING_PLAYERS, key=lambda n: (-len(n), n))
    name_hits = 0
    if names:
        pattern = r"(?<!\w)(?:" + "|".join(re.escape(n) for n in names) + r")(?!\w)"
        cmd, name_hits = re.subn(pattern, lambda _m: target, cmd)

    return cmd, (selector_hits + name_hits) > 0
|
||||
|
||||
|
||||
def is_safe(cmd):
    """Decide whether *cmd* may be sent to the dev server.

    A command is rejected as soon as it contains any NEVER_RUN entry
    anywhere in its text; otherwise it is accepted only if it begins with
    one of the SAFE_PREFIXES.
    """
    contains_blocked = any(blocked in cmd for blocked in NEVER_RUN)
    if contains_blocked:
        return False
    # str.startswith accepts a tuple of alternatives.
    return cmd.startswith(tuple(SAFE_PREFIXES))
|
||||
|
||||
|
||||
def test_command(mcr, cmd):
    """Test a single command via RCON.

    Args:
        mcr: Connected RCON client exposing ``command(str) -> str``.
        cmd: Command text to send.

    Returns:
        ``(success, response)``.  ``success`` is False when the response
        contains a known error phrase or when sending raised an exception,
        in which case ``response`` is the exception text.
    """
    # Lower-case phrases that mark a rejected command.  "no player was found"
    # covers player-targeted commands (give/effect/gamemode/tp ...) aimed at
    # a player who is not online, which would otherwise count as a pass.
    error_markers = (
        "unknown command", "incorrect argument", "expected",
        "invalid", "no entity was found", "no player was found",
        "unknown or incomplete",
    )
    try:
        resp = mcr.command(cmd)
        lowered = resp.lower()
    except Exception as e:  # connection/protocol trouble is reported as a failure
        return False, str(e)

    if any(marker in lowered for marker in error_markers):
        return False, resp
    return True, resp


# Despite the "test_" prefix this is a helper, not a pytest test; keep test
# runners that import this module from trying to collect it.
test_command.__test__ = False
|
||||
|
||||
|
||||
def _load_file_commands(path):
    """Return ``(line_number, command)`` pairs for every command in a JSONL file.

    Line numbers are 1-based so they match what an editor shows.  Blank
    lines and lines that are not valid JSON are skipped.
    """
    file_commands = []
    # Training data is multilingual; do not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        for line_num, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue  # best effort: a malformed record yields no commands
            for cmd in extract_commands_from_record(rec):
                file_commands.append((line_num, cmd))
    return file_commands


def _dedupe_commands(file_commands, name_pattern):
    """Keep the first occurrence of each command, ignoring which training player it names."""
    seen = set()
    unique_cmds = []
    for line_num, cmd in file_commands:
        # Normalize for dedup
        norm = name_pattern.sub("@p", cmd)
        if norm not in seen:
            seen.add(norm)
            unique_cmds.append((line_num, cmd))
    return unique_cmds


def main():
    """Run the extracted training commands against the dev server and report.

    For each input file the commands are extracted, de-duplicated, limited
    to ``--max-per-file``, filtered through ``is_safe`` and sent over RCON.
    Per-file and overall pass/fail/skip counts are printed.

    Returns:
        0 when no tested command failed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="RCON-test training data commands")
    parser.add_argument("--files", nargs="*", help="Specific files to test")
    parser.add_argument("--fix", action="store_true", help="Fix bad commands in-place")
    parser.add_argument("--max-per-file", type=int, default=50, help="Max commands to test per file")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    files = args.files or DEFAULT_FILES

    if args.fix:
        # The flag is accepted but no rewriting step exists in this script.
        print("WARNING: --fix is not implemented; no files will be modified.")

    # Built once: matches any training player name, used to normalize
    # commands before de-duplication.
    name_pattern = re.compile(
        "(?:" + "|".join(re.escape(n) for n in sorted(TRAINING_PLAYERS, key=lambda n: (-len(n), n))) + ")"
    )

    total_tested = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0
    failures_by_file = {}

    print("Connecting to dev RCON...")
    with MCRcon(RCON_HOST, RCON_PASS, port=RCON_PORT) as mcr:
        online = get_online_players(mcr)
        print(f"Online players: {online}")

        if not online:
            print("WARNING: No players online. Player-targeted commands will fail.")

        for filepath in files:
            path = ROOT / filepath
            if not path.exists():
                print(f"\n SKIP (not found): {filepath}")
                continue

            # Deduplicate and limit
            unique_cmds = _dedupe_commands(_load_file_commands(path), name_pattern)
            test_cmds = unique_cmds[:args.max_per_file]

            file_pass = 0
            file_fail = 0
            file_skip = 0
            file_failures = []

            for line_num, original_cmd in test_cmds:
                cmd, _was_subbed = substitute_player(original_cmd, online)

                if not is_safe(cmd):
                    file_skip += 1
                    total_skipped += 1
                    if args.verbose:
                        print(f" SKIP (unsafe): {cmd[:80]}")
                    continue

                ok, resp = test_command(mcr, cmd)
                total_tested += 1

                if ok:
                    file_pass += 1
                    total_passed += 1
                    if args.verbose:
                        print(f" PASS: {cmd[:60]} → {resp[:40]}")
                else:
                    file_fail += 1
                    total_failed += 1
                    file_failures.append((line_num, original_cmd, cmd, resp))
                    if args.verbose:
                        print(f" FAIL: {cmd[:60]} → {resp[:60]}")

            failures_by_file[filepath] = file_failures

            status = "✓" if file_fail == 0 else "✗"
            print(f"\n {status} {Path(filepath).name}: {file_pass} pass, {file_fail} fail, {file_skip} skip (of {len(unique_cmds)} unique commands)")

            # In verbose mode each failure was already printed as it happened.
            if file_failures and not args.verbose:
                for ln, orig, _tested, resp in file_failures[:5]:
                    print(f" L{ln}: {orig[:60]}")
                    print(f" → {resp[:80]}")
                if len(file_failures) > 5:
                    print(f" ... and {len(file_failures) - 5} more failures")

    print(f"\n{'='*60}")
    print(f"TOTAL: {total_tested} tested, {total_passed} passed, {total_failed} failed, {total_skipped} skipped")
    if total_tested > 0:
        print(f"Pass rate: {total_passed/total_tested*100:.1f}%")

    # Summary of all failures
    if total_failed > 0:
        print(f"\nAll failures by file:")
        for filepath, failures in failures_by_file.items():
            if failures:
                print(f"\n {Path(filepath).name} ({len(failures)} failures):")
                for ln, orig, _tested, resp in failures:
                    print(f" L{ln}: {orig[:70]}")
                    print(f" RCON: {resp[:80]}")

    # Non-zero status lets shells and CI notice failed commands.
    return 1 if total_failed else 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user