Files
Mortdecai/training/scripts/validate_all_training.py
Seth 5b28002001 0.6.0 training session: Oracle Bot, RL combat, Mind's Eye, multilingual pipeline
Major changes from this session:

Training:
- 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL
- 7,256 merged training examples (up from 3,183)
- New training data: failure modes (85), midloop messaging (27),
  prompt injection defense (29), personality (32), gold from quarantine
  bank (232), new tool examples (30), Claude's own experience (10)
- All training data RCON-validated at 100% pass rate
- Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56%

Oracle Bot (Mind's Eye):
- Invisible spectator bot (mineflayer) streams world state via WebSocket
- HTML5 Canvas frontend at mind.mortdec.ai
- Real-time tool trace visualization with expandable entries
- Streaming model tokens during inference
- Gateway integration: fire-and-forget POST /trace on every tool call

Reinforcement Learning:
- Gymnasium environment wrapping mineflayer bot (minecraft_env.py)
- PPO training via Stable Baselines3 (10K param policy network)
- Behavioral cloning pretraining (97.5% accuracy on expert policy)
- Infinite training loop with auto-restart and checkpoint resume
- Bot learns combat, survival, navigation from raw experience

Bot Army:
- 8-soldier marching formation with autonomous combat
- Combat bots using mineflayer-pvp, pathfinder, armor-manager
- Multilingual prayer bots via translategemma:27b (18 languages)
- Frame-based AI architecture: LLM planner + reactive micro-scripts

Infrastructure:
- Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser)
- Billing gateway now tracks all LAN traffic (LAN auto-auth)
- Gateway fallback for empty god-mode responses
- Updated mortdec.ai landing page

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 20:22:50 -04:00

527 lines
19 KiB
Python

#!/usr/bin/env python3
"""
Unified training data validator for Mortdecai 0.6.0.
Validates ALL training data files:
- RCON command syntax (live server when available, pattern matching always)
- System prompt correctness (24-tool list)
- Format consistency (messages[] chat format)
- Known bad patterns (@s, enchantment syntax, leading slashes, generic items)
- Tool call schema compliance
- Duplicate detection
Modes:
--check Dry run, report issues only (default)
--fix Auto-fix known issues in place
--rcon Enable live RCON validation (requires dev server)
Usage:
python3 training/scripts/validate_all_training.py --check
python3 training/scripts/validate_all_training.py --fix --rcon
"""
import argparse
import glob
import json
import logging
import os
import re
import socket
import struct
import sys
import time
from collections import Counter, defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
log = logging.getLogger(__name__)
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
# --- Current 24-tool list ---
# Canonical tool names, grouped by namespace. The order is kept as authored.
CURRENT_TOOLS = [
    # Command execution and reference lookups
    "rcon.execute",
    "minecraft.lookup",
    "plugin.docs_lookup",
    # World inspection
    "world.player_info",
    "world.server_state",
    "world.nearby_entities",
    "world.scan_area",
    "world.redstone_trace",
    "world.render",
    # Server configuration
    "server.config",
    # Memory and journal
    "memory.read",
    "memory.write",
    "journal.read",
    "journal.write",
    # Logs and user interaction
    "log.query",
    "user.ask",
    # Script management
    "script.write",
    "script.validate",
    "script.execute",
    "script.read",
    "script.list",
    "script.delete",
    "script.schedule",
    # Training capture
    "training.save",
]
# Legacy tool names that should be updated.
# Maps each retired tool name to the name that replaces it. The checks in this
# file look up the keys to flag outdated prompts and tool calls.
LEGACY_TOOLS = {
    "minecraft.wiki_lookup": "minecraft.lookup",
    "minecraft.changelog_lookup": "minecraft.lookup",
    # NOTE(review): mapped to minecraft.lookup rather than plugin.docs_lookup; confirm this is intended.
    "paper.docs_lookup": "minecraft.lookup",
}
# Known bad patterns in commands.
# Each entry is (compiled regex, issue key, human-readable description). The
# regexes are searched against individual command strings, so they must not
# rely on surrounding JSON punctuation such as quotes.
BAD_PATTERNS = [
    (re.compile(r'^/'), "leading_slash", "Commands should not start with /"),
    (re.compile(r'@s\b'), "at_s", "@s invalid via RCON (no executor). Auto-fix: @p (imprecise — selects nearest player, not necessarily the requester)"),
    (re.compile(r'\[enchantments=\{'), "enchantment_syntax", "Paper RCON rejects [enchantments={...}] component syntax"),
    (re.compile(r'\[potion_contents='), "potion_syntax", "Paper RCON rejects [potion_contents={...}] syntax"),
    (re.compile(r'give \S+ minecraft:bed\b'), "generic_bed", "minecraft:bed doesn't exist, use minecraft:white_bed"),
    (re.compile(r'give \S+ minecraft:log\b'), "generic_log", "minecraft:log doesn't exist, use minecraft:oak_log"),
    (re.compile(r'give \S+ minecraft:wood\b'), "generic_wood", "minecraft:wood doesn't exist, use minecraft:oak_planks"),
    (re.compile(r'give \S+ minecraft:boat\b'), "generic_boat", "minecraft:boat doesn't exist, use minecraft:oak_boat"),
    (re.compile(r'give \S+ minecraft:steak\b'), "generic_steak", "minecraft:steak doesn't exist, use minecraft:cooked_beef"),
    # The previous pattern required literal double quotes around the command,
    # which an extracted command string never contains, so it could not match.
    # Anchor on the command itself; an optional leading slash is tolerated so
    # the issue is still reported alongside "leading_slash".
    (re.compile(r'^/?weather clear\b'), "weather_no_world", "Paper needs world name: weather devworld clear"),
    (re.compile(r'template (search|pick|build)'), "template_cmd", "Template commands removed in 0.5.0+"),
]
# Fix map for auto-repair.
# Maps an issue key from BAD_PATTERNS to a function that returns the repaired
# command. Substitutions use a trailing word boundary, mirroring the detection
# regexes, so longer identifiers that merely share a prefix (for example
# "minecraft:bedrock", "minecraft:wooden_sword" or "@sam") are left untouched.
FIXES = {
    "leading_slash": lambda cmd: cmd.lstrip("/"),
    "at_s": lambda cmd: re.sub(r'@s\b', '@p', cmd),
    "generic_bed": lambda cmd: re.sub(r'minecraft:bed\b', 'minecraft:white_bed', cmd),
    "generic_log": lambda cmd: re.sub(r'minecraft:log\b', 'minecraft:oak_log', cmd),
    "generic_wood": lambda cmd: re.sub(r'minecraft:wood\b', 'minecraft:oak_planks', cmd),
    "generic_boat": lambda cmd: re.sub(r'minecraft:boat\b', 'minecraft:oak_boat', cmd),
    "generic_steak": lambda cmd: re.sub(r'minecraft:steak\b', 'minecraft:cooked_beef', cmd),
}
# Enchantment fix: replace enchanted give with plain give
def fix_enchantment(cmd: str) -> List[str]:
    """Rewrite a ``give`` command that uses a rejected item-component suffix.

    Two shapes are handled, both only when the command starts with ``give``:

    * ``give <target> minecraft:<item>[enchantments={...}] [count]`` becomes
      ``give <target> minecraft:<item> <count>`` (the enchantments are dropped).
    * ``give <target> minecraft:<item>[potion_contents={...}] [count]`` becomes
      ``give <target> minecraft:potion <count>`` (the item is replaced by a
      plain potion).

    A missing count defaults to ``1``. The component body may contain nested
    braces (for example ``{levels:{"minecraft:sharpness":5}}``).

    Args:
        cmd: The command string to repair.

    Returns:
        A one-element list holding the repaired command, or ``[cmd]`` unchanged
        when neither shape matches.
    """
    # ``.+?`` followed by ``\}\]`` stops at the first "}]", which closes the
    # component list even when the body itself contains inner "}" characters.
    m = re.match(r'(give \S+ minecraft:\S+)\[enchantments=\{.+?\}\]\s*(\d*)', cmd)
    if m:
        base = m.group(1)
        count = m.group(2) or "1"
        return [f"{base} {count}"]
    m = re.match(r'(give \S+ minecraft:\S+)\[potion_contents=\{.+?\}\]\s*(\d*)', cmd)
    if m:
        # Drop any other component suffix, then swap the item id for a plain potion.
        base = m.group(1).split("[")[0]
        count = m.group(2) or "1"
        return [f"{base.rsplit(' ', 1)[0]} minecraft:potion {count}"]
    return [cmd]
# --- RCON helper ---
def rcon_connect(host: str, port: int, password: str) -> Optional[socket.socket]:
    """Open an RCON connection and authenticate.

    Sends a login packet (request id 1, type 3) and reads exactly one reply
    packet. A reply whose request id is -1 is treated as rejected credentials.

    Args:
        host: RCON server host name or address.
        port: RCON TCP port.
        password: RCON password.

    Returns:
        A connected, authenticated socket with a 10 second timeout, or ``None``
        when the connection, the handshake, or authentication fails. The socket
        is closed before ``None`` is returned.
    """
    sock = None
    try:
        sock = socket.socket()
        sock.settimeout(10)
        sock.connect((host, port))
        data = password.encode() + b'\x00\x00'
        sock.sendall(struct.pack('<iii', len(data) + 8, 1, 3) + data)

        # Read the fixed 12-byte header (size, request id, type) in full; a
        # single recv() is not guaranteed to deliver the whole reply.
        header = b''
        while len(header) < 12:
            chunk = sock.recv(12 - len(header))
            if not chunk:
                raise ConnectionError("connection closed during RCON login")
            header += chunk
        size, request_id, _packet_type = struct.unpack('<iii', header)

        # Drain the rest of the reply so it cannot be mistaken for the
        # response to the first command. ``size`` excludes its own 4 bytes and
        # 8 of the remaining bytes were already consumed with the header.
        remaining = size - 8
        while remaining > 0:
            chunk = sock.recv(remaining)
            if not chunk:
                raise ConnectionError("connection closed during RCON login")
            remaining -= len(chunk)

        if request_id == -1:
            raise PermissionError("RCON password rejected")
        return sock
    except Exception as exc:  # best-effort: callers fall back to offline checks
        if sock is not None:
            sock.close()
        logging.getLogger(__name__).warning(
            "RCON connect to %s:%s failed: %s", host, port, exc)
        return None
def rcon_cmd(sock: socket.socket, cmd: str) -> str:
    """Send one command over an authenticated RCON socket and return the reply text.

    The reply is read as one length-prefixed packet, so a response delivered in
    several TCP segments is reassembled before decoding. Only the first reply
    packet is read; a response the server splits over several packets is
    returned truncated to that first packet.

    Args:
        sock: Socket returned by a successful RCON login.
        cmd: Command text to execute.

    Returns:
        The decoded reply body, ``''`` when the reply is missing or incomplete,
        or ``'ERROR: <reason>'`` when sending or receiving raises.
    """
    def _read_exact(count: int) -> bytes:
        # Collect exactly ``count`` bytes, or fewer if the peer closes early.
        buf = bytearray()
        while len(buf) < count:
            chunk = sock.recv(count - len(buf))
            if not chunk:
                break
            buf += chunk
        return bytes(buf)

    try:
        data = cmd.encode() + b'\x00\x00'
        sock.sendall(struct.pack('<iii', len(data) + 8, 2, 2) + data)
        prefix = _read_exact(4)
        if len(prefix) < 4:
            return ''
        (size,) = struct.unpack('<i', prefix)
        # A packet carries at least request id (4) + type (4) + two NUL bytes.
        if size < 10:
            return ''
        packet = _read_exact(size)
        if len(packet) < size:
            return ''
        # Strip request id and type in front, and the two trailing NUL bytes.
        return packet[8:-2].decode('utf-8', errors='replace')
    except Exception as e:
        return f'ERROR: {e}'
def rcon_validate_cmd(sock: socket.socket, cmd: str) -> Tuple[bool, str]:
    """Run ``cmd`` over RCON and classify the reply.

    Returns ``(valid, reply)``. A reply mentioning a missing target counts as
    valid because the syntax was accepted; otherwise a reply containing an
    error keyword counts as invalid. Anything else is valid.
    """
    reply = rcon_cmd(sock, cmd)
    lowered = reply.lower()

    # Missing-target replies take precedence over the error keywords below.
    for marker in ("no player", "no entity", "not loaded", "not online"):
        if marker in lowered:
            return True, reply  # command syntax is fine, just no target

    for marker in ("unknown", "invalid", "incorrect", "expected whitespace", "error"):
        if marker in lowered:
            return False, reply

    return True, reply
# --- Extract commands from training examples ---
def extract_commands(rec: dict) -> List[str]:
    """Extract all RCON commands from a training example (any format).

    Three sources are read, in this order:

    1. Assistant messages containing ``<tool_call>``: the first tool-call block
       is parsed and, if it calls ``rcon.execute``, its ``command`` argument is
       collected. Later tool-call blocks in the same message are not examined.
    2. Assistant messages without ``<tool_call>`` whose content is a JSON
       object: each non-empty string in its ``commands`` list is collected.
    3. The old dict format: ``rec["output"]["commands"]``, or
       ``commands_generated`` when the ``commands`` key is absent.

    Malformed or unexpectedly typed data (non-list ``messages``, non-string
    content, JSON that is not an object, ``commands`` that is not a list) is
    skipped rather than raising.

    Args:
        rec: One parsed training record.

    Returns:
        The non-empty command strings found, in encounter order.
    """
    commands: List[str] = []

    # Format 1: messages[] chat format
    messages = rec.get("messages") or []
    for msg in messages:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        content = msg.get("content", "")
        if not isinstance(content, str):
            continue

        if "<tool_call>" in content:
            # Tool calls with rcon.execute
            tc_json = content.split("<tool_call>")[1].split("</tool_call>")[0].strip()
            try:
                tc = json.loads(tc_json)
            except json.JSONDecodeError:
                continue
            if not isinstance(tc, dict) or tc.get("name") != "rcon.execute":
                continue
            arguments = tc.get("arguments")
            if not isinstance(arguments, dict):
                continue
            cmd = arguments.get("command", "")
            if isinstance(cmd, str) and cmd:
                commands.append(cmd)
            continue

        # Final JSON with commands array
        try:
            parsed = json.loads(content)
        except (json.JSONDecodeError, TypeError):
            continue
        if not isinstance(parsed, dict):
            continue
        listed = parsed.get("commands", [])
        # A bare string here would otherwise be iterated character by character.
        if isinstance(listed, list):
            commands.extend(c for c in listed if isinstance(c, str) and c)

    # Format 2: old dict format
    output = rec.get("output", {})
    if isinstance(output, dict):
        listed = output.get("commands", output.get("commands_generated", []))
        if isinstance(listed, list):
            commands.extend(c for c in listed if isinstance(c, str) and c)

    return commands
def extract_tool_calls(rec: dict) -> List[str]:
    """Extract tool names called in a training example.

    For every assistant message whose string content contains ``<tool_call>``,
    the first tool-call block is parsed as JSON and its ``name`` is collected
    (``"unknown"`` when the object has no name). Messages with non-string
    content, unparseable JSON, or a payload that is not a JSON object are
    skipped.

    Args:
        rec: One parsed training record.

    Returns:
        Tool names in message order.
    """
    tools: List[str] = []
    for msg in rec.get("messages") or []:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        content = msg.get("content", "")
        # Content may be None or structured; only plain strings carry tool-call markup.
        if not isinstance(content, str) or "<tool_call>" not in content:
            continue
        payload = content.split("<tool_call>")[1].split("</tool_call>")[0].strip()
        try:
            tc = json.loads(payload)
        except json.JSONDecodeError:
            continue
        if isinstance(tc, dict):
            tools.append(tc.get("name", "unknown"))
    return tools
def check_system_prompt(rec: dict) -> List[str]:
    """Report problems with the tool list in a record's system prompt.

    Returns an empty list when the record has no messages, and
    ``["no_system_prompt"]`` when the first system message is missing or empty.
    Otherwise returns one ``legacy_tool:<name>`` entry per outdated tool name
    found in the prompt, followed by one ``missing_tool:<name>`` entry per
    newer tool absent from a prompt that lists tools.
    """
    messages = rec.get("messages", [])
    if not messages:
        return []

    # Only the first system message is inspected.
    sys_content = next(
        (m.get("content", "") for m in messages
         if isinstance(m, dict) and m.get("role") == "system"),
        "",
    )
    if not sys_content:
        return ["no_system_prompt"]

    # Check for outdated tool names
    found = [f"legacy_tool:{old_name}" for old_name in LEGACY_TOOLS if old_name in sys_content]

    # Check for missing new tools. Only flag if the prompt lists tools at all.
    if "rcon.execute" in sys_content:
        newer_tools = ("world.scan_area", "world.redstone_trace", "world.render",
                       "training.save", "server.config")
        found.extend(f"missing_tool:{tool}" for tool in newer_tools if tool not in sys_content)

    return found
# --- Main validation ---
def validate_file(filepath: str, fix_mode: bool, rcon_sock: Optional[socket.socket]) -> dict:
    """Validate a single JSONL file. Returns stats dict.

    Every record is checked for duplicate ids, a recognised format, system
    prompt issues, legacy tool calls and known bad command patterns. With a
    live RCON socket, commands that do not target a player are also executed
    against the server.

    In fix mode all applicable repairs for a command are chained, so a command
    with several problems (for example a leading slash and ``@s``) ends up with
    every repair applied. The file is rewritten only when at least one command
    actually changed; unparseable lines cannot be carried over and are dropped
    from the rewritten file (a warning is logged).

    Args:
        filepath: Path to the JSONL file.
        fix_mode: When true, repair known issues and write the file back.
        rcon_sock: Authenticated RCON socket, or ``None`` to skip live checks.

    Returns:
        A dict with the file's basename and these counters: ``total``,
        ``valid``, ``issues``, ``cmd_issues``, ``format_issues``, ``fixed``
        (number of repairs that changed a command), ``examples_with_issues``,
        ``rcon_tested``, ``rcon_passed`` and ``rcon_failed``.
    """
    stats = {
        "file": os.path.basename(filepath),
        "total": 0,
        "valid": 0,
        "issues": Counter(),
        "cmd_issues": Counter(),
        "format_issues": Counter(),
        "fixed": 0,
        "examples_with_issues": 0,
        "rcon_tested": 0,
        "rcon_passed": 0,
        "rcon_failed": 0,
    }

    records = []
    # Read as UTF-8 explicitly so it matches the encoding used on write-back.
    with open(filepath, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # blank separator lines are not malformed records
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                stats["format_issues"]["bad_json"] += 1

    # Issues for which no reliable repaired command exists to test live.
    rcon_skip_issues = ("enchantment_syntax", "potion_syntax", "template_cmd")
    # Skip player-targeted commands (bots not guaranteed online)
    player_cmds = ("give ", "tp ", "effect ", "tellraw ", "execute at ")

    output_lines = []
    seen_ids = set()
    for rec in records:
        stats["total"] += 1

        if not isinstance(rec, dict):
            # Valid JSON but not an object: nothing to inspect. Keep it so a
            # rewrite does not silently discard it.
            stats["format_issues"]["not_object"] += 1
            stats["examples_with_issues"] += 1
            output_lines.append(rec)
            continue

        example_issues = []

        # Duplicate check
        rec_id = rec.get("id", "")
        if rec_id and rec_id in seen_ids:
            example_issues.append("duplicate_id")
            stats["issues"]["duplicate_id"] += 1
        seen_ids.add(rec_id)

        # Format check
        has_messages = "messages" in rec and isinstance(rec.get("messages"), list)
        has_old_format = "input" in rec and "output" in rec
        if not has_messages and not has_old_format:
            example_issues.append("unknown_format")
            stats["format_issues"]["unknown_format"] += 1
        if has_old_format and not has_messages:
            stats["format_issues"]["old_format"] += 1

        # System prompt check
        for prompt_issue in check_system_prompt(rec):
            stats["issues"][prompt_issue] += 1
            example_issues.append(prompt_issue)

        # Tool call check
        for tool_name in extract_tool_calls(rec):
            if tool_name in LEGACY_TOOLS:
                stats["issues"][f"legacy_tool_call:{tool_name}"] += 1
                example_issues.append(f"legacy_tool_call:{tool_name}")

        # Command validation
        commands = extract_commands(rec)
        fixed_commands = {}  # index → fixed command(s)
        for i, cmd in enumerate(commands):
            matched_issues = []
            repaired = cmd
            for pattern, issue_name, _desc in BAD_PATTERNS:
                if not pattern.search(cmd):
                    continue
                stats["cmd_issues"][issue_name] += 1
                example_issues.append(issue_name)
                matched_issues.append(issue_name)

                # Auto-fix if in fix mode
                if not fix_mode:
                    continue
                if issue_name in FIXES:
                    candidate = FIXES[issue_name](repaired)
                elif issue_name in ("enchantment_syntax", "potion_syntax"):
                    candidate = fix_enchantment(repaired)[0]
                else:
                    continue
                # Apply each repair on top of the previous ones, and count it
                # only when it really changed the command.
                if candidate != repaired:
                    repaired = candidate
                    stats["fixed"] += 1
            if repaired != cmd:
                fixed_commands[i] = [repaired]

            # RCON validation — skip commands with unfixable syntax issues
            skip_rcon = any(mi in rcon_skip_issues for mi in matched_issues)
            if rcon_sock and not skip_rcon:
                test_cmd = fixed_commands[i][0] if i in fixed_commands else cmd
                if not test_cmd.startswith(player_cmds):
                    valid, result = rcon_validate_cmd(rcon_sock, test_cmd)
                    stats["rcon_tested"] += 1
                    if valid:
                        stats["rcon_passed"] += 1
                    else:
                        stats["rcon_failed"] += 1
                        example_issues.append(f"rcon_fail:{result[:60]}")

        if example_issues:
            stats["examples_with_issues"] += 1
        else:
            stats["valid"] += 1

        # Apply fixes to the record if in fix mode
        if fix_mode and fixed_commands:
            rec = _apply_fixes_to_record(rec, commands, fixed_commands)
        output_lines.append(rec)

    # Write back if fixing and changes were made
    if fix_mode and stats["fixed"] > 0:
        dropped = stats["format_issues"]["bad_json"]
        if dropped:
            log.warning("Dropping %d unparseable line(s) while rewriting %s", dropped, filepath)
        with open(filepath, 'w', encoding='utf-8') as f:
            for rec in output_lines:
                f.write(json.dumps(rec, ensure_ascii=True) + '\n')
        log.info("Wrote %d command fixes to %s", stats["fixed"], filepath)

    return stats
def _apply_fixes_to_record(rec: dict, original_commands: List[str],
                           fixed_commands: Dict[int, List[str]]) -> dict:
    """Apply command fixes to all locations in a training record.

    The record is modified in place and also returned. Three locations are
    updated:

    * the first ``<tool_call>`` block of an assistant message, when it calls
      ``rcon.execute`` with a command that has a fix; text before and after
      the block is preserved;
    * the ``commands`` list of an assistant message whose content is a JSON
      object (a command may expand to several replacements);
    * the ``commands``, ``commands_generated`` and ``commands_executed`` lists
      of the old ``output`` dict format.

    Messages are rewritten only when one of their commands changes.

    Args:
        rec: The training record to update.
        original_commands: Commands as extracted from ``rec``.
        fixed_commands: Maps an index into ``original_commands`` to the list of
            replacement commands.

    Returns:
        ``rec``, after modification.
    """
    # Build old→new mapping
    fix_map = {}
    for i, new_cmds in fixed_commands.items():
        if i < len(original_commands):
            fix_map[original_commands[i]] = new_cmds
    if not fix_map:
        return rec

    def _remap(cmds: list) -> list:
        # Replace each known-bad string command; leave everything else as is.
        remapped = []
        for cmd in cmds:
            if isinstance(cmd, str) and cmd in fix_map:
                remapped.extend(fix_map[cmd])
            else:
                remapped.append(cmd)
        return remapped

    # Fix messages[] format
    for msg in rec.get("messages") or []:
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        content = msg.get("content", "")
        if not isinstance(content, str):
            continue

        # Fix tool_call blocks
        if "<tool_call>" in content:
            if "rcon.execute" not in content:
                continue
            head, _, rest = content.partition("<tool_call>")
            payload, _, tail = rest.partition("</tool_call>")
            try:
                tc = json.loads(payload.strip())
            except json.JSONDecodeError:
                continue
            if not isinstance(tc, dict) or tc.get("name") != "rcon.execute":
                continue
            arguments = tc.get("arguments")
            if not isinstance(arguments, dict):
                continue
            cmd = arguments.get("command", "")
            if isinstance(cmd, str) and fix_map.get(cmd):
                arguments["command"] = fix_map[cmd][0]
                # Re-emit only the tool-call block; keep surrounding text such
                # as reasoning before the call or further content after it.
                msg["content"] = f'{head}<tool_call>\n{json.dumps(tc)}\n</tool_call>{tail}'
            continue

        # Fix final JSON commands array
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            continue
        if not isinstance(parsed, dict) or not isinstance(parsed.get("commands"), list):
            continue
        new_cmds = _remap(parsed["commands"])
        if new_cmds != parsed["commands"]:
            parsed["commands"] = new_cmds
            msg["content"] = json.dumps(parsed, ensure_ascii=True)

    # Fix old dict format
    output = rec.get("output", {})
    if isinstance(output, dict):
        for key in ("commands", "commands_generated", "commands_executed"):
            if key in output and isinstance(output[key], list):
                output[key] = _remap(output[key])
    return rec
def main():
    """Command-line entry point: validate training files and print a report.

    Files default to every ``*.jsonl`` under ``data/raw`` and
    ``data/processed`` in the project root; paths containing ``quarantine`` or
    ``queue`` are excluded. With ``--rcon`` a live connection is attempted and
    validation continues offline when it fails. The RCON host and port can be
    overridden with ``--rcon-host`` / ``--rcon-port`` and the password with the
    ``MORTDECAI_RCON_PASSWORD`` environment variable; defaults are unchanged.
    """
    parser = argparse.ArgumentParser(description="Validate all training data")
    parser.add_argument("--check", action="store_true", default=True, help="Dry run (default)")
    parser.add_argument("--fix", action="store_true", help="Auto-fix known issues")
    parser.add_argument("--rcon", action="store_true", help="Enable live RCON validation")
    parser.add_argument("--rcon-host", default="192.168.0.244", help="RCON host (default: %(default)s)")
    parser.add_argument("--rcon-port", type=int, default=25578, help="RCON port (default: %(default)s)")
    parser.add_argument("--files", nargs="*", help="Specific files to validate (default: all)")
    args = parser.parse_args()

    # Find all training files
    if args.files:
        files = args.files
    else:
        files = sorted(
            glob.glob(str(PROJECT_ROOT / "data/raw/*.jsonl"))
            + glob.glob(str(PROJECT_ROOT / "data/processed/*.jsonl"))
        )
    # Exclude quarantine and queue files
    files = [f for f in files if "quarantine" not in f and "queue" not in f]
    print(f"Validating {len(files)} files...")

    # RCON connection
    rcon_sock = None
    if args.rcon:
        print(f"Connecting to dev RCON ({args.rcon_host}:{args.rcon_port})...")
        password = os.environ.get("MORTDECAI_RCON_PASSWORD", "REDACTED_RCON")
        rcon_sock = rcon_connect(args.rcon_host, args.rcon_port, password)
        if rcon_sock:
            print(" RCON connected")
        else:
            print(" RCON connection failed — running without live validation")

    # Validate each file
    all_stats = []
    total_issues = Counter()
    total_cmd_issues = Counter()
    try:
        for filepath in files:
            stats = validate_file(filepath, args.fix, rcon_sock)
            all_stats.append(stats)
            total_issues.update(stats["issues"])
            total_cmd_issues.update(stats["cmd_issues"])
    finally:
        # Close RCON even when a file fails to validate.
        if rcon_sock:
            try:
                rcon_sock.close()
            except OSError:
                pass

    # Report
    print(f"\n{'='*70}")
    print("VALIDATION REPORT")
    print(f"{'='*70}")
    total_examples = sum(s["total"] for s in all_stats)
    total_valid = sum(s["valid"] for s in all_stats)
    total_with_issues = sum(s["examples_with_issues"] for s in all_stats)
    total_fixed = sum(s["fixed"] for s in all_stats)

    def percent(count: int) -> float:
        # No files or only empty files: report 0% instead of dividing by zero.
        return count / total_examples * 100 if total_examples else 0.0

    print(f"\nTotal examples: {total_examples}")
    print(f"Valid: {total_valid} ({percent(total_valid):.1f}%)")
    print(f"With issues: {total_with_issues} ({percent(total_with_issues):.1f}%)")
    if args.fix:
        print(f"Fixed: {total_fixed}")

    # Per-file summary
    print(f"\n{'File':<45} {'Total':>6} {'Valid':>6} {'Issues':>6} {'Rate':>6}")
    print("-" * 75)
    for s in sorted(all_stats, key=lambda x: -x["examples_with_issues"]):
        if s["total"] == 0:
            continue
        rate = f"{s['valid']/s['total']*100:.0f}%"
        print(f"{s['file']:<45} {s['total']:>6} {s['valid']:>6} {s['examples_with_issues']:>6} {rate:>6}")

    # Issue breakdown
    if total_cmd_issues:
        print("\nCommand issues:")
        for issue, count in total_cmd_issues.most_common():
            print(f" {count:>5} {issue}")
    if total_issues:
        print("\nOther issues:")
        for issue, count in total_issues.most_common(20):
            print(f" {count:>5} {issue}")

    # RCON stats
    rcon_tested = sum(s["rcon_tested"] for s in all_stats)
    if rcon_tested:
        rcon_passed = sum(s["rcon_passed"] for s in all_stats)
        rcon_failed = sum(s["rcon_failed"] for s in all_stats)
        print(f"\nRCON validation: {rcon_tested} tested, {rcon_passed} passed, {rcon_failed} failed")

    # Format breakdown
    format_issues = Counter()
    for s in all_stats:
        format_issues.update(s["format_issues"])
    if format_issues:
        print("\nFormat issues:")
        for issue, count in format_issues.most_common():
            print(f" {count:>5} {issue}")


if __name__ == "__main__":
    main()