Files
Mortdecai/training/scripts/tool_self_play.py
T
Mortdecai da8f557219 GPU scheduler, 14-tool architecture, plugin deployment, event dispatcher
GPU Scheduler (gpu.sethpc.xyz):
- Live dashboard with 4 GPUs, training monitor, loss sparklines
- Preset-based job scheduler with 3 triggers (time, finish_training, cost)
- Model selection per GPU, pipeline configuration
- Tool self-play and training pipeline types
- Behind Google OAuth, live-refresh without page reload

Tool Architecture (14 tools):
- 3 new tools: world.nearby_entities, memory.read, memory.write
- 7 script.* tools: write, validate, execute, read, list, delete, schedule
- ScriptManager: full mcfunction datapack CRUD with RCON validation
- Training data: 1,430 tool examples (up from 1,159)

Plugin Deployment (paper-ai-25567):
- WorldGuard 7.0.12, CoreProtect CE 23.1, EssentialsX 2.21.2, Vault 1.7.3
- Fresh greenfield world reset
- 104 RCON-validated plugin training examples

Event Dispatcher:
- Watches server log for deaths, joins, advancements, PvP kills
- Configurable trigger probability and cooldowns per event type
- Deployed to dev server, fires god_system prompts on events
- 21 event-response training examples

Training Infrastructure:
- train_lora.py: --save-steps 50, --resume from checkpoint
- run_training.sh: stops Ollama, activates conda, restarts after
- Passwordless sudo for ollama services on steel141
- Dev server added to MCSManager with autoStart

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 03:14:45 -04:00

415 lines
16 KiB
Python

#!/usr/bin/env python3
"""
Tool-focused self-play — exercises all 14 tools on a live dev server.
Unlike regular self-play (which tests command generation), this script
specifically generates prompts that require tool use: script writing,
memory operations, entity scanning, wiki lookups, and chained multi-tool
flows. Runs on the dev server via RCON.
The model responds, the commands it returns are executed for real over RCON,
and the full interaction (prompt + commands + RCON results + final response)
gets logged as training data.
Usage:
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
# Or via the scheduler preset
"""
import argparse
import json
import os
import random
import re
import sys
import time
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
import requests
from agent.tools.persistent_rcon import get_rcon
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
# ── Prompt categories that exercise specific tools ─────────────────────────
# Maps a category name to the list of canned player prompts for it.
# run_round() picks one prompt at random from PROMPTS[category]; main() uses
# the keys (in insertion order, before shuffling) when --categories is "all".
# Almost every prompt starts with "sudo"; the last four entries of
# "plugin_combined" start with "pray" instead.
PROMPTS = {
    # Requests to build structures.
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    # Requests for recurring or persistent effects (forever / every tick /
    # on server load / every N minutes).
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    # Requests to list, read, or delete existing scripts.
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    # Requests to remember a location or a fact about the player.
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    # Requests that depend on previously saved locations or facts.
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    # Requests about mobs/entities around the player.
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    # Game-knowledge questions.
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
    ],
    # Requests phrased relative to the player ("around me", "my location").
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    # Requests that depend on time, weather, or who is online.
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    # Multi-step requests combining several of the above in one prompt.
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    # Region protection and region-flag requests.
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    # Block-history lookup, rollback, and restore requests.
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    # Homes, warps, economy, and player-utility requests.
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    # Permission and group management requests.
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    # Shape generation and selection-editing requests (WorldEdit-style).
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    # Requests spanning several plugins at once; the trailing "pray ..."
    # prompts are the only ones in this table without the "sudo" prefix.
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
}
# Player names that main() samples from (one per round/category) to fill the
# "Player <name>: <prompt>" user message and the logged online_players list.
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
def query_model(prompt, player, ollama_url, model, rcon):
    """Send a prompt to the model and return its parsed JSON reply.

    Posts a non-streaming chat request to ``{ollama_url}/api/chat`` with a
    fixed system prompt and the user message ``"Player {player}: {prompt}"``,
    strips any ``<think>...</think>`` section from the reply text, and parses
    the remainder as JSON.

    Args:
        prompt: The player's request text.
        player: Player name prefixed to the prompt in the user message.
        ollama_url: Base URL of the Ollama server.
        model: Model tag to query.
        rcon: Not used by this function; kept so existing callers still work.

    Returns:
        dict: The parsed JSON object on success. On any failure (request
        error, non-2xx status, missing fields, invalid JSON, or JSON that is
        not an object) a dict ``{"error": <message>, "raw": <reply text or
        "">}``. This function does not raise for those failures.
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )
    # Bound up front so the error path can always report whatever reply text
    # was received so far (empty if the request itself failed).
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.4, "num_predict": 800},
        }, timeout=120)
        if not r.ok:
            # Surface the server's own error body instead of failing later
            # with an unhelpful KeyError on the missing "message" field.
            raise RuntimeError(f"HTTP {r.status_code}: {r.text[:200]}")
        content = r.json()["message"]["content"]
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', content)
        parsed = json.loads(content)
    except Exception as e:
        # Best-effort boundary: one bad round must not abort the whole run.
        return {"error": str(e), "raw": content}
    if not isinstance(parsed, dict):
        # Valid JSON but not an object (list/string/number): callers use
        # dict operations on the result, so report it as an error instead.
        return {
            "error": f"expected a JSON object, got {type(parsed).__name__}",
            "raw": content,
        }
    return parsed
def validate_commands(commands, rcon):
    """Execute up to the first 12 commands over RCON and record each outcome.

    Entries among those first 12 that are not non-blank strings are skipped.
    A command counts as failed when its reply contains one of the known
    error markers, or when sending it raises.

    Args:
        commands: Sequence of candidate command strings.
        rcon: Object exposing ``command(str)`` that returns the server reply.

    Returns:
        list[dict]: One ``{"cmd", "result", "ok"}`` dict per executed command;
        ``result`` is the reply truncated to 200 characters, or the exception
        text when the call raised.
    """
    error_markers = ("<--[HERE]", "Unknown", "Incorrect", "Expected")
    outcomes = []
    for candidate in commands[:12]:
        if isinstance(candidate, str) and candidate.strip():
            try:
                reply = rcon.command(candidate)
                failed = False
                for marker in error_markers:
                    if marker in reply:
                        failed = True
                        break
                entry = {"cmd": candidate, "result": reply[:200], "ok": not failed}
            except Exception as exc:
                entry = {"cmd": candidate, "result": str(exc), "ok": False}
            outcomes.append(entry)
    return outcomes
def run_round(category, ollama_url, model, rcon, player):
    """Run one self-play round for a specific tool category.

    Picks a random prompt from ``PROMPTS[category]``, queries the model,
    executes the returned commands over RCON via ``validate_commands``, and
    packages everything as a training example.

    Args:
        category: Key into ``PROMPTS``.
        ollama_url: Base URL of the Ollama server.
        model: Model tag to query.
        rcon: RCON client passed through to the helpers.
        player: Player name the prompt is attributed to.

    Returns:
        dict | None: The training example, or ``None`` when the model query
        failed (the error is printed).

    Raises:
        KeyError: If ``category`` is not a key of ``PROMPTS``.
    """
    prompt = random.choice(PROMPTS[category])
    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()
    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start
    if not isinstance(response, dict):
        # Guard against a reply that parsed as JSON but is not an object.
        print(f" ERROR: unexpected response type {type(response).__name__}")
        return None
    if "error" in response:
        # str() because the value may not be a string (e.g. null).
        print(f" ERROR: {str(response['error'])[:80]}")
        return None
    # Normalise "commands" to a list. A JSON null would break len() below,
    # and a bare string would otherwise be iterated character by character
    # and each character sent to the server as its own command.
    commands = response.get("commands")
    if isinstance(commands, str):
        commands = [commands]
    elif not isinstance(commands, list):
        commands = []
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")
    # Validate commands via RCON
    rcon_results = []
    if commands and all(isinstance(c, str) for c in commands):
        rcon_results = validate_commands(commands, rcon)
        # Require at least one executed command: a list of only blank
        # strings yields no results and must not count as a success.
        success = bool(rcon_results) and all(r["ok"] for r in rcon_results)
    else:
        success = False
    ok_count = sum(1 for r in rcon_results if r["ok"])
    fail_count = sum(1 for r in rcon_results if not r["ok"])
    if success:
        status = "OK"
    elif ok_count > 0:
        status = f"PARTIAL ({ok_count}/{ok_count+fail_count})"
    else:
        status = "FAIL"
    print(f"{len(commands)} cmds, {status}, {elapsed:.1f}s")
    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }
    return example
def main():
    """Command-line entry point: run tool self-play and log examples as JSONL.

    Parses the CLI options, connects to RCON, then for each of ``--rounds``
    rounds runs every selected category once in shuffled order. Each
    successful round's example is appended to the output file immediately,
    so a crash mid-run keeps what was already collected. Prints a progress
    line every 5 rounds and a summary at the end.

    Exits via ``parser.error`` (status 2) when ``--categories`` names a
    category that is not a key of ``PROMPTS`` or selects nothing.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.4.0")
    parser.add_argument("--rcon-host", default="192.168.0.112")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    # Resolve and validate categories before touching the server, so a typo
    # fails fast instead of raising KeyError inside the first round.
    if args.categories == "all":
        categories = list(PROMPTS)
    else:
        categories = [c.strip() for c in args.categories.split(",") if c.strip()]
        unknown = [c for c in categories if c not in PROMPTS]
        if unknown:
            parser.error(
                f"unknown categories: {', '.join(unknown)} "
                f"(valid: {', '.join(PROMPTS)})"
            )
        if not categories:
            parser.error("no categories selected")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")
    # A custom --output may point outside OUTPUT_DIR; make sure it is writable.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()
    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    examples = []
    for round_num in range(args.rounds):
        print(f"\n── Round {round_num + 1}/{args.rounds} ──")
        random.shuffle(categories)
        for cat in categories:
            player = random.choice(PLAYERS)
            example = run_round(cat, args.ollama_url, args.model, rcon, player)
            stats["total"] += 1
            if example is None:
                stats["error"] += 1
                continue
            if example["metadata"]["all_success"]:
                stats["success"] += 1
            elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                stats["partial"] += 1
            else:
                stats["fail"] += 1
            examples.append(example)
            # Write incrementally. Explicit UTF-8 because ensure_ascii=False
            # emits raw non-ASCII characters, which the platform default
            # encoding may be unable to encode.
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
            time.sleep(0.3)
        # Progress report
        if (round_num + 1) % 5 == 0:
            rate = stats["success"] / max(stats["total"], 1) * 100
            print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                  f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")
    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({len(examples)} examples)")
if __name__ == "__main__":
main()