da8f557219
GPU Scheduler (gpu.sethpc.xyz): - Live dashboard with 4 GPUs, training monitor, loss sparklines - Preset-based job scheduler with 3 triggers (time, finish_training, cost) - Model selection per GPU, pipeline configuration - Tool self-play and training pipeline types - Behind Google OAuth, live-refresh without page reload Tool Architecture (14 tools): - 3 new tools: world.nearby_entities, memory.read, memory.write - 7 script.* tools: write, validate, execute, read, list, delete, schedule - ScriptManager: full mcfunction datapack CRUD with RCON validation - Training data: 1,430 tool examples (up from 1,159) Plugin Deployment (paper-ai-25567): - WorldGuard 7.0.12, CoreProtect CE 23.1, EssentialsX 2.21.2, Vault 1.7.3 - Fresh greenfield world reset - 104 RCON-validated plugin training examples Event Dispatcher: - Watches server log for deaths, joins, advancements, PvP kills - Configurable trigger probability and cooldowns per event type - Deployed to dev server, fires god_system prompts on events - 21 event-response training examples Training Infrastructure: - train_lora.py: --save-steps 50, --resume from checkpoint - run_training.sh: stops Ollama, activates conda, restarts after - Passwordless sudo for ollama services on steel141 - Dev server added to MCSManager with autoStart Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
415 lines
16 KiB
Python
415 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
Tool-focused self-play — exercises all 14 tools on a live dev server.

Unlike regular self-play (which tests command generation), this script
specifically generates prompts that require tool use: script writing,
memory operations, entity scanning, wiki lookups, and chained multi-tool
flows. Runs on the dev server via RCON.

The model responds, its tool calls get executed for real, and the full
interaction (prompt + tool calls + results + final response) gets logged
as training data.

Usage:
    python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
        --rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30

    # Or via the scheduler preset
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import random
|
|
import re
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
sys.path.insert(0, str(PROJECT_ROOT))
|
|
|
|
import requests
|
|
from agent.tools.persistent_rcon import get_rcon
|
|
|
|
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
|
|
|
|
# ── Prompt categories that exercise specific tools ─────────────────────────
|
|
|
|
# Maps a tool/plugin category name to the pool of player prompts used to
# exercise it. run_round() draws one prompt at random from the chosen pool.
PROMPTS = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
    ],
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
}
|
|
|
|
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
|
|
|
|
|
|
def query_model(prompt, player, ollama_url, model, rcon):
    """Send one player prompt to the Ollama chat API and return the parsed reply.

    Args:
        prompt: The player's chat message, e.g. ``"sudo heal me"``.
        player: Player name; prefixed to the prompt in the user message.
        ollama_url: Base URL of the Ollama server (``/api/chat`` is appended).
        model: Model tag to query.
        rcon: Not used by this function; kept so existing callers keep working.

    Returns:
        The JSON object decoded from the model's reply on success. On any
        failure (network error, non-2xx status, malformed or non-object
        JSON) a dict ``{"error": <description>, "raw": <reply text so far>}``.
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )

    # Bound up front so the error path can always report what was received,
    # without probing the local namespace.
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.4, "num_predict": 800},
        }, timeout=120)

        if not r.ok:
            # Surface the server's own explanation (e.g. unknown model) rather
            # than a KeyError on the missing "message" field.
            return {"error": f"HTTP {r.status_code}: {r.text[:200]}", "raw": ""}

        content = r.json()["message"]["content"]
        # Drop any reasoning block the model emitted despite /no_think.
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', content)
        parsed = json.loads(content)
    except Exception as e:
        # Deliberately broad: one bad round must not abort a long self-play
        # run. The failure is reported to the caller instead of raised.
        return {"error": str(e), "raw": content}

    if not isinstance(parsed, dict):
        # Callers use dict methods on the result; reject lists/strings/numbers.
        return {
            "error": f"expected a JSON object, got {type(parsed).__name__}",
            "raw": content,
        }
    return parsed
|
|
|
|
|
|
def validate_commands(commands, rcon, *, max_commands=12,
                      error_markers=("<--[HERE]", "Unknown", "Incorrect", "Expected")):
    """Run commands through RCON and record the outcome of each one.

    Args:
        commands: Iterable of command strings. A single string is treated as
            one command (not iterated character by character); ``None``
            yields no results.
        rcon: Object exposing ``command(str)`` that returns the server reply.
        max_commands: Only the first ``max_commands`` entries are considered.
        error_markers: Substrings whose presence in a reply marks the command
            as failed.

    Returns:
        A list of ``{"cmd", "result", "ok"}`` dicts, one per executed command.
        Blank and non-string entries are skipped. ``result`` is the reply
        truncated to 200 characters, or the exception text if the call raised.
    """
    if commands is None:
        return []
    if isinstance(commands, str):
        # A model may return a bare string where a list is expected; sending
        # each character to the server as its own command would be wrong.
        commands = [commands]

    results = []
    for cmd in list(commands)[:max_commands]:
        if not isinstance(cmd, str) or not cmd.strip():
            continue
        try:
            result = rcon.command(cmd)
            is_error = any(marker in result for marker in error_markers)
            results.append({"cmd": cmd, "result": result[:200], "ok": not is_error})
        except Exception as e:
            # Connection problems or unexpected reply types count as a failed
            # command, not a crashed run.
            results.append({"cmd": cmd, "result": str(e), "ok": False})
    return results
|
|
|
|
|
|
def run_round(category, ollama_url, model, rcon, player):
    """Run one self-play round for a tool category and build a training example.

    A random prompt from ``PROMPTS[category]`` is sent to the model, the
    returned commands are executed over RCON, and the whole exchange is
    packaged as a dict ready to be written as one JSONL record.

    Args:
        category: Key into ``PROMPTS``.
        ollama_url: Base URL of the Ollama server.
        model: Model tag to query.
        rcon: RCON client passed through to ``validate_commands``.
        player: Player name the prompt is attributed to.

    Returns:
        The training-example dict, or ``None`` if the model query failed.
    """
    prompt = random.choice(PROMPTS[category])

    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()

    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start

    if not isinstance(response, dict):
        print(f" ERROR: unexpected response type {type(response).__name__}")
        return None
    if "error" in response:
        print(f" ERROR: {str(response['error'])[:80]}")
        return None

    # Normalise "commands" to a list: the model may emit null, a bare string,
    # or some other JSON type. Iterating a string would execute it one
    # character at a time, and len(None) would crash the status line below.
    raw_commands = response.get("commands", [])
    if isinstance(raw_commands, str):
        commands = [raw_commands]
    elif isinstance(raw_commands, (list, tuple)):
        commands = list(raw_commands)
    else:
        commands = []
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")

    # Validate commands via RCON
    rcon_results = []
    if commands and all(isinstance(c, str) for c in commands):
        rcon_results = validate_commands(commands, rcon)
        # Require at least one executed command: all([]) is True, which would
        # otherwise report a list of blank strings as a success.
        success = bool(rcon_results) and all(r["ok"] for r in rcon_results)
    else:
        success = False

    ok_count = sum(1 for r in rcon_results if r["ok"])
    fail_count = sum(1 for r in rcon_results if not r["ok"])
    if success:
        status = "OK"
    elif ok_count > 0:
        status = f"PARTIAL ({ok_count}/{ok_count+fail_count})"
    else:
        status = "FAIL"
    print(f" → {len(commands)} cmds, {status}, {elapsed:.1f}s")

    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }

    return example
|
|
|
|
|
|
def main():
    """Parse CLI arguments, run the self-play rounds, and write JSONL output.

    Each round visits every selected category once, in shuffled order. Every
    successful model exchange is appended to the output file immediately, so
    an interrupted run keeps what it has produced so far. A summary is
    printed every five rounds and at the end.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.4.0")
    parser.add_argument("--rcon-host", default="192.168.0.112")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    # Resolve and validate categories before touching the network, so a typo
    # fails fast instead of raising KeyError in the middle of a run.
    if args.categories == "all":
        categories = list(PROMPTS.keys())
    else:
        categories = [c.strip() for c in args.categories.split(",") if c.strip()]
        unknown = [c for c in categories if c not in PROMPTS]
        if unknown:
            parser.error(
                f"unknown categories: {', '.join(unknown)} "
                f"(choose from: {', '.join(PROMPTS)})"
            )
        if not categories:
            parser.error("--categories must name at least one category")

    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")
    # Create the directory the file will actually live in; this is OUTPUT_DIR
    # unless --output points somewhere else.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()

    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    examples = []

    for round_num in range(args.rounds):
        print(f"\n── Round {round_num + 1}/{args.rounds} ──")
        random.shuffle(categories)

        for cat in categories:
            player = random.choice(PLAYERS)
            example = run_round(cat, args.ollama_url, args.model, rcon, player)

            stats["total"] += 1
            if example is None:
                stats["error"] += 1
                continue

            if example["metadata"]["all_success"]:
                stats["success"] += 1
            elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                stats["partial"] += 1
            else:
                stats["fail"] += 1

            examples.append(example)

            # Write incrementally. UTF-8 is explicit because records are
            # dumped with ensure_ascii=False and prompts contain non-ASCII
            # characters, which a non-UTF-8 locale default could not encode.
            with open(output_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")

            time.sleep(0.3)

        # Progress report
        if (round_num + 1) % 5 == 0:
            rate = stats["success"] / max(stats["total"], 1) * 100
            print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                  f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")

    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({len(examples)} examples)")
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|