1200+ distilled gold examples, journal system, redstone mastery, safety awareness
Distilled Training Data (1,203 examples): - 341 initial gold (plugins, enchantments, builds, effects, god, errors) - 165 buildings + pipeline (100 structures built on dev, 65 request→query→act) - 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks) - 17 advanced logic (decanonized items, redstone gates, iterative builds) - 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock) - 7 circuit verification and diagnosis - 1 compact comparator gates - 10 redstone methodology (build→test→save→recall→learn from mistakes) - 8 player journal usage - 29 creative+uncommon+pipeline+god with full tool chains Player Journal System: - agent/tools/player_journal.py — per-player text files (1-10 lines) - journal.read + journal.write tool schemas added - Cross-contaminated: God and Sudo share same journal per player - Includes sentiment, relationship, builds, preferences, skill level Redstone Engineering: - agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules - Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top - T-junction bypass prevention: dedicated lead wire between merge and NOT block - RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate) Training Data Cleaning: - 466 @s→@p fixes, 10 template commands removed - 12 outdated refusals replaced with correct plugin commands - Data de-duped across all sources Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tool-focused self-play — exercises all 14 tools on a live dev server.
|
||||
Tool-focused self-play — exercises all 17 tools on a live dev server.
|
||||
|
||||
Unlike regular self-play (which tests command generation), this script
|
||||
specifically generates prompts that require tool use: script writing,
|
||||
memory operations, entity scanning, wiki lookups, and chained multi-tool
|
||||
flows. Runs on the dev server via RCON.
|
||||
memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
|
||||
and chained multi-tool flows. Runs on the dev server via RCON.
|
||||
|
||||
The model responds, its tool calls get executed for real, and the full
|
||||
interaction (prompt + tool calls + results + final response) gets logged
|
||||
@@ -15,7 +15,11 @@ Usage:
|
||||
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
|
||||
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
|
||||
|
||||
# Or via the scheduler preset
|
||||
# Load extra prompts from prayer bank
|
||||
python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
|
||||
|
||||
# Focus on weak categories only
|
||||
python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -34,10 +38,91 @@ import requests
|
||||
from agent.tools.persistent_rcon import get_rcon
|
||||
|
||||
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
|
||||
PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"
|
||||
|
||||
# ── Prompt categories that exercise specific tools ─────────────────────────
|
||||
# ── Template variables for prompt expansion ────────────────────────────────
|
||||
|
||||
PROMPTS = {
|
||||
# Placeholder name -> candidate values. expand_template() replaces each
# "{name}" token found in a prompt with one value picked at random from the
# matching list. "player" and "target" hold the same four names in a
# different order.
TEMPLATE_VARS = {
|
||||
"player": ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"],
|
||||
"target": ["Ace13245", "TheBigBoss", "xXDragonSlayerXx", "slingshooter08"],
|
||||
"region": ["my-base", "spawn-zone", "pvp-arena", "vip-lounge", "farm-area"],
|
||||
"warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
|
||||
"group": ["vip", "builder", "moderator", "default"],
|
||||
"world": ["world", "world_nether", "world_the_end"],
|
||||
|
||||
}
|
||||
|
||||
|
||||
def expand_template(prompt: str,
                    variables: "dict[str, list[str]] | None" = None) -> str:
    """Replace ``{placeholder}`` tokens in *prompt* with random values.

    Every occurrence of a known placeholder gets its own independent
    ``random.choice`` from that placeholder's candidate list, so the same
    token may expand to different values within one prompt.

    The substitution is a single left-to-right pass over the original text:
    inserted values are never rescanned, so a value that itself contains a
    ``{token}`` cannot trigger further expansion or an endless loop.

    Args:
        prompt: Template string, possibly containing ``{name}`` tokens.
        variables: Mapping of placeholder name -> candidate values.
            Defaults to the module-level ``TEMPLATE_VARS``.

    Returns:
        The prompt with all known placeholders substituted. Unknown
        placeholders, and placeholders whose candidate list is empty,
        are left untouched.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if variables is None:
        variables = TEMPLATE_VARS

    # Only placeholders that have at least one candidate can be expanded;
    # random.choice() would raise IndexError on an empty sequence.
    candidates = {
        "{" + key + "}": values
        for key, values in variables.items()
        if values
    }
    if not candidates:
        return prompt

    # One alternation over the literal tokens; re.escape guards the braces
    # and any regex metacharacters that might appear in a key.
    pattern = re.compile("|".join(re.escape(token) for token in candidates))

    # A callable replacement is inserted literally (no backslash handling).
    return pattern.sub(
        lambda match: random.choice(candidates[match.group(0)]),
        prompt,
    )
|
||||
|
||||
|
||||
def load_prompts(prompts_dir: Path = PROMPTS_DIR,
                 mode_filter: str = None,
                 call_type_filter: str = None) -> dict[str, list[str]]:
    """Load prompt templates from per-category JSONL files.

    The directory must contain a ``manifest.json`` mapping each category
    name to a metadata dict. Each metadata dict names the category's JSONL
    file under the ``"file"`` key; every non-blank line of that file is a
    JSON object whose ``"prompt"`` value is collected.

    Args:
        prompts_dir: Directory containing manifest.json and prompt JSONL files.
        mode_filter: If set, only load categories whose ``"mode"`` equals
            this value or ``"mixed"``.
        call_type_filter: If set, only load categories whose ``"call_type"``
            equals this value.

    Returns:
        Dict mapping category name -> list of prompt template strings.
        If manifest.json does not exist, a copy of the inline
        PROMPTS_FALLBACK is returned instead (the filters are not applied
        to the fallback). Categories whose JSONL file is missing are
        skipped with a warning.

    Raises:
        KeyError: If a manifest entry has no ``"file"`` key or a JSONL
            entry has no ``"prompt"`` key.
        json.JSONDecodeError: If the manifest or a non-blank JSONL line is
            not valid JSON.
    """
    manifest_path = prompts_dir / "manifest.json"
    if not manifest_path.exists():
        print(f" Warning: {manifest_path} not found, using inline fallback")
        # Hand back a copy: callers add categories to the result (e.g. a
        # "prompt_bank" entry), which must not leak into the module constant.
        return {
            category: list(items)
            for category, items in PROMPTS_FALLBACK.items()
        }

    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    prompts = {}
    for category, meta in manifest.items():
        # Apply filters ("mixed" categories pass every mode filter).
        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
            continue
        if call_type_filter and meta.get("call_type") != call_type_filter:
            continue

        filepath = prompts_dir / meta["file"]
        if not filepath.exists():
            print(f" Warning: {filepath} not found, skipping {category}")
            continue

        with open(filepath, encoding="utf-8") as f:
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            prompts[category] = [
                json.loads(line)["prompt"] for line in f if line.strip()
            ]

    print(f" Loaded {sum(len(v) for v in prompts.values())} prompts "
          f"from {len(prompts)} categories")
    return prompts
|
||||
|
||||
|
||||
def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
    """Return the parsed contents of ``manifest.json`` in *prompts_dir*.

    Per the original note, this is used by the chat app for its template
    selection UI. The manifest is returned as parsed, without filtering;
    it is expected to carry per-category metadata such as mode, call_type
    and count.

    Returns:
        The decoded manifest, or an empty dict when the file is absent.
    """
    manifest_file = prompts_dir / "manifest.json"
    if manifest_file.exists():
        with open(manifest_file) as handle:
            return json.load(handle)
    # No manifest on disk: report "no categories" rather than raising.
    return {}
|
||||
|
||||
|
||||
# ── Inline fallback (subset, used if prompt files missing) ─────────────────
|
||||
|
||||
PROMPTS_FALLBACK = {
|
||||
"script_build": [
|
||||
"sudo build me a small cobblestone house with a door and windows",
|
||||
"sudo create a fighting arena with red and blue corners",
|
||||
@@ -107,6 +192,37 @@ PROMPTS = {
|
||||
"sudo how does fortune work on ores?",
|
||||
"sudo what are all the copper variants?",
|
||||
"sudo how do trial spawners work?",
|
||||
"sudo what does the breeze drop?",
|
||||
"sudo how do you tame an armadillo?",
|
||||
"sudo what biomes have cherry blossoms?",
|
||||
],
|
||||
"plugin_docs": [
|
||||
"sudo how do I create a WorldGuard region?",
|
||||
"sudo what flags can I set on a region?",
|
||||
"sudo how does CoreProtect rollback work?",
|
||||
"sudo what's the command for LuckPerms group inheritance?",
|
||||
"sudo how do I set up EssentialsX warps?",
|
||||
"sudo what are the WorldEdit brush commands?",
|
||||
"sudo how do I configure CoreProtect auto-purge?",
|
||||
"sudo what permissions does the builder group need for WorldEdit?",
|
||||
"sudo how do I set a WorldGuard greeting message?",
|
||||
"sudo what's the difference between /rg flag and /rg addmember?",
|
||||
],
|
||||
"changelog_lookup": [
|
||||
"sudo what changed in 1.21?",
|
||||
"sudo what was added in the tricky trials update?",
|
||||
"sudo when were trial chambers added?",
|
||||
"sudo what's new with the mace weapon?",
|
||||
"sudo what version added the breeze mob?",
|
||||
"sudo what got nerfed in the latest update?",
|
||||
],
|
||||
"paper_docs": [
|
||||
"sudo how do I set the view distance on Paper?",
|
||||
"sudo what Paper config controls mob spawning rates?",
|
||||
"sudo how do I enable async chunk loading?",
|
||||
"sudo what's the Paper command to reload config?",
|
||||
"sudo how do I optimize TPS on Paper?",
|
||||
"sudo what Paper settings affect redstone performance?",
|
||||
],
|
||||
"player_info": [
|
||||
"sudo build a wall around me",
|
||||
@@ -215,9 +331,71 @@ PROMPTS = {
|
||||
"pray smite TheBigBoss for griefing",
|
||||
"pray make me a temple worthy of your glory",
|
||||
],
|
||||
# ── Direct command passthrough — teach faithful execution ──
|
||||
"direct_passthrough": [
|
||||
# WorldGuard — exact commands
|
||||
'sudo run this exactly: rg define test-region',
|
||||
'sudo run this exactly: rg flag test-region pvp deny',
|
||||
'sudo run this exactly: rg flag test-region mob-spawning deny',
|
||||
'sudo run this exactly: rg addmember test-region Ace13245',
|
||||
'sudo run this exactly: rg removemember test-region Ace13245',
|
||||
'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
|
||||
'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
|
||||
'sudo run this exactly: rg list',
|
||||
'sudo run this exactly: rg info test-region',
|
||||
'sudo run this exactly: rg remove test-region',
|
||||
# CoreProtect — exact commands
|
||||
'sudo run this exactly: co status',
|
||||
'sudo run this exactly: co lookup u:Ace13245 t:1h',
|
||||
'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
|
||||
'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
|
||||
'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
|
||||
'sudo run this exactly: co inspect',
|
||||
'sudo run this exactly: co lookup t:30m r:10 a:container',
|
||||
# LuckPerms — exact commands
|
||||
'sudo run this exactly: lp creategroup vip',
|
||||
'sudo run this exactly: lp group vip permission set essentials.fly true',
|
||||
'sudo run this exactly: lp group vip permission set essentials.heal true',
|
||||
'sudo run this exactly: lp user Ace13245 parent add vip',
|
||||
'sudo run this exactly: lp user Ace13245 parent remove vip',
|
||||
'sudo run this exactly: lp user Ace13245 info',
|
||||
'sudo run this exactly: lp group vip info',
|
||||
'sudo run this exactly: lp listgroups',
|
||||
'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
|
||||
'sudo run this exactly: lp deletegroup vip',
|
||||
# EssentialsX — exact commands
|
||||
'sudo run this exactly: heal Ace13245',
|
||||
'sudo run this exactly: feed Ace13245',
|
||||
'sudo run this exactly: eco give Ace13245 1000',
|
||||
'sudo run this exactly: eco take Ace13245 500',
|
||||
'sudo run this exactly: bal Ace13245',
|
||||
'sudo run this exactly: broadcast Welcome to the server!',
|
||||
'sudo run this exactly: setwarp arena',
|
||||
'sudo run this exactly: warp arena',
|
||||
'sudo run this exactly: delwarp arena',
|
||||
'sudo run this exactly: nick Ace13245 DragonLord',
|
||||
# FAWE — exact commands
|
||||
'sudo run this exactly: /worldedit version',
|
||||
],
|
||||
# ── Correction examples — model should fix wrong syntax ──
|
||||
"direct_correction": [
|
||||
'sudo gamemode slingshooter08 creative', # wrong arg order
|
||||
'sudo give slingshooter08 minecraft:bed 1', # should be white_bed
|
||||
'sudo effect slingshooter08 night_vision', # missing give and duration
|
||||
'sudo weather thunderstorm', # should be thunder
|
||||
'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1', # wrong enchant syntax
|
||||
'sudo tp 100 64 100', # missing player
|
||||
'sudo kill zombie 50', # wrong kill syntax
|
||||
'sudo enchant slingshooter08 sharpness 10', # max is 5
|
||||
'sudo effect give slingshooter08 minecraft:haste 99999', # duration too long
|
||||
'sudo fill 0 0 0 100 100 100 diamond_block', # too large, missing namespace
|
||||
'sudo rg define', # missing region name
|
||||
'sudo co rollback Ace13245 1h', # missing u: and t: prefixes
|
||||
'sudo lp addgroup vip Ace13245', # wrong syntax (should be lp user X parent add Y)
|
||||
],
|
||||
}
|
||||
|
||||
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
|
||||
PLAYERS = TEMPLATE_VARS["player"]
|
||||
|
||||
|
||||
def query_model(prompt, player, ollama_url, model, rcon):
|
||||
@@ -225,7 +403,8 @@ def query_model(prompt, player, ollama_url, model, rcon):
|
||||
system = (
|
||||
"You are a Minecraft 1.21 command translator for a Paper server.\n"
|
||||
"Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
|
||||
"Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
|
||||
"Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
|
||||
"minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
|
||||
"world.server_state, world.nearby_entities, memory.read, memory.write, "
|
||||
"script.write, script.validate, script.execute, script.read, script.list, "
|
||||
"script.delete, script.schedule.\n\n"
|
||||
@@ -246,7 +425,7 @@ def query_model(prompt, player, ollama_url, model, rcon):
|
||||
{"role": "user", "content": f"Player {player}: {prompt}"},
|
||||
],
|
||||
"stream": False, "format": "json",
|
||||
"options": {"temperature": 0.4, "num_predict": 800},
|
||||
"options": {"temperature": 0.85, "num_predict": 800},
|
||||
}, timeout=120)
|
||||
|
||||
content = r.json()["message"]["content"]
|
||||
@@ -272,9 +451,10 @@ def validate_commands(commands, rcon):
|
||||
return results
|
||||
|
||||
|
||||
def run_round(category, ollama_url, model, rcon, player):
|
||||
def run_round(category, ollama_url, model, rcon, player, prompts):
|
||||
"""Run one self-play round for a specific tool category."""
|
||||
prompt = random.choice(PROMPTS[category])
|
||||
raw_prompt = random.choice(prompts[category])
|
||||
prompt = expand_template(raw_prompt)
|
||||
|
||||
print(f" [{category:18s}] {prompt[:60]}")
|
||||
start = time.time()
|
||||
@@ -336,12 +516,13 @@ def run_round(category, ollama_url, model, rcon, player):
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Tool-focused self-play")
|
||||
parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
|
||||
parser.add_argument("--model", default="mortdecai:0.4.0")
|
||||
parser.add_argument("--rcon-host", default="192.168.0.112")
|
||||
parser.add_argument("--model", default="mortdecai:0.5.0")
|
||||
parser.add_argument("--rcon-host", default="192.168.0.244")
|
||||
parser.add_argument("--rcon-port", type=int, default=25578)
|
||||
parser.add_argument("--rcon-pass", default="REDACTED_RCON")
|
||||
parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
|
||||
parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
|
||||
parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
|
||||
parser.add_argument("--output", default="")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -350,8 +531,23 @@ def main():
|
||||
|
||||
rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)
|
||||
|
||||
# Load prompts from template files (falls back to inline)
|
||||
prompts = load_prompts(PROMPTS_DIR)
|
||||
|
||||
# Load extra prompts from prompt bank (quarantine salvage, etc.)
|
||||
if args.prompt_bank:
|
||||
bank_path = Path(args.prompt_bank)
|
||||
if bank_path.exists():
|
||||
bank_prompts = []
|
||||
with open(bank_path) as f:
|
||||
for line in f:
|
||||
entry = json.loads(line)
|
||||
bank_prompts.append(entry["prompt"])
|
||||
prompts["prompt_bank"] = bank_prompts
|
||||
print(f" Loaded {len(bank_prompts)} prompts from {bank_path}")
|
||||
|
||||
if args.categories == "all":
|
||||
categories = list(PROMPTS.keys())
|
||||
categories = list(prompts.keys())
|
||||
else:
|
||||
categories = [c.strip() for c in args.categories.split(",")]
|
||||
|
||||
@@ -372,7 +568,7 @@ def main():
|
||||
|
||||
for cat in categories:
|
||||
player = random.choice(PLAYERS)
|
||||
example = run_round(cat, args.ollama_url, args.model, rcon, player)
|
||||
example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)
|
||||
|
||||
stats["total"] += 1
|
||||
if example is None:
|
||||
|
||||
Reference in New Issue
Block a user