1200+ distilled gold examples, journal system, redstone mastery, safety awareness

Distilled Training Data (1,203 examples):
- 341 initial gold (plugins, enchantments, builds, effects, god, errors)
- 165 buildings + pipeline (100 structures built on dev, 65 request→query→act)
- 24 safety-aware (worldborder, safe tp, intentional harm, gamemode checks)
- 17 advanced logic (decanonized items, redstone gates, iterative builds)
- 12 redstone mastery (NOT/OR/AND/XOR/RS-latch/T-flip-flop/comparator/clock)
- 7 circuit verification and diagnosis
- 1 compact comparator gates
- 10 redstone methodology (build→test→save→recall→learn from mistakes)
- 8 player journal usage
- 29 creative+uncommon+pipeline+god with full tool chains

Player Journal System:
- agent/tools/player_journal.py — per-player text files (1-10 lines)
- journal.read + journal.write tool schemas added
- Cross-contaminated: God and Sudo share same journal per player
- Includes sentiment, relationship, builds, preferences, skill level

Redstone Engineering:
- agent/prompts/redstone_rules.md — baked-in wall torch, dedicated lead, repeater rules
- Learned from 4 iterations of 8-switch circuit: wall_torch on back face, not top
- T-junction bypass prevention: dedicated lead wire between merge and NOT block
- RCON limitation: can build circuits but cannot test them (lever toggle doesn't propagate)

Training Data Cleaning:
- 466 @s→@p fixes, 10 template commands removed
- 12 outdated refusals replaced with correct plugin commands
- Data de-duped across all sources

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 20:50:52 -04:00
parent d9acb653fe
commit 9c2c9a2310
86 changed files with 34873 additions and 1676 deletions
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 """
-Tool-focused self-play — exercises all 14 tools on a live dev server.
+Tool-focused self-play — exercises all 17 tools on a live dev server.

 Unlike regular self-play (which tests command generation), this script
 specifically generates prompts that require tool use: script writing,
-memory operations, entity scanning, wiki lookups, and chained multi-tool
-flows. Runs on the dev server via RCON.
+memory operations, entity scanning, wiki/plugin/changelog/paper lookups,
+and chained multi-tool flows. Runs on the dev server via RCON.

 The model responds, its tool calls get executed for real, and the full
 interaction (prompt + tool calls + results + final response) gets logged
@@ -15,7 +15,11 @@ Usage:
    python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
        --rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30

-    # Or via the scheduler preset
+    # Load extra prompts from prayer bank
+    python3 tool_self_play.py --prompt-bank data/raw/prayer_prompt_bank.jsonl
+
+    # Focus on weak categories only
+    python3 tool_self_play.py --categories worldguard,coreprotect,luckperms
 """

 import argparse
@@ -34,10 +38,91 @@ import requests
 from agent.tools.persistent_rcon import get_rcon

 OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
+PROMPTS_DIR = PROJECT_ROOT / "training" / "prompts"

-# ── Prompt categories that exercise specific tools ─────────────────────────
+# ── Template variables for prompt expansion ────────────────────────────────

-PROMPTS = {
+# Maps a placeholder name to its candidate substitutions: expand_template()
+# replaces each "{name}" token in a prompt with a random entry from that list.
+# The "player" list is also reused as the module-level PLAYERS roster.
+# NOTE(review): "player" and "target" hold the same four names and are drawn
+# independently, so one prompt can receive the same name for both — confirm
+# that is acceptable for prompts that need two distinct players.
+TEMPLATE_VARS = {
+    "player": ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"],
+    "target": ["Ace13245", "TheBigBoss", "xXDragonSlayerXx", "slingshooter08"],
+    "region": ["my-base", "spawn-zone", "pvp-arena", "vip-lounge", "farm-area"],
+    "warp": ["arena", "spawn", "shop", "nether", "farm", "end"],
+    "group": ["vip", "builder", "moderator", "default"],
+    "world": ["world", "world_nether", "world_the_end"],
+}
+
+
+def expand_template(prompt: str) -> str:
+    """Replace {placeholder} tokens with random values from TEMPLATE_VARS.
+
+    Every occurrence of a token gets its own independent random draw, so a
+    prompt that repeats "{player}" may end up naming two different players.
+    Tokens whose name is not a TEMPLATE_VARS key are left untouched.
+
+    Args:
+        prompt: Prompt template, possibly containing "{key}" tokens.
+
+    Returns:
+        The prompt with the known tokens substituted.
+    """
+    for key, values in TEMPLATE_VARS.items():
+        token = "{" + key + "}"
+        # Count occurrences up front rather than looping "while token in
+        # prompt": the loop is then bounded even if a substituted value
+        # happens to contain the token itself.
+        for _ in range(prompt.count(token)):
+            prompt = prompt.replace(token, random.choice(values), 1)
+    return prompt
+
+
+def load_prompts(prompts_dir: Path = PROMPTS_DIR,
+                 mode_filter: str = None,
+                 call_type_filter: str = None) -> dict[str, list[str]]:
+    """Load prompt templates from per-category JSONL files.
+
+    Args:
+        prompts_dir: Directory containing manifest.json and prompt JSONL files.
+        mode_filter: If set, only load categories whose manifest "mode" equals
+            this value or is "mixed" (sudo/god/god_system).
+        call_type_filter: If set, only load categories whose manifest
+            "call_type" equals this value (model/gateway).
+
+    Returns:
+        Dict mapping category name -> list of prompt template strings.
+        When manifest.json is missing, a copy of the inline PROMPTS_FALLBACK
+        is returned, so callers may add categories to the result without
+        mutating the module-level fallback.
+
+    Raises:
+        KeyError: If a manifest entry lacks "file" or a JSONL entry lacks
+            "prompt".
+        json.JSONDecodeError: If the manifest or a non-blank JSONL line is
+            not valid JSON.
+    """
+    manifest_path = prompts_dir / "manifest.json"
+    if not manifest_path.exists():
+        print(f"  Warning: {manifest_path} not found, using inline fallback")
+        return {name: list(items) for name, items in PROMPTS_FALLBACK.items()}
+
+    # JSON is UTF-8 by spec; don't depend on the platform default encoding.
+    with open(manifest_path, encoding="utf-8") as f:
+        manifest = json.load(f)
+
+    prompts = {}
+    for category, meta in manifest.items():
+        # Apply filters
+        if mode_filter and meta.get("mode") not in (mode_filter, "mixed"):
+            continue
+        if call_type_filter and meta.get("call_type") != call_type_filter:
+            continue
+
+        filepath = prompts_dir / meta["file"]
+        if not filepath.exists():
+            print(f"  Warning: {filepath} not found, skipping {category}")
+            continue
+        with open(filepath, encoding="utf-8") as f:
+            # Skip blank lines (e.g. a trailing empty line); json.loads
+            # would raise on them.
+            prompts[category] = [json.loads(line)["prompt"]
+                                 for line in f if line.strip()]
+
+    print(f"  Loaded {sum(len(v) for v in prompts.values())} prompts "
+          f"from {len(prompts)} categories")
+    return prompts
+
+
+def load_manifest(prompts_dir: Path = PROMPTS_DIR) -> dict:
+    """Load the prompt manifest with full metadata.
+
+    Used by the chat app for template selection UI.
+
+    Args:
+        prompts_dir: Directory expected to contain manifest.json.
+
+    Returns:
+        The raw manifest dict with mode, call_type, count per category, or
+        an empty dict when manifest.json does not exist.
+    """
+    manifest_path = prompts_dir / "manifest.json"
+    if not manifest_path.exists():
+        return {}
+    # JSON is UTF-8 by spec; don't depend on the platform default encoding.
+    with open(manifest_path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+# ── Inline fallback (subset, used if prompt files missing) ─────────────────
+
+PROMPTS_FALLBACK = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
@@ -107,6 +192,37 @@ PROMPTS = {
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
+        "sudo what does the breeze drop?",
+        "sudo how do you tame an armadillo?",
+        "sudo what biomes have cherry blossoms?",
+    ],
+    "plugin_docs": [
+        "sudo how do I create a WorldGuard region?",
+        "sudo what flags can I set on a region?",
+        "sudo how does CoreProtect rollback work?",
+        "sudo what's the command for LuckPerms group inheritance?",
+        "sudo how do I set up EssentialsX warps?",
+        "sudo what are the WorldEdit brush commands?",
+        "sudo how do I configure CoreProtect auto-purge?",
+        "sudo what permissions does the builder group need for WorldEdit?",
+        "sudo how do I set a WorldGuard greeting message?",
+        "sudo what's the difference between /rg flag and /rg addmember?",
+    ],
+    "changelog_lookup": [
+        "sudo what changed in 1.21?",
+        "sudo what was added in the tricky trials update?",
+        "sudo when were trial chambers added?",
+        "sudo what's new with the mace weapon?",
+        "sudo what version added the breeze mob?",
+        "sudo what got nerfed in the latest update?",
+    ],
+    "paper_docs": [
+        "sudo how do I set the view distance on Paper?",
+        "sudo what Paper config controls mob spawning rates?",
+        "sudo how do I enable async chunk loading?",
+        "sudo what's the Paper command to reload config?",
+        "sudo how do I optimize TPS on Paper?",
+        "sudo what Paper settings affect redstone performance?",
    ],
    "player_info": [
        "sudo build a wall around me",
@@ -215,9 +331,71 @@ PROMPTS = {
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
+    # ── Direct command passthrough — teach faithful execution ──
+    "direct_passthrough": [
+        # WorldGuard — exact commands
+        'sudo run this exactly: rg define test-region',
+        'sudo run this exactly: rg flag test-region pvp deny',
+        'sudo run this exactly: rg flag test-region mob-spawning deny',
+        'sudo run this exactly: rg addmember test-region Ace13245',
+        'sudo run this exactly: rg removemember test-region Ace13245',
+        'sudo run this exactly: rg flag test-region greeting Welcome to the zone!',
+        'sudo run this exactly: rg flag test-region entry -g nonmembers deny',
+        'sudo run this exactly: rg list',
+        'sudo run this exactly: rg info test-region',
+        'sudo run this exactly: rg remove test-region',
+        # CoreProtect — exact commands
+        'sudo run this exactly: co status',
+        'sudo run this exactly: co lookup u:Ace13245 t:1h',
+        'sudo run this exactly: co lookup u:Ace13245 t:1h a:block',
+        'sudo run this exactly: co rollback u:Ace13245 t:1h r:20',
+        'sudo run this exactly: co restore u:Ace13245 t:1h r:20',
+        'sudo run this exactly: co inspect',
+        'sudo run this exactly: co lookup t:30m r:10 a:container',
+        # LuckPerms — exact commands
+        'sudo run this exactly: lp creategroup vip',
+        'sudo run this exactly: lp group vip permission set essentials.fly true',
+        'sudo run this exactly: lp group vip permission set essentials.heal true',
+        'sudo run this exactly: lp user Ace13245 parent add vip',
+        'sudo run this exactly: lp user Ace13245 parent remove vip',
+        'sudo run this exactly: lp user Ace13245 info',
+        'sudo run this exactly: lp group vip info',
+        'sudo run this exactly: lp listgroups',
+        'sudo run this exactly: lp group vip meta setprefix "&6[VIP] "',
+        'sudo run this exactly: lp deletegroup vip',
+        # EssentialsX — exact commands
+        'sudo run this exactly: heal Ace13245',
+        'sudo run this exactly: feed Ace13245',
+        'sudo run this exactly: eco give Ace13245 1000',
+        'sudo run this exactly: eco take Ace13245 500',
+        'sudo run this exactly: bal Ace13245',
+        'sudo run this exactly: broadcast Welcome to the server!',
+        'sudo run this exactly: setwarp arena',
+        'sudo run this exactly: warp arena',
+        'sudo run this exactly: delwarp arena',
+        'sudo run this exactly: nick Ace13245 DragonLord',
+        # FAWE — exact commands
+        'sudo run this exactly: /worldedit version',
+    ],
+    # ── Correction examples — model should fix wrong syntax ──
+    "direct_correction": [
+        'sudo gamemode slingshooter08 creative',  # wrong arg order
+        'sudo give slingshooter08 minecraft:bed 1',  # should be white_bed
+        'sudo effect slingshooter08 night_vision',  # missing give and duration
+        'sudo weather thunderstorm',  # should be thunder
+        'sudo give slingshooter08 minecraft:diamond_pickaxe[sharpness:5] 1',  # wrong enchant syntax
+        'sudo tp 100 64 100',  # missing player
+        'sudo kill zombie 50',  # wrong kill syntax
+        'sudo enchant slingshooter08 sharpness 10',  # max is 5
+        'sudo effect give slingshooter08 minecraft:haste 99999',  # duration too long
+        'sudo fill 0 0 0 100 100 100 diamond_block',  # too large, missing namespace
+        'sudo rg define',  # missing region name
+        'sudo co rollback Ace13245 1h',  # missing u: and t: prefixes
+        'sudo lp addgroup vip Ace13245',  # wrong syntax (should be lp user X parent add Y)
+    ],
 }

-PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
+PLAYERS = TEMPLATE_VARS["player"]


 def query_model(prompt, player, ollama_url, model, rcon):
@@ -225,7 +403,8 @@ def query_model(prompt, player, ollama_url, model, rcon):
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
-        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
+        "Tools: rcon.execute, minecraft.wiki_lookup, plugin.docs_lookup, "
+        "minecraft.changelog_lookup, paper.docs_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
@@ -246,7 +425,7 @@ def query_model(prompt, player, ollama_url, model, rcon):
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
-            "options": {"temperature": 0.4, "num_predict": 800},
+            "options": {"temperature": 0.85, "num_predict": 800},
        }, timeout=120)

        content = r.json()["message"]["content"]
@@ -272,9 +451,10 @@ def validate_commands(commands, rcon):
    return results


-def run_round(category, ollama_url, model, rcon, player):
+def run_round(category, ollama_url, model, rcon, player, prompts):
    """Run one self-play round for a specific tool category."""
-    prompt = random.choice(PROMPTS[category])
+    raw_prompt = random.choice(prompts[category])
+    prompt = expand_template(raw_prompt)

    print(f"  [{category:18s}] {prompt[:60]}")
    start = time.time()
@@ -336,12 +516,13 @@ def run_round(category, ollama_url, model, rcon, player):
 def main():
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
-    parser.add_argument("--model", default="mortdecai:0.4.0")
-    parser.add_argument("--rcon-host", default="192.168.0.112")
+    parser.add_argument("--model", default="mortdecai:0.5.0")
+    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25578)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
+    parser.add_argument("--prompt-bank", default="", help="JSONL file with extra prompts to mix in")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

@@ -350,8 +531,23 @@ def main():

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

+    # Load prompts from template files (falls back to inline)
+    prompts = load_prompts(PROMPTS_DIR)
+
+    # Load extra prompts from prompt bank (quarantine salvage, etc.)
+    if args.prompt_bank:
+        bank_path = Path(args.prompt_bank)
+        if bank_path.exists():
+            bank_prompts = []
+            with open(bank_path) as f:
+                for line in f:
+                    entry = json.loads(line)
+                    bank_prompts.append(entry["prompt"])
+            prompts["prompt_bank"] = bank_prompts
+            print(f"  Loaded {len(bank_prompts)} prompts from {bank_path}")
+
    if args.categories == "all":
-        categories = list(PROMPTS.keys())
+        categories = list(prompts.keys())
    else:
        categories = [c.strip() for c in args.categories.split(",")]

@@ -372,7 +568,7 @@ def main():

        for cat in categories:
            player = random.choice(PLAYERS)
-            example = run_round(cat, args.ollama_url, args.model, rcon, player)
+            example = run_round(cat, args.ollama_url, args.model, rcon, player, prompts)

            stats["total"] += 1
            if example is None: