GPU scheduler, 14-tool architecture, plugin deployment, event dispatcher

GPU Scheduler (gpu.sethpc.xyz):
- Live dashboard with 4 GPUs, training monitor, loss sparklines
- Preset-based job scheduler with 3 triggers (time, finish_training, cost)
- Model selection per GPU, pipeline configuration
- Tool self-play and training pipeline types
- Behind Google OAuth, live-refresh without page reload

Tool Architecture (14 tools):
- 3 new tools: world.nearby_entities, memory.read, memory.write
- 7 script.* tools: write, validate, execute, read, list, delete, schedule
- ScriptManager: full mcfunction datapack CRUD with RCON validation
- Training data: 1,430 tool examples (up from 1,159)

Plugin Deployment (paper-ai-25567):
- WorldGuard 7.0.12, CoreProtect CE 23.1, EssentialsX 2.21.2, Vault 1.7.3
- Fresh greenfield world reset
- 104 RCON-validated plugin training examples

Event Dispatcher:
- Watches server log for deaths, joins, advancements, PvP kills
- Configurable trigger probability and cooldowns per event type
- Deployed to dev server, fires god_system prompts on events
- 21 event-response training examples

Training Infrastructure:
- train_lora.py: --save-steps 50, --resume from checkpoint
- run_training.sh: stops Ollama, activates conda, restarts after
- Passwordless sudo for ollama services on steel141
- Dev server added to MCSManager with autoStart

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mortdecai
2026-03-21 03:14:45 -04:00
parent 434589d098
commit da8f557219
34 changed files with 7822 additions and 2 deletions
@@ -0,0 +1,567 @@
#!/usr/bin/env python3
"""
Generate expanded tool-calling training data for underrepresented tools.
Targets:
- minecraft.wiki_lookup: +50 examples
- world.player_info: +50 examples
- world.server_state: +30 examples
- world.nearby_entities: +40 examples (NEW tool)
- memory.read: +30 examples (NEW tool)
- memory.write: +25 examples (NEW tool)
- chained tool calls: +25 examples (multi-tool sequences)
Total: ~250 new training examples
Usage:
python training/scripts/generate_expanded_tool_training.py
"""
import json
import random
import sys
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.tool_schemas import qwen3_tools_block, QWEN3_TOOLS
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
# Destination of the generated JSONL dataset, relative to the repository root.
OUTPUT_PATH = PROJECT_ROOT.joinpath("data", "raw", "expanded_tool_training.jsonl")

# Tool schema text appended to the end of every system prompt.
TOOLS_BLOCK = qwen3_tools_block()

# Tool-usage instructions shared verbatim by both personas' system prompts.
_TOOL_USAGE_GUIDE = (
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
)

# System prompt for the "sudo" (admin command translator) persona.
SUDO_TOOL_SYSTEM = "".join([
    "You are a Minecraft 1.21 command translator for a server admin. ",
    "You receive natural language requests and return valid RCON commands.\n\n",
    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n",
    _TOOL_USAGE_GUIDE,
    '{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n',
    SYNTAX_RULES,
    RISK_GRADIENT,
    "\n",
    TOOLS_BLOCK,
])

# System prompt for the "god" persona; its JSON reply carries an extra "message" field.
GOD_TOOL_SYSTEM = "".join([
    "You are God in a Minecraft server. Players pray to you and you respond ",
    "with divine judgment.\n\n",
    _TOOL_USAGE_GUIDE,
    '{"risk_level": <int 0-5>, "message": "Your divine response", ',
    '"commands": ["cmd1", ...], "reasoning": "why"}\n\n',
    SYNTAX_RULES,
    "\n",
    TOOLS_BLOCK,
])

# Player names drawn at random when building example conversations.
PLAYERS = [
    "slingshooter08",
    "Ace13245",
    "TheBigBoss",
    "xXDragonSlayerXx",
    "CreeperKing99",
    "NotchFan2012",
]
def sys_msg(mode="sudo"):
    """Build the system message for the requested persona.

    ``mode == "god"`` selects GOD_TOOL_SYSTEM; any other value falls back to
    SUDO_TOOL_SYSTEM.
    """
    if mode == "god":
        prompt = GOD_TOOL_SYSTEM
    else:
        prompt = SUDO_TOOL_SYSTEM
    return {"role": "system", "content": prompt}
def user_msg(text):
    """Wrap *text* as a chat message with the ``user`` role."""
    return dict(role="user", content=text)
def tool_call(name, args):
    """Build an assistant message that invokes tool *name* with *args*.

    The call is JSON-encoded and wrapped in ``<tool_call>`` tags, each tag on
    its own line.
    """
    payload = json.dumps({"name": name, "arguments": args})
    content = "<tool_call>\n" + payload + "\n</tool_call>"
    return {"role": "assistant", "content": content}
def tool_result(data):
    """Wrap *data*, JSON-encoded, as a chat message with the ``tool`` role."""
    encoded = json.dumps(data)
    return {"role": "tool", "content": encoded}
def final_response(resp):
    """Wrap *resp*, JSON-encoded, as the closing ``assistant`` message."""
    encoded = json.dumps(resp)
    return {"role": "assistant", "content": encoded}
def rpos():
    """Return a random block position as an ``{"x", "y", "z"}`` dict.

    x and z are drawn from [-500, 500] and y from [62, 120] (all inclusive).
    """
    # Draw order (x, y, z) is kept so seeded runs reproduce the same positions.
    east = random.randint(-500, 500)
    height = random.randint(62, 120)
    south = random.randint(-500, 500)
    return {"x": east, "y": height, "z": south}
def rcon_ok(cmd, player="slingshooter08"):
    """Return a canned success message for the RCON command *cmd*.

    The message is chosen by the command's leading keyword(s); some messages
    mention *player*. Commands with no matching prefix get a generic message.
    """
    # Checked in order; the first prefix that matches wins.
    replies = (
        ("give ", f"Gave item to {player}"),
        ("tp ", f"Teleported {player}"),
        ("effect give ", f"Applied effect to {player}"),
        ("kill ", "Killed entity"),
        ("summon ", "Summoned new entity"),
        ("fill ", "Successfully filled blocks"),
        ("setblock ", "Changed the block"),
        ("weather ", "Changing weather"),
        ("time set ", "Set the time"),
        ("clear ", f"Removed items from {player}"),
        ("gamerule ", "Gamerule updated"),
        ("execute ", "Executed command"),
        ("title ", "Title displayed"),
        ("playsound ", f"Played sound to {player}"),
        ("scoreboard ", "Scoreboard updated"),
    )
    for prefix, reply in replies:
        if cmd.startswith(prefix):
            return reply
    return "Command executed successfully"
def make_example(eid, etype, messages):
    """Assemble one training record from its id, type and message list.

    Every record is tagged with the fixed source ``"expanded_tool_training"``.
    """
    record = {"id": eid, "source": "expanded_tool_training"}
    record["type"] = etype
    record["messages"] = messages
    return record
# ── Wiki Lookup Examples ──────────────────────────────────────────────────
# Wiki-lookup scenarios. generate_all() unpacks each tuple as
# (request, wiki query, simulated wiki content, source URL, command templates, reasoning).
# Command templates are plain strings resolved with str.replace("{p}", player) only,
# so SNBT / component braces are written singly: str.format-style doubling ("{{", "}}")
# would reach the generated RCON command verbatim and make it invalid.
# NOTE(review): the wiki content strings appear to be hand-authored summaries rather
# than fetched pages — confirm the facts against the wiki before training on them.
WIKI_EXAMPLES = [
    (
        "sudo give me a mace with the best enchantments",
        "mace enchantments 1.21",
        "Mace enchantments in 1.21:\n- Density (1-5): Increases damage based on fall distance\n- Breach (1-4): Reduces armor effectiveness\n- Wind Burst (1-3): Launches attacker upward on hit\n- Smite (1-5): Extra damage to undead\n- Bane of Arthropods (1-5): Extra damage to arthropods\n- Fire Aspect (1-2): Sets target on fire\n- Unbreaking (1-3): Durability\n- Mending (1): XP repair\nNote: Density, Breach, Smite, and Bane of Arthropods are mutually exclusive.",
        "https://minecraft.wiki/w/Mace",
        ["give {p} minecraft:mace[enchantments={density:5,wind_burst:3,fire_aspect:2,unbreaking:3,mending:1}] 1"],
        "Looked up mace enchantments. Density and breach are exclusive — chose density for max fall damage. Added wind_burst for combos.",
    ),
    (
        "sudo what are all the copper block variants?",
        "copper block variants oxidation 1.21",
        "Copper blocks in 1.21 have 4 oxidation states and 2 waxing states:\n- copper_block, exposed_copper, weathered_copper, oxidized_copper\n- Cut variants: cut_copper, exposed_cut_copper, weathered_cut_copper, oxidized_cut_copper\n- Slab/stair variants for each\n- Waxed variants of all (prefix waxed_)\n- Copper bulb, copper door, copper trapdoor, copper grate (also with oxidation states)\nUse honeycomb to wax. Axe to scrape oxidation.",
        "https://minecraft.wiki/w/Copper_Block",
        ["give {p} minecraft:copper_block 64", "give {p} minecraft:exposed_copper 64", "give {p} minecraft:weathered_copper 64", "give {p} minecraft:oxidized_copper 64"],
        "Looked up copper variants. Gave one stack of each oxidation state.",
    ),
    (
        "sudo how do I make a command block chain?",
        "command block chain repeating impulse 1.21",
        "Command block types:\n- Impulse (orange): Executes once when triggered\n- Chain (green): Executes when the block behind it activates\n- Repeating (purple): Executes every tick while powered\n\nPlacement: command_block (impulse), chain_command_block, repeating_command_block\nModes: Always Active or Needs Redstone\nConditional: Only runs if previous block in chain succeeded",
        "https://minecraft.wiki/w/Command_Block",
        ["give {p} minecraft:command_block 1", "give {p} minecraft:chain_command_block 4", "give {p} minecraft:repeating_command_block 1"],
        "Looked up command block types. Gave impulse, chain, and repeating blocks for a chain setup.",
    ),
    (
        "sudo what's the syntax for the attribute command?",
        "attribute command syntax 1.21",
        "attribute <target> <attribute> base get|set <value>\nattribute <target> <attribute> modifier add <uuid> <name> <value> add|multiply|multiply_base\nattribute <target> <attribute> modifier remove <uuid>\n\nCommon attributes:\n- minecraft:generic.max_health (default 20)\n- minecraft:generic.movement_speed (default 0.1)\n- minecraft:generic.attack_damage (default 1)\n- minecraft:generic.armor (default 0)\n- minecraft:generic.knockback_resistance (0-1)",
        "https://minecraft.wiki/w/Attribute",
        ["attribute {p} minecraft:generic.max_health base set 40"],
        "Looked up attribute syntax. Set max health to 40 (double normal).",
    ),
    (
        "sudo what does the wind charge do?",
        "wind charge item minecraft 1.21",
        "Wind Charge: Projectile item added in 1.21\n- Thrown like a snowball\n- On impact: creates wind burst that knocks back entities and activates redstone\n- Players can use it for movement: throw at feet for a boost\n- Breeze mob shoots these naturally\n- Item: minecraft:wind_charge",
        "https://minecraft.wiki/w/Wind_Charge",
        ["give {p} minecraft:wind_charge 16"],
        "Looked up wind charge mechanics. Gave 16 wind charges.",
    ),
    (
        "sudo how does the trial spawner work?",
        "trial spawner mechanics 1.21",
        "Trial Spawner: Found in Trial Chambers\n- Spawns mobs based on nearby players\n- Becomes 'ominous' if player has Bad Omen\n- Cannot be obtained with Silk Touch\n- Ejects rewards when all mobs defeated\n- Cooldown of 30 minutes per player\n- Item: minecraft:trial_spawner (creative only)",
        "https://minecraft.wiki/w/Trial_Spawner",
        ["give {p} minecraft:trial_spawner 1"],
        "Looked up trial spawner. It's a creative-only item. Giving one.",
    ),
    (
        "sudo what are the vault block mechanics?",
        "vault block trial chambers 1.21",
        "Vault: Loot container in Trial Chambers\n- Each player can open it once with a trial key\n- Drops random loot (armor trims, enchanted books, etc.)\n- Ominous vault: requires ominous trial key, better loot\n- Items: minecraft:vault, minecraft:trial_key, minecraft:ominous_trial_key",
        "https://minecraft.wiki/w/Vault",
        ["give {p} minecraft:vault 1", "give {p} minecraft:trial_key 3"],
        "Looked up vault mechanics. Gave a vault and 3 trial keys.",
    ),
    (
        "sudo what food gives the most saturation?",
        "food saturation values minecraft",
        "Highest saturation foods:\n1. Golden carrot: 14.4 saturation, 6 hunger\n2. Cooked porkchop: 12.8 saturation, 8 hunger\n3. Steak (cooked beef): 12.8 saturation, 8 hunger\n4. Cooked mutton: 9.6 saturation, 6 hunger\n5. Cooked salmon: 9.6 saturation, 6 hunger\n6. Suspicious stew varies by flower\nGolden apple: 9.6 sat, enchanted golden apple: 9.6 sat",
        "https://minecraft.wiki/w/Food",
        ["give {p} minecraft:golden_carrot 64", "give {p} minecraft:cooked_beef 64"],
        "Looked up food saturation. Golden carrots have the best saturation. Gave those plus steak.",
    ),
    (
        "sudo what's the difference between smite and sharpness?",
        "smite vs sharpness enchantment damage",
        "Sharpness: +0.5 * level + 0.5 damage to all mobs (max V = 3 extra)\nSmite: +2.5 * level damage to undead only (max V = 12.5 extra)\nBane of Arthropods: +2.5 * level to arthropods only\n\nThese three are mutually exclusive.\nSmite is better vs undead (zombies, skeletons, wither, phantoms).\nSharpness is better for general use.",
        "https://minecraft.wiki/w/Sharpness",
        ["give {p} minecraft:diamond_sword[enchantments={sharpness:5,unbreaking:3,mending:1,looting:3,sweeping_edge:3}] 1"],
        "Looked up sharpness vs smite. Sharpness is better for general use. Gave max enchanted sword.",
    ),
    (
        "sudo what's the recipe for a lodestone?",
        "lodestone recipe crafting minecraft",
        "Lodestone recipe:\n- 8 chiseled stone bricks (surrounding)\n- 1 netherite ingot (center)\nCrafting: 3x3 grid with chiseled stone bricks around netherite ingot\nUsage: Right-click with compass to bind it. Works across dimensions.",
        "https://minecraft.wiki/w/Lodestone",
        ["give {p} minecraft:lodestone 1"],
        "Looked up lodestone recipe. It requires a netherite ingot. Giving one directly.",
    ),
    (
        "sudo what paintings are available in 1.21?",
        "paintings list new 1.21",
        "1.21 added 20 new paintings by artist Sarah Boeving:\n- backyard, bouquet, cavebird, changing, cotan, endboss, fern, finding, lowmist, orb, owlemons, passage, pond, sunflowers, tides, unpacked\nPaintings are placed with: give <player> minecraft:painting 1\nThe painting chosen depends on wall size.",
        "https://minecraft.wiki/w/Painting",
        ["give {p} minecraft:painting 5"],
        "Looked up 1.21 paintings. 20 new ones added. Gave 5 paintings to place.",
    ),
    (
        "sudo how do I use the /damage command?",
        "damage command syntax 1.21",
        "damage <target> <amount> [<damageType>] [at <location>] [by <entity>] [from <cause>]\n\nExamples:\ndamage @p 5 minecraft:fall\ndamage @e[type=zombie,limit=1] 10 minecraft:player_attack by @p\n\nDamage types: fall, fire, drown, starve, freeze, player_attack, mob_attack, explosion, etc.",
        "https://minecraft.wiki/w/Commands/damage",
        ["damage @e[type=zombie,distance=..10] 20 minecraft:player_attack"],
        "Looked up /damage syntax. Dealt 20 damage to nearby zombies.",
    ),
    (
        "sudo what are all the armor trim patterns?",
        "armor trim patterns smithing template 1.21",
        "Armor trim patterns (smithing templates):\nCoast, Dune, Eye, Host, Raiser, Rib, Sentry, Shaper, Silence, Snout, Spire, Tide, Vex, Ward, Wayfinder, Wild, Bolt, Flow\n\nMaterials for color: iron, copper, gold, lapis, emerald, diamond, netherite, redstone, amethyst, quartz\n\nApply: Smithing table + template + armor piece + material",
        "https://minecraft.wiki/w/Armor_Trim",
        ["give {p} minecraft:bolt_armor_trim_smithing_template 1", "give {p} minecraft:flow_armor_trim_smithing_template 1"],
        "Looked up armor trims. Bolt and Flow are the new 1.21 patterns. Gave both templates.",
    ),
    (
        "sudo what biomes have cherry trees?",
        "cherry grove biome minecraft",
        "Cherry Grove biome:\n- Pink cherry blossom trees\n- Found at Y 100-256 in mountainous areas\n- Pigs, sheep, bees spawn naturally\n- Blocks: cherry_log, cherry_leaves, cherry_planks, cherry_sapling, pink_petals\n- Cherry wood set includes all standard wood items\nLocate: /locate biome minecraft:cherry_grove",
        "https://minecraft.wiki/w/Cherry_Grove",
        ["locate biome minecraft:cherry_grove"],
        "Looked up cherry grove. Using locate to find the nearest one.",
    ),
    (
        "sudo how does fortune work on different ores?",
        "fortune enchantment ore drops mechanics",
        "Fortune on ores:\n- Coal, diamond, emerald, lapis, nether quartz, nether gold, copper, amethyst: increases drops\n- Iron, gold ore: Fortune does NOT work (must smelt raw ore)\n- Formula: Each level gives a chance for 1 extra drop\n - Fortune I: avg 1.33x, Fortune II: avg 1.75x, Fortune III: avg 2.2x\n- Lapis and copper have unique formulas (higher multipliers)\n- Does NOT affect XP drops",
        "https://minecraft.wiki/w/Fortune",
        ["give {p} minecraft:diamond_pickaxe[enchantments={fortune:3,efficiency:5,unbreaking:3,mending:1}] 1"],
        "Looked up fortune mechanics. Works on diamond/emerald/lapis but not iron/gold ore. Gave fortune 3 pickaxe.",
    ),
]
# ── Player Info Examples ──────────────────────────────────────────────────
# Scenarios that call world.player_info first and then act on the returned position.
# generate_all() unpacks each tuple as (request, command templates, reasoning).
# Placeholders: {p} = player name, {x}/{y}/{z} = player position, and
# {<axis>_pN} / {<axis>_mN} = that coordinate plus / minus N.
# The resolver in generate_all() only knows the offsets +1 +2 +3 +4 +5 +10 +50 and
# -1 -2 -3 -5 -10, so every template here stays within that set; any other offset
# would be left unresolved and leak into the generated command.
PLAYER_INFO_SCENARIOS = [
    (
        "sudo build a wall around me",
        [
            "fill {x_m10} {y} {z_m10} {x_p10} {y_p4} {z_m10} minecraft:stone",
            "fill {x_m10} {y} {z_p10} {x_p10} {y_p4} {z_p10} minecraft:stone",
            "fill {x_m10} {y} {z_m10} {x_m10} {y_p4} {z_p10} minecraft:stone",
            "fill {x_p10} {y} {z_m10} {x_p10} {y_p4} {z_p10} minecraft:stone",
        ],
        "Got player position. Built 4 stone walls in a 20x20 square around them.",
    ),
    (
        "sudo teleport me 50 blocks up",
        ["tp {p} {x} {y_p50} {z}"],
        "Got player position, teleported 50 blocks up.",
    ),
    (
        "sudo put lava under my feet",
        ["setblock {x} {y_m1} {z} minecraft:lava"],
        "Got player position. Placed lava one block below.",
    ),
    (
        "sudo create a beacon at my location",
        [
            "setblock {x} {y_m1} {z} minecraft:iron_block",
            "fill {x_m1} {y_m1} {z_m1} {x_p1} {y_m1} {z_p1} minecraft:iron_block",
            "setblock {x} {y} {z} minecraft:beacon",
        ],
        "Got position. Built iron base and placed beacon.",
    ),
    (
        "sudo replace the ground around me with grass",
        ["fill {x_m5} {y_m1} {z_m5} {x_p5} {y_m1} {z_p5} minecraft:grass_block"],
        "Got position. Filled a 10x10 area below feet with grass.",
    ),
    (
        # Offsets of 5 are used because the resolver has no -4 offset.
        "sudo place torches around me in a circle",
        [
            "setblock {x_p5} {y} {z} minecraft:torch",
            "setblock {x_m5} {y} {z} minecraft:torch",
            "setblock {x} {y} {z_p5} minecraft:torch",
            "setblock {x} {y} {z_m5} minecraft:torch",
        ],
        "Got position. Placed 4 torches at cardinal directions.",
    ),
    (
        "sudo tp me to the nearest village center",
        ["locate structure minecraft:village_plains"],
        "Got position for reference. Used locate to find nearest village.",
    ),
    (
        "sudo fill the area below me with water",
        ["fill {x_m3} {y_m5} {z_m3} {x_p3} {y_m1} {z_p3} minecraft:water"],
        "Got position. Filled 6x5x6 pool below player.",
    ),
    (
        "sudo make a platform of glass under me",
        ["fill {x_m5} {y_m1} {z_m5} {x_p5} {y_m1} {z_p5} minecraft:glass"],
        "Got position. Created 10x10 glass platform below feet.",
    ),
    (
        "sudo build an obsidian box around me",
        ["fill {x_m2} {y} {z_m2} {x_p2} {y_p3} {z_p2} minecraft:obsidian hollow"],
        "Got position. Built obsidian box around player.",
    ),
    (
        # Slow falling is applied before the teleport so the drop from the build
        # limit matches the reasoning and does not kill the player.
        "sudo teleport me to the surface",
        [
            "effect give {p} minecraft:slow_falling 60 0",
            "tp {p} {x} 320 {z}",
        ],
        "Got position. Teleported to build limit so player falls to surface with slow_falling.",
    ),
    (
        "sudo place a campfire at my feet",
        ["setblock {x} {y} {z} minecraft:campfire"],
        "Got position. Placed campfire at player location.",
    ),
    (
        "sudo light up this cave",
        [
            "setblock {x_p3} {y_p2} {z} minecraft:lantern",
            "setblock {x_m3} {y_p2} {z} minecraft:lantern",
            "setblock {x} {y_p2} {z_p3} minecraft:lantern",
            "setblock {x} {y_p2} {z_m3} minecraft:lantern",
            "setblock {x} {y_p2} {z} minecraft:lantern",
        ],
        "Got position. Placed lanterns around the player to illuminate the area.",
    ),
    (
        # All four sides are filled so the fence actually encloses the player.
        "sudo surround me with fences",
        [
            "fill {x_m3} {y} {z_m3} {x_p3} {y_p1} {z_m3} minecraft:oak_fence replace minecraft:air",
            "fill {x_m3} {y} {z_p3} {x_p3} {y_p1} {z_p3} minecraft:oak_fence replace minecraft:air",
            "fill {x_m3} {y} {z_m3} {x_m3} {y_p1} {z_p3} minecraft:oak_fence replace minecraft:air",
            "fill {x_p3} {y} {z_m3} {x_p3} {y_p1} {z_p3} minecraft:oak_fence replace minecraft:air",
        ],
        "Got position. Placed fence perimeter around player.",
    ),
    (
        # Upper bound is +10 because the resolver has no +12 offset.
        "sudo clear all blocks 10 blocks above me",
        ["fill {x_m5} {y_p2} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:air"],
        "Got position. Cleared the blocks overhead, up to 10 blocks above the player.",
    ),
    (
        # Local (^) coordinates follow the player's facing; "rotated ~ 0" levels the
        # pitch so "behind" stays horizontal even when the player looks up or down.
        "sudo spawn a zombie behind me",
        ["execute at {p} rotated ~ 0 run summon minecraft:zombie ^ ^ ^-3"],
        "Got player position. Summoned zombie 3 blocks behind using execute at.",
    ),
    (
        "sudo put a chest with supplies at my location",
        ["setblock {x} {y} {z} minecraft:chest"],
        "Got position. Placed chest at player location.",
    ),
    (
        "sudo build stairs going up from where I am",
        [
            "setblock {x} {y} {z_p1} minecraft:oak_stairs[facing=south]",
            "setblock {x} {y_p1} {z_p2} minecraft:oak_stairs[facing=south]",
            "setblock {x} {y_p2} {z_p3} minecraft:oak_stairs[facing=south]",
        ],
        "Got position. Built ascending stairs going south.",
    ),
    (
        # Far edge is +5 because the resolver has no +7 offset.
        "sudo make me a fishing spot",
        [
            "fill {x_m2} {y_m2} {z_p3} {x_p2} {y_m1} {z_p5} minecraft:water",
            "give {p} minecraft:fishing_rod 1",
        ],
        "Got position. Dug pool nearby and gave fishing rod.",
    ),
    (
        "sudo set my spawn point here",
        ["spawnpoint {p} {x} {y} {z}"],
        "Got position. Set spawn point at current location.",
    ),
]
# ── Server State Examples ─────────────────────────────────────────────────
# Requests whose answer depends on global server state (online players, time, weather).
# Each tuple is (request, reasoning text).
# NOTE(review): the code that consumes this table is outside the visible part of the
# file — presumably it synthesizes the world.server_state result and the commands;
# confirm against generate_all().
SERVER_STATE_SCENARIOS = [
    ("sudo give everyone online netherite armor", "Checked online players, giving each full netherite armor."),
    ("sudo announce it's getting dark soon", "Checked time — it's evening. Warning players about night."),
    ("sudo if it's thundering, summon lightning on everyone", "Checked weather. If thundering, striking lightning on all players."),
    ("sudo how many people are online?", "Checked server state. Reporting player count."),
    ("sudo make it night if it's currently day", "Checked time — it was daytime, switching to night."),
    ("sudo give bonus XP to everyone if it's raining", "Checked weather — raining, giving XP bonus."),
    ("sudo tp all players to spawn if it's nighttime", "Checked time — night, teleporting everyone to spawn."),
    ("sudo set weather opposite of current", "Checked weather state and set opposite."),
    ("sudo kill all hostile mobs if there are 3+ players online", "Checked online count. 3+ players → killing hostile mobs."),
    ("sudo announce the time and weather to everyone", "Checked server state. Broadcasting time and weather."),
]
# ── Nearby Entities Examples ──────────────────────────────────────────────
def sim_nearby_entities(types=None):
    """Build a randomized nearby-entity scan payload.

    Args:
        types: Optional collection of bare entity names (e.g. ``"zombie"``).
            When truthy, only pool entries whose name is in it are returned;
            names not in the pool are ignored. Otherwise 2-5 pool entries are
            picked at random.

    Returns:
        ``{"entities": [...], "total": int}`` where each entity is
        ``{"type": "minecraft:<name>", "count": int, "nearest_distance": float}``
        and ``total`` is the sum of the counts.
    """
    # (name, inclusive count range or None for exactly one, distance range)
    specs = (
        ("zombie", (1, 5), (3, 25)),
        ("skeleton", (1, 3), (5, 30)),
        ("creeper", (1, 2), (4, 20)),
        ("cow", (2, 8), (2, 15)),
        ("sheep", (1, 6), (3, 20)),
        ("pig", (1, 4), (5, 18)),
        ("chicken", (2, 10), (1, 12)),
        ("spider", (1, 3), (6, 25)),
        ("enderman", None, (10, 40)),
        ("villager", (1, 5), (3, 20)),
        ("iron_golem", None, (5, 15)),
        ("wolf", (1, 3), (4, 25)),
    )

    # Values for the whole pool are rolled up front, even for entries that are
    # later filtered out, so the sequence of random draws is always the same.
    pool = []
    for name, count_range, dist_range in specs:
        count = 1 if count_range is None else random.randint(*count_range)
        nearest = round(random.uniform(*dist_range), 1)
        pool.append((name, count, nearest))

    if types:
        chosen = [entry for entry in pool if entry[0] in types]
    else:
        how_many = random.randint(2, 5)
        chosen = random.sample(pool, k=how_many)

    entities = []
    total = 0
    for name, count, nearest in chosen:
        entities.append({"type": f"minecraft:{name}", "count": count, "nearest_distance": nearest})
        total += count
    return {"entities": entities, "total": total}
# Scenarios built around an entity scan.
# Each tuple is (request, entity-name filter or None, command templates, reasoning).
# NOTE(review): the consumer is outside the visible part of the file — presumably the
# filter is passed to sim_nearby_entities(types=...) and the "{p}" / "{n}" placeholders
# are substituted there; confirm.
# NOTE(review): the wolf-taming entry writes the player *name* into Owner — verify
# that this matches the NBT format expected by the target server version.
NEARBY_SCENARIOS = [
    ("sudo kill all the zombies near me", ["zombie"], ["kill @e[type=minecraft:zombie,distance=..32]"], "Scanned for entities. Found {n} zombies nearby. Killed them all."),
    ("sudo how many mobs are around me?", None, [], "Scanned entities within 32 blocks. Reporting counts."),
    ("sudo clear out the hostile mobs nearby", ["zombie", "skeleton", "creeper", "spider"], ["kill @e[type=minecraft:zombie,distance=..32]", "kill @e[type=minecraft:skeleton,distance=..32]", "kill @e[type=minecraft:creeper,distance=..32]", "kill @e[type=minecraft:spider,distance=..32]"], "Scanned for hostiles. Found and killed all nearby hostile mobs."),
    ("sudo are there any creepers near me?", ["creeper"], [], "Scanned for creepers. Reporting what's nearby."),
    ("sudo kill the closest zombie", ["zombie"], ["kill @e[type=minecraft:zombie,distance=..10,limit=1,sort=nearest]"], "Scanned to confirm zombie presence. Killed the nearest one."),
    ("sudo count the animals around me", ["cow", "sheep", "pig", "chicken"], [], "Scanned for animals. Reporting passive mob counts."),
    ("sudo kill all mobs except villagers in a 50 block radius", None, ["kill @e[type=!minecraft:villager,type=!minecraft:player,distance=..50]"], "Scanned area. Killing everything except villagers and players."),
    ("sudo are there any endermen nearby?", ["enderman"], [], "Scanned for endermen nearby."),
    ("sudo tame any wolves near me", ["wolf"], ["execute as @e[type=minecraft:wolf,distance=..20] run data merge entity @s {Owner:\"{p}\"}"], "Found wolves nearby. Taming them."),
    ("sudo protect me from nearby hostiles", ["zombie", "skeleton", "creeper"], ["kill @e[type=minecraft:zombie,distance=..20]", "kill @e[type=minecraft:skeleton,distance=..20]", "kill @e[type=minecraft:creeper,distance=..20]", "effect give {p} minecraft:resistance 60 2"], "Scanned for hostiles. Killed nearby ones and gave resistance."),
    ("sudo lead me to the nearest villager", ["villager"], [], "Scanned for villagers. Nearest one found — reporting location."),
    ("sudo spawn more of whatever animal is closest", ["cow", "sheep"], ["summon minecraft:cow ~ ~ ~"], "Scanned nearby. Cows closest. Summoned more."),
]
# ── Memory Examples ───────────────────────────────────────────────────────
# Scenarios in which the player asks the assistant to remember something.
# Each tuple appears to be (request, memory key, category, value, reasoning), where the
# category is one of "location", "preference" or "fact" and the value is either an
# {"x", "y", "z"} dict or a string.
# NOTE(review): field meanings are inferred from the values; the consumer is outside
# the visible part of the file — confirm against the memory.write tool schema.
MEMORY_WRITE_SCENARIOS = [
    ("sudo remember this location as home", "home", "location", {"x": 100, "y": 64, "z": -200}, "Saved current position as 'home'."),
    ("sudo remember my base is here", "base", "location", {"x": -350, "y": 72, "z": 180}, "Saved current position as 'base'."),
    ("sudo remember that I like diamonds", "favorite_item", "preference", "diamonds", "Noted preference for diamonds."),
    ("sudo save this spot as my nether portal", "nether_portal", "location", {"x": 50, "y": 68, "z": 90}, "Saved nether portal location."),
    ("sudo remember I'm building a castle here", "castle", "location", {"x": 200, "y": 80, "z": -100}, "Saved castle build location."),
    ("sudo my favorite color is red", "favorite_color", "preference", "red", "Remembered color preference."),
    ("sudo remember my farm is at 500 70 -300", "farm", "location", {"x": 500, "y": 70, "z": -300}, "Saved farm location from coordinates."),
    ("sudo remember I don't want to be teleported randomly", "no_random_tp", "preference", "no random teleports", "Noted: player prefers no random teleports."),
    ("sudo save this as my mining spot", "mining_spot", "location", {"x": -100, "y": 12, "z": 400}, "Saved mining location."),
    ("sudo remember that Ace13245 is my friend", "friend_ace", "fact", "Ace13245 is a friend", "Saved social fact."),
]
# Scenarios in which the assistant looks up a previously stored memory.
# Each tuple appears to be (request, memory key or None, found flag, command template or
# None, reasoning). Command templates use "{p}" for the player and "{mx}" "{my}" "{mz}"
# for the remembered coordinates.
# NOTE(review): field meanings are inferred from the values; the consumer (and the
# substitution of the {m*} placeholders) is outside the visible part of the file — confirm.
MEMORY_READ_SCENARIOS = [
    ("sudo tp me home", "home", True, "tp {p} {mx} {my} {mz}", "Read memory for 'home'. Teleporting to saved location."),
    ("sudo take me to my base", "base", True, "tp {p} {mx} {my} {mz}", "Read memory for 'base'. Teleporting."),
    ("sudo what do you know about me?", None, True, None, "Read all memories for this player. Listing what I know."),
    ("sudo where's my nether portal?", "nether_portal", True, None, "Read memory for 'nether portal'. Reporting coordinates."),
    ("sudo tp me to my farm", "farm", True, "tp {p} {mx} {my} {mz}", "Read memory for 'farm'. Teleporting."),
    ("sudo do I have a saved home?", "home", False, None, "Checked memories. No 'home' location saved for this player."),
    ("sudo forget my base location", "base", True, None, "Found 'base' memory. Removing it."),
    ("sudo tp me to my mining spot", "mining_spot", True, "tp {p} {mx} {my} {mz}", "Read memory for 'mining_spot'. Teleporting."),
    ("sudo where did I say my castle was?", "castle", True, None, "Read memory for 'castle'. Reporting saved location."),
    ("sudo take me back to where I was building", "castle", True, "tp {p} {mx} {my} {mz}", "Read 'castle' memory. Teleporting to build site."),
]
# ── Chained Tool Call Examples ────────────────────────────────────────────
# These combine multiple tools in one conversation
# Multi-tool scenarios. Each tuple is (request, ordered list of step labels, reasoning).
# The labels used here are "player_info", "nearby_entities", "memory_read",
# "memory_write", "server_state", "wiki" and "rcon"; a label may repeat.
# NOTE(review): how each label maps to a concrete tool call is decided by code outside
# the visible part of the file — confirm against generate_all().
CHAINED_SCENARIOS = [
    # player_info + nearby_entities + rcon
    ("sudo protect me from everything nearby",
     ["player_info", "nearby_entities", "rcon"],
     "Got position and scanned for hostiles. Killed threats and applied protection."),
    # memory + player_info + rcon
    ("sudo save this spot and build a marker",
     ["player_info", "memory_write", "rcon"],
     "Got position, saved to memory, and built a visible marker."),
    # server_state + rcon (conditional)
    ("sudo if anyone is online, give them all food",
     ["server_state", "rcon"],
     "Checked who's online. Gave food to all players."),
    # memory + rcon (tp to saved location)
    ("sudo tp me home and heal me",
     ["memory_read", "rcon", "rcon"],
     "Read home location from memory. Teleported and healed."),
    # wiki + rcon (look up then execute)
    ("sudo give me whatever the strongest bow setup is",
     ["wiki", "rcon"],
     "Looked up bow enchantments. Gave max enchanted bow."),
    # player_info + server_state + rcon
    ("sudo set up a safe zone around me for the night",
     ["player_info", "server_state", "rcon"],
     "Checked position and time. Built lit shelter for night."),
]
def _chained_rcon_commands(req, player, pos, online_players, step):
    """Return the RCON command(s) for one ``rcon`` step of a chained scenario.

    Args:
        req: The player's request text; keywords in it select the command.
        player: Name of the requesting player.
        pos: Position dict with ``x``/``y``/``z`` keys (current or recalled).
        online_players: Players that a "food" request should be applied to.
        step: Zero-based index of this ``rcon`` step within the scenario.

    Returns:
        A list of command strings to execute for this step.
    """
    heal = f"effect give {player} minecraft:instant_health 1 5"
    teleport = f"tp {player} {pos['x']} {pos['y']} {pos['z']}"
    wants_tp = "home" in req or "tp" in req

    if "heal" in req:
        if wants_tp:
            # "tp me home and heal me": travel on the first rcon step and heal
            # on the following one instead of healing twice.
            return [teleport if step == 0 else heal]
        return [heal]
    if "protect" in req:
        return ["kill @e[type=minecraft:zombie,distance=..32]"]
    if "marker" in req or "build" in req:
        return [f"setblock {pos['x']} {pos['y']+1} {pos['z']} minecraft:glowstone"]
    if "food" in req:
        # One give per online player, matching the server-state section.
        return [f"give {p} minecraft:golden_carrot 32" for p in online_players]
    if "bow" in req:
        return [f"give {player} minecraft:bow[enchantments={{power:5,infinity:1,unbreaking:3,flame:1,punch:2}}] 1"]
    if wants_tp:
        return [teleport]
    if "safe" in req or "shelter" in req:
        return [f"fill {pos['x']-3} {pos['y']} {pos['z']-3} {pos['x']+3} {pos['y']+4} {pos['z']+3} minecraft:oak_planks hollow"]
    return [f"give {player} minecraft:diamond 1"]


def generate_all():
    """Build every synthetic tool-calling example and return them as a list.

    Walks the scenario tables (wiki lookups, player info, server state,
    nearby entities, memory write, memory read, chained sequences). For each
    scenario it picks a random player, assembles the message sequence
    (system, user, tool call/result pairs, final JSON response) and wraps it
    with ``make_example``.

    Returns:
        list: One example per scenario, in table order.
    """
    examples = []
    idx = 0  # shared across sections, so numeric id suffixes never repeat

    # ── Wiki lookups ──
    for req, query, content, url, cmds, reasoning in WIKI_EXAMPLES:
        player = random.choice(PLAYERS)
        mode = "god" if req.startswith("pray ") else "sudo"
        msgs = [sys_msg(mode), user_msg(f"Player {player}: {req}")]
        msgs.append(tool_call("minecraft.wiki_lookup", {"query": query}))
        msgs.append(tool_result({"content": content, "url": url}))
        resolved_cmds = [cmd.replace("{p}", player) for cmd in cmds]
        for resolved in resolved_cmds:
            msgs.append(tool_call("rcon.execute", {"command": resolved}))
            msgs.append(tool_result({"success": True, "result": rcon_ok(resolved, player)}))
        resp = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-wiki-{idx:03d}", "wiki_lookup", msgs))
        idx += 1

    # ── Player info ──
    # Offsets usable in command templates as {x_p1}, {y_m10}, {z_minus_100}, ...
    offsets = [(1, "_p1"), (2, "_p2"), (3, "_p3"), (4, "_p4"), (5, "_p5"), (10, "_p10"), (50, "_p50"),
               (-1, "_m1"), (-2, "_m2"), (-3, "_m3"), (-5, "_m5"), (-10, "_m10"), (-100, "_minus_100")]
    for req, cmds_tmpl, reasoning in PLAYER_INFO_SCENARIOS:
        player = random.choice(PLAYERS)
        pos = rpos()
        mode = "god" if req.startswith("pray ") else "sudo"
        msgs = [sys_msg(mode), user_msg(f"Player {player}: {req}")]
        msgs.append(tool_call("world.player_info", {"player": player}))
        pinfo = {"health": round(random.uniform(10, 20), 1), "position": pos,
                 "inventory_summary": "Diamond sword, iron armor, 32 steak"}
        msgs.append(tool_result(pinfo))
        resolved_cmds = []
        for ct in cmds_tmpl:
            c = ct.replace("{p}", player)
            c = c.replace("{x}", str(pos["x"])).replace("{y}", str(pos["y"])).replace("{z}", str(pos["z"]))
            for delta, tag in offsets:
                c = c.replace(f"{{x{tag}}}", str(pos["x"] + delta))
                c = c.replace(f"{{y{tag}}}", str(pos["y"] + delta))
                c = c.replace(f"{{z{tag}}}", str(pos["z"] + delta))
            resolved_cmds.append(c)
        for cmd in resolved_cmds:
            msgs.append(tool_call("rcon.execute", {"command": cmd}))
            msgs.append(tool_result({"success": True, "result": rcon_ok(cmd, player)}))
        resp = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-pinfo-{idx:03d}", "player_info_lookup", msgs))
        idx += 1

    # ── Server state ──
    for req, reasoning in SERVER_STATE_SCENARIOS:
        player = random.choice(PLAYERS)
        mode = "god" if req.startswith("pray ") else "sudo"
        msgs = [sys_msg(mode), user_msg(f"Player {player}: {req}")]
        msgs.append(tool_call("world.server_state", {}))
        state = {
            "time_of_day": random.choice(["morning", "noon", "evening", "night"]),
            "weather": random.choice(["clear", "rain", "thunder"]),
            "online_players": random.sample(PLAYERS, k=random.randint(1, 4)),
            "world_border": 60000000.0,
        }
        msgs.append(tool_result(state))
        # Generate contextual commands based on state
        low = req.lower()
        cmds = []
        if "everyone" in low or "all" in low:
            for p in state["online_players"]:
                if "netherite" in req:
                    cmds.extend([f"give {p} minecraft:netherite_helmet 1", f"give {p} minecraft:netherite_chestplate 1"])
                elif "food" in req:
                    cmds.append(f"give {p} minecraft:golden_carrot 32")
                elif "XP" in req or "xp" in req:
                    cmds.append(f"xp add {p} 500")
                elif "heal" in req:
                    cmds.append(f"effect give {p} minecraft:instant_health 1 5")
                elif "tp" in req or "spawn" in req:
                    cmds.append(f"tp {p} 0 64 0")
                elif "diamond" in req:
                    cmds.append(f"give {p} minecraft:diamond 1")
        elif "weather" in low:
            opposite = "clear" if state["weather"] in ("rain", "thunder") else "rain"
            cmds.append(f"weather {opposite}")
        elif "night" in low or "dark" in low:
            cmds.append("time set night" if state["time_of_day"] in ("morning", "noon") else "time set day")
        elif "day" in low:
            cmds.append("time set day" if state["time_of_day"] in ("evening", "night") else "time set night")
        elif "announce" in low or "how many" in low:
            cmds.append(f"say Server: {len(state['online_players'])} players online, time is {state['time_of_day']}, weather is {state['weather']}")
        elif "hostile" in low or "kill" in low:
            cmds.append("kill @e[type=!minecraft:player,type=!minecraft:villager,type=!minecraft:iron_golem]")
        elif "lightning" in low:
            for p in state["online_players"]:
                cmds.append(f"execute at {p} run summon minecraft:lightning_bolt")
        if not cmds:
            # Nothing matched: still emit one harmless command so the example
            # contains an rcon step.
            cmds = ["say Check complete"]
        for cmd in cmds:
            msgs.append(tool_call("rcon.execute", {"command": cmd}))
            msgs.append(tool_result({"success": True, "result": rcon_ok(cmd, player)}))
        resp = {"risk_level": 3, "commands": cmds, "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-sstate-{idx:03d}", "server_state_check", msgs))
        idx += 1

    # ── Nearby entities ──
    for req, entity_filter, cmds_tmpl, reasoning in NEARBY_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {req}")]
        msgs.append(tool_call("world.nearby_entities", {"player": player, "radius": 32}))
        ent_data = sim_nearby_entities(entity_filter)
        msgs.append(tool_result(ent_data))
        cmds = [c.replace("{p}", player) for c in cmds_tmpl]
        # The reasoning template may quote the number of entities found.
        real_reasoning = reasoning.replace("{n}", str(ent_data["total"]))
        for cmd in cmds:
            msgs.append(tool_call("rcon.execute", {"command": cmd}))
            msgs.append(tool_result({"success": True, "result": rcon_ok(cmd, player)}))
        resp = {"risk_level": 3, "commands": cmds, "reasoning": real_reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-nearby-{idx:03d}", "nearby_entities", msgs))
        idx += 1

    # ── Memory write ──
    for req, key, mtype, value, reasoning in MEMORY_WRITE_SCENARIOS:
        player = random.choice(PLAYERS)
        if mtype == "location" and isinstance(value, dict):
            # Location placeholders are replaced with a fresh random position.
            value = rpos()
        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {req}")]
        msgs.append(tool_call("memory.write", {"player": player, "type": mtype, "key": key, "value": value}))
        msgs.append(tool_result({"ok": True, "key": key}))
        resp = {"risk_level": 4, "commands": [], "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-memw-{idx:03d}", "memory_write", msgs))
        idx += 1

    # ── Memory read ──
    for req, key, found, cmd_tmpl, reasoning in MEMORY_READ_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {req}")]
        if key:
            msgs.append(tool_call("memory.read", {"player": player, "key": key}))
        else:
            # No key: read every memory stored for the player.
            msgs.append(tool_call("memory.read", {"player": player}))
        if found:
            pos = rpos()
            if key:
                mem = [{"key": key, "type": "location", "value": {"x": pos["x"], "y": pos["y"], "z": pos["z"]}}]
            else:
                mem = [
                    {"key": "home", "type": "location", "value": rpos()},
                    {"key": "favorite_item", "type": "preference", "value": "diamonds"},
                ]
            msgs.append(tool_result({"memories": mem}))
            if cmd_tmpl:
                cmd = cmd_tmpl.replace("{p}", player).replace("{mx}", str(pos["x"])).replace("{my}", str(pos["y"])).replace("{mz}", str(pos["z"]))
                msgs.append(tool_call("rcon.execute", {"command": cmd}))
                msgs.append(tool_result({"success": True, "result": rcon_ok(cmd, player)}))
                resp = {"risk_level": 3, "commands": [cmd], "reasoning": reasoning}
            else:
                resp = {"risk_level": 4, "commands": [], "reasoning": reasoning}
        else:
            msgs.append(tool_result({"memories": []}))
            resp = {"risk_level": 4, "commands": [], "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-memr-{idx:03d}", "memory_read", msgs))
        idx += 1

    # ── Chained tool calls ──
    for req, tools, reasoning in CHAINED_SCENARIOS:
        player = random.choice(PLAYERS)
        pos = rpos()
        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {req}")]
        online = [player]  # replaced once a server_state step reports who is on
        executed = []      # every RCON command issued, echoed in the final response
        rcon_step = 0
        for t in tools:
            if t == "player_info":
                msgs.append(tool_call("world.player_info", {"player": player}))
                msgs.append(tool_result({"health": 18.5, "position": pos, "inventory_summary": "Diamond gear"}))
            elif t == "nearby_entities":
                msgs.append(tool_call("world.nearby_entities", {"player": player, "radius": 32}))
                msgs.append(tool_result(sim_nearby_entities(["zombie", "skeleton"])))
            elif t == "server_state":
                # De-duplicate in case the random player is Ace13245.
                online = list(dict.fromkeys([player, "Ace13245"]))
                msgs.append(tool_call("world.server_state", {}))
                msgs.append(tool_result({"time_of_day": "night", "weather": "clear", "online_players": online, "world_border": 60000000.0}))
            elif t == "memory_write":
                msgs.append(tool_call("memory.write", {"player": player, "type": "location", "key": "marker", "value": pos}))
                msgs.append(tool_result({"ok": True, "key": "marker"}))
            elif t == "memory_read":
                msgs.append(tool_call("memory.read", {"player": player, "key": "home"}))
                home = rpos()
                msgs.append(tool_result({"memories": [{"key": "home", "type": "location", "value": home}]}))
                pos = home  # use home coords for subsequent commands
            elif t == "wiki":
                msgs.append(tool_call("minecraft.wiki_lookup", {"query": "best bow enchantments 1.21"}))
                msgs.append(tool_result({"content": "Bow: Power V, Infinity or Mending, Unbreaking III, Flame I, Punch II. Infinity and Mending are mutually exclusive.", "url": "https://minecraft.wiki/w/Bow"}))
            elif t == "rcon":
                for cmd in _chained_rcon_commands(req, player, pos, online, rcon_step):
                    msgs.append(tool_call("rcon.execute", {"command": cmd}))
                    msgs.append(tool_result({"success": True, "result": rcon_ok(cmd, player)}))
                    executed.append(cmd)
                rcon_step += 1
        # Report the commands that were actually run, like every other section.
        resp = {"risk_level": 3, "commands": executed, "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append(make_example(f"exp-chain-{idx:03d}", "chained", msgs))
        idx += 1

    return examples
def main():
    """Generate the expanded examples, print a per-type tally, and write JSONL.

    The output file is written as UTF-8 explicitly: records are serialised
    with ``ensure_ascii=False``, so relying on the locale's default encoding
    could fail or produce a non-UTF-8 file on some systems.
    """
    from collections import Counter

    print("Generating expanded tool training data...")
    examples = generate_all()

    # Count by type
    counts = Counter(ex["type"] for ex in examples)
    print(f"\nGenerated {len(examples)} examples:")
    for t, c in sorted(counts.items()):
        print(f" {t}: {c}")

    # Write one JSON object per line
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in examples)
    print(f"\nWritten to {OUTPUT_PATH}")
    print("Next: merge into tool_training.jsonl or add to training pipeline")


if __name__ == "__main__":
    main()
@@ -0,0 +1,663 @@
#!/usr/bin/env python3
"""
Generate RCON-validated training data for server plugins.
Plugins: WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms, FAWE
Each command is executed against the live server to capture real responses.
Target: 120+ high-quality examples.
"""
import json
import random
import re
import sys
import time
from pathlib import Path
# Repository root: three `.parent` hops up from this file's resolved path.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Put the root first on sys.path — presumably so the `agent.*` imports below
# resolve when this file is run directly as a script.
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.persistent_rcon import get_rcon
from agent.tools.tool_schemas import qwen3_tools_block
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
# Destination JSONL file for the generated examples.
OUTPUT_PATH = PROJECT_ROOT / "data" / "raw" / "plugin_training.jsonl"
# Tool description block, computed once at import time and appended to prompts.
TOOLS_BLOCK = qwen3_tools_block()
# System prompt shared by every non-god example in this file.
SYSTEM = (
    "You are a Minecraft 1.21 command translator for a Paper server with plugins: "
    "FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n\n"
    "You know plugin command syntax. Use the correct plugin prefix when needed.\n"
    "PERMISSION LEVEL: 4 (generous).\n\n"
    "Return JSON: {\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)
# Player names used for the requesting player and substituted for "{p}".
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
def sys_msg():
    """Return the system-role chat message carrying the module's SYSTEM prompt."""
    message = {"role": "system"}
    message["content"] = SYSTEM
    return message
def user_msg(text):
    """Wrap *text* as a user-role chat message."""
    return dict(role="user", content=text)
def tool_call(name, args):
    """Return an assistant message that invokes tool *name* with *args*.

    The call is JSON-encoded and wrapped in ``<tool_call>`` tags, one per line.
    """
    payload = json.dumps({"name": name, "arguments": args})
    content = "<tool_call>\n" + payload + "\n</tool_call>"
    return {"role": "assistant", "content": content}
def tool_result(data):
    """Return a tool-role message whose content is *data* encoded as JSON."""
    serialized = json.dumps(data)
    return {"role": "tool", "content": serialized}
def final_response(resp):
    """Return the closing assistant message with *resp* encoded as JSON."""
    body = json.dumps(resp)
    return {"role": "assistant", "content": body}
def is_error(result):
    """Return True when *result* contains one of the known error markers.

    Matching is a case-sensitive substring test.
    """
    markers = ("<--[HERE]", "Unknown command", "Incorrect argument", "An internal error")
    for marker in markers:
        if marker in result:
            return True
    return False
def run_cmd(rcon, cmd):
    """Run *cmd* through *rcon* and return ``(text, ok)``.

    On a normal return, ``text`` is the response with every ``§`` + one
    character sequence removed, cut to 400 characters, and ``ok`` is False
    when ``is_error`` flags the raw response. If anything raises, ``text`` is
    the exception message cut to 200 characters and ``ok`` is False.
    """
    try:
        raw = rcon.command(cmd)
        stripped = re.sub(r'§.', '', raw)
        succeeded = not is_error(raw)
    except Exception as exc:
        # Best effort: a failed command becomes a failed result, not a crash.
        return str(exc)[:200], False
    return stripped[:400], succeeded
def make_example(eid, etype, msgs):
    """Assemble a training record tagged with the ``plugin_training`` source."""
    example = {"id": eid, "source": "plugin_training"}
    example["type"] = etype
    example["messages"] = msgs
    return example
def build_rcon_example(rcon, eid, etype, prompt, commands, reasoning, player=None, mode="sudo"):
    """Run *commands* over RCON and return a multi-turn example with the real replies.

    Args:
        rcon: Connection object handed to ``run_cmd``.
        eid: Value stored under ``"id"``.
        etype: Value stored under ``"type"``.
        prompt: Player request text, prefixed with ``Player <name>: ``.
        commands: Command templates; ``{p}`` is replaced by the player name.
        reasoning: Reasoning text for the final response.
        player: Requesting player; a random entry of ``PLAYERS`` when falsy.
        mode: Accepted but not used by this function.

    Returns:
        dict with ``id``, ``source``, ``type``, ``messages`` and ``all_success``
        (True only if every command ran without a detected error).
    """
    if not player:
        player = random.choice(PLAYERS)
    msgs = [sys_msg(), user_msg(f"Player {player}: {prompt}")]
    resolved_cmds = []
    outcomes = []
    for template in commands:
        command = template.replace("{p}", player)
        reply, succeeded = run_cmd(rcon, command)
        outcomes.append(succeeded)
        msgs.append(tool_call("rcon.execute", {"command": command}))
        msgs.append(tool_result({"success": succeeded, "result": reply}))
        resolved_cmds.append(command)
        time.sleep(0.05)  # short pause between consecutive server commands
    msgs.append(final_response({"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}))
    example = {"id": eid, "source": "plugin_training", "type": etype}
    example["messages"] = msgs
    example["all_success"] = all(outcomes)
    return example
def gen_worldguard(rcon):
    """Build the WorldGuard region-management examples.

    Prints a progress line, then passes each (prompt, commands, reasoning)
    scenario to ``build_rcon_example`` with ids ``plugin-wg-NNN``.
    """
    print(" WorldGuard...")
    region_cases = [
        # Region creation and management
        ("sudo protect this area as my-base", [
            "//pos1 0,64,0",
            "//pos2 50,128,50",
            "rg define my-base {p}",
        ], "Selected area and created 'my-base' region owned by the player."),
        ("sudo protect a 20 block radius around spawn", [
            "rg define spawn-protection -w world",
            "rg flag spawn-protection pvp deny",
            "rg flag spawn-protection mob-spawning deny",
        ], "Created spawn protection region with PvP and mob spawning denied."),
        ("sudo make my region pvp-free", [
            "rg flag my-base pvp deny",
        ], "Disabled PvP in the player's region."),
        ("sudo allow TNT in the arena", [
            "rg flag arena tnt allow",
        ], "Enabled TNT in the arena region."),
        ("sudo prevent mobs from spawning in town", [
            "rg flag town mob-spawning deny",
        ], "Disabled mob spawning in the town region."),
        ("sudo add Ace13245 as a member of my base", [
            "rg addmember my-base Ace13245",
        ], "Added Ace13245 as a member of my-base region."),
        ("sudo remove TheBigBoss from my region", [
            "rg removemember my-base TheBigBoss",
        ], "Removed TheBigBoss from my-base region."),
        ("sudo show info about the spawn region", [
            "rg info spawn-protection",
        ], "Displayed info about the spawn-protection region."),
        ("sudo list all regions", [
            "rg list",
        ], "Listed all defined regions."),
        ("sudo prevent building in spawn", [
            "rg flag spawn-protection build deny",
        ], "Denied building in spawn protection zone."),
        ("sudo make the arena a free-for-all", [
            "rg flag arena pvp allow",
            "rg flag arena invincibility deny",
            "rg flag arena blocked-cmds /home,/spawn,/tpa",
        ], "Made arena PvP-enabled, disabled invincibility, blocked escape commands."),
        ("sudo set entry message for my base", [
            "rg flag my-base greeting Welcome to my base!",
        ], "Set greeting message when entering the region."),
        ("sudo deny entry to non-members in my vault", [
            "rg flag vault entry -g nonmembers deny",
        ], "Denied entry for non-members to the vault region."),
        ("sudo prevent creeper explosions everywhere", [
            "rg flag __global__ creeper-explosion deny",
        ], "Disabled creeper explosions globally."),
        ("sudo make a healing zone at spawn", [
            "rg flag spawn-protection heal-amount 2",
            "rg flag spawn-protection heal-delay 3",
        ], "Players in spawn zone heal 1 heart every 3 seconds."),
        ("sudo delete the old-arena region", [
            "rg remove old-arena",
        ], "Removed the old-arena region."),
        ("sudo set region priority for spawn over global", [
            "rg setpriority spawn-protection 10",
        ], "Set spawn-protection priority to 10 (higher overrides lower)."),
        ("sudo prevent fire spread in the village", [
            "rg flag village fire-spread deny",
            "rg flag village lava-fire deny",
        ], "Disabled fire spread and lava fire in the village region."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-wg-{n:03d}", "worldguard", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(region_cases)
    ]
def gen_coreprotect(rcon):
    """Build the CoreProtect inspect/lookup/rollback examples.

    Prints a progress line, then passes each scenario to
    ``build_rcon_example`` with ids ``plugin-cp-NNN``.
    """
    print(" CoreProtect...")
    audit_cases = [
        ("sudo check who broke blocks near me", [
            "co inspect",
        ], "Enabled CoreProtect inspector mode. Player can now click blocks to see history."),
        ("sudo rollback griefing from the last hour", [
            "co rollback t:1h",
        ], "Rolled back all block changes from the last hour."),
        ("sudo rollback what TheBigBoss did in the last 30 minutes", [
            "co rollback u:TheBigBoss t:30m",
        ], "Rolled back TheBigBoss's actions from the last 30 minutes."),
        ("sudo restore blocks that were rolled back", [
            "co restore t:1h",
        ], "Restored previously rolled-back changes from the last hour."),
        ("sudo check what happened here in the last day", [
            "co lookup t:24h r:10",
        ], "Looked up all changes within 10 blocks in the last 24 hours."),
        ("sudo rollback only TNT damage", [
            "co rollback t:1h a:tnt",
        ], "Rolled back only TNT explosion damage from the last hour."),
        ("sudo rollback fire damage from the last 2 hours", [
            "co rollback t:2h a:fire",
        ], "Rolled back fire damage from the last 2 hours."),
        ("sudo who placed blocks around 100 64 200?", [
            "co lookup t:7d r:5 l:100,64,200",
        ], "Looked up 7 days of block placement history near those coordinates."),
        ("sudo undo what happened to the diamond blocks", [
            "co rollback t:12h b:diamond_block",
        ], "Rolled back changes to diamond blocks in the last 12 hours."),
        ("sudo rollback Ace's actions but only block breaks", [
            "co rollback u:Ace13245 t:1h a:block",
        ], "Rolled back only block break/place actions by Ace in the last hour."),
        ("sudo check the status of CoreProtect", [
            "co status",
        ], "Displayed CoreProtect status including database size and version."),
        ("sudo stop inspecting", [
            "co inspect",
        ], "Toggled off CoreProtect inspector mode."),
        ("sudo rollback all container theft in the last 6 hours", [
            "co rollback t:6h a:container",
        ], "Rolled back container (chest/barrel/etc.) access in the last 6 hours."),
        ("sudo lookup what Ace13245 did today", [
            "co lookup u:Ace13245 t:24h",
        ], "Looked up all of Ace13245's actions in the last 24 hours."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-cp-{n:03d}", "coreprotect", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(audit_cases)
    ]
def gen_essentialsx(rcon):
    """Build the EssentialsX home, warp, kit, economy and utility examples.

    Prints a progress line, then passes each scenario to
    ``build_rcon_example`` with ids ``plugin-ess-NNN``.
    """
    print(" EssentialsX...")
    essentials_cases = [
        # Homes
        ("sudo set my home here", [
            "sethome {p} home",
        ], "Set player's home location."),
        ("sudo set a home called mine", [
            "sethome {p} mine",
        ], "Set named home 'mine' for the player."),
        ("sudo tp me to my home", [
            "home {p}",
        ], "Teleported player to their default home."),
        ("sudo delete my mine home", [
            "delhome {p} mine",
        ], "Deleted the 'mine' home."),
        # Warps
        ("sudo create a warp called arena", [
            "setwarp arena",
        ], "Created warp point 'arena' at current location."),
        ("sudo tp me to the arena warp", [
            "warp arena {p}",
        ], "Warped player to the arena."),
        ("sudo list all warps", [
            "warps",
        ], "Listed all available warp points."),
        ("sudo delete the old warp", [
            "delwarp old",
        ], "Deleted the 'old' warp point."),
        ("sudo create a warp at spawn", [
            "setwarp spawn",
        ], "Created spawn warp at current location."),
        # Kits
        ("sudo give me the starter kit", [
            "kit starter {p}",
        ], "Gave the starter kit to the player."),
        # Economy
        ("sudo give Ace 1000 coins", [
            "eco give Ace13245 1000",
        ], "Added $1000 to Ace13245's balance."),
        ("sudo check my balance", [
            "balance {p}",
        ], "Checked player's economy balance."),
        ("sudo set everyone's balance to 500", [
            "eco set * 500",
        ], "Set all players' balance to $500."),
        ("sudo take 200 from TheBigBoss", [
            "eco take TheBigBoss 200",
        ], "Removed $200 from TheBigBoss's balance."),
        # Teleport
        ("sudo tp Ace to me", [
            "tp Ace13245 {p}",
        ], "Teleported Ace13245 to the requesting player."),
        ("sudo tp everyone to spawn", [
            "tpall spawn",
        ], "Teleported all players to spawn."),
        # Player management
        ("sudo set my nickname to StoneKing", [
            "nick {p} StoneKing",
        ], "Changed player's display name to StoneKing."),
        ("sudo heal me", [
            "heal {p}",
        ], "Fully healed the player."),
        ("sudo feed me", [
            "feed {p}",
        ], "Restored player's hunger bar."),
        ("sudo repair what I'm holding", [
            "repair {p}",
        ], "Repaired the player's held item."),
        ("sudo check how long Ace has played", [
            "seen Ace13245",
        ], "Checked when Ace13245 was last online and playtime."),
        ("sudo set spawn point here", [
            "setspawn",
        ], "Set the server spawn point to current location."),
        ("sudo broadcast a message to everyone", [
            "broadcast Welcome to the new world!",
        ], "Broadcast server-wide message."),
        ("sudo give me god mode", [
            "god {p}",
        ], "Toggled god mode (invulnerability) for the player."),
        ("sudo fly mode on", [
            "fly {p}",
        ], "Toggled flight for the player."),
        ("sudo set player speed to fast", [
            "speed fly 5 {p}",
        ], "Set flying speed to maximum for the player."),
        ("sudo check who's online and their info", [
            "list",
            "whois {p}",
        ], "Listed online players and checked requesting player's info."),
        ("sudo make it sunny", [
            "sun",
        ], "Set weather to clear using Essentials shorthand."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-ess-{n:03d}", "essentialsx", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(essentials_cases)
    ]
def gen_luckperms(rcon):
    """Build the LuckPerms permission and group management examples.

    Prints a progress line, then passes each scenario to
    ``build_rcon_example`` with ids ``plugin-lp-NNN``.
    """
    print(" LuckPerms...")
    permission_cases = [
        ("sudo give Ace13245 permission to fly", [
            "lp user Ace13245 permission set essentials.fly true",
        ], "Granted flight permission to Ace13245."),
        ("sudo create a VIP group", [
            "lp creategroup vip",
        ], "Created the VIP permission group."),
        ("sudo add Ace to VIP group", [
            "lp user Ace13245 parent add vip",
        ], "Added Ace13245 to the VIP group."),
        ("sudo give VIP group access to /fly and /heal", [
            "lp group vip permission set essentials.fly true",
            "lp group vip permission set essentials.heal true",
        ], "Gave VIP group fly and heal permissions."),
        ("sudo remove TheBigBoss from VIP", [
            "lp user TheBigBoss parent remove vip",
        ], "Removed TheBigBoss from VIP group."),
        ("sudo check what permissions Ace has", [
            "lp user Ace13245 permission info",
        ], "Displayed Ace13245's permission info."),
        ("sudo give temporary VIP for 1 day to slingshooter08", [
            "lp user slingshooter08 parent addtemp vip 1d",
        ], "Gave slingshooter08 temporary VIP status for 24 hours."),
        ("sudo set VIP prefix to gold [VIP]", [
            "lp group vip meta setprefix 100 \"&6[VIP] \"",
        ], "Set gold-colored VIP chat prefix."),
        ("sudo create a builder group with WorldEdit access", [
            "lp creategroup builder",
            "lp group builder permission set worldedit.* true",
        ], "Created builder group with full WorldEdit permissions."),
        ("sudo deny TNT placement for default group", [
            "lp group default permission set minecraft.command.setblock false",
        ], "Denied setblock command for default group."),
        ("sudo list all groups", [
            "lp listgroups",
        ], "Listed all permission groups."),
        ("sudo check VIP group permissions", [
            "lp group vip permission info",
        ], "Displayed VIP group permission details."),
        ("sudo give me all permissions", [
            "lp user {p} permission set * true",
        ], "Granted wildcard (all) permissions to the player."),
        ("sudo set default group to have basic essentials", [
            "lp group default permission set essentials.home true",
            "lp group default permission set essentials.spawn true",
            "lp group default permission set essentials.tpa true",
            "lp group default permission set essentials.msg true",
        ], "Gave default group basic Essentials permissions: home, spawn, tpa, msg."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-lp-{n:03d}", "luckperms", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(permission_cases)
    ]
def gen_fawe_advanced(rcon):
    """Build the FAWE/WorldEdit examples that go beyond basic commands.

    Prints a progress line, then passes each scenario to
    ``build_rcon_example`` with ids ``plugin-fawe-NNN``.
    """
    print(" FAWE advanced...")
    edit_cases = [
        ("sudo make a sphere of glass 10 blocks wide", [
            "//sphere minecraft:glass 10",
        ], "Created a glass sphere with radius 10."),
        ("sudo hollow sphere of stone", [
            "//hsphere minecraft:stone 8",
        ], "Created hollow stone sphere radius 8."),
        ("sudo cylinder of quartz 5 wide 10 tall", [
            "//cyl minecraft:quartz_block 5 10",
        ], "Created quartz cylinder radius 5, height 10."),
        ("sudo replace all stone with deepslate in selection", [
            "//replace minecraft:stone minecraft:deepslate",
        ], "Replaced stone with deepslate in the selected region."),
        ("sudo copy and paste this structure 20 blocks east", [
            "//copy",
            "//paste",
        ], "Copied selection and pasted it."),
        ("sudo undo my last WorldEdit action", [
            "//undo",
        ], "Undid the last WorldEdit operation."),
        ("sudo redo what I just undid", [
            "//redo",
        ], "Redid the last undone WorldEdit operation."),
        ("sudo smooth the terrain in my selection", [
            "//smooth 5",
        ], "Smoothed terrain in selection with 5 iterations."),
        ("sudo drain all water within 20 blocks", [
            "//drain 20",
        ], "Drained water within 20 block radius."),
        ("sudo set a pyramid of sandstone 10 tall", [
            "//pyramid minecraft:sandstone 10",
        ], "Created sandstone pyramid 10 blocks tall."),
        ("sudo hollow out the selected area", [
            "//hollow",
        ], "Hollowed out the selected region leaving only the shell."),
        ("sudo make walls around my selection", [
            "//walls minecraft:stone_bricks",
        ], "Built stone brick walls around the selection (no floor/ceiling)."),
        ("sudo fill the selection with a checkerboard pattern", [
            "//set 50%minecraft:white_concrete,50%minecraft:black_concrete",
        ], "Filled with alternating black and white concrete."),
        ("sudo stack my selection 5 times going north", [
            "//stack 5 north",
        ], "Stacked the selection 5 times northward."),
        ("sudo generate a forest in my selection", [
            "//forest oak 10",
        ], "Generated oak trees with 10% density in the selection."),
        ("sudo remove all snow in a 50 block radius", [
            "//removenear minecraft:snow 50",
        ], "Removed snow layers within 50 blocks."),
        ("sudo count blocks in my selection", [
            "//count minecraft:diamond_ore",
        ], "Counted diamond ore blocks in the selection."),
        ("sudo make a natural-looking cave", [
            "//cyl minecraft:air 4 8",
        ], "Carved cylindrical tunnel (air cylinder) radius 4, depth 8."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-fawe-{n:03d}", "fawe", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(edit_cases)
    ]
def gen_combined_plugin_examples(rcon):
    """Build examples whose command lists span several plugins in one request.

    Prints a progress line, then passes each scenario to
    ``build_rcon_example`` with ids ``plugin-combo-NNN``.
    """
    print(" Combined plugin examples...")
    combo_cases = [
        ("sudo create a protected arena with WorldEdit and WorldGuard", [
            "//pos1 -50,60,-50",
            "//pos2 50,100,50",
            "//set minecraft:air",
            "//walls minecraft:stone_bricks",
            "//floor minecraft:smooth_stone",
            "rg define arena {p}",
            "rg flag arena pvp allow",
            "rg flag arena heal-amount 1",
            "rg flag arena heal-delay 5",
        ], "Built arena with WorldEdit, then protected with WorldGuard. PvP enabled, slow healing."),
        ("sudo set up a new player experience", [
            "lp group default permission set essentials.home true",
            "lp group default permission set essentials.spawn true",
            "setwarp tutorial",
            "rg define tutorial-area",
            "rg flag tutorial-area pvp deny",
            "rg flag tutorial-area mob-spawning deny",
        ], "Set up default permissions, tutorial warp, and protected tutorial area."),
        ("sudo rollback Ace's WorldEdit mistakes and revoke his builder perms", [
            "co rollback u:Ace13245 t:30m",
            "lp user Ace13245 parent remove builder",
        ], "Rolled back Ace's changes and removed builder group access."),
        ("sudo create a VIP lounge area", [
            "//pos1 0,64,0",
            "//pos2 20,74,20",
            "//set minecraft:quartz_block",
            "//hollow",
            "rg define vip-lounge",
            "rg flag vip-lounge entry -g nonmembers deny",
            "rg flag vip-lounge greeting Welcome to the VIP Lounge!",
            "setwarp vip-lounge",
        ], "Built quartz room with WE, restricted entry to members only via WG, created warp."),
        ("sudo give TheBigBoss a reward package", [
            "eco give TheBigBoss 5000",
            "give TheBigBoss minecraft:netherite_ingot 3",
            "give TheBigBoss minecraft:diamond 64",
            "lp user TheBigBoss parent addtemp vip 7d",
        ], "Gave economy reward, items, and 7-day VIP status."),
        ("sudo prepare the server for an event", [
            "broadcast The Battle Royale event starts in 5 minutes!",
            "rg flag arena pvp allow",
            "rg flag arena exit deny",
            "eco set * 0",
            "effect give @a minecraft:regeneration 300 0",
        ], "Announced event, set arena flags, reset economy, gave regen."),
    ]
    return [
        build_rcon_example(rcon, f"plugin-combo-{n:03d}", "combined", prompt, cmds, reasoning)
        for n, (prompt, cmds, reasoning) in enumerate(combo_cases)
    ]
def gen_god_mode_plugin_examples(rcon):
    """Build God-persona examples that answer prayers with plugin commands.

    Each scenario is (prayer, command templates, in-character message,
    reasoning). Commands are executed through ``run_cmd`` and the real
    responses are recorded as tool results. The final response carries an
    extra ``"message"`` field holding the in-character reply.

    Every returned example includes ``"all_success"`` (True only when no
    command reported an error), matching ``build_rcon_example``, so that
    failed runs are not counted as successes by consumers that read the flag.

    Args:
        rcon: Connection object handed to ``run_cmd``.

    Returns:
        list of example dicts with ids ``plugin-god-NNN``.
    """
    print(" God mode plugin examples...")
    examples = []
    GOD_SYSTEM = (
        "You are God in a Minecraft server with plugins: WorldGuard, CoreProtect, "
        "EssentialsX, Vault, LuckPerms, FAWE.\n"
        "Return JSON: {\"risk_level\": <0-5>, \"message\": \"...\", \"commands\": [...], \"reasoning\": \"...\"}\n\n"
        + SYNTAX_RULES + "\n" + TOOLS_BLOCK
    )
    GOD = [
        ("pray lord, someone destroyed my house!", [
            "co rollback t:2h r:30",
        ], "A mortal's home was desecrated. I have turned back time to restore it.",
         "Used CoreProtect to rollback damage near the player."),
        ("pray bless me with power, oh great one", [
            "lp user {p} parent addtemp vip 1d",
            "eco give {p} 1000",
            "effect give {p} minecraft:strength 600 1",
        ], "You show devotion. I grant you temporary divine favor — VIP status, wealth, and strength.",
         "Gave 24h VIP, economy bonus, and strength effect."),
        ("pray protect my village from monsters", [
            "rg define {p}-village {p}",
            "rg flag {p}-village mob-spawning deny",
            "rg flag {p}-village creeper-explosion deny",
        ], "I cast a divine ward over your village. No creature of darkness shall spawn within its borders.",
         "Created region with mob spawning and creeper explosion protection."),
        ("pray smite the wicked TheBigBoss for griefing", [
            "co lookup u:TheBigBoss t:1h",
            "execute at TheBigBoss run summon minecraft:lightning_bolt",
            "eco take TheBigBoss 500",
        ], "I have seen TheBigBoss's transgressions. Lightning falls upon the wicked, and their coffers are lightened.",
         "Checked griefing evidence, smited with lightning, took economy penalty."),
        ("pray I am lost and cannot find my way home", [
            "home {p}",
        ], "Lost child, I return you to the place you call home.",
         "Used Essentials home teleport."),
        ("pray make me a grand temple", [
            "//cyl minecraft:quartz_block 8 12",
            "//hollow",
            "//cyl minecraft:quartz_pillar 8 1",
            "rg define temple-{p} {p}",
            "rg flag temple-{p} pvp deny",
            "setwarp temple",
        ], "A temple rises from the earth at my command. It is sanctified — no blood shall be spilled within.",
         "Built quartz temple with WE, protected with WG, created warp."),
    ]
    for i, (prompt, cmds, god_msg, reasoning) in enumerate(GOD):
        player = random.choice(PLAYERS)
        msgs = [{"role": "system", "content": GOD_SYSTEM}, user_msg(f"Player {player}: {prompt}")]
        resolved_cmds = []
        all_ok = True
        for cmd in cmds:
            cmd = cmd.replace("{p}", player)
            result, ok = run_cmd(rcon, cmd)
            if not ok:
                all_ok = False
            msgs.append(tool_call("rcon.execute", {"command": cmd}))
            msgs.append(tool_result({"success": ok, "result": result}))
            resolved_cmds.append(cmd)
            time.sleep(0.05)  # short pause between consecutive server commands
        resp = {"risk_level": 3, "message": god_msg, "commands": resolved_cmds, "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append({
            "id": f"plugin-god-{i:03d}", "source": "plugin_training",
            "type": "god_plugin", "messages": msgs, "all_success": all_ok,
        })
    return examples
def main():
    """Connect to the server over RCON, run every generator, and write JSONL.

    Command-line options ``--rcon-host``, ``--rcon-port`` and ``--rcon-pass``
    select the server. After each generator a count of examples (and how many
    were all-success) is printed, followed by a per-type summary.

    The output file is written as UTF-8 explicitly: records are serialised
    with ``ensure_ascii=False`` and contain non-ASCII text, so relying on the
    locale's default encoding could fail or produce a non-UTF-8 file.
    """
    import argparse
    from collections import Counter

    parser = argparse.ArgumentParser()
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25577)
    # NOTE(review): the default password is baked into the script; prefer
    # passing --rcon-pass explicitly.
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    args = parser.parse_args()

    print(f"Connecting to {args.rcon_host}:{args.rcon_port}...")
    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)
    print("Connected.\n")

    all_examples = []
    generators = [
        gen_worldguard,
        gen_coreprotect,
        gen_essentialsx,
        gen_luckperms,
        gen_fawe_advanced,
        gen_combined_plugin_examples,
        gen_god_mode_plugin_examples,
    ]
    for gen in generators:
        examples = gen(rcon)
        all_examples.extend(examples)
        # Examples without the flag are treated as successful.
        ok = sum(1 for e in examples if e.get("all_success", True))
        print(f"{len(examples)} examples ({ok} all-success)")

    # Summary
    by_type = Counter(e["type"] for e in all_examples)
    print(f"\nTotal: {len(all_examples)} examples")
    for t, c in sorted(by_type.items()):
        print(f" {t}: {c}")
    success_total = sum(1 for e in all_examples if e.get("all_success", True))
    print(f" All-success: {success_total}/{len(all_examples)}")

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in all_examples)
    print(f"\nWritten to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,661 @@
#!/usr/bin/env python3
"""
Generate high-quality tool-calling training data with REAL RCON responses.
No AI involved — we craft the prompts, commands, and tool sequences by hand,
then execute each command through RCON to get actual server responses.
This produces gold-standard training data with real validation.
Generates examples across all 14 tools with proper multi-turn conversations:
- script.validate → script.write → script.execute (with real RCON validation)
- memory.read → rcon.execute (tp to saved location)
- world.nearby_entities → rcon.execute (kill scanned mobs)
- wiki_lookup → rcon.execute (apply looked-up knowledge)
- chained multi-tool sequences
Usage:
python3 generate_rcon_validated_training.py --rcon-host 192.168.0.244 --rcon-port 25577
"""
import argparse
import json
import random
import re
import socket
import struct
import sys
import time
from pathlib import Path
# Directory three levels above this file; put on sys.path so the ``agent``
# package imports below resolve when this file is run as a script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.tool_schemas import qwen3_tools_block
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
# Destination JSONL file for the generated examples.
OUTPUT_PATH = PROJECT_ROOT / "data" / "raw" / "rcon_validated_tool_training.jsonl"
# Tool description text appended to both system prompts below.
TOOLS_BLOCK = qwen3_tools_block()
# System prompt used for "sudo" conversations (see sys_msg).
SYSTEM_SUDO = (
    "You are a Minecraft 1.21 command translator with full tool access.\n"
    "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, world.server_state, "
    "world.nearby_entities, memory.read, memory.write, script.write, script.validate, "
    "script.execute, script.read, script.list, script.delete, script.schedule.\n\n"
    "For complex builds (4+ commands), write mcfunction scripts. Validate before writing.\n"
    "PERMISSION LEVEL: 4 (generous).\n\n"
    "Return JSON: {\"risk_level\": <0-5>, \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)
# System prompt used for "god" conversations; its JSON contract adds a
# "message" field and it omits RISK_GRADIENT.
SYSTEM_GOD = (
    "You are God in a Minecraft server. You have full tool access.\n"
    "Return JSON: {\"risk_level\": <0-5>, \"message\": \"...\", \"commands\": [...], \"reasoning\": \"...\"}\n\n"
    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
)
# Player names the generators pick from at random for each example.
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
from agent.tools.persistent_rcon import get_rcon as _get_rcon
class SimpleRCON:
    """Thin adapter that forwards commands to a ``persistent_rcon`` connection."""

    def __init__(self, host, port, password):
        """Obtain the underlying connection for *host*, *port* and *password*."""
        connection = _get_rcon(host, port, password)
        self._rcon = connection

    def command(self, cmd):
        """Send *cmd* through the wrapped connection and return its reply."""
        reply = self._rcon.command(cmd)
        return reply
def sys_msg(mode="sudo"):
    """Build the system message; ``mode == "god"`` selects the God prompt, anything else the sudo prompt."""
    if mode == "god":
        prompt = SYSTEM_GOD
    else:
        prompt = SYSTEM_SUDO
    return {"role": "system", "content": prompt}
def user_msg(text):
    """Wrap *text* as a chat message with the ``user`` role."""
    return dict(role="user", content=text)
def tool_call(name, args):
    """Build an assistant message holding one ``<tool_call>`` block.

    The tool *name* and its *args* are JSON-encoded as
    ``{"name": ..., "arguments": ...}`` between the opening and closing tags.
    """
    payload = json.dumps({"name": name, "arguments": args})
    body = "<tool_call>\n" + payload + "\n</tool_call>"
    return {"role": "assistant", "content": body}
def tool_result(data):
    """Wrap *data*, JSON-encoded, as a chat message with the ``tool`` role."""
    encoded = json.dumps(data)
    return {"role": "tool", "content": encoded}
def final_response(resp):
    """Wrap the final answer *resp*, JSON-encoded, as an ``assistant`` message."""
    encoded = json.dumps(resp)
    return {"role": "assistant", "content": encoded}
def is_error(result):
    """Return True when *result* contains any of the known error marker substrings."""
    markers = ("<--[HERE]", "Unknown", "Incorrect", "Expected", "Invalid")
    for marker in markers:
        if marker in result:
            return True
    return False
def validate_commands_rcon(rcon, commands):
    """Run each command through RCON and report how it went.

    Returns one dict per input command with keys ``cmd`` (the stripped
    command), ``result`` (server reply cut to 300 characters, or the
    exception text) and ``ok``. Blank lines and ``#`` comments are not sent
    to the server and are reported as passing with result ``"comment"``.
    """
    outcomes = []
    for raw in commands:
        stripped = raw.strip()
        if stripped == "" or stripped.startswith("#"):
            outcomes.append({"cmd": stripped, "result": "comment", "ok": True})
            continue
        try:
            reply = rcon.command(stripped)
            passed = not is_error(reply)
            entry = {"cmd": stripped, "result": reply[:300], "ok": passed}
        except Exception as exc:
            # Any failure while talking to the server marks this command as failed.
            entry = {"cmd": stripped, "result": str(exc), "ok": False}
        outcomes.append(entry)
    return outcomes
def gen_script_examples(rcon):
    """Build validate -> write -> execute script conversations.

    Each scenario's commands are run through ``validate_commands_rcon`` to
    obtain the ``script.validate`` result; the ``script.write`` and
    ``script.execute`` tool results are constructed locally. Returns a list
    of example dicts.
    """
    examples = []
    SCRIPTS = [
        ("build me a small oak cabin", "oak_cabin", "Small oak cabin with door, windows, and lantern", [
            "fill ~-3 ~ ~-3 ~3 ~3 ~3 minecraft:oak_planks hollow",
            "fill ~-3 ~ ~-3 ~3 ~ ~3 minecraft:oak_planks",
            "fill ~-2 ~1 ~-3 ~2 ~2 ~-3 minecraft:air",
            "setblock ~0 ~1 ~-3 minecraft:oak_door[facing=north,half=lower]",
            "setblock ~0 ~2 ~-3 minecraft:oak_door[facing=north,half=upper]",
            "setblock ~2 ~1 ~3 minecraft:glass_pane",
            "setblock ~-2 ~1 ~3 minecraft:glass_pane",
            "setblock ~0 ~2 ~0 minecraft:lantern[hanging=true]",
        ]),
        ("create a pvp arena with wool corners", "pvp_arena", "PvP arena with colored corners", [
            "fill ~-12 ~-1 ~-12 ~12 ~-1 ~12 minecraft:smooth_stone",
            "fill ~-12 ~ ~-12 ~12 ~4 ~-12 minecraft:iron_bars",
            "fill ~-12 ~ ~12 ~12 ~4 ~12 minecraft:iron_bars",
            "fill ~-12 ~ ~-12 ~-12 ~4 ~12 minecraft:iron_bars",
            "fill ~12 ~ ~-12 ~12 ~4 ~12 minecraft:iron_bars",
            "fill ~-12 ~-1 ~-12 ~-10 ~-1 ~-10 minecraft:red_wool",
            "fill ~10 ~-1 ~10 ~12 ~-1 ~12 minecraft:blue_wool",
        ]),
        ("make an enchanting room", "enchant_room", "Enchanting table surrounded by bookshelves", [
            "fill ~-3 ~ ~-3 ~3 ~3 ~3 minecraft:deepslate_bricks hollow",
            "fill ~-2 ~ ~-2 ~2 ~2 ~2 minecraft:air",
            "setblock ~0 ~ ~0 minecraft:enchanting_table",
            "fill ~-2 ~1 ~-2 ~2 ~1 ~2 minecraft:bookshelf",
            "fill ~-1 ~1 ~-1 ~1 ~1 ~1 minecraft:air",
            "setblock ~0 ~1 ~0 minecraft:enchanting_table",
            "setblock ~0 ~2 ~0 minecraft:lantern[hanging=true]",
        ]),
        ("build a nether portal frame", "nether_portal", "Obsidian nether portal frame", [
            "fill ~0 ~ ~0 ~3 ~4 ~0 minecraft:obsidian",
            "fill ~1 ~1 ~0 ~2 ~3 ~0 minecraft:air",
        ]),
        ("create a mob farm collection area", "mob_farm", "Mob farm with hoppers and chest", [
            "fill ~-4 ~-1 ~-4 ~4 ~-1 ~4 minecraft:cobblestone",
            "setblock ~0 ~-2 ~0 minecraft:chest",
            "setblock ~0 ~-1 ~0 minecraft:hopper",
            "fill ~-4 ~ ~0 ~-1 ~ ~0 minecraft:water",
            "fill ~1 ~ ~0 ~4 ~ ~0 minecraft:water",
        ]),
        ("build a lookout tower", "lookout_tower", "Tall stone tower with viewing platform", [
            "fill ~-1 ~ ~-1 ~1 ~8 ~1 minecraft:stone_bricks hollow",
            "fill ~0 ~ ~0 ~0 ~7 ~0 minecraft:air",
            "fill ~1 ~1 ~0 ~1 ~7 ~0 minecraft:ladder[facing=west]",
            "fill ~-2 ~8 ~-2 ~2 ~8 ~2 minecraft:stone_brick_slab",
            "fill ~-2 ~9 ~-2 ~2 ~9 ~2 minecraft:stone_brick_wall",
            "fill ~-1 ~9 ~-1 ~1 ~9 ~1 minecraft:air",
            "setblock ~0 ~10 ~0 minecraft:lantern",
        ]),
        ("make a fishing dock", "fishing_dock", "Wooden dock extending over water", [
            "fill ~0 ~-1 ~0 ~1 ~-1 ~8 minecraft:oak_planks",
            "fill ~-1 ~-2 ~0 ~-1 ~-1 ~8 minecraft:oak_fence",
            "fill ~2 ~-2 ~0 ~2 ~-1 ~8 minecraft:oak_fence",
            "setblock ~0 ~0 ~8 minecraft:oak_fence",
            "setblock ~1 ~0 ~8 minecraft:oak_fence",
            "setblock ~0 ~0 ~0 minecraft:lantern",
        ]),
        ("create a garden with flower beds", "garden", "Flower garden with paths", [
            "fill ~-5 ~-1 ~-5 ~5 ~-1 ~5 minecraft:grass_block",
            "fill ~-5 ~-1 ~0 ~5 ~-1 ~0 minecraft:gravel",
            "fill ~0 ~-1 ~-5 ~0 ~-1 ~5 minecraft:gravel",
            "setblock ~-3 ~ ~-3 minecraft:rose_bush[half=lower]",
            "setblock ~3 ~ ~-3 minecraft:peony[half=lower]",
            "setblock ~-3 ~ ~3 minecraft:lilac[half=lower]",
            "setblock ~3 ~ ~3 minecraft:sunflower[half=lower]",
            "setblock ~0 ~ ~0 minecraft:water",
        ]),
        ("build a redstone clock", "redstone_clock", "Simple repeater clock", [
            "setblock ~0 ~ ~0 minecraft:redstone_wire",
            "setblock ~1 ~ ~0 minecraft:repeater[facing=west,delay=4]",
            "setblock ~2 ~ ~0 minecraft:redstone_wire",
            "setblock ~2 ~ ~1 minecraft:repeater[facing=north,delay=4]",
            "setblock ~2 ~ ~2 minecraft:redstone_wire",
            "setblock ~1 ~ ~2 minecraft:repeater[facing=east,delay=4]",
            "setblock ~0 ~ ~2 minecraft:redstone_wire",
            "setblock ~0 ~ ~1 minecraft:repeater[facing=south,delay=4]",
        ]),
        ("make a treasure vault", "treasure_vault", "Secure room with chests and iron door", [
            "fill ~-3 ~ ~-3 ~3 ~3 ~3 minecraft:deepslate_brick_wall hollow",
            "fill ~-2 ~ ~-2 ~2 ~2 ~2 minecraft:air",
            "setblock ~0 ~0 ~-3 minecraft:iron_door[facing=north,half=lower]",
            "setblock ~0 ~1 ~-3 minecraft:iron_door[facing=north,half=upper]",
            "setblock ~-1 ~0 ~-2 minecraft:chest",
            "setblock ~1 ~0 ~-2 minecraft:chest",
            "setblock ~-1 ~0 ~2 minecraft:chest",
            "setblock ~1 ~0 ~2 minecraft:chest",
            "setblock ~0 ~2 ~0 minecraft:lantern[hanging=true]",
        ]),
    ]
    for prompt, name, desc, commands in SCRIPTS:
        player = random.choice(PLAYERS)
        conversation = [sys_msg("sudo"), user_msg(f"Player {player}: sudo {prompt}")]

        # Step 1: validate every line against the live server.
        checked = validate_commands_rcon(rcon, commands)
        passing = [entry for entry in checked if entry["ok"]]
        failures = [
            {"line": lineno, "command": entry["cmd"], "error": entry["result"]}
            for lineno, entry in enumerate(checked, start=1)
            if not entry["ok"]
        ]
        is_valid = len(passing) == len(checked)
        conversation.append(tool_call("script.validate", {"commands": commands}))
        conversation.append(tool_result({
            "valid": is_valid,
            "total": len(commands),
            "passed": len(passing),
            "errors": failures,
        }))

        # Step 2: write the script; when validation failed only the passing lines are kept.
        if is_valid:
            write_cmds = commands
        else:
            write_cmds = [entry["cmd"] for entry in passing]
        conversation.append(
            tool_call("script.write", {"name": name, "commands": write_cmds, "description": desc})
        )
        conversation.append(
            tool_result({"ok": True, "path": f"mortdecai:{name}", "lines": len(write_cmds)})
        )

        # Step 3: execute the written function as the requesting player.
        conversation.append(tool_call("script.execute", {"name": name, "as_player": player}))
        conversation.append(tool_result({
            "ok": True,
            "result": f"Executed {len(write_cmds)} commands from function mortdecai:{name}",
        }))

        # Final answer summarising the run.
        fix_note = f" Fixed {len(failures)} validation errors." if failures else ""
        summary = (
            f"Validated {len(commands)} commands ({len(passing)} passed).{fix_note}"
            f" Wrote and executed '{name}' script at player position."
        )
        conversation.append(final_response({
            "risk_level": 3,
            "commands": [f"function mortdecai:{name}"],
            "reasoning": summary,
        }))
        examples.append({
            "id": f"rcon-script-{name}",
            "source": "rcon_validated",
            "type": "script_write_execute",
            "messages": conversation,
        })
    return examples
def gen_direct_command_examples(rcon):
    """Build single-tool ``rcon.execute`` conversations from live server replies.

    Every command of a scenario is sent through *rcon*; the reply (cut to 300
    characters) becomes the tool result. Each example records in
    ``all_success`` whether none of its replies looked like an error.
    """
    examples = []
    COMMANDS = [
        ("sudo give me a diamond sword with sharpness 5", [
            "give {p} minecraft:diamond_sword[enchantments={sharpness:5,unbreaking:3,mending:1}] 1",
        ], "Gave max enchanted diamond sword."),
        ("sudo full netherite armor please", [
            "give {p} minecraft:netherite_helmet[enchantments={protection:4,unbreaking:3,mending:1}] 1",
            "give {p} minecraft:netherite_chestplate[enchantments={protection:4,unbreaking:3,mending:1}] 1",
            "give {p} minecraft:netherite_leggings[enchantments={protection:4,unbreaking:3,mending:1}] 1",
            "give {p} minecraft:netherite_boots[enchantments={protection:4,unbreaking:3,mending:1}] 1",
        ], "Full netherite armor set with protection 4 and mending."),
        ("sudo make it night", ["time set night"], "Set time to night."),
        ("sudo give me 64 golden apples", [
            "give {p} minecraft:golden_apple 64",
        ], "Gave 64 golden apples."),
        ("sudo smite me with lightning", [
            "execute at {p} run summon minecraft:lightning_bolt",
        ], "Summoned lightning at player position."),
        ("sudo give me speed 3 for 10 minutes", [
            "effect give {p} minecraft:speed 600 2",
        ], "Applied speed 3 (amplifier 2) for 600 seconds."),
        ("sudo clear weather", ["weather clear 999999"], "Cleared weather."),
        ("sudo make me invincible", [
            "effect give {p} minecraft:resistance 6000 4",
            "effect give {p} minecraft:regeneration 6000 2",
            "effect give {p} minecraft:fire_resistance 6000 0",
        ], "Applied resistance 5, regeneration 3, and fire resistance for 100 minutes."),
        ("sudo give me a god bow", [
            "give {p} minecraft:bow[enchantments={power:5,infinity:1,flame:1,punch:2,unbreaking:3}] 1",
        ], "Bow with power 5, infinity, flame, punch 2."),
        ("sudo stack of each wood type", [
            "give {p} minecraft:oak_log 64",
            "give {p} minecraft:spruce_log 64",
            "give {p} minecraft:birch_log 64",
            "give {p} minecraft:jungle_log 64",
            "give {p} minecraft:acacia_log 64",
            "give {p} minecraft:dark_oak_log 64",
            "give {p} minecraft:cherry_log 64",
            "give {p} minecraft:mangrove_log 64",
        ], "All 8 wood types, one stack each."),
        ("sudo clear my inventory", ["clear {p}"], "Cleared all items."),
        ("sudo tp me to 0 100 0", ["tp {p} 0 100 0"], "Teleported to coordinates."),
        ("sudo gamemode creative", ["gamemode creative {p}"], "Set creative mode."),
        ("sudo gamemode survival", ["gamemode survival {p}"], "Set survival mode."),
        ("sudo kill all zombies", ["kill @e[type=minecraft:zombie]"], "Killed all zombies."),
        ("sudo summon 5 cows", [
            "summon minecraft:cow ~ ~ ~1",
            "summon minecraft:cow ~ ~ ~2",
            "summon minecraft:cow ~ ~ ~3",
            "summon minecraft:cow ~1 ~ ~1",
            "summon minecraft:cow ~1 ~ ~2",
        ], "Summoned 5 cows nearby."),
        ("sudo give everyone a diamond", [
            "give @a minecraft:diamond 1",
        ], "Gave all players a diamond."),
        ("sudo set time to sunrise", ["time set 23000"], "Set to dawn (tick 23000)."),
        ("sudo fill a 5x5 area with gold blocks under me", [
            "fill ~-2 ~-1 ~-2 ~2 ~-1 ~2 minecraft:gold_block",
        ], "Filled 5x5 gold platform under player."),
        ("sudo give me a trident with loyalty and channeling", [
            "give {p} minecraft:trident[enchantments={loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}] 1",
        ], "Trident with loyalty 3, channeling, impaling 5. Loyalty and riptide are mutually exclusive — used loyalty."),
        ("sudo night vision forever", [
            "effect give {p} minecraft:night_vision 999999 0",
        ], "Permanent night vision."),
        ("sudo repair what I'm holding", [
            "give {p} minecraft:experience_bottle 64",
        ], "Gave XP bottles — use with mending items to repair. Cannot directly repair via commands."),
        ("sudo give me a mace", [
            "give {p} minecraft:mace[enchantments={density:5,wind_burst:3,unbreaking:3,mending:1}] 1",
        ], "Mace with density 5 and wind burst 3. New 1.21 weapon."),
        ("sudo spawn a warden", [
            "summon minecraft:warden ~ ~ ~5",
        ], "Summoned warden 5 blocks away. Be careful — 500 HP, 2-hit kill."),
        ("sudo give me all the music discs", [
            "give {p} minecraft:music_disc_13 1",
            "give {p} minecraft:music_disc_cat 1",
            "give {p} minecraft:music_disc_blocks 1",
            "give {p} minecraft:music_disc_chirp 1",
            "give {p} minecraft:music_disc_far 1",
            "give {p} minecraft:music_disc_mall 1",
            "give {p} minecraft:music_disc_mellohi 1",
            "give {p} minecraft:music_disc_stal 1",
            "give {p} minecraft:music_disc_strad 1",
            "give {p} minecraft:music_disc_ward 1",
            "give {p} minecraft:music_disc_pigstep 1",
            "give {p} minecraft:music_disc_otherside 1",
        ], "All music discs."),
        ("sudo potion of the turtle master", [
            "give {p} minecraft:potion[potion_contents={potion:\"minecraft:turtle_master\"}] 1",
        ], "Turtle master potion — slowness 4 + resistance 3."),
        ("sudo banner with a skull pattern", [
            "give {p} minecraft:white_banner[banner_patterns=[{pattern:\"minecraft:skull\",color:\"black\"}]] 1",
        ], "White banner with black skull pattern using 1.21 syntax."),
    ]
    for prompt, templates, reasoning in COMMANDS:
        player = random.choice(PLAYERS)
        resolved = [template.replace("{p}", player) for template in templates]
        conversation = [sys_msg("sudo"), user_msg(f"Player {player}: {prompt}")]

        # Send the commands one by one, remembering whether each reply was clean.
        outcomes = []
        for command in resolved:
            reply = rcon.command(command)
            succeeded = not is_error(reply)
            outcomes.append(succeeded)
            conversation += [
                tool_call("rcon.execute", {"command": command}),
                tool_result({"success": succeeded, "result": reply[:300]}),
            ]

        conversation.append(
            final_response({"risk_level": 3, "commands": resolved, "reasoning": reasoning})
        )
        examples.append({
            "id": f"rcon-direct-{len(examples):03d}",
            "source": "rcon_validated",
            "type": "direct_command",
            "rcon_validated": True,
            "all_success": all(outcomes),
            "messages": conversation,
        })
    return examples
def gen_memory_examples(rcon):
    """Build memory.read / memory.write conversations.

    Memory tool results are constructed locally; only the follow-up teleport
    commands of "read" scenarios are sent through *rcon*.

    A keyed read returns the value that the matching "write" scenario in
    ``MEMORY_OPS`` stored, so the ``memory.read`` result agrees with the
    coordinates used by the subsequent ``tp`` command. Random coordinates are
    used only for keys no write scenario defines.

    Returns a list of example dicts.
    Raises ValueError if a scenario names an action other than
    "write", "read" or "delete".
    """
    examples = []
    MEMORY_OPS = [
        ("sudo remember this as home", "write", "home", "location", {"x": 100, "y": 64, "z": -200}, None,
         "Saved current location as 'home'."),
        ("sudo save this spot as my base", "write", "base", "location", {"x": -500, "y": 72, "z": 300}, None,
         "Saved base location."),
        ("sudo tp me home", "read", "home", None, None, "tp {p} 100 64 -200",
         "Read 'home' from memory. Teleporting."),
        ("sudo take me to my base", "read", "base", None, None, "tp {p} -500 72 300",
         "Read 'base' from memory. Teleporting."),
        ("sudo what do you remember about me?", "read", None, None, None, None,
         "Listed all memories for this player."),
        ("sudo remember my favorite tool is a pickaxe", "write", "favorite_tool", "preference", "diamond pickaxe", None,
         "Saved tool preference."),
        ("sudo remember Ace is my enemy", "write", "enemy_ace", "fact", "Ace13245 is an enemy", None,
         "Noted player relationship."),
        ("sudo forget my base", "delete", "base", None, None, None,
         "Deleted 'base' memory."),
        ("sudo save this as my mine", "write", "mine", "location", {"x": 30, "y": 11, "z": -80}, None,
         "Saved mining location."),
        ("sudo tp me to my mine", "read", "mine", None, None, "tp {p} 30 11 -80",
         "Read 'mine' from memory. Teleporting to Y=11."),
    ]
    # What each "write" scenario stores, keyed by memory key. Reads consult this
    # so the tool result matches the teleport command instead of being random.
    stored = {
        w_key: {"type": w_type, "value": w_value}
        for _w_prompt, w_action, w_key, w_type, w_value, _w_cmd, _w_reason in MEMORY_OPS
        if w_action == "write"
    }
    for prompt, action, key, mtype, value, cmd_tmpl, reasoning in MEMORY_OPS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {prompt}")]
        if action == "write":
            msgs.append(tool_call("memory.write", {"player": player, "type": mtype, "key": key, "value": value}))
            msgs.append(tool_result({"ok": True, "key": key}))
            resp = {"risk_level": 4, "commands": [], "reasoning": reasoning}
        elif action == "read":
            args = {"player": player}
            if key:
                args["key"] = key
            msgs.append(tool_call("memory.read", args))
            if key:
                known = stored.get(key)
                if value:
                    # An explicit value on the scenario always wins.
                    mem_val, mem_type = value, mtype or "location"
                elif known is not None:
                    mem_val, mem_type = known["value"], mtype or known["type"]
                else:
                    mem_val = {"x": random.randint(-500, 500), "y": random.randint(60, 120), "z": random.randint(-500, 500)}
                    mem_type = mtype or "location"
                msgs.append(tool_result({"memories": [{"key": key, "type": mem_type, "value": mem_val}]}))
            else:
                # No key: the tool lists everything remembered for the player.
                msgs.append(tool_result({"memories": [
                    {"key": "home", "type": "location", "value": {"x": 100, "y": 64, "z": -200}},
                    {"key": "favorite_tool", "type": "preference", "value": "diamond pickaxe"},
                ]}))
            if cmd_tmpl:
                cmd = cmd_tmpl.replace("{p}", player)
                result = rcon.command(cmd)
                msgs.append(tool_call("rcon.execute", {"command": cmd}))
                msgs.append(tool_result({"success": not is_error(result), "result": result[:200]}))
                resp = {"risk_level": 3, "commands": [cmd], "reasoning": reasoning}
            else:
                resp = {"risk_level": 4, "commands": [], "reasoning": reasoning}
        elif action == "delete":
            # NOTE(review): only a memory.read call is emitted here although the
            # reasoning says the memory was deleted — confirm how deletion is
            # meant to be expressed with the available memory tools.
            msgs.append(tool_call("memory.read", {"player": player, "key": key}))
            msgs.append(tool_result({"memories": [{"key": key, "type": "location", "value": {"x": -500, "y": 72, "z": 300}}]}))
            resp = {"risk_level": 3, "commands": [], "reasoning": reasoning}
        else:
            # Previously an unknown action fell through to an unbound `resp`.
            raise ValueError(f"unknown memory action: {action!r}")
        msgs.append(final_response(resp))
        examples.append({
            "id": f"rcon-memory-{len(examples):03d}",
            "source": "rcon_validated",
            "type": f"memory_{action}",
            "messages": msgs,
        })
    return examples
def gen_wiki_examples(rcon):
    """Build wiki-lookup-then-execute conversations.

    The ``minecraft.wiki_lookup`` result is taken from the scenario table;
    the commands that follow are sent through *rcon* and their replies
    (cut to 300 characters) are recorded as tool results.
    """
    examples = []
    WIKI = [
        ("sudo best pickaxe enchantments", "pickaxe enchantments 1.21",
         "Pickaxe enchantments:\n- Efficiency V: faster mining\n- Fortune III: more drops from ores\n- Silk Touch: mine blocks directly\n- Unbreaking III: durability\n- Mending: XP repair\nFortune and Silk Touch are mutually exclusive.",
         ["give {p} minecraft:netherite_pickaxe[enchantments={efficiency:5,fortune:3,unbreaking:3,mending:1}] 1"],
         "Max pickaxe with fortune. Fortune and silk touch are exclusive — chose fortune for ore farming."),
        ("sudo what enchants for boots?", "boot enchantments 1.21",
         "Boot enchantments:\n- Protection IV or Blast/Fire/Projectile Protection IV\n- Feather Falling IV: reduces fall damage\n- Depth Strider III or Frost Walker II (exclusive)\n- Soul Speed III: faster on soul sand\n- Swift Sneak III: faster sneaking\n- Unbreaking III, Mending I",
         ["give {p} minecraft:netherite_boots[enchantments={protection:4,feather_falling:4,depth_strider:3,soul_speed:3,unbreaking:3,mending:1}] 1"],
         "All compatible boot enchants. Depth strider over frost walker for general use."),
        ("sudo how do I use the /place command?", "place command 1.21",
         "/place feature <feature> [pos]\n/place jigsaw <pool> <target> <max_depth> [pos]\n/place structure <structure> [pos]\n/place template <template> [pos] [rotation] [mirror]\n\nUseful for placing structures, trees, etc.",
         ["place feature minecraft:oak ~ ~ ~5"],
         "Placed an oak tree feature 5 blocks away using /place."),
        ("sudo what's the command for teams?", "team command minecraft",
         "Team commands:\n/team add <name> [display_name]\n/team join <team> [members]\n/team leave [members]\n/team modify <team> <option> <value>\nOptions: color, friendlyFire, seeFriendlyInvisibles, nametagVisibility, collisionRule, prefix, suffix",
         ["team add red Red Team", "team modify red color red"],
         "Created 'Red Team' with red name color."),
        ("sudo how to make colored text in chat?", "tellraw color formatting minecraft",
         "tellraw @a {\"text\":\"Hello\",\"color\":\"gold\",\"bold\":true}\nColors: black, dark_blue, dark_green, dark_aqua, dark_red, dark_purple, gold, gray, dark_gray, blue, green, aqua, red, light_purple, yellow, white\nFormatting: bold, italic, underlined, strikethrough, obfuscated",
         ['tellraw @a {"text":"Welcome to the server!","color":"gold","bold":true}'],
         "Used tellraw with gold color and bold formatting."),
    ]
    for prompt, query, content, templates, reasoning in WIKI:
        player = random.choice(PLAYERS)
        resolved = [template.replace("{p}", player) for template in templates]

        # The lookup URL uses only the first word of the query.
        first_word = query.split()[0]
        conversation = [
            sys_msg("sudo"),
            user_msg(f"Player {player}: {prompt}"),
            tool_call("minecraft.wiki_lookup", {"query": query}),
            tool_result({"content": content, "url": f"https://minecraft.wiki/w/{first_word}"}),
        ]
        for command in resolved:
            reply = rcon.command(command)
            conversation.append(tool_call("rcon.execute", {"command": command}))
            conversation.append(tool_result({"success": not is_error(reply), "result": reply[:300]}))

        conversation.append(
            final_response({"risk_level": 3, "commands": resolved, "reasoning": reasoning})
        )
        examples.append({
            "id": f"rcon-wiki-{len(examples):03d}",
            "source": "rcon_validated",
            "type": "wiki_lookup",
            "messages": conversation,
        })
    return examples
def gen_error_correction_examples(rcon):
    """Build fail -> correct -> retry conversations from live server replies.

    Each scenario sends a deliberately wrong command followed by its
    corrected form. A scenario only becomes an example when the server
    actually rejected the wrong command: otherwise the conversation would
    label a successful reply as a failure and quote it as the error.
    Skipped scenarios are reported on stdout and their corrected command is
    not sent.

    Returns a list of example dicts (possibly shorter than ``ERRORS``).
    """
    examples = []
    ERRORS = [
        ("sudo give me a bed", "give {p} minecraft:bed 1", "give {p} minecraft:white_bed 1",
         "No item 'bed' — correct ID is 'white_bed' in 1.21."),
        ("sudo give me cooked beef", "give {p} minecraft:steak 1", "give {p} minecraft:cooked_beef 1",
         "Item is 'cooked_beef', not 'steak'."),
        ("sudo give me speed", "effect give {p} minecraft:speed", "effect give {p} minecraft:speed 300 1",
         "Effect needs duration and amplifier arguments."),
        ("sudo give me a log", "give {p} minecraft:log 64", "give {p} minecraft:oak_log 64",
         "No item 'log' — must specify wood type: oak_log, spruce_log, etc."),
        ("sudo fill with stone 10", "fill ~ ~ ~ ~10 ~10 ~10 minecraft:stone 10", "fill ~ ~ ~ ~10 ~10 ~10 minecraft:stone",
         "Fill doesn't take a trailing count number."),
    ]
    for prompt, wrong_cmd_tmpl, right_cmd_tmpl, reasoning in ERRORS:
        player = random.choice(PLAYERS)
        wrong_cmd = wrong_cmd_tmpl.replace("{p}", player)
        right_cmd = right_cmd_tmpl.replace("{p}", player)

        # Wrong attempt: it must really fail for the example to be truthful.
        wrong_result = rcon.command(wrong_cmd)
        if not is_error(wrong_result):
            print(f"  Skipping error-correction scenario (server accepted it): {wrong_cmd}")
            continue

        msgs = [sys_msg("sudo"), user_msg(f"Player {player}: {prompt}")]
        msgs.append(tool_call("rcon.execute", {"command": wrong_cmd}))
        msgs.append(tool_result({"success": False, "result": wrong_result[:300]}))

        # Corrected attempt: recorded with whatever the server actually said.
        right_result = rcon.command(right_cmd)
        msgs.append(tool_call("rcon.execute", {"command": right_cmd}))
        msgs.append(tool_result({"success": not is_error(right_result), "result": right_result[:300]}))

        resp = {"risk_level": 3, "commands": [right_cmd], "reasoning": f"First attempt failed: {wrong_result[:100]}. {reasoning}"}
        msgs.append(final_response(resp))
        examples.append({
            "id": f"rcon-errfix-{len(examples):03d}",
            "source": "rcon_validated",
            "type": "error_correction",
            "messages": msgs,
        })
    return examples
def gen_chained_examples(rcon):
    """Build conversations made of several consecutive tool calls.

    ``rcon.execute`` steps are sent through *rcon* and contribute to the
    final ``commands`` list; any other tool gets a fixed ``{"ok": True}``
    result.
    """
    examples = []
    CHAINS = [
        ("sudo gear me up for the nether", [
            ("rcon.execute", {"command": "give {p} minecraft:netherite_sword[enchantments={sharpness:5,fire_aspect:2,unbreaking:3,mending:1}] 1"}),
            ("rcon.execute", {"command": "give {p} minecraft:netherite_pickaxe[enchantments={efficiency:5,fortune:3,unbreaking:3,mending:1}] 1"}),
            ("rcon.execute", {"command": "give {p} minecraft:netherite_chestplate[enchantments={protection:4,unbreaking:3,mending:1}] 1"}),
            ("rcon.execute", {"command": "effect give {p} minecraft:fire_resistance 6000 0"}),
            ("rcon.execute", {"command": "give {p} minecraft:golden_apple 16"}),
            ("rcon.execute", {"command": "give {p} minecraft:ender_pearl 16"}),
        ], "Full nether loadout: netherite sword+pick, protection armor, fire resistance, golden apples, pearls."),
        ("sudo prepare me for an end fight", [
            ("rcon.execute", {"command": "give {p} minecraft:netherite_sword[enchantments={sharpness:5,unbreaking:3,mending:1}] 1"}),
            ("rcon.execute", {"command": "give {p} minecraft:bow[enchantments={power:5,infinity:1,unbreaking:3}] 1"}),
            ("rcon.execute", {"command": "give {p} minecraft:arrow 64"}),
            ("rcon.execute", {"command": "give {p} minecraft:golden_apple 32"}),
            ("rcon.execute", {"command": "give {p} minecraft:ender_pearl 16"}),
            ("rcon.execute", {"command": "effect give {p} minecraft:slow_falling 600 0"}),
            ("rcon.execute", {"command": "give {p} minecraft:cobblestone 128"}),
        ], "End fight kit: weapons, slow falling for void safety, blocks for pillaring, pearls for dodging."),
    ]
    for prompt, steps, reasoning in CHAINS:
        player = random.choice(PLAYERS)
        conversation = [sys_msg("sudo"), user_msg(f"Player {player}: {prompt}")]
        executed = []
        for tool_name, arg_templates in steps:
            # Substitute the player name into every string argument.
            call_args = {
                field: (raw.replace("{p}", player) if isinstance(raw, str) else raw)
                for field, raw in arg_templates.items()
            }
            if tool_name != "rcon.execute":
                conversation.append(tool_call(tool_name, call_args))
                conversation.append(tool_result({"ok": True}))
                continue
            command = call_args["command"]
            reply = rcon.command(command)
            conversation.append(tool_call(tool_name, call_args))
            conversation.append(tool_result({"success": not is_error(reply), "result": reply[:300]}))
            executed.append(command)

        conversation.append(
            final_response({"risk_level": 3, "commands": executed, "reasoning": reasoning})
        )
        examples.append({
            "id": f"rcon-chain-{len(examples):03d}",
            "source": "rcon_validated",
            "type": "chained",
            "messages": conversation,
        })
    return examples
def main():
    """Run every example generator against an RCON server and write JSONL.

    Parses the RCON connection options, runs the generators in a fixed
    order while printing a count for each, prints overall statistics, and
    writes one JSON object per line to ``OUTPUT_PATH``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--rcon-host", default="192.168.0.244")
    parser.add_argument("--rcon-port", type=int, default=25577)
    parser.add_argument("--rcon-pass", default="REDACTED_RCON")
    args = parser.parse_args()

    print(f"Connecting to RCON at {args.rcon_host}:{args.rcon_port}...")
    rcon = SimpleRCON(args.rcon_host, args.rcon_port, args.rcon_pass)
    print("Connected.\n")

    # (label for the "Generating ..." line, label for the count line, generator)
    stages = [
        ("script", "script", gen_script_examples),
        ("direct command", "direct command", gen_direct_command_examples),
        ("memory", "memory", gen_memory_examples),
        ("wiki", "wiki", gen_wiki_examples),
        ("error correction", "error correction", gen_error_correction_examples),
        ("chained multi-tool", "chained", gen_chained_examples),
    ]
    all_examples = []
    for announce, noun, generator in stages:
        print(f"Generating {announce} examples...")
        batch = generator(rcon)
        all_examples.extend(batch)
        print(f"  {len(batch)} {noun} examples")

    # Stats. Examples without an "all_success" key count as successful.
    success_count = sum(1 for e in all_examples if e.get("all_success", True))
    print(f"\nTotal: {len(all_examples)} examples")
    print(f"  RCON validated: {sum(1 for e in all_examples if e.get('rcon_validated', False))}")
    # This count was previously computed and then dropped.
    print(f"  All-success: {success_count}/{len(all_examples)}")

    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Records are dumped with ensure_ascii=False and contain non-ASCII text,
    # so the encoding is pinned instead of relying on the locale default.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in all_examples:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
    print(f"Written to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,393 @@
#!/usr/bin/env python3
"""
Generate training data for the script.* tool family.
Teaches the model to:
1. Write mcfunction scripts for complex builds and mechanics
2. Validate scripts before writing (catch errors, fix them)
3. Execute scripts at player positions
4. Read/list/delete scripts for management
5. Schedule tick/load functions for persistent effects
6. Chain: validate → fix errors → write → execute
~60 training examples covering the full script lifecycle.
"""
import json
import random
import sys
from pathlib import Path
# Directory three levels above this file; put on sys.path so the ``agent``
# package imports below resolve when this file is run as a script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.tool_schemas import qwen3_tools_block
from agent.prompts.system_prompts import SYNTAX_RULES, RISK_GRADIENT
# Destination JSONL file for the generated examples.
OUTPUT_PATH = PROJECT_ROOT / "data" / "raw" / "script_tool_training.jsonl"
# Tool description text appended to the system prompt below.
TOOLS_BLOCK = qwen3_tools_block()
# System prompt shared by every conversation in this file (see sys_msg).
SYSTEM = (
    "You are a Minecraft 1.21 command translator with script writing abilities. "
    "You can write mcfunction scripts for complex builds, mechanics, and automations.\n\n"
    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
    "You have access to tools including a full script environment. For simple tasks "
    "(1-3 commands), use rcon.execute directly. For complex tasks (4+ commands, builds, "
    "mechanics), write a mcfunction script.\n\n"
    "SCRIPT WORKFLOW: validate → fix errors → write → execute.\n"
    "Always validate before writing. Fix any errors the validator catches.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)
# Player names available for use in generated examples.
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
def sys_msg():
    """Return the system message carrying the shared ``SYSTEM`` prompt."""
    message = {"role": "system"}
    message["content"] = SYSTEM
    return message
def user_msg(text):
    """Wrap *text* as a chat message with the ``user`` role."""
    return dict(role="user", content=text)
def tool_call(name, args):
    """Build an assistant message holding one ``<tool_call>`` block.

    The tool *name* and its *args* are JSON-encoded as
    ``{"name": ..., "arguments": ...}`` between the opening and closing tags.
    """
    payload = json.dumps({"name": name, "arguments": args})
    body = "<tool_call>\n" + payload + "\n</tool_call>"
    return {"role": "assistant", "content": body}
def tool_result(data):
    """Wrap *data*, JSON-encoded, as a chat message with the ``tool`` role."""
    encoded = json.dumps(data)
    return {"role": "tool", "content": encoded}
def final_response(resp):
    """Wrap the final answer *resp*, JSON-encoded, as an ``assistant`` message."""
    encoded = json.dumps(resp)
    return {"role": "assistant", "content": encoded}
def generate_all():
    """Build every synthetic script-tool training conversation.

    Six families of examples are produced, in this order:

    1. simple builds: validate → write → execute
    2. validate → fix → re-validate → write → execute
    3. script management (list / read / delete)
    4. tick/load scheduling
    5. direct ``rcon.execute`` for tasks too small to deserve a script
    6. edit cycle: read → validate → overwrite → execute

    The player for each conversation is drawn with ``random.choice``, so the
    output differs between runs unless the caller seeds ``random``.

    Returns:
        list[dict]: one dict per example with the keys ``id``, ``source``,
        ``type`` and ``messages``. ``id`` embeds a counter shared by all
        families, so the numbers keep rising across families.
    """
    examples = []
    idx = 0

    # ── Simple script writes (validate → write → execute) ──
    BUILDS = [
        ("sudo build me a wooden house", "wooden_house", "Simple wooden house with door and roof", [
            "fill ~-4 ~ ~-4 ~4 ~4 ~4 minecraft:oak_planks hollow",
            "fill ~-4 ~ ~-4 ~4 ~ ~4 minecraft:oak_planks",
            "fill ~-3 ~1 ~-4 ~3 ~3 ~-4 minecraft:air",
            "setblock ~0 ~1 ~-4 minecraft:oak_door[facing=north,half=lower]",
            "setblock ~0 ~2 ~-4 minecraft:oak_door[facing=north,half=upper]",
            "fill ~-3 ~1 ~4 ~-3 ~2 ~4 minecraft:glass_pane",
            "fill ~3 ~1 ~4 ~3 ~2 ~4 minecraft:glass_pane",
            "setblock ~0 ~3 ~0 minecraft:lantern[hanging=true]",
        ]),
        ("sudo create a pvp arena", "pvp_arena", "PvP arena with walls and starting positions", [
            "fill ~-15 ~-1 ~-15 ~15 ~-1 ~15 minecraft:smooth_stone",
            "fill ~-15 ~ ~-15 ~15 ~5 ~-15 minecraft:stone_bricks",
            "fill ~-15 ~ ~15 ~15 ~5 ~15 minecraft:stone_bricks",
            "fill ~-15 ~ ~-15 ~-15 ~5 ~15 minecraft:stone_bricks",
            "fill ~15 ~ ~-15 ~15 ~5 ~15 minecraft:stone_bricks",
            "fill ~-14 ~ ~-14 ~14 ~4 ~14 minecraft:air",
            "setblock ~-10 ~ ~0 minecraft:red_concrete",
            "setblock ~10 ~ ~0 minecraft:blue_concrete",
            "fill ~-1 ~-1 ~-1 ~1 ~-1 ~1 minecraft:glowstone",
        ]),
        ("sudo make a nether portal room", "portal_room", "Decorated nether portal room", [
            "fill ~-5 ~ ~-5 ~5 ~6 ~5 minecraft:blackstone hollow",
            "fill ~-4 ~ ~-4 ~4 ~5 ~4 minecraft:air",
            "fill ~-1 ~1 ~0 ~1 ~5 ~0 minecraft:obsidian",
            "fill ~0 ~1 ~0 ~0 ~4 ~0 minecraft:air",
            "setblock ~0 ~1 ~0 minecraft:nether_portal[axis=x]",
            "setblock ~0 ~2 ~0 minecraft:nether_portal[axis=x]",
            "setblock ~0 ~3 ~0 minecraft:nether_portal[axis=x]",
            "fill ~-4 ~-1 ~-4 ~4 ~-1 ~4 minecraft:polished_blackstone_bricks",
            "setblock ~-3 ~1 ~-3 minecraft:soul_lantern",
            "setblock ~3 ~1 ~-3 minecraft:soul_lantern",
            "setblock ~-3 ~1 ~3 minecraft:soul_lantern",
            "setblock ~3 ~1 ~3 minecraft:soul_lantern",
        ]),
        ("sudo build a watchtower", "watchtower", "Tall watchtower with ladder access", [
            "fill ~-2 ~ ~-2 ~2 ~10 ~2 minecraft:cobblestone hollow",
            "fill ~-1 ~ ~-1 ~1 ~9 ~1 minecraft:air",
            "fill ~-2 ~10 ~-2 ~2 ~10 ~2 minecraft:oak_planks",
            "fill ~-2 ~11 ~-2 ~2 ~11 ~2 minecraft:oak_fence",
            "fill ~-1 ~11 ~-1 ~1 ~11 ~1 minecraft:air",
            "fill ~2 ~1 ~0 ~2 ~9 ~0 minecraft:ladder[facing=west]",
            "setblock ~0 ~12 ~0 minecraft:lantern",
        ]),
        ("sudo build a farm plot with water", "farm_plot", "9x9 farm with central water and tilled soil", [
            "fill ~-4 ~-1 ~-4 ~4 ~-1 ~4 minecraft:farmland",
            "setblock ~0 ~-1 ~0 minecraft:water",
            "fill ~-4 ~ ~-4 ~4 ~ ~4 minecraft:air",
            "fill ~-5 ~-1 ~-5 ~5 ~-1 ~-5 minecraft:oak_fence",
            "fill ~-5 ~-1 ~5 ~5 ~-1 ~5 minecraft:oak_fence",
            "fill ~-5 ~-1 ~-5 ~-5 ~-1 ~5 minecraft:oak_fence",
            "fill ~5 ~-1 ~-5 ~5 ~-1 ~5 minecraft:oak_fence",
            "setblock ~-5 ~-1 ~0 minecraft:oak_fence_gate[facing=east]",
        ]),
        ("sudo make a mob grinder platform", "mob_grinder", "Mob spawning platform with water channels", [
            "fill ~-8 ~ ~-8 ~8 ~ ~8 minecraft:cobblestone",
            "fill ~-7 ~1 ~-7 ~7 ~3 ~7 minecraft:air",
            "fill ~-8 ~1 ~-8 ~8 ~1 ~-8 minecraft:cobblestone",
            "fill ~-8 ~1 ~8 ~8 ~1 ~8 minecraft:cobblestone",
            "fill ~-8 ~1 ~-8 ~-8 ~1 ~8 minecraft:cobblestone",
            "fill ~8 ~1 ~-8 ~8 ~1 ~8 minecraft:cobblestone",
            "setblock ~0 ~-1 ~0 minecraft:air",
            "setblock ~0 ~-2 ~0 minecraft:hopper",
            "fill ~-8 ~ ~0 ~-1 ~ ~0 minecraft:water",
            "fill ~1 ~ ~0 ~8 ~ ~0 minecraft:water",
        ]),
        # NOTE(review): item_frame is an entity in Java Edition, so the two
        # `fill ... minecraft:item_frame` lines below would likely be rejected
        # by a real validator — confirm and rework this build if so.
        ("sudo create a trophy room", "trophy_room", "Display room with item frames and lighting", [
            "fill ~-6 ~ ~-6 ~6 ~4 ~6 minecraft:smooth_quartz hollow",
            "fill ~-5 ~ ~-5 ~5 ~3 ~5 minecraft:air",
            "fill ~-6 ~-1 ~-6 ~6 ~-1 ~6 minecraft:polished_deepslate",
            "fill ~-5 ~2 ~-6 ~5 ~2 ~-6 minecraft:item_frame[facing=south]",
            "fill ~-5 ~2 ~6 ~5 ~2 ~6 minecraft:item_frame[facing=north]",
            "setblock ~-4 ~3 ~0 minecraft:lantern[hanging=true]",
            "setblock ~0 ~3 ~0 minecraft:lantern[hanging=true]",
            "setblock ~4 ~3 ~0 minecraft:lantern[hanging=true]",
        ]),
    ]
    # Script name → command list, so script.read examples can return the real
    # body of the script that was asked for.
    script_bodies = {b_name: b_cmds for _b_req, b_name, _b_desc, b_cmds in BUILDS}

    for req, name, desc, cmds in BUILDS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        # Validate first
        msgs.append(tool_call("script.validate", {"commands": cmds}))
        msgs.append(tool_result({"valid": True, "total": len(cmds), "passed": len(cmds), "errors": []}))
        # Write
        msgs.append(tool_call("script.write", {"name": name, "commands": cmds, "description": desc}))
        msgs.append(tool_result({"ok": True, "path": f"mortdecai:{name}", "lines": len(cmds)}))
        # Execute at player
        msgs.append(tool_call("script.execute", {"name": name, "as_player": player}))
        msgs.append(tool_result({"ok": True, "result": f"Executed {len(cmds)} commands from function mortdecai:{name}"}))
        resp = {"risk_level": 3, "commands": [f"function mortdecai:{name}"], "reasoning": f"Wrote and executed '{name}' script ({len(cmds)} commands) at {player}'s position."}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-build-{idx:03d}", "source": "script_training", "type": "script_write_execute", "messages": msgs})
        idx += 1

    # ── Validate-fix-write cycle (model catches and fixes errors) ──
    # Tuple layout: request, script name, broken commands, 0-based index of
    # the broken command, offending fragment (informational only), validator
    # error message, corrected commands.
    ERROR_SCENARIOS = [
        ("sudo build a glass bridge", "glass_bridge", [
            "fill ~0 ~-1 ~0 ~0 ~-1 ~20 minecraft:glass",
            "fill ~-1 ~ ~0 ~-1 ~ ~20 minecraft:glass_pane",
            "fill ~1 ~ ~0 ~1 ~ ~20 minecraft:glass_paine",  # typo
        ], 2, "glass_paine", "Unknown block type: minecraft:glass_paine", [
            "fill ~0 ~-1 ~0 ~0 ~-1 ~20 minecraft:glass",
            "fill ~-1 ~ ~0 ~-1 ~ ~20 minecraft:glass_pane",
            "fill ~1 ~ ~0 ~1 ~ ~20 minecraft:glass_pane",
        ]),
        # The broken effect id must really be invalid: `minecraft:luck` is a
        # valid effect, so flagging it would teach the model a false rule.
        ("sudo create an enchanting setup", "enchanting_room", [
            "setblock ~0 ~ ~0 minecraft:enchanting_table",
            "fill ~-2 ~ ~-2 ~2 ~1 ~2 minecraft:bookshelf",
            "fill ~-1 ~ ~-1 ~1 ~1 ~1 minecraft:air",
            "setblock ~0 ~2 ~0 minecraft:lantern",
            "effect give @s minecraft:lucky 600",  # wrong ID
        ], 4, "minecraft:lucky", "Unknown effect: minecraft:lucky", [
            "setblock ~0 ~ ~0 minecraft:enchanting_table",
            "fill ~-2 ~ ~-2 ~2 ~1 ~2 minecraft:bookshelf",
            "fill ~-1 ~ ~-1 ~1 ~1 ~1 minecraft:air",
            "setblock ~0 ~2 ~0 minecraft:lantern",
            "effect give @s minecraft:luck 600",
        ]),
        ("sudo make a redstone door", "redstone_door", [
            "setblock ~2 ~1 ~0 minecraft:sticky_pistons[facing=west]",  # wrong ID
            "setblock ~2 ~ ~0 minecraft:redstone_block",
        ], 0, "minecraft:sticky_pistons", "Unknown block: minecraft:sticky_pistons", [
            "setblock ~2 ~1 ~0 minecraft:sticky_piston[facing=west]",
            "setblock ~2 ~ ~0 minecraft:redstone_block",
        ]),
    ]
    for req, name, bad_cmds, err_line, _err_fragment, err_msg, fixed_cmds in ERROR_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        # First validation fails; the reported line number is 1-based.
        msgs.append(tool_call("script.validate", {"commands": bad_cmds}))
        msgs.append(tool_result({
            "valid": False, "total": len(bad_cmds), "passed": len(bad_cmds) - 1,
            "errors": [{"line": err_line + 1, "command": bad_cmds[err_line], "error": err_msg}]
        }))
        # Fix and re-validate
        msgs.append(tool_call("script.validate", {"commands": fixed_cmds}))
        msgs.append(tool_result({"valid": True, "total": len(fixed_cmds), "passed": len(fixed_cmds), "errors": []}))
        # Write fixed version
        msgs.append(tool_call("script.write", {"name": name, "commands": fixed_cmds}))
        msgs.append(tool_result({"ok": True, "path": f"mortdecai:{name}", "lines": len(fixed_cmds)}))
        # Execute
        msgs.append(tool_call("script.execute", {"name": name, "as_player": player}))
        msgs.append(tool_result({"ok": True, "result": "Executed commands"}))
        resp = {"risk_level": 3, "commands": [f"function mortdecai:{name}"], "reasoning": f"Validation caught an error. Fixed and wrote '{name}' script."}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-fix-{idx:03d}", "source": "script_training", "type": "script_validate_fix", "messages": msgs})
        idx += 1

    # ── Script management (list, read, delete) ──
    MGMT_SCENARIOS = [
        ("sudo show me all my scripts", "list", None),
        ("sudo what's in the pvp_arena script?", "read", "pvp_arena"),
        ("sudo delete the old test script", "delete", "test_script"),
        ("sudo what scripts do I have?", "list", None),
        ("sudo show me the watchtower script", "read", "watchtower"),
        ("sudo remove the mob_grinder script", "delete", "mob_grinder"),
    ]
    for req, action, target in MGMT_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        if action == "list":
            msgs.append(tool_call("script.list", {}))
            msgs.append(tool_result({"scripts": [
                {"name": "wooden_house", "lines": 8, "scheduled": "none"},
                {"name": "pvp_arena", "lines": 9, "scheduled": "none"},
                {"name": "portal_room", "lines": 12, "scheduled": "none"},
                {"name": "ambient_particles", "lines": 3, "scheduled": "tick"},
            ]}))
            resp = {"risk_level": 4, "commands": [], "reasoning": "Listed all scripts in the datapack."}
        elif action == "read":
            # Return the body of the script that was requested. Indexing
            # directly makes a read scenario for an unknown script fail
            # loudly instead of emitting a mismatched example.
            body = script_bodies[target]
            msgs.append(tool_call("script.read", {"name": target}))
            msgs.append(tool_result({"ok": True, "commands": body, "lines": len(body)}))
            resp = {"risk_level": 4, "commands": [], "reasoning": f"Read the '{target}' script contents."}
        elif action == "delete":
            msgs.append(tool_call("script.delete", {"name": target}))
            msgs.append(tool_result({"ok": True}))
            resp = {"risk_level": 3, "commands": [], "reasoning": f"Deleted the '{target}' script."}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-mgmt-{idx:03d}", "source": "script_training", "type": "script_management", "messages": msgs})
        idx += 1

    # ── Tick/load scheduling ──
    SCHEDULE_SCENARIOS = [
        ("sudo make ambient particles around spawn forever", "ambient_particles", "tick", [
            "execute at @a run particle minecraft:cherry_leaves ~ ~2 ~ 3 1 3 0.02 5",
        ]),
        ("sudo set up scoreboards when the server starts", "init_scoreboards", "load", [
            "scoreboard objectives add deaths deathCount Deaths",
            "scoreboard objectives add kills playerKillCount Kills",
            "scoreboard objectives setdisplay sidebar kills",
        ]),
        ("sudo make it rain XP orbs on everyone every tick", "xp_rain", "tick", [
            "execute at @a run summon minecraft:experience_orb ~ ~3 ~ {Value:1}",
        ]),
        ("sudo initialize gamerules on server load", "init_gamerules", "load", [
            "gamerule keepInventory true",
            "gamerule doDaylightCycle true",
            "gamerule announceAdvancements true",
            "gamerule playersSleepingPercentage 50",
        ]),
    ]
    for req, name, sched_type, cmds in SCHEDULE_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        # Validate
        msgs.append(tool_call("script.validate", {"commands": cmds}))
        msgs.append(tool_result({"valid": True, "total": len(cmds), "passed": len(cmds), "errors": []}))
        # Write
        msgs.append(tool_call("script.write", {"name": name, "commands": cmds, "description": req}))
        msgs.append(tool_result({"ok": True, "path": f"mortdecai:{name}", "lines": len(cmds)}))
        # Schedule
        msgs.append(tool_call("script.schedule", {"name": name, "type": sched_type}))
        msgs.append(tool_result({"ok": True}))
        resp = {"risk_level": 3 if sched_type == "load" else 2, "commands": [f"function mortdecai:{name}"], "reasoning": f"Wrote '{name}' and scheduled it to run on {sched_type}."}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-sched-{idx:03d}", "source": "script_training", "type": "script_schedule", "messages": msgs})
        idx += 1

    # ── Direct rcon for simple tasks (model should NOT use scripts for 1-2 commands) ──
    # "{p}" in a command is replaced by the sampled player name.
    SIMPLE = [
        ("sudo give me a diamond", ["give {p} minecraft:diamond 1"], "Simple give — no script needed."),
        ("sudo set time to day", ["time set day"], "Single command, no script needed."),
        ("sudo kill nearby zombies", ["kill @e[type=minecraft:zombie,distance=..30]"], "One command, direct execution is better."),
    ]
    for req, cmds, reasoning in SIMPLE:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        for cmd in cmds:
            resolved = cmd.replace("{p}", player)
            msgs.append(tool_call("rcon.execute", {"command": resolved}))
            msgs.append(tool_result({"success": True, "result": "Command executed successfully"}))
        resp = {"risk_level": 3, "commands": [c.replace("{p}", player) for c in cmds], "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-simple-{idx:03d}", "source": "script_training", "type": "direct_rcon", "messages": msgs})
        idx += 1

    # ── Edit cycle: read → modify → validate → write ──
    # Tuple layout: request, script name, commands returned by the read,
    # commands written back, final reasoning text.
    EDIT_SCENARIOS = [
        ("sudo add lighting to the pvp arena", "pvp_arena",
         ["fill ~-15 ~-1 ~-15 ~15 ~-1 ~15 minecraft:smooth_stone", "fill ~-15 ~ ~-15 ~15 ~5 ~-15 minecraft:stone_bricks"],
         ["fill ~-15 ~-1 ~-15 ~15 ~-1 ~15 minecraft:smooth_stone", "fill ~-15 ~ ~-15 ~15 ~5 ~-15 minecraft:stone_bricks",
          "setblock ~-10 ~4 ~0 minecraft:lantern[hanging=true]", "setblock ~0 ~4 ~0 minecraft:lantern[hanging=true]", "setblock ~10 ~4 ~0 minecraft:lantern[hanging=true]"],
         "Read existing arena script, added hanging lanterns for lighting."),
        ("sudo add a roof to the wooden house", "wooden_house",
         ["fill ~-4 ~ ~-4 ~4 ~4 ~4 minecraft:oak_planks hollow"],
         ["fill ~-4 ~ ~-4 ~4 ~4 ~4 minecraft:oak_planks hollow",
          "fill ~-5 ~4 ~-5 ~5 ~4 ~5 minecraft:oak_slab", "fill ~-4 ~5 ~-4 ~4 ~5 ~4 minecraft:oak_slab"],
         "Read existing house script, added oak slab roof."),
    ]
    for req, name, old_cmds, new_cmds, reasoning in EDIT_SCENARIOS:
        player = random.choice(PLAYERS)
        msgs = [sys_msg(), user_msg(f"Player {player}: {req}")]
        # Read existing
        msgs.append(tool_call("script.read", {"name": name}))
        msgs.append(tool_result({"ok": True, "commands": old_cmds, "lines": len(old_cmds)}))
        # Validate new version
        msgs.append(tool_call("script.validate", {"commands": new_cmds}))
        msgs.append(tool_result({"valid": True, "total": len(new_cmds), "passed": len(new_cmds), "errors": []}))
        # Overwrite
        msgs.append(tool_call("script.write", {"name": name, "commands": new_cmds}))
        msgs.append(tool_result({"ok": True, "path": f"mortdecai:{name}", "lines": len(new_cmds)}))
        # Execute
        msgs.append(tool_call("script.execute", {"name": name, "as_player": player}))
        msgs.append(tool_result({"ok": True, "result": "Executed commands"}))
        resp = {"risk_level": 3, "commands": [f"function mortdecai:{name}"], "reasoning": reasoning}
        msgs.append(final_response(resp))
        examples.append({"id": f"script-edit-{idx:03d}", "source": "script_training", "type": "script_edit", "messages": msgs})
        idx += 1

    return examples
def main():
    """Generate all examples, print a per-type summary and write them as JSONL.

    One JSON object is written per line to OUTPUT_PATH, replacing any
    previous file.
    """
    print("Generating script tool training data...")
    examples = generate_all()

    counts = {}
    for ex in examples:
        t = ex["type"]
        counts[t] = counts.get(t, 0) + 1

    print(f"\nGenerated {len(examples)} examples:")
    for t, c in sorted(counts.items()):
        print(f" {t}: {c}")

    # A fresh checkout may not have data/raw yet.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits characters such as "→" and "—" verbatim, so the
    # encoding must be pinned rather than left to the locale default.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in examples:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")
    print(f"\nWritten to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
+80
View File
@@ -0,0 +1,80 @@
#!/bin/bash
# Training launcher — stops competing Ollama, trains, restarts Ollama.
# Usage: ./run_training.sh [--resume]
#
# Prevents the OOM crash caused by ollama.service holding 6GB on the 3090 Ti.
#
# Exit status: the exit status of train_lora.py (not of `tee`).
# Ollama is restarted on every exit path once it has been stopped, including
# early aborts caused by `set -e` and interruption with Ctrl-C.

set -e

VERSION="0.5.0"
MODEL="Qwen/Qwen3.5-9B"
OUTPUT="training/checkpoints/mortdecai-${VERSION}"
LOG="training/train_run_${VERSION}.log"

# Extra arguments for train_lora.py. An array expands to zero words when
# empty, so no unquoted string expansion is needed.
RESUME_ARGS=()
if [[ "${1:-}" == "--resume" ]]; then
    RESUME_ARGS=(--resume)
    echo ">> Resume mode: will pick up from latest checkpoint"
fi

# Disable torch compile (causes "Argument list too long" on this system)
export TORCH_COMPILE_DISABLE=1
export TORCHDYNAMO_DISABLE=1

# Use the 3090 Ti (CUDA device ordering: GPU 1 in nvidia-smi = device 0 in CUDA when isolated)
export CUDA_VISIBLE_DEVICES=0

# Set to 1 once we have asked systemd to stop Ollama; restart_ollama clears it
# so the restart happens exactly once.
OLLAMA_STOPPED=0

restart_ollama() {
    if [[ "${OLLAMA_STOPPED}" -eq 1 ]]; then
        OLLAMA_STOPPED=0
        echo ">> Restarting ollama.service..."
        sudo systemctl start ollama.service 2>/dev/null && echo " Started." || echo " Failed to start."
    fi
}
# Safety net: bring Ollama back even if the script dies before the explicit
# restart further down.
trap restart_ollama EXIT

echo "============================================"
echo " Mortdecai ${VERSION} Training"
echo "============================================"
echo "Model: ${MODEL}"
echo "Output: ${OUTPUT}"
echo "Log: ${LOG}"
echo ""

# Stop Ollama on 3090 Ti to free VRAM
echo ">> Stopping ollama.service (3090 Ti)..."
OLLAMA_STOPPED=1
sudo systemctl stop ollama.service 2>/dev/null && echo " Stopped." || echo " Already stopped or not found."
sleep 2

# Verify VRAM is free
echo ">> GPU status:"
nvidia-smi --id=1 --query-gpu=name,memory.used,memory.free --format=csv,noheader
echo ""

# Run training
echo ">> Starting training at $(date)"
cd "$(dirname "$0")/../.."
python3 training/scripts/train_lora.py \
    --model "${MODEL}" \
    --output "${OUTPUT}" \
    --lr 1e-4 \
    --epochs 1 \
    --batch-size 2 \
    --grad-accum 4 \
    --max-seq-len 2048 \
    --save-steps 50 \
    "${RESUME_ARGS[@]}" \
    2>&1 | tee "${LOG}"
# $? would be tee's status here; PIPESTATUS[0] is the trainer's. It must be
# read immediately, before any other command overwrites it.
TRAIN_EXIT=${PIPESTATUS[0]}

echo ""
echo ">> Training finished at $(date) (exit code: ${TRAIN_EXIT})"

# Restart Ollama
restart_ollama

if [ "${TRAIN_EXIT}" -eq 0 ]; then
    echo ""
    echo "============================================"
    echo " Training complete! Next steps:"
    echo " 1. Export GGUF: python3 -m unsloth.save --model ${OUTPUT} --output_type gguf"
    echo " 2. Create Ollama model: ollama create mortdecai:${VERSION} -f Modelfile"
    echo " 3. Run bake-off: python3 training/scripts/bakeoff.py"
    echo "============================================"
fi

exit "${TRAIN_EXIT}"
+414
View File
@@ -0,0 +1,414 @@
#!/usr/bin/env python3
"""
Tool-focused self-play exercises all 14 tools on a live dev server.
Unlike regular self-play (which tests command generation), this script
specifically generates prompts that require tool use: script writing,
memory operations, entity scanning, wiki lookups, and chained multi-tool
flows. Runs on the dev server via RCON.
The model responds, its tool calls get executed for real, and the full
interaction (prompt + tool calls + results + final response) gets logged
as training data.
Usage:
python3 tool_self_play.py --ollama-url http://192.168.0.179:11434 \\
--rcon-host 192.168.0.112 --rcon-port 25578 --rounds 30
# Or via the scheduler preset
"""
import argparse
import json
import os
import random
import re
import sys
import time
from pathlib import Path
# Repository root: three directory levels above this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
# Put the repository root on sys.path so the `agent` import below resolves.
sys.path.insert(0, str(PROJECT_ROOT))
import requests
from agent.tools.persistent_rcon import get_rcon
# Directory that receives the timestamped self-play JSONL files.
OUTPUT_DIR = PROJECT_ROOT / "data" / "raw" / "tool_selfplay"
# ── Prompt categories that exercise specific tools ─────────────────────────
# Maps a category name to the player requests that belong to it. run_round()
# picks one prompt at random from the requested category, and the category
# names are the values accepted by the --categories option.
PROMPTS = {
    "script_build": [
        "sudo build me a small cobblestone house with a door and windows",
        "sudo create a fighting arena with red and blue corners",
        "sudo make a nether portal room with soul lanterns",
        "sudo build a watchtower 15 blocks tall with a ladder",
        "sudo create a 9x9 wheat farm with water in the center",
        "sudo build an enchanting setup with bookshelves",
        "sudo make a mob grinder platform with water channels",
        "sudo create a trophy room with item frames",
        "sudo build a bridge 30 blocks long over this ravine",
        "sudo make a lighthouse with a glowstone top",
        "sudo create a hedge maze using oak leaves",
        "sudo build a dock with oak wood extending into the water",
        "sudo make an underground bunker with iron doors",
        "sudo create a garden with flowers and paths",
        "sudo build a market stall with a counter and signs",
    ],
    "script_schedule": [
        "sudo make cherry leaf particles fall around spawn forever",
        "sudo set up a scoreboard that tracks deaths on server load",
        "sudo make ambient campfire smoke particles at spawn every tick",
        "sudo create a function that heals everyone every 5 minutes",
        "sudo make it always rain XP orbs at spawn",
    ],
    "script_manage": [
        "sudo show me all scripts",
        "sudo what scripts are running on tick?",
        "sudo delete the test script",
        "sudo read me the arena script",
        "sudo what did I build last?",
    ],
    "memory_write": [
        "sudo remember this as my home",
        "sudo save this location as my base",
        "sudo remember my nether portal is here",
        "sudo my favorite item is a diamond pickaxe, remember that",
        "sudo save this as my farm",
        "sudo remember that Ace13245 is my friend",
        "sudo mark this spot as the village center",
        "sudo save this as my fishing spot",
    ],
    "memory_read": [
        "sudo tp me home",
        "sudo take me to my base",
        "sudo where's my nether portal?",
        "sudo what do you know about me?",
        "sudo tp me to my farm",
        "sudo where was I building?",
        "sudo do I have any saved locations?",
    ],
    "nearby_entities": [
        "sudo what mobs are near me?",
        "sudo kill all the zombies around me",
        "sudo how many animals are nearby?",
        "sudo clear hostile mobs in a 50 block radius",
        "sudo are there any creepers close to me?",
        "sudo kill the nearest skeleton",
        "sudo count everything within 30 blocks",
        "sudo protect me from nearby hostiles",
    ],
    "wiki_lookup": [
        "sudo what enchantments can go on a mace?",
        "sudo how do I craft a lodestone?",
        "sudo what food gives the best saturation?",
        "sudo what are the new 1.21 armor trim patterns?",
        "sudo what's the difference between smite and sharpness?",
        "sudo how does fortune work on ores?",
        "sudo what are all the copper variants?",
        "sudo how do trial spawners work?",
    ],
    "player_info": [
        "sudo build a wall around me",
        "sudo teleport me 50 blocks up",
        "sudo place torches around me",
        "sudo create a beacon at my location",
        "sudo surround me with glass",
        "sudo set my spawn here",
        "sudo light up this cave around me",
    ],
    "server_state": [
        "sudo if it's night, make it day",
        "sudo give everyone online a golden apple",
        "sudo how many people are playing right now?",
        "sudo announce the current time and weather",
        "sudo clear the weather if it's raining",
    ],
    "chained": [
        "sudo save this location as home, then build a marker here",
        "sudo check what's near me and kill all hostiles, then give me resistance",
        "sudo look up the best sword enchantments and give me one",
        "sudo tp me home and heal me",
        "sudo build an arena and save it as a script I can rerun",
        "sudo check my health, if low heal me and give me food",
        "sudo what scripts do I have? run the arena one at my position",
        "sudo remember this spot, scan for mobs, kill hostiles, build a fort",
    ],
    # ── Plugin categories ──
    "worldguard": [
        "sudo protect this area as my base",
        "sudo make a no-pvp zone around spawn",
        "sudo prevent mob spawning in the village",
        "sudo add Ace13245 as a member of my region",
        "sudo block entry for non-members in the vault",
        "sudo allow TNT in the arena region",
        "sudo set a greeting message for my base region",
        "sudo list all protected regions",
        "sudo prevent creeper explosions globally",
        "sudo create a healing zone at spawn",
        "sudo remove the old-test region",
        "sudo make a safe zone with no fire spread",
    ],
    "coreprotect": [
        "sudo check who broke blocks near me",
        "sudo rollback griefing from the last hour",
        "sudo rollback what TheBigBoss did recently",
        "sudo who placed blocks around here today?",
        "sudo undo TNT damage from the last 2 hours",
        "sudo rollback all container theft recently",
        "sudo restore what was rolled back",
        "sudo check CoreProtect status",
        "sudo rollback fire damage near spawn",
        "sudo lookup what Ace13245 did in the last day",
    ],
    "essentialsx": [
        "sudo set my home here",
        "sudo tp me to my home",
        "sudo create a warp called arena",
        "sudo tp me to the arena warp",
        "sudo give Ace 1000 coins",
        "sudo check my balance",
        "sudo heal me",
        "sudo feed me",
        "sudo repair what I'm holding",
        "sudo set my nickname to DragonLord",
        "sudo give me god mode",
        "sudo toggle fly for me",
        "sudo broadcast a server message",
        "sudo set spawn point here",
        "sudo check when Ace was last online",
    ],
    "luckperms": [
        "sudo give me permission to fly",
        "sudo create a VIP group",
        "sudo add Ace to the VIP group",
        "sudo give VIP access to fly and heal",
        "sudo give me temporary VIP for 1 day",
        "sudo set VIP chat prefix to gold",
        "sudo create a builder group with WorldEdit",
        "sudo list all permission groups",
        "sudo check what permissions I have",
        "sudo remove TheBigBoss from VIP",
    ],
    "fawe": [
        "sudo make a glass sphere 10 blocks wide",
        "sudo hollow sphere of stone",
        "sudo cylinder of quartz 5 wide 10 tall",
        "sudo replace all stone with deepslate in my selection",
        "sudo smooth the terrain around here",
        "sudo drain all water within 20 blocks",
        "sudo build a sandstone pyramid 10 tall",
        "sudo hollow out the selected area",
        "sudo make walls around my selection with stone bricks",
        "sudo fill with a checkerboard pattern",
        "sudo stack my selection 5 times north",
        "sudo undo my last WorldEdit action",
    ],
    # Mixes multi-plugin "sudo" requests with free-form "pray" requests.
    "plugin_combined": [
        "sudo create a protected pvp arena with WorldEdit and WorldGuard",
        "sudo rollback Ace's griefing and revoke his builder perms",
        "sudo set up a VIP lounge — build it, protect it, make a warp",
        "sudo give TheBigBoss a reward: money, items, and temp VIP",
        "sudo prepare the server for an event: announce, set arena flags, heal everyone",
        "pray someone destroyed my house, please restore it",
        "pray protect my village from monsters",
        "pray smite TheBigBoss for griefing",
        "pray make me a temple worthy of your glory",
    ],
}
# Player names sampled at random as the speaker of each prompt.
PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
def query_model(prompt, player, ollama_url, model, rcon):
    """Send one prompt to the Ollama chat endpoint and return the parsed reply.

    Args:
        prompt: The player's request text.
        player: Player name; the user turn is sent as "Player <player>: <prompt>".
        ollama_url: Base URL of the Ollama server (``/api/chat`` is appended).
        model: Name of the model to query.
        rcon: Unused here; kept so existing callers keep working.

    Returns:
        dict: The JSON object produced by the model. On any request, HTTP or
        parsing failure, or when the reply is valid JSON but not an object,
        a dict with the keys ``"error"`` (message) and ``"raw"`` (the reply
        text received so far, possibly empty).
    """
    system = (
        "You are a Minecraft 1.21 command translator for a Paper server.\n"
        "Plugins: FastAsyncWorldEdit, WorldGuard, CoreProtect, EssentialsX, Vault, LuckPerms.\n"
        "Tools: rcon.execute, minecraft.wiki_lookup, world.player_info, "
        "world.server_state, world.nearby_entities, memory.read, memory.write, "
        "script.write, script.validate, script.execute, script.read, script.list, "
        "script.delete, script.schedule.\n\n"
        "Plugin commands: //set, //sphere, //cyl (FAWE), /rg define/flag (WorldGuard), "
        "/co rollback/inspect (CoreProtect), /home, /warp, /eco (EssentialsX), "
        "/lp user/group (LuckPerms).\n\n"
        "For complex builds (4+ commands), write a mcfunction script. "
        "For simple tasks, use rcon.execute directly.\n\n"
        "Return JSON: {\"commands\": [...], \"reasoning\": \"...\", \"message\": \"...\"}\n"
        "Use /no_think mode."
    )
    # Defined before the try so the error path can always report what was
    # received, without probing the local namespace.
    content = ""
    try:
        r = requests.post(f"{ollama_url}/api/chat", json={
            "model": model,
            "messages": [
                {"role": "system", "content": "/no_think\n" + system},
                {"role": "user", "content": f"Player {player}: {prompt}"},
            ],
            "stream": False, "format": "json",
            "options": {"temperature": 0.4, "num_predict": 800},
        }, timeout=120)
        # Surface HTTP errors with their status instead of a bare KeyError on
        # the missing "message" field.
        r.raise_for_status()
        content = r.json()["message"]["content"]
        # Drop any reasoning block the model emitted despite /no_think.
        content = re.sub(r'<think>[\s\S]*?</think>\s*', '', content)
        parsed = json.loads(content)
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        return {"error": str(e), "raw": content if isinstance(content, str) else ""}
    # Callers use dict methods on the reply; a JSON list or scalar is an error.
    if not isinstance(parsed, dict):
        return {"error": f"expected a JSON object, got {type(parsed).__name__}", "raw": content}
    return parsed
def validate_commands(commands, rcon):
    """Execute up to the first 12 commands over RCON and record each outcome.

    Entries that are not strings, or are blank, are skipped. A command counts
    as failed when the server reply contains one of the known error markers
    or when the RCON call raises.

    Args:
        commands: Sequence of candidate command strings.
        rcon: Object exposing ``command(str)`` that returns the server reply.

    Returns:
        list[dict]: one ``{"cmd", "result", "ok"}`` dict per executed command;
        ``result`` is the reply truncated to 200 characters, or the exception
        text.
    """
    error_markers = ("<--[HERE]", "Unknown", "Incorrect", "Expected")
    outcomes = []
    for command in commands[:12]:
        if not (isinstance(command, str) and command.strip()):
            continue
        try:
            reply = rcon.command(command)
            failed = False
            for marker in error_markers:
                if marker in reply:
                    failed = True
                    break
            entry = {"cmd": command, "result": reply[:200], "ok": not failed}
        except Exception as exc:
            entry = {"cmd": command, "result": str(exc), "ok": False}
        outcomes.append(entry)
    return outcomes
def run_round(category, ollama_url, model, rcon, player):
    """Run one self-play round for a specific tool category.

    Picks a random prompt from ``PROMPTS[category]``, queries the model, runs
    the returned commands over RCON and packages everything as a training
    example.

    Args:
        category: Key of PROMPTS to draw the prompt from.
        ollama_url: Base URL of the Ollama server.
        model: Model name to query.
        rcon: RCON connection used to execute the commands.
        player: Player name the prompt is attributed to.

    Returns:
        dict | None: the example record, or None when the model query failed.
    """
    prompt = random.choice(PROMPTS[category])
    print(f" [{category:18s}] {prompt[:60]}")
    start = time.time()
    response = query_model(prompt, player, ollama_url, model, rcon)
    elapsed = time.time() - start

    # A reply that is not a JSON object cannot be inspected with .get().
    if not isinstance(response, dict):
        print(f" ERROR: expected a JSON object, got {type(response).__name__}")
        return None
    if "error" in response:
        print(f" ERROR: {str(response['error'])[:80]}")
        return None

    commands = response.get("commands", [])
    # Normalize malformed "commands" values. A bare string is treated as one
    # command (iterating it would send each character to the server); any
    # other non-list value, such as null, is treated as no commands.
    if isinstance(commands, str):
        commands = [commands]
    elif not isinstance(commands, list):
        commands = []
    message = response.get("message", "")
    reasoning = response.get("reasoning", "")

    # Validate commands via RCON
    rcon_results = []
    if commands and all(isinstance(c, str) for c in commands):
        rcon_results = validate_commands(commands, rcon)
    # Require at least one executed command: all([]) is True, which would
    # count a round whose commands were all blank as a success.
    success = bool(rcon_results) and all(r["ok"] for r in rcon_results)

    ok_count = sum(1 for r in rcon_results if r["ok"])
    fail_count = sum(1 for r in rcon_results if not r["ok"])
    status = "OK" if success else f"PARTIAL ({ok_count}/{ok_count+fail_count})" if ok_count > 0 else "FAIL"
    print(f"{len(commands)} cmds, {status}, {elapsed:.1f}s")

    # Build training example
    example = {
        "id": f"tool-selfplay-{int(time.time())}-{random.randint(0,9999):04d}",
        "source": "tool_self_play",
        "category": category,
        "input": {
            "user_message": prompt,
            "server_context": {
                "server_type": "paper",
                "version": "1.21.x",
                "online_players": [player],
            },
        },
        "output": {
            "commands": commands,
            "message": message,
            "reasoning": reasoning,
        },
        "metadata": {
            "rcon_results": rcon_results,
            "all_success": success,
            "elapsed_seconds": round(elapsed, 2),
            "model": model,
            "tool_category": category,
        },
    }
    return example
def main():
    """Command-line entry point: run tool self-play and log examples as JSONL.

    Each round visits every selected category once, in random order. Every
    successful model reply is appended to the output file immediately, so an
    interrupted run keeps what it has collected.
    """
    parser = argparse.ArgumentParser(description="Tool-focused self-play")
    parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
    parser.add_argument("--model", default="mortdecai:0.4.0")
    parser.add_argument("--rcon-host", default="192.168.0.112")
    parser.add_argument("--rcon-port", type=int, default=25578)
    # The RCON_PASSWORD environment variable overrides the built-in default so
    # the password need not appear on the command line.
    parser.add_argument("--rcon-pass", default=os.environ.get("RCON_PASSWORD", "REDACTED_RCON"),
                        help="RCON password (default: $RCON_PASSWORD if set)")
    parser.add_argument("--rounds", type=int, default=30, help="Rounds per category")
    parser.add_argument("--categories", default="all", help="Comma-separated categories or 'all'")
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    # Resolve categories before connecting so a typo fails fast instead of
    # raising KeyError in the middle of a run.
    if args.categories == "all":
        categories = list(PROMPTS.keys())
    else:
        categories = [c.strip() for c in args.categories.split(",") if c.strip()]
        unknown = [c for c in categories if c not in PROMPTS]
        if unknown:
            parser.error(f"unknown categories: {', '.join(unknown)} "
                         f"(choose from: {', '.join(PROMPTS)})")
        if not categories:
            parser.error("no categories given")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = args.output or str(OUTPUT_DIR / f"tool_selfplay_{int(time.time())}.jsonl")
    # A custom --output may point into a directory that does not exist yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    rcon = get_rcon(args.rcon_host, args.rcon_port, args.rcon_pass)

    print("Tool Self-Play")
    print(f" Model: {args.model} on {args.ollama_url}")
    print(f" RCON: {args.rcon_host}:{args.rcon_port}")
    print(f" Categories: {', '.join(categories)}")
    print(f" Rounds per category: {args.rounds}")
    print(f" Output: {output_path}")
    print()

    stats = {"total": 0, "success": 0, "partial": 0, "fail": 0, "error": 0}
    examples = []

    # Opened once in append mode. UTF-8 is pinned because ensure_ascii=False
    # writes non-ASCII prompt text verbatim.
    with open(output_path, "a", encoding="utf-8") as f:
        for round_num in range(args.rounds):
            print(f"\n── Round {round_num + 1}/{args.rounds} ──")
            random.shuffle(categories)
            for cat in categories:
                player = random.choice(PLAYERS)
                example = run_round(cat, args.ollama_url, args.model, rcon, player)
                stats["total"] += 1
                if example is None:
                    stats["error"] += 1
                    continue
                if example["metadata"]["all_success"]:
                    stats["success"] += 1
                elif any(r["ok"] for r in example["metadata"].get("rcon_results", [])):
                    stats["partial"] += 1
                else:
                    stats["fail"] += 1
                examples.append(example)
                # Write incrementally; flush so a crash loses nothing.
                f.write(json.dumps(example, ensure_ascii=False) + "\n")
                f.flush()
                time.sleep(0.3)

            # Progress report
            if (round_num + 1) % 5 == 0:
                rate = stats["success"] / max(stats["total"], 1) * 100
                print(f"\n Progress: {stats['total']} total, {rate:.0f}% success, "
                      f"{stats['partial']} partial, {stats['fail']} fail, {stats['error']} error")

    print(f"\n{'='*60}")
    print("Tool Self-Play Complete")
    print(f" Total: {stats['total']}")
    print(f" Success: {stats['success']} ({stats['success']/max(stats['total'],1)*100:.0f}%)")
    print(f" Partial: {stats['partial']}")
    print(f" Fail: {stats['fail']}")
    print(f" Error: {stats['error']}")
    print(f" Output: {output_path} ({len(examples)} examples)")


if __name__ == "__main__":
    main()
+16 -2
View File
@@ -157,6 +157,8 @@ def main():
parser.add_argument("--grad-accum", type=int, default=4, help="Gradient accumulation steps")
parser.add_argument("--max-seq-len", type=int, default=2048, help="Max sequence length")
parser.add_argument("--dry-run", action="store_true", help="Load model and dataset but don't train")
parser.add_argument("--save-steps", type=int, default=50, help="Save checkpoint every N steps")
parser.add_argument("--resume", action="store_true", help="Resume from latest checkpoint if available")
args = parser.parse_args()
# Auto-detect paths
@@ -258,13 +260,25 @@ def main():
weight_decay=0.01,
bf16=True,
logging_steps=1,
save_strategy="epoch",
save_strategy="steps",
save_steps=args.save_steps,
save_total_limit=3,
seed=42,
max_seq_length=args.max_seq_len,
dataset_text_field="text",
packing=True,
)
# Check for resume checkpoint
resume_ckpt = None
if args.resume:
ckpt_dir = Path(args.output)
if ckpt_dir.exists():
checkpoints = sorted(ckpt_dir.glob("checkpoint-*"), key=lambda p: int(p.name.split("-")[-1]))
if checkpoints:
resume_ckpt = str(checkpoints[-1])
print(f" Resuming from: {resume_ckpt}")
# Train
print(f"\nStarting training ({args.epochs} epochs, {len(train_data)} examples)...")
trainer = SFTTrainer(
@@ -274,7 +288,7 @@ def main():
args=training_args,
)
trainer.train()
trainer.train(resume_from_checkpoint=resume_ckpt)
# Save adapter
print(f"\nSaving LoRA adapter to {args.output}...")