Tool-calling training: 1,159 multi-turn examples with error correction

Tool schemas (agent/tools/tool_schemas.py):
- rcon.execute: execute commands, get success/error results
- minecraft.wiki_lookup: look up syntax and item info
- world.player_info: player health, position, inventory
- world.server_state: time, weather, online players
- 10 RCON error patterns with corrections
- 12 common error scenarios for training

Training data generator (training/scripts/generate_tool_training.py):
- Converts seed dataset to multi-turn tool conversations
- Error correction: model tries wrong command → gets error → self-corrects
- Wiki/player/server lookups for uncertainty scenarios
- Qwen3 native tool-calling format with <tool_call> tags

1,159 examples: 1,043 success, 79 error correction, 24 error scenarios,
13 tool lookups. Ready for v4 training.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 18:49:08 -04:00
parent 4e83da39fd
commit ee764cd22a
3 changed files with 2417 additions and 0 deletions
+408
View File
@@ -0,0 +1,408 @@
"""
Tool schemas for the Minecraft AI God model.
Defines all tools the model can call during inference, plus common RCON error
patterns used for generating error-correction training data.
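Exports:
TOOL_SCHEMAS — Python list of dicts (name, description, parameters in JSON Schema)
QWEN3_TOOLS — Same tools formatted for Qwen3 chat template
qwen3_tools_block — Function returning the "# Tools" text block for the system prompt
RCON_ERROR_PATTERNS — Dict mapping RCON error substrings to correction metadata
ERROR_SCENARIOS — List of wrong/correct command pairs for error-correction training data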
"""
from typing import List, Dict, Any
# ---------------------------------------------------------------------------
# Tool definitions (canonical source of truth)
# ---------------------------------------------------------------------------
# Each entry describes one callable tool:
#   name        - dotted tool identifier the model emits in a tool call
#   description - natural-language guidance shown to the model
#   parameters  - JSON Schema for the call arguments
#   returns     - JSON Schema-style description of the result payload
#                 (documentation only here; _to_qwen3_tool does not forward it)
TOOL_SCHEMAS: List[Dict[str, Any]] = [
    # Run a single command on the server and report success plus response text.
    {
        "name": "rcon.execute",
        "description": (
            "Execute a Minecraft command via RCON on the server. "
            "Returns whether the command succeeded and the server's response text. "
            "Commands should NOT start with a leading slash."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The Minecraft command to execute (no leading slash)."
                }
            },
            "required": ["command"],
            "additionalProperties": False
        },
        "returns": {
            "type": "object",
            "properties": {
                "success": {"type": "boolean"},
                "result": {"type": "string"}
            }
        }
    },
    # Free-text wiki search used when syntax or IDs are uncertain.
    {
        "name": "minecraft.wiki_lookup",
        "description": (
            "Look up command syntax, item info, or game mechanics from the "
            "Minecraft Wiki. Use this when you are unsure about exact syntax, "
            "item IDs, enchantment names, effect names, or entity types."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query (e.g. 'give command syntax', 'trident enchantments')."
                }
            },
            "required": ["query"],
            "additionalProperties": False
        },
        "returns": {
            "type": "object",
            "properties": {
                "content": {"type": "string"},
                "url": {"type": "string"}
            }
        }
    },
    # Per-player snapshot: health, integer block position, inventory summary.
    {
        "name": "world.player_info",
        "description": (
            "Get a player's current state including health, position, and a "
            "summary of their inventory. Requires the player to be online."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "player": {
                    "type": "string",
                    "description": "The player's in-game name (case-sensitive)."
                }
            },
            "required": ["player"],
            "additionalProperties": False
        },
        "returns": {
            "type": "object",
            "properties": {
                "health": {"type": "number"},
                "position": {
                    "type": "object",
                    "properties": {
                        "x": {"type": "integer"},
                        "y": {"type": "integer"},
                        "z": {"type": "integer"}
                    }
                },
                "inventory_summary": {"type": "string"}
            }
        }
    },
    # Server-wide snapshot; takes no arguments (empty properties/required).
    {
        "name": "world.server_state",
        "description": (
            "Get the current server state: time of day, weather, online players, "
            "and world border size. No parameters required."
        ),
        "parameters": {
            "type": "object",
            "properties": {},
            "required": [],
            "additionalProperties": False
        },
        "returns": {
            "type": "object",
            "properties": {
                "time_of_day": {"type": "string"},
                "weather": {"type": "string"},
                "online_players": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "world_border": {"type": "number"}
            }
        }
    },
]
# ---------------------------------------------------------------------------
# Qwen3 tool format (for chat template injection)
# ---------------------------------------------------------------------------
def _to_qwen3_tool(schema: Dict[str, Any]) -> Dict[str, Any]:
"""Convert a tool schema dict to the Qwen3 function-calling format."""
return {
"type": "function",
"function": {
"name": schema["name"],
"description": schema["description"],
"parameters": schema["parameters"],
}
}
# Qwen3-formatted view of every entry in TOOL_SCHEMAS, built once at import time.
QWEN3_TOOLS: List[Dict[str, Any]] = [_to_qwen3_tool(t) for t in TOOL_SCHEMAS]
def qwen3_tools_block() -> str:
    """Render the "# Tools" section that is appended to the system prompt.

    The section is a fixed header followed by each entry of ``QWEN3_TOOLS``
    pretty-printed as JSON (2-space indent), with a blank line after each.
    """
    import json

    header = [
        "# Tools",
        "",
        "You may call one or more functions to assist.",
        "The available tools are:",
        "",
    ]
    # Each tool contributes its JSON dump plus one empty separator line.
    tool_lines = [
        piece
        for tool in QWEN3_TOOLS
        for piece in (json.dumps(tool, indent=2), "")
    ]
    return "\n".join(header + tool_lines)
# ---------------------------------------------------------------------------
# Common RCON error patterns and their corrections
# ---------------------------------------------------------------------------
# Keys are substrings of RCON error responses; values describe the failure:
#   type            - short machine-readable category
#   description     - what the error means
#   common_causes   - typical mistakes that trigger it
#   corrections     - (optional) invalid ID -> valid replacement ID
#   example_wrong / example_right - one failing command and its fix
RCON_ERROR_PATTERNS: Dict[str, Dict[str, Any]] = {
    # --- Syntax / structural errors ---
    "Unknown or incomplete command": {
        "type": "syntax_error",
        "description": "Command is malformed, misspelled, or missing subcommands.",
        "common_causes": [
            "Missing 'give' subcommand in effect (e.g. 'effect player' instead of 'effect give player')",
            "Invalid weather value (e.g. 'weather storm' instead of 'weather thunder')",
            "Old NBT syntax that the parser cannot parse",
            "Missing minecraft: namespace prefix",
        ],
        "example_wrong": "effect slingshooter08 minecraft:strength 300 2",
        "example_right": "effect give slingshooter08 minecraft:strength 300 2",
    },
    "Incorrect argument for command": {
        "type": "wrong_argument",
        "description": "A command argument has an invalid type or value.",
        "common_causes": [
            "Wrong gamemode string (e.g. 's' instead of 'survival')",
            "Non-integer where integer expected",
            "Invalid enchantment or effect name",
        ],
        "example_wrong": "gamemode s slingshooter08",
        "example_right": "gamemode survival slingshooter08",
    },
    "Expected whitespace": {
        "type": "missing_space",
        "description": "Parser expected a space between tokens but found something else.",
        "common_causes": [
            "Missing space before count in give command",
            "Squashed arguments with no separator",
        ],
        "example_wrong": "give slingshooter08 minecraft:diamond_sword1",
        "example_right": "give slingshooter08 minecraft:diamond_sword 1",
    },
    "Unknown item 'minecraft:": {
        "type": "invalid_item",
        "description": "The item ID does not exist in 1.21.",
        "common_causes": [
            "Using generic name (bed -> white_bed, log -> oak_log, wood -> oak_planks)",
            "Misspelled item ID",
            "Using a removed or renamed item",
        ],
        # Every key must be an ID that really triggers "Unknown item"; a key
        # equal to its value would produce a "correction" identical to the
        # failing command.
        "corrections": {
            "minecraft:bed": "minecraft:white_bed",
            "minecraft:log": "minecraft:oak_log",
            "minecraft:wood": "minecraft:oak_planks",
            "minecraft:plank": "minecraft:oak_planks",
            "minecraft:stone_brick": "minecraft:stone_bricks",
            "minecraft:wood_sword": "minecraft:wooden_sword",
            "minecraft:iron": "minecraft:iron_ingot",
            "minecraft:gold": "minecraft:gold_ingot",
            "minecraft:diamonds": "minecraft:diamond",
            "minecraft:notch_apple": "minecraft:enchanted_golden_apple",
            "minecraft:gapple": "minecraft:golden_apple",
            "minecraft:grass": "minecraft:short_grass",
            "minecraft:boat": "minecraft:oak_boat",
        },
        "example_wrong": "give slingshooter08 minecraft:bed 1",
        "example_right": "give slingshooter08 minecraft:white_bed 1",
    },
    "No player was found": {
        "type": "player_not_found",
        "description": "The specified player is not online or the name is wrong.",
        "common_causes": [
            "Player logged off",
            "Case-sensitive name mismatch",
            "Typo in player name",
        ],
        "example_wrong": "give Slingshooter08 minecraft:diamond 1",
        "example_right": "give slingshooter08 minecraft:diamond 1",
    },
    "That position is not loaded": {
        "type": "unloaded_chunk",
        "description": "The target coordinates are in an unloaded chunk.",
        "common_causes": [
            "Filling/placing blocks far from any player",
            "Teleporting to unloaded area without a player there",
        ],
        "example_wrong": "setblock 50000 64 50000 minecraft:diamond_block",
        "example_right": "execute at slingshooter08 run setblock ~ ~1 ~ minecraft:diamond_block",
    },
    "Invalid or unknown enchantment": {
        "type": "invalid_enchantment",
        "description": "The enchantment name is wrong or incompatible with the item.",
        "common_causes": [
            "Typo in enchantment name",
            "Using an enchantment on an incompatible item (e.g. sharpness on a bow)",
            "Old enchantment names",
        ],
        "example_wrong": "give slingshooter08 minecraft:bow[enchantments={sharpness:5}] 1",
        "example_right": "give slingshooter08 minecraft:bow[enchantments={power:5}] 1",
    },
    "Could not parse the remainder": {
        "type": "nbt_parse_error",
        "description": "The NBT/component data could not be parsed.",
        "common_causes": [
            "Old {Enchantments:[{id:...,lvl:...}]} NBT syntax instead of 1.21 component syntax",
            "Mismatched brackets or braces",
            "Extra trailing data",
        ],
        "example_wrong": "give slingshooter08 minecraft:diamond_sword{Enchantments:[{id:sharpness,lvl:5}]} 1",
        "example_right": "give slingshooter08 minecraft:diamond_sword[enchantments={sharpness:5}] 1",
    },
    "Unknown effect 'minecraft:": {
        "type": "invalid_effect",
        "description": "The effect ID does not exist.",
        "common_causes": [
            "Misspelled effect name",
            "Using display name instead of ID (e.g. 'haste' is correct, not 'fast_digging')",
        ],
        "corrections": {
            "minecraft:fast_digging": "minecraft:haste",
            "minecraft:slow_digging": "minecraft:mining_fatigue",
            "minecraft:confusion": "minecraft:nausea",
            "minecraft:damage_boost": "minecraft:strength",
            "minecraft:harm": "minecraft:instant_damage",
            "minecraft:heal": "minecraft:instant_health",
        },
        "example_wrong": "effect give slingshooter08 minecraft:fast_digging 300 2",
        "example_right": "effect give slingshooter08 minecraft:haste 300 2",
    },
    "Unknown entity type": {
        "type": "invalid_entity",
        "description": "The entity type does not exist in 1.21.",
        "common_causes": [
            "Missing minecraft: prefix",
            "Old entity name (e.g. 'PigZombie' -> 'zombified_piglin')",
        ],
        "corrections": {
            "minecraft:pigzombie": "minecraft:zombified_piglin",
            "minecraft:pig_zombie": "minecraft:zombified_piglin",
            "minecraft:zombie_pigman": "minecraft:zombified_piglin",
        },
        "example_wrong": "summon minecraft:pig_zombie ~ ~ ~",
        "example_right": "summon minecraft:zombified_piglin ~ ~ ~",
    },
}
# ---------------------------------------------------------------------------
# Specific error scenarios for training data generation
# ---------------------------------------------------------------------------
# Each scenario is one wrong -> error -> corrected exchange:
#   id              - short unique slug
#   description     - what mistake is being demonstrated
#   wrong_command   - command the model tries first
#   error_message   - simulated RCON response to the wrong command
#   correct_command - command that succeeds on retry
#   reasoning       - explanation placed in the final response
# Commands are str.format templates: {player} is substituted by the
# generator, so literal braces in component/NBT data are doubled ({{ }}).
# NOTE(review): "missing_prefix" and "summon_no_prefix" assume the server
# rejects IDs without the minecraft: namespace — confirm against a real
# 1.21 server before relying on these two scenarios.
ERROR_SCENARIOS: List[Dict[str, Any]] = [
    {
        "id": "missing_prefix",
        "description": "Missing minecraft: prefix on item ID",
        "wrong_command": "give {player} diamond_sword 1",
        "error_message": "Unknown or incomplete command, see below for error at position ...",
        "correct_command": "give {player} minecraft:diamond_sword 1",
        "reasoning": "Item IDs require the minecraft: namespace prefix in 1.21+.",
    },
    {
        "id": "old_nbt_enchantments",
        "description": "Old NBT enchantment syntax instead of 1.21 component syntax",
        "wrong_command": 'give {player} minecraft:diamond_sword{{Enchantments:[{{id:"minecraft:sharpness",lvl:5}}]}} 1',
        "error_message": "Could not parse the remainder of the data tag",
        "correct_command": "give {player} minecraft:diamond_sword[enchantments={{sharpness:5,unbreaking:3,fire_aspect:2}}] 1",
        "reasoning": "1.21 uses component syntax item[enchantments={name:level}] not old NBT {Enchantments:[...]}.",
    },
    {
        "id": "invalid_effect_name",
        "description": "Invalid or old effect name",
        "wrong_command": "effect give {player} minecraft:fast_digging 300 2",
        "error_message": "Unknown effect 'minecraft:fast_digging'",
        "correct_command": "effect give {player} minecraft:haste 300 2",
        "reasoning": "The effect is called 'haste' in 1.21, not 'fast_digging'.",
    },
    {
        "id": "wrong_item_bed",
        "description": "Generic 'bed' instead of color-specific bed",
        "wrong_command": "give {player} minecraft:bed 1",
        "error_message": "Unknown item 'minecraft:bed'",
        "correct_command": "give {player} minecraft:white_bed 1",
        "reasoning": "In 1.13+, beds require a color prefix. 'bed' is not a valid item; use 'white_bed', 'red_bed', etc.",
    },
    {
        "id": "wrong_item_log",
        "description": "Generic 'log' instead of wood-specific log",
        "wrong_command": "give {player} minecraft:log 64",
        "error_message": "Unknown item 'minecraft:log'",
        "correct_command": "give {player} minecraft:oak_log 64",
        "reasoning": "In 1.13+, logs require a wood type prefix. Use 'oak_log', 'birch_log', etc.",
    },
    {
        "id": "count_wrong_position",
        "description": "Count placed before item in give command",
        "wrong_command": "give {player} 64 minecraft:diamond",
        "error_message": "Incorrect argument for command at position ...: Expected item, got '64'",
        "correct_command": "give {player} minecraft:diamond 64",
        "reasoning": "Give command syntax is: give <player> <item> [count]. Count comes after item, not before.",
    },
    {
        "id": "effect_missing_give",
        "description": "Missing 'give' subcommand in effect command",
        "wrong_command": "effect {player} minecraft:speed 300 2",
        "error_message": "Unknown or incomplete command, see below for error at position ...",
        "correct_command": "effect give {player} minecraft:speed 300 2",
        "reasoning": "In 1.21, effect requires a subcommand: 'effect give', 'effect clear'. Bare 'effect <player>' is invalid.",
    },
    {
        "id": "weather_storm",
        "description": "Invalid weather value 'storm'",
        "wrong_command": "weather storm",
        "error_message": "Unknown or incomplete command, see below for error at position ...",
        "correct_command": "weather thunder",
        "reasoning": "Valid weather values are: clear, rain, thunder. 'storm' is not valid; use 'thunder'.",
    },
    {
        "id": "gamemode_abbreviation",
        "description": "Gamemode abbreviation instead of full name",
        "wrong_command": "gamemode c {player}",
        "error_message": "Incorrect argument for command at position ...: Invalid game mode 'c'",
        "correct_command": "gamemode creative {player}",
        "reasoning": "Gamemode requires full names in 1.21: survival, creative, adventure, spectator. Abbreviations are invalid.",
    },
    {
        "id": "wrong_item_grass",
        "description": "Old 'grass' item renamed to 'short_grass'",
        "wrong_command": "give {player} minecraft:grass 64",
        "error_message": "Unknown item 'minecraft:grass'",
        "correct_command": "give {player} minecraft:short_grass 64",
        "reasoning": "In 1.20.3+, 'grass' was renamed to 'short_grass'. The block 'grass_block' is separate.",
    },
    {
        "id": "summon_no_prefix",
        "description": "Summon without minecraft: prefix",
        "wrong_command": "summon zombie ~ ~ ~",
        "error_message": "Unknown entity type: zombie",
        "correct_command": "summon minecraft:zombie ~ ~ ~",
        "reasoning": "Entity types require the minecraft: namespace prefix.",
    },
    {
        "id": "old_zombie_pigman",
        "description": "Old zombie pigman name",
        "wrong_command": "summon minecraft:zombie_pigman ~ ~ ~",
        "error_message": "Unknown entity type: minecraft:zombie_pigman",
        "correct_command": "summon minecraft:zombified_piglin ~ ~ ~",
        "reasoning": "Zombie pigmen were renamed to zombified piglins in 1.16.",
    },
]
File diff suppressed because one or more lines are too long
+850
View File
@@ -0,0 +1,850 @@
#!/usr/bin/env python3
"""
Generate multi-turn tool-calling training data for the Minecraft AI God model.
Reads data/processed/seed_dataset.jsonl and produces data/processed/tool_training.jsonl
with Qwen3-format multi-turn conversations that teach the model to:
1. Call rcon.execute and handle success/error results
2. Self-correct on RCON errors (retry with fixed command)
3. Use minecraft.wiki_lookup when unsure about syntax
4. Use world.player_info / world.server_state for context-dependent actions
Usage:
python training/scripts/generate_tool_training.py
"""
import json
import random
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
# Ensure project root is importable
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from agent.tools.tool_schemas import (
QWEN3_TOOLS,
RCON_ERROR_PATTERNS,
ERROR_SCENARIOS,
qwen3_tools_block,
)
from agent.prompts.system_prompts import (
SUDO_SYSTEM_PROMPT,
GOD_SYSTEM_PROMPT,
SYNTAX_RULES,
RISK_GRADIENT,
)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Input: the seed dataset (JSONL) this script reads.
SEED_PATH = PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl"
# Output: the generated multi-turn tool-calling dataset (JSONL).
OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl"
# ---------------------------------------------------------------------------
# System prompt with tools block for Qwen3 format
# ---------------------------------------------------------------------------
# Rendered once at import time and appended to both system prompts below.
TOOLS_BLOCK = qwen3_tools_block()
# Admin ("sudo") prompt: role text + expected JSON reply shape, followed by
# SYNTAX_RULES, RISK_GRADIENT and the tools block.
SUDO_TOOL_SYSTEM = (
    "You are a Minecraft 1.21 command translator for a server admin. "
    "You receive natural language requests and return valid RCON commands.\n\n"
    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": <int 0-5>, "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)
# "God" persona prompt: same tool instructions, but the JSON reply also
# carries a "message" field. Unlike the sudo prompt it appends SYNTAX_RULES
# only (no RISK_GRADIENT) before the tools block.
GOD_TOOL_SYSTEM = (
    "You are God in a Minecraft server. Players pray to you and you respond "
    "with divine judgment.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": <int 0-5>, "message": "Your divine response", '
    '"commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
)
# ---------------------------------------------------------------------------
# Player names used across the dataset
# ---------------------------------------------------------------------------
# Pool of player names; simulate_server_state samples its online list from it.
DEFAULT_PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]
# ---------------------------------------------------------------------------
# Simulated success responses for common commands
# ---------------------------------------------------------------------------
def simulate_rcon_success(command: str, player: str = "slingshooter08") -> str:
    """Fabricate a believable RCON success reply for *command*.

    The command is matched on its leading keyword(s). Replies for ``give``,
    ``effect give``, ``effect clear``, ``weather``, ``time set`` and
    ``gamemode`` echo pieces parsed from the command itself; several others
    mention *player*; anything unrecognised gets a generic success string.
    """
    cmd = command.strip()
    tokens = cmd.split()

    if cmd.startswith("give "):
        raw_item = tokens[2] if len(tokens) > 2 else "item"
        count = tokens[3] if len(tokens) > 3 else "1"
        label = raw_item.replace("minecraft:", "").replace("_", " ").title()
        if "[" in label:
            # Drop any [component=...] suffix from the display name.
            label = label.split("[")[0].strip()
        return f"Gave {count} [{label}] to {player}"

    if cmd.startswith("effect give "):
        target = tokens[2] if len(tokens) > 2 else ""
        effect_id = tokens[3].replace("minecraft:", "") if len(tokens) > 3 else "effect"
        return f"Applied effect {effect_id} to {target}"

    if cmd.startswith("effect clear "):
        target = tokens[2] if len(tokens) > 2 else player
        return f"Removed every effect from {target}"

    if cmd.startswith(("tp ", "teleport ")):
        return f"Teleported {player}"

    if cmd.startswith("weather "):
        kind = tokens[1] if len(tokens) > 1 else "clear"
        return f"Changing to {kind}"

    if cmd.startswith("time set "):
        value = cmd.split("time set ")[1]
        return f"Set the time to {value}"

    if cmd.startswith("gamemode "):
        mode_name = tokens[1] if len(tokens) > 1 else "creative"
        return f"Set own game mode to {mode_name.title()}"

    # Remaining commands map a keyword prefix straight to a canned reply.
    canned_replies = {
        "kill ": "Killed entity",
        "summon ": "Summoned new entity",
        "setblock ": "Changed the block at ...",
        "fill ": "Successfully filled ... blocks",
        "clear ": f"Removed items from {player}",
        "xp ": f"Gave experience to {player}",
        "execute ": "Executed command",
        "playsound ": f"Played sound to {player}",
        "title ": "Title displayed",
        "particle ": "Showing particle",
        "enchant ": f"Enchanted item for {player}",
        "spreadplayers ": "Spread players",
    }
    for prefix, reply in canned_replies.items():
        if cmd.startswith(prefix):
            return reply
    return "Command executed successfully"
def simulate_player_info(player: str) -> Dict[str, Any]:
    """Fabricate a random ``world.player_info`` result.

    *player* is accepted for signature symmetry with the real tool but does
    not influence the generated values. Health is drawn from 10.0-20.0
    (one decimal), x/z from -500..500, y from 60..120, and the inventory
    summary from a fixed list of canned descriptions.
    """
    inventories = [
        "Diamond sword, iron armor, 32 steak, 14 torches",
        "Netherite pickaxe, diamond armor, 64 cobblestone, bow with 28 arrows",
        "Stone tools, leather armor, 12 bread, 3 oak logs",
        "Full diamond gear, 8 golden apples, ender pearls x16",
        "Iron sword, chainmail chest, 24 cooked porkchop, shield",
    ]
    # Draw order (health, x, y, z, inventory) is kept fixed so a seeded RNG
    # yields the same record.
    health = round(random.uniform(10.0, 20.0), 1)
    x = random.randint(-500, 500)
    y = random.randint(60, 120)
    z = random.randint(-500, 500)
    summary = random.choice(inventories)
    return {
        "health": health,
        "position": {"x": x, "y": y, "z": z},
        "inventory_summary": summary,
    }
def simulate_server_state() -> Dict[str, Any]:
    """Fabricate a random ``world.server_state`` result.

    A tick value in 0..24000 is bucketed into a coarse time-of-day label,
    weather is picked from clear/rain/thunder, and one to three names are
    sampled from ``DEFAULT_PLAYERS``. The world border is a fixed constant.
    """
    ticks = random.randint(0, 24000)

    # (exclusive upper bound, label); anything at or past the last bound is night.
    buckets = ((6000, "morning"), (12000, "noon"), (18000, "evening"))
    for upper, label in buckets:
        if ticks < upper:
            time_label = label
            break
    else:
        time_label = "night"

    weather = random.choice(["clear", "rain", "thunder"])
    how_many = random.randint(1, 3)
    online = random.sample(DEFAULT_PLAYERS, k=how_many)
    return {
        "time_of_day": time_label,
        "weather": weather,
        "online_players": online,
        "world_border": 60000000.0,
    }
# ---------------------------------------------------------------------------
# Conversation builders
# ---------------------------------------------------------------------------
def build_system_message(mode: str) -> Dict[str, str]:
    """Wrap the system prompt for *mode* as a chat message.

    ``"god"`` selects the God prompt; every other value falls back to the
    sudo (admin) prompt.
    """
    prompt = GOD_TOOL_SYSTEM if mode == "god" else SUDO_TOOL_SYSTEM
    return {"role": "system", "content": prompt}
def build_user_message(user_text: str, context: Optional[Dict] = None) -> Dict[str, str]:
    """Wrap *user_text* as a user chat message.

    When *context* carries a non-empty ``online_players`` list, a bracketed
    server-context line naming those players is appended after a blank line.
    """
    online = context.get("online_players") if context else None
    if not online:
        return {"role": "user", "content": user_text}
    roster = ", ".join(online)
    suffix = f"\n\n[Server context: players online: {roster}]"
    return {"role": "user", "content": user_text + suffix}
def build_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, str]:
    """Build an assistant message containing one ``<tool_call>`` block.

    The block body is the JSON object ``{"name": ..., "arguments": ...}`` on
    its own line between the opening and closing tags.
    """
    payload = json.dumps({"name": tool_name, "arguments": arguments})
    wrapped = "<tool_call>\n" + payload + "\n</tool_call>"
    return {"role": "assistant", "content": wrapped}
def build_tool_result(result: Dict[str, Any]) -> Dict[str, str]:
    """Wrap a tool's result dict, JSON-encoded, as a ``tool`` role message."""
    encoded = json.dumps(result)
    return dict(role="tool", content=encoded)
def build_assistant_final(response: Dict[str, Any]) -> Dict[str, str]:
    """Wrap the closing response dict, JSON-encoded, as an assistant message."""
    encoded = json.dumps(response)
    return dict(role="assistant", content=encoded)
# ---------------------------------------------------------------------------
# Example generators
# ---------------------------------------------------------------------------
def gen_command_success(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation for a command_gen example where
    commands succeed on the first try.

    Args:
        example: Seed record. Reads ``id``, ``input.user_message``,
            ``input.server_context``, ``output.commands``,
            ``output.reasoning``, ``output.message`` and
            ``metadata.risk_level`` (default 3).
        mode: ``"god"`` uses the God system prompt and always emits a
            ``message`` field; any other value uses the sudo prompt.

    Returns:
        A training record with ``id``, ``source``, ``type`` and ``messages``,
        or ``None`` when the example has no commands to execute.
    """
    inp = example["input"]
    out = example["output"]
    commands = out.get("commands") or []
    if not commands:
        return None

    # `or {}` (instead of a .get default) also covers keys that are present
    # in the record but hold null.
    server_context = inp.get("server_context") or {}
    metadata = example.get("metadata") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    risk_level = metadata.get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], inp.get("server_context")),
    ]

    # One rcon.execute call + simulated success result per command.
    for cmd in commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": risk_level, "commands": commands, "reasoning": out.get("reasoning", "")}
    if mode == "god" or out.get("message"):
        # Fall back to the stock line when the message is missing, null or
        # empty, so god-mode replies never carry a blank message.
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-success",
        "source": "tool_training",
        "type": "command_success",
        "messages": messages,
    }
def gen_error_correction_from_negative(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation from an example that has negative_output:
    model tries wrong command -> error -> retries with correct command -> success.

    Args:
        example: Seed record. Reads ``id``, ``input``, ``output``,
            ``negative_output.commands``, ``negative_output.error`` and
            ``metadata.risk_level`` (default 3).
        mode: ``"god"`` uses the God system prompt and always emits a
            ``message`` field; any other value uses the sudo prompt.

    Returns:
        A training record with ``id``, ``source``, ``type`` and ``messages``,
        or ``None`` when there is no negative output or either command list
        is empty.
    """
    inp = example["input"]
    out = example["output"]
    neg = example.get("negative_output")
    if not neg:
        return None

    wrong_commands = neg.get("commands") or []
    correct_commands = out.get("commands") or []
    if not wrong_commands or not correct_commands:
        return None

    # `or {}` (instead of a .get default) also covers keys that are present
    # in the record but hold null.
    server_context = inp.get("server_context") or {}
    metadata = example.get("metadata") or {}
    player = (server_context.get("online_players") or ["slingshooter08"])[0]
    risk_level = metadata.get("risk_level", 3)
    # A null/empty recorded error would make a meaningless tool result, so
    # it falls back to the generic parser error as well.
    error_text = neg.get("error") or "Unknown or incomplete command"

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], inp.get("server_context")),
    ]

    # First attempt: only the first wrong command is replayed, and it fails.
    wrong_cmd = wrong_commands[0]
    messages.append(build_tool_call("rcon.execute", {"command": wrong_cmd}))
    messages.append(build_tool_result({"success": False, "result": error_text}))

    # Retry: every correct command succeeds.
    for cmd in correct_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    # Prefix the reasoning with the recorded failure, when one was recorded.
    reasoning = out.get("reasoning", "")
    if neg.get("error"):
        reasoning = f"First attempt failed: {neg['error']}. {reasoning}"

    final = {"risk_level": risk_level, "commands": correct_commands, "reasoning": reasoning}
    if mode == "god" or out.get("message"):
        # Never emit a null/empty message in god mode.
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-errorcorrect",
        "source": "tool_training",
        "type": "error_correction",
        "messages": messages,
    }
def gen_error_scenario(scenario: Dict[str, Any], user_request: str,
                       player: str = "slingshooter08", mode: str = "sudo") -> Dict[str, Any]:
    """
    Turn one predefined error scenario into a fail-then-fix conversation.

    The scenario's wrong and correct command templates are filled in with
    *player*; the wrong command receives the scenario's error message, the
    corrected one a simulated success. The final response always uses risk
    level 3 and the scenario's reasoning; god mode adds a stock message.
    """
    failing_cmd = scenario["wrong_command"].format(player=player)
    fixed_cmd = scenario["correct_command"].format(player=player)

    messages = [
        build_system_message(mode),
        build_user_message(user_request),
        # Attempt 1: rejected by the server.
        build_tool_call("rcon.execute", {"command": failing_cmd}),
        build_tool_result({"success": False, "result": scenario["error_message"]}),
        # Attempt 2: accepted.
        build_tool_call("rcon.execute", {"command": fixed_cmd}),
        build_tool_result({"success": True, "result": simulate_rcon_success(fixed_cmd, player)}),
    ]

    closing = {
        "risk_level": 3,
        "commands": [fixed_cmd],
        "reasoning": scenario["reasoning"],
    }
    if mode == "god":
        closing["message"] = "It is done."
    messages.append(build_assistant_final(closing))

    record_id = f"tool-scenario-{scenario['id']}"
    return {
        "id": record_id,
        "source": "tool_training",
        "type": "error_scenario",
        "messages": messages,
    }
def gen_wiki_lookup(user_request: str, query: str, wiki_content: str,
                    wiki_url: str, resulting_commands: List[str],
                    reasoning: str, player: str = "slingshooter08",
                    mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model looks up wiki info before executing.

    Args:
        user_request: The user's message text.
        query: Query passed to ``minecraft.wiki_lookup``.
        wiki_content: Simulated wiki text returned by the lookup.
        wiki_url: Simulated source URL returned by the lookup.
        resulting_commands: Commands executed after the lookup. They are
            used verbatim here.
            NOTE(review): no ``{player}`` substitution happens in this
            function — confirm callers pass fully resolved commands.
        reasoning: Text for the final response's ``reasoning`` field.
        player: Name used in simulated success messages.
        mode: ``"god"`` uses the God system prompt and adds a ``message``
            field; any other value uses the sudo prompt.

    Returns:
        A training record with ``id``, ``source``, ``type`` and ``messages``.
    """
    import zlib

    messages = [build_system_message(mode), build_user_message(user_request)]

    # Wiki lookup and its simulated result.
    messages.append(build_tool_call("minecraft.wiki_lookup", {"query": query}))
    messages.append(build_tool_result({"content": wiki_content, "url": wiki_url}))

    # Execute the commands derived from the lookup.
    for cmd in resulting_commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
    if mode == "god":
        # The God prompt's reply format includes "message"; gen_error_scenario
        # supplies the same stock line.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter process, so it gave
    # a different id on every run. CRC32 of the UTF-8 bytes is reproducible.
    suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-wiki-{suffix:04d}",
        "source": "tool_training",
        "type": "wiki_lookup",
        "messages": messages,
    }
def gen_player_info_lookup(user_request: str, player: str,
                           resulting_commands: List[str], reasoning: str,
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks player info before acting.

    Args:
        user_request: The user's message text.
        player: Player queried via ``world.player_info`` and substituted
            into the command templates.
        resulting_commands: ``str.format`` templates; ``{x}``, ``{y}``,
            ``{z}`` are filled from the simulated position and ``{player}``
            from *player*.
        reasoning: Text for the final response's ``reasoning`` field.
        mode: ``"god"`` uses the God system prompt and adds a ``message``
            field; any other value uses the sudo prompt.

    Returns:
        A training record with ``id``, ``source``, ``type`` and ``messages``.
    """
    import zlib

    messages = [build_system_message(mode), build_user_message(user_request)]

    # Player lookup and its simulated result.
    messages.append(build_tool_call("world.player_info", {"player": player}))
    pinfo = simulate_player_info(player)
    messages.append(build_tool_result(pinfo))

    # Resolve coordinate/player placeholders from the looked-up position.
    pos = pinfo["position"]
    resolved_cmds = [
        cmd.format(x=pos["x"], y=pos["y"], z=pos["z"], player=player)
        for cmd in resulting_commands
    ]

    for cmd in resolved_cmds:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        # The God prompt's reply format includes "message"; gen_error_scenario
        # supplies the same stock line.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter process, so it gave
    # a different id on every run. CRC32 of the UTF-8 bytes is reproducible.
    suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-playerinfo-{suffix:04d}",
        "source": "tool_training",
        "type": "player_info_lookup",
        "messages": messages,
    }
def gen_server_state_check(user_request: str, resulting_commands: List[str],
                           reasoning: str, player: str = "slingshooter08",
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks server state before acting.

    The conversation is: system message, user request, one
    ``world.server_state`` call with its simulated result, one
    ``rcon.execute`` call/result pair per resolved command, and the final
    assistant message.

    Args:
        user_request: Text of the user's message.
        resulting_commands: Commands to execute. A command containing the
            literal token ``{p}`` is emitted once per online player with the
            token replaced by that player's name; any other command is used
            verbatim (no brace unescaping is performed).
        reasoning: Stored in the final assistant payload.
        player: Name used when simulating the RCON result of commands that
            are not per-player.
        mode: Forwarded to ``build_system_message``.

    Returns:
        Dict with ``id``, ``source``, ``type`` and ``messages`` keys.
    """
    import zlib  # local import: only needed for the stable example id

    messages = [build_system_message(mode)]
    messages.append(build_user_message(user_request))

    # Check server state
    messages.append(build_tool_call("world.server_state", {}))
    state = simulate_server_state()
    messages.append(build_tool_result(state))

    # Expand per-player templates against the simulated player list so a
    # literal "{p}" is never sent to RCON. Each command is paired with the
    # player its simulated result should refer to.
    # assumes state is a dict whose "online_players" is a list of names
    online_players = state.get("online_players") or []
    targeted = []
    for cmd in resulting_commands:
        if "{p}" in cmd:
            targeted.extend((cmd.replace("{p}", name), name) for name in online_players)
        else:
            targeted.append((cmd, player))

    for cmd, target in targeted:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, target)
        messages.append(build_tool_result({"success": True, "result": success_msg}))

    resolved_cmds = [cmd for cmd, _ in targeted]
    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    messages.append(build_assistant_final(final))

    # Built-in hash() of a str is salted per interpreter process, so it would
    # give a different id on every run; CRC32 is stable across runs.
    id_suffix = zlib.crc32(user_request.encode("utf-8")) % 10000
    return {
        "id": f"tool-serverstate-{id_suffix:04d}",
        "source": "tool_training",
        "type": "server_state_check",
        "messages": messages,
    }
# ---------------------------------------------------------------------------
# Handcrafted wiki/player/server lookup examples
# ---------------------------------------------------------------------------
# Handcrafted scenarios where the model consults the wiki before acting.
# Each entry provides the user request, the wiki query, the simulated wiki
# answer (content + url), the command templates, and the final reasoning.
# Command templates are filled with str.format(player=...), so literal braces
# in component/NBT syntax are doubled ("{{" / "}}").
WIKI_LOOKUP_EXAMPLES = [
    {
        "user_request": "sudo enchant my trident with the best enchants",
        "query": "trident enchantments 1.21",
        "wiki_content": (
            "Trident enchantments in Minecraft 1.21:\n"
            "- Loyalty (1-3): Trident returns after being thrown\n"
            "- Channeling (1): Summons lightning during thunderstorms\n"
            "- Riptide (1-3): Launches player when thrown in water/rain\n"
            "- Impaling (1-5): Extra damage to aquatic mobs\n"
            "- Unbreaking (1-3): Increases durability\n"
            "- Mending (1): Repairs with XP\n"
            "Note: Loyalty and Riptide are mutually exclusive. Channeling and Riptide are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Trident#Enchantments",
        "commands": [
            "give {player} minecraft:trident[enchantments={{loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up trident enchantments. Used loyalty+channeling (not riptide, since loyalty and riptide are mutually exclusive). Max levels on all.",
    },
    {
        "user_request": "sudo what's the command to set a custom player head?",
        "query": "player head command give 1.21",
        "wiki_content": (
            "To give a player head with a custom skin in 1.21:\n"
            "give <player> minecraft:player_head[profile={name:\"<player_name>\"}] 1\n"
            "This creates a head with the skin of the specified player."
        ),
        "wiki_url": "https://minecraft.wiki/w/Player_Head",
        "commands": [
            'give {player} minecraft:player_head[profile={{name:"{player}"}}] 1'
        ],
        "reasoning": "Looked up player head syntax for 1.21 component format.",
    },
    {
        "user_request": "sudo how do I make a custom spawner?",
        "query": "spawner command setblock 1.21",
        "wiki_content": (
            "To place a mob spawner in 1.21:\n"
            "setblock <x> <y> <z> minecraft:spawner{SpawnData:{entity:{id:\"minecraft:<mob>\"}},Delay:20}\n"
            "Or use /give for a spawner item."
        ),
        "wiki_url": "https://minecraft.wiki/w/Monster_Spawner",
        "commands": [
            # Commands sent over RCON have no player context, so a bare
            # "~ ~1 ~" would not be relative to the requesting player.
            # "execute at <player> run" anchors the offset to them.
            'execute at {player} run setblock ~ ~1 ~ minecraft:spawner{{SpawnData:{{entity:{{id:"minecraft:zombie"}}}},Delay:20}}'
        ],
        "reasoning": "Looked up spawner syntax. Using setblock with SpawnData NBT for zombie spawner, run via execute at the player so the relative coordinates resolve at their position.",
    },
    {
        "user_request": "sudo what potion effects can I use for a speed build challenge?",
        "query": "speed and haste effects minecraft 1.21",
        "wiki_content": (
            "Relevant effects for speed building:\n"
            "- minecraft:speed (1-255): Increases movement speed\n"
            "- minecraft:haste (1-255): Increases mining/attack speed\n"
            "- minecraft:jump_boost (1-255): Increases jump height\n"
            "Duration is in seconds. Amplifier 0 = level 1."
        ),
        "wiki_url": "https://minecraft.wiki/w/Effect",
        "commands": [
            "effect give {player} minecraft:speed 600 2",
            "effect give {player} minecraft:haste 600 2",
            "effect give {player} minecraft:jump_boost 600 1",
        ],
        "reasoning": "Looked up speed-related effects. Gave speed 3, haste 3, and jump boost 2 for 10 minutes.",
    },
    {
        "user_request": "sudo give me a crossbow with multishot and quick charge",
        "query": "crossbow enchantments multishot quick charge 1.21",
        "wiki_content": (
            "Crossbow enchantments in 1.21:\n"
            "- Quick Charge (1-3): Reduces reload time\n"
            "- Multishot (1): Fires 3 arrows at once\n"
            "- Piercing (1-4): Arrows pass through entities\n"
            "- Unbreaking (1-3): Durability\n"
            "- Mending (1): XP repair\n"
            "Note: Multishot and Piercing are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Crossbow#Enchantments",
        "commands": [
            "give {player} minecraft:crossbow[enchantments={{multishot:1,quick_charge:3,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up crossbow enchantments. Multishot and piercing are exclusive; chose multishot as requested. Max quick charge.",
    },
]
# Handcrafted scenarios that need the player's position before acting.
# Command strings are str.format templates; generate_all() fills {player},
# {x}/{y}/{z} and the derived offsets ({y_1}, {x_m5}, {z_minus_100}, ...).
PLAYER_INFO_EXAMPLES = [
    # Column of five blocks: y, y+1, ... y+4 at the player's x/z.
    {
        "user_request": "sudo build a diamond block tower where I'm standing",
        "player": "slingshooter08",
        "commands": [
            f"setblock {{x}} {{{height}}} {{z}} minecraft:diamond_block"
            for height in ("y", "y_1", "y_2", "y_3", "y_4")
        ],
        "reasoning": "Got player position, then placed 5 diamond blocks in a tower at their location.",
    },
    # North is the negative Z direction, hence z - 100.
    {
        "user_request": "sudo teleport me 100 blocks north",
        "player": "slingshooter08",
        "commands": ["tp {player} {x} {y} {z_minus_100}"],
        "reasoning": "Got player position, then teleported 100 blocks north (negative Z direction).",
    },
    # Hollow glass box spanning +/-5 in x/z and 10 blocks up.
    {
        "user_request": "sudo put a glass dome over my head",
        "player": "slingshooter08",
        "commands": [
            "fill {x_m5} {y} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:glass hollow",
        ],
        "reasoning": "Got player position to calculate dome coordinates. Used fill hollow with glass.",
    },
    # Four lights, 3 blocks out along each horizontal axis, 5 blocks up.
    {
        "user_request": "sudo light up the area around me with glowstone",
        "player": "slingshooter08",
        "commands": [
            f"setblock {{{x_slot}}} {{y_p5}} {{{z_slot}}} minecraft:glowstone"
            for x_slot, z_slot in (
                ("x_p3", "z"),
                ("x_m3", "z"),
                ("x", "z_p3"),
                ("x", "z_m3"),
            )
        ],
        "reasoning": "Got player position, placed glowstone lights at 4 cardinal points above the player.",
    },
]
# Handcrafted scenarios where the model checks the server state first.
# A "{p}" in a command marks a per-player template: generate_all() emits it
# once for every name in the simulated online-player list.
SERVER_STATE_EXAMPLES = [
    {"user_request": request, "commands": commands, "reasoning": reasoning}
    for request, commands, reasoning in (
        (
            "sudo make it daytime if it's nighttime",
            ["time set day"],
            "Checked server state: it was nighttime, so set time to day.",
        ),
        (
            "sudo clear weather if it's raining",
            ["weather clear"],
            "Checked server state: weather was rain, so cleared it.",
        ),
        (
            "sudo give everyone online a diamond",
            ["give {p} minecraft:diamond 1"],
            "Checked server state to get online player list, then gave each player a diamond.",
        ),
        (
            "sudo heal everyone on the server",
            ["effect give {p} minecraft:instant_health 1 5"],
            "Checked server state for online players, then healed each one.",
        ),
    )
]
# Natural-language user request for each error scenario, keyed by scenario id.
# generate_all() looks scenarios up here and falls back to a generic request
# for ids that are not listed.
ERROR_SCENARIO_REQUESTS = dict(
    missing_prefix="sudo give me a diamond sword",
    old_nbt_enchantments="sudo give me a max enchanted diamond sword",
    invalid_effect_name="sudo give me haste effect",
    wrong_item_bed="sudo give me a bed",
    wrong_item_log="sudo give me a stack of logs",
    count_wrong_position="sudo give me 64 diamonds",
    effect_missing_give="sudo give me speed",
    weather_storm="sudo make it storm",
    gamemode_abbreviation="sudo put me in creative",
    wrong_item_grass="sudo give me some grass",
    summon_no_prefix="sudo spawn a zombie near me",
    old_zombie_pigman="sudo summon a zombie pigman",
)
# ---------------------------------------------------------------------------
# Format to Qwen3 chat template string
# ---------------------------------------------------------------------------
def format_qwen3(messages: List[Dict[str, str]]) -> str:
    """Render a list of chat messages as one Qwen3-style transcript string.

    Each message becomes ``<|im_start|>{role}``, a newline, the content, a
    newline and ``<|im_end|>``; the rendered turns are joined with a single
    newline. An empty list yields an empty string.

    Args:
        messages: Dicts that each provide a ``"role"`` and a ``"content"`` key.

    Returns:
        The concatenated transcript text.
    """
    # NOTE(review): the upstream Qwen chat template appears to place
    # "<|im_end|>" directly after the content, without the newline emitted
    # here — confirm the extra "\n" is intentional before training on it.
    return "\n".join(
        f"<|im_start|>{turn['role']}\n{turn['content']}\n<|im_end|>"
        for turn in messages
    )
# ---------------------------------------------------------------------------
# Main generation pipeline
# ---------------------------------------------------------------------------
def load_seed_data() -> List[Dict[str, Any]]:
    """Load the seed dataset from ``SEED_PATH``.

    The file is read as JSON Lines: each non-blank line is stripped and
    parsed with ``json.loads``; blank lines are skipped.

    Returns:
        The parsed records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # which would mis-read non-ASCII text on systems where it is not UTF-8.
    with open(SEED_PATH, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [json.loads(line) for line in stripped_lines if line]
def generate_all() -> "tuple[List[Dict[str, Any]], Dict[str, int]]":
    """Generate all tool-calling training examples.

    Seeds ``random`` with 42, loads the seed dataset and then builds, in
    this order:

    1. command-success conversations from seed examples in the
       ``command_gen``, ``safety`` and ``troubleshoot`` categories that have
       output commands;
    2. error-correction conversations from seed examples that carry
       ``negative_output`` commands;
    3. one ``sudo`` and one ``god`` variant per entry of ``ERROR_SCENARIOS``;
    4. the handcrafted wiki-lookup examples;
    5. the handcrafted player-info examples;
    6. the handcrafted server-state examples.

    Returns:
        ``(all_examples, counts)``: the generated example dicts and a dict
        mapping each example type to how many were generated. (The
        annotation is quoted so it is never evaluated at import time.)
    """
    import zlib  # local import: only needed for the stable example ids

    def stable_suffix(text: str) -> str:
        # Built-in hash() of a str is salted per interpreter process, so it
        # would give different ids on every run; CRC32 is stable.
        return f"{zlib.crc32(text.encode('utf-8')) % 10000:04d}"

    def mode_for(example: Dict[str, Any]) -> str:
        # Decide mode: if the output has a "message" field, treat as god.
        # "or {}" also covers an explicit "output": null in the seed data.
        return "god" if (example.get("output") or {}).get("message") else "sudo"

    random.seed(42)
    seed_data = load_seed_data()
    all_examples = []
    counts = {
        "command_success": 0,
        "error_correction": 0,
        "error_scenario": 0,
        "wiki_lookup": 0,
        "player_info_lookup": 0,
        "server_state_check": 0,
    }

    # --- 1. Command success examples from seed data ---
    for ex in seed_data:
        if ex.get("category", "") not in ("command_gen", "safety", "troubleshoot"):
            continue
        if not (ex.get("output") or {}).get("commands"):
            continue
        result = gen_command_success(ex, mode=mode_for(ex))
        if result:
            all_examples.append(result)
            counts["command_success"] += 1

    # --- 2. Error correction from negative_output examples ---
    for ex in seed_data:
        neg = ex.get("negative_output")
        if not (neg and neg.get("commands")):
            continue
        result = gen_error_correction_from_negative(ex, mode=mode_for(ex))
        if result:
            all_examples.append(result)
            counts["error_correction"] += 1

    # --- 3. Error scenario examples ---
    for scenario in ERROR_SCENARIOS:
        user_req = ERROR_SCENARIO_REQUESTS.get(scenario["id"], "sudo do something")
        player = random.choice(DEFAULT_PLAYERS)
        # Generate both sudo and god mode variants for each scenario
        for mode in ["sudo", "god"]:
            result = gen_error_scenario(scenario, user_req, player=player, mode=mode)
            if result:
                # Both variants share a scenario, so the mode keeps ids distinct.
                result["id"] += f"-{mode}"
                all_examples.append(result)
                counts["error_scenario"] += 1

    # --- 4. Wiki lookup examples ---
    for wex in WIKI_LOOKUP_EXAMPLES:
        player = random.choice(DEFAULT_PLAYERS)
        resolved_cmds = [c.format(player=player) for c in wex["commands"]]
        result = gen_wiki_lookup(
            user_request=wex["user_request"],
            query=wex["query"],
            wiki_content=wex["wiki_content"],
            wiki_url=wex["wiki_url"],
            resulting_commands=resolved_cmds,
            reasoning=wex["reasoning"],
            player=player,
        )
        if result:
            all_examples.append(result)
            counts["wiki_lookup"] += 1

    # --- 5. Player info lookup examples ---
    for pex in PLAYER_INFO_EXAMPLES:
        player = pex.get("player", "slingshooter08")
        # We need plausible coordinates for the commands
        pos = simulate_player_info(player)["position"]
        x, y, z = pos["x"], pos["y"], pos["z"]
        # Every placeholder the handcrafted templates may reference.
        placeholders = {
            "player": player,
            "x": x, "y": y, "z": z,
            "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
            "z_minus_100": z - 100,
            "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
            "y_p5": y + 5, "y_p10": y + 10,
            "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
        }
        resolved_cmds = [cmd.format(**placeholders) for cmd in pex["commands"]]

        messages = [build_system_message("sudo")]
        messages.append(build_user_message(pex["user_request"]))
        messages.append(build_tool_call("world.player_info", {"player": player}))
        # Only the simulated position is reported; health and inventory are
        # fixed values.
        messages.append(build_tool_result({
            "health": 20.0,
            "position": pos,
            "inventory_summary": "Diamond gear, various items",
        }))
        for cmd in resolved_cmds:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, player),
            }))
        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": pex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-playerinfo-{stable_suffix(pex['user_request'])}",
            "source": "tool_training",
            "type": "player_info_lookup",
            "messages": messages,
        })
        counts["player_info_lookup"] += 1

    # --- 6. Server state check examples ---
    # NOTE(review): the handcrafted reasoning (e.g. "it was nighttime") is not
    # checked against the simulated state — confirm simulate_server_state()
    # cannot contradict it.
    for state_ex in SERVER_STATE_EXAMPLES:
        state = simulate_server_state()
        players = state["online_players"]

        messages = [build_system_message("sudo")]
        messages.append(build_user_message(state_ex["user_request"]))
        messages.append(build_tool_call("world.server_state", {}))
        messages.append(build_tool_result(state))

        # Resolve commands that reference {p} once per online player, and
        # remember which player each command targets so the simulated RCON
        # result refers to that player rather than always the first one.
        default_target = players[0] if players else "player"
        targeted = []
        for cmd in state_ex["commands"]:
            if "{p}" in cmd:
                targeted.extend((cmd.format(p=name), name) for name in players)
            else:
                targeted.append((cmd, default_target))

        for cmd, target in targeted:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, target),
            }))
        resolved_cmds = [cmd for cmd, _ in targeted]
        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": state_ex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-serverstate-{stable_suffix(state_ex['user_request'])}",
            "source": "tool_training",
            "type": "server_state_check",
            "messages": messages,
        })
        counts["server_state_check"] += 1

    return all_examples, counts
def main():
    """Run the generator as a script.

    Prints a banner with the seed and output paths, exits with status 1 if
    the seed dataset does not exist, otherwise generates every example,
    writes them to ``OUTPUT_PATH`` as JSON Lines (each record also gets a
    ``qwen3_text`` field with its formatted transcript), then prints a
    per-type count table and a preview of one randomly chosen example.
    """
    banner = "=" * 60
    print(banner)
    print("Tool-Calling Training Data Generator")
    print(banner)
    print(f"\nSeed dataset: {SEED_PATH}")
    print(f"Output: {OUTPUT_PATH}")

    if not SEED_PATH.exists():
        print(f"\nERROR: Seed dataset not found at {SEED_PATH}")
        sys.exit(1)

    examples, type_counts = generate_all()

    # Write output; the output directory is created on demand.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, "w") as out:
        for record in examples:
            # Store both the structured messages and the formatted Qwen3 text
            record["qwen3_text"] = format_qwen3(record["messages"])
            out.write(json.dumps(record) + "\n")

    # Summary table, one row per example type in alphabetical order.
    n_total = len(examples)
    rule = f" {'-'*25} {'-'*6}"
    print(f"\nGenerated {n_total} tool-calling training examples:\n")
    print(f" {'Type':<25} {'Count':>6}")
    print(rule)
    for label in sorted(type_counts):
        print(f" {label:<25} {type_counts[label]:>6}")
    print(rule)
    print(f" {'TOTAL':<25} {n_total:>6}")

    # Validate a sample
    print("\n--- Sample validation ---")
    picked = random.choice(examples)
    role_chain = " -> ".join(turn["role"] for turn in picked["messages"])
    print(f" ID: {picked['id']}")
    print(f" Type: {picked['type']}")
    print(f" Turns: {len(picked['messages'])}")
    print(f" Roles: {role_chain}")
    print("\n Qwen3 text preview (first 500 chars):")
    print(f" {picked['qwen3_text'][:500]}")
    print(f"\nOutput written to: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()