#!/usr/bin/env python3
"""
Generate multi-turn tool-calling training data for the Minecraft AI God model.

Reads data/processed/seed_dataset.jsonl and produces
data/processed/tool_training.jsonl with Qwen3-format multi-turn conversations
that teach the model to:
  1. Call rcon.execute and handle success/error results
  2. Self-correct on RCON errors (retry with fixed command)
  3. Use minecraft.wiki_lookup when unsure about syntax
  4. Use world.player_info / world.server_state for context-dependent actions

Usage:
    python training/scripts/generate_tool_training.py
"""

import json
import random
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional

# Ensure project root is importable
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from agent.tools.tool_schemas import (
    QWEN3_TOOLS,
    RCON_ERROR_PATTERNS,
    ERROR_SCENARIOS,
    qwen3_tools_block,
)
from agent.prompts.system_prompts import (
    SUDO_SYSTEM_PROMPT,
    GOD_SYSTEM_PROMPT,
    SYNTAX_RULES,
    RISK_GRADIENT,
)

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

SEED_PATH = PROJECT_ROOT / "data" / "processed" / "seed_dataset.jsonl"
OUTPUT_PATH = PROJECT_ROOT / "data" / "processed" / "tool_training.jsonl"

# ---------------------------------------------------------------------------
# System prompt with tools block for Qwen3 format
# ---------------------------------------------------------------------------

TOOLS_BLOCK = qwen3_tools_block()

# NOTE(review): the value after "risk_level": in both JSON templates below is
# empty; it looks like a placeholder was lost at some point. Confirm the
# intended text before training on these prompts.
SUDO_TOOL_SYSTEM = (
    "You are a Minecraft 1.21 command translator for a server admin. "
    "You receive natural language requests and return valid RCON commands.\n\n"
    "PERMISSION LEVEL: 4 (generous). Only refuse level 0-1 actions.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": , "commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + RISK_GRADIENT + "\n" + TOOLS_BLOCK
)

GOD_TOOL_SYSTEM = (
    "You are God in a Minecraft server. Players pray to you and you respond "
    "with divine judgment.\n\n"
    "You have access to tools. Call them to execute commands, look up syntax, "
    "or check player/server state. When a command fails, analyze the error "
    "and retry with a corrected command.\n\n"
    "After all tool calls resolve, respond with JSON:\n"
    '{"risk_level": , "message": "Your divine response", '
    '"commands": ["cmd1", ...], "reasoning": "why"}\n\n'
    + SYNTAX_RULES + "\n" + TOOLS_BLOCK
)

# ---------------------------------------------------------------------------
# Player names used across the dataset
# ---------------------------------------------------------------------------

DEFAULT_PLAYERS = ["slingshooter08", "Ace13245", "TheBigBoss", "xXDragonSlayerXx"]

# ---------------------------------------------------------------------------
# Simulated success responses for common commands
# ---------------------------------------------------------------------------

# (command prefix, response template) pairs whose reply does not depend on the
# command's own arguments. No prefix here is a prefix of another, so the
# lookup order does not affect the result.
_FIXED_RCON_RESPONSES = (
    ("tp ", "Teleported {player}"),
    ("teleport ", "Teleported {player}"),
    ("kill ", "Killed entity"),
    ("summon ", "Summoned new entity"),
    ("setblock ", "Changed the block at ..."),
    ("fill ", "Successfully filled ... blocks"),
    ("clear ", "Removed items from {player}"),
    ("xp ", "Gave experience to {player}"),
    ("execute ", "Executed command"),
    ("playsound ", "Played sound to {player}"),
    ("title ", "Title displayed"),
    ("particle ", "Showing particle"),
    ("enchant ", "Enchanted item for {player}"),
    ("spreadplayers ", "Spread players"),
)


def simulate_rcon_success(command: str, player: str = "slingshooter08") -> str:
    """Generate a plausible RCON success message for a command.

    Args:
        command: The RCON command text; surrounding whitespace is ignored.
        player: Name used in the message when the command itself does not
            name a concrete player.

    Returns:
        A short success string chosen by the command's leading keyword, or
        "Command executed successfully" for anything unrecognised.
    """
    cmd = command.strip()
    parts = cmd.split()

    if cmd.startswith("give "):
        # Report the player actually named in the command. Selectors such as
        # @s / @a do not name anyone, so fall back to ``player`` for those.
        named = parts[1] if len(parts) > 1 else ""
        recipient = named if named and not named.startswith("@") else player
        item = parts[2] if len(parts) > 2 else "item"
        count = parts[3] if len(parts) > 3 else "1"
        item_display = item.replace("minecraft:", "").replace("_", " ").title()
        if "[" in item_display:
            # Drop any [component=...] suffix from the display name.
            item_display = item_display.split("[")[0].strip()
        return f"Gave {count} [{item_display}] to {recipient}"

    if cmd.startswith("effect give "):
        target = parts[2] if len(parts) > 2 else ""
        eff_name = parts[3].replace("minecraft:", "") if len(parts) > 3 else "effect"
        return f"Applied effect {eff_name} to {target}"

    if cmd.startswith("effect clear "):
        target = parts[2] if len(parts) > 2 else player
        return f"Removed every effect from {target}"

    if cmd.startswith("weather "):
        weather_type = parts[1] if len(parts) > 1 else "clear"
        return f"Changing to {weather_type}"

    if cmd.startswith("time set "):
        val = cmd.split("time set ")[1] if "time set " in cmd else "day"
        return f"Set the time to {val}"

    if cmd.startswith("gamemode "):
        mode = parts[1] if len(parts) > 1 else "creative"
        return f"Set own game mode to {mode.title()}"

    for prefix, template in _FIXED_RCON_RESPONSES:
        if cmd.startswith(prefix):
            return template.format(player=player)

    return "Command executed successfully"


def simulate_player_info(player: str) -> Dict[str, Any]:
    """Generate plausible player info.

    ``player`` is accepted for call-site symmetry but does not influence the
    result; every value is drawn from the module-level ``random`` generator.

    Returns:
        A dict with ``health`` (float, one decimal), ``position`` (dict of
        integer ``x``/``y``/``z``) and ``inventory_summary`` (str).
    """
    # The draws below are made in a fixed order (health, x, y, z, inventory)
    # so a seeded run keeps producing the same sequence.
    health = round(random.uniform(10.0, 20.0), 1)
    position = {
        "x": random.randint(-500, 500),
        "y": random.randint(60, 120),
        "z": random.randint(-500, 500),
    }
    inventory = random.choice([
        "Diamond sword, iron armor, 32 steak, 14 torches",
        "Netherite pickaxe, diamond armor, 64 cobblestone, bow with 28 arrows",
        "Stone tools, leather armor, 12 bread, 3 oak logs",
        "Full diamond gear, 8 golden apples, ender pearls x16",
        "Iron sword, chainmail chest, 24 cooked porkchop, shield",
    ])
    return {
        "health": health,
        "position": position,
        "inventory_summary": inventory,
    }


def simulate_server_state() -> Dict[str, Any]:
    """Generate plausible server state.

    Returns:
        A dict with ``time_of_day`` (one of "morning", "noon", "evening",
        "night"), ``weather``, ``online_players`` (1-3 distinct names from
        ``DEFAULT_PLAYERS``) and a fixed ``world_border``.
    """
    ticks = random.randint(0, 24000)
    if ticks < 6000:
        tod = "morning"
    elif ticks < 12000:
        tod = "noon"
    elif ticks < 18000:
        tod = "evening"
    else:
        tod = "night"

    weather = random.choice(["clear", "rain", "thunder"])
    player_count = random.randint(1, 3)
    online = random.sample(DEFAULT_PLAYERS, k=player_count)
    return {
        "time_of_day": tod,
        "weather": weather,
        "online_players": online,
        "world_border": 60000000.0,
    }


# ---------------------------------------------------------------------------
# Conversation builders
# ---------------------------------------------------------------------------

def build_system_message(mode: str) -> Dict[str, str]:
    """Return the system message for the given mode.

    ``"god"`` selects ``GOD_TOOL_SYSTEM``; every other value selects
    ``SUDO_TOOL_SYSTEM``.
    """
    prompt = GOD_TOOL_SYSTEM if mode == "god" else SUDO_TOOL_SYSTEM
    return {"role": "system", "content": prompt}


def build_user_message(user_text: str, context: Optional[Dict] = None) -> Dict[str, str]:
    """Build the user turn.

    When ``context`` carries a non-empty ``online_players`` list, a bracketed
    server-context line naming those players is appended to the text.
    """
    content = user_text
    if context and context.get("online_players"):
        content += f"\n\n[Server context: players online: {', '.join(context['online_players'])}]"
    return {"role": "user", "content": content}


def build_tool_call(tool_name: str, arguments: Dict[str, Any]) -> Dict[str, str]:
    """Build an assistant turn that makes a tool call.

    The call is serialised as ``{"name": ..., "arguments": ...}`` JSON and
    wrapped in ``<tool_call>`` tags, each on its own line.
    """
    tc = json.dumps({"name": tool_name, "arguments": arguments})
    # Without the wrapper the turn is bare JSON between newlines, which is not
    # the tool-call form the Qwen chat format expects.
    return {"role": "assistant", "content": f"<tool_call>\n{tc}\n</tool_call>"}


def build_tool_result(result: Dict[str, Any]) -> Dict[str, str]:
    """Build a tool result turn whose content is ``result`` as JSON."""
    return {"role": "tool", "content": json.dumps(result)}


def build_assistant_final(response: Dict[str, Any]) -> Dict[str, str]:
    """Build the final assistant turn whose content is ``response`` as JSON."""
    return {"role": "assistant", "content": json.dumps(response)}


# ---------------------------------------------------------------------------
# Example generators
# ---------------------------------------------------------------------------

def _stable_suffix(text: str) -> str:
    """Return a 4-digit id suffix for ``text`` that is the same on every run.

    The built-in ``hash()`` of a string is salted per interpreter process, so
    ids derived from it change between runs even with a fixed random seed.
    """
    import hashlib  # local import keeps this section self-contained

    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return f"{int(digest, 16) % 10000:04d}"


def _first_online_player(inp: Dict[str, Any], default: str = "slingshooter08") -> str:
    """Return the first online player in ``inp['server_context']``, else ``default``."""
    # ``or {}`` also covers an explicit JSON null for server_context.
    context = inp.get("server_context") or {}
    return (context.get("online_players") or [default])[0]


def _position_format_args(pos: Dict[str, int], player: str) -> Dict[str, Any]:
    """Return every placeholder the player-info command templates may use."""
    x, y, z = pos["x"], pos["y"], pos["z"]
    return {
        "player": player,
        "x": x, "y": y, "z": z,
        "y_1": y + 1, "y_2": y + 2, "y_3": y + 3, "y_4": y + 4,
        "z_minus_100": z - 100,
        "x_m5": x - 5, "x_p5": x + 5, "z_m5": z - 5, "z_p5": z + 5,
        "y_p5": y + 5, "y_p10": y + 10,
        "x_p3": x + 3, "x_m3": x - 3, "z_p3": z + 3, "z_m3": z - 3,
    }


def _append_rcon_successes(messages: List[Dict[str, str]], commands: List[str],
                           player: str) -> None:
    """Append one successful rcon.execute call/result pair per command."""
    for cmd in commands:
        messages.append(build_tool_call("rcon.execute", {"command": cmd}))
        success_msg = simulate_rcon_success(cmd, player)
        messages.append(build_tool_result({"success": True, "result": success_msg}))


def gen_command_success(example: Dict[str, Any], mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation for a command_gen example where
    commands succeed on the first try.

    Args:
        example: Seed record with ``input`` and ``output`` dicts and an ``id``.
        mode: ``"god"`` or ``"sudo"``; selects the system prompt and forces a
            ``message`` field in god mode.

    Returns:
        The training example dict, or ``None`` when the seed has no commands.
    """
    inp = example["input"]
    out = example["output"]
    commands = out.get("commands", [])
    if not commands:
        return None

    player = _first_online_player(inp)
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], inp.get("server_context")),
    ]

    # Execute each command via rcon.execute
    _append_rcon_successes(messages, commands, player)

    # Final response
    final = {"risk_level": risk_level, "commands": commands,
             "reasoning": out.get("reasoning", "")}
    if mode == "god" or out.get("message"):
        # An empty or null message falls back to the default as well.
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-success",
        "source": "tool_training",
        "type": "command_success",
        "messages": messages,
    }


def gen_error_correction_from_negative(example: Dict[str, Any],
                                       mode: str = "sudo") -> Optional[Dict[str, Any]]:
    """
    Generate a multi-turn conversation from an example that has
    negative_output: the first wrong command fails, then the correct
    command(s) are executed successfully.

    Returns:
        The training example dict, or ``None`` when the seed lacks a
        ``negative_output`` or either command list is empty.
    """
    inp = example["input"]
    out = example["output"]
    neg = example.get("negative_output")
    if not neg:
        return None

    wrong_commands = neg.get("commands", [])
    correct_commands = out.get("commands", [])
    if not wrong_commands or not correct_commands:
        return None

    player = _first_online_player(inp)
    error_text = neg.get("error", "Unknown or incomplete command")
    risk_level = (example.get("metadata") or {}).get("risk_level", 3)

    messages = [
        build_system_message(mode),
        build_user_message(inp["user_message"], inp.get("server_context")),
    ]

    # First attempt: only the first wrong command is replayed, and it fails.
    messages.append(build_tool_call("rcon.execute", {"command": wrong_commands[0]}))
    messages.append(build_tool_result({"success": False, "result": error_text}))

    # Retry with correct command(s)
    _append_rcon_successes(messages, correct_commands, player)

    # Final response with reasoning about the correction
    reasoning = out.get("reasoning", "")
    if neg.get("error"):
        reasoning = f"First attempt failed: {neg['error']}. {reasoning}"

    final = {"risk_level": risk_level, "commands": correct_commands, "reasoning": reasoning}
    if mode == "god" or out.get("message"):
        final["message"] = out.get("message") or "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-{example['id']}-errorcorrect",
        "source": "tool_training",
        "type": "error_correction",
        "messages": messages,
    }


def gen_error_scenario(scenario: Dict[str, Any], user_request: str,
                       player: str = "slingshooter08",
                       mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a multi-turn error correction example from a predefined
    error scenario.

    ``scenario`` must provide ``id``, ``wrong_command``, ``correct_command``,
    ``error_message`` and ``reasoning``; both command strings are templates
    formatted with ``player``.
    """
    wrong_cmd = scenario["wrong_command"].format(player=player)
    correct_cmd = scenario["correct_command"].format(player=player)

    messages = [build_system_message(mode), build_user_message(user_request)]

    # Wrong attempt
    messages.append(build_tool_call("rcon.execute", {"command": wrong_cmd}))
    messages.append(build_tool_result({"success": False, "result": scenario["error_message"]}))

    # Corrected attempt
    _append_rcon_successes(messages, [correct_cmd], player)

    final = {
        "risk_level": 3,
        "commands": [correct_cmd],
        "reasoning": scenario["reasoning"],
    }
    if mode == "god":
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-scenario-{scenario['id']}",
        "source": "tool_training",
        "type": "error_scenario",
        "messages": messages,
    }


def gen_wiki_lookup(user_request: str, query: str, wiki_content: str,
                    wiki_url: str, resulting_commands: List[str],
                    reasoning: str, player: str = "slingshooter08",
                    mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model looks up wiki info before
    executing.

    ``resulting_commands`` are used verbatim; the caller must already have
    substituted any placeholders.
    """
    messages = [build_system_message(mode), build_user_message(user_request)]

    # Wiki lookup
    messages.append(build_tool_call("minecraft.wiki_lookup", {"query": query}))
    messages.append(build_tool_result({"content": wiki_content, "url": wiki_url}))

    # Execute commands
    _append_rcon_successes(messages, resulting_commands, player)

    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
    if mode == "god":
        # Same default the other generators use for god-mode answers.
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-wiki-{_stable_suffix(user_request)}",
        "source": "tool_training",
        "type": "wiki_lookup",
        "messages": messages,
    }


def gen_player_info_lookup(user_request: str, player: str,
                           resulting_commands: List[str], reasoning: str,
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks player info before acting.

    ``resulting_commands`` are templates. Besides ``{x}``, ``{y}``, ``{z}``
    and ``{player}`` they may use the offset placeholders found in
    ``PLAYER_INFO_EXAMPLES`` (``{y_1}``, ``{x_m5}``, ``{z_minus_100}``, ...).
    """
    messages = [build_system_message(mode), build_user_message(user_request)]

    # Get player info
    messages.append(build_tool_call("world.player_info", {"player": player}))
    pinfo = simulate_player_info(player)
    messages.append(build_tool_result(pinfo))

    # Use the position in commands (substitute coordinates)
    fmt_args = _position_format_args(pinfo["position"], player)
    resolved_cmds = [cmd.format(**fmt_args) for cmd in resulting_commands]

    _append_rcon_successes(messages, resolved_cmds, player)

    final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": reasoning}
    if mode == "god":
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-playerinfo-{_stable_suffix(user_request)}",
        "source": "tool_training",
        "type": "player_info_lookup",
        "messages": messages,
    }


def gen_server_state_check(user_request: str, resulting_commands: List[str],
                           reasoning: str, player: str = "slingshooter08",
                           mode: str = "sudo") -> Dict[str, Any]:
    """
    Generate a conversation where the model checks server state before acting.

    ``resulting_commands`` are used verbatim (no placeholder expansion).
    """
    messages = [build_system_message(mode), build_user_message(user_request)]

    # Check server state
    messages.append(build_tool_call("world.server_state", {}))
    state = simulate_server_state()
    messages.append(build_tool_result(state))

    _append_rcon_successes(messages, resulting_commands, player)

    final = {"risk_level": 3, "commands": resulting_commands, "reasoning": reasoning}
    if mode == "god":
        final["message"] = "It is done."
    messages.append(build_assistant_final(final))

    return {
        "id": f"tool-serverstate-{_stable_suffix(user_request)}",
        "source": "tool_training",
        "type": "server_state_check",
        "messages": messages,
    }


# ---------------------------------------------------------------------------
# Handcrafted wiki/player/server lookup examples
# ---------------------------------------------------------------------------

# Command templates are passed through str.format, so literal braces are
# doubled ({{ }}) and {player} is substituted.
# NOTE(review): the command lines inside two wiki_content snippets (player
# head, spawner) read as if argument placeholders were lost; confirm against
# the intended text.
WIKI_LOOKUP_EXAMPLES = [
    {
        "user_request": "sudo enchant my trident with the best enchants",
        "query": "trident enchantments 1.21",
        "wiki_content": (
            "Trident enchantments in Minecraft 1.21:\n"
            "- Loyalty (1-3): Trident returns after being thrown\n"
            "- Channeling (1): Summons lightning during thunderstorms\n"
            "- Riptide (1-3): Launches player when thrown in water/rain\n"
            "- Impaling (1-5): Extra damage to aquatic mobs\n"
            "- Unbreaking (1-3): Increases durability\n"
            "- Mending (1): Repairs with XP\n"
            "Note: Loyalty and Riptide are mutually exclusive. Channeling and Riptide are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Trident#Enchantments",
        "commands": [
            "give {player} minecraft:trident[enchantments={{loyalty:3,channeling:1,impaling:5,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up trident enchantments. Used loyalty+channeling (not riptide, since loyalty and riptide are mutually exclusive). Max levels on all.",
    },
    {
        "user_request": "sudo what's the command to set a custom player head?",
        "query": "player head command give 1.21",
        "wiki_content": (
            "To give a player head with a custom skin in 1.21:\n"
            "give minecraft:player_head[profile={name:\"\"}] 1\n"
            "This creates a head with the skin of the specified player."
        ),
        "wiki_url": "https://minecraft.wiki/w/Player_Head",
        "commands": [
            'give {player} minecraft:player_head[profile={{name:"{player}"}}] 1'
        ],
        "reasoning": "Looked up player head syntax for 1.21 component format.",
    },
    {
        "user_request": "sudo how do I make a custom spawner?",
        "query": "spawner command setblock 1.21",
        "wiki_content": (
            "To place a mob spawner in 1.21:\n"
            "setblock minecraft:spawner{SpawnData:{entity:{id:\"minecraft:\"}},Delay:20}\n"
            "Or use /give for a spawner item."
        ),
        "wiki_url": "https://minecraft.wiki/w/Monster_Spawner",
        "commands": [
            'setblock ~ ~1 ~ minecraft:spawner{{SpawnData:{{entity:{{id:"minecraft:zombie"}}}},Delay:20}}'
        ],
        "reasoning": "Looked up spawner syntax. Using setblock with SpawnData NBT for zombie spawner.",
    },
    {
        "user_request": "sudo what potion effects can I use for a speed build challenge?",
        "query": "speed and haste effects minecraft 1.21",
        "wiki_content": (
            "Relevant effects for speed building:\n"
            "- minecraft:speed (1-255): Increases movement speed\n"
            "- minecraft:haste (1-255): Increases mining/attack speed\n"
            "- minecraft:jump_boost (1-255): Increases jump height\n"
            "Duration is in seconds. Amplifier 0 = level 1."
        ),
        "wiki_url": "https://minecraft.wiki/w/Effect",
        "commands": [
            "effect give {player} minecraft:speed 600 2",
            "effect give {player} minecraft:haste 600 2",
            "effect give {player} minecraft:jump_boost 600 1",
        ],
        "reasoning": "Looked up speed-related effects. Gave speed 3, haste 3, and jump boost 2 for 10 minutes.",
    },
    {
        "user_request": "sudo give me a crossbow with multishot and quick charge",
        "query": "crossbow enchantments multishot quick charge 1.21",
        "wiki_content": (
            "Crossbow enchantments in 1.21:\n"
            "- Quick Charge (1-3): Reduces reload time\n"
            "- Multishot (1): Fires 3 arrows at once\n"
            "- Piercing (1-4): Arrows pass through entities\n"
            "- Unbreaking (1-3): Durability\n"
            "- Mending (1): XP repair\n"
            "Note: Multishot and Piercing are mutually exclusive."
        ),
        "wiki_url": "https://minecraft.wiki/w/Crossbow#Enchantments",
        "commands": [
            "give {player} minecraft:crossbow[enchantments={{multishot:1,quick_charge:3,unbreaking:3,mending:1}}] 1"
        ],
        "reasoning": "Looked up crossbow enchantments. Multishot and piercing are exclusive; chose multishot as requested. Max quick charge.",
    },
]

# Coordinate placeholders ({x}, {y_1}, {x_m5}, ...) are filled from the
# simulated player position when the examples are generated.
PLAYER_INFO_EXAMPLES = [
    {
        "user_request": "sudo build a diamond block tower where I'm standing",
        "player": "slingshooter08",
        "commands": [
            "setblock {x} {y} {z} minecraft:diamond_block",
            "setblock {x} {y_1} {z} minecraft:diamond_block",
            "setblock {x} {y_2} {z} minecraft:diamond_block",
            "setblock {x} {y_3} {z} minecraft:diamond_block",
            "setblock {x} {y_4} {z} minecraft:diamond_block",
        ],
        "reasoning": "Got player position, then placed 5 diamond blocks in a tower at their location.",
    },
    {
        "user_request": "sudo teleport me 100 blocks north",
        "player": "slingshooter08",
        "commands": ["tp {player} {x} {y} {z_minus_100}"],
        "reasoning": "Got player position, then teleported 100 blocks north (negative Z direction).",
    },
    {
        "user_request": "sudo put a glass dome over my head",
        "player": "slingshooter08",
        "commands": [
            "fill {x_m5} {y} {z_m5} {x_p5} {y_p10} {z_p5} minecraft:glass hollow",
        ],
        "reasoning": "Got player position to calculate dome coordinates. Used fill hollow with glass.",
    },
    {
        "user_request": "sudo light up the area around me with glowstone",
        "player": "slingshooter08",
        "commands": [
            "setblock {x_p3} {y_p5} {z} minecraft:glowstone",
            "setblock {x_m3} {y_p5} {z} minecraft:glowstone",
            "setblock {x} {y_p5} {z_p3} minecraft:glowstone",
            "setblock {x} {y_p5} {z_m3} minecraft:glowstone",
        ],
        "reasoning": "Got player position, placed glowstone lights at 4 cardinal points above the player.",
    },
]

# A {p} placeholder is expanded once per online player at generation time.
SERVER_STATE_EXAMPLES = [
    {
        "user_request": "sudo make it daytime if it's nighttime",
        "commands": ["time set day"],
        "reasoning": "Checked server state: it was nighttime, so set time to day.",
    },
    {
        "user_request": "sudo clear weather if it's raining",
        "commands": ["weather clear"],
        "reasoning": "Checked server state: weather was rain, so cleared it.",
    },
    {
        "user_request": "sudo give everyone online a diamond",
        "commands": [
            "give {p} minecraft:diamond 1"
        ],
        "reasoning": "Checked server state to get online player list, then gave each player a diamond.",
    },
    {
        "user_request": "sudo heal everyone on the server",
        "commands": [
            "effect give {p} minecraft:instant_health 1 5"
        ],
        "reasoning": "Checked server state for online players, then healed each one.",
    },
]

# Requests that map to error scenarios for natural phrasing
ERROR_SCENARIO_REQUESTS = {
    "missing_prefix": "sudo give me a diamond sword",
    "old_nbt_enchantments": "sudo give me a max enchanted diamond sword",
    "invalid_effect_name": "sudo give me haste effect",
    "wrong_item_bed": "sudo give me a bed",
    "wrong_item_log": "sudo give me a stack of logs",
    "count_wrong_position": "sudo give me 64 diamonds",
    "effect_missing_give": "sudo give me speed",
    "weather_storm": "sudo make it storm",
    "gamemode_abbreviation": "sudo put me in creative",
    "wrong_item_grass": "sudo give me some grass",
    "summon_no_prefix": "sudo spawn a zombie near me",
    "old_zombie_pigman": "sudo summon a zombie pigman",
}


# ---------------------------------------------------------------------------
# Format to Qwen3 chat template string
# ---------------------------------------------------------------------------

def format_qwen3(messages: List[Dict[str, str]]) -> str:
    """Convert messages list to Qwen3 chat template format.

    Each message becomes ``<|im_start|>{role}\\n{content}\\n<|im_end|>`` and
    the turns are joined with newlines.
    """
    # NOTE(review): the published Qwen chat template appears to place
    # <|im_end|> directly after the content, without the extra newline
    # emitted here. Confirm before relying on qwen3_text for training.
    return "\n".join(
        f"<|im_start|>{msg['role']}\n{msg['content']}\n<|im_end|>"
        for msg in messages
    )


# ---------------------------------------------------------------------------
# Main generation pipeline
# ---------------------------------------------------------------------------

def load_seed_data() -> List[Dict[str, Any]]:
    """Load the seed dataset: one JSON object per non-blank line of SEED_PATH."""
    # Explicit encoding so parsing does not depend on the platform default.
    with open(SEED_PATH, encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [json.loads(line) for line in stripped if line]


# The return annotation is quoted so the builtin-generic ``tuple[...]`` is
# never evaluated at runtime on interpreters that predate it.
def generate_all() -> "tuple[List[Dict[str, Any]], Dict[str, int]]":
    """Generate all tool-calling training examples.

    Seeds the ``random`` module with 42 and then runs the six sections below
    in a fixed order.

    Returns:
        ``(all_examples, counts)``: the generated example dicts and a
        per-type tally keyed by example ``type``.
    """
    random.seed(42)
    seed_data = load_seed_data()
    all_examples = []
    counts = {
        "command_success": 0,
        "error_correction": 0,
        "error_scenario": 0,
        "wiki_lookup": 0,
        "player_info_lookup": 0,
        "server_state_check": 0,
    }

    # --- 1. Command success examples from seed data ---
    for ex in seed_data:
        if ex.get("category", "") not in ("command_gen", "safety", "troubleshoot"):
            continue
        output = ex.get("output") or {}
        if not output.get("commands", []):
            continue
        # Decide mode: if it has a "message" field, treat as god
        mode = "god" if output.get("message") else "sudo"
        result = gen_command_success(ex, mode=mode)
        if result:
            all_examples.append(result)
            counts["command_success"] += 1

    # --- 2. Error correction from negative_output examples ---
    for ex in seed_data:
        neg = ex.get("negative_output")
        if not (neg and neg.get("commands")):
            continue
        mode = "god" if (ex.get("output") or {}).get("message") else "sudo"
        result = gen_error_correction_from_negative(ex, mode=mode)
        if result:
            all_examples.append(result)
            counts["error_correction"] += 1

    # --- 3. Error scenario examples ---
    for scenario in ERROR_SCENARIOS:
        user_req = ERROR_SCENARIO_REQUESTS.get(scenario["id"], "sudo do something")
        player = random.choice(DEFAULT_PLAYERS)
        # Generate both sudo and god mode variants for each scenario
        for mode in ["sudo", "god"]:
            result = gen_error_scenario(scenario, user_req, player=player, mode=mode)
            if result:
                result["id"] += f"-{mode}"
                all_examples.append(result)
                counts["error_scenario"] += 1

    # --- 4. Wiki lookup examples ---
    for wex in WIKI_LOOKUP_EXAMPLES:
        player = random.choice(DEFAULT_PLAYERS)
        resolved_cmds = [c.format(player=player) for c in wex["commands"]]
        result = gen_wiki_lookup(
            user_request=wex["user_request"],
            query=wex["query"],
            wiki_content=wex["wiki_content"],
            wiki_url=wex["wiki_url"],
            resulting_commands=resolved_cmds,
            reasoning=wex["reasoning"],
            player=player,
        )
        if result:
            all_examples.append(result)
            counts["wiki_lookup"] += 1

    # --- 5. Player info lookup examples ---
    for pex in PLAYER_INFO_EXAMPLES:
        player = pex.get("player", "slingshooter08")

        # Only the simulated position is used; health and inventory in the
        # tool result below are fixed values.
        pos = simulate_player_info(player)["position"]
        fmt_args = _position_format_args(pos, player)
        resolved_cmds = [cmd.format(**fmt_args) for cmd in pex["commands"]]

        messages = [
            build_system_message("sudo"),
            build_user_message(pex["user_request"]),
            build_tool_call("world.player_info", {"player": player}),
            build_tool_result({
                "health": 20.0,
                "position": pos,
                "inventory_summary": "Diamond gear, various items",
            }),
        ]
        _append_rcon_successes(messages, resolved_cmds, player)

        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": pex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-playerinfo-{_stable_suffix(pex['user_request'])}",
            "source": "tool_training",
            "type": "player_info_lookup",
            "messages": messages,
        })
        counts["player_info_lookup"] += 1

    # --- 6. Server state check examples ---
    for sex in SERVER_STATE_EXAMPLES:
        state = simulate_server_state()
        players = state["online_players"]
        fallback_player = players[0] if players else "player"

        messages = [
            build_system_message("sudo"),
            build_user_message(sex["user_request"]),
            build_tool_call("world.server_state", {}),
            build_tool_result(state),
        ]

        # Resolve commands that reference {p} for each player, remembering
        # which player each expanded command targets so the simulated result
        # names that player instead of always the first one online.
        targeted_cmds = []
        for template in sex["commands"]:
            if "{p}" in template:
                targeted_cmds.extend((template.format(p=name), name) for name in players)
            else:
                targeted_cmds.append((template, fallback_player))
        resolved_cmds = [cmd for cmd, _ in targeted_cmds]

        for cmd, target in targeted_cmds:
            messages.append(build_tool_call("rcon.execute", {"command": cmd}))
            messages.append(build_tool_result({
                "success": True,
                "result": simulate_rcon_success(cmd, target),
            }))

        final = {"risk_level": 3, "commands": resolved_cmds, "reasoning": sex["reasoning"]}
        messages.append(build_assistant_final(final))

        all_examples.append({
            "id": f"tool-serverstate-{_stable_suffix(sex['user_request'])}",
            "source": "tool_training",
            "type": "server_state_check",
            "messages": messages,
        })
        counts["server_state_check"] += 1

    return all_examples, counts


def main():
    """Generate the dataset, write it to OUTPUT_PATH as JSONL, print a summary.

    Exits with status 1 when the seed dataset file does not exist.
    """
    print("=" * 60)
    print("Tool-Calling Training Data Generator")
    print("=" * 60)
    print(f"\nSeed dataset: {SEED_PATH}")
    print(f"Output: {OUTPUT_PATH}")

    if not SEED_PATH.exists():
        print(f"\nERROR: Seed dataset not found at {SEED_PATH}")
        sys.exit(1)

    all_examples, counts = generate_all()

    # Write output
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        for ex in all_examples:
            # Store both the structured messages and the formatted Qwen3 text
            ex["qwen3_text"] = format_qwen3(ex["messages"])
            f.write(json.dumps(ex) + "\n")

    # Summary
    total = len(all_examples)
    print(f"\nGenerated {total} tool-calling training examples:\n")
    print(f" {'Type':<25} {'Count':>6}")
    print(f" {'-'*25} {'-'*6}")
    for typ, count in sorted(counts.items()):
        print(f" {typ:<25} {count:>6}")
    print(f" {'-'*25} {'-'*6}")
    print(f" {'TOTAL':<25} {total:>6}")

    # Validate a sample
    print("\n--- Sample validation ---")
    if all_examples:
        sample = random.choice(all_examples)
        print(f" ID: {sample['id']}")
        print(f" Type: {sample['type']}")
        print(f" Turns: {len(sample['messages'])}")
        roles = [m['role'] for m in sample['messages']]
        print(f" Roles: {' -> '.join(roles)}")
        print("\n Qwen3 text preview (first 500 chars):")
        print(f" {sample['qwen3_text'][:500]}")
    else:
        # random.choice raises IndexError on an empty list.
        print(" No examples were generated; nothing to sample.")

    print(f"\nOutput written to: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()