Files
Mortdecai/knowledge/build_recipe_tree.py
T
Seth 0473eb0b50 Minecraft knowledge corpus, recipe trees, GitHub scraper, 644 examples
Knowledge corpus (knowledge/mc-data/):
- 1505 items, 886 crafting recipes, 1166 blocks from minecraft-data 1.21.11
- Recipe dependency tree builder (knowledge/build_recipe_tree.py)
- Crafting chain training: "give me everything to make X from scratch"
- Smelting recipes, version awareness examples

Training data (644 examples total):
- 107 command syntax reference examples (every command + common errors)
- 176 recipe/crafting chain examples (63 crafting, 103 material-giving, 11 smelting)
- 344 Claude-distilled examples (222 sudo + 122 god via Haiku)
- Live bot audit data ingested (128 examples from dev server)

Swarm bots:
- Swimming/water escape logic
- Door opening
- Context-aware prayers (inventory, health, time, depth)
- Prefix enforcement on all Gemini/Dolphin prompts

GitHub log scraper (data/scrape_server_logs.py):
- Searches GitHub for Minecraft server logs with commands
- Strict 1.20.5+ version filter
- Extracts command pairs, converts to training format

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 20:33:09 -04:00

288 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Build a crafting dependency tree from minecraft-data and generate training examples.
Creates:
1. knowledge/mc-data/recipe_tree.json — full item→recipe→ingredients tree
2. data/raw/recipe_training.jsonl — training examples about crafting chains
"""
import json
from pathlib import Path
from collections import defaultdict
# ROOT is the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parent.parent
KNOWLEDGE = ROOT / "knowledge" / "mc-data"


def _load_json(path):
    """Parse the JSON file at *path* and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, and it is read as UTF-8 regardless of the platform's
    default encoding.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Load data
items_raw = _load_json(KNOWLEDGE / "items.json")
recipes_raw = _load_json(KNOWLEDGE / "recipes.json")
blocks_raw = _load_json(KNOWLEDGE / "blocks.json")
# Build ID→name maps
# Two lookup tables over the item list: numeric id → name, and name → id.
id_to_name = {entry["id"]: entry["name"] for entry in items_raw}
name_to_id = {entry["name"]: entry["id"] for entry in items_raw}
# Build recipe tree: item_name → list of recipes with ingredient names
def _slot_item_names(slot):
    """Return the set of item names referenced by one recipe slot.

    A slot may be ``None`` (empty), a single integer item id, or a list of
    ids (entries that are ``None`` are skipped). Ids missing from
    ``id_to_name`` are rendered as ``unknown_<id>``. Any other slot type
    contributes nothing.
    """
    if slot is None:
        return set()
    if isinstance(slot, int):
        return {id_to_name.get(slot, f"unknown_{slot}")}
    if isinstance(slot, list):
        # Multiple options for this slot
        return {
            id_to_name.get(opt, f"unknown_{opt}")
            for opt in slot
            if opt is not None
        }
    return set()


def _recipe_ingredient_names(recipe):
    """Collect every ingredient name used by *recipe* (shaped and shapeless)."""
    names = set()
    # Shaped recipes (inShape): a grid of rows, each row a list of slots.
    if "inShape" in recipe:
        for row in recipe["inShape"]:
            if row is None:
                continue
            for cell in row:
                names |= _slot_item_names(cell)
    # Shapeless recipes (ingredients list): a flat list of slots.
    if "ingredients" in recipe:
        for slot in recipe["ingredients"]:
            names |= _slot_item_names(slot)
    return names


recipe_tree = {}
for item_id_str, recipe_list in recipes_raw.items():
    item_id = int(item_id_str)
    item_name = id_to_name.get(item_id, f"unknown_{item_id}")
    parsed_recipes = []
    for recipe in recipe_list:
        ingredients = _recipe_ingredient_names(recipe)
        if not ingredients:
            # Recipes with no resolvable ingredients are not recorded.
            continue
        result = recipe.get("result")
        # Only a dict-shaped result carries a count; anything else yields 1.
        result_count = result.get("count", 1) if isinstance(result, dict) else 1
        parsed_recipes.append({
            "ingredients": sorted(ingredients),
            "count": result_count,
            "shaped": "inShape" in recipe,
        })
    if parsed_recipes:
        recipe_tree[item_name] = parsed_recipes
# Build dependency chains (what do you need from scratch?)
def get_full_chain(item_name, visited=None):
    """Return the nested crafting tree for *item_name*.

    Each node is a dict. Leaves are ``{"item": name, "raw": True}`` and are
    produced both for items without an entry in ``recipe_tree`` and for items
    already seen on the current path (a circular dependency). Craftable nodes
    carry ``"raw": False`` plus the first recipe's ``ingredients`` and
    ``count``, and a ``deps`` list holding one sub-tree per ingredient.
    """
    seen = set() if visited is None else visited

    # Leaf: either a loop back to an item on this path, or nothing to craft from.
    if item_name in seen or item_name not in recipe_tree:
        seen.add(item_name)
        return {"item": item_name, "raw": True}

    seen.add(item_name)
    first_recipe = recipe_tree[item_name][0]  # only the first recipe is followed
    parts = first_recipe["ingredients"]
    return {
        "item": item_name,
        "raw": False,
        "ingredients": parts,
        "count": first_recipe["count"],
        # Every branch receives its own copy so siblings do not block each other.
        "deps": [get_full_chain(part, seen.copy()) for part in parts],
    }
def get_raw_materials(item_name, visited=None):
    """Get flat list of raw materials needed.

    Follows the first recipe of each item in ``recipe_tree`` recursively and
    returns the leaf item names in traversal order. The list may contain
    duplicates; callers de-duplicate as needed.

    An item is a leaf when it has no entry in ``recipe_tree`` or when it is
    met again on the current path (a circular recipe such as ingot ↔ block).
    In the circular case the item itself is reported as the raw material,
    which matches how ``get_full_chain`` marks such nodes as raw; returning
    nothing there would silently drop the material from the result.

    Args:
        item_name: Name of the item to expand.
        visited: Item names already on the current path; a fresh set is
            created when omitted.

    Returns:
        A list of raw material names.
    """
    if visited is None:
        visited = set()
    if item_name in visited:
        # Circular dependency: the item has to be supplied as-is.
        return [item_name]
    visited.add(item_name)
    if item_name not in recipe_tree:
        return [item_name]
    raw = []
    for ing in recipe_tree[item_name][0]["ingredients"]:
        # Each branch gets its own copy so sibling ingredients are independent.
        raw.extend(get_raw_materials(ing, visited.copy()))
    return raw
# Save recipe tree
# Written next to the source data as pretty-printed JSON.
with (KNOWLEDGE / "recipe_tree.json").open("w") as f:
    json.dump(recipe_tree, f, indent=2)

craftable_total = len(recipe_tree)
print(f"Recipe tree: {craftable_total} craftable items")
# Generate training examples
examples = []

# Important crafting chains that players commonly ask about
KEY_ITEMS = [
    # Basics
    "crafting_table",
    "furnace",
    "chest",
    "torch",
    "stick",
    # Pickaxes
    "wooden_pickaxe",
    "stone_pickaxe",
    "iron_pickaxe",
    "diamond_pickaxe",
    # Swords
    "wooden_sword",
    "stone_sword",
    "iron_sword",
    "diamond_sword",
    # Iron armor
    "iron_helmet",
    "iron_chestplate",
    "iron_leggings",
    "iron_boots",
    # Diamond armor
    "diamond_helmet",
    "diamond_chestplate",
    "diamond_leggings",
    "diamond_boots",
    # Gear and utilities
    "shield",
    "bow",
    "arrow",
    "bed",
    "bucket",
    "compass",
    "clock",
    # Workstations
    "enchanting_table",
    "anvil",
    "brewing_stand",
    "beacon",
    # Building pieces
    "glass",
    "glass_pane",
    "bookshelf",
    "ladder",
    "fence",
    "door",
    # Rails and redstone
    "rail",
    "powered_rail",
    "hopper",
    "piston",
    "sticky_piston",
    "observer",
    "dispenser",
    "dropper",
    "repeater",
    "comparator",
    # Oak wood family
    "oak_planks",
    "oak_slab",
    "oak_stairs",
    "oak_door",
    "oak_fence",
    # Ingots
    "iron_ingot",
    "gold_ingot",
    "netherite_ingot",
    # Food
    "bread",
    "cake",
    "cookie",
    "golden_apple",
    # Miscellaneous
    "tnt",
    "firework_rocket",
    "map",
    "lantern",
    "campfire",
]
# Player name used in every generated give command and server context.
PLAYER = "slingshooter08"

# For every key item that has a recipe, emit up to three training examples:
# a crafting-info answer, a "give me the direct ingredients" command list and,
# when the raw materials differ from the direct ingredients, a "from scratch"
# command list.
for item_name in KEY_ITEMS:
    if item_name not in recipe_tree:
        continue  # no recipe recorded for this name
    recipe = recipe_tree[item_name][0]
    ingredients = recipe["ingredients"]  # stored sorted and unique by the tree builder
    count = recipe["count"]
    # Sorted and de-duplicated so the comparison with `ingredients` below does
    # not depend on set iteration order, which varies between runs.
    raw_mats = sorted(set(get_raw_materials(item_name)))
    display_name = item_name.replace("_", " ")
    # NOTE(review): "shaped with more than two distinct ingredients" is the
    # script's heuristic for needing a crafting table — confirm it is intended.
    needs_table = recipe["shaped"] and len(ingredients) > 2

    # "How do I craft X"
    ing_str = ", ".join(ingredients)
    craft_message = f"To craft {display_name} you need: {ing_str}. Makes {count}."
    if needs_table:
        craft_message += " Requires a crafting table."
    examples.append({
        "id": f"recipe-{item_name}-craft",
        "source": "manual",
        "category": "info",
        "input": {
            "user_message": f"sudo how do I craft {display_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x"},
        },
        "output": {
            "reasoning": f"Crafting recipe for {item_name} in 1.21.",
            "commands": [],
            "message": craft_message,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X"
    give_cmds = [
        f"give {PLAYER} minecraft:{mat} 64" for mat in sorted(set(ingredients))
    ]
    # Also give crafting table if shaped recipe needs it
    if needs_table and "crafting_table" not in ingredients:
        give_cmds.insert(0, f"give {PLAYER} minecraft:crafting_table 1")
    examples.append({
        "id": f"recipe-{item_name}-materials",
        "source": "manual",
        "category": "command_gen",
        "input": {
            "user_message": f"sudo give me everything I need to craft {display_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": [PLAYER]},
        },
        "output": {
            "reasoning": f"Player needs materials to craft {item_name}. Ingredients: {ing_str}. Giving materials + crafting table if needed.",
            "commands": give_cmds,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "medium", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X from scratch" (raw materials)
    # Skipped when there are no raw materials or they equal the direct ingredients.
    if raw_mats and raw_mats != ingredients:
        raw_cmds = [f"give {PLAYER} minecraft:{mat} 64" for mat in raw_mats]
        raw_cmds.insert(0, f"give {PLAYER} minecraft:crafting_table 1")
        raw_str = ", ".join(raw_mats)
        examples.append({
            "id": f"recipe-{item_name}-scratch",
            "source": "manual",
            "category": "command_gen",
            "input": {
                "user_message": f"sudo give me everything I need to make {display_name} from scratch",
                "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": [PLAYER]},
            },
            "output": {
                "reasoning": f"Player needs raw materials to craft {item_name} from scratch. Full chain: {raw_str}.",
                "commands": raw_cmds,
                "safety_flags": [],
            },
            "metadata": {"difficulty": "hard", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 4},
        })
# Smelting knowledge (furnace recipes)
# Each row is (result item, item to smelt, fuel named in the generated answer).
SMELTING = {
    smelted: {"input": source, "fuel": fuel}
    for smelted, source, fuel in (
        ("glass", "sand", "coal"),
        ("iron_ingot", "raw_iron", "coal"),
        ("gold_ingot", "raw_gold", "coal"),
        ("stone", "cobblestone", "coal"),
        ("smooth_stone", "stone", "coal"),
        ("brick", "clay_ball", "coal"),
        ("cooked_beef", "beef", "coal"),
        ("cooked_porkchop", "porkchop", "coal"),
        ("cooked_chicken", "chicken", "coal"),
        ("dried_kelp", "kelp", "coal"),
        ("charcoal", "oak_log", "oak_log"),
    )
}
# One informational example per smelting recipe.
for smelted, spec in SMELTING.items():
    source_item = spec["input"]
    # Human-readable labels: underscores become spaces.
    smelted_label = smelted.replace("_", " ")
    source_label = source_item.replace("_", " ")
    fuel_label = spec["fuel"].replace("_", " ")

    smelt_input = {
        "user_message": f"sudo how do I make {smelted_label}",
        "server_context": {"server_type": "paper", "version": "1.21.x"},
    }
    smelt_output = {
        "reasoning": f"Smelting recipe: {source_item} + fuel in a furnace = {smelted}.",
        "commands": [],
        "message": f"Smelt {source_label} in a furnace with {fuel_label} as fuel to get {smelted_label}.",
        "safety_flags": [],
    }
    examples.append({
        "id": f"smelt-{smelted}",
        "source": "manual",
        "category": "info",
        "input": smelt_input,
        "output": smelt_output,
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "smelting knowledge", "risk_level": 3},
    })
# Version awareness
# A single example whose answer reports how many craftable items the tree holds.
known_items = len(recipe_tree)
version_note = {
    "id": "recipe-version-note",
    "source": "manual",
    "category": "info",
    "input": {
        "user_message": "sudo are there any new items or recipes",
        "server_context": {"server_type": "paper", "version": "1.21.x"},
    },
    "output": {
        "reasoning": "The model should acknowledge it's trained on a specific version and recipes may change.",
        "commands": [],
        "message": (
            f"I know {known_items} craftable items from Minecraft 1.21.x. "
            "New items and recipes are added with each update. "
            "If something doesn't work, it may have been added or changed in a newer version."
        ),
        "safety_flags": [],
    },
    "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "version awareness", "risk_level": 3},
}
examples.append(version_note)
# Save training examples
output = ROOT / "data" / "raw" / "recipe_training.jsonl"
# Create the target directory if it is missing so the write cannot fail on it.
output.parent.mkdir(parents=True, exist_ok=True)
# The lines are dumped with ensure_ascii=False, so the encoding is pinned to
# UTF-8 instead of depending on the platform default.
with open(output, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in examples)


def _count_examples(matches):
    """Count the examples whose ``id`` satisfies the predicate *matches*."""
    return sum(1 for ex in examples if matches(ex["id"]))


# Ids have the form "recipe-<item>-<kind>" or "smelt-<item>". Exact
# prefix/suffix matching keeps item names that happen to contain a keyword
# (e.g. "crafting_table") from being counted in the wrong bucket.
print(f"Training examples: {len(examples)}")
print(f" Crafting info: {_count_examples(lambda i: i.endswith('-craft'))}")
print(f" Material giving: {_count_examples(lambda i: i.endswith('-materials'))}")
print(f" From scratch: {_count_examples(lambda i: i.endswith('-scratch'))}")
print(f" Smelting: {_count_examples(lambda i: i.startswith('smelt-'))}")
print(f"Saved to {output}")