Minecraft knowledge corpus, recipe trees, GitHub scraper, 644 examples

Knowledge corpus (knowledge/mc-data/):
- 1505 items, 886 crafting recipes, 1166 blocks from minecraft-data 1.21.11
- Recipe dependency tree builder (knowledge/build_recipe_tree.py)
- Crafting chain training: "give me everything to make X from scratch"
- Smelting recipes, version awareness examples

Training data (644 examples total):
- 107 command syntax reference examples (every command + common errors)
- 176 recipe/crafting chain examples (63 crafting, 103 material-giving, 11 smelting)
- 344 Claude-distilled examples (222 sudo + 122 god via Haiku)
- Live bot audit data ingested (128 examples from dev server)

Swarm bots:
- Swimming/water escape logic
- Door opening
- Context-aware prayers (inventory, health, time, depth)
- Prefix enforcement on all Gemini/Dolphin prompts

GitHub log scraper (data/scrape_server_logs.py):
- Searches GitHub for Minecraft server logs with commands
- Strict 1.20.5+ version filter
- Extracts command pairs, converts to training format

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-18 20:33:09 -04:00
parent 65ee146043
commit 0473eb0b50
14 changed files with 103586 additions and 7 deletions
+287
View File
@@ -0,0 +1,287 @@
#!/usr/bin/env python3
"""
Build a crafting dependency tree from minecraft-data and generate training examples.
Creates:
1. knowledge/mc-data/recipe_tree.json — full item→recipe→ingredients tree
2. data/raw/recipe_training.jsonl — training examples about crafting chains
"""
import json
from pathlib import Path
from collections import defaultdict
# Two directory levels above this file; the data paths below hang off it.
ROOT = Path(__file__).resolve().parent.parent
# Directory holding the JSON dumps read below.
KNOWLEDGE = ROOT / "knowledge" / "mc-data"


def _load_json(path):
    """Parse the JSON file at *path*, closing the handle once it is read."""
    # JSON is UTF-8 by specification, so do not depend on the locale default.
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Load data
items_raw = _load_json(KNOWLEDGE / "items.json")
recipes_raw = _load_json(KNOWLEDGE / "recipes.json")
# NOTE(review): blocks_raw is loaded but not referenced in the visible source.
blocks_raw = _load_json(KNOWLEDGE / "blocks.json")
# Build ID→name maps
# Two-way lookup between numeric item IDs and item names; when items_raw
# repeats a key, the last entry wins.
id_to_name = {entry["id"]: entry["name"] for entry in items_raw}
name_to_id = {entry["name"]: entry["id"] for entry in items_raw}
# Build recipe tree: item_name → list of recipes with ingredient names
def _slot_names(slot):
    """Return the item names that a single recipe slot may hold.

    An int slot is looked up in id_to_name; a list slot is treated as a set
    of alternatives and every non-None entry is looked up. IDs missing from
    id_to_name become "unknown_<id>". Any other slot value (including None)
    contributes no names.
    """
    if isinstance(slot, int):
        return [id_to_name.get(slot, f"unknown_{slot}")]
    if isinstance(slot, list):
        return [
            id_to_name.get(choice, f"unknown_{choice}")
            for choice in slot
            if choice is not None
        ]
    return []


recipe_tree = {}
for raw_id, variants in recipes_raw.items():
    numeric_id = int(raw_id)
    item_name = id_to_name.get(numeric_id, f"unknown_{numeric_id}")

    parsed = []
    for variant in variants:
        names = set()

        # Shaped recipes: a grid of rows, each row a list of slots.
        if "inShape" in variant:
            for row in variant["inShape"]:
                if row is None:
                    continue
                for cell in row:
                    names.update(_slot_names(cell))

        # Shapeless recipes: a flat list of slots.
        if "ingredients" in variant:
            for slot in variant["ingredients"]:
                names.update(_slot_names(slot))

        # Output quantity defaults to 1 when "result" is absent or not a dict.
        produced = variant.get("result")
        made = produced.get("count", 1) if isinstance(produced, dict) else 1

        # Variants that yielded no ingredient names are dropped.
        if not names:
            continue
        parsed.append({
            "ingredients": sorted(names),
            "count": made,
            "shaped": "inShape" in variant,
        })

    # Items whose variants were all dropped get no entry in the tree.
    if parsed:
        recipe_tree[item_name] = parsed
# Build dependency chains (what do you need from scratch?)
def get_full_chain(item_name, visited=None, tree=None):
    """Build the nested crafting-dependency tree for *item_name*.

    Args:
        item_name: Name of the item to expand.
        visited: Names already expanded on the current path. The set that is
            passed in gets *item_name* added to it; each ingredient is
            expanded with its own copy, so sibling branches do not affect
            one another.
        tree: Mapping of item name to a list of recipe dicts with
            "ingredients" and "count" keys. Defaults to the module-level
            recipe_tree.

    Returns:
        {"item": name, "raw": True} for an item with no entry in *tree* or
        one already on the current path (circular dependency). Otherwise a
        dict with "item", "raw" (False), "ingredients", "count" and "deps",
        where "deps" holds one such node per ingredient. Only the first
        recipe listed for an item is followed.
    """
    if tree is None:
        tree = recipe_tree
    if visited is None:
        visited = set()

    if item_name in visited:
        return {"item": item_name, "raw": True}  # circular dependency
    visited.add(item_name)

    if item_name not in tree:
        return {"item": item_name, "raw": True}  # raw material (mined/found)

    first_recipe = tree[item_name][0]
    return {
        "item": item_name,
        "raw": False,
        "ingredients": first_recipe["ingredients"],
        "count": first_recipe["count"],
        "deps": [
            get_full_chain(ing, visited.copy(), tree)
            for ing in first_recipe["ingredients"]
        ],
    }
def get_raw_materials(item_name, visited=None, tree=None):
    """Return the flat list of raw materials needed to craft *item_name*.

    Args:
        item_name: Name of the item to expand.
        visited: Names already expanded on the current path. The set that is
            passed in gets *item_name* added to it; each ingredient is
            expanded with its own copy.
        tree: Mapping of item name to a list of recipe dicts with an
            "ingredients" key. Defaults to the module-level recipe_tree.

    Returns:
        A list of item names that have no entry in *tree*, in ingredient
        order. A material reached through several branches appears once per
        branch, so callers that need unique names must de-duplicate. An item
        already on the current path (circular dependency) contributes
        nothing. Only the first recipe listed for an item is followed.
    """
    if tree is None:
        tree = recipe_tree
    if visited is None:
        visited = set()

    if item_name in visited:
        return []
    visited.add(item_name)

    if item_name not in tree:
        return [item_name]

    raw = []
    for ing in tree[item_name][0]["ingredients"]:
        raw.extend(get_raw_materials(ing, visited.copy(), tree))
    return raw
# Save recipe tree
# Persist the parsed tree next to the source data, then report its size.
tree_path = KNOWLEDGE / "recipe_tree.json"
with open(tree_path, "w") as tree_file:
    tree_file.write(json.dumps(recipe_tree, indent=2))
print(f"Recipe tree: {len(recipe_tree)} craftable items")
# Generate training examples
# Accumulates every generated training example, in generation order.
examples = []

# Items to generate crafting examples for. Order matters: it fixes the
# order of the generated examples.
KEY_ITEMS = [
    # Basics
    "crafting_table", "furnace", "chest", "torch", "stick",
    # Pickaxes
    "wooden_pickaxe", "stone_pickaxe", "iron_pickaxe", "diamond_pickaxe",
    # Swords
    "wooden_sword", "stone_sword", "iron_sword", "diamond_sword",
    # Iron armor
    "iron_helmet", "iron_chestplate", "iron_leggings", "iron_boots",
    # Diamond armor
    "diamond_helmet", "diamond_chestplate", "diamond_leggings",
    "diamond_boots",
    # Combat and utility
    "shield", "bow", "arrow", "bed", "bucket", "compass", "clock",
    # Workstations
    "enchanting_table", "anvil", "brewing_stand", "beacon",
    # Building
    "glass", "glass_pane", "bookshelf", "ladder", "fence", "door",
    # Rails and redstone
    "rail", "powered_rail", "hopper", "piston", "sticky_piston",
    "observer", "dispenser", "dropper", "repeater", "comparator",
    # Oak wood family
    "oak_planks", "oak_slab", "oak_stairs", "oak_door", "oak_fence",
    # Ingots
    "iron_ingot", "gold_ingot", "netherite_ingot",
    # Food
    "bread", "cake", "cookie", "golden_apple",
    # Miscellaneous
    "tnt", "firework_rocket", "map", "lantern", "campfire",
]
# For each key item emit up to three examples: how to craft it, a /give for
# its direct ingredients, and a /give for its raw materials "from scratch".
for item_name in KEY_ITEMS:
    # Key items with no crafting recipe in the tree are silently skipped.
    if item_name not in recipe_tree:
        continue

    recipe = recipe_tree[item_name][0]  # only the first listed recipe is used
    ingredients = recipe["ingredients"]  # stored sorted and unique
    count = recipe["count"]
    # Sorted so the comparison with `ingredients` below is order-independent
    # and the output is reproducible; a bare list(set(...)) follows string
    # hash order, which varies between interpreter runs.
    raw_mats = sorted(set(get_raw_materials(item_name)))

    display_name = item_name.replace('_', ' ')
    ing_str = ", ".join(ingredients)
    # NOTE(review): this is a heuristic on the number of distinct
    # ingredients, not on the recipe's grid size; confirm it is intended.
    needs_table = recipe["shaped"] and len(ingredients) > 2

    # "How do I craft X"
    craft_message = f"To craft {display_name} you need: {ing_str}. Makes {count}."
    if needs_table:
        craft_message += " Requires a crafting table."
    examples.append({
        "id": f"recipe-{item_name}-craft",
        "source": "manual",
        "category": "info",
        "input": {
            "user_message": f"sudo how do I craft {display_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x"},
        },
        "output": {
            "reasoning": f"Crafting recipe for {item_name} in 1.21.",
            "commands": [],
            "message": craft_message,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X"
    give_cmds = [
        f"give slingshooter08 minecraft:{mat} 64"
        for mat in sorted(set(ingredients))
    ]
    # Also give crafting table if shaped recipe needs it
    if needs_table and "crafting_table" not in ingredients:
        give_cmds.insert(0, "give slingshooter08 minecraft:crafting_table 1")
    examples.append({
        "id": f"recipe-{item_name}-materials",
        "source": "manual",
        "category": "command_gen",
        "input": {
            "user_message": f"sudo give me everything I need to craft {display_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": ["slingshooter08"]},
        },
        "output": {
            "reasoning": f"Player needs materials to craft {item_name}. Ingredients: {ing_str}. Giving materials + crafting table if needed.",
            "commands": give_cmds,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "medium", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X from scratch" (raw materials).
    # Skipped when the raw materials are exactly the direct ingredients,
    # since that would only repeat the materials example above.
    if raw_mats and raw_mats != ingredients:
        raw_cmds = [f"give slingshooter08 minecraft:{mat} 64" for mat in raw_mats]
        raw_cmds.insert(0, "give slingshooter08 minecraft:crafting_table 1")
        raw_str = ", ".join(raw_mats)
        examples.append({
            "id": f"recipe-{item_name}-scratch",
            "source": "manual",
            "category": "command_gen",
            "input": {
                "user_message": f"sudo give me everything I need to make {display_name} from scratch",
                "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": ["slingshooter08"]},
            },
            "output": {
                "reasoning": f"Player needs raw materials to craft {item_name} from scratch. Full chain: {raw_str}.",
                "commands": raw_cmds,
                "safety_flags": [],
            },
            "metadata": {"difficulty": "hard", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 4},
        })
# Smelting knowledge (furnace recipes)
# Hand-written furnace recipes: smelted item -> what goes in and what fuel
# the generated message mentions. Row order fixes the example order.
SMELTING = {
    smelted: {"input": source, "fuel": fuel}
    for smelted, source, fuel in (
        ("glass", "sand", "coal"),
        ("iron_ingot", "raw_iron", "coal"),
        ("gold_ingot", "raw_gold", "coal"),
        ("stone", "cobblestone", "coal"),
        ("smooth_stone", "stone", "coal"),
        ("brick", "clay_ball", "coal"),
        ("cooked_beef", "beef", "coal"),
        ("cooked_porkchop", "porkchop", "coal"),
        ("cooked_chicken", "chicken", "coal"),
        ("dried_kelp", "kelp", "coal"),
        ("charcoal", "oak_log", "oak_log"),
    )
}
# One informational example per SMELTING entry.
for smelted, spec in SMELTING.items():
    source_item = spec['input']
    smelted_label = smelted.replace('_', ' ')
    source_label = source_item.replace('_', ' ')
    fuel_label = spec['fuel'].replace('_', ' ')

    smelt_input = {
        "user_message": f"sudo how do I make {smelted_label}",
        "server_context": {"server_type": "paper", "version": "1.21.x"},
    }
    smelt_output = {
        "reasoning": f"Smelting recipe: {source_item} + fuel in a furnace = {smelted}.",
        "commands": [],
        "message": f"Smelt {source_label} in a furnace with {fuel_label} as fuel to get {smelted_label}.",
        "safety_flags": [],
    }
    examples.append({
        "id": f"smelt-{smelted}",
        "source": "manual",
        "category": "info",
        "input": smelt_input,
        "output": smelt_output,
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "smelting knowledge", "risk_level": 3},
    })
# Version awareness
# Single example in which the answer states how many craftable items are
# known and that recipes can change between versions.
version_message = f"I know {len(recipe_tree)} craftable items from Minecraft 1.21.x. New items and recipes are added with each update. If something doesn't work, it may have been added or changed in a newer version."
version_example = {
    "id": "recipe-version-note",
    "source": "manual",
    "category": "info",
    "input": {
        "user_message": "sudo are there any new items or recipes",
        "server_context": {"server_type": "paper", "version": "1.21.x"},
    },
    "output": {
        "reasoning": "The model should acknowledge it's trained on a specific version and recipes may change.",
        "commands": [],
        "message": version_message,
        "safety_flags": [],
    },
    "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "version awareness", "risk_level": 3},
}
examples.append(version_example)
# Save training examples
# Write every example as one JSON object per line (JSONL).
output = ROOT / "data" / "raw" / "recipe_training.jsonl"
# Create the target directory on a fresh checkout instead of failing in open().
output.parent.mkdir(parents=True, exist_ok=True)
# ensure_ascii=False can emit non-ASCII characters, so fix the encoding
# rather than relying on the platform default.
with open(output, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in examples)

# Summary by example kind. Match on the id's prefix/suffix: a substring test
# for 'craft' would also count "recipe-crafting_table-materials" and
# "recipe-crafting_table-scratch" as crafting-info examples.
example_ids = [ex["id"] for ex in examples]
print(f"Training examples: {len(examples)}")
print(f" Crafting info: {sum(1 for i in example_ids if i.endswith('-craft'))}")
print(f" Material giving: {sum(1 for i in example_ids if i.endswith('-materials'))}")
print(f" From scratch: {sum(1 for i in example_ids if i.endswith('-scratch'))}")
print(f" Smelting: {sum(1 for i in example_ids if i.startswith('smelt-'))}")
print(f"Saved to {output}")
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff