Minecraft knowledge corpus, recipe trees, GitHub scraper, 644 examples
Knowledge corpus (knowledge/mc-data/):
- 1505 items, 886 crafting recipes, 1166 blocks from minecraft-data 1.21.11
- Recipe dependency tree builder (knowledge/build_recipe_tree.py)
- Crafting chain training: "give me everything to make X from scratch"
- Smelting recipes, version awareness examples

Training data (644 examples total):
- 107 command syntax reference examples (every command + common errors)
- 176 recipe/crafting chain examples (63 crafting, 103 material-giving, 11 smelting)
- 344 Claude-distilled examples (222 sudo + 122 god via Haiku)
- Live bot audit data ingested (128 examples from dev server)

Swarm bots:
- Swimming/water escape logic
- Door opening
- Context-aware prayers (inventory, health, time, depth)
- Prefix enforcement on all Gemini/Dolphin prompts

GitHub log scraper (data/scrape_server_logs.py):
- Searches GitHub for Minecraft server logs with commands
- Strict 1.20.5+ version filter
- Extracts command pairs, converts to training format

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,287 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Build a crafting dependency tree from minecraft-data and generate training examples.
|
||||
|
||||
Creates:
|
||||
1. knowledge/mc-data/recipe_tree.json — full item→recipe→ingredients tree
|
||||
2. data/raw/recipe_training.jsonl — training examples about crafting chains
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# Repository root: this script lives one directory below it.
ROOT = Path(__file__).resolve().parent.parent
KNOWLEDGE = ROOT / "knowledge" / "mc-data"


def _load_json(filename):
    """Parse one JSON file from the ``KNOWLEDGE`` directory.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as parsing finishes, and it is decoded as UTF-8 regardless of the
    platform's default locale encoding.

    Args:
        filename: File name relative to ``KNOWLEDGE``.

    Returns:
        The decoded JSON document.
    """
    with open(KNOWLEDGE / filename, encoding="utf-8") as fh:
        return json.load(fh)


# Load data
items_raw = _load_json("items.json")
recipes_raw = _load_json("recipes.json")
blocks_raw = _load_json("blocks.json")
|
||||
|
||||
# Two lookup tables over the item list: numeric ID -> name, and name -> numeric ID.
# When the same key occurs more than once, the later entry wins.
id_to_name = {entry["id"]: entry["name"] for entry in items_raw}
name_to_id = {entry["name"]: entry["id"] for entry in items_raw}
|
||||
|
||||
def _slot_item_names(slot):
    """Return the item names that one recipe slot can be filled with.

    Args:
        slot: ``None`` for an empty slot, an integer item ID, or a list of
            item IDs (``None`` entries in the list are skipped).

    Returns:
        A list of item names. IDs missing from ``id_to_name`` are rendered as
        ``unknown_<id>``. Any other slot type yields an empty list.
    """
    if isinstance(slot, int):
        return [id_to_name.get(slot, f"unknown_{slot}")]
    if isinstance(slot, list):
        return [id_to_name.get(opt, f"unknown_{opt}") for opt in slot if opt is not None]
    return []


# Build recipe tree: item name -> list of parsed recipes.
# Each parsed recipe keeps the sorted set of distinct ingredient names, the
# result count, and whether the recipe was shaped.
recipe_tree = {}
for item_id_str, recipe_list in recipes_raw.items():
    item_id = int(item_id_str)
    item_name = id_to_name.get(item_id, f"unknown_{item_id}")

    parsed_recipes = []
    for recipe in recipe_list:
        ingredients = set()

        # Shaped recipes (inShape): a grid of rows; a row itself may be None.
        if "inShape" in recipe:
            for row in recipe["inShape"]:
                if row is None:
                    continue
                for cell in row:
                    ingredients.update(_slot_item_names(cell))

        # Shapeless recipes (ingredients list).
        if "ingredients" in recipe:
            for ing in recipe["ingredients"]:
                ingredients.update(_slot_item_names(ing))

        # A missing or non-dict "result" falls back to a count of 1.
        result = recipe.get("result")
        result_count = result.get("count", 1) if isinstance(result, dict) else 1

        # Recipes from which no ingredient could be read are dropped.
        if ingredients:
            parsed_recipes.append({
                "ingredients": sorted(ingredients),
                "count": result_count,
                "shaped": "inShape" in recipe,
            })

    if parsed_recipes:
        recipe_tree[item_name] = parsed_recipes
|
||||
|
||||
# Build dependency chains (what do you need from scratch?)
|
||||
def get_full_chain(item_name, visited=None, *, tree=None):
    """Build the nested crafting dependency tree for ``item_name``.

    Only the first recipe listed for each item is followed.

    Args:
        item_name: Name of the item to expand.
        visited: Names already on the current expansion path, used to stop
            at circular recipes. Callers normally leave this as ``None``;
            the set passed in is not modified.
        tree: Mapping of item name to a list of parsed recipes (each with
            ``"ingredients"`` and ``"count"``). Defaults to the module-level
            ``recipe_tree``.

    Returns:
        ``{"item": name, "raw": True}`` when the item has no recipe or is
        already on the current path (circular dependency). Otherwise a dict
        with ``"item"``, ``"raw": False``, ``"ingredients"``, ``"count"`` and
        ``"deps"``, where ``"deps"`` holds one such node per ingredient.
    """
    if tree is None:
        tree = recipe_tree
    if visited is None:
        visited = set()

    recipes = tree.get(item_name)
    if item_name in visited or not recipes:
        # Circular dependency, or a raw material (mined/found).
        return {"item": item_name, "raw": True}

    recipe = recipes[0]  # use first recipe
    # A fresh set per level keeps sibling branches independent of each other.
    path = visited | {item_name}
    return {
        "item": item_name,
        "raw": False,
        "ingredients": recipe["ingredients"],
        "count": recipe["count"],
        "deps": [get_full_chain(ing, path, tree=tree) for ing in recipe["ingredients"]],
    }
|
||||
|
||||
|
||||
def get_raw_materials(item_name, visited=None, *, tree=None):
    """Return a flat list of the raw materials needed to craft ``item_name``.

    Only the first recipe listed for each item is followed. An item that
    recurs on its own expansion path (for example an ingot crafted from a
    block that is itself crafted from that ingot) is reported as a raw
    material rather than dropped, matching how ``get_full_chain`` marks such
    items as ``raw``.

    Args:
        item_name: Name of the item to expand.
        visited: Names already on the current expansion path. Callers
            normally leave this as ``None``; the set passed in is not
            modified.
        tree: Mapping of item name to a list of parsed recipes (each with an
            ``"ingredients"`` list). Defaults to the module-level
            ``recipe_tree``.

    Returns:
        A list of item names in ingredient order. It may contain duplicates
        when several branches lead to the same material.
    """
    if tree is None:
        tree = recipe_tree
    if visited is None:
        visited = set()

    recipes = tree.get(item_name)
    if item_name in visited or not recipes:
        # No recipe, or a circular recipe: the item itself must be supplied.
        return [item_name]

    # A fresh set per level keeps sibling branches independent of each other.
    path = visited | {item_name}
    raw = []
    for ing in recipes[0]["ingredients"]:
        raw.extend(get_raw_materials(ing, path, tree=tree))
    return raw
|
||||
|
||||
|
||||
# Persist the parsed tree next to the source data, pretty-printed with a
# two-space indent, then report how many items have at least one recipe.
tree_path = KNOWLEDGE / "recipe_tree.json"
with tree_path.open("w") as f:
    f.write(json.dumps(recipe_tree, indent=2))
craftable_total = len(recipe_tree)
print(f"Recipe tree: {craftable_total} craftable items")
|
||||
|
||||
# Generate training examples
examples = []

# Important crafting chains that players commonly ask about.
# Names without an entry in recipe_tree are skipped by the loop below.
KEY_ITEMS = [
    "crafting_table", "furnace", "chest", "torch", "stick",
    "wooden_pickaxe", "stone_pickaxe", "iron_pickaxe", "diamond_pickaxe",
    "wooden_sword", "stone_sword", "iron_sword", "diamond_sword",
    "iron_helmet", "iron_chestplate", "iron_leggings", "iron_boots",
    "diamond_helmet", "diamond_chestplate", "diamond_leggings", "diamond_boots",
    "shield", "bow", "arrow", "bed", "bucket", "compass", "clock",
    "enchanting_table", "anvil", "brewing_stand", "beacon",
    "glass", "glass_pane", "bookshelf", "ladder", "fence", "door",
    "rail", "powered_rail", "hopper", "piston", "sticky_piston",
    "observer", "dispenser", "dropper", "repeater", "comparator",
    "oak_planks", "oak_slab", "oak_stairs", "oak_door", "oak_fence",
    "iron_ingot", "gold_ingot", "netherite_ingot",
    "bread", "cake", "cookie", "golden_apple",
    "tnt", "firework_rocket", "map", "lantern", "campfire",
]

# Player name used in every generated command and server context.
_PLAYER = "slingshooter08"
_CRAFTING_TABLE_CMD = f"give {_PLAYER} minecraft:crafting_table 1"


def _give_stack_cmds(materials):
    """Return one ``give`` command (64 of the item) per distinct material, sorted by name."""
    return [f"give {_PLAYER} minecraft:{mat} 64" for mat in sorted(set(materials))]


for item_name in KEY_ITEMS:
    if item_name not in recipe_tree:
        continue

    recipe = recipe_tree[item_name][0]  # only the first recipe is described
    ingredients = recipe["ingredients"]
    count = recipe["count"]
    # Sorted so that everything derived from it is independent of set order.
    raw_mats = sorted(set(get_raw_materials(item_name)))

    pretty_name = item_name.replace("_", " ")
    ing_str = ", ".join(ingredients)
    # Heuristic kept from the original script: a shaped recipe with more than
    # two distinct ingredients is treated as needing a crafting table.
    needs_table = recipe["shaped"] and len(ingredients) > 2

    # "How do I craft X"
    craft_message = f"To craft {pretty_name} you need: {ing_str}. Makes {count}."
    if needs_table:
        craft_message += " Requires a crafting table."
    examples.append({
        "id": f"recipe-{item_name}-craft",
        "source": "manual",
        "category": "info",
        "input": {
            "user_message": f"sudo how do I craft {pretty_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x"},
        },
        "output": {
            "reasoning": f"Crafting recipe for {item_name} in 1.21.",
            "commands": [],
            "message": craft_message,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X"
    give_cmds = _give_stack_cmds(ingredients)
    # Also give a crafting table if the recipe needs one and does not already include it.
    if needs_table and "crafting_table" not in ingredients:
        give_cmds.insert(0, _CRAFTING_TABLE_CMD)

    examples.append({
        "id": f"recipe-{item_name}-materials",
        "source": "manual",
        "category": "command_gen",
        "input": {
            "user_message": f"sudo give me everything I need to craft {pretty_name}",
            "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": [_PLAYER]},
        },
        "output": {
            "reasoning": f"Player needs materials to craft {item_name}. Ingredients: {ing_str}. Giving materials + crafting table if needed.",
            "commands": give_cmds,
            "safety_flags": [],
        },
        "metadata": {"difficulty": "medium", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 3},
    })

    # "Give me everything I need to make X from scratch" (raw materials).
    # Compared as sets: the example is only worth emitting when the raw
    # materials really differ from the direct ingredients, and that must not
    # depend on element order.
    if raw_mats and set(raw_mats) != set(ingredients):
        raw_cmds = [_CRAFTING_TABLE_CMD] + _give_stack_cmds(raw_mats)
        raw_str = ", ".join(raw_mats)
        examples.append({
            "id": f"recipe-{item_name}-scratch",
            "source": "manual",
            "category": "command_gen",
            "input": {
                "user_message": f"sudo give me everything I need to make {pretty_name} from scratch",
                "server_context": {"server_type": "paper", "version": "1.21.x", "online_players": [_PLAYER]},
            },
            "output": {
                "reasoning": f"Player needs raw materials to craft {item_name} from scratch. Full chain: {raw_str}.",
                "commands": raw_cmds,
                "safety_flags": [],
            },
            "metadata": {"difficulty": "hard", "validated": True, "extracted_from": "minecraft-data recipes", "risk_level": 4},
        })
|
||||
|
||||
# Hand-written furnace recipes: smelted product -> the item placed in the
# furnace ("input") and the fuel named in the generated answer ("fuel").
SMELTING = {
    product: {"input": source, "fuel": fuel}
    for product, source, fuel in (
        ("glass", "sand", "coal"),
        ("iron_ingot", "raw_iron", "coal"),
        ("gold_ingot", "raw_gold", "coal"),
        ("stone", "cobblestone", "coal"),
        ("smooth_stone", "stone", "coal"),
        ("brick", "clay_ball", "coal"),
        ("cooked_beef", "beef", "coal"),
        ("cooked_porkchop", "porkchop", "coal"),
        ("cooked_chicken", "chicken", "coal"),
        ("dried_kelp", "kelp", "coal"),
        ("charcoal", "oak_log", "oak_log"),
    )
}

# One informational "how do I make X" example per smelting recipe.
for result, recipe in SMELTING.items():
    smelt_source = recipe["input"]
    spoken_result = result.replace("_", " ")
    spoken_source = smelt_source.replace("_", " ")
    spoken_fuel = recipe["fuel"].replace("_", " ")

    smelt_input = {
        "user_message": f"sudo how do I make {spoken_result}",
        "server_context": {"server_type": "paper", "version": "1.21.x"},
    }
    smelt_output = {
        "reasoning": f"Smelting recipe: {smelt_source} + fuel in a furnace = {result}.",
        "commands": [],
        "message": f"Smelt {spoken_source} in a furnace with {spoken_fuel} as fuel to get {spoken_result}.",
        "safety_flags": [],
    }
    examples.append({
        "id": f"smelt-{result}",
        "source": "manual",
        "category": "info",
        "input": smelt_input,
        "output": smelt_output,
        "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "smelting knowledge", "risk_level": 3},
    })
|
||||
|
||||
# A single example reminding the model that its recipe knowledge is tied to
# one game version; the craftable-item count comes from the tree built above.
known_items = len(recipe_tree)
version_note_input = {
    "user_message": "sudo are there any new items or recipes",
    "server_context": {"server_type": "paper", "version": "1.21.x"},
}
version_note_output = {
    "reasoning": "The model should acknowledge it's trained on a specific version and recipes may change.",
    "commands": [],
    "message": f"I know {known_items} craftable items from Minecraft 1.21.x. New items and recipes are added with each update. If something doesn't work, it may have been added or changed in a newer version.",
    "safety_flags": [],
}
examples.append({
    "id": "recipe-version-note",
    "source": "manual",
    "category": "info",
    "input": version_note_input,
    "output": version_note_output,
    "metadata": {"difficulty": "easy", "validated": True, "extracted_from": "version awareness", "risk_level": 3},
})
|
||||
|
||||
# Save training examples as JSON Lines (one example object per line).
output = ROOT / "data" / "raw" / "recipe_training.jsonl"
# Create the target directory if missing so the write does not fail.
output.parent.mkdir(parents=True, exist_ok=True)
# ensure_ascii=False can emit non-ASCII characters, so the encoding is pinned
# to UTF-8 instead of depending on the platform default.
with open(output, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(ex, ensure_ascii=False) + "\n" for ex in examples)


def _count_examples(*, prefix="", suffix=""):
    """Count examples whose ``id`` starts with ``prefix`` and ends with ``suffix``."""
    return sum(
        1 for e in examples
        if e["id"].startswith(prefix) and e["id"].endswith(suffix)
    )


# Categories are matched on the id's fixed prefix/suffix rather than by
# substring: an id such as "recipe-crafting_table-materials" contains "craft"
# but is not a "-craft" example.
print(f"Training examples: {len(examples)}")
print(f" Crafting info: {_count_examples(suffix='-craft')}")
print(f" Material giving: {_count_examples(suffix='-materials')}")
print(f" From scratch: {_count_examples(suffix='-scratch')}")
print(f" Smelting: {_count_examples(prefix='smelt-')}")
print(f"Saved to {output}")
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user