1,542 seed + 1,159 tool-calling examples, async processing, validator tracking

New knowledge baked in:
- Enchantments (60): all 1.21 enchants, mutual exclusions, max levels, component syntax
- WorldEdit (45): //set, //replace, //sphere, //stack, selection, brushes
- Paper server (55): gamerules, permissions, plugins, scoreboard, moderation
- Cosmetics/XP (42): title, tellraw, playsound, particle, xp, effect mechanics
- Quantity boundaries (32): item tier caps, greedy→stingy, humble→generous

Training infrastructure:
- train_lora.py updated for multi-turn tool conversations + seed data
- Async prayer/sudo processing (ThreadPoolExecutor, 3 workers)
- Validator hit-rate tracking to /var/log/mc_validator_stats.json

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 19:03:30 -04:00
parent ee764cd22a
commit 750cf15c79
6 changed files with 469 additions and 11 deletions
+65 -11
View File
@@ -49,7 +49,7 @@ def get_system_prompt(mode: str) -> str:
return "You are a Minecraft 1.21 command translator. Return JSON: {\"commands\": [...], \"reasoning\": \"...\"}"
def load_dataset(path: str) -> list:
def load_seed_dataset(path: str) -> list:
"""Load seed dataset and format for SFT training with system prompts and mode awareness."""
examples = []
with open(path) as f:
@@ -101,6 +101,45 @@ def load_dataset(path: str) -> list:
return examples
def load_tool_dataset(path: str) -> list:
    """Load multi-turn tool-calling training data from a JSONL file.

    Each non-blank line is parsed as one JSON object and mapped by shape:

    - A record with a ``"messages"`` field (a multi-turn conversation:
      user → assistant tool_call → tool result → ... → final response)
      becomes ``{"conversations": <messages>}``.
    - Otherwise, a record with a ``"qwen3_text"`` field (text already in
      Qwen3 chat format, tool_call tags included) becomes
      ``{"text": <qwen3_text>}`` so it can be passed through verbatim
      instead of going through the chat template.

    ``"messages"`` takes precedence when both fields are present. Records
    with neither field are skipped.

    Args:
        path: Path to the JSONL file.

    Returns:
        A list of single-key dicts, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    examples = []
    # Explicit UTF-8: the chat text can contain non-ASCII characters, and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank separator/trailing lines
            ex = json.loads(line)
            if "messages" in ex:
                # Structured multi-turn conversation
                examples.append({"conversations": ex["messages"]})
            elif "qwen3_text" in ex:
                # Pre-formatted Qwen3 text
                examples.append({"text": ex["qwen3_text"]})
    return examples
def load_dataset(seed_path: str, tool_path: str = None) -> list:
    """Load and merge all training datasets.

    Args:
        seed_path: Path passed to ``load_seed_dataset``.
        tool_path: Optional path passed to ``load_tool_dataset``. Ignored
            (with a "no file" notice) when falsy or when the file does not
            exist.

    Returns:
        Seed examples followed by tool examples. Every returned row carries
        the same set of keys — the ordered union of keys seen across all
        rows — with ``None`` filled in where a row had no value.
    """
    examples = load_seed_dataset(seed_path)
    print(f" Seed examples: {len(examples)}")

    if tool_path and os.path.exists(tool_path):
        tool_examples = load_tool_dataset(tool_path)
        print(f" Tool examples: {len(tool_examples)}")
        examples.extend(tool_examples)
    else:
        print(" Tool examples: 0 (no file)")

    # Rows can be heterogeneous (e.g. "conversations"-only next to
    # "text"-only). A table builder that derives its columns from the first
    # row would silently drop keys that only appear later, so give every row
    # the full key set up front. A dict is used as an ordered set so column
    # order follows first appearance.
    all_keys = {}
    for ex in examples:
        all_keys.update(dict.fromkeys(ex))
    return [{key: ex.get(key) for key in all_keys} for ex in examples]
def main():
parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
@@ -122,6 +161,7 @@ def main():
if not args.dataset:
args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
tool_dataset = str(project_root / "data" / "processed" / "tool_training.jsonl")
if not args.output:
args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")
@@ -135,10 +175,10 @@ def main():
print(f"Max seq len: {args.max_seq_len}")
print()
# Load dataset
print("Loading dataset...")
train_data = load_dataset(args.dataset)
print(f" {len(train_data)} training examples loaded")
# Load dataset (seed + tool-calling)
print("Loading datasets...")
train_data = load_dataset(args.dataset, tool_dataset)
print(f" Total: {len(train_data)} training examples")
if args.dry_run:
print("\n[DRY RUN] Would load model and train. Exiting.")
@@ -177,13 +217,27 @@ def main():
dataset = Dataset.from_list(train_data)
def formatting_func(examples):
    """Build the training text for one batch of examples.

    Handles both example shapes:
    - 'conversations': list of role/content dicts → apply chat template
    - 'text': pre-formatted Qwen3 text (tool-calling examples) → pass through

    Args:
        examples: Mapping of column name to a list of per-row values. Either
            column may be missing or None.

    Returns:
        ``{"text": [...]}`` with one string per input row. A non-empty
        pre-formatted text wins over a conversation; a row with neither
        yields ``""`` so the output stays aligned with the input rows.
    """
    from itertools import zip_longest

    convos_list = examples.get("conversations") or []
    text_list = examples.get("text") or []

    texts = []
    # zip_longest rather than indexing by len(convos_list): when a batch has
    # only one of the two columns, the other side is padded with None instead
    # of truncating the output to zero rows.
    for convos, pre_text in zip_longest(convos_list, text_list):
        if pre_text:
            # Pre-formatted tool-calling example
            texts.append(pre_text)
        elif convos:
            # Standard conversation → apply chat template
            texts.append(
                tokenizer.apply_chat_template(
                    convos, tokenize=False, add_generation_prompt=False
                )
            )
        else:
            texts.append("")
    return {"text": texts}
dataset = dataset.map(formatting_func, batched=True)