Add LoRA training scripts and fix bake-off token budget
- training/scripts/train_lora.py: Unsloth QLoRA trainer for qwen3:8b - training/scripts/train_lora.sh: Launch script for steel141 RTX 3090 Ti - eval/bakeoff.py: Fixed token budget (400->1500) that caused qwen3 models to exhaust tokens on thinking, added --no-think flag - agent/serve.py: Default model changed to gemma3n:e4b Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
LoRA fine-tuning script for Minecraft AI ops assistant.
|
||||
|
||||
Base model: Qwen/Qwen3-8B (dense, Apache 2.0)
|
||||
Method: QLoRA (4-bit base + LoRA adapters in FP16)
|
||||
Framework: Unsloth + HuggingFace TRL
|
||||
Target GPU: RTX 3090 Ti (24GB VRAM)
|
||||
|
||||
Usage:
|
||||
python train_lora.py
|
||||
python train_lora.py --epochs 5 --lr 2e-4 --rank 32
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_dataset(path: str) -> list:
    """Load the seed dataset and format it for SFT training.

    The file is read as JSON Lines: blank lines are skipped and every other
    line is parsed as one JSON object with an ``input`` object (holding
    ``user_message`` and, optionally, ``server_context``) and an ``output``
    object (optionally holding ``reasoning``, ``commands`` and ``message``).

    Args:
        path: Path to the JSONL dataset file.

    Returns:
        A list of ``{"conversations": [user_turn, assistant_turn]}`` dicts.
        The user turn is the request plus a rendered server-context block;
        the assistant turn is the JSON-serialized response object.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``input``, ``output`` or ``user_message``,
            or a ``player_position`` lacks ``x``/``y``/``z``.
    """
    examples = []
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            ex = json.loads(line)

            # Build the training conversation
            inp = ex["input"]
            out = ex["output"]
            query = inp["user_message"]
            # `or {}` also covers an explicit `"server_context": null`, which
            # a plain `.get(key, {})` default would pass through as None.
            ctx = inp.get("server_context") or {}

            # User message with context
            user_parts = [f"Request from slingshooter08: {query}"]
            user_parts.append(
                f"\nContext:\nServer: {ctx.get('server_type', 'paper')} "
                f"{ctx.get('version', '1.21.x')}"
            )
            if ctx.get("online_players"):
                user_parts.append(f"Online: {', '.join(ctx['online_players'])}")
            pos = ctx.get("player_position")
            if pos:
                user_parts.append(
                    f"Player position: ({pos['x']}, {pos['y']}, {pos['z']})"
                )

            user_msg = "\n".join(user_parts)

            # Assistant response as JSON
            response = {
                "reasoning": out.get("reasoning", ""),
                "commands": out.get("commands", []),
                "message": out.get("message"),
            }

            examples.append({
                "conversations": [
                    {"role": "user", "content": user_msg},
                    {"role": "assistant", "content": json.dumps(response)},
                ]
            })

    return examples
|
||||
|
||||
|
||||
def main():
    """Parse CLI options, build the SFT dataset, and run QLoRA fine-tuning.

    Steps, in order:
      1. Parse command-line arguments; when ``--dataset`` / ``--output`` are
         empty, derive them from the project root (two directories above this
         script).
      2. Print the effective configuration and load the dataset with
         ``load_dataset``.
      3. With ``--dry-run``, print up to two example prompts and return
         without importing the training stack.
      4. Otherwise load the base model in 4-bit via Unsloth, attach LoRA
         adapters, render each conversation with the tokenizer's chat
         template, train with TRL's ``SFTTrainer``, and save the adapter and
         tokenizer to the output directory.

    Raises:
        SystemExit: On argument errors (from argparse) or when the dataset
            contains no training examples.
    """
    parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
    parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
    parser.add_argument("--dataset", default="", help="Dataset path (default: auto-detect)")
    parser.add_argument("--output", default="", help="Output directory for adapter")
    parser.add_argument("--rank", type=int, default=16, help="LoRA rank")
    parser.add_argument("--alpha", type=int, default=32, help="LoRA alpha")
    parser.add_argument("--lr", type=float, default=2e-4, help="Learning rate")
    parser.add_argument("--epochs", type=int, default=3, help="Training epochs")
    parser.add_argument("--batch-size", type=int, default=2, help="Per-device batch size")
    parser.add_argument("--grad-accum", type=int, default=4, help="Gradient accumulation steps")
    parser.add_argument("--max-seq-len", type=int, default=2048, help="Max sequence length")
    parser.add_argument("--dry-run", action="store_true", help="Load model and dataset but don't train")
    args = parser.parse_args()

    # Auto-detect paths
    script_dir = Path(__file__).resolve().parent
    project_root = script_dir.parent.parent

    if not args.dataset:
        args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
    if not args.output:
        args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")

    print(f"Base model: {args.model}")
    print(f"Dataset: {args.dataset}")
    print(f"Output: {args.output}")
    print(f"LoRA rank: {args.rank}, alpha: {args.alpha}")
    print(f"LR: {args.lr}")
    print(f"Epochs: {args.epochs}")
    print(f"Batch: {args.batch_size} x {args.grad_accum} grad accum")
    print(f"Max seq len: {args.max_seq_len}")
    print()

    # Load dataset
    print("Loading dataset...")
    train_data = load_dataset(args.dataset)
    print(f" {len(train_data)} training examples loaded")

    if args.dry_run:
        print("\n[DRY RUN] Would load model and train. Exiting.")
        for ex in train_data[:2]:
            print(f" Example: {ex['conversations'][0]['content'][:80]}...")
        return

    # Fail fast: there is nothing to train on, so do not spend time loading
    # the base model only to crash while preparing an empty dataset.
    if not train_data:
        raise SystemExit(f"No training examples found in {args.dataset}")

    # Import Unsloth (heavy imports, only when actually training).
    # NOTE(review): Unsloth is deliberately imported before trl, matching the
    # original order; Unsloth's docs ask for this so its patches apply - keep it.
    from unsloth import FastLanguageModel, is_bfloat16_supported
    from trl import SFTTrainer, SFTConfig
    from datasets import Dataset

    # Load model with 4-bit quantization
    print(f"\nLoading {args.model} in 4-bit...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model,
        max_seq_length=args.max_seq_len,
        load_in_4bit=True,
        dtype=None,  # auto-detect
    )

    # Add LoRA adapters
    print(f"Adding LoRA adapters (rank={args.rank}, alpha={args.alpha})...")
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.rank,
        lora_alpha=args.alpha,
        lora_dropout=0,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        bias="none",
        use_gradient_checkpointing="unsloth",
    )

    # Prepare dataset
    dataset = Dataset.from_list(train_data)

    def formatting_func(examples):
        """Format conversations for the chat template."""
        texts = []
        for convos in examples["conversations"]:
            text = tokenizer.apply_chat_template(
                convos, tokenize=False, add_generation_prompt=False
            )
            texts.append(text)
        return {"text": texts}

    dataset = dataset.map(formatting_func, batched=True)

    # The model dtype is auto-detected above (dtype=None), so the trainer's
    # mixed-precision mode must follow the hardware too rather than being
    # pinned to fp16: use bf16 where the GPU supports it, fp16 otherwise.
    use_bf16 = is_bfloat16_supported()

    # Training config
    training_args = SFTConfig(
        output_dir=args.output,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.grad_accum,
        learning_rate=args.lr,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        weight_decay=0.01,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        save_strategy="epoch",
        seed=42,
        max_seq_length=args.max_seq_len,
        dataset_text_field="text",
        packing=True,
    )

    # Train
    print(f"\nStarting training ({args.epochs} epochs, {len(train_data)} examples)...")
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )

    trainer.train()

    # Save adapter
    print(f"\nSaving LoRA adapter to {args.output}...")
    model.save_pretrained(args.output)
    tokenizer.save_pretrained(args.output)

    print("\nTraining complete!")
    print(f"Adapter saved to: {args.output}")
    print("To convert to GGUF for Ollama, use:")
    print(f" python -m unsloth.save --model {args.output} --output_type gguf")
|
||||
|
||||
|
||||
# Script entry point: run the trainer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Executable
+31
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# LoRA fine-tuning launcher for qwen3:8b on steel141
#
# Prerequisites (already installed):
#   conda activate mc-train (or use full path below)
#   Python 3.11, PyTorch 2.10+cu124, Unsloth 2026.3.5
#
# Usage:
#   ssh steel141
#   cd ~/mc-ai-training
#   bash train_lora.sh
#
# Or from CT 629:
#   ssh steel141 "cd ~/mc-ai-training && bash train_lora.sh"
#
# All command-line arguments are forwarded unchanged to train_lora.py.
# Set MC_TRAIN_PYTHON to use a different interpreter than the default below.

set -euo pipefail

# Interpreter from the mc-train conda env (no `conda activate` needed).
PYTHON="${MC_TRAIN_PYTHON:-$HOME/miniconda3/envs/mc-train/bin/python}"
# Directory containing this script, so train_lora.py is found regardless of
# the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail early with a clear message: a failing command substitution inside an
# `echo` argument does not trip `set -e`, so a missing interpreter would
# otherwise only surface at the final `exec`.
if [[ ! -x "$PYTHON" ]]; then
    echo "error: Python interpreter not found or not executable: $PYTHON" >&2
    exit 1
fi

# Use RTX 3090 Ti (GPU 0 in PyTorch ordering)
export CUDA_VISIBLE_DEVICES=0

echo "=== Minecraft AI LoRA Training ==="
echo "Python: $("$PYTHON" --version)"
echo "GPU: RTX 3090 Ti (24GB)"
echo "Base model: Qwen/Qwen3-8B"
echo "Dataset: seed_dataset.jsonl"
echo ""

# Replace this shell with the trainer so signals and the exit status pass
# straight through; quoting keeps paths with spaces intact.
exec "$PYTHON" "$SCRIPT_DIR/train_lora.py" "$@"
|
||||
Reference in New Issue
Block a user