#!/usr/bin/env python3
"""
LoRA fine-tuning script for Minecraft AI ops assistant.

Base model: Qwen/Qwen3-8B (dense, Apache 2.0)
Method: QLoRA (4-bit base + LoRA adapters, BF16 when the GPU supports it, else FP16)
Framework: Unsloth + HuggingFace TRL
Target GPU: RTX 3090 Ti (24GB VRAM)

Usage:
    python train_lora.py
    python train_lora.py --epochs 5 --lr 2e-4 --rank 32
"""

import argparse
import json
import os
from pathlib import Path


def _format_user_message(query, ctx):
    """Render the user turn: the request line followed by the server context."""
    parts = [f"Request from slingshooter08: {query}"]
    parts.append(
        f"\nContext:\nServer: {ctx.get('server_type', 'paper')} "
        f"{ctx.get('version', '1.21.x')}"
    )
    if ctx.get("online_players"):
        parts.append(f"Online: {', '.join(ctx['online_players'])}")
    pos = ctx.get("player_position")
    if pos:
        parts.append(f"Player position: ({pos['x']}, {pos['y']}, {pos['z']})")
    return "\n".join(parts)


def load_dataset(path: str) -> list:
    """Load seed dataset and format for SFT training.

    The file is read as UTF-8 JSON Lines; blank lines are skipped. Each record
    must have an ``"input"`` object with a ``"user_message"`` (and optionally a
    ``"server_context"``) and an ``"output"`` object whose optional
    ``"reasoning"``, ``"commands"`` and ``"message"`` fields become the
    assistant's JSON reply.

    Args:
        path: Path to the ``.jsonl`` dataset file.

    Returns:
        A list of ``{"conversations": [user_turn, assistant_turn]}`` dicts,
        where each turn is ``{"role": ..., "content": ...}``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON; the message is
            prefixed with ``path:line_number``.
        KeyError: If a record lacks ``"input"``, ``"output"`` or
            ``"user_message"``.
    """
    examples = []
    # Explicit encoding: the platform default is not UTF-8 everywhere, and the
    # dataset may contain non-ASCII text.
    with open(path, encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                ex = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type, but pointing at the offending file line
                # (the decoder only knows the position within this one line).
                raise json.JSONDecodeError(
                    f"{path}:{line_no}: {err.msg}", err.doc, err.pos
                ) from err

            inp = ex["input"]
            out = ex["output"]
            user_msg = _format_user_message(
                inp["user_message"], inp.get("server_context", {})
            )

            # Assistant response as JSON
            response = {
                "reasoning": out.get("reasoning", ""),
                "commands": out.get("commands", []),
                "message": out.get("message"),
            }
            examples.append({
                "conversations": [
                    {"role": "user", "content": user_msg},
                    {"role": "assistant", "content": json.dumps(response)},
                ]
            })
    return examples


def main():
    """Parse CLI arguments, load the dataset, and run QLoRA fine-tuning.

    With ``--dry-run`` only the dataset is loaded and previewed; the heavy
    Unsloth/TRL imports and the model download are skipped.
    """
    parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
    parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
    parser.add_argument("--dataset", default="", help="Dataset path (default: auto-detect)")
    parser.add_argument("--output", default="", help="Output directory for adapter")
    parser.add_argument("--rank", type=int, default=16, help="LoRA rank")
    parser.add_argument("--alpha", type=int, default=32, help="LoRA alpha")
    parser.add_argument("--lr", type=float, default=2e-4, help="Learning rate")
    parser.add_argument("--epochs", type=int, default=3, help="Training epochs")
    parser.add_argument("--batch-size", type=int, default=2, help="Per-device batch size")
    parser.add_argument("--grad-accum", type=int, default=4, help="Gradient accumulation steps")
    parser.add_argument("--max-seq-len", type=int, default=2048, help="Max sequence length")
    parser.add_argument("--dry-run", action="store_true",
                        help="Load model and dataset but don't train")
    args = parser.parse_args()

    # Auto-detect paths relative to this script (two directories up).
    script_dir = Path(__file__).resolve().parent
    project_root = script_dir.parent.parent
    if not args.dataset:
        args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
    if not args.output:
        args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")

    print(f"Base model: {args.model}")
    print(f"Dataset: {args.dataset}")
    print(f"Output: {args.output}")
    print(f"LoRA rank: {args.rank}, alpha: {args.alpha}")
    print(f"LR: {args.lr}")
    print(f"Epochs: {args.epochs}")
    print(f"Batch: {args.batch_size} x {args.grad_accum} grad accum")
    print(f"Max seq len: {args.max_seq_len}")
    print()

    # Fail fast with a readable message instead of a traceback.
    if not Path(args.dataset).is_file():
        parser.error(f"Dataset not found: {args.dataset}")

    # Load dataset
    print("Loading dataset...")
    train_data = load_dataset(args.dataset)
    print(f" {len(train_data)} training examples loaded")
    if not train_data:
        parser.error(f"Dataset is empty: {args.dataset}")

    if args.dry_run:
        print("\n[DRY RUN] Would load model and train. Exiting.")
        for ex in train_data[:2]:
            print(f" Example: {ex['conversations'][0]['content'][:80]}...")
        return

    # Import Unsloth (heavy imports, only when actually training)
    from unsloth import FastLanguageModel, is_bfloat16_supported
    from trl import SFTTrainer, SFTConfig
    from datasets import Dataset

    # Load model with 4-bit quantization
    print(f"\nLoading {args.model} in 4-bit...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model,
        max_seq_length=args.max_seq_len,
        load_in_4bit=True,
        dtype=None,  # auto-detect
    )

    # Add LoRA adapters
    print(f"Adding LoRA adapters (rank={args.rank}, alpha={args.alpha})...")
    model = FastLanguageModel.get_peft_model(
        model,
        r=args.rank,
        lora_alpha=args.alpha,
        lora_dropout=0,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        bias="none",
        use_gradient_checkpointing="unsloth",
    )

    # Prepare dataset
    dataset = Dataset.from_list(train_data)

    def formatting_func(examples):
        """Format conversations for the chat template."""
        texts = [
            tokenizer.apply_chat_template(
                convos, tokenize=False, add_generation_prompt=False
            )
            for convos in examples["conversations"]
        ]
        return {"text": texts}

    dataset = dataset.map(formatting_func, batched=True)

    # dtype=None lets Unsloth pick the model dtype, so the trainer's mixed
    # precision must follow the same hardware check rather than assume FP16;
    # a hard-coded fp16=True clashes with a model loaded in bfloat16.
    use_bf16 = is_bfloat16_supported()

    # Training config
    training_args = SFTConfig(
        output_dir=args.output,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.grad_accum,
        learning_rate=args.lr,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        weight_decay=0.01,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        save_strategy="epoch",
        seed=42,
        max_seq_length=args.max_seq_len,
        dataset_text_field="text",
        packing=True,
    )

    # Train
    print(f"\nStarting training ({args.epochs} epochs, {len(train_data)} examples)...")
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=training_args,
    )
    trainer.train()

    # Save adapter
    print(f"\nSaving LoRA adapter to {args.output}...")
    model.save_pretrained(args.output)
    tokenizer.save_pretrained(args.output)

    print("\nTraining complete!")
    print(f"Adapter saved to: {args.output}")
    print("To convert to GGUF for Ollama, use:")
    print(f" python -m unsloth.save --model {args.output} --output_type gguf")


if __name__ == "__main__":
    main()