From 48b627d4985f4704d29060f7d4d84555c5612d20 Mon Sep 17 00:00:00 2001
From: Seth Freiberg
Date: Wed, 18 Mar 2026 10:40:18 -0400
Subject: [PATCH] Add LoRA training scripts and fix bake-off token budget

- training/scripts/train_lora.py: Unsloth QLoRA trainer for qwen3:8b
- training/scripts/train_lora.sh: Launch script for steel141 RTX 3090 Ti
- eval/bakeoff.py: Fixed token budget (400->1500) that caused qwen3 models
  to exhaust tokens on thinking, added --no-think flag
- agent/serve.py: Default model changed to gemma3n:e4b

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 agent/serve.py                 |   2 +-
 eval/bakeoff.py                |  22 +++-
 training/scripts/train_lora.py | 209 +++++++++++++++++++++++++++++++++
 training/scripts/train_lora.sh |  31 ++++++
 4 files changed, 259 insertions(+), 5 deletions(-)
 create mode 100644 training/scripts/train_lora.py
 create mode 100755 training/scripts/train_lora.sh

diff --git a/agent/serve.py b/agent/serve.py
index 05f5c32..78819a7 100644
--- a/agent/serve.py
+++ b/agent/serve.py
@@ -37,7 +37,7 @@ from agent.prompts.system_prompts import get_prompt
 
 DEFAULT_CONFIG = {
     'ollama_url': 'http://192.168.0.179:11434',
-    'model': 'qwen3-coder:30b',
+    'model': 'gemma3n:e4b',
     'rcon_host': '127.0.0.1',
     'rcon_port': 25577,
     'rcon_password': 'REDACTED_RCON',
diff --git a/eval/bakeoff.py b/eval/bakeoff.py
index ea2d1aa..c6fffd8 100644
--- a/eval/bakeoff.py
+++ b/eval/bakeoff.py
@@ -31,7 +31,8 @@ RESULTS_DIR = ROOT / "eval" / "results"
 
 
 def ollama_chat(model: str, messages: list, ollama_url: str,
-                temperature: float = 0.2, max_tokens: int = 400) -> dict:
+                temperature: float = 0.2, max_tokens: int = 1500,
+                no_think: bool = False) -> dict:
     """Call Ollama and return response + timing."""
     payload = {
         "model": model,
@@ -43,6 +44,15 @@ def ollama_chat(model: str, messages: list, ollama_url: str,
             "num_predict": max_tokens,
         },
     }
+    if no_think:
+        # Prepend /no_think to the last user message to disable thinking tokens.
+        # Patch shallow copies so the caller's message dicts are left untouched.
+        patched = [dict(msg) for msg in payload["messages"]]
+        for msg in reversed(patched):
+            if msg["role"] == "user":
+                msg["content"] = "/no_think\n" + msg["content"]
+                break
+        payload["messages"] = patched
     start = time.time()
     r = requests.post(f"{ollama_url}/api/chat", json=payload, timeout=180)
     r.raise_for_status()
@@ -157,7 +167,7 @@ def score_result(example: dict, actual_cmds: list, parsed: dict) -> dict:
     }
 
 
-def run_bakeoff(models: list, ollama_url: str):
+def run_bakeoff(models: list, ollama_url: str, no_think: bool = False):
     """Run all models against the dataset and compare."""
     # Load dataset
     with open(DATASET) as f:
@@ -166,6 +176,8 @@ def run_bakeoff(models: list, ollama_url: str):
     print(f"Bake-off: {len(examples)} examples × {len(models)} models")
     print(f"Ollama: {ollama_url}")
     print(f"Models: {', '.join(models)}")
+    if no_think:
+        print("Mode: /no_think (thinking tokens disabled)")
     print("=" * 70)
 
     all_results = {}
@@ -208,7 +220,7 @@ def run_bakeoff(models: list, ollama_url: str):
 
             # Call LLM
             try:
-                resp = ollama_chat(model, messages, ollama_url)
+                resp = ollama_chat(model, messages, ollama_url, no_think=no_think)
             except Exception as e:
                 print(f" [{i+1}/{len(examples)}] ERROR: {e}")
                 results.append({"id": eid, "error": str(e)})
@@ -311,9 +323,11 @@ def main():
     parser.add_argument("--ollama-url", default="http://192.168.0.179:11434")
     parser.add_argument("--models", nargs="+",
                         default=["qwen3-coder:30b", "gemma3n:e4b"])
+    parser.add_argument("--no-think", action="store_true",
+                        help="Prepend /no_think to disable thinking tokens (helps Qwen models)")
     args = parser.parse_args()
 
-    run_bakeoff(args.models, args.ollama_url)
+    run_bakeoff(args.models, args.ollama_url, no_think=args.no_think)
 
 
 if __name__ == "__main__":
diff --git a/training/scripts/train_lora.py b/training/scripts/train_lora.py
new file mode 100644
index 0000000..c2d5a58
--- /dev/null
+++ b/training/scripts/train_lora.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""
+LoRA fine-tuning script for Minecraft AI ops assistant.
+
+Base model: Qwen/Qwen3-8B (dense, Apache 2.0)
+Method: QLoRA (4-bit base + LoRA adapters in BF16, FP16 on GPUs without BF16)
+Framework: Unsloth + HuggingFace TRL
+Target GPU: RTX 3090 Ti (24GB VRAM)
+
+Usage:
+    python train_lora.py
+    python train_lora.py --epochs 5 --lr 2e-4 --rank 32
+"""
+
+import argparse
+import json
+import os
+from pathlib import Path
+
+
+def load_dataset(path: str) -> list:
+    """Load the seed dataset and format it for SFT training.
+
+    Each non-blank line of the JSONL file at ``path`` is one example with an
+    ``input`` object (``user_message`` plus optional ``server_context``) and
+    an ``output`` object (``reasoning``, ``commands``, ``message``).
+
+    Returns a list of ``{"conversations": [user_turn, assistant_turn]}``
+    dicts; the assistant turn is the JSON-encoded response.
+    """
+    examples = []
+    # JSONL is UTF-8 by convention; don't depend on the platform default.
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            if not line.strip():
+                continue
+            ex = json.loads(line)
+
+            # Build the training conversation
+            inp = ex["input"]
+            out = ex["output"]
+            query = inp["user_message"]
+            ctx = inp.get("server_context", {})
+
+            # User message with context
+            user_parts = [f"Request from slingshooter08: {query}"]
+            user_parts.append(f"\nContext:\nServer: {ctx.get('server_type', 'paper')} {ctx.get('version', '1.21.x')}")
+            if ctx.get("online_players"):
+                user_parts.append(f"Online: {', '.join(ctx['online_players'])}")
+            pos = ctx.get("player_position")
+            if pos:
+                user_parts.append(f"Player position: ({pos['x']}, {pos['y']}, {pos['z']})")
+
+            user_msg = "\n".join(user_parts)
+
+            # Assistant response as JSON
+            response = {
+                "reasoning": out.get("reasoning", ""),
+                "commands": out.get("commands", []),
+                "message": out.get("message"),
+            }
+
+            examples.append({
+                "conversations": [
+                    {"role": "user", "content": user_msg},
+                    {"role": "assistant", "content": json.dumps(response)},
+                ]
+            })
+
+    return examples
+
+
+def main():
+    """Parse CLI options, load the dataset, and run QLoRA fine-tuning."""
+    parser = argparse.ArgumentParser(description="LoRA fine-tuning for Minecraft AI")
+    parser.add_argument("--model", default="Qwen/Qwen3-8B", help="Base model from HuggingFace")
+    parser.add_argument("--dataset", default="", help="Dataset path (default: auto-detect)")
+    parser.add_argument("--output", default="", help="Output directory for adapter")
+    parser.add_argument("--rank", type=int, default=16, help="LoRA rank")
+    parser.add_argument("--alpha", type=int, default=32, help="LoRA alpha")
+    parser.add_argument("--lr", type=float, default=2e-4, help="Learning rate")
+    parser.add_argument("--epochs", type=int, default=3, help="Training epochs")
+    parser.add_argument("--batch-size", type=int, default=2, help="Per-device batch size")
+    parser.add_argument("--grad-accum", type=int, default=4, help="Gradient accumulation steps")
+    parser.add_argument("--max-seq-len", type=int, default=2048, help="Max sequence length")
+    parser.add_argument("--dry-run", action="store_true", help="Load and preview the dataset, then exit without loading the model or training")
+    args = parser.parse_args()
+
+    # Auto-detect paths
+    script_dir = Path(__file__).resolve().parent
+    project_root = script_dir.parent.parent
+
+    if not args.dataset:
+        args.dataset = str(project_root / "data" / "processed" / "seed_dataset.jsonl")
+    if not args.output:
+        args.output = str(project_root / "training" / "checkpoints" / "qwen3-8b-mc-lora")
+
+    print(f"Base model: {args.model}")
+    print(f"Dataset: {args.dataset}")
+    print(f"Output: {args.output}")
+    print(f"LoRA rank: {args.rank}, alpha: {args.alpha}")
+    print(f"LR: {args.lr}")
+    print(f"Epochs: {args.epochs}")
+    print(f"Batch: {args.batch_size} x {args.grad_accum} grad accum")
+    print(f"Max seq len: {args.max_seq_len}")
+    print()
+
+    # Load dataset
+    print("Loading dataset...")
+    train_data = load_dataset(args.dataset)
+    print(f" {len(train_data)} training examples loaded")
+
+    if args.dry_run:
+        print("\n[DRY RUN] Would load model and train. Exiting.")
+        for ex in train_data[:2]:
+            print(f" Example: {ex['conversations'][0]['content'][:80]}...")
+        return
+
+    # Import Unsloth (heavy imports, only when actually training)
+    from unsloth import FastLanguageModel, is_bfloat16_supported
+    from trl import SFTTrainer, SFTConfig
+    from datasets import Dataset
+
+    # Load model with 4-bit quantization
+    print(f"\nLoading {args.model} in 4-bit...")
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name=args.model,
+        max_seq_length=args.max_seq_len,
+        load_in_4bit=True,
+        dtype=None,  # auto-detect
+    )
+
+    # Add LoRA adapters
+    print(f"Adding LoRA adapters (rank={args.rank}, alpha={args.alpha})...")
+    model = FastLanguageModel.get_peft_model(
+        model,
+        r=args.rank,
+        lora_alpha=args.alpha,
+        lora_dropout=0,
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                        "gate_proj", "up_proj", "down_proj"],
+        bias="none",
+        use_gradient_checkpointing="unsloth",
+    )
+
+    # Prepare dataset
+    dataset = Dataset.from_list(train_data)
+
+    def formatting_func(examples):
+        """Format conversations for the chat template."""
+        texts = []
+        for convos in examples["conversations"]:
+            text = tokenizer.apply_chat_template(
+                convos, tokenize=False, add_generation_prompt=False
+            )
+            texts.append(text)
+        return {"text": texts}
+
+    dataset = dataset.map(formatting_func, batched=True)
+
+    # Mixed precision must match the dtype the model was loaded in
+    # (dtype=None above auto-selects bf16 on GPUs that support it);
+    # Unsloth rejects a mismatched fp16/bf16 flag.
+    use_bf16 = is_bfloat16_supported()
+
+    # Training config
+    training_args = SFTConfig(
+        output_dir=args.output,
+        num_train_epochs=args.epochs,
+        per_device_train_batch_size=args.batch_size,
+        gradient_accumulation_steps=args.grad_accum,
+        learning_rate=args.lr,
+        lr_scheduler_type="cosine",
+        warmup_ratio=0.1,
+        weight_decay=0.01,
+        fp16=not use_bf16,
+        bf16=use_bf16,
+        logging_steps=1,
+        save_strategy="epoch",
+        seed=42,
+        max_seq_length=args.max_seq_len,
+        dataset_text_field="text",
+        packing=True,
+    )
+
+    # Train
+    print(f"\nStarting training ({args.epochs} epochs, {len(train_data)} examples)...")
+    trainer = SFTTrainer(
+        model=model,
+        tokenizer=tokenizer,
+        train_dataset=dataset,
+        args=training_args,
+    )
+
+    trainer.train()
+
+    # Save adapter
+    print(f"\nSaving LoRA adapter to {args.output}...")
+    model.save_pretrained(args.output)
+    tokenizer.save_pretrained(args.output)
+
+    print("\nTraining complete!")
+    print(f"Adapter saved to: {args.output}")
+    print("To convert to GGUF for Ollama, load the adapter with Unsloth and call:")
+    print('  model.save_pretrained_gguf("<gguf_dir>", tokenizer, quantization_method="q4_k_m")')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/training/scripts/train_lora.sh b/training/scripts/train_lora.sh
new file mode 100755
index 0000000..dadc84d
--- /dev/null
+++ b/training/scripts/train_lora.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# LoRA fine-tuning launcher for qwen3:8b on steel141
+#
+# Prerequisites (already installed):
+#   conda activate mc-train (or use full path below)
+#   Python 3.11, PyTorch 2.10+cu124, Unsloth 2026.3.5
+#
+# Usage:
+#   ssh steel141
+#   cd ~/mc-ai-training
+#   bash train_lora.sh
+#
+# Or from CT 629:
+#   ssh steel141 "cd ~/mc-ai-training && bash train_lora.sh"
+
+set -euo pipefail
+
+PYTHON="$HOME/miniconda3/envs/mc-train/bin/python"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Use RTX 3090 Ti (GPU 0 in PyTorch ordering)
+export CUDA_VISIBLE_DEVICES=0
+
+echo "=== Minecraft AI LoRA Training ==="
+echo "Python: $("$PYTHON" --version)"
+echo "GPU: RTX 3090 Ti (24GB)"
+echo "Base model: Qwen/Qwen3-8B"
+echo "Dataset: seed_dataset.jsonl"
+echo ""
+
+exec "$PYTHON" "$SCRIPT_DIR/train_lora.py" "$@"