5b28002001
Major changes from this session: Training: - 0.6.0 training running: 9B on steel141 3090 Ti, 27B on rented H100 NVL - 7,256 merged training examples (up from 3,183) - New training data: failure modes (85), midloop messaging (27), prompt injection defense (29), personality (32), gold from quarantine bank (232), new tool examples (30), claude's own experience (10) - All training data RCON-validated at 100% pass rate - Bake-off: gemma3:27b 66%, qwen3.5:27b 61%, translategemma:27b 56% Oracle Bot (Mind's Eye): - Invisible spectator bot (mineflayer) streams world state via WebSocket - HTML5 Canvas frontend at mind.mortdec.ai - Real-time tool trace visualization with expandable entries - Streaming model tokens during inference - Gateway integration: fire-and-forget POST /trace on every tool call Reinforcement Learning: - Gymnasium environment wrapping mineflayer bot (minecraft_env.py) - PPO training via Stable Baselines3 (10K param policy network) - Behavioral cloning pretraining (97.5% accuracy on expert policy) - Infinite training loop with auto-restart and checkpoint resume - Bot learns combat, survival, navigation from raw experience Bot Army: - 8-soldier marching formation with autonomous combat - Combat bots using mineflayer-pvp, pathfinder, armor-manager - Multilingual prayer bots via translategemma:27b (18 languages) - Frame-based AI architecture: LLM planner + reactive micro-scripts Infrastructure: - Fixed mattpc.sethpc.xyz billing gateway (API key + player list parser) - Billing gateway now tracks all LAN traffic (LAN auto-auth) - Gateway fallback for empty god-mode responses - Updated mortdec.ai landing page Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
212 lines
9.9 KiB
Bash
Executable File
212 lines
9.9 KiB
Bash
Executable File
#!/bin/bash
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Mortdecai Batch Training Pipeline
|
|
#
|
|
# Trains both 9B and 14B models sequentially on a rented GPU, then exports
|
|
# GGUFs at all required quant levels for the fleet.
|
|
#
|
|
# Usage (on rented H100):
|
|
# # 1. Upload this repo + dataset
|
|
# rsync -avz --exclude='.git' . gpu-host:~/mortdecai/
|
|
#
|
|
# # 2. SSH in and run
|
|
# cd ~/mortdecai
|
|
# bash training/scripts/batch_train.sh
|
|
#
|
|
# # 3. Monitor from another machine (pick one):
|
|
# ssh gpu-host "tail -f ~/mortdecai/training_progress.jsonl"
|
|
# # OR export DISCORD_TOKEN (bot token) and DISCORD_CHANNEL (channel ID) for push notifications:
# export DISCORD_TOKEN="<bot token>" DISCORD_CHANNEL="<channel id>"
|
|
# bash training/scripts/batch_train.sh
|
|
#
|
|
# # 4. Download checkpoints when done
|
|
# rsync -avz gpu-host:~/mortdecai/training/checkpoints/mortdecai-0.6.0-* ./training/checkpoints/
|
|
#
|
|
# Prerequisites on the rented machine:
|
|
# pip install unsloth torch transformers datasets peft trl
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
# Strict mode: abort on command failure (-e), on use of an unset variable (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Release tag baked into checkpoint / GGUF names.
VERSION="0.6.0"
# Merged JSONL training set (one example per line), relative to the repo root.
DATASET="data/processed/merged_training_v06.jsonl"
# Directory that receives LoRA adapters, merged weights and GGUF exports.
CHECKPOINT_DIR="training/checkpoints"
# JSONL progress log appended to by notify(); tail it to monitor a run.
PROGRESS_LOG="training_progress.jsonl"

# Discord bot token + channel for progress notifications.
# The token is a secret and must come from the environment; it is never stored
# in this file. When DISCORD_TOKEN is empty or unset, Discord notifications are
# disabled and progress is only logged locally.
# NOTE(review): any token that was previously committed here should be revoked.
DISCORD_TOKEN="${DISCORD_TOKEN:-}"
DISCORD_CHANNEL="${DISCORD_CHANNEL:-1485160229573361664}"
|
|
|
|
# ── Progress reporting ────────────────────────────────────────────────────────
|
|
|
|
#######################################
# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
_notify_json_escape() {
  local text=$1
  text=${text//\\/\\\\}   # backslashes first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Report pipeline progress to three sinks: the JSONL progress log, stdout,
# and (when a bot token is configured) a Discord channel.
# Globals:   PROGRESS_LOG (appended to), DISCORD_TOKEN, DISCORD_CHANNEL (read)
# Arguments: $1 - stage label, e.g. TRAIN / MERGE / DONE
#            $2 - human-readable message
# Outputs:   one line on stdout; one JSON object appended to PROGRESS_LOG
# Returns:   0 even if the Discord request fails (best-effort notification)
#######################################
notify() {
  local stage="$1"
  local message="$2"
  # Declared separately so a failing `date` is not masked by `local`.
  local ts
  ts=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  local stage_json message_json
  stage_json=$(_notify_json_escape "$stage")
  message_json=$(_notify_json_escape "$message")

  # Log to file (one valid JSON object per line)
  printf '{"ts":"%s","stage":"%s","message":"%s"}\n' \
    "$ts" "$stage_json" "$message_json" >> "$PROGRESS_LOG"

  # Print locally
  echo " [$ts] $stage: $message"

  # Discord bot API
  if [ -n "$DISCORD_TOKEN" ]; then
    local payload
    printf -v payload '{"content":"**Mortdecai Training** [%s] %s"}' \
      "$stage_json" "$message_json"
    # Best-effort: never let a slow or failing Discord call stall or abort
    # training, hence the timeout, the discarded output and the `|| true`.
    curl -s --max-time 15 -X POST \
      "https://discord.com/api/v10/channels/${DISCORD_CHANNEL}/messages" \
      -H "Authorization: Bot ${DISCORD_TOKEN}" \
      -H "Content-Type: application/json" \
      -d "$payload" \
      > /dev/null 2>&1 || true
  fi
}
|
|
|
|
# Base models to fine-tune, processed in this order by the training loop.
MODELS=("Qwen/Qwen3.5-9B" "Qwen/Qwen3.5-14B")

# GGUF quantization levels exported for each base model, as a space-separated
# list keyed by the model ID used in MODELS.
# Intended fleet mapping recorded by the original author:
#   9B:  Q4=RTX4000(8GB), Q6=2080Ti(11GB), Q8=3090Ti(24GB)
#   14B: Q3=RTX4000(8GB), Q4=2080Ti(11GB), Q6=3090Ti(24GB), F16=StrixHalo(64GB)
# NOTE(review): both entries below list the same four levels, which does not
# match the per-model mapping above (9B also gets Q3, 14B also gets Q8).
# Confirm which is intended before trimming either list.
declare -A QUANTS=(
  ["Qwen/Qwen3.5-9B"]="Q3_K_M Q4_K_M Q6_K Q8_0"
  ["Qwen/Qwen3.5-14B"]="Q3_K_M Q4_K_M Q6_K Q8_0"
)
|
|
|
|
# ── Preflight ─────────────────────────────────────────────────────────────────
|
|
|
|
# Refuse to start without a usable dataset: a missing or empty file would only
# be discovered after the (paid) GPU has already been spun up.
if [ ! -f "$DATASET" ]; then
  echo "ERROR: Dataset not found at $DATASET" >&2
  echo "Run: python3 training/scripts/merge_datasets.py" >&2
  exit 1
fi
if [ ! -s "$DATASET" ]; then
  echo "ERROR: Dataset at $DATASET is empty" >&2
  echo "Run: python3 training/scripts/merge_datasets.py" >&2
  exit 1
fi

# Count examples (one JSON object per line). `grep -c ''` also counts a final
# line that lacks a trailing newline, which `wc -l` would miss.
EXAMPLE_COUNT=$(grep -c '' "$DATASET")

# Startup banner: version, dataset size and the models about to be trained.
echo "╔══════════════════════════════════════════════════════════╗"
echo "║ Mortdecai Batch Training Pipeline v${VERSION} ║"
echo "╠══════════════════════════════════════════════════════════╣"
echo "║ Dataset: ${EXAMPLE_COUNT} examples ║"
echo "║ Models: ${#MODELS[@]} ($(printf '%s ' "${MODELS[@]}" | sed 's|Qwen/||g'))║"
echo "╚══════════════════════════════════════════════════════════╝"
echo ""

# Show the available GPU(s); a missing or failing nvidia-smi is only a warning
# here, the training step itself will fail later if there is truly no GPU.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo "WARNING: No GPU detected"
echo ""
|
|
|
|
# ── Conda/venv setup ──────────────────────────────────────────────────────────
|
|
|
|
# Activate the `mc-train` conda environment when conda is available; otherwise
# (or if that environment does not exist) keep using the current interpreter.
if command -v conda &>/dev/null; then
  # conda's shell hook and package activate scripts may read unset variables.
  # Under `set -u` that terminates the whole script, and the `||` fallback
  # below cannot catch it, so relax nounset for this section only.
  case $- in
    *u*) _restore_nounset=1 ;;
    *) _restore_nounset=0 ;;
  esac
  set +u
  # shellcheck disable=SC1091  # path is only known at runtime
  source "$(conda info --base)/etc/profile.d/conda.sh"
  conda activate mc-train 2>/dev/null || echo "No mc-train env, using current"
  if [ "$_restore_nounset" -eq 1 ]; then
    set -u
  fi
  unset _restore_nounset
fi

# Make sure the output directory for adapters / merged weights / GGUFs exists.
mkdir -p "$CHECKPOINT_DIR"
|
|
|
|
# ── Training loop ─────────────────────────────────────────────────────────────
|
|
|
|
#######################################
# Locate llama.cpp's HF -> GGUF converter script.
# Honors a pre-set LLAMA_CONVERT (from the environment or a previous lookup),
# otherwise searches the filesystem. `find /` routinely exits non-zero
# (unreadable directories, vanishing /proc entries); that must not abort the
# run under `set -e`, hence the `|| true`.
# Outputs: path on stdout, or nothing when the script cannot be found
# Returns: always 0
#######################################
find_llama_convert() {
  if [ -n "${LLAMA_CONVERT:-}" ]; then
    printf '%s\n' "$LLAMA_CONVERT"
    return 0
  fi
  find / -name "convert_hf_to_gguf.py" -type f -print -quit 2>/dev/null || true
}

#######################################
# Locate the llama.cpp quantize binary (`llama-quantize`, or the legacy name
# `quantize`). Honors a pre-set LLAMA_QUANTIZE, then PATH, then a filesystem
# search restricted to executable regular files so that directories or data
# files that happen to be called "quantize" are not picked up.
# Outputs: path on stdout, or nothing when no binary can be found
# Returns: always 0
#######################################
find_llama_quantize() {
  if [ -n "${LLAMA_QUANTIZE:-}" ]; then
    printf '%s\n' "$LLAMA_QUANTIZE"
    return 0
  fi
  if command -v llama-quantize >/dev/null 2>&1; then
    command -v llama-quantize
    return 0
  fi
  find / \( -name "llama-quantize" -o -name "quantize" \) \
    -type f -perm -u+x -print -quit 2>/dev/null || true
}

# For every base model: LoRA fine-tune -> merge adapter into base weights ->
# convert to an F16 GGUF -> quantize to each level listed in QUANTS.
# Each step is skipped when its output already exists, so an interrupted run
# can simply be restarted.
for BASE_MODEL in "${MODELS[@]}"; do
  # e.g. "Qwen/Qwen3.5-9B" -> "qwen3-5-9b"
  MODEL_SHORT=$(echo "$BASE_MODEL" | sed 's|Qwen/||; s|\.|-|g' | tr '[:upper:]' '[:lower:]')
  CKPT_NAME="mortdecai-${VERSION}-${MODEL_SHORT}"
  CKPT_PATH="${CHECKPOINT_DIR}/${CKPT_NAME}"
  MERGED_PATH="${CKPT_PATH}-merged"
  GGUF_DIR="${CKPT_PATH}-gguf"

  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo " Training: ${BASE_MODEL} → ${CKPT_NAME}"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  TRAIN_START=$(date +%s)

  # ── Step 1: LoRA fine-tune ──
  if [ -d "$CKPT_PATH" ] && [ -f "$CKPT_PATH/adapter_model.safetensors" ]; then
    notify "SKIP" "${CKPT_NAME} LoRA checkpoint exists"
  else
    notify "TRAIN" "Starting ${BASE_MODEL} LoRA fine-tune (${EXAMPLE_COUNT} examples)"
    # pipefail makes a trainer failure abort the script even though the
    # pipeline ends in `tee`.
    python3 training/scripts/train_lora.py \
      --model "$BASE_MODEL" \
      --dataset "$DATASET" \
      --output "$CKPT_PATH" \
      --epochs 3 \
      --batch-size 4 \
      --lr 2e-4 \
      --rank 64 \
      --alpha 128 \
      --save-steps 100 \
      2>&1 | tee "${CKPT_PATH}.train.log"
    notify "TRAIN" "${CKPT_NAME} LoRA training complete"
  fi

  # ── Step 2: Merge LoRA into base ──
  if [ -d "$MERGED_PATH" ] && [ -f "$MERGED_PATH/model.safetensors.index.json" ]; then
    notify "SKIP" "${CKPT_NAME} merged weights exist"
  else
    notify "MERGE" "Merging ${CKPT_NAME} LoRA into base model..."
    # Paths are passed as argv rather than spliced into the Python source, so
    # quotes or backslashes in a path cannot break (or inject into) the code.
    python3 -c '
import sys
from unsloth import FastLanguageModel
ckpt_path, merged_path = sys.argv[1], sys.argv[2]
model, tokenizer = FastLanguageModel.from_pretrained(ckpt_path)
model.save_pretrained_merged(merged_path, tokenizer, save_method="merged_16bit")
print("Merge complete: " + merged_path)
' "$CKPT_PATH" "$MERGED_PATH"
  fi

  # ── Step 3: Convert to GGUF (F16) ──
  mkdir -p "$GGUF_DIR"
  F16_GGUF="${GGUF_DIR}/${MODEL_SHORT}.F16.gguf"

  if [ -f "$F16_GGUF" ]; then
    notify "SKIP" "${CKPT_NAME} F16 GGUF exists"
  else
    notify "GGUF" "Converting ${CKPT_NAME} to F16 GGUF..."
    # Result is kept in LLAMA_CONVERT, so later models reuse it instead of
    # scanning the filesystem again.
    LLAMA_CONVERT=$(find_llama_convert)
    if [ -z "$LLAMA_CONVERT" ]; then
      echo " WARNING: convert_hf_to_gguf.py not found, skipping GGUF export"
      echo " Run manually: python3 convert_hf_to_gguf.py $MERGED_PATH --outfile $F16_GGUF --outtype f16"
      # Without an F16 GGUF there is nothing to quantize; move to next model.
      continue
    fi
    python3 "$LLAMA_CONVERT" "$MERGED_PATH" --outfile "$F16_GGUF" --outtype f16
  fi

  # ── Step 4: Quantize ──
  LLAMA_QUANTIZE=$(find_llama_quantize)
  if [ -z "$LLAMA_QUANTIZE" ]; then
    echo " WARNING: llama-quantize not found, skipping quantization"
    echo " Run manually on steel141 after downloading F16 GGUF"
  else
    echo " [4/4] Quantizing..."
    # Intentional word splitting: QUANTS values are space-separated lists.
    # shellcheck disable=SC2086
    for QUANT in ${QUANTS[$BASE_MODEL]:-}; do
      QFILE="${GGUF_DIR}/${MODEL_SHORT}.${QUANT}.gguf"
      if [ -f "$QFILE" ]; then
        echo " [SKIP] $QUANT exists"
      else
        echo " Quantizing $QUANT..."
        "$LLAMA_QUANTIZE" "$F16_GGUF" "$QFILE" "$QUANT"
      fi
    done
  fi

  TRAIN_END=$(date +%s)
  ELAPSED=$(( (TRAIN_END - TRAIN_START) / 60 ))
  notify "DONE" "${CKPT_NAME} complete in ${ELAPSED}m"
  echo ""
  echo " ✓ ${CKPT_NAME} complete in ${ELAPSED}m"
  echo " LoRA: $CKPT_PATH"
  echo " Merged: $MERGED_PATH"
  echo " GGUFs: $GGUF_DIR/"
  # Display only: list each GGUF with its human-readable size. A failure here
  # must not abort a run whose real work has already succeeded.
  ls -lh "$GGUF_DIR/"*.gguf 2>/dev/null | awk '{print " " $NF " (" $5 ")"}' || true
done
|
|
|
|
# ── Summary ───────────────────────────────────────────────────────────────────
|
|
|
|
# Final banner with the manual follow-up steps. The Ollama tags are derived
# from VERSION so they stay in sync with the checkpoint names produced by
# the training loop.
echo ""
cat <<EOF
╔══════════════════════════════════════════════════════════╗
║ All training complete! ║
╠══════════════════════════════════════════════════════════╣
║ Next steps: ║
║ 1. Download checkpoints to steel141 ║
║ 2. Register in Ollama: ║
║ ollama create mortdecai:${VERSION}-9b -f Modelfile.9b ║
║ ollama create mortdecai:${VERSION}-14b -f Modelfile.14b ║
║ 3. Run bake-off against 0.5.0 ║
║ 4. Deploy winner to prod ║
╚══════════════════════════════════════════════════════════╝
EOF
|