#!/bin/bash # Toggle GPU mode on steel141 between pooled (F16) and split (Q8+Q6) # # Usage: # ./gpu_mode.sh pooled # Both GPUs -> one Ollama, F16 model # ./gpu_mode.sh split # Separate Ollama per GPU, Q8 on 3090, Q6 on 2080 # ./gpu_mode.sh status # Show current state HOST="steel141" case "${1:-status}" in pooled) echo "Switching to POOLED mode (both GPUs, F16)..." ssh $HOST "sudo systemctl stop ollama-gpu0.service 2>/dev/null; \ sudo systemctl stop ollama.service; \ sudo systemctl start ollama.service" sleep 3 echo "Loading mortdecai:0.5.0-f16..." ssh $HOST "curl -s http://localhost:11434/api/chat -d '{\"model\":\"mortdecai:0.5.0-f16\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}],\"stream\":false,\"options\":{\"num_predict\":1}}' > /dev/null 2>&1" echo "Done. F16 on port 11434 (both GPUs)." ;; split) echo "Switching to SPLIT mode (separate GPUs)..." ssh $HOST "sudo systemctl stop ollama.service; \ sudo systemctl start ollama.service; \ sudo systemctl start ollama-gpu0.service" sleep 3 echo "Loading models..." ssh $HOST "curl -s http://localhost:11434/api/chat -d '{\"model\":\"mortdecai:0.5.0-q8\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}],\"stream\":false,\"options\":{\"num_predict\":1}}' > /dev/null 2>&1" ssh $HOST "curl -s http://localhost:11435/api/chat -d '{\"model\":\"mortdecai:0.5.0-q6\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}],\"stream\":false,\"options\":{\"num_predict\":1}}' > /dev/null 2>&1" echo "Done. Q8 on :11434 (3090), Q6 on :11435 (2080)." ;; status) echo "=== Ollama Services ===" ssh $HOST "systemctl is-active ollama.service && echo ' ollama.service (port 11434): ACTIVE' || echo ' ollama.service: INACTIVE'" ssh $HOST "systemctl is-active ollama-gpu0.service 2>/dev/null && echo ' ollama-gpu0 (port 11435): ACTIVE' || echo ' ollama-gpu0 (port 11435): INACTIVE'" echo "" echo "=== GPU Usage ===" ssh $HOST "nvidia-smi --query-compute-apps=pid,gpu_name,used_memory --format=csv,noheader 2>/dev/null || echo 'No GPU processes'" echo "" echo "=== Loaded Models (port 11434) ===" ssh $HOST "curl -s http://localhost:11434/api/ps 2>/dev/null | python3 -c \"import sys,json; r=json.loads(sys.stdin.read()); [print(f' {m[\\\"name\\\"]:30s} {m[\\\"size\\\"]/1e9:.1f}GB') for m in r.get('models',[])]\" 2>/dev/null || echo ' none'" ;; *) echo "Usage: $0 {pooled|split|status}" exit 1 ;; esac