docs: Mortdecai 0.6.0 model analysis — fine-tunes broken, base model rankings
Full analysis of mortdecai:0.6.0-9b and mortdecai:latest (27B) fine-tunes vs 6 base model candidates. Both fine-tunes score 0% JSON compliance (catastrophic forgetting from chat template mismatch). Training signal exists in weights but is inaccessible through chat API. Base model rankings: phi4:14b (100%, 7.4s) > gemma3:12b (100%, 12.9s) > gemma3:27b (100%, 25.3s). Qwen3.5 not recommended for conductor role. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deep diagnostic probes to understand training failure modes."""
|
||||
import json, requests, time
|
||||
|
||||
OLLAMA_URL = "http://192.168.0.141:11437"
|
||||
|
||||
def query(model, messages, temp=0.1, num_predict=256):
    """Send one non-streaming chat request to Ollama and return the reply text.

    Args:
        model: Model tag to query (e.g. "mortdecai:latest").
        messages: Chat history as a list of {"role": ..., "content": ...} dicts.
        temp: Sampling temperature sent in the request options.
        num_predict: Generation token cap sent in the request options.

    Returns:
        The assistant message content, "NO CONTENT" when the reply carries no
        message content, or a string starting with "ERROR: " when the request
        fails or the server answers with an error payload. Ordinary exceptions
        are caught so one failing model does not abort the remaining probes.
    """
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {"temperature": temp, "num_predict": num_predict},
    }
    try:
        response = requests.post(f"{OLLAMA_URL}/api/chat", json=payload, timeout=120)
        data = response.json()
        # Ollama reports failures (e.g. an unknown model tag) as
        # {"error": "..."}; surface that instead of a misleading "NO CONTENT".
        if "error" in data:
            return f"ERROR: {data['error']}"
        # `or {}` also covers an explicit null "message" field.
        return (data.get("message") or {}).get("content", "NO CONTENT")
    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic helper and the
        # callers only print whatever string comes back.
        return f"ERROR: {e}"
|
||||
|
||||
# Probe 1: look for any surviving training signal by replaying the exact
# phrasing used in the training data.
rule = "=" * 80
print(rule)
print("PROBE 1: Training signal detection (exact training phrases)")
print(rule)

# System prompt in the exact format used during training.
training_system_prompt = "/no_think\nYou are a Minecraft 1.21 command translator for a server admin.\nReturn ONLY JSON: {\"commands\": [\"cmd1\", \"cmd2\"], \"reasoning\": \"why\"}\nNo prose, no markdown, no labels, no leading slash on commands."

for tag in ("mortdecai:0.6.0-9b", "mortdecai:latest"):
    print(f"\n--- {tag} ---")

    system_turn = {"role": "system", "content": training_system_prompt}
    user_turn = {"role": "user", "content": "give me a diamond sword"}
    reply = query(tag, [system_turn, user_turn])

    # Only the first 200 characters are shown to keep the report compact.
    print(f" Exact training format: {reply[:200]}")
|
||||
|
||||
# Probe 2: check whether a /no_think system prompt keeps <think> blocks out
# of the reply, for both fine-tunes and the qwen3.5 models.
rule = "=" * 80
print(f"\n{rule}")
print("PROBE 2: /no_think effect")
print(rule)

for tag in ("mortdecai:0.6.0-9b", "mortdecai:latest", "qwen3.5:latest", "qwen3.5:27b"):
    print(f"\n--- {tag} ---")
    conversation = [
        {"role": "system", "content": "/no_think\nReturn only: hello"},
        {"role": "user", "content": "say hello"},
    ]
    reply = query(tag, conversation)

    # An opening <think> tag anywhere in the reply counts as thinking output.
    thinking_present = "<think>" in reply
    print(f" Has <think>: {thinking_present}")
    print(f" Response: {reply[:150]}")
|
||||
|
||||
# Probe 3: Raw completion mode (no chat template) — use /api/generate
rule = "=" * 80
print(f"\n{rule}")
print("PROBE 3: Raw generate (no chat template)")
print(rule)

# The prompt ends in the middle of the expected JSON so the model only has to
# continue it; "raw": True is set in the request below.
raw_prompt = 'You are a Minecraft command translator. Return ONLY JSON.\nUser: give me a diamond sword\nAssistant: {"commands": ["'
sampling = {"temperature": 0.1, "num_predict": 128}

for tag in ("mortdecai:0.6.0-9b", "mortdecai:latest"):
    print(f"\n--- {tag} ---")
    request_body = {
        "model": tag,
        "prompt": raw_prompt,
        "stream": False,
        "raw": True,
        "options": sampling,
    }
    try:
        http_reply = requests.post(f"{OLLAMA_URL}/api/generate", json=request_body, timeout=120)
        decoded = http_reply.json()
        completion = decoded.get('response', 'NO RESPONSE')
        print(f" Raw completion: {completion[:300]}")
    except Exception as exc:
        # Any failure is reported inline so the next model is still probed.
        print(f" ERROR: {exc}")
|
||||
|
||||
# Probe 4: Multi-turn — can we coerce it into JSON with a correction?
print("\n" + "=" * 80)
print("PROBE 4: Correction coercion (multi-turn)")
print("=" * 80)

# Closing tag of a reasoning block; everything up to and including it is
# dropped before the JSON check.
THINK_CLOSE = "</think>"

for model in ["mortdecai:0.6.0-9b", "mortdecai:latest"]:
    print(f"\n--- {model} ---")
    # Replay a conversation in which the assistant already answered in prose
    # and the user pushes back, to see whether the correction yields JSON.
    r = query(model, [
        {"role": "system", "content": "You are an RCON command translator. You MUST respond with ONLY raw JSON, no markdown, no explanation. Format: {\"commands\": [...], \"reasoning\": \"...\"}"},
        {"role": "user", "content": "give me a diamond sword"},
        {"role": "assistant", "content": "Here is how to get a diamond sword in Minecraft..."},
        {"role": "user", "content": "NO. You must respond with ONLY JSON. No text. No markdown. Just raw JSON. Try again: give me a diamond sword"},
    ])
    print(f" After correction: {r[:300]}")

    # Check if JSON: strip a leading <think>...</think> block first, if the
    # closing tag is present.
    clean = r.strip()
    if "<think>" in clean:
        idx = clean.find(THINK_CLOSE)
        if idx > -1:
            clean = clean[idx + len(THINK_CLOSE):].strip()
    try:
        json.loads(clean)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested output. Anything else (e.g. Ctrl-C) is no
        # longer swallowed as it was by the former bare `except:`.
        print(" [JSON INVALID]")
    else:
        print(" [JSON VALID]")
|
||||
|
||||
# Probe 5: ask each fine-tune directly whether it knows about Mortdecai.
rule = "=" * 80
print(f"\n{rule}")
print("PROBE 5: Mortdecai awareness")
print(rule)

identity_question = "Do you know what Mortdecai is? Have you been trained as a Minecraft AI? What is your model name?"

for tag in ("mortdecai:0.6.0-9b", "mortdecai:latest"):
    print(f"\n--- {tag} ---")
    # Single user turn, no system prompt.
    answer = query(tag, [{"role": "user", "content": identity_question}])
    print(f" {answer[:400]}")
|
||||
|
||||
Reference in New Issue
Block a user