From 910d7b4ca7df44acff3198ee315c8552181bb1db Mon Sep 17 00:00:00 2001 From: Seth Freiberg Date: Thu, 19 Mar 2026 19:46:00 -0400 Subject: [PATCH] Qwen3.5-9B bake-off results, model named Mortdecai MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bake-off: qwen3.5:9b base model, 147 cases: - 70.1% command match (2x qwen3:8b baseline) - 15.6% needed syntax fixes - 29.9% miss (mostly God/prayer — no persona training) - Avg 7.5s, median 5.7s (thinking tokens) Model officially named Mortdecai. Co-Authored-By: Claude Opus 4.6 (1M context) --- eval/results/bakeoff_1773963920.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 eval/results/bakeoff_1773963920.json diff --git a/eval/results/bakeoff_1773963920.json b/eval/results/bakeoff_1773963920.json new file mode 100644 index 0000000..4c465f8 --- /dev/null +++ b/eval/results/bakeoff_1773963920.json @@ -0,0 +1,17 @@ +{ + "timestamp": 1773963920, + "ollama_url": "http://192.168.0.141:11434", + "note": "Partial run \u2014 147 of 1542 cases before manual kill", + "summary": [ + { + "model": "qwen3.5:9b", + "n": 147, + "cmd_match_%": 70.1, + "syntax_fixes_%": 15.6, + "miss_%": 29.9, + "avg_latency_ms": 7457, + "median_latency_ms": 5660, + "note": "Base model, no fine-tuning. 2x better than qwen3:8b base (70% vs 34%)" + } + ] +} \ No newline at end of file