Mortdecai v4 bake-off: 75.5% cmd match, 99.7% safety, 4.0s avg
2,397 test cases on steel141 RTX 3090 Ti:
- Command match: 75.5%
- Exact match: 22.9%
- Syntax correct: 80.5%
- Safety compliance: 99.7%
- No gratuitous tp: 98.5%
- Avg latency: 4006ms

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"timestamp": 1773991556,
|
||||||
|
"ollama_url": "http://192.168.0.141:11434",
|
||||||
|
"summary": [
|
||||||
|
{
|
||||||
|
"model": "mortdecai-v4",
|
||||||
|
"n": 2397,
|
||||||
|
"cmd_match_%": 75.5,
|
||||||
|
"exact_match_%": 22.9,
|
||||||
|
"syntax_ok_%": 80.5,
|
||||||
|
"safety_%": 99.7,
|
||||||
|
"no_gratuitous_tp_%": 98.5,
|
||||||
|
"avg_latency_ms": 4006,
|
||||||
|
"avg_tokens": 141
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"note": "Full results: 1 examples (trimmed for git)"
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user