Phase 2: eval harness, 182 examples, live bake-off, playtest infrastructure
- Expanded dataset from 31 to 182 examples (45 manual + 106 extracted from server logs)
- Built eval/harness.py with per-category breakdowns and baseline tracking
- Built eval/live_bakeoff.py for RCON-verified model comparison on live server
- Extracted training data from prayer logs, sudo logs, and bug reports on CT 644
- Added Reddit post draft and modmail for playtester recruitment
- Updated server context: all servers now online-mode=false + whitelist
- Updated PLAN.md with Phase 2 progress

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,9 +10,10 @@
|
||||
"difficulty": "easy",
|
||||
"gamemode": "survival",
|
||||
"max_players": 20,
|
||||
"online_mode": true,
|
||||
"online_mode": false,
|
||||
"whitelist": true,
|
||||
"pvp": true,
|
||||
"notes": "Main vanilla survival server. MCSManager-managed."
|
||||
"notes": "Main vanilla survival server. MCSManager-managed. autoStart + autoRestart enabled."
|
||||
},
|
||||
{
|
||||
"name": "shrink-world",
|
||||
@@ -24,7 +25,8 @@
|
||||
"difficulty": "hard",
|
||||
"gamemode": "survival",
|
||||
"max_players": 20,
|
||||
"online_mode": true,
|
||||
"online_mode": false,
|
||||
"whitelist": true,
|
||||
"pvp": true,
|
||||
"datapacks": ["shrinkborder (world border shrinks on death)", "morespawns (5x creeper spawns)"],
|
||||
"notes": "Hardcore-style challenge server. World border starts at 500x500."
|
||||
@@ -39,10 +41,12 @@
|
||||
"difficulty": "hard",
|
||||
"gamemode": "survival",
|
||||
"max_players": 20,
|
||||
"online_mode": true,
|
||||
"online_mode": false,
|
||||
"whitelist": true,
|
||||
"pvp": true,
|
||||
"plugins": ["FastAsyncWorldEdit", "LuckPerms"],
|
||||
"ai_services": ["mc-aigod-paper.service (God/sudo AI)", "mc-langgraph-gateway.service (session gateway)"],
|
||||
"training_role": "Primary training/eval server for playtesters. Has full AI God + sudo + LangGraph gateway.",
|
||||
"notes": "Paper fork with AI God, sudo translator, and world observation tools."
|
||||
},
|
||||
{
|
||||
@@ -56,8 +60,10 @@
|
||||
"gamemode": "creative",
|
||||
"max_players": 50,
|
||||
"online_mode": false,
|
||||
"whitelist": false,
|
||||
"pvp": false,
|
||||
"world_type": "flat",
|
||||
"training_role": "Secondary training server for bot-driven data collection and destructive testing. No auth required.",
|
||||
"notes": "Offline dev server for AI training bots. Superflat, no mobs."
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user