Add Anthropic API provider, short God messages, budget tracking

- llm_provider config: "anthropic" or "ollama" (default)
- Anthropic call with cost tracking and budget cap
- Auto-fallback to Ollama when budget exhausted
- God message prompt: "1-2 sentences max, Old Testament telegram"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Code
2026-03-18 19:19:11 -04:00
parent 545deb8674
commit e4c7dba580
+73 -2
View File
@@ -1967,7 +1967,9 @@ def build_message_system_prompt(config) -> str:
"You are God in a Minecraft server. Write a single spoken message to all players.\n"
"You will be told what action was taken (if any) in response to a player's prayer.\n"
"Respond with ONLY the message text — no JSON, no quotes, no formatting.\n"
"Match the language the player prayed in. If they prayed in Spanish, respond in Spanish.\n\n"
"Match the language the player prayed in. If they prayed in Spanish, respond in Spanish.\n"
"KEEP IT SHORT — 1-2 sentences max. This appears in Minecraft chat which has limited space.\n"
"Be punchy and dramatic, not verbose. Think Old Testament telegram.\n\n"
)
if _GOD_SOUL:
base += "Your identity and voice are defined by your soul:\n" + _GOD_SOUL + "\n\n"
@@ -1986,7 +1988,13 @@ def build_message_system_prompt(config) -> str:
def _llm_call(model: str, system: str, user: str, config: dict,
fmt = None, temperature: float = 0.85,
max_tokens: int = 400, timeout: int = 60) -> str:
"""Single Ollama chat call. Returns raw content string."""
"""LLM call — routes to Anthropic API or Ollama based on config."""
provider = config.get("llm_provider", "ollama")
if provider == "anthropic":
return _anthropic_call(model, system, user, config, temperature, max_tokens, timeout)
# Default: Ollama
payload = {
"model": model,
"messages": [
@@ -2006,6 +2014,69 @@ def _llm_call(model: str, system: str, user: str, config: dict,
return r.json()["message"]["content"]
# --- Anthropic API cost tracking ---
# Module-level state shared by _get_anthropic_cost() and _anthropic_call().
# The lock guards every read and write of _anthropic_total_cost.
_anthropic_cost_lock = threading.Lock()
# Running total of the estimated cost of Anthropic calls made by this module;
# _anthropic_call() adds to it after each API response and compares it against
# the configured budget. It starts at 0.0 whenever this module is loaded.
_anthropic_total_cost = 0.0
def _get_anthropic_cost():
    """Return the running Anthropic cost total, read while holding the cost lock."""
    with _anthropic_cost_lock:
        current_total = _anthropic_total_cost
    return current_total
def _anthropic_call(model: str, system: str, user: str, config: dict,
                    temperature: float = 0.85, max_tokens: int = 400,
                    timeout: int = 60) -> str:
    """Call the Anthropic Messages API, tracking estimated spend against a budget.

    The module-wide running total is compared with ``config["anthropic_budget"]``
    (default 5.00). Once the total has reached the budget, the request is sent to
    Ollama instead, using ``config["fallback_model"]``, else ``config["model"]``,
    else ``"gemma3n:e4b"``.

    Args:
        model: Anthropic model name to request.
        system: System prompt text.
        user: User message text.
        config: Settings dict. Reads ``anthropic_api_key``, ``anthropic_budget``,
            ``ollama_url`` (fallback only), ``fallback_model`` / ``model``
            (fallback only), and the optional price overrides
            ``anthropic_input_price_per_mtok`` (default 0.80) and
            ``anthropic_output_price_per_mtok`` (default 4.00), both in dollars
            per million tokens. The defaults are the Haiku rates this function
            previously hard-coded.
        temperature: Sampling temperature forwarded to the backend.
        max_tokens: Output token limit (``num_predict`` on the Ollama fallback).
        timeout: HTTP timeout in seconds for the single request made.

    Returns:
        The text of the first content block of the Anthropic response, or the
        Ollama message content when the fallback path is taken.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
        KeyError, IndexError: If the response JSON lacks the expected fields.
    """
    global _anthropic_total_cost
    api_key = config.get("anthropic_api_key", "")
    budget = config.get("anthropic_budget", 5.00)

    # Snapshot the total and release the lock before any network I/O, so a slow
    # fallback request cannot stall other callers or _get_anthropic_cost().
    with _anthropic_cost_lock:
        spent = _anthropic_total_cost

    if spent >= budget:
        log.warning(f"Anthropic budget exhausted (${spent:.4f} >= ${budget:.2f}). Falling back to Ollama.")
        # Fall back to Ollama
        payload = {
            "model": config.get("fallback_model", config.get("model", "gemma3n:e4b")),
            "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}],
            "stream": False,
            "options": {"temperature": temperature, "num_predict": max_tokens},
        }
        r = requests.post(f"{config['ollama_url']}/api/chat", json=payload, timeout=timeout)
        r.raise_for_status()
        return r.json()["message"]["content"]

    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    body = {
        "model": model,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user}],
        "temperature": temperature,
    }
    r = requests.post("https://api.anthropic.com/v1/messages", headers=headers, json=body, timeout=timeout)
    r.raise_for_status()
    data = r.json()

    # Account for the usage before touching the content: the tokens are billed
    # even if the content turns out to have an unexpected shape.
    input_tokens = data["usage"]["input_tokens"]
    output_tokens = data["usage"]["output_tokens"]
    # Prices are per million tokens; defaults are the Haiku rates.
    input_price = config.get("anthropic_input_price_per_mtok", 0.80)
    output_price = config.get("anthropic_output_price_per_mtok", 4.00)
    cost = (input_tokens / 1_000_000) * input_price + (output_tokens / 1_000_000) * output_price

    with _anthropic_cost_lock:
        previous_total = _anthropic_total_cost
        _anthropic_total_cost += cost
        total = _anthropic_total_cost

    # Log once each time the total crosses a $0.50 boundary, and on every call
    # once 90% of the budget has been used.
    crossed_half_dollar = int(previous_total * 2) != int(total * 2)
    if crossed_half_dollar or total >= budget * 0.9:
        log.info(f"Anthropic cost: ${total:.4f} / ${budget:.2f}")

    return data["content"][0]["text"]
def _gateway_enabled(config) -> bool:
return bool(config.get("use_langgraph_gateway", False))