Gemini-powered prayer bots, POS cost printer, first LoRA training run

Prayer bots (ingame/prayer_bots.js): - 3 Mineflayer bots that actively pray, sudo, and bug_log on dev server - Gemini 2.5 Flash Lite generates diverse natural prompts on the fly - Falls back to static pool if Gemini unavailable - 15-45s interval per bot, 50/35/10/5 pray/sudo/bug/chat split POS status printer (scripts/training_status_printer.py): - Prints training data collection status to Epson TM-m30 - Tracks: dataset size, audit logs, bot activity, Gemini API cost, service status - Triggers on $0.50 cost threshold (configurable), checks every 15 min - --dry-run, --check, --force flags Training: - First LoRA run completed (233 examples, 3 epochs, loss 1.5→0.10) - GGUF exported and loaded into Ollama as qwen3-8b-mc-lora on steel141 - Model is bad (expected) — hallucinating Chinese, leaking system prompt - Deployed to dev server for live testing and data collection - bf16 fix for Ampere GPU, system prompts included in training conversations Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 17:36:08 -04:00
parent 142e4fd3c4
commit 029bd28a58
2 changed files with 580 additions and 0 deletions
@@ -0,0 +1,273 @@
+/**
+ * prayer_bots.js -- Mineflayer bots that actively pray, sudo, and bug_log.
+ *
+ * Uses Gemini Flash Lite to generate diverse, natural prompts on the fly.
+ * Falls back to static pools if Gemini is unavailable.
+ *
+ * Usage: node prayer_bots.js [count] [host] [port]
+ * Defaults: 3 bots, 192.168.0.244:25568
+ */
+
+const mineflayer = require('mineflayer');
+const https = require('https');
+
+const count = parseInt(process.argv[2] || '3', 10);
+const host = process.argv[3] || '192.168.0.244';
+const port = parseInt(process.argv[4] || '25568', 10);
+
+const GEMINI_KEY = 'REDACTED_GEMINI_KEY_2';
+const GEMINI_MODEL = 'gemini-2.5-flash-lite';
+const GEMINI_URL = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_KEY}`;
+
+// --- Gemini prompt generation ---
+
+// Instruction text sent verbatim to Gemini on every generation request.
+// It asks for a bare JSON array of 5 strings; geminiGenerate() relies on that
+// shape when it extracts the array from the reply.
+const PRAYER_GEN_PROMPT = `You are generating test prompts for a Minecraft server AI. The server has two chat commands:
+- "pray <message>" — talk to an AI God character who grants/denies requests
+- "sudo <command>" — ask for server commands in natural language
+
+Generate 5 diverse prompts that a Minecraft player might type. Mix these types:
+- Humble prayers asking for items, effects, or help
+- Greedy/demanding prayers
+- Creative roleplay prayers
+- Offensive/blasphemous prayers (mild, for testing punishment responses)
+- Sudo commands for items, effects, world changes, building
+- Sudo edge cases (typos, vague requests, impossible things)
+- Ambiguous or weird messages
+
+Return ONLY a JSON array of strings, no other text. Example:
+["pray lord give me a sword", "sudo set time to night", "pray LMAO", "sudo give me uhhh some blocks I guess", "pray dear god I offer you my wheat as tribute"]
+
+Be creative. Use casual gamer language. Vary between formal prayers and slang. Include typos sometimes.`;
+
+function geminiGenerate() {
+  return new Promise((resolve, reject) => {
+    const body = JSON.stringify({
+      contents: [{ parts: [{ text: PRAYER_GEN_PROMPT }] }],
+      generationConfig: { temperature: 1.2, maxOutputTokens: 400 },
+    });
+
+    const url = new URL(GEMINI_URL);
+    const options = {
+      hostname: url.hostname,
+      path: url.pathname + url.search,
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) },
+    };
+
+    const req = https.request(options, (res) => {
+      let data = '';
+      res.on('data', chunk => data += chunk);
+      res.on('end', () => {
+        try {
+          const json = JSON.parse(data);
+          const text = json.candidates?.[0]?.content?.parts?.[0]?.text || '';
+          // Extract JSON array from response (may be wrapped in ```json ... ```)
+          const cleaned = text.replace(/```json\s*/g, '').replace(/```\s*/g, '');
+          const match = cleaned.match(/\[[\s\S]*\]/);
+          if (match) {
+            const prompts = JSON.parse(match[0]);
+            resolve(prompts.filter(p => typeof p === 'string' && p.length > 0));
+          } else {
+            reject(new Error('No JSON array in Gemini response'));
+          }
+        } catch (e) {
+          reject(e);
+        }
+      });
+    });
+
+    req.on('error', reject);
+    req.setTimeout(15000, () => { req.destroy(); reject(new Error('Gemini timeout')); });
+    req.write(body);
+    req.end();
+  });
+}
+
+// --- Prompt pool (Gemini-fed + static fallback) ---
+
+// Prompts waiting to be sent; refillPool() appends, getNextPrompt() removes.
+let promptPool = [];
+// Consecutive Gemini failures; refillPool() resets it to 0 on success.
+let geminiErrors = 0;
+
+// Static fallback pool
+// Hand-written "pray ..." messages used when Gemini is unavailable or the pool is empty.
+const STATIC_PRAYERS = [
+  "pray lord I am hungry and have nothing",
+  "pray god please give me tools to survive",
+  "pray almighty one, bless me with protection",
+  "pray please heal me I am dying",
+  "pray give me 1000 diamonds NOW",
+  "pray I demand the best armor in the game",
+  "pray lord I wish to build you a temple of gold",
+  "pray god show me a sign that you exist",
+  "pray PENIS",
+  "pray there is no god",
+  "pray hello",
+  "pray asdfghjkl",
+];
+
+// Hand-written "sudo ..." requests; the misspelling in "dimand" is a deliberate typo case.
+const STATIC_SUDO = [
+  "sudo give me a diamond sword",
+  "sudo give me 64 torches",
+  "sudo make it rain",
+  "sudo set time to day",
+  "sudo give me full diamond armor with protection 4",
+  "sudo kill all hostile mobs",
+  "sudo help",
+  "sudo give me dimand sword",
+  "sudo fly",
+  "sudo build a house",
+];
+
+// Messages interactionLoop() may send after a bot has gone without a response.
+const BUG_REPORTS = [
+  "bug_log no response from god",
+  "bug_log command did not work",
+  "bug_log I got nothing",
+  "bug_log wrong item given",
+  "bug_log empty response",
+  "bug_log god ignored me",
+];
+
+async function refillPool() {
+  try {
+    const prompts = await geminiGenerate();
+    promptPool.push(...prompts);
+    console.log(`[${ts()}] [Gemini] Generated ${prompts.length} prompts (pool: ${promptPool.length})`);
+    geminiErrors = 0;
+  } catch (e) {
+    geminiErrors++;
+    console.log(`[${ts()}] [Gemini] Error (${geminiErrors}): ${e.message}`);
+    // Fall back to static pool
+    if (promptPool.length < 5) {
+      const statics = [...STATIC_PRAYERS, ...STATIC_SUDO];
+      for (let i = 0; i < 10; i++) {
+        promptPool.push(statics[Math.floor(Math.random() * statics.length)]);
+      }
+    }
+  }
+}
+
+function getNextPrompt() {
+  // Refill when low
+  if (promptPool.length < 5) {
+    refillPool();
+  }
+
+  if (promptPool.length > 0) {
+    return promptPool.splice(Math.floor(Math.random() * promptPool.length), 1)[0];
+  }
+
+  // Emergency fallback
+  const all = [...STATIC_PRAYERS, ...STATIC_SUDO];
+  return all[Math.floor(Math.random() * all.length)];
+}
+
+// --- Bot logic ---
+
+// Every bot object created by spawnBot(), including replacements made after a kick.
+const bots = [];
+// Running count of logged-in bots, used only for the "Connected (n/count)" log line.
+let connected = 0;
+
+function ts() {
+  return new Date().toISOString().slice(11, 19);
+}
+
+function randomDelay(minSec, maxSec) {
+  return (minSec + Math.random() * (maxSec - minSec)) * 1000;
+}
+
+function spawnBot(index) {
+  const name = `PrayBot_${index}`;
+  console.log(`[${ts()}] [${name}] Connecting to ${host}:${port}...`);
+
+  const bot = mineflayer.createBot({
+    host,
+    port,
+    username: name,
+    auth: 'offline',
+    version: '1.21.11',
+    viewDistance: 'tiny',
+  });
+
+  bot._name = name;
+  bot._msgCount = 0;
+  bot._lastResponse = null;
+  bot._noResponseCount = 0;
+  bots.push(bot);
+
+  bot.on('login', () => {
+    connected++;
+    console.log(`[${ts()}] [${name}] Connected (${connected}/${count})`);
+    setTimeout(() => interactionLoop(bot), randomDelay(10, 20));
+  });
+
+  bot.on('message', (msg) => {
+    const text = msg.toString();
+    if (text.includes('GOD') || text.includes('SUDO') || text.includes('BUG_LOG')) {
+      console.log(`[${ts()}] [${name}] RECV: ${text.substring(0, 150)}`);
+      bot._lastResponse = text;
+      bot._noResponseCount = 0;
+    }
+  });
+
+  bot.on('error', (err) => {
+    console.error(`[${ts()}] [${name}] Error: ${err.message}`);
+  });
+
+  bot.on('kicked', (reason) => {
+    console.log(`[${ts()}] [${name}] Kicked: ${reason}`);
+    connected--;
+    setTimeout(() => spawnBot(index), 60000);
+  });
+
+  bot.on('end', () => {
+    console.log(`[${ts()}] [${name}] Disconnected`);
+    connected--;
+  });
+}
+
+function interactionLoop(bot) {
+  if (!bot.entity) return;
+
+  bot._msgCount++;
+
+  let message;
+  const roll = Math.random();
+
+  if (roll < 0.10 && bot._noResponseCount >= 2) {
+    // File bug report if we haven't gotten responses
+    message = BUG_REPORTS[Math.floor(Math.random() * BUG_REPORTS.length)];
+  } else {
+    message = getNextPrompt();
+    bot._noResponseCount++;
+  }
+
+  console.log(`[${ts()}] [${bot._name}] SEND (#${bot._msgCount}): ${message}`);
+  bot.chat(message);
+
+  // 15-45s between messages per bot
+  const delay = randomDelay(15, 45);
+  setTimeout(() => interactionLoop(bot), delay);
+}
+
+// Pre-fill the pool before bots connect
+refillPool();
+
+// Spawn bots staggered (10s apart to avoid throttle)
+for (let i = 0; i < count; i++) {
+  setTimeout(() => spawnBot(i), i * 10000);
+}
+
+// Periodically refill from Gemini
+setInterval(() => {
+  if (promptPool.length < 10) refillPool();
+}, 60000);
+
+// Graceful shutdown
+process.on('SIGINT', () => {
+  console.log(`\n[${ts()}] Shutting down ${bots.length} bots...`);
+  bots.forEach(b => { try { b.quit(); } catch(e) {} });
+  setTimeout(() => process.exit(0), 2000);
+});
+
+console.log(`[${ts()}] Spawning ${count} prayer bots on ${host}:${port}`);
+console.log(`[${ts()}] Using Gemini ${GEMINI_MODEL} for prompt generation`);
+console.log(`[${ts()}] Interaction interval: 15-45s per bot`);
+console.log(`[${ts()}] Press Ctrl+C to stop`);
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+"""
+training_status_printer.py — Prints training data collection status to the POS printer.
+
+Tracks:
+- Gemini API usage and estimated cost
+- Training audit log growth
+- Bot activity
+- Model performance snapshot
+
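+Intended to be invoked periodically (e.g. from cron). A receipt is printed only
+when the estimated Gemini cost has grown by COST_PRINT_THRESHOLD since the last
+print, or when --force is given.
+
+Usage:
+    python3 training_status_printer.py           # print if the cost threshold is reached
+    python3 training_status_printer.py --dry-run # show what would print
+    python3 training_status_printer.py --check   # show current cost vs. threshold
+    python3 training_status_printer.py --force   # print regardless of the threshold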
+"""
+
+import json
+import os
+import socket
+import subprocess
+import sys
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+
+# --- Config ---
+
+# Network address of the receipt printer; send_to_printer() opens a raw TCP
+# connection to it.
+PRINTER_IP = "192.168.0.137"
+PRINTER_PORT = 9100
+# Width, in characters, of the separator rules printed on the receipt.
+COLS = 57
+
+# Gemini Flash Lite pricing (per 1M tokens, as of 2026-03)
+# https://ai.google.dev/pricing
+# NOTE(review): the bots call gemini-2.5-flash-lite; confirm these rates are the
+# ones published for that exact model, since the cost trigger depends on them.
+GEMINI_INPUT_COST_PER_M = 0.075   # $0.075 per 1M input tokens
+GEMINI_OUTPUT_COST_PER_M = 0.30   # $0.30 per 1M output tokens
+# Approximate: our prompt is ~300 tokens input, ~200 tokens output per call
+# These are fixed per-call estimates; actual token usage is not read back.
+EST_INPUT_TOKENS_PER_CALL = 300
+EST_OUTPUT_TOKENS_PER_CALL = 200
+
+# Gemini usage tracking file
+# NOTE(review): not referenced by any code visible in this file.
+GEMINI_USAGE_FILE = "/var/log/mc_gemini_usage.json"
+
+# Cost threshold for printing ($)
+# A receipt is printed once the estimated cost has grown by this much since the
+# value stored in LAST_PRINT_COST_FILE.
+COST_PRINT_THRESHOLD = 0.50
+LAST_PRINT_COST_FILE = "/var/log/mc_training_last_print_cost.json"
+
+# Remote paths (on CT 644 via pve112)
+DEV_AUDIT_LOG = "/var/log/mc_training_audit_dev.jsonl"
+PROD_AUDIT_LOG = "/var/log/mc_training_audit.jsonl"
+BOT_LOG = "/var/log/prayer_bots.log"
+DEV_BUG_LOG = "/var/log/mc_aigod_dev_bug.log"
+
+
+def remote_cmd(cmd, timeout=10):
+    """Run a command on CT 644 via pve112."""
+    try:
+        full_cmd = f'ssh pve112 "pct exec 644 -- {cmd}"'
+        result = subprocess.run(
+            full_cmd, shell=True,
+            capture_output=True, text=True, timeout=timeout
+        )
+        return result.stdout.strip()
+    except Exception as e:
+        return f"ERROR: {e}"
+
+
+def get_audit_stats():
+    """Get training audit log stats from both servers."""
+    dev_lines = remote_cmd(f"wc -l {DEV_AUDIT_LOG} 2>/dev/null | cut -d' ' -f1")
+    prod_lines = remote_cmd(f"wc -l {PROD_AUDIT_LOG} 2>/dev/null | cut -d' ' -f1")
+    bug_lines = remote_cmd(f"wc -l {DEV_BUG_LOG} 2>/dev/null | cut -d' ' -f1")
+
+    try:
+        dev_count = int(dev_lines) if dev_lines.isdigit() else 0
+    except:
+        dev_count = 0
+    try:
+        prod_count = int(prod_lines) if prod_lines.isdigit() else 0
+    except:
+        prod_count = 0
+    try:
+        bug_count = int(bug_lines) if bug_lines.isdigit() else 0
+    except:
+        bug_count = 0
+
+    return dev_count, prod_count, bug_count
+
+
+def get_bot_stats():
+    """Get bot activity stats."""
+    bot_procs = remote_cmd("ps aux | grep prayer_bots | grep -v grep | wc -l")
+    bot_last = remote_cmd(f"tail -1 {BOT_LOG} 2>/dev/null")
+    bot_sends = remote_cmd(f"grep -c 'SEND' {BOT_LOG} 2>/dev/null")
+
+    try:
+        num_bots = int(bot_procs)
+    except:
+        num_bots = 0
+    try:
+        num_sends = int(bot_sends)
+    except:
+        num_sends = 0
+
+    return num_bots, num_sends, bot_last[:60] if bot_last else "N/A"
+
+
+def get_gemini_usage():
+    """Track Gemini API calls. Reads/writes a local JSON counter."""
+    # Count Gemini calls from bot log
+    gemini_calls = remote_cmd(f"grep -c 'Gemini.*Generated' {BOT_LOG} 2>/dev/null")
+    gemini_errors = remote_cmd(f"grep -c 'Gemini.*Error' {BOT_LOG} 2>/dev/null")
+
+    try:
+        calls = int(gemini_calls)
+    except:
+        calls = 0
+    try:
+        errors = int(gemini_errors)
+    except:
+        errors = 0
+
+    # Estimate cost
+    total_input_tokens = calls * EST_INPUT_TOKENS_PER_CALL
+    total_output_tokens = calls * EST_OUTPUT_TOKENS_PER_CALL
+    input_cost = (total_input_tokens / 1_000_000) * GEMINI_INPUT_COST_PER_M
+    output_cost = (total_output_tokens / 1_000_000) * GEMINI_OUTPUT_COST_PER_M
+    total_cost = input_cost + output_cost
+
+    return {
+        "calls": calls,
+        "errors": errors,
+        "est_input_tokens": total_input_tokens,
+        "est_output_tokens": total_output_tokens,
+        "est_cost_usd": total_cost,
+    }
+
+
+def get_dataset_size():
+    """Get current seed dataset size."""
+    try:
+        path = Path(__file__).resolve().parent.parent / "data" / "processed" / "seed_dataset.jsonl"
+        with open(path) as f:
+            return sum(1 for line in f if line.strip())
+    except:
+        return 0
+
+
+def get_service_status():
+    """Check if AI God services are running."""
+    statuses = {}
+    for svc in ["mc-aigod-paper", "mc-aigod-dev", "mc-aigod"]:
+        status = remote_cmd(f"systemctl is-active {svc}.service 2>/dev/null")
+        statuses[svc] = status
+    return statuses
+
+
+def build_receipt():
+    """Build the POS receipt."""
+    from escpos.printer import Dummy
+
+    now = datetime.now()
+    p = Dummy(profile="default")
+
+    # Header
+    p.set(font='b', align='center', bold=True, height=2)
+    p.text("MC AI TRAINING\n")
+    p.set(font='b', align='center', bold=True, height=1)
+    p.text("STATUS REPORT\n")
+    p.set(font='b', align='center', bold=False)
+    p.text(now.strftime("%Y-%m-%d %H:%M") + "\n")
+    p.text("=" * COLS + "\n")
+
+    # Dataset
+    dataset_size = get_dataset_size()
+    p.set(font='b', align='left', bold=True)
+    p.text("DATASET\n")
+    p.set(font='b', align='left', bold=False)
+    p.text(f"  Seed examples:     {dataset_size}\n")
+
+    dev_audit, prod_audit, bug_count = get_audit_stats()
+    p.text(f"  Dev audit log:     {dev_audit}\n")
+    p.text(f"  Prod audit log:    {prod_audit}\n")
+    p.text(f"  Dev bug reports:   {bug_count}\n")
+    p.text(f"  Total unprocessed: {dev_audit + prod_audit}\n")
+    p.text("-" * COLS + "\n")
+
+    # Bot activity
+    num_bots, num_sends, last_msg = get_bot_stats()
+    p.set(font='b', align='left', bold=True)
+    p.text("BOT ACTIVITY\n")
+    p.set(font='b', align='left', bold=False)
+    p.text(f"  Active bots:       {num_bots}\n")
+    p.text(f"  Total messages:    {num_sends}\n")
+    p.text(f"  Last: {last_msg}\n")
+    p.text("-" * COLS + "\n")
+
+    # Gemini API
+    gemini = get_gemini_usage()
+    p.set(font='b', align='left', bold=True)
+    p.text("GEMINI API (flash-lite)\n")
+    p.set(font='b', align='left', bold=False)
+    p.text(f"  Calls:             {gemini['calls']}\n")
+    p.text(f"  Errors:            {gemini['errors']}\n")
+    p.text(f"  Est input tokens:  {gemini['est_input_tokens']:,}\n")
+    p.text(f"  Est output tokens: {gemini['est_output_tokens']:,}\n")
+    p.set(font='b', align='left', bold=True)
+    p.text(f"  Est cost:          ${gemini['est_cost_usd']:.4f}\n")
+    p.set(font='b', align='left', bold=False)
+    p.text("-" * COLS + "\n")
+
+    # Services
+    statuses = get_service_status()
+    p.set(font='b', align='left', bold=True)
+    p.text("SERVICES\n")
+    p.set(font='b', align='left', bold=False)
+    for svc, status in statuses.items():
+        indicator = "OK" if status == "active" else "DOWN"
+        p.text(f"  {svc:22} [{indicator}]\n")
+    p.text("-" * COLS + "\n")
+
+    # Footer
+    p.set(font='b', align='center', bold=False)
+    p.text("Next print in 4 hours\n")
+    p.text("=" * COLS + "\n")
+    p.cut()
+
+    return p.output
+
+
+def send_to_printer(raw_bytes):
+    """Send raw ESC/POS bytes to the TM-m30."""
+    with socket.create_connection((PRINTER_IP, PRINTER_PORT), timeout=10) as sock:
+        sock.sendall(raw_bytes)
+
+
+def get_last_print_cost():
+    """Get the cumulative cost at which we last printed."""
+    try:
+        with open(LAST_PRINT_COST_FILE) as f:
+            return json.load(f).get("cost", 0.0)
+    except:
+        return 0.0
+
+
+def save_last_print_cost(cost):
+    """Save the cumulative cost at which we printed."""
+    with open(LAST_PRINT_COST_FILE, "w") as f:
+        json.dump({"cost": cost, "timestamp": datetime.now().isoformat()}, f)
+
+
+def should_print(current_cost):
+    """Check if we've crossed the next $COST_PRINT_THRESHOLD boundary."""
+    last_cost = get_last_print_cost()
+    return current_cost - last_cost >= COST_PRINT_THRESHOLD
+
+
+def main():
+    dry_run = "--dry-run" in sys.argv
+    force = "--force" in sys.argv
+    check_only = "--check" in sys.argv
+
+    gemini = get_gemini_usage()
+    current_cost = gemini["est_cost_usd"]
+
+    if check_only:
+        last = get_last_print_cost()
+        next_at = last + COST_PRINT_THRESHOLD
+        print(f"Current cost:    ${current_cost:.4f}")
+        print(f"Last printed at: ${last:.4f}")
+        print(f"Next print at:   ${next_at:.4f}")
+        print(f"Will print:      {'YES' if should_print(current_cost) else 'NO'}")
+        return
+
+    if dry_run:
+        print("=== DRY RUN — would print: ===\n")
+
+        dataset_size = get_dataset_size()
+        dev_audit, prod_audit, bug_count = get_audit_stats()
+        num_bots, num_sends, last_msg = get_bot_stats()
+        statuses = get_service_status()
+
+        print(f"Dataset:        {dataset_size} seed examples")
+        print(f"Dev audit:      {dev_audit} entries")
+        print(f"Prod audit:     {prod_audit} entries")
+        print(f"Bug reports:    {bug_count}")
+        print(f"Bots:           {num_bots} active, {num_sends} messages sent")
+        print(f"Gemini:         {gemini['calls']} calls, {gemini['errors']} errors, ${gemini['est_cost_usd']:.4f}")
+        print(f"Services:       {statuses}")
+        print(f"Threshold:      ${COST_PRINT_THRESHOLD} (would print: {should_print(current_cost) or force})")
+        return
+
+    if not force and not should_print(current_cost):
+        print(f"[{datetime.now().isoformat()}] Cost ${current_cost:.4f} — threshold not reached (next at ${get_last_print_cost() + COST_PRINT_THRESHOLD:.4f})")
+        return
+
+    receipt = build_receipt()
+    try:
+        send_to_printer(receipt)
+        save_last_print_cost(current_cost)
+        print(f"[{datetime.now().isoformat()}] Receipt printed at ${current_cost:.4f}")
+    except Exception as e:
+        print(f"[{datetime.now().isoformat()}] Print failed: {e}")
+
+
+# Run the status check only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()