ead16fd429
Root cause: self-play opened/closed a new TCP socket for every RCON command (hundreds/minute). Paper's RCON listener creates a thread per connection, overwhelming the server until it stopped. Fix: PersistentRCON class maintains a single connection per server with auto-reconnect. Thread-safe via lock. Connection pool keyed by host:port. Applied to: - mc_aigod_paper.py (prod paper-ai + dev) - mc_aigod.py (shrink-world) - self_play.py (training data generation) - persistent_rcon.py (shared module) Before: ~100+ RCON connections/minute → server crash After: 3 persistent connections total → stable Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
148 lines
4.7 KiB
Python
148 lines
4.7 KiB
Python
"""
|
|
Persistent RCON connection with auto-reconnect.
|
|
|
|
Replaces the pattern of opening/closing a socket per command.
|
|
Thread-safe — one instance per server, shared across threads.
|
|
|
|
Usage:
|
|
rcon = PersistentRCON("localhost", 25577, "password")
|
|
result = rcon.command("list")
|
|
result = rcon.command("give player minecraft:diamond 1")
|
|
# Connection stays open, auto-reconnects on failure
|
|
"""
|
|
|
|
import socket
|
|
import struct
|
|
import threading
|
|
import time
|
|
import logging
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class PersistentRCON:
    """RCON client that keeps a single TCP connection open and reuses it.

    The connection is opened lazily by the first ``command()`` call and is
    re-established (one retry) when a send or receive fails.  All socket
    traffic happens while holding an internal lock, so one instance can be
    shared between threads.

    Usage:
        rcon = PersistentRCON("localhost", 25577, "password")
        result = rcon.command("list")
    """

    # Smallest legal frame body: 4-byte id + 4-byte type + two NUL bytes.
    _MIN_FRAME_LEN = 10
    # Sanity cap on a declared frame length.  A larger value almost certainly
    # means the byte stream is out of sync, so it is treated as a dead link
    # rather than an allocation request.
    _MAX_FRAME_LEN = 1 << 20
    # Request ids are packed as signed 32-bit ints ("<i").
    _MAX_REQ_ID = 0x7FFFFFFF

    def __init__(self, host: str, port: int, password: str, timeout: float = 10.0):
        """Store connection settings; no network I/O happens here.

        Args:
            host: Server host name or address.
            port: RCON TCP port.
            password: RCON password sent in the auth packet.
            timeout: Socket timeout in seconds for connect/send/recv.
        """
        self.host = host
        self.port = port
        self.password = password
        self.timeout = timeout
        self._sock = None
        self._lock = threading.Lock()
        self._req_id = 0
        self._connected = False

    def _drop(self):
        """Close the current socket (if any) and mark the client disconnected."""
        sock, self._sock = self._sock, None
        self._connected = False
        if sock is not None:
            try:
                sock.close()
            except OSError:
                # Closing is best-effort; the socket is being discarded anyway.
                pass

    def _connect(self):
        """Open a fresh socket and authenticate.

        Raises:
            ConnectionError: if the TCP connect fails, the server sends no
                auth reply, or the server rejects the password.
        """
        # Never leave a previous socket open behind us.
        self._drop()
        try:
            self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self._sock.settimeout(self.timeout)
            self._sock.connect((self.host, self.port))

            # Authenticate (packet type 3 = login).
            self._send_packet(1, 3, self.password)
            frame = self._recv_frame()
            if frame is None:
                raise ConnectionError("Auth failed — no response")
            if frame[0] == -1:
                # The RCON protocol signals a rejected login with id -1.
                raise ConnectionError("Auth failed — server rejected the password")

            self._connected = True
            log.debug(f"RCON connected to {self.host}:{self.port}")
        except Exception as e:
            # Close the half-open socket instead of just forgetting it.
            self._drop()
            raise ConnectionError(f"RCON connect failed: {e}") from e

    def _send_packet(self, req_id: int, ptype: int, payload: str):
        """Frame and send one packet.

        Wire layout: little-endian int32 length, int32 request id, int32
        type, UTF-8 payload, then two NUL bytes.  The length counts
        everything after the length field itself.
        """
        data = struct.pack("<ii", req_id, ptype) + payload.encode("utf-8") + b"\x00\x00"
        self._sock.sendall(struct.pack("<i", len(data)) + data)

    def _recv_exact(self, count: int) -> "bytes | None":
        """Read exactly ``count`` bytes; return None if the peer closes first.

        ``recv`` may legitimately return fewer bytes than requested, so keep
        reading until the requested amount has arrived.
        """
        buf = bytearray()
        while len(buf) < count:
            chunk = self._sock.recv(count - len(buf))
            if not chunk:
                return None
            buf += chunk
        return bytes(buf)

    def _recv_frame(self) -> "tuple[int, int, str] | None":
        """Read one packet and return ``(request_id, type, payload)``.

        Returns None on timeout, socket error, peer close, or a declared
        length outside the plausible range.
        """
        try:
            header = self._recv_exact(4)
            if header is None:
                return None
            (length,) = struct.unpack("<i", header)
            if not self._MIN_FRAME_LEN <= length <= self._MAX_FRAME_LEN:
                return None
            body = self._recv_exact(length)
            if body is None:
                return None
        except OSError:
            # Covers socket.timeout, ConnectionResetError and BrokenPipeError.
            return None
        req_id, ptype = struct.unpack_from("<ii", body)
        # Strip the 8-byte id/type prefix and the two trailing NULs.
        return req_id, ptype, body[8:-2].decode("utf-8", errors="replace")

    def _recv_packet(self) -> "str | None":
        """Read one packet and return only its payload text (None on failure)."""
        frame = self._recv_frame()
        return None if frame is None else frame[2]

    def command(self, cmd: str) -> str:
        """Execute an RCON command, reconnecting and retrying once on failure.

        Args:
            cmd: Command text to send (packet type 2).

        Returns:
            The response payload; ``""`` if the connection was lost on both
            attempts; or ``"ERROR: <reason>"`` if a socket/connect error
            occurred on the retry.

        NOTE(review): when the send succeeds but the reply is lost, the retry
        sends the command a second time.  That matters for non-idempotent
        commands — confirm callers accept it.
        """
        with self._lock:
            # Try twice — first attempt, then reconnect and retry.
            for attempt in range(2):
                try:
                    if not self._connected or self._sock is None:
                        self._connect()

                    # Wrap within 1..2**31-1 so the id always fits "<i".
                    self._req_id = self._req_id % self._MAX_REQ_ID + 1
                    self._send_packet(self._req_id, 2, cmd)
                    result = self._recv_packet()
                except OSError as e:
                    # ConnectionError, BrokenPipeError and socket.timeout are
                    # all OSError subclasses.
                    self._drop()
                    if attempt == 0:
                        log.debug(f"RCON error ({e}), reconnecting...")
                        time.sleep(0.5)
                        continue
                    log.warning(f"RCON failed after retry: {e}")
                    return f"ERROR: {e}"

                if result is None:
                    # Connection dropped or stream unusable: discard the socket.
                    self._drop()
                    if attempt == 0:
                        log.debug("RCON connection lost, reconnecting...")
                        continue
                    return ""

                return result

        return ""

    def close(self):
        """Close the connection; a later ``command()`` call reconnects."""
        with self._lock:
            self._drop()

    def __del__(self):
        # Best-effort cleanup.  Attributes may be missing if __init__ did not
        # finish, and a finalizer must never raise.
        try:
            self.close()
        except Exception:
            pass
|
|
|
|
|
|
# --- Global connection pool ---
|
|
# Maps a "host:port" key to the shared PersistentRCON instance for that server.
_pool = {}
|
|
# Guards lookups and insertions in _pool.
_pool_lock = threading.Lock()
|
|
|
|
|
|
def get_rcon(host: str, port: int, password: str) -> PersistentRCON:
    """Return the pooled PersistentRCON for ``host:port``, creating it on first use.

    The pool is keyed by host and port only; ``password`` is used solely when
    a new instance has to be constructed.
    """
    pool_key = "{}:{}".format(host, port)
    with _pool_lock:
        conn = _pool.get(pool_key)
        if conn is None:
            conn = PersistentRCON(host, port, password)
            _pool[pool_key] = conn
        return conn
|
|
|
|
|
|
def rcon(cmd: str, host: str, port: int, password: str) -> str:
    """Run ``cmd`` over the pooled persistent connection for ``host:port``.

    Drop-in replacement for the old per-call rcon() helper: the arguments are
    the same, but the connection is looked up via get_rcon().
    """
    return get_rcon(host, port, password).command(cmd)
|