From fef3ef6184ac795604645b773abf19e6b03221fb Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:24:03 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20DockerEventMonitor=20=E2=80=94=20persis?= =?UTF-8?q?tent=20event=20stream=20for=20container=20lifecycle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Listens to Docker daemon events (die, oom, start, stop) for containers labeled managed-by=cameleer3-server, updates replica states in Postgres, and recomputes aggregate deployment status (RUNNING/DEGRADED/FAILED). Bean is wired in RuntimeOrchestratorAutoConfig via instanceof guard so it only activates when Docker is available. Co-Authored-By: Claude Sonnet 4.6 --- .../app/runtime/DockerEventMonitor.java | 129 ++++++++++++++++++ .../RuntimeOrchestratorAutoConfig.java | 11 ++ 2 files changed, 140 insertions(+) create mode 100644 cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java new file mode 100644 index 00000000..333139c1 --- /dev/null +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java @@ -0,0 +1,129 @@ +package com.cameleer3.server.app.runtime; + +import com.cameleer3.server.app.storage.PostgresDeploymentRepository; +import com.cameleer3.server.core.runtime.Deployment; +import com.cameleer3.server.core.runtime.DeploymentStatus; +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.async.ResultCallback; +import com.github.dockerjava.api.model.Event; +import com.github.dockerjava.api.model.EventType; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.*; + +public class DockerEventMonitor { + + private static final Logger log = LoggerFactory.getLogger(DockerEventMonitor.class); + + private final DockerClient dockerClient; + private final PostgresDeploymentRepository deploymentRepository; + private Closeable eventStream; + + public DockerEventMonitor(DockerRuntimeOrchestrator orchestrator, + PostgresDeploymentRepository deploymentRepository) { + this.dockerClient = orchestrator.getDockerClient(); + this.deploymentRepository = deploymentRepository; + } + + @PostConstruct + public void startListening() { + eventStream = dockerClient.eventsCmd() + .withEventTypeFilter(EventType.CONTAINER) + .withEventFilter("die", "oom", "start", "stop") + .exec(new ResultCallback.Adapter() { + @Override + public void onNext(Event event) { + handleEvent(event); + } + + @Override + public void onError(Throwable throwable) { + log.warn("Docker event stream error, reconnecting: {}", throwable.getMessage()); + reconnect(); + } + }); + + log.info("Docker event monitor started"); + } + + @PreDestroy + public void stop() { + if (eventStream != null) { + try { eventStream.close(); } catch (IOException e) { /* ignore */ } + } + } + + private void handleEvent(Event event) { + String containerId = event.getId(); + if (containerId == null) return; + + Map labels = event.getActor() != null ? event.getActor().getAttributes() : null; + if (labels == null || !"cameleer3-server".equals(labels.get("managed-by"))) return; + + String action = event.getAction(); + log.debug("Docker event: {} for container {} ({})", action, containerId.substring(0, 12), + labels.get("cameleer.app")); + + Optional deploymentOpt = deploymentRepository.findByContainerId(containerId); + if (deploymentOpt.isEmpty()) return; + + Deployment deployment = deploymentOpt.get(); + List> replicas = new ArrayList<>(deployment.replicaStates()); + + boolean changed = false; + for (int i = 0; i < replicas.size(); i++) { + Map replica = replicas.get(i); + if (containerId.equals(replica.get("containerId"))) { + Map updated = new HashMap<>(replica); + switch (action) { + case "die", "oom", "stop" -> { + updated.put("status", "DEAD"); + if ("oom".equals(action)) { + updated.put("oomKilled", true); + log.warn("Container {} OOM-killed (app={}, env={})", containerId.substring(0, 12), + labels.get("cameleer.app"), labels.get("cameleer.environment")); + } + } + case "start" -> updated.put("status", "RUNNING"); + } + replicas.set(i, updated); + changed = true; + break; + } + } + + if (!changed) return; + + deploymentRepository.updateReplicaStates(deployment.id(), replicas); + + long running = replicas.stream().filter(r -> "RUNNING".equals(r.get("status"))).count(); + DeploymentStatus newStatus; + if (running == replicas.size()) { + newStatus = DeploymentStatus.RUNNING; + } else if (running > 0) { + newStatus = DeploymentStatus.DEGRADED; + } else { + newStatus = DeploymentStatus.FAILED; + } + + if (deployment.status() != newStatus) { + deploymentRepository.updateStatus(deployment.id(), newStatus, deployment.containerId(), deployment.errorMessage()); + log.info("Deployment {} status: {} -> {} ({}/{} replicas running)", + deployment.id(), deployment.status(), newStatus, running, replicas.size()); + } + } + + private void reconnect() { + try { + Thread.sleep(5000); + startListening(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java index 2ff23601..ff3bd654 100644 --- a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java @@ -1,5 +1,7 @@ package com.cameleer3.server.app.runtime; +import com.cameleer3.server.app.storage.PostgresDeploymentRepository; +import com.cameleer3.server.core.runtime.DeploymentRepository; import com.cameleer3.server.core.runtime.RuntimeOrchestrator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,4 +35,13 @@ public class RuntimeOrchestratorAutoConfig { } return null; } + + @Bean + public DockerEventMonitor dockerEventMonitor(RuntimeOrchestrator orchestrator, + DeploymentRepository deploymentRepository) { + if (orchestrator instanceof DockerRuntimeOrchestrator docker) { + return new DockerEventMonitor(docker, (PostgresDeploymentRepository) deploymentRepository); + } + return null; + } }