fix: deployment health check, container cleanup, and status reporting

Three fixes for the deployment pipeline:

1. Health check path: /health -> /cameleer/health (matches agent)
2. Container cleanup: stop AND remove old container before starting new one, plus orphan cleanup by container name to prevent conflicts
3. Container status: read health.status instead of state.status so waitForHealthy correctly detects the "healthy" state

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 16:20:33 +02:00
parent 35276f66e9
commit 8407d8b3c0
2 changed files with 18 additions and 3 deletions
--- a/src/main/java/net/siegeln/cameleer/saas/deployment/DeploymentService.java
+++ b/src/main/java/net/siegeln/cameleer/saas/deployment/DeploymentService.java
@@ -119,12 +119,23 @@ public class DeploymentService {
                        var oldContainerId = (String) oldMetadata.get("containerId");
                        try {
                            runtimeOrchestrator.stopContainer(oldContainerId);
                            runtimeOrchestrator.removeContainer(oldContainerId);
                        } catch (Exception e) {
-                            log.warn("Failed to stop old container {}: {}", oldContainerId, e.getMessage());
+                            log.warn("Failed to stop/remove old container {}: {}", oldContainerId, e.getMessage());
                        }
                    }
                });
            }
            // Also try removing any container with the same name (handles orphaned containers)
            try {
                var existing = runtimeOrchestrator.getContainerStatus(containerName);
                if (!"not_found".equals(existing.state())) {
                    runtimeOrchestrator.stopContainer(containerName);
                    runtimeOrchestrator.removeContainer(containerName);
                }
            } catch (Exception e) {
                // Container doesn't exist — expected for fresh deploys
            }
            // Build Traefik labels for inbound routing
            var labels = new java.util.HashMap<String, String>();
--- a/src/main/java/net/siegeln/cameleer/saas/runtime/DockerRuntimeOrchestrator.java
+++ b/src/main/java/net/siegeln/cameleer/saas/runtime/DockerRuntimeOrchestrator.java
@@ -94,7 +94,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
                .withHostConfig(hostConfig)
                .withHealthcheck(new HealthCheck()
                        .withTest(List.of("CMD-SHELL",
-                                "wget -qO- http://localhost:" + request.healthCheckPort() + "/health || exit 1"))
+                                "wget -qO- http://localhost:" + request.healthCheckPort() + "/cameleer/health || exit 1"))
                        .withInterval(10_000_000_000L)      // 10s
                        .withTimeout(5_000_000_000L)         // 5s
                        .withRetries(3)
@@ -131,8 +131,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
        try {
            var inspection = dockerClient.inspectContainerCmd(containerId).exec();
            var state = inspection.getState();
            var health = state.getHealth();
            var healthStatus = health != null ? health.getStatus() : null;
            // Use health status if available, otherwise fall back to container state
            var effectiveState = healthStatus != null ? healthStatus : state.getStatus();
            return new ContainerStatus(
-                    state.getStatus(),
+                    effectiveState,
                    Boolean.TRUE.equals(state.getRunning()),
                    state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
                    state.getError());