fix: deployment health check, container cleanup, and status reporting

Three fixes for the deployment pipeline:
1. Health check path: /health -> /cameleer/health (matches agent)
2. Container cleanup: stop AND remove old container before starting
   new one, plus orphan cleanup by container name to prevent conflicts
3. Container status: report health.status when the container has
   health information (falling back to state.status otherwise) so
   waitForHealthy correctly detects the "healthy" state

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-07 16:20:33 +02:00
parent 35276f66e9
commit 8407d8b3c0
2 changed files with 18 additions and 3 deletions

View File

@@ -119,12 +119,23 @@ public class DeploymentService {
var oldContainerId = (String) oldMetadata.get("containerId"); var oldContainerId = (String) oldMetadata.get("containerId");
try { try {
runtimeOrchestrator.stopContainer(oldContainerId); runtimeOrchestrator.stopContainer(oldContainerId);
runtimeOrchestrator.removeContainer(oldContainerId);
} catch (Exception e) { } catch (Exception e) {
log.warn("Failed to stop old container {}: {}", oldContainerId, e.getMessage()); log.warn("Failed to stop/remove old container {}: {}", oldContainerId, e.getMessage());
} }
} }
}); });
} }
// Also try removing any container with the same name (handles orphaned containers)
try {
var existing = runtimeOrchestrator.getContainerStatus(containerName);
if (!"not_found".equals(existing.state())) {
runtimeOrchestrator.stopContainer(containerName);
runtimeOrchestrator.removeContainer(containerName);
}
} catch (Exception e) {
// Container doesn't exist — expected for fresh deploys
}
// Build Traefik labels for inbound routing // Build Traefik labels for inbound routing
var labels = new java.util.HashMap<String, String>(); var labels = new java.util.HashMap<String, String>();

View File

@@ -94,7 +94,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
.withHostConfig(hostConfig) .withHostConfig(hostConfig)
.withHealthcheck(new HealthCheck() .withHealthcheck(new HealthCheck()
.withTest(List.of("CMD-SHELL", .withTest(List.of("CMD-SHELL",
"wget -qO- http://localhost:" + request.healthCheckPort() + "/health || exit 1")) "wget -qO- http://localhost:" + request.healthCheckPort() + "/cameleer/health || exit 1"))
.withInterval(10_000_000_000L) // 10s .withInterval(10_000_000_000L) // 10s
.withTimeout(5_000_000_000L) // 5s .withTimeout(5_000_000_000L) // 5s
.withRetries(3) .withRetries(3)
@@ -131,8 +131,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
try { try {
var inspection = dockerClient.inspectContainerCmd(containerId).exec(); var inspection = dockerClient.inspectContainerCmd(containerId).exec();
var state = inspection.getState(); var state = inspection.getState();
var health = state.getHealth();
var healthStatus = health != null ? health.getStatus() : null;
// Use health status if available, otherwise fall back to container state
var effectiveState = healthStatus != null ? healthStatus : state.getStatus();
return new ContainerStatus( return new ContainerStatus(
state.getStatus(), effectiveState,
Boolean.TRUE.equals(state.getRunning()), Boolean.TRUE.equals(state.getRunning()),
state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0, state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
state.getError()); state.getError());