fix: deployment health check, container cleanup, and status reporting
Three fixes for the deployment pipeline:

1. Health check path: /health -> /cameleer/health (matches the agent).
2. Container cleanup: stop AND remove the old container before starting the new one, plus orphan cleanup by container name to prevent name conflicts.
3. Container status: report health.status instead of state.status (falling back to state.status when the container has no health data) so that waitForHealthy correctly detects the "healthy" state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -119,12 +119,23 @@ public class DeploymentService {
|
||||
var oldContainerId = (String) oldMetadata.get("containerId");
|
||||
try {
|
||||
runtimeOrchestrator.stopContainer(oldContainerId);
|
||||
runtimeOrchestrator.removeContainer(oldContainerId);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to stop old container {}: {}", oldContainerId, e.getMessage());
|
||||
log.warn("Failed to stop/remove old container {}: {}", oldContainerId, e.getMessage());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
// Also try removing any container with the same name (handles orphaned containers)
|
||||
try {
|
||||
var existing = runtimeOrchestrator.getContainerStatus(containerName);
|
||||
if (!"not_found".equals(existing.state())) {
|
||||
runtimeOrchestrator.stopContainer(containerName);
|
||||
runtimeOrchestrator.removeContainer(containerName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Container doesn't exist — expected for fresh deploys
|
||||
}
|
||||
|
||||
// Build Traefik labels for inbound routing
|
||||
var labels = new java.util.HashMap<String, String>();
|
||||
|
||||
@@ -94,7 +94,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
||||
.withHostConfig(hostConfig)
|
||||
.withHealthcheck(new HealthCheck()
|
||||
.withTest(List.of("CMD-SHELL",
|
||||
"wget -qO- http://localhost:" + request.healthCheckPort() + "/health || exit 1"))
|
||||
"wget -qO- http://localhost:" + request.healthCheckPort() + "/cameleer/health || exit 1"))
|
||||
.withInterval(10_000_000_000L) // 10s
|
||||
.withTimeout(5_000_000_000L) // 5s
|
||||
.withRetries(3)
|
||||
@@ -131,8 +131,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
||||
try {
|
||||
var inspection = dockerClient.inspectContainerCmd(containerId).exec();
|
||||
var state = inspection.getState();
|
||||
var health = state.getHealth();
|
||||
var healthStatus = health != null ? health.getStatus() : null;
|
||||
// Use health status if available, otherwise fall back to container state
|
||||
var effectiveState = healthStatus != null ? healthStatus : state.getStatus();
|
||||
return new ContainerStatus(
|
||||
state.getStatus(),
|
||||
effectiveState,
|
||||
Boolean.TRUE.equals(state.getRunning()),
|
||||
state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
|
||||
state.getError());
|
||||
|
||||
Reference in New Issue
Block a user