fix: deployment health check, container cleanup, and status reporting
Three fixes for the deployment pipeline: (1) health-check path: /health -> /cameleer/health (matches the agent); (2) container cleanup: stop AND remove the old container before starting the new one, plus orphan cleanup by container name to prevent naming conflicts; (3) container status: read health.status instead of state.status so that waitForHealthy correctly detects the "healthy" state. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -119,12 +119,23 @@ public class DeploymentService {
|
|||||||
var oldContainerId = (String) oldMetadata.get("containerId");
|
var oldContainerId = (String) oldMetadata.get("containerId");
|
||||||
try {
|
try {
|
||||||
runtimeOrchestrator.stopContainer(oldContainerId);
|
runtimeOrchestrator.stopContainer(oldContainerId);
|
||||||
|
runtimeOrchestrator.removeContainer(oldContainerId);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.warn("Failed to stop old container {}: {}", oldContainerId, e.getMessage());
|
log.warn("Failed to stop/remove old container {}: {}", oldContainerId, e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
// Also try removing any container with the same name (handles orphaned containers)
|
||||||
|
try {
|
||||||
|
var existing = runtimeOrchestrator.getContainerStatus(containerName);
|
||||||
|
if (!"not_found".equals(existing.state())) {
|
||||||
|
runtimeOrchestrator.stopContainer(containerName);
|
||||||
|
runtimeOrchestrator.removeContainer(containerName);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// Container doesn't exist — expected for fresh deploys
|
||||||
|
}
|
||||||
|
|
||||||
// Build Traefik labels for inbound routing
|
// Build Traefik labels for inbound routing
|
||||||
var labels = new java.util.HashMap<String, String>();
|
var labels = new java.util.HashMap<String, String>();
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
|||||||
.withHostConfig(hostConfig)
|
.withHostConfig(hostConfig)
|
||||||
.withHealthcheck(new HealthCheck()
|
.withHealthcheck(new HealthCheck()
|
||||||
.withTest(List.of("CMD-SHELL",
|
.withTest(List.of("CMD-SHELL",
|
||||||
"wget -qO- http://localhost:" + request.healthCheckPort() + "/health || exit 1"))
|
"wget -qO- http://localhost:" + request.healthCheckPort() + "/cameleer/health || exit 1"))
|
||||||
.withInterval(10_000_000_000L) // 10s
|
.withInterval(10_000_000_000L) // 10s
|
||||||
.withTimeout(5_000_000_000L) // 5s
|
.withTimeout(5_000_000_000L) // 5s
|
||||||
.withRetries(3)
|
.withRetries(3)
|
||||||
@@ -131,8 +131,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
|||||||
try {
|
try {
|
||||||
var inspection = dockerClient.inspectContainerCmd(containerId).exec();
|
var inspection = dockerClient.inspectContainerCmd(containerId).exec();
|
||||||
var state = inspection.getState();
|
var state = inspection.getState();
|
||||||
|
var health = state.getHealth();
|
||||||
|
var healthStatus = health != null ? health.getStatus() : null;
|
||||||
|
// Use health status if available, otherwise fall back to container state
|
||||||
|
var effectiveState = healthStatus != null ? healthStatus : state.getStatus();
|
||||||
return new ContainerStatus(
|
return new ContainerStatus(
|
||||||
state.getStatus(),
|
effectiveState,
|
||||||
Boolean.TRUE.equals(state.getRunning()),
|
Boolean.TRUE.equals(state.getRunning()),
|
||||||
state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
|
state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
|
||||||
state.getError());
|
state.getError());
|
||||||
|
|||||||
Reference in New Issue
Block a user