fix(runtime): pre-pull loader image, plug volume-leak windows, document network dep

Pre-pull the loader image at PULL_IMAGE so the implicit pull on first
createContainerCmd doesn't bypass the 120s loader-wait timeout.

Wrap createAndStartLoader in try/catch so a create/start failure cleans
up the just-created volume; apply the same guard around createAndStartMain
for phase-2 failures. Fold the wait-error message into the rethrown
RuntimeException's message so the root cause is visible at the top level
without walking the cause chain.

Add a @PostConstruct WARN when neither artifactbaseurl nor serverurl is
set so the implicit cameleer-server DNS dependency is loud at boot, and
document the loader-to-server reachability contract in
.claude/rules/docker-orchestration.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-27 16:26:35 +02:00
parent 1ddae94930
commit cc076b1923
3 changed files with 38 additions and 4 deletions

View File

@@ -106,6 +106,17 @@ public class DeploymentExecutor {
this.licenseUsageReader = licenseUsageReader;
}
/**
 * Post-construction configuration check.
 *
 * <p>Emits a single WARN log entry when both {@code artifactBaseUrl} and
 * {@code globalServerUrl} are blank. The check is advisory only: it throws
 * nothing itself and modifies no state. The warning text explains the
 * fallback address and which setting to provide to override it.
 */
@jakarta.annotation.PostConstruct
public void validateArtifactBaseUrl() {
    // At least one explicit URL is configured — nothing to warn about.
    final boolean explicitUrlConfigured = !artifactBaseUrl.isBlank() || !globalServerUrl.isBlank();
    if (explicitUrlConfigured) {
        return;
    }

    final String warning =
            "Neither cameleer.server.runtime.artifactbaseurl nor cameleer.server.runtime.serverurl is set. "
            + "Loader containers will fall back to http://cameleer-server:8081 — this requires the loader's "
            + "Docker network to resolve `cameleer-server`. In SaaS mode the server is on `cameleer-traefik` "
            + "which is added as an additional network for tenant containers, so this works. For other "
            + "deployment topologies, set CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL explicitly.";
    log.warn(warning);
}
/** Deployment-scoped id suffix — distinguishes container names and
* CAMELEER_AGENT_INSTANCEID across redeploys so old + new replicas can
* coexist during a blue/green swap. First 8 chars of the deployment UUID. */
@@ -211,6 +222,7 @@ public class DeploymentExecutor {
// === PULL IMAGE ===
updateStage(deployment.id(), DeployStage.PULL_IMAGE);
orchestrator.pullImage(baseImage);
orchestrator.pullImage(loaderImage);
// === CREATE NETWORKS ===
updateStage(deployment.id(), DeployStage.CREATE_NETWORK);

View File

@@ -138,7 +138,15 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
// the shared volume. Hardened identically to the main container, plus
// RW bind on /app/jars and the artifact env vars the loader entrypoint
// expects. We block on its exit code before bringing the main up.
String loaderId = createAndStartLoader(request, volumeName);
String loaderId;
try {
loaderId = createAndStartLoader(request, volumeName);
} catch (Exception e) {
// Volume created but loader never reached the wait/cleanup paths — clean up here.
cleanupVolume(volumeName);
throw new RuntimeException("Loader create/start failed for " + request.containerName(), e);
}
int exitCode;
try {
exitCode = dockerClient.waitContainerCmd(loaderId)
@@ -146,7 +154,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
.awaitStatusCode(LOADER_WAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
} catch (Exception e) {
cleanup(loaderId, volumeName);
throw new RuntimeException("Loader wait failed for " + request.containerName(), e);
throw new RuntimeException("Loader wait failed for " + request.containerName() + ": " + e.getMessage(), e);
} finally {
try {
dockerClient.removeContainerCmd(loaderId).withForce(true).exec();
@@ -159,8 +167,14 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
throw new RuntimeException("Loader exited " + exitCode + " for " + request.containerName());
}
// Phase 2: Main container — RO on the shared volume.
return createAndStartMain(request, volumeName);
// Phase 2: Main container — RO on the shared volume. Wrap in try/catch
// so a main-create failure cleans up the volume too (loader already gone).
try {
return createAndStartMain(request, volumeName);
} catch (Exception e) {
cleanupVolume(volumeName);
throw new RuntimeException("Main container create/start failed for " + request.containerName(), e);
}
}
private String createAndStartLoader(ContainerRequest request, String volumeName) {