diff --git a/.claude/rules/docker-orchestration.md b/.claude/rules/docker-orchestration.md index 65d95c79..9c9a83d0 100644 --- a/.claude/rules/docker-orchestration.md +++ b/.claude/rules/docker-orchestration.md @@ -48,6 +48,14 @@ When deployed via the cameleer-saas platform, this server orchestrates customer `DeploymentExecutor` generates the signed URL via `ArtifactDownloadTokenSigner.sign(appVersion.id(), Duration.ofSeconds(artifactTokenTtlSeconds))` and passes `appVersion.id()`, the URL, `appVersion.jarSizeBytes()`, and the loader image into `ContainerRequest`. The host filesystem is no longer involved at deploy time. +**Loader → server reachability**: the loader container hits the Cameleer server over HTTP from inside its +own Docker network. The signed URL is built from `cameleer.server.runtime.artifactbaseurl` (preferred), falling +back to `cameleer.server.runtime.serverurl`, falling back to `http://cameleer-server:8081`. The default works +in SaaS mode because `DockerNetworkManager` adds `cameleer-traefik` as an additional network for tenant +containers, and the server is reachable on that network via the `cameleer-server` DNS alias. For non-SaaS +topologies (server on a different network than tenants), set `CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL` +explicitly to a URL the loader can reach. + ## DeploymentExecutor Details Primary network for app containers is set via `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` env var (in SaaS mode: `cameleer-tenant-{slug}`); apps also connect to `cameleer-traefik` (routing) and `cameleer-env-{tenantId}-{envSlug}` (per-environment discovery) as additional networks. Resolves `runtimeType: auto` to concrete type from `AppVersion.detectedRuntimeType` at PRE_FLIGHT (fails deployment if unresolvable). Builds Docker entrypoint per runtime type (all JVM types use `-javaagent:/app/agent.jar -jar`, plain Java uses `-cp` with main class, native runs binary directly). 
Sets per-replica `CAMELEER_AGENT_INSTANCEID` env var to `{envSlug}-{appSlug}-{replicaIndex}-{generation}` so container logs and agent logs share the same instance identity. Sets `CAMELEER_AGENT_*` env vars from `ResolvedContainerConfig` (routeControlEnabled, replayEnabled, health port). These are startup-only agent properties — changing them requires redeployment. diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DeploymentExecutor.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DeploymentExecutor.java index 78841946..83757a4c 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DeploymentExecutor.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DeploymentExecutor.java @@ -106,6 +106,17 @@ public class DeploymentExecutor { this.licenseUsageReader = licenseUsageReader; } + /** Post-construct hook: logs a warning when both {@code artifactBaseUrl} and {@code globalServerUrl} are blank, because loader containers then rely on the built-in default server address named in the message. It only logs; it does not change configuration. NOTE(review): assumes both fields are non-null, since isBlank() would throw on null; confirm how they are injected. */ @jakarta.annotation.PostConstruct + public void validateArtifactBaseUrl() { + if (artifactBaseUrl.isBlank() && globalServerUrl.isBlank()) { + log.warn("Neither cameleer.server.runtime.artifactbaseurl nor cameleer.server.runtime.serverurl is set. " + + "Loader containers will fall back to http://cameleer-server:8081 — this requires the loader's " + + "Docker network to resolve `cameleer-server`. In SaaS mode the server is on `cameleer-traefik` " + + "which is added as an additional network for tenant containers, so this works. For other " + + "deployment topologies, set CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL explicitly."); + } + } + /** Deployment-scoped id suffix — distinguishes container names and * CAMELEER_AGENT_INSTANCEID across redeploys so old + new replicas can * coexist during a blue/green swap. First 8 chars of the deployment UUID. 
*/ @@ -211,6 +222,7 @@ public class DeploymentExecutor { // === PULL IMAGE === updateStage(deployment.id(), DeployStage.PULL_IMAGE); orchestrator.pullImage(baseImage); + orchestrator.pullImage(loaderImage); // === CREATE NETWORKS === updateStage(deployment.id(), DeployStage.CREATE_NETWORK); diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java index bd24d90b..6770685c 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java @@ -138,7 +138,15 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator { // the shared volume. Hardened identically to the main container, plus // RW bind on /app/jars and the artifact env vars the loader entrypoint // expects. We block on its exit code before bringing the main up. - String loaderId = createAndStartLoader(request, volumeName); + String loaderId; + try { + loaderId = createAndStartLoader(request, volumeName); + } catch (Exception e) { + // Volume created but loader never reached the wait/cleanup paths — clean up here. 
+ cleanupVolume(volumeName); + throw new RuntimeException("Loader create/start failed for " + request.containerName(), e); + } + int exitCode; try { exitCode = dockerClient.waitContainerCmd(loaderId) @@ -146,7 +154,7 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator { .awaitStatusCode(LOADER_WAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS); } catch (Exception e) { cleanup(loaderId, volumeName); - throw new RuntimeException("Loader wait failed for " + request.containerName(), e); + throw new RuntimeException("Loader wait failed for " + request.containerName() + ": " + e.getMessage(), e); } finally { try { dockerClient.removeContainerCmd(loaderId).withForce(true).exec(); @@ -159,8 +167,14 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator { throw new RuntimeException("Loader exited " + exitCode + " for " + request.containerName()); } - // Phase 2: Main container — RO on the shared volume. - return createAndStartMain(request, volumeName); + // Phase 2: Main container — RO on the shared volume. Wrap in try/catch + // so a main-create failure cleans up the volume too (loader already gone). + try { + return createAndStartMain(request, volumeName); + } catch (Exception e) { + cleanupVolume(volumeName); + throw new RuntimeException("Main container create/start failed for " + request.containerName(), e); + } } private String createAndStartLoader(ContainerRequest request, String volumeName) {