diff --git a/.claude/rules/docker-orchestration.md b/.claude/rules/docker-orchestration.md index 5de80ebd..37b147dc 100644 --- a/.claude/rules/docker-orchestration.md +++ b/.claude/rules/docker-orchestration.md @@ -41,7 +41,7 @@ When deployed via the cameleer-saas platform, this server orchestrates customer `startContainer` is now a two-phase op per replica: 1. **Volume create** — `cameleer-jars-{containerName}` named volume (per-replica, deterministic so cleanup in `removeContainer` can derive it). -2. **Loader container** — `loaderImage` (default `gitea.siegeln.net/cameleer/cameleer-runtime-loader:latest`), name `{containerName}-loader`, mount the volume **RW at `/app/jars`**, env vars `ARTIFACT_URL` + `ARTIFACT_EXPECTED_SIZE`. Loader downloads the JAR from the signed URL into the volume and exits 0. Orchestrator blocks on `waitContainerCmd().exec(WaitContainerResultCallback).awaitStatusCode(120, SECONDS)`. Loader container is removed in a `finally` block; on non-zero exit the volume is also removed and `RuntimeException` propagates so `DeploymentExecutor` marks the deployment FAILED. **Loader logs are captured before removal** (`captureLoaderLogs` — `logContainerCmd` with `withTail(50)`, capped at 4096 chars, 5s timeout) and appended to the thrown `RuntimeException` message as `". loader output: "`. Best-effort: log-capture failures are swallowed and don't mask the original exit. The loader image's Dockerfile pre-creates `/app/jars` owned by `loader:loader` (UID 1000) so the orchestrator's fresh named volume initialises with that ownership — without it the empty volume comes up as `root:root 0755` and wget exits 1 with "Permission denied". `LoaderHardeningIT` is the regression guard. +2. 
**Loader container** — `loaderImage` (default `gitea.siegeln.net/cameleer/cameleer-runtime-loader:latest`, **built and published by the cameleer-saas repo** at `docker/runtime-loader/`), name `{containerName}-loader`, mount the volume **RW at `/app/jars`**, env vars `ARTIFACT_URL` + `ARTIFACT_EXPECTED_SIZE`. Loader downloads the JAR from the signed URL into the volume and exits 0. Orchestrator blocks on `waitContainerCmd().exec(WaitContainerResultCallback).awaitStatusCode(120, SECONDS)`. Loader container is removed in a `finally` block; on non-zero exit the volume is also removed and `RuntimeException` propagates so `DeploymentExecutor` marks the deployment FAILED. **Loader logs are captured before removal** (`captureLoaderLogs` — `logContainerCmd` with `withTail(50)`, capped at 4096 chars, 5s timeout) and appended to the thrown `RuntimeException` message as `". loader output: "`. Best-effort: log-capture failures are swallowed and don't mask the original exit. The loader image's Dockerfile pre-creates `/app/jars` owned by `loader:loader` (UID 1000) so the orchestrator's fresh named volume initialises with that ownership — without it the empty volume comes up as `root:root 0755` and wget exits 1 with "Permission denied". `LoaderHardeningIT` is the cross-repo contract test (pulls the published `:latest` and asserts exit 0 under the orchestrator's hardening shape). 3. **Main container** — same hardening contract, mount the same volume **RO at `/app/jars`**, entrypoint reads `/app/jars/app.jar` (Spring Boot/Quarkus: `-jar /app/jars/app.jar`; plain Java: `-cp /app/jars/app.jar `; native: `exec /app/jars/app.jar`). `removeContainer(id)` derives the volume name from the inspected container name (Docker prefixes it with `/`) and removes the volume after the container removes — blue/green doesn't leak volumes. 
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index e8140eb1..430654f5 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -30,29 +30,8 @@ jobs: credentials: username: cameleer password: ${{ secrets.REGISTRY_TOKEN }} - outputs: - loader_changed: ${{ steps.loader_changed.outputs.changed }} steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Detect runtime-loader changes - id: loader_changed - run: | - BEFORE="${{ github.event.before }}" - if [ -z "$BEFORE" ] \ - || [ "$BEFORE" = "0000000000000000000000000000000000000000" ] \ - || ! git cat-file -e "$BEFORE^{commit}" 2>/dev/null; then - echo "No prior commit available — assuming loader changed." - echo "changed=true" >> "$GITHUB_OUTPUT" - elif git diff --name-only "$BEFORE" "${{ github.sha }}" | grep -q '^cameleer-runtime-loader/'; then - echo "cameleer-runtime-loader/ changed since $BEFORE." - echo "changed=true" >> "$GITHUB_OUTPUT" - else - echo "No changes under cameleer-runtime-loader/ — skipping image build." 
- echo "changed=false" >> "$GITHUB_OUTPUT" - fi - name: Configure Gitea Maven Registry run: | @@ -177,19 +156,6 @@ jobs: --push ui/ env: REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} - - name: Build and push runtime-loader - if: needs.build.outputs.loader_changed == 'true' - run: | - TAGS="-t gitea.siegeln.net/cameleer/cameleer-runtime-loader:${{ github.sha }}" - for TAG in $IMAGE_TAGS; do - TAGS="$TAGS -t gitea.siegeln.net/cameleer/cameleer-runtime-loader:$TAG" - done - docker buildx build --platform linux/amd64 \ - $TAGS \ - --provenance=false \ - --push cameleer-runtime-loader/ - env: - REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} - name: Cleanup local Docker run: docker system prune -af --filter "until=24h" if: always() @@ -203,7 +169,7 @@ jobs: if [ "$BRANCH_SLUG" != "main" ]; then KEEP_TAGS="$KEEP_TAGS branch-$BRANCH_SLUG" fi - for PKG in cameleer-server cameleer-server-ui cameleer-runtime-loader; do + for PKG in cameleer-server cameleer-server-ui; do curl -sf -H "$AUTH" "$API/packages/cameleer/container/$PKG" | \ jq -r '.[] | "\(.id) \(.version)"' | \ while read id version; do @@ -433,7 +399,7 @@ jobs: run: | API="https://gitea.siegeln.net/api/v1" AUTH="Authorization: token ${REGISTRY_TOKEN}" - for PKG in cameleer-server cameleer-server-ui cameleer-runtime-loader; do + for PKG in cameleer-server cameleer-server-ui; do # Delete branch-specific tag curl -sf -X DELETE -H "$AUTH" "$API/packages/cameleer/container/$PKG/branch-${BRANCH_SLUG}" || true done diff --git a/cameleer-runtime-loader/Dockerfile b/cameleer-runtime-loader/Dockerfile deleted file mode 100644 index 31db2ae1..00000000 --- a/cameleer-runtime-loader/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -# Tiny init-container image. No app code, no shell-injection surface — script -# only sees env vars set by the orchestrator. -FROM busybox:1.37-musl - -# Run as non-root (UID 1000 inside the container; with userns_mode this is -# remapped to host UID ~101000 — fully unprivileged on the host). 
-# Pre-create /app/jars owned by `loader` so the orchestrator's named-volume -# mount inherits that ownership at first init — without it the empty named -# volume comes up as root:root 0755 and wget can't write app.jar. -RUN adduser -D -u 1000 loader && mkdir -p /app/jars && chown -R loader:loader /app - -COPY entrypoint.sh /usr/local/bin/loader -RUN chmod +x /usr/local/bin/loader - -USER loader -WORKDIR /app -ENTRYPOINT ["/usr/local/bin/loader"] diff --git a/cameleer-runtime-loader/README.md b/cameleer-runtime-loader/README.md deleted file mode 100644 index f7482ecf..00000000 --- a/cameleer-runtime-loader/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# cameleer-runtime-loader - -Init container that fetches the deployable JAR into a shared volume before the -main runtime container starts. Pairs with `DockerRuntimeOrchestrator` / -(future) K8s init-container deploys. - -## Build - -CI (`.gitea/workflows/ci.yml`, `docker` job) builds and pushes this image -automatically on pushes that change anything under `cameleer-runtime-loader/`. -Manual build for local testing: - - docker build -t gitea.siegeln.net/cameleer/cameleer-runtime-loader: . - docker push gitea.siegeln.net/cameleer/cameleer-runtime-loader: - -## Contract - -- Env: `ARTIFACT_URL` (signed download URL), `ARTIFACT_EXPECTED_SIZE` (bytes). -- Volume: writes `/app/jars/app.jar`. -- Exit 0 on success; non-zero on fetch/size failure. -- Runs as UID 1000 (loader user), drops all caps, read-only rootfs except `/app/jars`. - -See `docs/superpowers/plans/2026-04-27-init-container-jar-fetch.md`. diff --git a/cameleer-runtime-loader/entrypoint.sh b/cameleer-runtime-loader/entrypoint.sh deleted file mode 100644 index 2e2043e9..00000000 --- a/cameleer-runtime-loader/entrypoint.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh -# cameleer-runtime-loader: fetches one JAR from a signed URL into the shared -# /app/jars/ volume, verifies size, exits. 
Runs in the same hardened sandbox as -# the main container (cap_drop ALL, read-only rootfs, etc.) — only /app/jars/ -# is writeable. -set -eu - -: "${ARTIFACT_URL:?ARTIFACT_URL is required}" -: "${ARTIFACT_EXPECTED_SIZE:?ARTIFACT_EXPECTED_SIZE is required}" - -OUT=/app/jars/app.jar -mkdir -p /app/jars - -echo "loader: fetching artifact (expected $ARTIFACT_EXPECTED_SIZE bytes)" -# -q quiet, -O output, --tries=3 retry transient network blips, -# --timeout=30 cap stalls. wget exits non-zero on HTTP >=400. -wget -q --tries=3 --timeout=30 -O "$OUT" "$ARTIFACT_URL" - -actual=$(wc -c < "$OUT") -if [ "$actual" -ne "$ARTIFACT_EXPECTED_SIZE" ]; then - echo "loader: size mismatch — expected $ARTIFACT_EXPECTED_SIZE, got $actual" >&2 - exit 2 -fi - -echo "loader: artifact written to $OUT ($actual bytes)" diff --git a/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/LoaderHardeningIT.java b/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/LoaderHardeningIT.java index 8d058aac..8252202b 100644 --- a/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/LoaderHardeningIT.java +++ b/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/LoaderHardeningIT.java @@ -13,13 +13,11 @@ import org.testcontainers.containers.BindMode; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; import org.testcontainers.containers.startupcheck.OneShotStartupCheckStrategy; -import org.testcontainers.images.builder.ImageFromDockerfile; import org.testcontainers.junit.jupiter.Testcontainers; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.time.Duration; import java.util.List; import java.util.Map; @@ -37,14 +35,19 @@ import static org.assertj.core.api.Assertions.assertThat; * {@link DockerRuntimeOrchestrator}'s {@code baseHardenedHostConfig()} + * loader-specific bind, against a real artifact server, and asserts the * loader 
writes the expected file. + * + * <p>
Image source: the loader's Dockerfile lives in the cameleer-saas repo + * (`docker/runtime-loader/`) and is built+pushed by saas CI. This IT pulls + * the published `:latest` tag — running it locally requires a `docker login + * gitea.siegeln.net` (CI runners are pre-authenticated). The IT acts as a + * cross-repo contract test: cameleer-server's hardening expectations versus + * the saas-published artifact. */ @Testcontainers class LoaderHardeningIT { - private static final Path LOADER_DIR = Paths - .get(System.getProperty("user.dir")) - .getParent() - .resolve("cameleer-runtime-loader"); + private static final String LOADER_IMAGE = + "gitea.siegeln.net/cameleer/cameleer-runtime-loader:latest"; private static final int ARTIFACT_BYTES = 1024; @@ -54,7 +57,6 @@ class LoaderHardeningIT { private GenericContainer loader; private Path fixtureDir; private String volumeName; - private String loaderImageId; @BeforeEach void setUp() throws IOException { @@ -74,10 +76,6 @@ class LoaderHardeningIT { BindMode.READ_ONLY); fileServer.start(); - loaderImageId = new ImageFromDockerfile() - .withFileFromPath(".", LOADER_DIR) - .get(); - volumeName = "cameleer-loader-it-" + UUID.randomUUID().toString().substring(0, 8); dockerClient.createVolumeCmd().withName(volumeName).exec(); } @@ -107,7 +105,7 @@ class LoaderHardeningIT { // exited with status 0. Anything else (non-zero exit, timeout) throws // ContainerLaunchException — the assertion below is a belt-and-braces // explicit check on the resolved exit code. - loader = new GenericContainer<>(loaderImageId) + loader = new GenericContainer<>(LOADER_IMAGE) .withNetwork(network) .withEnv("ARTIFACT_URL", "http://file-server/artifact.jar") .withEnv("ARTIFACT_EXPECTED_SIZE", String.valueOf(ARTIFACT_BYTES))