diff --git a/.claude/rules/docker-orchestration.md b/.claude/rules/docker-orchestration.md index c79959f7..89ecbc72 100644 --- a/.claude/rules/docker-orchestration.md +++ b/.claude/rules/docker-orchestration.md @@ -23,6 +23,18 @@ When deployed via the cameleer-saas platform, this server orchestrates customer - **ContainerLogForwarder** (`app/runtime/ContainerLogForwarder.java`) — streams Docker container stdout/stderr to ClickHouse `logs` table with `source='container'`. Uses `docker logs --follow` per container, batches lines every 2s or 50 lines. Parses Docker timestamp prefix, infers log level via regex. `DeploymentExecutor` starts capture after each replica launches with the replica's `instanceId` (`{envSlug}-{appSlug}-{replicaIndex}-{generation}`); `DockerEventMonitor` stops capture on die/oom. 60-second max capture timeout with 30s cleanup scheduler. Thread pool of 10 daemon threads. Container logs use the same `instanceId` as the agent (set via `CAMELEER_AGENT_INSTANCEID` env var) for unified log correlation at the instance level. Instance-id changes per deployment — cross-deploy queries aggregate on `application + environment` (and optionally `replica_index`). - **StartupLogPanel** (`ui/src/components/StartupLogPanel.tsx`) — collapsible log panel rendered below `DeploymentProgress`. Queries `/api/v1/logs?source=container&application={appSlug}&environment={envSlug}`. Auto-polls every 3s while deployment is STARTING; shows green "live" badge during polling, red "stopped" badge on FAILED. Uses `useStartupLogs` hook and `LogViewer` (design system). +## Container Hardening (issue #152) + +`DockerRuntimeOrchestrator.startContainer` applies an unconditional hardening contract to every tenant container — Java 17 deprecates the SecurityManager for removal (JEP 411), so the JVM is not a security boundary, and isolation must live below it. Defaults are fail-closed and have no opt-out: + +- `cap_drop` = every `Capability.values()` (effectively ALL — docker-java's enum has no `ALL` constant). 
Outbound TCP still works (no caps needed); raw sockets, ptrace, mounts, and bind <1024 are denied. +- `security_opt`: `no-new-privileges:true`, `apparmor=docker-default`. Default seccomp profile is applied implicitly when `seccomp=` is absent. +- `read_only` rootfs = true. +- `pids_limit` = 512 (`PIDS_LIMIT` constant). +- `tmpfs` mount: `/tmp` with `rw,nosuid,size=256m`. **No `noexec`** — Netty/tcnative, Snappy, LZ4, Zstd dlopen native libs from `/tmp` via `mmap(PROT_EXEC)` which `noexec` blocks. Issue #153 will add per-app `writeableVolumes` for stateful tenants (Kafka Streams etc.). + +**Sandboxed runtime auto-detect**: at construction the orchestrator calls `dockerClient.infoCmd().exec().getRuntimes()` and uses `runsc` (gVisor) when present. Override with `cameleer.server.runtime.dockerruntime` (e.g. `kata` to force Kata Containers, or any other registered runtime). Empty/blank = auto. The override always wins over auto-detect. The `DockerRuntimeOrchestrator(DockerClient, String)` constructor is the canonical entry point; the single-arg constructor exists only as a convenience for tests that don't need an override. + ## DeploymentExecutor Details Primary network for app containers is set via `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` env var (in SaaS mode: `cameleer-tenant-{slug}`); apps also connect to `cameleer-traefik` (routing) and `cameleer-env-{tenantId}-{envSlug}` (per-environment discovery) as additional networks. Resolves `runtimeType: auto` to concrete type from `AppVersion.detectedRuntimeType` at PRE_FLIGHT (fails deployment if unresolvable). Builds Docker entrypoint per runtime type (all JVM types use `-javaagent:/app/agent.jar -jar`, plain Java uses `-cp` with main class, native runs binary directly). Sets per-replica `CAMELEER_AGENT_INSTANCEID` env var to `{envSlug}-{appSlug}-{replicaIndex}-{generation}` so container logs and agent logs share the same instance identity. 
Sets `CAMELEER_AGENT_*` env vars from `ResolvedContainerConfig` (routeControlEnabled, replayEnabled, health port). These are startup-only agent properties — changing them requires redeployment. diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java index 01c2f3e9..702a6602 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestrator.java @@ -7,6 +7,7 @@ import com.github.dockerjava.api.DockerClient; import com.github.dockerjava.api.async.ResultCallback; import com.github.dockerjava.api.model.AccessMode; import com.github.dockerjava.api.model.Bind; +import com.github.dockerjava.api.model.Capability; import com.github.dockerjava.api.model.Frame; import com.github.dockerjava.api.model.HealthCheck; import com.github.dockerjava.api.model.HostConfig; @@ -25,12 +26,58 @@ import java.util.stream.Stream; public class DockerRuntimeOrchestrator implements RuntimeOrchestrator { private static final Logger log = LoggerFactory.getLogger(DockerRuntimeOrchestrator.class); + + /** Sandboxed runtime we prefer when the daemon has it registered. */ + private static final String SANDBOX_RUNTIME = "runsc"; + + /** Hard cap on processes/threads per tenant container. Spring Boot + Camel + * + a Kafka client comfortably fits in 512; raise via daemon-wide limits if + * a tenant legitimately needs more (and revisit the multi-tenancy threat + * model when that happens). */ + private static final long PIDS_LIMIT = 512L; + + /** /tmp must be writeable for JVM tmpdir, JIT scratch, and JNI native lib + * unpacking (Netty tcnative, Snappy, LZ4, Zstd all dlopen from here). + * `noexec` would block dlopen via mmap(PROT_EXEC) — keep it off. 
*/ + private static final String TMPFS_TMP_OPTS = "rw,nosuid,size=256m"; + private final DockerClient dockerClient; + private final String dockerRuntime; private ContainerLogForwarder logForwarder; public DockerRuntimeOrchestrator(DockerClient dockerClient) { + this(dockerClient, ""); + } + + public DockerRuntimeOrchestrator(DockerClient dockerClient, String runtimeOverride) { this.dockerClient = dockerClient; + this.dockerRuntime = resolveRuntime(runtimeOverride); + } + + private String resolveRuntime(String override) { // non-blank override wins; else runsc if the daemon lists it; else "" (daemon default) + if (override != null && !override.isBlank()) { + log.info("Container runtime forced to '{}' via cameleer.server.runtime.dockerruntime", override); + return override; + } + try { + Map<String, ?> runtimes = dockerClient.infoCmd().exec().getRuntimes(); + if (runtimes != null && runtimes.containsKey(SANDBOX_RUNTIME)) { + log.info("gVisor ({}) detected — sandboxed runtime will be used for tenant containers", + SANDBOX_RUNTIME); + return SANDBOX_RUNTIME; + } + } catch (Exception e) { // best-effort probe: a failed `docker info` must not abort construction + log.warn("Could not query Docker runtimes: {} — falling back to daemon default", e.getMessage(), e); + } + log.info("No sandboxed runtime detected — using Docker default (runc). Install gVisor on the host " + + "for tenant kernel isolation; see issue #152."); + return ""; + } + + /** Visible for tests / introspection. Empty string = let Docker pick its default. */ + String getDockerRuntime() { + return dockerRuntime; } public void setLogForwarder(ContainerLogForwarder logForwarder) { @@ -68,12 +115,36 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator { List envList = request.envVars().entrySet().stream() .map(e -> e.getKey() + "=" + e.getValue()).toList(); + // Tenant containers run untrusted user JVMs — every tenant JAR can call + // Runtime.exec, reflective bean dispatch, MVEL/Groovy templating. Java 17 + // deprecates the SecurityManager (JEP 411), so isolation MUST live below the JVM. + // See issue #152 for the full threat model. 
Defaults are fail-closed: + // - cap_drop ALL: outbound TCP still works (no caps needed); raw sockets, + // ptrace, mounts, and bind <1024 are all denied. + // - no-new-privileges: setuid binaries cannot escalate. + // - apparmor=docker-default: Docker's stock MAC profile. + // Daemon's default seccomp profile is applied implicitly when no + // `seccomp=` override is set — no need to declare it. + // - readonly rootfs + /tmp tmpfs: persistence-via-write defeated; apps + // needing durable state declare writeableVolumes (issue #153). + // - pids-limit: fork bombs cannot exhaust the host PID namespace. HostConfig hostConfig = HostConfig.newHostConfig() .withMemory(request.memoryLimitBytes()) .withMemorySwap(request.memoryLimitBytes()) .withCpuShares(request.cpuShares()) .withNetworkMode(request.network()) - .withRestartPolicy(RestartPolicy.onFailureRestart(request.restartPolicyMaxRetries())); + .withRestartPolicy(RestartPolicy.onFailureRestart(request.restartPolicyMaxRetries())) + .withCapDrop(Capability.values()) + .withSecurityOpts(List.of( + "no-new-privileges:true", + "apparmor=docker-default")) + .withReadonlyRootfs(true) + .withPidsLimit(PIDS_LIMIT) + .withTmpFs(Map.of("/tmp", TMPFS_TMP_OPTS)); + + if (!dockerRuntime.isBlank()) { + hostConfig.withRuntime(dockerRuntime); + } // JAR mounting: volume mount (Docker-in-Docker) or bind mount (host path) if (request.jarVolumeName() != null && !request.jarVolumeName().isBlank()) { diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/RuntimeOrchestratorAutoConfig.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/RuntimeOrchestratorAutoConfig.java index 67f7b6c2..b1388f94 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/RuntimeOrchestratorAutoConfig.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/runtime/RuntimeOrchestratorAutoConfig.java @@ -11,6 +11,7 @@ import com.github.dockerjava.zerodep.ZerodepDockerHttpClient; 
import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -41,10 +42,12 @@ public class RuntimeOrchestratorAutoConfig { @Bean public RuntimeOrchestrator runtimeOrchestrator( @Autowired(required = false) DockerClient dockerClient, - @Autowired(required = false) ContainerLogForwarder logForwarder) { + @Autowired(required = false) ContainerLogForwarder logForwarder, + @Value("${cameleer.server.runtime.dockerruntime:}") String dockerRuntimeOverride) { if (dockerClient != null) { log.info("Docker socket detected - enabling Docker runtime orchestrator"); - DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient); + DockerRuntimeOrchestrator orchestrator = + new DockerRuntimeOrchestrator(dockerClient, dockerRuntimeOverride); if (logForwarder != null) { orchestrator.setLogForwarder(logForwarder); } diff --git a/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestratorHardeningTest.java b/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestratorHardeningTest.java new file mode 100644 index 00000000..b865a6dd --- /dev/null +++ b/cameleer-server-app/src/test/java/com/cameleer/server/app/runtime/DockerRuntimeOrchestratorHardeningTest.java @@ -0,0 +1,207 @@ +package com.cameleer.server.app.runtime; + +import com.cameleer.server.core.runtime.ContainerRequest; +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.command.CreateContainerCmd; +import com.github.dockerjava.api.command.CreateContainerResponse; +import com.github.dockerjava.api.command.InfoCmd; +import com.github.dockerjava.api.command.StartContainerCmd; +import com.github.dockerjava.api.model.Capability; +import com.github.dockerjava.api.model.HostConfig; 
+import com.github.dockerjava.api.model.Info;
+import org.junit.jupiter.api.Test;
+import org.mockito.Answers;
+import org.mockito.ArgumentCaptor;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Verifies the multi-tenant hardening contract from issue #152: every tenant
+ * container is launched with cap_drop ALL, no-new-privileges, AppArmor profile,
+ * read-only rootfs, a pids limit, and a writeable /tmp tmpfs. Also verifies the
+ * runsc auto-detect via `docker info` and the explicit override.
+ */
+class DockerRuntimeOrchestratorHardeningTest {
+
+    private static ContainerRequest sampleRequest() {
+        return new ContainerRequest(
+                "tenant-app-0-abcd1234",
+                "registry.example/runtime:latest",
+                "/data/jars/app.jar",
+                null, null,
+                "tenant-net",
+                List.of(),
+                Map.of("CAMELEER_AGENT_APPLICATION", "myapp"),
+                Map.of(),
+                512L * 1024 * 1024,
+                null,
+                512,
+                null,
+                List.of(8080),
+                9464,
+                "on-failure",
+                3,
+                "spring-boot",
+                "",
+                null);
+    }
+
+    private static DockerClient mockDockerClientWithRuntimes(Map<String, Object> runtimes) {
+        DockerClient dockerClient = mock(DockerClient.class);
+        InfoCmd infoCmd = mock(InfoCmd.class);
+        Info info = mock(Info.class);
+        when(dockerClient.infoCmd()).thenReturn(infoCmd);
+        when(infoCmd.exec()).thenReturn(info);
+        when(info.getRuntimes()).thenReturn((Map) runtimes); // raw cast: stub compiles whatever value type getRuntimes() declares
+        return dockerClient;
+    }
+
+    @Test
+    void resolveRuntime_picksRunscWhenDaemonHasIt() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of(
+                "runc", new Object(),
+                "runsc", new Object()));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+
+        assertThat(orchestrator.getDockerRuntime()).isEqualTo("runsc");
+    }
+
+    @Test
+    void resolveRuntime_returnsEmptyWhenSandboxedRuntimeMissing() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of("runc", new Object()));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+
+        assertThat(orchestrator.getDockerRuntime()).isEmpty();
+    }
+
+    @Test
+    void resolveRuntime_overrideWinsOverAutoDetect() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of(
+                "runc", new Object(),
+                "runsc", new Object()));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "kata");
+
+        assertThat(orchestrator.getDockerRuntime()).isEqualTo("kata");
+    }
+
+    @Test
+    void resolveRuntime_blankOverrideTreatedAsAuto() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of("runsc", new Object()));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "   ");
+
+        assertThat(orchestrator.getDockerRuntime()).isEqualTo("runsc");
+    }
+
+    @Test
+    void resolveRuntime_swallowsDockerInfoFailure() {
+        DockerClient dockerClient = mock(DockerClient.class);
+        InfoCmd infoCmd = mock(InfoCmd.class);
+        when(dockerClient.infoCmd()).thenReturn(infoCmd);
+        when(infoCmd.exec()).thenThrow(new RuntimeException("docker daemon unreachable"));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+
+        assertThat(orchestrator.getDockerRuntime()).isEmpty();
+    }
+
+    @Test
+    void startContainer_appliesHardeningContractToHostConfig() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(new HashMap<>());
+
+        CreateContainerCmd createCmd = mock(CreateContainerCmd.class, Answers.RETURNS_SELF);
+        when(dockerClient.createContainerCmd(anyString())).thenReturn(createCmd);
+        CreateContainerResponse createResponse = mock(CreateContainerResponse.class);
+        when(createResponse.getId()).thenReturn("container-id-1");
+        when(createCmd.exec()).thenReturn(createResponse);
+        StartContainerCmd startCmd = mock(StartContainerCmd.class);
+        when(dockerClient.startContainerCmd(anyString())).thenReturn(startCmd);
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+        orchestrator.startContainer(sampleRequest());
+
+        ArgumentCaptor<HostConfig> hostCaptor = ArgumentCaptor.forClass(HostConfig.class);
+        org.mockito.Mockito.verify(createCmd).withHostConfig(hostCaptor.capture());
+        HostConfig hc = hostCaptor.getValue();
+
+        // cap_drop ALL — every capability the SDK knows about
+        assertThat(hc.getCapDrop())
+                .as("cap_drop should drop every capability")
+                .containsExactlyInAnyOrder(Capability.values());
+
+        // no-new-privileges + apparmor stock profile
+        assertThat(hc.getSecurityOpts())
+                .as("security_opt must include no-new-privileges and apparmor=docker-default")
+                .contains("no-new-privileges:true", "apparmor=docker-default");
+
+        // readonly rootfs
+        assertThat(hc.getReadonlyRootfs())
+                .as("read_only rootfs must be enabled")
+                .isTrue();
+
+        // pids-limit applied
+        assertThat(hc.getPidsLimit())
+                .as("pids_limit must be set to bound fork-bomb damage")
+                .isNotNull()
+                .isEqualTo(512L); // pin the documented limit; "any positive value" would not prove a bound
+
+        // /tmp tmpfs writable, nosuid, no `noexec` (would break JNI dlopen)
+        assertThat(hc.getTmpFs())
+                .as("/tmp must be a writeable tmpfs")
+                .containsKey("/tmp");
+        String tmpOpts = hc.getTmpFs().get("/tmp");
+        assertThat(tmpOpts).contains("rw").contains("nosuid").doesNotContain("noexec");
+    }
+
+    @Test
+    void startContainer_doesNotForceRuntimeWhenAutoDetectFindsNothing() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of("runc", new Object()));
+        CreateContainerCmd createCmd = mock(CreateContainerCmd.class, Answers.RETURNS_SELF);
+        when(dockerClient.createContainerCmd(anyString())).thenReturn(createCmd);
+        CreateContainerResponse createResponse = mock(CreateContainerResponse.class);
+        when(createResponse.getId()).thenReturn("c");
+        when(createCmd.exec()).thenReturn(createResponse);
+        when(dockerClient.startContainerCmd(anyString())).thenReturn(mock(StartContainerCmd.class));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+        orchestrator.startContainer(sampleRequest());
+
+        ArgumentCaptor<HostConfig> hostCaptor = ArgumentCaptor.forClass(HostConfig.class);
+        org.mockito.Mockito.verify(createCmd).withHostConfig(hostCaptor.capture());
+
+        // When daemon has no sandboxed runtime, we leave runtime null/empty so Docker picks its default.
+        String runtime = hostCaptor.getValue().getRuntime();
+        assertThat(runtime == null || runtime.isBlank())
+                .as("no runtime should be forced when sandboxed runtime unavailable")
+                .isTrue();
+    }
+
+    @Test
+    void startContainer_appliesRunscWhenAvailable() {
+        DockerClient dockerClient = mockDockerClientWithRuntimes(Map.of("runsc", new Object()));
+        CreateContainerCmd createCmd = mock(CreateContainerCmd.class, Answers.RETURNS_SELF);
+        when(dockerClient.createContainerCmd(anyString())).thenReturn(createCmd);
+        CreateContainerResponse createResponse = mock(CreateContainerResponse.class);
+        when(createResponse.getId()).thenReturn("c");
+        when(createCmd.exec()).thenReturn(createResponse);
+        when(dockerClient.startContainerCmd(anyString())).thenReturn(mock(StartContainerCmd.class));
+
+        DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator(dockerClient, "");
+        orchestrator.startContainer(sampleRequest());
+
+        ArgumentCaptor<HostConfig> hostCaptor = ArgumentCaptor.forClass(HostConfig.class);
+        org.mockito.Mockito.verify(createCmd).withHostConfig(hostCaptor.capture());
+
+        assertThat(hostCaptor.getValue().getRuntime()).isEqualTo("runsc");
+    }
+}