Compare commits

13 Commits

Author SHA1 Message Date
hsiegeln
0dafad883e chore: bump @cameleer/design-system to 0.1.49
Some checks failed
CI / cleanup-branch (push) Has been skipped
CI / build (push) Failing after 37s
CI / docker (push) Has been skipped
CI / deploy (push) Has been skipped
CI / deploy-feature (push) Has been skipped
LogViewer now renders source badges (container/app/agent) on log entries.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:33:19 +02:00
hsiegeln
1287952387 feat: show startup logs panel below deployment progress
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:24:08 +02:00
hsiegeln
81dd81fc07 feat: add container source option to log source filters
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:21:34 +02:00
hsiegeln
e7732703a6 feat: add StartupLogPanel component for deployment startup logs
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:21:26 +02:00
hsiegeln
119cf912b8 feat: add useStartupLogs hook for container startup log polling
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:21:23 +02:00
hsiegeln
81f42d5409 feat: stop container log capture on Docker die/oom events
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:19:26 +02:00
hsiegeln
49c7de7082 feat: stop container log capture when agent SSE connects
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:19:17 +02:00
hsiegeln
4940bf3376 feat: start log capture when deployment replicas are created
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:18:56 +02:00
hsiegeln
de85a861c7 feat: wire ContainerLogForwarder into DockerRuntimeOrchestrator
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:17:54 +02:00
hsiegeln
729944d3ac feat: add ContainerLogForwarder for Docker log streaming to ClickHouse
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:15:49 +02:00
hsiegeln
9c65a3c3b9 feat: add log capture methods to RuntimeOrchestrator interface
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:14:31 +02:00
hsiegeln
8fabc2b308 docs: add container startup log capture implementation plan
12 tasks covering RuntimeOrchestrator extension, ContainerLogForwarder,
deployment/SSE/event monitor integration, and UI startup log panel.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:12:01 +02:00
hsiegeln
14215bebec docs: add container startup log capture design spec
Covers streaming Docker logs to ClickHouse until agent SSE connect,
deployment log panel UI, and source badge in general log views.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 23:04:24 +02:00
18 changed files with 1638 additions and 11 deletions

View File

@@ -3,7 +3,9 @@ package com.cameleer3.server.app.agent;
import com.cameleer3.server.app.config.AgentRegistryConfig;
import com.cameleer3.server.core.agent.AgentCommand;
import com.cameleer3.server.core.agent.AgentEventListener;
import com.cameleer3.server.core.agent.AgentInfo;
import com.cameleer3.server.core.agent.AgentRegistryService;
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import jakarta.annotation.PostConstruct;
@@ -35,13 +37,16 @@ public class SseConnectionManager implements AgentEventListener {
private final AgentRegistryConfig config;
private final SsePayloadSigner ssePayloadSigner;
private final ObjectMapper objectMapper;
private final RuntimeOrchestrator runtimeOrchestrator;
public SseConnectionManager(AgentRegistryService registryService, AgentRegistryConfig config,
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper) {
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper,
RuntimeOrchestrator runtimeOrchestrator) {
this.registryService = registryService;
this.config = config;
this.ssePayloadSigner = ssePayloadSigner;
this.objectMapper = objectMapper;
this.runtimeOrchestrator = runtimeOrchestrator;
}
@PostConstruct
@@ -81,6 +86,13 @@ public class SseConnectionManager implements AgentEventListener {
});
log.info("SSE connection established for agent {}", agentId);
// Stop container log capture — agent is now online and will send its own logs
AgentInfo agent = registryService.findById(agentId);
if (agent != null) {
runtimeOrchestrator.stopLogCaptureByApp(agent.applicationId(), agent.environmentId());
}
return emitter;
}

View File

@@ -0,0 +1,260 @@
package com.cameleer3.server.app.runtime;
import com.cameleer3.common.model.LogEntry;
import com.cameleer3.server.app.search.ClickHouseLogStore;
import com.cameleer3.server.core.ingestion.BufferedLogEntry;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallback;
import com.github.dockerjava.api.model.Frame;
import jakarta.annotation.PreDestroy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Streams Docker container stdout/stderr to ClickHouse until the agent registers via SSE.
* One capture session per container, managed by container ID.
*/
/**
 * Streams Docker container stdout/stderr to ClickHouse until the agent registers via SSE.
 *
 * <p>One capture session per container, keyed by container ID. A session ends when:
 * <ul>
 *   <li>{@link #stopCapture(String)} is called (container die/oom/stop),</li>
 *   <li>{@link #stopCaptureByApp(String, String)} is called (agent SSE connect),</li>
 *   <li>the Docker log stream completes or errors, or</li>
 *   <li>the session exceeds {@code MAX_CAPTURE_DURATION_MS} (background cleanup task).</li>
 * </ul>
 *
 * <p>Thread-safety: sessions live in a {@link ConcurrentHashMap}; each session buffer is a
 * synchronized list drained atomically in {@link #flushBuffer(CaptureSession)}, so a line is
 * flushed exactly once even when the stream callback, stop paths, and the cleanup task race.
 */
public class ContainerLogForwarder {

    private static final Logger log = LoggerFactory.getLogger(ContainerLogForwarder.class);

    /** Flush to ClickHouse once this many lines are buffered. */
    private static final int FLUSH_BATCH_SIZE = 50;
    /** Also flush when this much time has passed since the last flush. */
    private static final long FLUSH_INTERVAL_MS = 2_000;
    /** Hard cap on a capture session; protects against agents that never connect. */
    private static final long MAX_CAPTURE_DURATION_MS = 5 * 60 * 1_000;
    /** Period of the background expiry/flush task. */
    private static final long CLEANUP_INTERVAL_MS = 30_000;
    /** Abbreviated container-ID length used for logging and the stored instance ID. */
    private static final int SHORT_ID_LENGTH = 12;

    // Docker `--timestamps` prefix, e.g. "2026-04-14T14:23:01.234567890Z <message>".
    // DOTALL lets the message group span embedded newlines within a single frame.
    private static final Pattern DOCKER_TS_PATTERN = Pattern.compile(
            "^(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z)\\s(.*)$", Pattern.DOTALL);
    // Heuristic: infer a log level from conventional Java log output.
    private static final Pattern LEVEL_PATTERN = Pattern.compile(
            "\\b(ERROR|WARN|INFO|DEBUG|TRACE)\\b");

    private final DockerClient dockerClient;
    private final ClickHouseLogStore logStore;
    private final ConcurrentHashMap<String, CaptureSession> sessions = new ConcurrentHashMap<>();
    // Daemon threads so a hung Docker stream can never block JVM shutdown.
    private final ExecutorService executor = Executors.newFixedThreadPool(10,
            r -> { Thread t = new Thread(r, "log-capture"); t.setDaemon(true); return t; });
    private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(
            r -> { Thread t = new Thread(r, "log-capture-cleanup"); t.setDaemon(true); return t; });

    public ContainerLogForwarder(DockerClient dockerClient, ClickHouseLogStore logStore) {
        this.dockerClient = dockerClient;
        this.logStore = logStore;
        scheduler.scheduleAtFixedRate(this::cleanupExpiredSessions,
                CLEANUP_INTERVAL_MS, CLEANUP_INTERVAL_MS, TimeUnit.MILLISECONDS);
    }

    /**
     * Starts streaming logs for {@code containerId}. Idempotent: a second call for a
     * container that is already being captured is a no-op.
     */
    public void startCapture(String containerId, String appSlug, String envSlug, String tenantId) {
        CaptureSession session = new CaptureSession(containerId, appSlug, envSlug, tenantId);
        // Single atomic check-and-insert; a separate containsKey() pre-check would leave a
        // window where two threads both pass it and one does redundant work.
        if (sessions.putIfAbsent(containerId, session) != null) {
            log.debug("Already capturing logs for container {}", shortId(containerId));
            return;
        }
        session.future = executor.submit(() -> streamLogs(session));
        log.info("Started log capture for container {} (app={}, env={})",
                shortId(containerId), appSlug, envSlug);
    }

    /** Stops capture for a single container and flushes any remaining buffered lines. */
    public void stopCapture(String containerId) {
        CaptureSession session = sessions.remove(containerId);
        if (session == null) return;
        session.cancel();
        flushBuffer(session);
        log.info("Stopped log capture for container {} ({} lines captured)",
                shortId(containerId), session.lineCount);
    }

    /** Stops capture for every container of the given app+env (e.g. agent SSE connected). */
    public void stopCaptureByApp(String appSlug, String envSlug) {
        List<String> toRemove = new ArrayList<>();
        for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
            CaptureSession s = entry.getValue();
            if (appSlug.equals(s.appSlug) && envSlug.equals(s.envSlug)) {
                toRemove.add(entry.getKey());
            }
        }
        for (String containerId : toRemove) {
            stopCapture(containerId);
        }
        if (!toRemove.isEmpty()) {
            log.info("Stopped log capture for app={} env={} ({} containers)",
                    appSlug, envSlug, toRemove.size());
        }
    }

    /** Flushes and tears down all sessions and worker threads on application shutdown. */
    @PreDestroy
    public void shutdown() {
        for (String containerId : new ArrayList<>(sessions.keySet())) {
            stopCapture(containerId);
        }
        scheduler.shutdownNow();
        executor.shutdownNow();
    }

    /** Worker body: attaches to the container's log stream and feeds the session buffer. */
    private void streamLogs(CaptureSession session) {
        try {
            session.callback = dockerClient.logContainerCmd(session.containerId)
                    .withFollowStream(true)
                    .withStdOut(true)
                    .withStdErr(true)
                    .withTimestamps(true)
                    .exec(new ResultCallback.Adapter<Frame>() {
                        @Override
                        public void onNext(Frame frame) {
                            if (session.cancelled) return;
                            // Decode explicitly as UTF-8: the charset-less String constructor
                            // uses the platform default, which mangles container output on
                            // hosts not configured for UTF-8.
                            String line = new String(frame.getPayload(), StandardCharsets.UTF_8).trim();
                            if (line.isEmpty()) return;
                            session.buffer.add(line);
                            session.lineCount++; // single writer: only this callback thread increments
                            if (session.buffer.size() >= FLUSH_BATCH_SIZE
                                    || System.currentTimeMillis() - session.lastFlush >= FLUSH_INTERVAL_MS) {
                                flushBuffer(session);
                            }
                        }
                        @Override
                        public void onComplete() {
                            flushBuffer(session);
                            sessions.remove(session.containerId);
                            log.debug("Log stream completed for container {}",
                                    shortId(session.containerId));
                        }
                        @Override
                        public void onError(Throwable throwable) {
                            flushBuffer(session);
                            sessions.remove(session.containerId);
                            log.debug("Log stream error for container {}: {}",
                                    shortId(session.containerId), throwable.getMessage());
                        }
                    });
        } catch (Exception e) {
            sessions.remove(session.containerId);
            log.warn("Failed to start log capture for container {}: {}",
                    shortId(session.containerId), e.getMessage());
        }
    }

    /**
     * Drains the session buffer and writes the lines to ClickHouse with
     * {@code source = "container"}. Safe for concurrent callers: the buffer is drained
     * atomically under its own lock, so each line is flushed exactly once.
     */
    private void flushBuffer(CaptureSession session) {
        List<String> lines;
        synchronized (session.buffer) {
            if (session.buffer.isEmpty()) return;
            lines = new ArrayList<>(session.buffer);
            session.buffer.clear();
        }
        session.lastFlush = System.currentTimeMillis();
        List<BufferedLogEntry> entries = new ArrayList<>(lines.size());
        for (String line : lines) {
            Instant timestamp = Instant.now();
            String message = line;
            Matcher tsMatcher = DOCKER_TS_PATTERN.matcher(line);
            if (tsMatcher.matches()) {
                try {
                    timestamp = Instant.parse(tsMatcher.group(1));
                } catch (DateTimeParseException e) {
                    // Unparseable Docker timestamp — keep Instant.now() as a best effort.
                }
                message = tsMatcher.group(2);
            }
            String level = inferLevel(message);
            LogEntry logEntry = new LogEntry();
            logEntry.setTimestamp(timestamp);
            logEntry.setLevel(level);
            logEntry.setMessage(message);
            logEntry.setLoggerName("");
            logEntry.setThreadName("");
            logEntry.setStackTrace("");
            logEntry.setMdc(Collections.emptyMap());
            logEntry.setSource("container");
            entries.add(new BufferedLogEntry(
                    session.tenantId, session.envSlug, shortId(session.containerId),
                    session.appSlug, logEntry));
        }
        try {
            logStore.insertBufferedBatch(entries);
        } catch (Exception e) {
            // Best-effort: dropping startup lines is preferable to failing the capture path.
            log.warn("Failed to flush {} container log entries for {}: {}",
                    entries.size(), session.appSlug, e.getMessage());
        }
    }

    /**
     * Heuristic level inference: stack-trace continuation lines are ERROR; otherwise the
     * first conventional level token found in the message, defaulting to INFO.
     */
    private String inferLevel(String message) {
        if (message.startsWith("\tat ") || message.startsWith("Caused by:")) {
            return "ERROR";
        }
        Matcher m = LEVEL_PATTERN.matcher(message);
        if (m.find()) {
            return m.group(1);
        }
        return "INFO";
    }

    /**
     * Periodic task: stops sessions that exceeded the max capture duration, and flushes
     * buffers of quiet streams. The latter matters because {@code onNext} only flushes when
     * a new frame arrives — without this, a trailing burst of lines on an idle stream could
     * sit buffered until the session ends.
     */
    private void cleanupExpiredSessions() {
        long now = System.currentTimeMillis();
        for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
            CaptureSession session = entry.getValue();
            if (now - session.startedAt > MAX_CAPTURE_DURATION_MS) {
                log.info("Log capture timeout for container {} (app={}), stopping",
                        shortId(entry.getKey()), session.appSlug);
                stopCapture(entry.getKey());
            } else if (now - session.lastFlush >= FLUSH_INTERVAL_MS) {
                flushBuffer(session);
            }
        }
    }

    /**
     * Abbreviates a container ID for logging/storage. Real Docker IDs are 64 hex chars,
     * but this tolerates shorter IDs instead of throwing StringIndexOutOfBoundsException
     * as a bare {@code substring(0, 12)} would.
     */
    private static String shortId(String containerId) {
        return containerId.length() <= SHORT_ID_LENGTH
                ? containerId
                : containerId.substring(0, SHORT_ID_LENGTH);
    }

    /** Mutable per-container capture state. */
    private static class CaptureSession {
        final String containerId;
        final String appSlug;
        final String envSlug;
        final String tenantId;
        final long startedAt = System.currentTimeMillis();
        // Synchronized list: appended by the Docker callback thread, drained by flushBuffer().
        final List<String> buffer = Collections.synchronizedList(new ArrayList<>());
        volatile long lastFlush = System.currentTimeMillis();
        volatile long lineCount = 0; // written only by the callback thread; read elsewhere
        volatile boolean cancelled = false;
        volatile Future<?> future;
        volatile ResultCallback.Adapter<Frame> callback;

        CaptureSession(String containerId, String appSlug, String envSlug, String tenantId) {
            this.containerId = containerId;
            this.appSlug = appSlug;
            this.envSlug = envSlug;
            this.tenantId = tenantId;
        }

        /** Cancels the session: closes the Docker stream and interrupts the worker thread. */
        void cancel() {
            cancelled = true;
            if (callback != null) {
                try { callback.close(); } catch (Exception e) { /* ignore — tearing down */ }
            }
            if (future != null) {
                future.cancel(true);
            }
        }
    }
}

View File

@@ -199,6 +199,8 @@ public class DeploymentExecutor {
}
}
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
replicaStates.add(Map.of(
"index", i,
"containerId", containerId,

View File

@@ -13,4 +13,7 @@ public class DisabledRuntimeOrchestrator implements RuntimeOrchestrator {
@Override public void removeContainer(String id) { throw new UnsupportedOperationException("Runtime management disabled"); }
@Override public ContainerStatus getContainerStatus(String id) { return ContainerStatus.notFound(); }
@Override public Stream<String> getLogs(String id, int tail) { return Stream.empty(); }
@Override public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
@Override public void stopLogCapture(String containerId) {}
@Override public void stopLogCaptureByApp(String appSlug, String envSlug) {}
}

View File

@@ -98,6 +98,10 @@ public class DockerEventMonitor {
}
replicas.set(i, updated);
changed = true;
// Stop log capture for this container — it's dead or stopped
if ("die".equals(action) || "oom".equals(action) || "stop".equals(action)) {
runtimeOrchestrator.stopLogCapture(containerId);
}
break;
}
}

View File

@@ -31,6 +31,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
private static final Logger log = LoggerFactory.getLogger(DockerRuntimeOrchestrator.class);
private DockerClient dockerClient;
private ContainerLogForwarder logForwarder;
public void setLogForwarder(ContainerLogForwarder logForwarder) {
this.logForwarder = logForwarder;
}
@PostConstruct
public void init() {
var config = DefaultDockerClientConfig.createDefaultConfigBuilder()
@@ -196,4 +202,25 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
}
return logLines.stream();
}
/** Starts streaming this container's logs to ClickHouse; no-op when no forwarder is wired. */
@Override
public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {
    if (logForwarder == null) {
        return; // forwarder is optional — absent in observability-only setups
    }
    logForwarder.startCapture(containerId, appSlug, envSlug, tenantId);
}
/** Stops log capture for one container (e.g. on die/oom); no-op when no forwarder is wired. */
@Override
public void stopLogCapture(String containerId) {
    if (logForwarder == null) {
        return; // forwarder is optional — absent in observability-only setups
    }
    logForwarder.stopCapture(containerId);
}
/** Stops capture for all containers of an app+env (e.g. on agent SSE connect); no-op when no forwarder is wired. */
@Override
public void stopLogCaptureByApp(String appSlug, String envSlug) {
    if (logForwarder == null) {
        return; // forwarder is optional — absent in observability-only setups
    }
    logForwarder.stopCaptureByApp(appSlug, envSlug);
}
}

View File

@@ -1,5 +1,6 @@
package com.cameleer3.server.app.runtime;
import com.cameleer3.server.app.search.ClickHouseLogStore;
import com.cameleer3.server.app.storage.PostgresDeploymentRepository;
import com.cameleer3.server.core.runtime.DeploymentRepository;
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
@@ -17,13 +18,17 @@ public class RuntimeOrchestratorAutoConfig {
private static final Logger log = LoggerFactory.getLogger(RuntimeOrchestratorAutoConfig.class);
@Bean
public RuntimeOrchestrator runtimeOrchestrator() {
// Auto-detect: Docker socket available?
public RuntimeOrchestrator runtimeOrchestrator(
@org.springframework.beans.factory.annotation.Autowired(required = false)
ContainerLogForwarder logForwarder) {
if (Files.exists(Path.of("/var/run/docker.sock"))) {
log.info("Docker socket detected - enabling Docker runtime orchestrator");
return new DockerRuntimeOrchestrator();
DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator();
if (logForwarder != null) {
orchestrator.setLogForwarder(logForwarder);
}
return orchestrator;
}
// TODO: K8s detection (check for service account token)
log.info("No Docker socket or K8s detected - runtime management disabled (observability-only mode)");
return new DisabledRuntimeOrchestrator();
}
@@ -44,4 +49,13 @@ public class RuntimeOrchestratorAutoConfig {
}
return null;
}
@Bean
// Creates the forwarder only when the Docker runtime is active; disabled runtimes get no
// forwarder, leaving the orchestrator's log-capture methods as no-ops.
// NOTE(review): this bean depends on RuntimeOrchestrator while runtimeOrchestrator(...)
// injects a ContainerLogForwarder — a circular wiring that only works because the latter
// uses @Autowired(required = false). Confirm Spring resolves this ordering at startup.
// NOTE(review): returning null from a @Bean factory is discouraged in recent Spring
// versions; consider conditional bean registration instead — TODO confirm.
public ContainerLogForwarder containerLogForwarder(RuntimeOrchestrator orchestrator,
ClickHouseLogStore logStore) {
// Only the Docker orchestrator exposes a DockerClient to stream from
// (presumably via getDockerClient(); accessor not visible here — verify it exists).
if (orchestrator instanceof DockerRuntimeOrchestrator docker) {
return new ContainerLogForwarder(docker.getDockerClient(), logStore);
}
return null;
}
}

View File

@@ -9,4 +9,13 @@ public interface RuntimeOrchestrator {
void removeContainer(String containerId);
ContainerStatus getContainerStatus(String containerId);
Stream<String> getLogs(String containerId, int tailLines);
/** Start streaming container logs to ClickHouse. */
default void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
/** Stop log capture for a specific container (e.g., on die/oom). */
default void stopLogCapture(String containerId) {}
/** Stop log capture for all containers matching this app+env (e.g., on agent SSE connect). */
default void stopLogCaptureByApp(String appSlug, String envSlug) {}
}

View File

@@ -0,0 +1,972 @@
# Container Startup Log Capture — Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Capture Docker container stdout/stderr from startup until the Cameleer agent registers via SSE, storing logs in ClickHouse for display in deployment views and general log search.
**Architecture:** Extend `RuntimeOrchestrator` with log capture methods. A new `ContainerLogForwarder` component (Docker-specific) streams container output using `docker logs --follow`, batches lines, and flushes them to ClickHouse via the existing `insertBufferedBatch()` path with `source = 'container'`. Capture stops on SSE connect or container death. UI adds a startup log panel below `DeploymentProgress` and source badges in log views.
**Tech Stack:** Java 17, Spring Boot 3.4, Docker Java client (zerodep), ClickHouse, React, TanStack Query
**Spec:** `docs/superpowers/specs/2026-04-14-container-startup-log-capture-design.md`
---
## File Map
### New Files
| File | Responsibility |
|------|----------------|
| `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java` | Docker log streaming, batching, ClickHouse flush, session lifecycle |
| `ui/src/components/StartupLogPanel.tsx` | Collapsible log panel for deployment startup logs |
| `ui/src/components/StartupLogPanel.module.css` | Styles for startup log panel |
### Modified Files
| File | Change |
|------|--------|
| `cameleer3-server-core/.../RuntimeOrchestrator.java` | Add 3 default no-op methods |
| `cameleer3-server-app/.../DockerRuntimeOrchestrator.java` | Implement log capture, delegate to forwarder |
| `cameleer3-server-app/.../DisabledRuntimeOrchestrator.java` | Override no-ops (explicit) |
| `cameleer3-server-app/.../RuntimeOrchestratorAutoConfig.java` | Create and wire `ContainerLogForwarder` bean |
| `cameleer3-server-app/.../DeploymentExecutor.java` | Call `startLogCapture()` after container start |
| `cameleer3-server-app/.../SseConnectionManager.java` | Call `stopLogCaptureByApp()` on SSE connect |
| `cameleer3-server-app/.../DockerEventMonitor.java` | Call `stopLogCapture()` on die/oom |
| `ui/src/pages/AppsTab/AppsTab.tsx` | Render `StartupLogPanel` below `DeploymentProgress` |
| `ui/src/pages/AgentInstance/AgentInstance.tsx` | Add `container` to `LOG_SOURCE_ITEMS` |
| `ui/src/api/queries/logs.ts` | Add `useStartupLogs` hook |
---
### Task 1: Extend RuntimeOrchestrator Interface
**Files:**
- Modify: `cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java`
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java`
- [ ] **Step 1: Add default methods to RuntimeOrchestrator**
In `cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java`, add three default no-op methods after the existing `getLogs` method (line 11):
```java
package com.cameleer3.server.core.runtime;
import java.util.stream.Stream;
public interface RuntimeOrchestrator {
boolean isEnabled();
String startContainer(ContainerRequest request);
void stopContainer(String containerId);
void removeContainer(String containerId);
ContainerStatus getContainerStatus(String containerId);
Stream<String> getLogs(String containerId, int tailLines);
/** Start streaming container logs to ClickHouse. */
default void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
/** Stop log capture for a specific container (e.g., on die/oom). */
default void stopLogCapture(String containerId) {}
/** Stop log capture for all containers matching this app+env (e.g., on agent SSE connect). */
default void stopLogCaptureByApp(String appSlug, String envSlug) {}
}
```
- [ ] **Step 2: Add explicit overrides to DisabledRuntimeOrchestrator**
In `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java`, add explicit no-op overrides after line 15:
```java
@Override public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
@Override public void stopLogCapture(String containerId) {}
@Override public void stopLogCaptureByApp(String appSlug, String envSlug) {}
```
- [ ] **Step 3: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-core,cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 4: Commit**
```bash
git add cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java \
cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java
git commit -m "feat: add log capture methods to RuntimeOrchestrator interface"
```
---
### Task 2: Create ContainerLogForwarder
**Files:**
- Create: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java`
- [ ] **Step 1: Create ContainerLogForwarder class**
```java
package com.cameleer3.server.app.runtime;
import com.cameleer3.common.model.LogEntry;
import com.cameleer3.server.app.search.ClickHouseLogStore;
import com.cameleer3.server.core.ingestion.BufferedLogEntry;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallback;
import com.github.dockerjava.api.model.Frame;
import jakarta.annotation.PreDestroy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Instant;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Streams Docker container stdout/stderr to ClickHouse until the agent registers via SSE.
* One capture session per container, managed by container ID.
*/
public class ContainerLogForwarder {
private static final Logger log = LoggerFactory.getLogger(ContainerLogForwarder.class);
private static final int FLUSH_BATCH_SIZE = 50;
private static final long FLUSH_INTERVAL_MS = 2_000;
private static final long MAX_CAPTURE_DURATION_MS = 5 * 60 * 1_000;
private static final long CLEANUP_INTERVAL_MS = 30_000;
// Pattern to match Docker timestamp prefix: "2026-04-14T14:23:01.234567890Z "
private static final Pattern DOCKER_TS_PATTERN = Pattern.compile(
"^(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z)\\s(.*)$", Pattern.DOTALL);
// Pattern to infer log level from Java log output
private static final Pattern LEVEL_PATTERN = Pattern.compile(
"\\b(ERROR|WARN|INFO|DEBUG|TRACE)\\b");
private final DockerClient dockerClient;
private final ClickHouseLogStore logStore;
private final ConcurrentHashMap<String, CaptureSession> sessions = new ConcurrentHashMap<>();
private final ExecutorService executor = Executors.newFixedThreadPool(10,
r -> { Thread t = new Thread(r, "log-capture"); t.setDaemon(true); return t; });
private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(
r -> { Thread t = new Thread(r, "log-capture-cleanup"); t.setDaemon(true); return t; });
public ContainerLogForwarder(DockerClient dockerClient, ClickHouseLogStore logStore) {
this.dockerClient = dockerClient;
this.logStore = logStore;
scheduler.scheduleAtFixedRate(this::cleanupExpiredSessions,
CLEANUP_INTERVAL_MS, CLEANUP_INTERVAL_MS, TimeUnit.MILLISECONDS);
}
public void startCapture(String containerId, String appSlug, String envSlug, String tenantId) {
if (sessions.containsKey(containerId)) {
log.debug("Already capturing logs for container {}", containerId.substring(0, 12));
return;
}
CaptureSession session = new CaptureSession(containerId, appSlug, envSlug, tenantId);
if (sessions.putIfAbsent(containerId, session) != null) {
return; // another thread beat us
}
Future<?> future = executor.submit(() -> streamLogs(session));
session.future = future;
log.info("Started log capture for container {} (app={}, env={})",
containerId.substring(0, 12), appSlug, envSlug);
}
public void stopCapture(String containerId) {
CaptureSession session = sessions.remove(containerId);
if (session == null) return;
session.cancel();
flushBuffer(session);
log.info("Stopped log capture for container {} ({} lines captured)",
containerId.substring(0, 12), session.lineCount);
}
public void stopCaptureByApp(String appSlug, String envSlug) {
List<String> toRemove = new ArrayList<>();
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
CaptureSession s = entry.getValue();
if (appSlug.equals(s.appSlug) && envSlug.equals(s.envSlug)) {
toRemove.add(entry.getKey());
}
}
for (String containerId : toRemove) {
stopCapture(containerId);
}
if (!toRemove.isEmpty()) {
log.info("Stopped log capture for app={} env={} ({} containers)",
appSlug, envSlug, toRemove.size());
}
}
@PreDestroy
public void shutdown() {
for (String containerId : new ArrayList<>(sessions.keySet())) {
stopCapture(containerId);
}
scheduler.shutdownNow();
executor.shutdownNow();
}
private void streamLogs(CaptureSession session) {
try {
session.callback = dockerClient.logContainerCmd(session.containerId)
.withFollowStream(true)
.withStdOut(true)
.withStdErr(true)
.withTimestamps(true)
.exec(new ResultCallback.Adapter<Frame>() {
@Override
public void onNext(Frame frame) {
if (session.cancelled) return;
String line = new String(frame.getPayload()).trim();
if (line.isEmpty()) return;
session.buffer.add(line);
session.lineCount++;
if (session.buffer.size() >= FLUSH_BATCH_SIZE
|| System.currentTimeMillis() - session.lastFlush >= FLUSH_INTERVAL_MS) {
flushBuffer(session);
}
}
@Override
public void onComplete() {
flushBuffer(session);
sessions.remove(session.containerId);
log.debug("Log stream completed for container {}",
session.containerId.substring(0, 12));
}
@Override
public void onError(Throwable throwable) {
flushBuffer(session);
sessions.remove(session.containerId);
log.debug("Log stream error for container {}: {}",
session.containerId.substring(0, 12), throwable.getMessage());
}
});
} catch (Exception e) {
sessions.remove(session.containerId);
log.warn("Failed to start log capture for container {}: {}",
session.containerId.substring(0, 12), e.getMessage());
}
}
private synchronized void flushBuffer(CaptureSession session) {
List<String> lines;
synchronized (session.buffer) {
if (session.buffer.isEmpty()) return;
lines = new ArrayList<>(session.buffer);
session.buffer.clear();
}
session.lastFlush = System.currentTimeMillis();
List<BufferedLogEntry> entries = new ArrayList<>(lines.size());
for (String line : lines) {
Instant timestamp = Instant.now();
String message = line;
Matcher tsMatcher = DOCKER_TS_PATTERN.matcher(line);
if (tsMatcher.matches()) {
try {
timestamp = Instant.parse(tsMatcher.group(1));
} catch (DateTimeParseException e) {
// keep Instant.now()
}
message = tsMatcher.group(2);
}
String level = inferLevel(message);
LogEntry logEntry = new LogEntry();
logEntry.setTimestamp(timestamp);
logEntry.setLevel(level);
logEntry.setMessage(message);
logEntry.setLoggerName("");
logEntry.setThreadName("");
logEntry.setStackTrace("");
logEntry.setMdc(Collections.emptyMap());
logEntry.setSource("container");
entries.add(new BufferedLogEntry(
session.tenantId, session.envSlug, session.containerId.substring(0, 12),
session.appSlug, logEntry));
}
try {
logStore.insertBufferedBatch(entries);
} catch (Exception e) {
log.warn("Failed to flush {} container log entries for {}: {}",
entries.size(), session.appSlug, e.getMessage());
}
}
private String inferLevel(String message) {
if (message.startsWith("\tat ") || message.startsWith("Caused by:")) {
return "ERROR";
}
Matcher m = LEVEL_PATTERN.matcher(message);
if (m.find()) {
return m.group(1);
}
return "INFO";
}
private void cleanupExpiredSessions() {
long now = System.currentTimeMillis();
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
CaptureSession session = entry.getValue();
if (now - session.startedAt > MAX_CAPTURE_DURATION_MS) {
log.info("Log capture timeout for container {} (app={}), stopping",
entry.getKey().substring(0, 12), session.appSlug);
stopCapture(entry.getKey());
}
}
}
private static class CaptureSession {
final String containerId;
final String appSlug;
final String envSlug;
final String tenantId;
final long startedAt = System.currentTimeMillis();
final List<String> buffer = Collections.synchronizedList(new ArrayList<>());
volatile long lastFlush = System.currentTimeMillis();
volatile long lineCount = 0;
volatile boolean cancelled = false;
volatile Future<?> future;
volatile ResultCallback.Adapter<Frame> callback;
CaptureSession(String containerId, String appSlug, String envSlug, String tenantId) {
this.containerId = containerId;
this.appSlug = appSlug;
this.envSlug = envSlug;
this.tenantId = tenantId;
}
void cancel() {
cancelled = true;
if (callback != null) {
try { callback.close(); } catch (Exception e) { /* ignore */ }
}
if (future != null) {
future.cancel(true);
}
}
}
}
```
- [ ] **Step 2: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 3: Commit**
```bash
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java
git commit -m "feat: add ContainerLogForwarder for Docker log streaming to ClickHouse"
```
---
### Task 3: Wire ContainerLogForwarder and Implement in DockerRuntimeOrchestrator
**Files:**
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerRuntimeOrchestrator.java`
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java`
- [ ] **Step 1: Add ContainerLogForwarder field and log capture methods to DockerRuntimeOrchestrator**
In `DockerRuntimeOrchestrator.java`, add a field and setter after the `dockerClient` field (line 32), and implement the three methods after `getLogs()` (line 198):
After line 32 (`private DockerClient dockerClient;`), add:
```java
private ContainerLogForwarder logForwarder;
public void setLogForwarder(ContainerLogForwarder logForwarder) {
this.logForwarder = logForwarder;
}
```
After line 198 (closing brace of `getLogs()`), add:
```java
@Override
public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {
if (logForwarder != null) {
logForwarder.startCapture(containerId, appSlug, envSlug, tenantId);
}
}
@Override
public void stopLogCapture(String containerId) {
if (logForwarder != null) {
logForwarder.stopCapture(containerId);
}
}
@Override
public void stopLogCaptureByApp(String appSlug, String envSlug) {
if (logForwarder != null) {
logForwarder.stopCaptureByApp(appSlug, envSlug);
}
}
```
- [ ] **Step 2: Wire ContainerLogForwarder in RuntimeOrchestratorAutoConfig**
In `RuntimeOrchestratorAutoConfig.java`, add a new bean method and inject it into the orchestrator. Replace the `runtimeOrchestrator()` bean (lines 19-29):
```java
@Bean
public RuntimeOrchestrator runtimeOrchestrator(
@org.springframework.beans.factory.annotation.Autowired(required = false)
ContainerLogForwarder logForwarder) {
// Auto-detect: Docker socket available?
if (Files.exists(Path.of("/var/run/docker.sock"))) {
log.info("Docker socket detected - enabling Docker runtime orchestrator");
DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator();
if (logForwarder != null) {
orchestrator.setLogForwarder(logForwarder);
}
return orchestrator;
}
// TODO: K8s detection (check for service account token)
log.info("No Docker socket or K8s detected - runtime management disabled (observability-only mode)");
return new DisabledRuntimeOrchestrator();
}
```
Add the `ContainerLogForwarder` bean after `dockerEventMonitor()` (after line 46):
```java
@Bean
public ContainerLogForwarder containerLogForwarder(RuntimeOrchestrator orchestrator,
ClickHouseLogStore logStore) {
if (orchestrator instanceof DockerRuntimeOrchestrator docker) {
return new ContainerLogForwarder(docker.getDockerClient(), logStore);
}
return null;
}
```
Add the import at the top:
```java
import com.cameleer3.server.app.search.ClickHouseLogStore;
```
- [ ] **Step 3: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 4: Commit**
```bash
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerRuntimeOrchestrator.java \
cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java
git commit -m "feat: wire ContainerLogForwarder into DockerRuntimeOrchestrator"
```
---
### Task 4: Integrate Start Capture in DeploymentExecutor
**Files:**
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DeploymentExecutor.java`
- [ ] **Step 1: Add startLogCapture call after each replica starts**
In `DeploymentExecutor.java`, inside the replica loop (after line 200, after the `connectContainer` loop), add a call to start log capture. Insert after the closing brace of the `for (String net : additionalNets)` block (line 200) and before the `replicaStates.add(...)` call (line 202):
```java
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
```
This goes right between the network connection loop and the `replicaStates.add(Map.of(...))` call, so the full context looks like:
```java
// Connect to additional networks after container is started
for (String net : additionalNets) {
if (networkManager != null) {
networkManager.connectContainer(containerId, net);
}
}
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
replicaStates.add(Map.of(
"index", i,
"containerId", containerId,
"containerName", containerName,
"status", "STARTING"
));
```
- [ ] **Step 2: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 3: Commit**
```bash
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DeploymentExecutor.java
git commit -m "feat: start log capture when deployment replicas are created"
```
---
### Task 5: Integrate Stop Capture in SseConnectionManager
**Files:**
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/agent/SseConnectionManager.java`
- [ ] **Step 1: Inject RuntimeOrchestrator and call stopLogCaptureByApp on SSE connect**
In `SseConnectionManager.java`, add the `RuntimeOrchestrator` dependency. Modify the constructor (lines 39-45) to accept it:
```java
private final RuntimeOrchestrator runtimeOrchestrator;
public SseConnectionManager(AgentRegistryService registryService, AgentRegistryConfig config,
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper,
RuntimeOrchestrator runtimeOrchestrator) {
this.registryService = registryService;
this.config = config;
this.ssePayloadSigner = ssePayloadSigner;
this.objectMapper = objectMapper;
this.runtimeOrchestrator = runtimeOrchestrator;
}
```
Add the import at the top:
```java
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
import com.cameleer3.server.core.agent.AgentInfo;
```
In the `connect()` method (line 60), after the `log.info("SSE connection established...")` (line 83), add the stop capture call:
```java
// Stop container log capture — agent is now online and will send its own logs
AgentInfo agent = registryService.findById(agentId);
if (agent != null) {
runtimeOrchestrator.stopLogCaptureByApp(agent.applicationId(), agent.environmentId());
}
```
- [ ] **Step 2: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 3: Commit**
```bash
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/agent/SseConnectionManager.java
git commit -m "feat: stop container log capture when agent SSE connects"
```
---
### Task 6: Integrate Stop Capture in DockerEventMonitor
**Files:**
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java`
- [ ] **Step 1: Call stopLogCapture on die/oom/stop events**
In `DockerEventMonitor.java`, inside the `handleEvent()` method, after the replica state is updated and before the `changed` flag check (after line 102, inside the `switch` block), add a call to stop log capture. The cleanest place is right after the `replicas.set(i, updated)` line (line 99), within the same `if` block:
Insert after line 101 (`break;`) and before line 102 (`}`):
```java
// Stop log capture for this container — it's dead or stopped
if ("die".equals(action) || "oom".equals(action) || "stop".equals(action)) {
runtimeOrchestrator.stopLogCapture(containerId);
}
```
- [ ] **Step 2: Verify compilation**
Run: `mvn clean compile -pl cameleer3-server-app -q`
Expected: BUILD SUCCESS
- [ ] **Step 3: Commit**
```bash
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java
git commit -m "feat: stop container log capture on Docker die/oom events"
```
---
### Task 7: Full Backend Compilation and Verification
- [ ] **Step 1: Run full compile across all modules**
Run: `mvn clean compile test-compile -q`
Expected: BUILD SUCCESS
- [ ] **Step 2: Verify no missing imports or type errors**
If compilation fails, fix issues and re-run. Common issues to watch for:
- Circular bean dependency between `RuntimeOrchestrator` and `ContainerLogForwarder` — the `@Autowired(required = false)` pattern in the auto-config handles this
- Missing `AgentInfo` import or `findById()` method on `AgentRegistryService` — verify the method exists
- [ ] **Step 3: Commit any fixes**
Only if Step 2 required changes.
---
### Task 8: Add useStartupLogs Hook
**Files:**
- Modify: `ui/src/api/queries/logs.ts`
- [ ] **Step 1: Add environment to LogSearchParams and fetchLogs**
In `ui/src/api/queries/logs.ts`, add `environment` to the `LogSearchParams` interface (after the `source` field, line 33):
```typescript
export interface LogSearchParams {
q?: string;
level?: string;
application?: string;
agentId?: string;
source?: string;
environment?: string;
exchangeId?: string;
logger?: string;
from?: string;
to?: string;
cursor?: string;
limit?: number;
sort?: 'asc' | 'desc';
}
```
In the `fetchLogs` function, add the environment param after the source line (after line 50):
```typescript
if (params.environment) urlParams.set('environment', params.environment);
```
- [ ] **Step 2: Add useStartupLogs hook**
In `ui/src/api/queries/logs.ts`, add the following hook after the `useApplicationLogs` function (after line 128):
```typescript
/**
* Fetches container startup logs for a deployment.
* Polls every 3s while the deployment is STARTING, stops when RUNNING/FAILED.
*/
export function useStartupLogs(
application: string | undefined,
environment: string | undefined,
deployCreatedAt: string | undefined,
isStarting: boolean,
) {
const params: LogSearchParams = {
application: application || undefined,
environment: environment || undefined,
source: 'container',
from: deployCreatedAt || undefined,
sort: 'asc',
limit: 500,
};
return useLogs(params, {
enabled: !!application && !!deployCreatedAt,
refetchInterval: isStarting ? 3_000 : false,
});
}
```
- [ ] **Step 3: Commit**
```bash
git add ui/src/api/queries/logs.ts
git commit -m "feat: add useStartupLogs hook for container startup log polling"
```
---
### Task 9: Create StartupLogPanel Component
**Files:**
- Create: `ui/src/components/StartupLogPanel.module.css`
- Create: `ui/src/components/StartupLogPanel.tsx`
- [ ] **Step 1: Create CSS module**
Create `ui/src/components/StartupLogPanel.module.css`:
```css
.panel {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 6px;
overflow: hidden;
margin-top: 8px;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 8px 12px;
border-bottom: 1px solid var(--border);
}
.headerLeft {
display: flex;
align-items: center;
gap: 8px;
}
.title {
font-size: 13px;
font-weight: 600;
color: var(--text-primary);
}
.badge {
font-size: 11px;
padding: 1px 8px;
border-radius: 10px;
}
.badgeLive {
background: var(--success-muted);
color: var(--success);
}
.badgeStopped {
background: var(--error-muted);
color: var(--error);
}
.pollingHint {
font-size: 11px;
color: var(--text-muted);
}
.lineCount {
font-size: 12px;
color: var(--text-muted);
}
.empty {
padding: 16px;
text-align: center;
font-size: 13px;
color: var(--text-muted);
}
```
- [ ] **Step 2: Create StartupLogPanel component**
Create `ui/src/components/StartupLogPanel.tsx`:
```tsx
import { LogViewer } from '@cameleer/design-system';
import { useStartupLogs } from '../api/queries/logs';
import type { Deployment } from '../api/queries/admin/apps';
import styles from './StartupLogPanel.module.css';
interface StartupLogPanelProps {
deployment: Deployment;
appSlug: string;
envSlug: string;
}
export function StartupLogPanel({ deployment, appSlug, envSlug }: StartupLogPanelProps) {
const isStarting = deployment.status === 'STARTING';
const isFailed = deployment.status === 'FAILED';
const { data } = useStartupLogs(appSlug, envSlug, deployment.createdAt, isStarting);
const entries = data?.data ?? [];
if (entries.length === 0 && !isStarting) return null;
return (
<div className={styles.panel}>
<div className={styles.header}>
<div className={styles.headerLeft}>
<span className={styles.title}>Startup Logs</span>
{isStarting && (
<>
<span className={`${styles.badge} ${styles.badgeLive}`}> live</span>
<span className={styles.pollingHint}>polling every 3s</span>
</>
)}
{isFailed && (
<span className={`${styles.badge} ${styles.badgeStopped}`}>stopped</span>
)}
</div>
<span className={styles.lineCount}>{entries.length} lines</span>
</div>
{entries.length > 0 ? (
<LogViewer entries={entries} maxHeight={300} />
) : (
<div className={styles.empty}>Waiting for container output...</div>
)}
</div>
);
}
```
- [ ] **Step 3: Commit**
```bash
git add ui/src/components/StartupLogPanel.tsx ui/src/components/StartupLogPanel.module.css
git commit -m "feat: add StartupLogPanel component for deployment startup logs"
```
---
### Task 10: Integrate StartupLogPanel into AppsTab
**Files:**
- Modify: `ui/src/pages/AppsTab/AppsTab.tsx`
- [ ] **Step 1: Import StartupLogPanel**
In `AppsTab.tsx`, add the import after the `DeploymentProgress` import (line 41):
```typescript
import { StartupLogPanel } from '../../components/StartupLogPanel';
```
- [ ] **Step 2: Render StartupLogPanel below DeploymentProgress**
In `AppsTab.tsx`, find the block that renders `DeploymentProgress` (lines 774-779):
```tsx
{deployments.filter((d) => d.deployStage).map((d) => (
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
<span className={styles.cellMeta}>{d.containerName}</span>
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
</div>
))}
```
Replace with:
```tsx
{deployments.filter((d) => d.deployStage || d.status === 'FAILED').map((d) => (
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
<span className={styles.cellMeta}>{d.containerName}</span>
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
<StartupLogPanel deployment={d} appSlug={app.slug} envSlug={environments.find(e => e.id === d.environmentId)?.slug ?? ''} />
</div>
))}
```
Note: the filter changes from `d.deployStage` to `d.deployStage || d.status === 'FAILED'` so that failed deployments still show their startup logs even after the deploy stage is cleared.
- [ ] **Step 3: Verify the UI compiles**
Run: `cd ui && npx tsc --noEmit`
Expected: No errors
- [ ] **Step 4: Commit**
```bash
git add ui/src/pages/AppsTab/AppsTab.tsx
git commit -m "feat: show startup logs panel below deployment progress"
```
---
### Task 11: Add Container Source to Log Source Filters
**Files:**
- Modify: `ui/src/pages/AgentInstance/AgentInstance.tsx`
- [ ] **Step 1: Add container to LOG_SOURCE_ITEMS**
In `AgentInstance.tsx`, find the `LOG_SOURCE_ITEMS` constant (lines 28-31):
```typescript
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
{ value: 'app', label: 'App' },
{ value: 'agent', label: 'Agent' },
];
```
Replace with:
```typescript
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
{ value: 'app', label: 'App' },
{ value: 'agent', label: 'Agent' },
{ value: 'container', label: 'Container' },
];
```
- [ ] **Step 2: Check if AgentHealth has the same filter**
Check `ui/src/pages/AgentHealth/AgentHealth.tsx` for `LOG_SOURCE_ITEMS` — if it has the same constant, add `container` there too.
- [ ] **Step 3: Commit**
```bash
git add ui/src/pages/AgentInstance/AgentInstance.tsx
# Also add AgentHealth.tsx if modified
git commit -m "feat: add container source option to log source filters"
```
---
### Task 12: End-to-End Verification
- [ ] **Step 1: Full backend build**
Run: `mvn clean compile test-compile -q`
Expected: BUILD SUCCESS
- [ ] **Step 2: Full UI type check**
Run: `cd ui && npx tsc --noEmit`
Expected: No errors
- [ ] **Step 3: Start dev server and verify UI**
Run: `cd ui && npm run dev`
Open in browser and verify:
1. Navigate to an app in the Deployments tab
2. The deployment progress section renders without errors
3. The source filter in Agent Instance page shows App / Agent / Container options
4. No console errors
- [ ] **Step 4: Commit any fixes from verification**
Only if prior steps required changes.
---
## Out of Scope (Design System)
**Source badge on log lines:** The spec calls for a small `container` / `app` badge on each log line in the `LogViewer` component. `LogViewer` lives in `@cameleer/design-system` — a separate repo we don't build here. This requires a design system update to render the `source` field from `LogEntryResponse`. The data is already flowing (`source` field exists on all log entries); the rendering change belongs in the DS repo. Track as a follow-up.

View File

@@ -0,0 +1,187 @@
# Container Startup Log Capture
Capture Docker container stdout/stderr from the moment a container starts until the Cameleer agent inside fully registers (SSE connection established). Stores logs in ClickHouse for display in the deployment view and general log search.
## Problem
When a deployed application crashes during startup — before the Cameleer agent can connect and send logs via the normal ingestion pipeline — all diagnostic output is lost. The container may be removed before anyone can inspect it, leaving operators blind to the root cause.
## Solution
A `ContainerLogForwarder` component streams Docker log output in real-time for each managed container, batches lines, and flushes them to the existing ClickHouse `logs` table with `source = 'container'`. Capture stops when the agent establishes its SSE connection, at which point the agent's own log pipeline takes over.
## Architecture
### Core Interface Extension
Extend `RuntimeOrchestrator` (core module) with three new methods:
```java
// in RuntimeOrchestrator.java
void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId);
void stopLogCapture(String containerId);
void stopLogCaptureByApp(String appSlug, String envSlug);
```
`DisabledRuntimeOrchestrator` implements these as no-ops. `DockerRuntimeOrchestrator` delegates to `ContainerLogForwarder`.
### ContainerLogForwarder
**Package:** `com.cameleer3.server.app.runtime` (Docker-specific, alongside `DockerRuntimeOrchestrator`, `DockerEventMonitor`, etc.)
**Responsibilities:**
- Manages active capture sessions in a `ConcurrentHashMap<String, CaptureSession>` keyed by container ID
- Each `CaptureSession` holds: containerId, appSlug, envSlug, tenantId, a `Future<?>` for the streaming thread, and a buffer of pending log lines
- Uses a bounded thread pool (fixed size ~10 threads)
**Streaming logic:**
- Calls `dockerClient.logContainerCmd(containerId).withFollowStream(true).withStdOut(true).withStdErr(true).withTimestamps(true)`
- Callback `onNext(Frame)` appends to an in-memory buffer
- Every ~2 seconds (or every 50 lines, whichever comes first), flushes the buffer to ClickHouse via `ClickHouseLogStore.insertBufferedBatch()` — constructs `BufferedLogEntry` records with `source = "container"`, the deployment's app/env/tenant metadata, and container name as `instanceId`
- On `onComplete()` (container stopped) or `onError()` — final flush, remove session from map
**Safety:**
- Max capture duration: 5 minutes. A scheduled cleanup (every 30s) stops sessions exceeding this limit.
- `@PreDestroy` cleanup: stop all active captures on server shutdown.
### ClickHouse Field Mapping
Uses the existing `logs` table. No schema changes required.
| Field | Value |
|-------|-------|
| `source` | `'container'` |
| `application` | appSlug from deployment |
| `environment` | envSlug from deployment |
| `tenant_id` | tenantId from deployment |
| `instance_id` | containerName (e.g., `prod-orderservice-0`) |
| `timestamp` | Parsed from Docker timestamp prefix |
| `message` | Log line content (after timestamp) |
| `level` | Inferred by regex (see below) |
| `logger_name` | Empty string (not parseable from raw stdout) |
| `thread_name` | Empty string |
| `stack_trace` | Empty string (stack traces appear as consecutive message lines) |
| `exchange_id` | Empty string |
| `mdc` | Empty map |
### Log Level Inference
- Regex scan for common Java log patterns: ` ERROR `, ` WARN `, ` INFO `, ` DEBUG `, ` TRACE `
- Stack trace continuation lines (starting with `\tat ` or `Caused by:`) inherit ERROR level
- Lines matching no pattern default to INFO
## Integration Points
### Start Capture — DeploymentExecutor
After each replica container is started (inside the replica loop):
```java
orchestrator.startLogCapture(containerId, appSlug, envSlug, tenantId);
```
### Stop Capture — SseConnectionManager.connect()
When an agent connects SSE, look up its `AgentInfo` from the registry to get `application` + `environmentId`:
```java
orchestrator.stopLogCaptureByApp(application, environmentId);
```
Best-effort call — no-op if no capture exists for that app+env (e.g., non-Docker agent).
### Stop Capture — Container Death
`DockerEventMonitor` handles `die`/`oom` events. After updating replica state:
```java
orchestrator.stopLogCapture(containerId);
```
Triggers final flush of buffered lines before cleanup.
### Stop Capture — Deployment Failure Cleanup
No extra code needed. When `DeploymentExecutor` stops/removes containers on health check failure, the Docker `die` event flows through `DockerEventMonitor` which calls `stopLogCapture`. The event monitor path handles it.
## UI Changes
### 1. Deployment Startup Log Panel
A collapsible log panel below the `DeploymentProgress` component in the deployment detail view.
**Data source:** Queries `/api/v1/logs?application={appSlug}&environment={envSlug}&source=container&from={deployCreatedAt}`
**Polling behavior:**
- Auto-refreshes every 3 seconds while deployment status is STARTING
- Stops polling when status reaches RUNNING or FAILED
- Manual refresh button available in all states
**Status indicator:**
- Green "live" badge + "polling every 3s" text while STARTING
- Red "stopped" badge when FAILED
- No badge when RUNNING (panel remains visible with historical startup logs)
**Layout:** Uses existing `LogViewer` component from `@cameleer/design-system` and shared log panel styles from `ui/src/styles/log-panel.module.css`.
### 2. Source Badge in Log Views
Everywhere logs are displayed (AgentInstance page, LogTab, general log search), each log line gets a small source badge:
- `container` — slate/gray badge
- `app` — green badge
- `agent` — existing behavior
The `source` field already exists in `LogEntryResponse`. This is a rendering-only change in the LogViewer or its wrapper.
### 3. Source Filter Update
The log toolbar source filter (currently App vs Agent) adds `Container` as a third option. The backend `/api/v1/logs` endpoint already accepts `source` as a query parameter — no backend change needed for filtering.
## Edge Cases
**Multi-replica:** Each replica gets its own capture session keyed by container ID. `instance_id` in ClickHouse is the container name (e.g., `prod-orderservice-0`). `stopLogCaptureByApp()` stops all sessions for that app+env pair.
**Server restart during capture:** Active sessions are in-memory and lost on restart. Not a problem — containers likely restart too (Docker restart policy), and new captures start when `DeploymentExecutor` runs again. Already-flushed logs survive in ClickHouse.
**Container produces no output:** Follow stream stays open but idle (parked thread, no CPU cost). Cleaned up by the 5-minute timeout or container death.
**Rapid redeployment:** Old container dies -> `stopLogCapture(oldContainerId)`. New container starts -> `startLogCapture(newContainerId, ...)`. Different container IDs, no conflict.
**Log overlap:** When the agent connects and starts sending `source='app'` logs, there may be a brief overlap with `source='container'` logs for the same timeframe. Both are shown with source badges. Users can filter by source if needed.
## Files Changed
### Backend — New
| File | Description |
|------|-------------|
| `app/runtime/ContainerLogForwarder.java` | Docker log streaming, buffering, ClickHouse flush |
### Backend — Modified
| File | Change |
|------|--------|
| `core/runtime/RuntimeOrchestrator.java` | Add 3 log capture methods to interface |
| `app/runtime/DockerRuntimeOrchestrator.java` | Implement log capture methods, delegate to ContainerLogForwarder |
| `app/runtime/DisabledRuntimeOrchestrator.java` | No-op implementations of new methods |
| `app/runtime/DeploymentExecutor.java` | Call `startLogCapture()` after container start |
| `app/agent/SseConnectionManager.java` | Call `stopLogCaptureByApp()` on SSE connect |
| `app/runtime/DockerEventMonitor.java` | Call `stopLogCapture()` on die/oom events |
| `app/runtime/RuntimeOrchestratorAutoConfig.java` | Wire ContainerLogForwarder into DockerRuntimeOrchestrator |
### Frontend — Modified
| File | Change |
|------|--------|
| `ui/src/pages/AppsTab/AppsTab.tsx` | Add startup log panel below DeploymentProgress |
| `ui/src/api/queries/logs.ts` | Hook for deployment startup logs query |
| Log display components | Add source badge rendering |
| Log toolbar | Add Container to source filter options |
### No Changes
| File | Reason |
|------|--------|
| ClickHouse `init.sql` | Existing `logs` table with `source` column is sufficient |
| `LogQueryController.java` | Already accepts `source` filter parameter |
| `ClickHouseLogStore.java` | Already writes `source` field from log entries |

8
ui/package-lock.json generated
View File

@@ -9,7 +9,7 @@
"version": "0.0.0",
"hasInstallScript": true,
"dependencies": {
"@cameleer/design-system": "^0.1.48",
"@cameleer/design-system": "^0.1.49",
"@tanstack/react-query": "^5.90.21",
"js-yaml": "^4.1.1",
"lucide-react": "^1.7.0",
@@ -281,9 +281,9 @@
}
},
"node_modules/@cameleer/design-system": {
"version": "0.1.48",
"resolved": "https://gitea.siegeln.net/api/packages/cameleer/npm/%40cameleer%2Fdesign-system/-/0.1.48/design-system-0.1.48.tgz",
"integrity": "sha512-XieqGufsjucb5j43YeW2UAqKkaSYvXShw7DuHOZUVjjcuPqLnw4fAZWY7nrs6k33kzYR/UOE3Z9nBx+77Ki8mA==",
"version": "0.1.49",
"resolved": "https://gitea.siegeln.net/api/packages/cameleer/npm/%40cameleer%2Fdesign-system/-/0.1.49/design-system-0.1.49.tgz",
"integrity": "sha512-ftBh7PZpcPwOdW1UFQnBpgo8ql0CfedRrXkOh4350UImMs7qrSdd+jME8Ind62IUytfUBnqk/f0L6j3MjsQaeQ==",
"dependencies": {
"lucide-react": "^1.7.0",
"react": "^19.0.0",

View File

@@ -15,7 +15,7 @@
"postinstall": "node -e \"const fs=require('fs');fs.mkdirSync('public',{recursive:true});fs.copyFileSync('node_modules/@cameleer/design-system/assets/cameleer3-logo.svg','public/favicon.svg')\""
},
"dependencies": {
"@cameleer/design-system": "^0.1.48",
"@cameleer/design-system": "^0.1.49",
"@tanstack/react-query": "^5.90.21",
"js-yaml": "^4.1.1",
"lucide-react": "^1.7.0",

View File

@@ -31,6 +31,7 @@ export interface LogSearchParams {
application?: string;
agentId?: string;
source?: string;
environment?: string;
exchangeId?: string;
logger?: string;
from?: string;
@@ -48,6 +49,7 @@ async function fetchLogs(params: LogSearchParams): Promise<LogSearchPageResponse
if (params.application) urlParams.set('application', params.application);
if (params.agentId) urlParams.set('agentId', params.agentId);
if (params.source) urlParams.set('source', params.source);
if (params.environment) urlParams.set('environment', params.environment);
if (params.exchangeId) urlParams.set('exchangeId', params.exchangeId);
if (params.logger) urlParams.set('logger', params.logger);
if (params.from) urlParams.set('from', params.from);
@@ -126,3 +128,28 @@ export function useApplicationLogs(
data: query.data?.data ?? (undefined as LogEntryResponse[] | undefined),
};
}
/**
* Fetches container startup logs for a deployment.
* Polls every 3s while the deployment is STARTING, stops when RUNNING/FAILED.
*/
export function useStartupLogs(
application: string | undefined,
environment: string | undefined,
deployCreatedAt: string | undefined,
isStarting: boolean,
) {
const params: LogSearchParams = {
application: application || undefined,
environment: environment || undefined,
source: 'container',
from: deployCreatedAt || undefined,
sort: 'asc',
limit: 500,
};
return useLogs(params, {
enabled: !!application && !!deployCreatedAt,
refetchInterval: isStarting ? 3_000 : false,
});
}

View File

@@ -0,0 +1,60 @@
.panel {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 6px;
overflow: hidden;
margin-top: 8px;
}
.header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 8px 12px;
border-bottom: 1px solid var(--border);
}
.headerLeft {
display: flex;
align-items: center;
gap: 8px;
}
.title {
font-size: 13px;
font-weight: 600;
color: var(--text-primary);
}
.badge {
font-size: 11px;
padding: 1px 8px;
border-radius: 10px;
}
.badgeLive {
background: var(--success-muted);
color: var(--success);
}
.badgeStopped {
background: var(--error-muted);
color: var(--error);
}
.pollingHint {
font-size: 11px;
color: var(--text-muted);
}
.lineCount {
font-size: 12px;
color: var(--text-muted);
}
.empty {
padding: 16px;
text-align: center;
font-size: 13px;
color: var(--text-muted);
}

View File

@@ -0,0 +1,46 @@
import { LogViewer } from '@cameleer/design-system';
import { useStartupLogs } from '../api/queries/logs';
import type { Deployment } from '../api/queries/admin/apps';
import styles from './StartupLogPanel.module.css';
interface StartupLogPanelProps {
deployment: Deployment;
appSlug: string;
envSlug: string;
}
export function StartupLogPanel({ deployment, appSlug, envSlug }: StartupLogPanelProps) {
const isStarting = deployment.status === 'STARTING';
const isFailed = deployment.status === 'FAILED';
const { data } = useStartupLogs(appSlug, envSlug, deployment.createdAt, isStarting);
const entries = data?.data ?? [];
if (entries.length === 0 && !isStarting) return null;
return (
<div className={styles.panel}>
<div className={styles.header}>
<div className={styles.headerLeft}>
<span className={styles.title}>Startup Logs</span>
{isStarting && (
<>
<span className={`${styles.badge} ${styles.badgeLive}`}> live</span>
<span className={styles.pollingHint}>polling every 3s</span>
</>
)}
{isFailed && (
<span className={`${styles.badge} ${styles.badgeStopped}`}>stopped</span>
)}
</div>
<span className={styles.lineCount}>{entries.length} lines</span>
</div>
{entries.length > 0 ? (
<LogViewer entries={entries} maxHeight={300} />
) : (
<div className={styles.empty}>Waiting for container output...</div>
)}
</div>
);
}

View File

@@ -93,6 +93,7 @@ const LOG_LEVEL_ITEMS: ButtonGroupItem[] = [
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
{ value: 'app', label: 'App' },
{ value: 'agent', label: 'Agent' },
{ value: 'container', label: 'Container' },
];
// ── AgentHealth page ─────────────────────────────────────────────────────────

View File

@@ -28,6 +28,7 @@ const LOG_LEVEL_ITEMS: ButtonGroupItem[] = [
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
{ value: 'app', label: 'App' },
{ value: 'agent', label: 'Agent' },
{ value: 'container', label: 'Container' },
];
export default function AgentInstance() {

View File

@@ -39,6 +39,7 @@ import type { ApplicationConfig, TapDefinition } from '../../api/queries/command
import { useCatalog } from '../../api/queries/catalog';
import type { CatalogApp, CatalogRoute } from '../../api/queries/catalog';
import { DeploymentProgress } from '../../components/DeploymentProgress';
import { StartupLogPanel } from '../../components/StartupLogPanel';
import { timeAgo } from '../../utils/format-utils';
import { applyTracedProcessorUpdate, applyRouteRecordingUpdate } from '../../utils/config-draft-utils';
import { PageLoader } from '../../components/PageLoader';
@@ -771,10 +772,11 @@ function OverviewSubTab({ app, deployments, versions, environments, envMap, sele
: <DataTable<DeploymentRow> columns={deploymentColumns} data={deploymentRows} flush />
}
</div>
{deployments.filter((d) => d.deployStage).map((d) => (
{deployments.filter((d) => d.deployStage || d.status === 'FAILED').map((d) => (
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
<span className={styles.cellMeta}>{d.containerName}</span>
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
<StartupLogPanel deployment={d} appSlug={app.slug} envSlug={environments.find(e => e.id === d.environmentId)?.slug ?? ''} />
</div>
))}