Compare commits
13 Commits
92d7f5809b
...
0dafad883e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0dafad883e | ||
|
|
1287952387 | ||
|
|
81dd81fc07 | ||
|
|
e7732703a6 | ||
|
|
119cf912b8 | ||
|
|
81f42d5409 | ||
|
|
49c7de7082 | ||
|
|
4940bf3376 | ||
|
|
de85a861c7 | ||
|
|
729944d3ac | ||
|
|
9c65a3c3b9 | ||
|
|
8fabc2b308 | ||
|
|
14215bebec |
@@ -3,7 +3,9 @@ package com.cameleer3.server.app.agent;
|
||||
import com.cameleer3.server.app.config.AgentRegistryConfig;
|
||||
import com.cameleer3.server.core.agent.AgentCommand;
|
||||
import com.cameleer3.server.core.agent.AgentEventListener;
|
||||
import com.cameleer3.server.core.agent.AgentInfo;
|
||||
import com.cameleer3.server.core.agent.AgentRegistryService;
|
||||
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
@@ -35,13 +37,16 @@ public class SseConnectionManager implements AgentEventListener {
|
||||
private final AgentRegistryConfig config;
|
||||
private final SsePayloadSigner ssePayloadSigner;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final RuntimeOrchestrator runtimeOrchestrator;
|
||||
|
||||
public SseConnectionManager(AgentRegistryService registryService, AgentRegistryConfig config,
|
||||
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper) {
|
||||
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper,
|
||||
RuntimeOrchestrator runtimeOrchestrator) {
|
||||
this.registryService = registryService;
|
||||
this.config = config;
|
||||
this.ssePayloadSigner = ssePayloadSigner;
|
||||
this.objectMapper = objectMapper;
|
||||
this.runtimeOrchestrator = runtimeOrchestrator;
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
@@ -81,6 +86,13 @@ public class SseConnectionManager implements AgentEventListener {
|
||||
});
|
||||
|
||||
log.info("SSE connection established for agent {}", agentId);
|
||||
|
||||
// Stop container log capture — agent is now online and will send its own logs
|
||||
AgentInfo agent = registryService.findById(agentId);
|
||||
if (agent != null) {
|
||||
runtimeOrchestrator.stopLogCaptureByApp(agent.applicationId(), agent.environmentId());
|
||||
}
|
||||
|
||||
return emitter;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,260 @@
|
||||
package com.cameleer3.server.app.runtime;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
import com.cameleer3.server.app.search.ClickHouseLogStore;
|
||||
import com.cameleer3.server.core.ingestion.BufferedLogEntry;
|
||||
import com.github.dockerjava.api.DockerClient;
|
||||
import com.github.dockerjava.api.async.ResultCallback;
|
||||
import com.github.dockerjava.api.model.Frame;
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Streams Docker container stdout/stderr to ClickHouse until the agent registers via SSE.
|
||||
* One capture session per container, managed by container ID.
|
||||
*/
|
||||
public class ContainerLogForwarder {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ContainerLogForwarder.class);
|
||||
|
||||
private static final int FLUSH_BATCH_SIZE = 50;
|
||||
private static final long FLUSH_INTERVAL_MS = 2_000;
|
||||
private static final long MAX_CAPTURE_DURATION_MS = 5 * 60 * 1_000;
|
||||
private static final long CLEANUP_INTERVAL_MS = 30_000;
|
||||
|
||||
// Pattern to match Docker timestamp prefix: "2026-04-14T14:23:01.234567890Z "
|
||||
private static final Pattern DOCKER_TS_PATTERN = Pattern.compile(
|
||||
"^(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z)\\s(.*)$", Pattern.DOTALL);
|
||||
|
||||
// Pattern to infer log level from Java log output
|
||||
private static final Pattern LEVEL_PATTERN = Pattern.compile(
|
||||
"\\b(ERROR|WARN|INFO|DEBUG|TRACE)\\b");
|
||||
|
||||
private final DockerClient dockerClient;
|
||||
private final ClickHouseLogStore logStore;
|
||||
private final ConcurrentHashMap<String, CaptureSession> sessions = new ConcurrentHashMap<>();
|
||||
private final ExecutorService executor = Executors.newFixedThreadPool(10,
|
||||
r -> { Thread t = new Thread(r, "log-capture"); t.setDaemon(true); return t; });
|
||||
private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(
|
||||
r -> { Thread t = new Thread(r, "log-capture-cleanup"); t.setDaemon(true); return t; });
|
||||
|
||||
public ContainerLogForwarder(DockerClient dockerClient, ClickHouseLogStore logStore) {
|
||||
this.dockerClient = dockerClient;
|
||||
this.logStore = logStore;
|
||||
scheduler.scheduleAtFixedRate(this::cleanupExpiredSessions,
|
||||
CLEANUP_INTERVAL_MS, CLEANUP_INTERVAL_MS, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
public void startCapture(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
if (sessions.containsKey(containerId)) {
|
||||
log.debug("Already capturing logs for container {}", containerId.substring(0, 12));
|
||||
return;
|
||||
}
|
||||
|
||||
CaptureSession session = new CaptureSession(containerId, appSlug, envSlug, tenantId);
|
||||
if (sessions.putIfAbsent(containerId, session) != null) {
|
||||
return; // another thread beat us
|
||||
}
|
||||
|
||||
Future<?> future = executor.submit(() -> streamLogs(session));
|
||||
session.future = future;
|
||||
log.info("Started log capture for container {} (app={}, env={})",
|
||||
containerId.substring(0, 12), appSlug, envSlug);
|
||||
}
|
||||
|
||||
public void stopCapture(String containerId) {
|
||||
CaptureSession session = sessions.remove(containerId);
|
||||
if (session == null) return;
|
||||
|
||||
session.cancel();
|
||||
flushBuffer(session);
|
||||
log.info("Stopped log capture for container {} ({} lines captured)",
|
||||
containerId.substring(0, 12), session.lineCount);
|
||||
}
|
||||
|
||||
public void stopCaptureByApp(String appSlug, String envSlug) {
|
||||
List<String> toRemove = new ArrayList<>();
|
||||
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
|
||||
CaptureSession s = entry.getValue();
|
||||
if (appSlug.equals(s.appSlug) && envSlug.equals(s.envSlug)) {
|
||||
toRemove.add(entry.getKey());
|
||||
}
|
||||
}
|
||||
for (String containerId : toRemove) {
|
||||
stopCapture(containerId);
|
||||
}
|
||||
if (!toRemove.isEmpty()) {
|
||||
log.info("Stopped log capture for app={} env={} ({} containers)",
|
||||
appSlug, envSlug, toRemove.size());
|
||||
}
|
||||
}
|
||||
|
||||
@PreDestroy
|
||||
public void shutdown() {
|
||||
for (String containerId : new ArrayList<>(sessions.keySet())) {
|
||||
stopCapture(containerId);
|
||||
}
|
||||
scheduler.shutdownNow();
|
||||
executor.shutdownNow();
|
||||
}
|
||||
|
||||
private void streamLogs(CaptureSession session) {
|
||||
try {
|
||||
session.callback = dockerClient.logContainerCmd(session.containerId)
|
||||
.withFollowStream(true)
|
||||
.withStdOut(true)
|
||||
.withStdErr(true)
|
||||
.withTimestamps(true)
|
||||
.exec(new ResultCallback.Adapter<Frame>() {
|
||||
@Override
|
||||
public void onNext(Frame frame) {
|
||||
if (session.cancelled) return;
|
||||
String line = new String(frame.getPayload()).trim();
|
||||
if (line.isEmpty()) return;
|
||||
|
||||
session.buffer.add(line);
|
||||
session.lineCount++;
|
||||
|
||||
if (session.buffer.size() >= FLUSH_BATCH_SIZE
|
||||
|| System.currentTimeMillis() - session.lastFlush >= FLUSH_INTERVAL_MS) {
|
||||
flushBuffer(session);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onComplete() {
|
||||
flushBuffer(session);
|
||||
sessions.remove(session.containerId);
|
||||
log.debug("Log stream completed for container {}",
|
||||
session.containerId.substring(0, 12));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable throwable) {
|
||||
flushBuffer(session);
|
||||
sessions.remove(session.containerId);
|
||||
log.debug("Log stream error for container {}: {}",
|
||||
session.containerId.substring(0, 12), throwable.getMessage());
|
||||
}
|
||||
});
|
||||
} catch (Exception e) {
|
||||
sessions.remove(session.containerId);
|
||||
log.warn("Failed to start log capture for container {}: {}",
|
||||
session.containerId.substring(0, 12), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void flushBuffer(CaptureSession session) {
|
||||
List<String> lines;
|
||||
synchronized (session.buffer) {
|
||||
if (session.buffer.isEmpty()) return;
|
||||
lines = new ArrayList<>(session.buffer);
|
||||
session.buffer.clear();
|
||||
}
|
||||
session.lastFlush = System.currentTimeMillis();
|
||||
|
||||
List<BufferedLogEntry> entries = new ArrayList<>(lines.size());
|
||||
for (String line : lines) {
|
||||
Instant timestamp = Instant.now();
|
||||
String message = line;
|
||||
|
||||
Matcher tsMatcher = DOCKER_TS_PATTERN.matcher(line);
|
||||
if (tsMatcher.matches()) {
|
||||
try {
|
||||
timestamp = Instant.parse(tsMatcher.group(1));
|
||||
} catch (DateTimeParseException e) {
|
||||
// keep Instant.now()
|
||||
}
|
||||
message = tsMatcher.group(2);
|
||||
}
|
||||
|
||||
String level = inferLevel(message);
|
||||
|
||||
LogEntry logEntry = new LogEntry();
|
||||
logEntry.setTimestamp(timestamp);
|
||||
logEntry.setLevel(level);
|
||||
logEntry.setMessage(message);
|
||||
logEntry.setLoggerName("");
|
||||
logEntry.setThreadName("");
|
||||
logEntry.setStackTrace("");
|
||||
logEntry.setMdc(Collections.emptyMap());
|
||||
logEntry.setSource("container");
|
||||
|
||||
entries.add(new BufferedLogEntry(
|
||||
session.tenantId, session.envSlug, session.containerId.substring(0, 12),
|
||||
session.appSlug, logEntry));
|
||||
}
|
||||
|
||||
try {
|
||||
logStore.insertBufferedBatch(entries);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to flush {} container log entries for {}: {}",
|
||||
entries.size(), session.appSlug, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private String inferLevel(String message) {
|
||||
if (message.startsWith("\tat ") || message.startsWith("Caused by:")) {
|
||||
return "ERROR";
|
||||
}
|
||||
Matcher m = LEVEL_PATTERN.matcher(message);
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
return "INFO";
|
||||
}
|
||||
|
||||
private void cleanupExpiredSessions() {
|
||||
long now = System.currentTimeMillis();
|
||||
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
|
||||
CaptureSession session = entry.getValue();
|
||||
if (now - session.startedAt > MAX_CAPTURE_DURATION_MS) {
|
||||
log.info("Log capture timeout for container {} (app={}), stopping",
|
||||
entry.getKey().substring(0, 12), session.appSlug);
|
||||
stopCapture(entry.getKey());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class CaptureSession {
|
||||
final String containerId;
|
||||
final String appSlug;
|
||||
final String envSlug;
|
||||
final String tenantId;
|
||||
final long startedAt = System.currentTimeMillis();
|
||||
final List<String> buffer = Collections.synchronizedList(new ArrayList<>());
|
||||
volatile long lastFlush = System.currentTimeMillis();
|
||||
volatile long lineCount = 0;
|
||||
volatile boolean cancelled = false;
|
||||
volatile Future<?> future;
|
||||
volatile ResultCallback.Adapter<Frame> callback;
|
||||
|
||||
CaptureSession(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
this.containerId = containerId;
|
||||
this.appSlug = appSlug;
|
||||
this.envSlug = envSlug;
|
||||
this.tenantId = tenantId;
|
||||
}
|
||||
|
||||
void cancel() {
|
||||
cancelled = true;
|
||||
if (callback != null) {
|
||||
try { callback.close(); } catch (Exception e) { /* ignore */ }
|
||||
}
|
||||
if (future != null) {
|
||||
future.cancel(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -199,6 +199,8 @@ public class DeploymentExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
|
||||
|
||||
replicaStates.add(Map.of(
|
||||
"index", i,
|
||||
"containerId", containerId,
|
||||
|
||||
@@ -13,4 +13,7 @@ public class DisabledRuntimeOrchestrator implements RuntimeOrchestrator {
|
||||
@Override public void removeContainer(String id) { throw new UnsupportedOperationException("Runtime management disabled"); }
|
||||
@Override public ContainerStatus getContainerStatus(String id) { return ContainerStatus.notFound(); }
|
||||
@Override public Stream<String> getLogs(String id, int tail) { return Stream.empty(); }
|
||||
@Override public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
|
||||
@Override public void stopLogCapture(String containerId) {}
|
||||
@Override public void stopLogCaptureByApp(String appSlug, String envSlug) {}
|
||||
}
|
||||
|
||||
@@ -98,6 +98,10 @@ public class DockerEventMonitor {
|
||||
}
|
||||
replicas.set(i, updated);
|
||||
changed = true;
|
||||
// Stop log capture for this container — it's dead or stopped
|
||||
if ("die".equals(action) || "oom".equals(action) || "stop".equals(action)) {
|
||||
runtimeOrchestrator.stopLogCapture(containerId);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,12 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
||||
private static final Logger log = LoggerFactory.getLogger(DockerRuntimeOrchestrator.class);
|
||||
private DockerClient dockerClient;
|
||||
|
||||
private ContainerLogForwarder logForwarder;
|
||||
|
||||
public void setLogForwarder(ContainerLogForwarder logForwarder) {
|
||||
this.logForwarder = logForwarder;
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
var config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
||||
@@ -196,4 +202,25 @@ public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
||||
}
|
||||
return logLines.stream();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.startCapture(containerId, appSlug, envSlug, tenantId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopLogCapture(String containerId) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.stopCapture(containerId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopLogCaptureByApp(String appSlug, String envSlug) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.stopCaptureByApp(appSlug, envSlug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.cameleer3.server.app.runtime;
|
||||
|
||||
import com.cameleer3.server.app.search.ClickHouseLogStore;
|
||||
import com.cameleer3.server.app.storage.PostgresDeploymentRepository;
|
||||
import com.cameleer3.server.core.runtime.DeploymentRepository;
|
||||
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
|
||||
@@ -17,13 +18,17 @@ public class RuntimeOrchestratorAutoConfig {
|
||||
private static final Logger log = LoggerFactory.getLogger(RuntimeOrchestratorAutoConfig.class);
|
||||
|
||||
@Bean
|
||||
public RuntimeOrchestrator runtimeOrchestrator() {
|
||||
// Auto-detect: Docker socket available?
|
||||
public RuntimeOrchestrator runtimeOrchestrator(
|
||||
@org.springframework.beans.factory.annotation.Autowired(required = false)
|
||||
ContainerLogForwarder logForwarder) {
|
||||
if (Files.exists(Path.of("/var/run/docker.sock"))) {
|
||||
log.info("Docker socket detected - enabling Docker runtime orchestrator");
|
||||
return new DockerRuntimeOrchestrator();
|
||||
DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator();
|
||||
if (logForwarder != null) {
|
||||
orchestrator.setLogForwarder(logForwarder);
|
||||
}
|
||||
return orchestrator;
|
||||
}
|
||||
// TODO: K8s detection (check for service account token)
|
||||
log.info("No Docker socket or K8s detected - runtime management disabled (observability-only mode)");
|
||||
return new DisabledRuntimeOrchestrator();
|
||||
}
|
||||
@@ -44,4 +49,13 @@ public class RuntimeOrchestratorAutoConfig {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ContainerLogForwarder containerLogForwarder(RuntimeOrchestrator orchestrator,
|
||||
ClickHouseLogStore logStore) {
|
||||
if (orchestrator instanceof DockerRuntimeOrchestrator docker) {
|
||||
return new ContainerLogForwarder(docker.getDockerClient(), logStore);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,4 +9,13 @@ public interface RuntimeOrchestrator {
|
||||
void removeContainer(String containerId);
|
||||
ContainerStatus getContainerStatus(String containerId);
|
||||
Stream<String> getLogs(String containerId, int tailLines);
|
||||
|
||||
/** Start streaming container logs to ClickHouse. */
|
||||
default void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
|
||||
|
||||
/** Stop log capture for a specific container (e.g., on die/oom). */
|
||||
default void stopLogCapture(String containerId) {}
|
||||
|
||||
/** Stop log capture for all containers matching this app+env (e.g., on agent SSE connect). */
|
||||
default void stopLogCaptureByApp(String appSlug, String envSlug) {}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,972 @@
|
||||
# Container Startup Log Capture — Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Capture Docker container stdout/stderr from startup until the Cameleer agent registers via SSE, storing logs in ClickHouse for display in deployment views and general log search.
|
||||
|
||||
**Architecture:** Extend `RuntimeOrchestrator` with log capture methods. A new `ContainerLogForwarder` component (Docker-specific) streams container output using `docker logs --follow`, batches lines, and flushes them to ClickHouse via the existing `insertBufferedBatch()` path with `source = 'container'`. Capture stops on SSE connect or container death. UI adds a startup log panel below `DeploymentProgress` and source badges in log views.
|
||||
|
||||
**Tech Stack:** Java 17, Spring Boot 3.4, Docker Java client (zerodep), ClickHouse, React, TanStack Query
|
||||
|
||||
**Spec:** `docs/superpowers/specs/2026-04-14-container-startup-log-capture-design.md`
|
||||
|
||||
---
|
||||
|
||||
## File Map
|
||||
|
||||
### New Files
|
||||
|
||||
| File | Responsibility |
|
||||
|------|----------------|
|
||||
| `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java` | Docker log streaming, batching, ClickHouse flush, session lifecycle |
|
||||
| `ui/src/components/StartupLogPanel.tsx` | Collapsible log panel for deployment startup logs |
|
||||
| `ui/src/components/StartupLogPanel.module.css` | Styles for startup log panel |
|
||||
|
||||
### Modified Files
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `cameleer3-server-core/.../RuntimeOrchestrator.java` | Add 3 default no-op methods |
|
||||
| `cameleer3-server-app/.../DockerRuntimeOrchestrator.java` | Implement log capture, delegate to forwarder |
|
||||
| `cameleer3-server-app/.../DisabledRuntimeOrchestrator.java` | Override no-ops (explicit) |
|
||||
| `cameleer3-server-app/.../RuntimeOrchestratorAutoConfig.java` | Create and wire `ContainerLogForwarder` bean |
|
||||
| `cameleer3-server-app/.../DeploymentExecutor.java` | Call `startLogCapture()` after container start |
|
||||
| `cameleer3-server-app/.../SseConnectionManager.java` | Call `stopLogCaptureByApp()` on SSE connect |
|
||||
| `cameleer3-server-app/.../DockerEventMonitor.java` | Call `stopLogCapture()` on die/oom |
|
||||
| `ui/src/pages/AppsTab/AppsTab.tsx` | Render `StartupLogPanel` below `DeploymentProgress` |
|
||||
| `ui/src/pages/AgentInstance/AgentInstance.tsx` | Add `container` to `LOG_SOURCE_ITEMS` |
|
||||
| `ui/src/api/queries/logs.ts` | Add `useStartupLogs` hook |
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Extend RuntimeOrchestrator Interface
|
||||
|
||||
**Files:**
|
||||
- Modify: `cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java`
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java`
|
||||
|
||||
- [ ] **Step 1: Add default methods to RuntimeOrchestrator**
|
||||
|
||||
In `cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java`, add three default no-op methods after the existing `getLogs` method (line 11):
|
||||
|
||||
```java
|
||||
package com.cameleer3.server.core.runtime;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public interface RuntimeOrchestrator {
|
||||
boolean isEnabled();
|
||||
String startContainer(ContainerRequest request);
|
||||
void stopContainer(String containerId);
|
||||
void removeContainer(String containerId);
|
||||
ContainerStatus getContainerStatus(String containerId);
|
||||
Stream<String> getLogs(String containerId, int tailLines);
|
||||
|
||||
/** Start streaming container logs to ClickHouse. */
|
||||
default void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
|
||||
|
||||
/** Stop log capture for a specific container (e.g., on die/oom). */
|
||||
default void stopLogCapture(String containerId) {}
|
||||
|
||||
/** Stop log capture for all containers matching this app+env (e.g., on agent SSE connect). */
|
||||
default void stopLogCaptureByApp(String appSlug, String envSlug) {}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add explicit overrides to DisabledRuntimeOrchestrator**
|
||||
|
||||
In `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java`, add explicit no-op overrides after line 15:
|
||||
|
||||
```java
|
||||
@Override public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {}
|
||||
@Override public void stopLogCapture(String containerId) {}
|
||||
@Override public void stopLogCaptureByApp(String appSlug, String envSlug) {}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-core,cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-core/src/main/java/com/cameleer3/server/core/runtime/RuntimeOrchestrator.java \
|
||||
cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DisabledRuntimeOrchestrator.java
|
||||
git commit -m "feat: add log capture methods to RuntimeOrchestrator interface"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Create ContainerLogForwarder
|
||||
|
||||
**Files:**
|
||||
- Create: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java`
|
||||
|
||||
- [ ] **Step 1: Create ContainerLogForwarder class**
|
||||
|
||||
```java
|
||||
package com.cameleer3.server.app.runtime;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
import com.cameleer3.server.app.search.ClickHouseLogStore;
|
||||
import com.cameleer3.server.core.ingestion.BufferedLogEntry;
|
||||
import com.github.dockerjava.api.DockerClient;
|
||||
import com.github.dockerjava.api.async.ResultCallback;
|
||||
import com.github.dockerjava.api.model.Frame;
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Streams Docker container stdout/stderr to ClickHouse until the agent registers via SSE.
|
||||
* One capture session per container, managed by container ID.
|
||||
*/
|
||||
public class ContainerLogForwarder {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ContainerLogForwarder.class);
|
||||
|
||||
private static final int FLUSH_BATCH_SIZE = 50;
|
||||
private static final long FLUSH_INTERVAL_MS = 2_000;
|
||||
private static final long MAX_CAPTURE_DURATION_MS = 5 * 60 * 1_000;
|
||||
private static final long CLEANUP_INTERVAL_MS = 30_000;
|
||||
|
||||
// Pattern to match Docker timestamp prefix: "2026-04-14T14:23:01.234567890Z "
|
||||
private static final Pattern DOCKER_TS_PATTERN = Pattern.compile(
|
||||
"^(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z)\\s(.*)$", Pattern.DOTALL);
|
||||
|
||||
// Pattern to infer log level from Java log output
|
||||
private static final Pattern LEVEL_PATTERN = Pattern.compile(
|
||||
"\\b(ERROR|WARN|INFO|DEBUG|TRACE)\\b");
|
||||
|
||||
private final DockerClient dockerClient;
|
||||
private final ClickHouseLogStore logStore;
|
||||
private final ConcurrentHashMap<String, CaptureSession> sessions = new ConcurrentHashMap<>();
|
||||
private final ExecutorService executor = Executors.newFixedThreadPool(10,
|
||||
r -> { Thread t = new Thread(r, "log-capture"); t.setDaemon(true); return t; });
|
||||
private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(
|
||||
r -> { Thread t = new Thread(r, "log-capture-cleanup"); t.setDaemon(true); return t; });
|
||||
|
||||
public ContainerLogForwarder(DockerClient dockerClient, ClickHouseLogStore logStore) {
|
||||
this.dockerClient = dockerClient;
|
||||
this.logStore = logStore;
|
||||
scheduler.scheduleAtFixedRate(this::cleanupExpiredSessions,
|
||||
CLEANUP_INTERVAL_MS, CLEANUP_INTERVAL_MS, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
public void startCapture(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
if (sessions.containsKey(containerId)) {
|
||||
log.debug("Already capturing logs for container {}", containerId.substring(0, 12));
|
||||
return;
|
||||
}
|
||||
|
||||
CaptureSession session = new CaptureSession(containerId, appSlug, envSlug, tenantId);
|
||||
if (sessions.putIfAbsent(containerId, session) != null) {
|
||||
return; // another thread beat us
|
||||
}
|
||||
|
||||
Future<?> future = executor.submit(() -> streamLogs(session));
|
||||
session.future = future;
|
||||
log.info("Started log capture for container {} (app={}, env={})",
|
||||
containerId.substring(0, 12), appSlug, envSlug);
|
||||
}
|
||||
|
||||
public void stopCapture(String containerId) {
|
||||
CaptureSession session = sessions.remove(containerId);
|
||||
if (session == null) return;
|
||||
|
||||
session.cancel();
|
||||
flushBuffer(session);
|
||||
log.info("Stopped log capture for container {} ({} lines captured)",
|
||||
containerId.substring(0, 12), session.lineCount);
|
||||
}
|
||||
|
||||
public void stopCaptureByApp(String appSlug, String envSlug) {
|
||||
List<String> toRemove = new ArrayList<>();
|
||||
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
|
||||
CaptureSession s = entry.getValue();
|
||||
if (appSlug.equals(s.appSlug) && envSlug.equals(s.envSlug)) {
|
||||
toRemove.add(entry.getKey());
|
||||
}
|
||||
}
|
||||
for (String containerId : toRemove) {
|
||||
stopCapture(containerId);
|
||||
}
|
||||
if (!toRemove.isEmpty()) {
|
||||
log.info("Stopped log capture for app={} env={} ({} containers)",
|
||||
appSlug, envSlug, toRemove.size());
|
||||
}
|
||||
}
|
||||
|
||||
@PreDestroy
|
||||
public void shutdown() {
|
||||
for (String containerId : new ArrayList<>(sessions.keySet())) {
|
||||
stopCapture(containerId);
|
||||
}
|
||||
scheduler.shutdownNow();
|
||||
executor.shutdownNow();
|
||||
}
|
||||
|
||||
private void streamLogs(CaptureSession session) {
|
||||
try {
|
||||
session.callback = dockerClient.logContainerCmd(session.containerId)
|
||||
.withFollowStream(true)
|
||||
.withStdOut(true)
|
||||
.withStdErr(true)
|
||||
.withTimestamps(true)
|
||||
.exec(new ResultCallback.Adapter<Frame>() {
|
||||
@Override
|
||||
public void onNext(Frame frame) {
|
||||
if (session.cancelled) return;
|
||||
String line = new String(frame.getPayload()).trim();
|
||||
if (line.isEmpty()) return;
|
||||
|
||||
session.buffer.add(line);
|
||||
session.lineCount++;
|
||||
|
||||
if (session.buffer.size() >= FLUSH_BATCH_SIZE
|
||||
|| System.currentTimeMillis() - session.lastFlush >= FLUSH_INTERVAL_MS) {
|
||||
flushBuffer(session);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onComplete() {
|
||||
flushBuffer(session);
|
||||
sessions.remove(session.containerId);
|
||||
log.debug("Log stream completed for container {}",
|
||||
session.containerId.substring(0, 12));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable throwable) {
|
||||
flushBuffer(session);
|
||||
sessions.remove(session.containerId);
|
||||
log.debug("Log stream error for container {}: {}",
|
||||
session.containerId.substring(0, 12), throwable.getMessage());
|
||||
}
|
||||
});
|
||||
} catch (Exception e) {
|
||||
sessions.remove(session.containerId);
|
||||
log.warn("Failed to start log capture for container {}: {}",
|
||||
session.containerId.substring(0, 12), e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void flushBuffer(CaptureSession session) {
|
||||
List<String> lines;
|
||||
synchronized (session.buffer) {
|
||||
if (session.buffer.isEmpty()) return;
|
||||
lines = new ArrayList<>(session.buffer);
|
||||
session.buffer.clear();
|
||||
}
|
||||
session.lastFlush = System.currentTimeMillis();
|
||||
|
||||
List<BufferedLogEntry> entries = new ArrayList<>(lines.size());
|
||||
for (String line : lines) {
|
||||
Instant timestamp = Instant.now();
|
||||
String message = line;
|
||||
|
||||
Matcher tsMatcher = DOCKER_TS_PATTERN.matcher(line);
|
||||
if (tsMatcher.matches()) {
|
||||
try {
|
||||
timestamp = Instant.parse(tsMatcher.group(1));
|
||||
} catch (DateTimeParseException e) {
|
||||
// keep Instant.now()
|
||||
}
|
||||
message = tsMatcher.group(2);
|
||||
}
|
||||
|
||||
String level = inferLevel(message);
|
||||
|
||||
LogEntry logEntry = new LogEntry();
|
||||
logEntry.setTimestamp(timestamp);
|
||||
logEntry.setLevel(level);
|
||||
logEntry.setMessage(message);
|
||||
logEntry.setLoggerName("");
|
||||
logEntry.setThreadName("");
|
||||
logEntry.setStackTrace("");
|
||||
logEntry.setMdc(Collections.emptyMap());
|
||||
logEntry.setSource("container");
|
||||
|
||||
entries.add(new BufferedLogEntry(
|
||||
session.tenantId, session.envSlug, session.containerId.substring(0, 12),
|
||||
session.appSlug, logEntry));
|
||||
}
|
||||
|
||||
try {
|
||||
logStore.insertBufferedBatch(entries);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to flush {} container log entries for {}: {}",
|
||||
entries.size(), session.appSlug, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private String inferLevel(String message) {
|
||||
if (message.startsWith("\tat ") || message.startsWith("Caused by:")) {
|
||||
return "ERROR";
|
||||
}
|
||||
Matcher m = LEVEL_PATTERN.matcher(message);
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
return "INFO";
|
||||
}
|
||||
|
||||
private void cleanupExpiredSessions() {
|
||||
long now = System.currentTimeMillis();
|
||||
for (Map.Entry<String, CaptureSession> entry : sessions.entrySet()) {
|
||||
CaptureSession session = entry.getValue();
|
||||
if (now - session.startedAt > MAX_CAPTURE_DURATION_MS) {
|
||||
log.info("Log capture timeout for container {} (app={}), stopping",
|
||||
entry.getKey().substring(0, 12), session.appSlug);
|
||||
stopCapture(entry.getKey());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class CaptureSession {
|
||||
final String containerId;
|
||||
final String appSlug;
|
||||
final String envSlug;
|
||||
final String tenantId;
|
||||
final long startedAt = System.currentTimeMillis();
|
||||
final List<String> buffer = Collections.synchronizedList(new ArrayList<>());
|
||||
volatile long lastFlush = System.currentTimeMillis();
|
||||
volatile long lineCount = 0;
|
||||
volatile boolean cancelled = false;
|
||||
volatile Future<?> future;
|
||||
volatile ResultCallback.Adapter<Frame> callback;
|
||||
|
||||
CaptureSession(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
this.containerId = containerId;
|
||||
this.appSlug = appSlug;
|
||||
this.envSlug = envSlug;
|
||||
this.tenantId = tenantId;
|
||||
}
|
||||
|
||||
void cancel() {
|
||||
cancelled = true;
|
||||
if (callback != null) {
|
||||
try { callback.close(); } catch (Exception e) { /* ignore */ }
|
||||
}
|
||||
if (future != null) {
|
||||
future.cancel(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/ContainerLogForwarder.java
|
||||
git commit -m "feat: add ContainerLogForwarder for Docker log streaming to ClickHouse"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Wire ContainerLogForwarder and Implement in DockerRuntimeOrchestrator
|
||||
|
||||
**Files:**
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerRuntimeOrchestrator.java`
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java`
|
||||
|
||||
- [ ] **Step 1: Add ContainerLogForwarder field and log capture methods to DockerRuntimeOrchestrator**
|
||||
|
||||
In `DockerRuntimeOrchestrator.java`, add a field and setter after the `dockerClient` field (line 32), and implement the three methods after `getLogs()` (line 198):
|
||||
|
||||
After line 32 (`private DockerClient dockerClient;`), add:
|
||||
|
||||
```java
|
||||
private ContainerLogForwarder logForwarder;
|
||||
|
||||
public void setLogForwarder(ContainerLogForwarder logForwarder) {
|
||||
this.logForwarder = logForwarder;
|
||||
}
|
||||
```
|
||||
|
||||
After line 198 (closing brace of `getLogs()`), add:
|
||||
|
||||
```java
|
||||
@Override
|
||||
public void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.startCapture(containerId, appSlug, envSlug, tenantId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopLogCapture(String containerId) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.stopCapture(containerId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopLogCaptureByApp(String appSlug, String envSlug) {
|
||||
if (logForwarder != null) {
|
||||
logForwarder.stopCaptureByApp(appSlug, envSlug);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Wire ContainerLogForwarder in RuntimeOrchestratorAutoConfig**
|
||||
|
||||
In `RuntimeOrchestratorAutoConfig.java`, add a new bean method and inject it into the orchestrator. Replace the `runtimeOrchestrator()` bean (lines 19-29):
|
||||
|
||||
```java
|
||||
@Bean
|
||||
public RuntimeOrchestrator runtimeOrchestrator(
|
||||
@org.springframework.beans.factory.annotation.Autowired(required = false)
|
||||
ContainerLogForwarder logForwarder) {
|
||||
// Auto-detect: Docker socket available?
|
||||
if (Files.exists(Path.of("/var/run/docker.sock"))) {
|
||||
log.info("Docker socket detected - enabling Docker runtime orchestrator");
|
||||
DockerRuntimeOrchestrator orchestrator = new DockerRuntimeOrchestrator();
|
||||
if (logForwarder != null) {
|
||||
orchestrator.setLogForwarder(logForwarder);
|
||||
}
|
||||
return orchestrator;
|
||||
}
|
||||
// TODO: K8s detection (check for service account token)
|
||||
log.info("No Docker socket or K8s detected - runtime management disabled (observability-only mode)");
|
||||
return new DisabledRuntimeOrchestrator();
|
||||
}
|
||||
```
|
||||
|
||||
Add the `ContainerLogForwarder` bean after `dockerEventMonitor()` (after line 46):
|
||||
|
||||
```java
|
||||
@Bean
|
||||
public ContainerLogForwarder containerLogForwarder(RuntimeOrchestrator orchestrator,
|
||||
ClickHouseLogStore logStore) {
|
||||
if (orchestrator instanceof DockerRuntimeOrchestrator docker) {
|
||||
return new ContainerLogForwarder(docker.getDockerClient(), logStore);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
```
|
||||
|
||||
Add the import at the top:
|
||||
|
||||
```java
|
||||
import com.cameleer3.server.app.search.ClickHouseLogStore;
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerRuntimeOrchestrator.java \
|
||||
cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/RuntimeOrchestratorAutoConfig.java
|
||||
git commit -m "feat: wire ContainerLogForwarder into DockerRuntimeOrchestrator"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Integrate Start Capture in DeploymentExecutor
|
||||
|
||||
**Files:**
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DeploymentExecutor.java`
|
||||
|
||||
- [ ] **Step 1: Add startLogCapture call after each replica starts**
|
||||
|
||||
In `DeploymentExecutor.java`, inside the replica loop (after line 200, after the `connectContainer` loop), add a call to start log capture. Insert after the closing brace of the `for (String net : additionalNets)` block (line 200) and before the `replicaStates.add(...)` call (line 202):
|
||||
|
||||
```java
|
||||
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
|
||||
```
|
||||
|
||||
This goes right between the network connection loop and the `replicaStates.add(Map.of(...))` call, so the full context looks like:
|
||||
|
||||
```java
|
||||
// Connect to additional networks after container is started
|
||||
for (String net : additionalNets) {
|
||||
if (networkManager != null) {
|
||||
networkManager.connectContainer(containerId, net);
|
||||
}
|
||||
}
|
||||
|
||||
orchestrator.startLogCapture(containerId, app.slug(), env.slug(), tenantId);
|
||||
|
||||
replicaStates.add(Map.of(
|
||||
"index", i,
|
||||
"containerId", containerId,
|
||||
"containerName", containerName,
|
||||
"status", "STARTING"
|
||||
));
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DeploymentExecutor.java
|
||||
git commit -m "feat: start log capture when deployment replicas are created"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 5: Integrate Stop Capture in SseConnectionManager
|
||||
|
||||
**Files:**
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/agent/SseConnectionManager.java`
|
||||
|
||||
- [ ] **Step 1: Inject RuntimeOrchestrator and call stopLogCaptureByApp on SSE connect**
|
||||
|
||||
In `SseConnectionManager.java`, add the `RuntimeOrchestrator` dependency. Modify the constructor (lines 39-45) to accept it:
|
||||
|
||||
```java
|
||||
private final RuntimeOrchestrator runtimeOrchestrator;
|
||||
|
||||
public SseConnectionManager(AgentRegistryService registryService, AgentRegistryConfig config,
|
||||
SsePayloadSigner ssePayloadSigner, ObjectMapper objectMapper,
|
||||
RuntimeOrchestrator runtimeOrchestrator) {
|
||||
this.registryService = registryService;
|
||||
this.config = config;
|
||||
this.ssePayloadSigner = ssePayloadSigner;
|
||||
this.objectMapper = objectMapper;
|
||||
this.runtimeOrchestrator = runtimeOrchestrator;
|
||||
}
|
||||
```
|
||||
|
||||
Add the import at the top:
|
||||
|
||||
```java
|
||||
import com.cameleer3.server.core.runtime.RuntimeOrchestrator;
|
||||
import com.cameleer3.server.core.agent.AgentInfo;
|
||||
```
|
||||
|
||||
In the `connect()` method (line 60), after the `log.info("SSE connection established...")` (line 83), add the stop capture call:
|
||||
|
||||
```java
|
||||
// Stop container log capture — agent is now online and will send its own logs
|
||||
AgentInfo agent = registryService.findById(agentId);
|
||||
if (agent != null) {
|
||||
runtimeOrchestrator.stopLogCaptureByApp(agent.applicationId(), agent.environmentId()); // NOTE(review): stopLogCaptureByApp matches sessions by appSlug/envSlug — verify these accessors return slugs, not IDs
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/agent/SseConnectionManager.java
|
||||
git commit -m "feat: stop container log capture when agent SSE connects"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 6: Integrate Stop Capture in DockerEventMonitor
|
||||
|
||||
**Files:**
|
||||
- Modify: `cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java`
|
||||
|
||||
- [ ] **Step 1: Call stopLogCapture on die/oom/stop events**
|
||||
|
||||
In `DockerEventMonitor.java`, inside the `handleEvent()` method, after the replica state is updated and before the `changed` flag check (after line 102, inside the `switch` block), add a call to stop log capture. The cleanest place is right after the `replicas.set(i, updated)` line (line 99), within the same `if` block:
|
||||
|
||||
Insert after line 101 (`break;`) and before line 102 (`}`):
|
||||
|
||||
```java
|
||||
// Stop log capture for this container — it's dead or stopped
|
||||
if ("die".equals(action) || "oom".equals(action) || "stop".equals(action)) {
|
||||
runtimeOrchestrator.stopLogCapture(containerId);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify compilation**
|
||||
|
||||
Run: `mvn clean compile -pl cameleer3-server-app -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add cameleer3-server-app/src/main/java/com/cameleer3/server/app/runtime/DockerEventMonitor.java
|
||||
git commit -m "feat: stop container log capture on Docker die/oom events"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 7: Full Backend Compilation and Verification
|
||||
|
||||
- [ ] **Step 1: Run full compile across all modules**
|
||||
|
||||
Run: `mvn clean compile test-compile -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 2: Verify no missing imports or type errors**
|
||||
|
||||
If compilation fails, fix issues and re-run. Common issues to watch for:
|
||||
- Circular bean dependency between `RuntimeOrchestrator` and `ContainerLogForwarder` — note that `@Autowired(required = false)` only tolerates a *missing* bean; it does not break a true cycle. If Spring reports a circular reference, inject `ContainerLogForwarder` lazily (`@Lazy`) or via a setter instead
|
||||
- Missing `AgentInfo` import or `findById()` method on `AgentRegistryService` — verify the method exists
|
||||
|
||||
- [ ] **Step 3: Commit any fixes**
|
||||
|
||||
Only if Step 2 required changes.
|
||||
|
||||
---
|
||||
|
||||
### Task 8: Add useStartupLogs Hook
|
||||
|
||||
**Files:**
|
||||
- Modify: `ui/src/api/queries/logs.ts`
|
||||
|
||||
- [ ] **Step 1: Add environment to LogSearchParams and fetchLogs**
|
||||
|
||||
In `ui/src/api/queries/logs.ts`, add `environment` to the `LogSearchParams` interface (after the `source` field, line 33):
|
||||
|
||||
```typescript
|
||||
export interface LogSearchParams {
|
||||
q?: string;
|
||||
level?: string;
|
||||
application?: string;
|
||||
agentId?: string;
|
||||
source?: string;
|
||||
environment?: string;
|
||||
exchangeId?: string;
|
||||
logger?: string;
|
||||
from?: string;
|
||||
to?: string;
|
||||
cursor?: string;
|
||||
limit?: number;
|
||||
sort?: 'asc' | 'desc';
|
||||
}
|
||||
```
|
||||
|
||||
In the `fetchLogs` function, add the environment param after the source line (after line 50):
|
||||
|
||||
```typescript
|
||||
if (params.environment) urlParams.set('environment', params.environment);
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add useStartupLogs hook**
|
||||
|
||||
In `ui/src/api/queries/logs.ts`, add the following hook after the `useApplicationLogs` function (after line 128):
|
||||
|
||||
```typescript
|
||||
/**
|
||||
* Fetches container startup logs for a deployment.
|
||||
* Polls every 3s while the deployment is STARTING, stops when RUNNING/FAILED.
|
||||
*/
|
||||
export function useStartupLogs(
|
||||
application: string | undefined,
|
||||
environment: string | undefined,
|
||||
deployCreatedAt: string | undefined,
|
||||
isStarting: boolean,
|
||||
) {
|
||||
const params: LogSearchParams = {
|
||||
application: application || undefined,
|
||||
environment: environment || undefined,
|
||||
source: 'container',
|
||||
from: deployCreatedAt || undefined,
|
||||
sort: 'asc',
|
||||
limit: 500,
|
||||
};
|
||||
|
||||
return useLogs(params, {
|
||||
enabled: !!application && !!deployCreatedAt,
|
||||
refetchInterval: isStarting ? 3_000 : false,
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add ui/src/api/queries/logs.ts
|
||||
git commit -m "feat: add useStartupLogs hook for container startup log polling"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 9: Create StartupLogPanel Component
|
||||
|
||||
**Files:**
|
||||
- Create: `ui/src/components/StartupLogPanel.module.css`
|
||||
- Create: `ui/src/components/StartupLogPanel.tsx`
|
||||
|
||||
- [ ] **Step 1: Create CSS module**
|
||||
|
||||
Create `ui/src/components/StartupLogPanel.module.css`:
|
||||
|
||||
```css
|
||||
.panel {
|
||||
background: var(--bg-secondary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
overflow: hidden;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.headerLeft {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.badge {
|
||||
font-size: 11px;
|
||||
padding: 1px 8px;
|
||||
border-radius: 10px;
|
||||
}
|
||||
|
||||
.badgeLive {
|
||||
background: var(--success-muted);
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
.badgeStopped {
|
||||
background: var(--error-muted);
|
||||
color: var(--error);
|
||||
}
|
||||
|
||||
.pollingHint {
|
||||
font-size: 11px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.lineCount {
|
||||
font-size: 12px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.empty {
|
||||
padding: 16px;
|
||||
text-align: center;
|
||||
font-size: 13px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create StartupLogPanel component**
|
||||
|
||||
Create `ui/src/components/StartupLogPanel.tsx`:
|
||||
|
||||
```tsx
|
||||
import { LogViewer } from '@cameleer/design-system';
|
||||
import { useStartupLogs } from '../api/queries/logs';
|
||||
import type { Deployment } from '../api/queries/admin/apps';
|
||||
import styles from './StartupLogPanel.module.css';
|
||||
|
||||
interface StartupLogPanelProps {
|
||||
deployment: Deployment;
|
||||
appSlug: string;
|
||||
envSlug: string;
|
||||
}
|
||||
|
||||
export function StartupLogPanel({ deployment, appSlug, envSlug }: StartupLogPanelProps) {
|
||||
const isStarting = deployment.status === 'STARTING';
|
||||
const isFailed = deployment.status === 'FAILED';
|
||||
|
||||
const { data } = useStartupLogs(appSlug, envSlug, deployment.createdAt, isStarting);
|
||||
|
||||
const entries = data?.data ?? [];
|
||||
|
||||
if (entries.length === 0 && !isStarting) return null;
|
||||
|
||||
return (
|
||||
<div className={styles.panel}>
|
||||
<div className={styles.header}>
|
||||
<div className={styles.headerLeft}>
|
||||
<span className={styles.title}>Startup Logs</span>
|
||||
{isStarting && (
|
||||
<>
|
||||
<span className={`${styles.badge} ${styles.badgeLive}`}>● live</span>
|
||||
<span className={styles.pollingHint}>polling every 3s</span>
|
||||
</>
|
||||
)}
|
||||
{isFailed && (
|
||||
<span className={`${styles.badge} ${styles.badgeStopped}`}>stopped</span>
|
||||
)}
|
||||
</div>
|
||||
<span className={styles.lineCount}>{entries.length} lines</span>
|
||||
</div>
|
||||
{entries.length > 0 ? (
|
||||
<LogViewer entries={entries} maxHeight={300} />
|
||||
) : (
|
||||
<div className={styles.empty}>Waiting for container output...</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add ui/src/components/StartupLogPanel.tsx ui/src/components/StartupLogPanel.module.css
|
||||
git commit -m "feat: add StartupLogPanel component for deployment startup logs"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 10: Integrate StartupLogPanel into AppsTab
|
||||
|
||||
**Files:**
|
||||
- Modify: `ui/src/pages/AppsTab/AppsTab.tsx`
|
||||
|
||||
- [ ] **Step 1: Import StartupLogPanel**
|
||||
|
||||
In `AppsTab.tsx`, add the import after the `DeploymentProgress` import (line 41):
|
||||
|
||||
```typescript
|
||||
import { StartupLogPanel } from '../../components/StartupLogPanel';
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Render StartupLogPanel below DeploymentProgress**
|
||||
|
||||
In `AppsTab.tsx`, find the block that renders `DeploymentProgress` (lines 774-779):
|
||||
|
||||
```tsx
|
||||
{deployments.filter((d) => d.deployStage).map((d) => (
|
||||
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
|
||||
<span className={styles.cellMeta}>{d.containerName}</span>
|
||||
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
|
||||
</div>
|
||||
))}
|
||||
```
|
||||
|
||||
Replace with:
|
||||
|
||||
```tsx
|
||||
{deployments.filter((d) => d.deployStage || d.status === 'FAILED').map((d) => (
|
||||
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
|
||||
<span className={styles.cellMeta}>{d.containerName}</span>
|
||||
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
|
||||
<StartupLogPanel deployment={d} appSlug={app.slug} envSlug={environments.find(e => e.id === d.environmentId)?.slug ?? ''} />
|
||||
</div>
|
||||
))}
|
||||
```
|
||||
|
||||
Note: the filter changes from `d.deployStage` to `d.deployStage || d.status === 'FAILED'` so that failed deployments still show their startup logs even after the deploy stage is cleared.
|
||||
|
||||
- [ ] **Step 3: Verify the UI compiles**
|
||||
|
||||
Run: `cd ui && npx tsc --noEmit`
|
||||
Expected: No errors
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add ui/src/pages/AppsTab/AppsTab.tsx
|
||||
git commit -m "feat: show startup logs panel below deployment progress"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 11: Add Container Source to Log Source Filters
|
||||
|
||||
**Files:**
|
||||
- Modify: `ui/src/pages/AgentInstance/AgentInstance.tsx`
|
||||
|
||||
- [ ] **Step 1: Add container to LOG_SOURCE_ITEMS**
|
||||
|
||||
In `AgentInstance.tsx`, find the `LOG_SOURCE_ITEMS` constant (lines 28-31):
|
||||
|
||||
```typescript
|
||||
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
|
||||
{ value: 'app', label: 'App' },
|
||||
{ value: 'agent', label: 'Agent' },
|
||||
];
|
||||
```
|
||||
|
||||
Replace with:
|
||||
|
||||
```typescript
|
||||
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
|
||||
{ value: 'app', label: 'App' },
|
||||
{ value: 'agent', label: 'Agent' },
|
||||
{ value: 'container', label: 'Container' },
|
||||
];
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Check if AgentHealth has the same filter**
|
||||
|
||||
Check `ui/src/pages/AgentHealth/AgentHealth.tsx` for `LOG_SOURCE_ITEMS` — if it has the same constant, add `container` there too.
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add ui/src/pages/AgentInstance/AgentInstance.tsx
|
||||
# Also add AgentHealth.tsx if modified
|
||||
git commit -m "feat: add container source option to log source filters"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 12: End-to-End Verification
|
||||
|
||||
- [ ] **Step 1: Full backend build**
|
||||
|
||||
Run: `mvn clean compile test-compile -q`
|
||||
Expected: BUILD SUCCESS
|
||||
|
||||
- [ ] **Step 2: Full UI type check**
|
||||
|
||||
Run: `cd ui && npx tsc --noEmit`
|
||||
Expected: No errors
|
||||
|
||||
- [ ] **Step 3: Start dev server and verify UI**
|
||||
|
||||
Run: `cd ui && npm run dev`
|
||||
|
||||
Open in browser and verify:
|
||||
1. Navigate to an app in the Deployments tab
|
||||
2. The deployment progress section renders without errors
|
||||
3. The source filter in Agent Instance page shows App / Agent / Container options
|
||||
4. No console errors
|
||||
|
||||
- [ ] **Step 4: Commit any fixes from verification**
|
||||
|
||||
Only if prior steps required changes.
|
||||
|
||||
---
|
||||
|
||||
## Out of Scope (Design System)
|
||||
|
||||
**Source badge on log lines:** The spec calls for a small `container` / `app` badge on each log line in the `LogViewer` component. `LogViewer` lives in `@cameleer/design-system` — a separate repo we don't build here. This requires a design system update to render the `source` field from `LogEntryResponse`. The data is already flowing (`source` field exists on all log entries); the rendering change belongs in the DS repo. Track as a follow-up.
|
||||
@@ -0,0 +1,187 @@
|
||||
# Container Startup Log Capture
|
||||
|
||||
Capture Docker container stdout/stderr from the moment a container starts until the Cameleer agent inside fully registers (SSE connection established). Stores logs in ClickHouse for display in the deployment view and general log search.
|
||||
|
||||
## Problem
|
||||
|
||||
When a deployed application crashes during startup — before the Cameleer agent can connect and send logs via the normal ingestion pipeline — all diagnostic output is lost. The container may be removed before anyone can inspect it, leaving operators blind to the root cause.
|
||||
|
||||
## Solution
|
||||
|
||||
A `ContainerLogForwarder` component streams Docker log output in real-time for each managed container, batches lines, and flushes them to the existing ClickHouse `logs` table with `source = 'container'`. Capture stops when the agent establishes its SSE connection, at which point the agent's own log pipeline takes over.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Core Interface Extension
|
||||
|
||||
Extend `RuntimeOrchestrator` (core module) with three new methods:
|
||||
|
||||
```java
|
||||
// in RuntimeOrchestrator.java
|
||||
void startLogCapture(String containerId, String appSlug, String envSlug, String tenantId);
|
||||
void stopLogCapture(String containerId);
|
||||
void stopLogCaptureByApp(String appSlug, String envSlug);
|
||||
```
|
||||
|
||||
`DisabledRuntimeOrchestrator` implements these as no-ops. `DockerRuntimeOrchestrator` delegates to `ContainerLogForwarder`.
|
||||
|
||||
### ContainerLogForwarder
|
||||
|
||||
**Package:** `com.cameleer3.server.app.runtime` (Docker-specific, alongside `DockerRuntimeOrchestrator`, `DockerEventMonitor`, etc.)
|
||||
|
||||
**Responsibilities:**
|
||||
- Manages active capture sessions in a `ConcurrentHashMap<String, CaptureSession>` keyed by container ID
|
||||
- Each `CaptureSession` holds: containerId, appSlug, envSlug, tenantId, a `Future<?>` for the streaming thread, and a buffer of pending log lines
|
||||
- Uses a bounded thread pool (fixed size ~10 threads)
|
||||
|
||||
**Streaming logic:**
|
||||
- Calls `dockerClient.logContainerCmd(containerId).withFollowStream(true).withStdOut(true).withStdErr(true).withTimestamps(true)`
|
||||
- Callback `onNext(Frame)` appends to an in-memory buffer
|
||||
- Every ~2 seconds (or every 50 lines, whichever comes first), flushes the buffer to ClickHouse via `ClickHouseLogStore.insertBufferedBatch()` — constructs `BufferedLogEntry` records with `source = "container"`, the deployment's app/env/tenant metadata, and container name as `instanceId`
|
||||
- On `onComplete()` (container stopped) or `onError()` — final flush, remove session from map
|
||||
|
||||
**Safety:**
|
||||
- Max capture duration: 5 minutes. A scheduled cleanup (every 30s) stops sessions exceeding this limit.
|
||||
- `@PreDestroy` cleanup: stop all active captures on server shutdown.
|
||||
|
||||
### ClickHouse Field Mapping
|
||||
|
||||
Uses the existing `logs` table. No schema changes required.
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| `source` | `'container'` |
|
||||
| `application` | appSlug from deployment |
|
||||
| `environment` | envSlug from deployment |
|
||||
| `tenant_id` | tenantId from deployment |
|
||||
| `instance_id` | containerName (e.g., `prod-orderservice-0`) |
|
||||
| `timestamp` | Parsed from Docker timestamp prefix |
|
||||
| `message` | Log line content (after timestamp) |
|
||||
| `level` | Inferred by regex (see below) |
|
||||
| `logger_name` | Empty string (not parseable from raw stdout) |
|
||||
| `thread_name` | Empty string |
|
||||
| `stack_trace` | Empty string (stack traces appear as consecutive message lines) |
|
||||
| `exchange_id` | Empty string |
|
||||
| `mdc` | Empty map |
|
||||
|
||||
### Log Level Inference
|
||||
|
||||
- Regex scan for common Java log patterns: ` ERROR `, ` WARN `, ` INFO `, ` DEBUG `, ` TRACE `
|
||||
- Stack trace continuation lines (starting with `\tat ` or `Caused by:`) inherit ERROR level
|
||||
- Lines matching no pattern default to INFO
|
||||
|
||||
## Integration Points
|
||||
|
||||
### Start Capture — DeploymentExecutor
|
||||
|
||||
After each replica container is started (inside the replica loop):
|
||||
|
||||
```java
|
||||
orchestrator.startLogCapture(containerId, appSlug, envSlug, tenantId);
|
||||
```
|
||||
|
||||
### Stop Capture — SseConnectionManager.connect()
|
||||
|
||||
When an agent connects SSE, look up its `AgentInfo` from the registry to get `application` + `environmentId`:
|
||||
|
||||
```java
|
||||
orchestrator.stopLogCaptureByApp(application, environmentId);
|
||||
```
|
||||
|
||||
Best-effort call — no-op if no capture exists for that app+env (e.g., non-Docker agent).
|
||||
|
||||
### Stop Capture — Container Death
|
||||
|
||||
`DockerEventMonitor` handles `die`/`oom` events. After updating replica state:
|
||||
|
||||
```java
|
||||
orchestrator.stopLogCapture(containerId);
|
||||
```
|
||||
|
||||
Triggers final flush of buffered lines before cleanup.
|
||||
|
||||
### Stop Capture — Deployment Failure Cleanup
|
||||
|
||||
No extra code needed. When `DeploymentExecutor` stops/removes containers on health check failure, the Docker `die` event flows through `DockerEventMonitor` which calls `stopLogCapture`. The event monitor path handles it.
|
||||
|
||||
## UI Changes
|
||||
|
||||
### 1. Deployment Startup Log Panel
|
||||
|
||||
A collapsible log panel below the `DeploymentProgress` component in the deployment detail view.
|
||||
|
||||
**Data source:** Queries `/api/v1/logs?application={appSlug}&environment={envSlug}&source=container&from={deployCreatedAt}`
|
||||
|
||||
**Polling behavior:**
|
||||
- Auto-refreshes every 3 seconds while deployment status is STARTING
|
||||
- Stops polling when status reaches RUNNING or FAILED
|
||||
- Manual refresh button available in all states
|
||||
|
||||
**Status indicator:**
|
||||
- Green "live" badge + "polling every 3s" text while STARTING
|
||||
- Red "stopped" badge when FAILED
|
||||
- No badge when RUNNING (panel remains visible with historical startup logs)
|
||||
|
||||
**Layout:** Uses existing `LogViewer` component from `@cameleer/design-system` and shared log panel styles from `ui/src/styles/log-panel.module.css`.
|
||||
|
||||
### 2. Source Badge in Log Views
|
||||
|
||||
Everywhere logs are displayed (AgentInstance page, LogTab, general log search), each log line gets a small source badge:
|
||||
- `container` — slate/gray badge
|
||||
- `app` — green badge
|
||||
- `agent` — existing behavior
|
||||
|
||||
The `source` field already exists in `LogEntryResponse`. This is a rendering-only change in the LogViewer or its wrapper.
|
||||
|
||||
### 3. Source Filter Update
|
||||
|
||||
The log toolbar source filter (currently App vs Agent) adds `Container` as a third option. The backend `/api/v1/logs` endpoint already accepts `source` as a query parameter — no backend change needed for filtering.
|
||||
|
||||
## Edge Cases
|
||||
|
||||
**Multi-replica:** Each replica gets its own capture session keyed by container ID. `instance_id` in ClickHouse is the container name (e.g., `prod-orderservice-0`). `stopLogCaptureByApp()` stops all sessions for that app+env pair.
|
||||
|
||||
**Server restart during capture:** Active sessions are in-memory and lost on restart. Not a problem — containers likely restart too (Docker restart policy), and new captures start when `DeploymentExecutor` runs again. Already-flushed logs survive in ClickHouse.
|
||||
|
||||
**Container produces no output:** Follow stream stays open but idle (parked thread, no CPU cost). Cleaned up by the 5-minute timeout or container death.
|
||||
|
||||
**Rapid redeployment:** Old container dies -> `stopLogCapture(oldContainerId)`. New container starts -> `startLogCapture(newContainerId, ...)`. Different container IDs, no conflict.
|
||||
|
||||
**Log overlap:** When the agent connects and starts sending `source='app'` logs, there may be a brief overlap with `source='container'` logs for the same timeframe. Both are shown with source badges. Users can filter by source if needed.
|
||||
|
||||
## Files Changed
|
||||
|
||||
### Backend — New
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `app/runtime/ContainerLogForwarder.java` | Docker log streaming, buffering, ClickHouse flush |
|
||||
|
||||
### Backend — Modified
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `core/runtime/RuntimeOrchestrator.java` | Add 3 log capture methods to interface |
|
||||
| `app/runtime/DockerRuntimeOrchestrator.java` | Implement log capture methods, delegate to ContainerLogForwarder |
|
||||
| `app/runtime/DisabledRuntimeOrchestrator.java` | No-op implementations of new methods |
|
||||
| `app/runtime/DeploymentExecutor.java` | Call `startLogCapture()` after container start |
|
||||
| `app/agent/SseConnectionManager.java` | Call `stopLogCaptureByApp()` on SSE connect |
|
||||
| `app/runtime/DockerEventMonitor.java` | Call `stopLogCapture()` on die/oom events |
|
||||
| `app/runtime/RuntimeOrchestratorAutoConfig.java` | Wire ContainerLogForwarder into DockerRuntimeOrchestrator |
|
||||
|
||||
### Frontend — Modified
|
||||
|
||||
| File | Change |
|
||||
|------|--------|
|
||||
| `ui/src/pages/AppsTab/AppsTab.tsx` | Add startup log panel below DeploymentProgress |
|
||||
| `ui/src/api/queries/logs.ts` | Hook for deployment startup logs query |
|
||||
| Log display components | Add source badge rendering |
|
||||
| Log toolbar | Add Container to source filter options |
|
||||
|
||||
### No Changes
|
||||
|
||||
| File | Reason |
|
||||
|------|--------|
|
||||
| ClickHouse `init.sql` | Existing `logs` table with `source` column is sufficient |
|
||||
| `LogQueryController.java` | Already accepts `source` filter parameter |
|
||||
| `ClickHouseLogStore.java` | Already writes `source` field from log entries |
|
||||
8
ui/package-lock.json
generated
8
ui/package-lock.json
generated
@@ -9,7 +9,7 @@
|
||||
"version": "0.0.0",
|
||||
"hasInstallScript": true,
|
||||
"dependencies": {
|
||||
"@cameleer/design-system": "^0.1.48",
|
||||
"@cameleer/design-system": "^0.1.49",
|
||||
"@tanstack/react-query": "^5.90.21",
|
||||
"js-yaml": "^4.1.1",
|
||||
"lucide-react": "^1.7.0",
|
||||
@@ -281,9 +281,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@cameleer/design-system": {
|
||||
"version": "0.1.48",
|
||||
"resolved": "https://gitea.siegeln.net/api/packages/cameleer/npm/%40cameleer%2Fdesign-system/-/0.1.48/design-system-0.1.48.tgz",
|
||||
"integrity": "sha512-XieqGufsjucb5j43YeW2UAqKkaSYvXShw7DuHOZUVjjcuPqLnw4fAZWY7nrs6k33kzYR/UOE3Z9nBx+77Ki8mA==",
|
||||
"version": "0.1.49",
|
||||
"resolved": "https://gitea.siegeln.net/api/packages/cameleer/npm/%40cameleer%2Fdesign-system/-/0.1.49/design-system-0.1.49.tgz",
|
||||
"integrity": "sha512-ftBh7PZpcPwOdW1UFQnBpgo8ql0CfedRrXkOh4350UImMs7qrSdd+jME8Ind62IUytfUBnqk/f0L6j3MjsQaeQ==",
|
||||
"dependencies": {
|
||||
"lucide-react": "^1.7.0",
|
||||
"react": "^19.0.0",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"postinstall": "node -e \"const fs=require('fs');fs.mkdirSync('public',{recursive:true});fs.copyFileSync('node_modules/@cameleer/design-system/assets/cameleer3-logo.svg','public/favicon.svg')\""
|
||||
},
|
||||
"dependencies": {
|
||||
"@cameleer/design-system": "^0.1.48",
|
||||
"@cameleer/design-system": "^0.1.49",
|
||||
"@tanstack/react-query": "^5.90.21",
|
||||
"js-yaml": "^4.1.1",
|
||||
"lucide-react": "^1.7.0",
|
||||
|
||||
@@ -31,6 +31,7 @@ export interface LogSearchParams {
|
||||
application?: string;
|
||||
agentId?: string;
|
||||
source?: string;
|
||||
environment?: string;
|
||||
exchangeId?: string;
|
||||
logger?: string;
|
||||
from?: string;
|
||||
@@ -48,6 +49,7 @@ async function fetchLogs(params: LogSearchParams): Promise<LogSearchPageResponse
|
||||
if (params.application) urlParams.set('application', params.application);
|
||||
if (params.agentId) urlParams.set('agentId', params.agentId);
|
||||
if (params.source) urlParams.set('source', params.source);
|
||||
if (params.environment) urlParams.set('environment', params.environment);
|
||||
if (params.exchangeId) urlParams.set('exchangeId', params.exchangeId);
|
||||
if (params.logger) urlParams.set('logger', params.logger);
|
||||
if (params.from) urlParams.set('from', params.from);
|
||||
@@ -126,3 +128,28 @@ export function useApplicationLogs(
|
||||
data: query.data?.data ?? (undefined as LogEntryResponse[] | undefined),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches container startup logs for a deployment.
|
||||
* Polls every 3s while the deployment is STARTING, stops when RUNNING/FAILED.
|
||||
*/
|
||||
export function useStartupLogs(
|
||||
application: string | undefined,
|
||||
environment: string | undefined,
|
||||
deployCreatedAt: string | undefined,
|
||||
isStarting: boolean,
|
||||
) {
|
||||
const params: LogSearchParams = {
|
||||
application: application || undefined,
|
||||
environment: environment || undefined,
|
||||
source: 'container',
|
||||
from: deployCreatedAt || undefined,
|
||||
sort: 'asc',
|
||||
limit: 500,
|
||||
};
|
||||
|
||||
return useLogs(params, {
|
||||
enabled: !!application && !!deployCreatedAt,
|
||||
refetchInterval: isStarting ? 3_000 : false,
|
||||
});
|
||||
}
|
||||
|
||||
60
ui/src/components/StartupLogPanel.module.css
Normal file
60
ui/src/components/StartupLogPanel.module.css
Normal file
@@ -0,0 +1,60 @@
|
||||
.panel {
|
||||
background: var(--bg-secondary);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
overflow: hidden;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.headerLeft {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.badge {
|
||||
font-size: 11px;
|
||||
padding: 1px 8px;
|
||||
border-radius: 10px;
|
||||
}
|
||||
|
||||
.badgeLive {
|
||||
background: var(--success-muted);
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
.badgeStopped {
|
||||
background: var(--error-muted);
|
||||
color: var(--error);
|
||||
}
|
||||
|
||||
.pollingHint {
|
||||
font-size: 11px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.lineCount {
|
||||
font-size: 12px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.empty {
|
||||
padding: 16px;
|
||||
text-align: center;
|
||||
font-size: 13px;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
46
ui/src/components/StartupLogPanel.tsx
Normal file
46
ui/src/components/StartupLogPanel.tsx
Normal file
@@ -0,0 +1,46 @@
|
||||
import { LogViewer } from '@cameleer/design-system';
|
||||
import { useStartupLogs } from '../api/queries/logs';
|
||||
import type { Deployment } from '../api/queries/admin/apps';
|
||||
import styles from './StartupLogPanel.module.css';
|
||||
|
||||
interface StartupLogPanelProps {
|
||||
deployment: Deployment;
|
||||
appSlug: string;
|
||||
envSlug: string;
|
||||
}
|
||||
|
||||
export function StartupLogPanel({ deployment, appSlug, envSlug }: StartupLogPanelProps) {
|
||||
const isStarting = deployment.status === 'STARTING';
|
||||
const isFailed = deployment.status === 'FAILED';
|
||||
|
||||
const { data } = useStartupLogs(appSlug, envSlug, deployment.createdAt, isStarting);
|
||||
|
||||
const entries = data?.data ?? [];
|
||||
|
||||
if (entries.length === 0 && !isStarting) return null;
|
||||
|
||||
return (
|
||||
<div className={styles.panel}>
|
||||
<div className={styles.header}>
|
||||
<div className={styles.headerLeft}>
|
||||
<span className={styles.title}>Startup Logs</span>
|
||||
{isStarting && (
|
||||
<>
|
||||
<span className={`${styles.badge} ${styles.badgeLive}`}>● live</span>
|
||||
<span className={styles.pollingHint}>polling every 3s</span>
|
||||
</>
|
||||
)}
|
||||
{isFailed && (
|
||||
<span className={`${styles.badge} ${styles.badgeStopped}`}>stopped</span>
|
||||
)}
|
||||
</div>
|
||||
<span className={styles.lineCount}>{entries.length} lines</span>
|
||||
</div>
|
||||
{entries.length > 0 ? (
|
||||
<LogViewer entries={entries} maxHeight={300} />
|
||||
) : (
|
||||
<div className={styles.empty}>Waiting for container output...</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -93,6 +93,7 @@ const LOG_LEVEL_ITEMS: ButtonGroupItem[] = [
|
||||
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
|
||||
{ value: 'app', label: 'App' },
|
||||
{ value: 'agent', label: 'Agent' },
|
||||
{ value: 'container', label: 'Container' },
|
||||
];
|
||||
|
||||
// ── AgentHealth page ─────────────────────────────────────────────────────────
|
||||
|
||||
@@ -28,6 +28,7 @@ const LOG_LEVEL_ITEMS: ButtonGroupItem[] = [
|
||||
const LOG_SOURCE_ITEMS: ButtonGroupItem[] = [
|
||||
{ value: 'app', label: 'App' },
|
||||
{ value: 'agent', label: 'Agent' },
|
||||
{ value: 'container', label: 'Container' },
|
||||
];
|
||||
|
||||
export default function AgentInstance() {
|
||||
|
||||
@@ -39,6 +39,7 @@ import type { ApplicationConfig, TapDefinition } from '../../api/queries/command
|
||||
import { useCatalog } from '../../api/queries/catalog';
|
||||
import type { CatalogApp, CatalogRoute } from '../../api/queries/catalog';
|
||||
import { DeploymentProgress } from '../../components/DeploymentProgress';
|
||||
import { StartupLogPanel } from '../../components/StartupLogPanel';
|
||||
import { timeAgo } from '../../utils/format-utils';
|
||||
import { applyTracedProcessorUpdate, applyRouteRecordingUpdate } from '../../utils/config-draft-utils';
|
||||
import { PageLoader } from '../../components/PageLoader';
|
||||
@@ -771,10 +772,11 @@ function OverviewSubTab({ app, deployments, versions, environments, envMap, sele
|
||||
: <DataTable<DeploymentRow> columns={deploymentColumns} data={deploymentRows} flush />
|
||||
}
|
||||
</div>
|
||||
{deployments.filter((d) => d.deployStage).map((d) => (
|
||||
{deployments.filter((d) => d.deployStage || d.status === 'FAILED').map((d) => (
|
||||
<div key={`progress-${d.id}`} style={{ marginBottom: 8 }}>
|
||||
<span className={styles.cellMeta}>{d.containerName}</span>
|
||||
<DeploymentProgress currentStage={d.deployStage} failed={d.status === 'FAILED'} />
|
||||
<StartupLogPanel deployment={d} appSlug={app.slug} envSlug={environments.find(e => e.id === d.environmentId)?.slug ?? ''} />
|
||||
</div>
|
||||
))}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user