feat: rename agent identity fields for protocol v2 + add SHUTDOWN lifecycle state
All checks were successful
CI / cleanup-branch (push) Has been skipped
CI / build (push) Successful in 1m7s
CI / docker (push) Successful in 45s
CI / deploy-feature (push) Has been skipped
CI / deploy (push) Successful in 22s

Align all internal naming with the agent team's protocol v2 identity rename:
- agentId → instanceId (unique per-JVM identifier)
- applicationName → applicationId (shared app identifier)
- AgentInfo: id → instanceId, name → displayName, application → applicationId

Add SHUTDOWN lifecycle state for graceful agent shutdowns:
- New POST /data/events endpoint receives agent lifecycle events
- AGENT_STOPPED event transitions agent to SHUTDOWN (skips STALE/DEAD)
- New POST /{id}/deregister endpoint removes agent from registry
- Server now distinguishes a graceful shutdown (AGENT_STOPPED event) from a crash (heartbeat timeout)

Includes ClickHouse V9 and PostgreSQL V14 migrations for column renames.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-01 12:22:42 +02:00
parent ad8dd73596
commit 909d713837
85 changed files with 645 additions and 494 deletions

View File

@@ -29,9 +29,9 @@ public class TestSecurityHelper {
/**
* Registers a test agent and returns a valid JWT access token with AGENT role.
*/
public String registerTestAgent(String agentId) {
agentRegistryService.register(agentId, "test", "test-group", "1.0", List.of(), Map.of());
return jwtService.createAccessToken(agentId, "test-group", List.of("AGENT"));
public String registerTestAgent(String instanceId) {
agentRegistryService.register(instanceId, "test", "test-group", "1.0", List.of(), Map.of());
return jwtService.createAccessToken(instanceId, "test-group", List.of("AGENT"));
}
/**

View File

@@ -41,9 +41,9 @@ class AgentCommandControllerIT extends AbstractPostgresIT {
private ResponseEntity<String> registerAgent(String agentId, String name, String application) {
String json = """
{
"agentId": "%s",
"name": "%s",
"application": "%s",
"instanceId": "%s",
"displayName": "%s",
"applicationId": "%s",
"version": "1.0.0",
"routeIds": ["route-1"],
"capabilities": {}

View File

@@ -39,9 +39,9 @@ class AgentRegistrationControllerIT extends AbstractPostgresIT {
private ResponseEntity<String> registerAgent(String agentId, String name) {
String json = """
{
"agentId": "%s",
"name": "%s",
"application": "test-group",
"instanceId": "%s",
"displayName": "%s",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": ["route-1", "route-2"],
"capabilities": {"tracing": true}
@@ -61,7 +61,7 @@ class AgentRegistrationControllerIT extends AbstractPostgresIT {
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
JsonNode body = objectMapper.readTree(response.getBody());
assertThat(body.get("agentId").asText()).isEqualTo("agent-it-1");
assertThat(body.get("instanceId").asText()).isEqualTo("agent-it-1");
assertThat(body.get("sseEndpoint").asText()).isEqualTo("/api/v1/agents/agent-it-1/events");
assertThat(body.get("heartbeatIntervalMs").asLong()).isGreaterThan(0);
assertThat(body.has("serverPublicKey")).isTrue();
@@ -81,7 +81,7 @@ class AgentRegistrationControllerIT extends AbstractPostgresIT {
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
JsonNode body = objectMapper.readTree(response.getBody());
assertThat(body.get("agentId").asText()).isEqualTo("agent-it-reregister");
assertThat(body.get("instanceId").asText()).isEqualTo("agent-it-reregister");
}
@Test

View File

@@ -56,9 +56,9 @@ class AgentSseControllerIT extends AbstractPostgresIT {
private ResponseEntity<String> registerAgent(String agentId, String name, String application) {
String json = """
{
"agentId": "%s",
"name": "%s",
"application": "%s",
"instanceId": "%s",
"displayName": "%s",
"applicationId": "%s",
"version": "1.0.0",
"routeIds": ["route-1"],
"capabilities": {}

View File

@@ -50,11 +50,11 @@ class BackpressureIT extends AbstractPostgresIT {
// Fill the metrics buffer completely with a batch of 5
String batchJson = """
[
{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:00Z","metricName":"test.metric","metricValue":1.0,"tags":{}},
{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:01Z","metricName":"test.metric","metricValue":2.0,"tags":{}},
{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:02Z","metricName":"test.metric","metricValue":3.0,"tags":{}},
{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:03Z","metricName":"test.metric","metricValue":4.0,"tags":{}},
{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:04Z","metricName":"test.metric","metricValue":5.0,"tags":{}}
{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:00Z","metricName":"test.metric","metricValue":1.0,"tags":{}},
{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:01Z","metricName":"test.metric","metricValue":2.0,"tags":{}},
{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:02Z","metricName":"test.metric","metricValue":3.0,"tags":{}},
{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:03Z","metricName":"test.metric","metricValue":4.0,"tags":{}},
{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:04Z","metricName":"test.metric","metricValue":5.0,"tags":{}}
]
""";
@@ -66,7 +66,7 @@ class BackpressureIT extends AbstractPostgresIT {
// Now buffer should be full -- next POST should get 503
String overflowJson = """
[{"agentId":"bp-agent","collectedAt":"2026-03-11T10:00:05Z","metricName":"test.metric","metricValue":6.0,"tags":{}}]
[{"instanceId":"bp-agent","collectedAt":"2026-03-11T10:00:05Z","metricName":"test.metric","metricValue":6.0,"tags":{}}]
""";
ResponseEntity<String> response = restTemplate.postForEntity(

View File

@@ -35,7 +35,7 @@ class MetricsControllerIT extends AbstractPostgresIT {
void postMetrics_returns202() {
String json = """
[{
"agentId": "agent-1",
"instanceId": "agent-1",
"collectedAt": "2026-03-11T10:00:00Z",
"metricName": "cpu.usage",
"metricValue": 75.5,
@@ -55,7 +55,7 @@ class MetricsControllerIT extends AbstractPostgresIT {
void postMetrics_dataAppearsAfterFlush() {
String json = """
[{
"agentId": "agent-flush-test",
"instanceId": "agent-flush-test",
"collectedAt": "2026-03-11T10:00:00Z",
"metricName": "memory.used",
"metricValue": 1024.0,
@@ -70,7 +70,7 @@ class MetricsControllerIT extends AbstractPostgresIT {
await().atMost(10, SECONDS).untilAsserted(() -> {
Integer count = jdbcTemplate.queryForObject(
"SELECT count(*) FROM agent_metrics WHERE agent_id = 'agent-flush-test'",
"SELECT count(*) FROM agent_metrics WHERE instance_id = 'agent-flush-test'",
Integer.class);
assertThat(count).isGreaterThanOrEqualTo(1);
});

View File

@@ -27,9 +27,9 @@ class BootstrapTokenIT extends AbstractPostgresIT {
private static final String REGISTRATION_JSON = """
{
"agentId": "bootstrap-test-agent",
"name": "Bootstrap Test",
"application": "test-group",
"instanceId": "bootstrap-test-agent",
"displayName": "Bootstrap Test",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": [],
"capabilities": {}
@@ -95,9 +95,9 @@ class BootstrapTokenIT extends AbstractPostgresIT {
String json = """
{
"agentId": "bootstrap-test-previous",
"name": "Previous Token Test",
"application": "test-group",
"instanceId": "bootstrap-test-previous",
"displayName": "Previous Token Test",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": [],
"capabilities": {}

View File

@@ -37,9 +37,9 @@ class JwtRefreshIT extends AbstractPostgresIT {
private JsonNode registerAndGetTokens(String agentId) throws Exception {
String json = """
{
"agentId": "%s",
"name": "Refresh Test Agent",
"application": "test-group",
"instanceId": "%s",
"displayName": "Refresh Test Agent",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": [],
"capabilities": {}

View File

@@ -30,9 +30,9 @@ class RegistrationSecurityIT extends AbstractPostgresIT {
private ResponseEntity<String> registerAgent(String agentId) {
String json = """
{
"agentId": "%s",
"name": "Security Test Agent",
"application": "test-group",
"instanceId": "%s",
"displayName": "Security Test Agent",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": [],
"capabilities": {}

View File

@@ -83,14 +83,14 @@ class SseSigningIT extends AbstractPostgresIT {
/**
* Registers an agent using the bootstrap token and returns the registration response.
* The response contains: agentId, sseEndpoint, accessToken, refreshToken, serverPublicKey.
* The response contains: instanceId, sseEndpoint, accessToken, refreshToken, serverPublicKey.
*/
private JsonNode registerAgentWithAuth(String agentId) throws Exception {
String json = """
{
"agentId": "%s",
"name": "SSE Signing Test Agent",
"application": "test-group",
"instanceId": "%s",
"displayName": "SSE Signing Test Agent",
"applicationId": "test-group",
"version": "1.0.0",
"routeIds": ["route-1"],
"capabilities": {}

View File

@@ -39,6 +39,9 @@ class ClickHouseAgentEventRepositoryIT {
String ddl = new ClassPathResource("clickhouse/V7__agent_events.sql")
.getContentAsString(StandardCharsets.UTF_8);
jdbc.execute(ddl);
// Apply identity column renames (subset of V9 migration)
jdbc.execute("ALTER TABLE agent_events RENAME COLUMN agent_id TO instance_id");
jdbc.execute("ALTER TABLE agent_events RENAME COLUMN app_id TO application_id");
jdbc.execute("TRUNCATE TABLE agent_events");
repo = new ClickHouseAgentEventRepository(jdbc);
@@ -49,10 +52,10 @@ class ClickHouseAgentEventRepositoryIT {
/**
* Insert a row with an explicit timestamp so tests can control ordering and ranges.
*/
private void insertAt(String agentId, String appId, String eventType, String detail, Instant ts) {
private void insertAt(String instanceId, String applicationId, String eventType, String detail, Instant ts) {
jdbc.update(
"INSERT INTO agent_events (tenant_id, agent_id, app_id, event_type, detail, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
"default", agentId, appId, eventType, detail, Timestamp.from(ts));
"INSERT INTO agent_events (tenant_id, instance_id, application_id, event_type, detail, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
"default", instanceId, applicationId, eventType, detail, Timestamp.from(ts));
}
// ── Tests ─────────────────────────────────────────────────────────────────
@@ -62,7 +65,7 @@ class ClickHouseAgentEventRepositoryIT {
repo.insert("agent-1", "app-a", "CONNECTED", "agent came online");
Long count = jdbc.queryForObject(
"SELECT count() FROM agent_events WHERE agent_id = 'agent-1'",
"SELECT count() FROM agent_events WHERE instance_id = 'agent-1'",
Long.class);
assertThat(count).isEqualTo(1);
}
@@ -75,8 +78,8 @@ class ClickHouseAgentEventRepositoryIT {
List<AgentEventRecord> results = repo.query("app-x", null, null, null, 100);
assertThat(results).hasSize(1);
assertThat(results.get(0).appId()).isEqualTo("app-x");
assertThat(results.get(0).agentId()).isEqualTo("agent-1");
assertThat(results.get(0).applicationId()).isEqualTo("app-x");
assertThat(results.get(0).instanceId()).isEqualTo("agent-1");
}
@Test
@@ -87,7 +90,7 @@ class ClickHouseAgentEventRepositoryIT {
List<AgentEventRecord> results = repo.query(null, "agent-alpha", null, null, 100);
assertThat(results).hasSize(1);
assertThat(results.get(0).agentId()).isEqualTo("agent-alpha");
assertThat(results.get(0).instanceId()).isEqualTo("agent-alpha");
}
@Test

View File

@@ -75,8 +75,8 @@ class ClickHouseChunkPipelineIT {
// Chunk 0: RUNNING with initial processors
ExecutionChunk chunk0 = new ExecutionChunk();
chunk0.setExchangeId("pipeline-1");
chunk0.setApplicationName("order-service");
chunk0.setAgentId("pod-1");
chunk0.setApplicationId("order-service");
chunk0.setInstanceId("pod-1");
chunk0.setRouteId("order-route");
chunk0.setCorrelationId("corr-1");
chunk0.setStatus(ExecutionStatus.RUNNING);
@@ -118,8 +118,8 @@ class ClickHouseChunkPipelineIT {
// Chunk 1: COMPLETED (final)
ExecutionChunk chunk1 = new ExecutionChunk();
chunk1.setExchangeId("pipeline-1");
chunk1.setApplicationName("order-service");
chunk1.setAgentId("pod-1");
chunk1.setApplicationId("order-service");
chunk1.setInstanceId("pod-1");
chunk1.setRouteId("order-route");
chunk1.setCorrelationId("corr-1");
chunk1.setStatus(ExecutionStatus.COMPLETED);
@@ -152,7 +152,7 @@ class ClickHouseChunkPipelineIT {
for (ChunkAccumulator.ProcessorBatch batch : processorBuffer) {
executionStore.insertProcessorBatch(
batch.tenantId(), batch.executionId(),
batch.routeId(), batch.applicationName(),
batch.routeId(), batch.applicationId(),
batch.execStartTime(), batch.processors());
}

View File

@@ -41,6 +41,9 @@ class ClickHouseDiagramStoreIT {
String ddl = new ClassPathResource("clickhouse/V6__route_diagrams.sql")
.getContentAsString(StandardCharsets.UTF_8);
jdbc.execute(ddl);
// Apply identity column renames (subset of V9 migration)
jdbc.execute("ALTER TABLE route_diagrams RENAME COLUMN agent_id TO instance_id");
jdbc.execute("ALTER TABLE route_diagrams RENAME COLUMN application_name TO application_id");
jdbc.execute("TRUNCATE TABLE route_diagrams");
store = new ClickHouseDiagramStore(jdbc);
@@ -60,8 +63,8 @@ class ClickHouseDiagramStoreIT {
return graph;
}
private TaggedDiagram tagged(String agentId, String appName, RouteGraph graph) {
return new TaggedDiagram(agentId, appName, graph);
private TaggedDiagram tagged(String instanceId, String applicationId, RouteGraph graph) {
return new TaggedDiagram(instanceId, applicationId, graph);
}
// ── Tests ─────────────────────────────────────────────────────────────

View File

@@ -105,8 +105,8 @@ class ClickHouseExecutionReadIT {
assertThat(result.get().executionId()).isEqualTo("exec-1");
assertThat(result.get().routeId()).isEqualTo("route-a");
assertThat(result.get().status()).isEqualTo("COMPLETED");
assertThat(result.get().agentId()).isEqualTo("agent-1");
assertThat(result.get().applicationName()).isEqualTo("my-app");
assertThat(result.get().instanceId()).isEqualTo("agent-1");
assertThat(result.get().applicationId()).isEqualTo("my-app");
assertThat(result.get().processorsJson()).isNull();
}

View File

@@ -38,14 +38,14 @@ class ClickHouseMetricsQueryStoreIT {
CREATE TABLE IF NOT EXISTS agent_metrics (
tenant_id LowCardinality(String) DEFAULT 'default',
collected_at DateTime64(3),
agent_id LowCardinality(String),
instance_id LowCardinality(String),
metric_name LowCardinality(String),
metric_value Float64,
tags Map(String, String) DEFAULT map(),
server_received_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY (tenant_id, agent_id, metric_name, collected_at)
ORDER BY (tenant_id, instance_id, metric_name, collected_at)
""");
jdbc.execute("TRUNCATE TABLE agent_metrics");
@@ -54,9 +54,9 @@ class ClickHouseMetricsQueryStoreIT {
Instant base = Instant.parse("2026-03-31T10:00:00Z");
for (int i = 0; i < 6; i++) {
Instant ts = base.plusSeconds(i * 600); // every 10 minutes
jdbc.update("INSERT INTO agent_metrics (agent_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
jdbc.update("INSERT INTO agent_metrics (instance_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
"agent-1", "cpu.usage", 50.0 + i * 5, java.sql.Timestamp.from(ts));
jdbc.update("INSERT INTO agent_metrics (agent_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
jdbc.update("INSERT INTO agent_metrics (instance_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
"agent-1", "memory.free", 1000.0 - i * 100, java.sql.Timestamp.from(ts));
}

View File

@@ -39,14 +39,14 @@ class ClickHouseMetricsStoreIT {
CREATE TABLE IF NOT EXISTS agent_metrics (
tenant_id LowCardinality(String) DEFAULT 'default',
collected_at DateTime64(3),
agent_id LowCardinality(String),
instance_id LowCardinality(String),
metric_name LowCardinality(String),
metric_value Float64,
tags Map(String, String) DEFAULT map(),
server_received_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY (tenant_id, agent_id, metric_name, collected_at)
ORDER BY (tenant_id, instance_id, metric_name, collected_at)
""");
jdbc.execute("TRUNCATE TABLE agent_metrics");
@@ -66,7 +66,7 @@ class ClickHouseMetricsStoreIT {
store.insertBatch(batch);
Integer count = jdbc.queryForObject(
"SELECT count() FROM agent_metrics WHERE agent_id = 'agent-1'",
"SELECT count() FROM agent_metrics WHERE instance_id = 'agent-1'",
Integer.class);
assertThat(count).isEqualTo(2);
}
@@ -80,7 +80,7 @@ class ClickHouseMetricsStoreIT {
// Just verify we can read back the row with tags
Integer count = jdbc.queryForObject(
"SELECT count() FROM agent_metrics WHERE agent_id = 'agent-2'",
"SELECT count() FROM agent_metrics WHERE instance_id = 'agent-2'",
Integer.class);
assertThat(count).isEqualTo(1);
}
@@ -101,7 +101,7 @@ class ClickHouseMetricsStoreIT {
));
Integer count = jdbc.queryForObject(
"SELECT count() FROM agent_metrics WHERE agent_id = 'agent-3'",
"SELECT count() FROM agent_metrics WHERE instance_id = 'agent-3'",
Integer.class);
assertThat(count).isEqualTo(1);
}

View File

@@ -43,13 +43,15 @@ class ClickHouseStatsStoreIT {
jdbc = new JdbcTemplate(ds);
// Load DDL from classpath resources
// Load DDL from classpath resources (V2, V3, V4 create tables with old column names)
String executionsDdl = new ClassPathResource("clickhouse/V2__executions.sql")
.getContentAsString(StandardCharsets.UTF_8);
String processorsDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
.getContentAsString(StandardCharsets.UTF_8);
String statsDdl = new ClassPathResource("clickhouse/V4__stats_tables_and_mvs.sql")
.getContentAsString(StandardCharsets.UTF_8);
String renameDdl = new ClassPathResource("clickhouse/V9__rename_identity_columns.sql")
.getContentAsString(StandardCharsets.UTF_8);
jdbc.execute(executionsDdl);
jdbc.execute(processorsDdl);
@@ -66,10 +68,18 @@ class ClickHouseStatsStoreIT {
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor");
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_detail");
// Strip SQL line comments first (they may contain semicolons),
// then split by ';' and execute non-empty statements.
String cleanedDdl = statsDdl.replaceAll("--[^\n]*", "");
for (String stmt : cleanedDdl.split(";")) {
// Create stats tables and MVs (using old column names from V4)
String cleanedStatsDdl = statsDdl.replaceAll("--[^\n]*", "");
for (String stmt : cleanedStatsDdl.split(";")) {
String trimmed = stmt.trim();
if (!trimmed.isEmpty()) {
jdbc.execute(trimmed);
}
}
// Apply identity column renames (V9 migration)
String cleanedRenameDdl = renameDdl.replaceAll("--[^\n]*", "");
for (String stmt : cleanedRenameDdl.split(";")) {
String trimmed = stmt.trim();
if (!trimmed.isEmpty()) {
jdbc.execute(trimmed);
@@ -157,27 +167,27 @@ class ClickHouseStatsStoreIT {
"app-1", "route-a", "COMPLETED", 15L);
}
private void insertExecution(String executionId, Instant startTime, String appName,
String routeId, String agentId, String status,
private void insertExecution(String executionId, Instant startTime, String appId,
String routeId, String instanceId, String status,
Long durationMs, String errorType, String errorMessage) {
jdbc.update(
"INSERT INTO executions (tenant_id, execution_id, start_time, route_id, " +
"agent_id, application_name, status, duration_ms, error_type, error_message) " +
"instance_id, application_id, status, duration_ms, error_type, error_message) " +
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
executionId, Timestamp.from(startTime), routeId, agentId, appName,
executionId, Timestamp.from(startTime), routeId, instanceId, appId,
status, durationMs, errorType, errorMessage);
}
private void insertProcessor(String executionId, int seq, String processorId,
String processorType, Instant startTime,
String appName, String routeId, String status,
String appId, String routeId, String status,
Long durationMs) {
jdbc.update(
"INSERT INTO processor_executions (tenant_id, execution_id, seq, processor_id, " +
"processor_type, start_time, route_id, application_name, status, duration_ms) " +
"processor_type, start_time, route_id, application_id, status, duration_ms) " +
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
executionId, seq, processorId, processorType, Timestamp.from(startTime),
routeId, appName, status, durationMs);
routeId, appId, status, durationMs);
}
// ── Stats Tests ──────────────────────────────────────────────────────

View File

@@ -54,10 +54,10 @@ class PostgresStatsStoreIT extends AbstractPostgresIT {
assertFalse(ts.buckets().isEmpty());
}
private void insertExecution(String id, String routeId, String applicationName,
private void insertExecution(String id, String routeId, String applicationId,
String status, Instant startTime, long durationMs) {
executionStore.upsert(new ExecutionRecord(
id, routeId, "agent-1", applicationName, status, null, null,
id, routeId, "agent-1", applicationId, status, null, null,
startTime, startTime.plusMillis(durationMs), durationMs,
status.equals("FAILED") ? "error" : null, null, null,
null, null, null, null, null, null,