feat: rename agent identity fields for protocol v2 + add SHUTDOWN lifecycle state
All checks were successful
CI / cleanup-branch (push) Has been skipped
CI / build (push) Successful in 1m7s
CI / docker (push) Successful in 45s
CI / deploy-feature (push) Has been skipped
CI / deploy (push) Successful in 22s

Align all internal naming with the agent team's protocol v2 identity rename:
- agentId → instanceId (unique per-JVM identifier)
- applicationName → applicationId (shared app identifier)
- AgentInfo: id → instanceId, name → displayName, application → applicationId

Add SHUTDOWN lifecycle state for graceful agent shutdowns:
- New POST /data/events endpoint receives agent lifecycle events
- AGENT_STOPPED event transitions agent to SHUTDOWN (skips STALE/DEAD)
- New POST /{id}/deregister endpoint removes agent from registry
- Server now distinguishes graceful shutdown from crash (heartbeat timeout)

Includes ClickHouse V9 and PostgreSQL V14 migrations for column renames.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-01 12:22:42 +02:00
parent ad8dd73596
commit 909d713837
85 changed files with 645 additions and 494 deletions

View File

@@ -3,7 +3,7 @@ package com.cameleer3.server.core.admin;
import java.time.Instant;
public record AppSettings(
String appId,
String applicationId,
int slaThresholdMs,
double healthErrorWarn,
double healthErrorCrit,
@@ -12,8 +12,8 @@ public record AppSettings(
Instant createdAt,
Instant updatedAt) {
public static AppSettings defaults(String appId) {
public static AppSettings defaults(String applicationId) {
Instant now = Instant.now();
return new AppSettings(appId, 300, 1.0, 5.0, 99.0, 95.0, now, now);
return new AppSettings(applicationId, 300, 1.0, 5.0, 99.0, 95.0, now, now);
}
}

View File

@@ -4,8 +4,8 @@ import java.util.List;
import java.util.Optional;
public interface AppSettingsRepository {
Optional<AppSettings> findByAppId(String appId);
Optional<AppSettings> findByApplicationId(String applicationId);
List<AppSettings> findAll();
AppSettings save(AppSettings settings);
void delete(String appId);
void delete(String applicationId);
}

View File

@@ -8,7 +8,7 @@ import java.time.Instant;
* @param id unique command identifier (UUID)
* @param type command type
* @param payload raw JSON payload
* @param targetAgentId target agent identifier
* @param targetInstanceId target agent instance identifier
* @param createdAt when the command was created
* @param status current delivery status
*/
@@ -16,12 +16,12 @@ public record AgentCommand(
String id,
CommandType type,
String payload,
String targetAgentId,
String targetInstanceId,
Instant createdAt,
CommandStatus status
) {
public AgentCommand withStatus(CommandStatus newStatus) {
return new AgentCommand(id, type, payload, targetAgentId, createdAt, newStatus);
return new AgentCommand(id, type, payload, targetInstanceId, createdAt, newStatus);
}
}

View File

@@ -12,8 +12,8 @@ public interface AgentEventListener {
/**
* Called when a new command is ready to be delivered to an agent.
*
* @param agentId the target agent identifier
* @param instanceId the target agent instance identifier
* @param command the command to deliver
*/
void onCommandReady(String agentId, AgentCommand command);
void onCommandReady(String instanceId, AgentCommand command);
}

View File

@@ -4,8 +4,8 @@ import java.time.Instant;
public record AgentEventRecord(
long id,
String agentId,
String appId,
String instanceId,
String applicationId,
String eventType,
String detail,
Instant timestamp

View File

@@ -5,7 +5,7 @@ import java.util.List;
public interface AgentEventRepository {
void insert(String agentId, String appId, String eventType, String detail);
void insert(String instanceId, String applicationId, String eventType, String detail);
List<AgentEventRecord> query(String appId, String agentId, Instant from, Instant to, int limit);
List<AgentEventRecord> query(String applicationId, String instanceId, Instant from, Instant to, int limit);
}

View File

@@ -16,12 +16,12 @@ public class AgentEventService {
this.repository = repository;
}
public void recordEvent(String agentId, String appId, String eventType, String detail) {
log.debug("Recording agent event: agent={}, app={}, type={}", agentId, appId, eventType);
repository.insert(agentId, appId, eventType, detail);
public void recordEvent(String instanceId, String applicationId, String eventType, String detail) {
log.debug("Recording agent event: instance={}, app={}, type={}", instanceId, applicationId, eventType);
repository.insert(instanceId, applicationId, eventType, detail);
}
public List<AgentEventRecord> queryEvents(String appId, String agentId, Instant from, Instant to, int limit) {
return repository.query(appId, agentId, from, to, limit);
public List<AgentEventRecord> queryEvents(String applicationId, String instanceId, Instant from, Instant to, int limit) {
return repository.query(applicationId, instanceId, from, to, limit);
}
}

View File

@@ -11,9 +11,9 @@ import java.util.Map;
* via {@code computeIfPresent} for thread-safe state transitions. Wither-style methods
* return new instances with the specified field changed.
*
* @param id agent-provided persistent identifier
* @param name human-readable agent name
* @param application application name (e.g., "order-service-prod")
* @param instanceId agent-provided persistent identifier
* @param displayName human-readable agent name
* @param applicationId application identifier (e.g., "order-service-prod")
* @param version agent software version
* @param routeIds list of Camel route IDs managed by this agent
* @param capabilities agent-declared capabilities (free-form)
@@ -23,9 +23,9 @@ import java.util.Map;
* @param staleTransitionTime when the agent transitioned to STALE (null if not STALE/DEAD)
*/
public record AgentInfo(
String id,
String name,
String application,
String instanceId,
String displayName,
String applicationId,
String version,
List<String> routeIds,
Map<String, Object> capabilities,
@@ -36,28 +36,28 @@ public record AgentInfo(
) {
public AgentInfo withState(AgentState newState) {
return new AgentInfo(id, name, application, version, routeIds, capabilities,
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
newState, registeredAt, lastHeartbeat, staleTransitionTime);
}
public AgentInfo withLastHeartbeat(Instant newLastHeartbeat) {
return new AgentInfo(id, name, application, version, routeIds, capabilities,
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
state, registeredAt, newLastHeartbeat, staleTransitionTime);
}
public AgentInfo withRegisteredAt(Instant newRegisteredAt) {
return new AgentInfo(id, name, application, version, routeIds, capabilities,
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
state, newRegisteredAt, lastHeartbeat, staleTransitionTime);
}
public AgentInfo withStaleTransitionTime(Instant newStaleTransitionTime) {
return new AgentInfo(id, name, application, version, routeIds, capabilities,
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
state, registeredAt, lastHeartbeat, newStaleTransitionTime);
}
public AgentInfo withMetadata(String name, String application, String version,
public AgentInfo withMetadata(String displayName, String applicationId, String version,
List<String> routeIds, Map<String, Object> capabilities) {
return new AgentInfo(id, name, application, version, routeIds, capabilities,
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
state, registeredAt, lastHeartbeat, staleTransitionTime);
}
}

View File

@@ -106,10 +106,39 @@ public class AgentRegistryService {
});
}
/**
 * Gracefully shut down an agent. Transitions the agent to SHUTDOWN state,
 * which is excluded from the LIVE -> STALE -> DEAD lifecycle.
 * <p>
 * The update is applied through {@code computeIfPresent}, so an unknown id
 * leaves the registry untouched. The stale transition time is cleared as part
 * of the same update, because {@link AgentInfo} documents it as null for any
 * state other than STALE/DEAD (an agent may be STALE when it shuts down).
 *
 * @param id identifier the agent is registered under
 * @return true if the agent was found and transitioned
 */
public boolean shutdown(String id) {
    AgentInfo updated = agents.computeIfPresent(id, (key, existing) -> {
        log.info("Agent {} graceful shutdown ({} -> SHUTDOWN)", id, existing.state());
        // Keep the record consistent: staleTransitionTime is only meaningful for STALE/DEAD.
        return existing.withState(AgentState.SHUTDOWN).withStaleTransitionTime(null);
    });
    return updated != null;
}
/**
 * Drop an agent from the registry altogether.
 * <p>
 * When an entry exists for {@code id}, the entry stored under the same id in
 * {@code commands} is removed as well and the agent's last state is logged.
 * An unknown id changes nothing.
 *
 * @param id identifier the agent is registered under
 * @return true if the agent was found and removed
 */
public boolean deregister(String id) {
    AgentInfo previous = agents.remove(id);
    if (previous == null) {
        return false;
    }
    commands.remove(id);
    log.info("Agent {} deregistered (was {})", id, previous.state());
    return true;
}
/**
* Check all agents and apply lifecycle transitions:
* LIVE -> STALE when lastHeartbeat exceeds staleThresholdMs,
* STALE -> DEAD when staleTransitionTime exceeds deadThresholdMs.
* SHUTDOWN and DEAD agents are skipped.
*/
public void checkLifecycle() {
Instant now = Instant.now();
@@ -174,7 +203,7 @@ public class AgentRegistryService {
*/
public List<AgentInfo> findByApplication(String application) {
return agents.values().stream()
.filter(a -> application.equals(a.application()))
.filter(a -> application.equals(a.applicationId()))
.collect(Collectors.toList());
}

View File

@@ -6,5 +6,6 @@ package com.cameleer3.server.core.agent;
public enum AgentState {
LIVE,
STALE,
DEAD
DEAD,
SHUTDOWN
}

View File

@@ -35,8 +35,8 @@ public class DetailService {
processors = buildTree(records);
}
return new ExecutionDetail(
exec.executionId(), exec.routeId(), exec.agentId(),
exec.applicationName(),
exec.executionId(), exec.routeId(), exec.instanceId(),
exec.applicationId(),
exec.status(), exec.startTime(), exec.endTime(),
exec.durationMs() != null ? exec.durationMs() : 0L,
exec.correlationId(), exec.exchangeId(),

View File

@@ -12,7 +12,7 @@ import java.util.Map;
*
* @param executionId unique execution identifier
* @param routeId Camel route ID
* @param agentId agent instance that reported the execution
* @param instanceId agent instance that reported the execution
* @param status execution status (COMPLETED, FAILED, RUNNING)
* @param startTime execution start time
* @param endTime execution end time (may be null for RUNNING)
@@ -31,8 +31,8 @@ import java.util.Map;
public record ExecutionDetail(
String executionId,
String routeId,
String agentId,
String applicationName,
String instanceId,
String applicationId,
String status,
Instant startTime,
Instant endTime,

View File

@@ -75,7 +75,7 @@ public class SearchIndexer implements SearchIndexerStats {
.toList();
searchIndex.index(new ExecutionDocument(
exec.executionId(), exec.routeId(), exec.agentId(), exec.applicationName(),
exec.executionId(), exec.routeId(), exec.instanceId(), exec.applicationId(),
exec.status(), exec.correlationId(), exec.exchangeId(),
exec.startTime(), exec.endTime(), exec.durationMs(),
exec.errorMessage(), exec.errorStacktrace(), processorDocs,

View File

@@ -55,7 +55,7 @@ public class ChunkAccumulator {
DEFAULT_TENANT,
chunk.getExchangeId(),
chunk.getRouteId(),
chunk.getApplicationName(),
chunk.getApplicationId(),
chunk.getStartTime(),
chunk.getProcessors()));
}
@@ -110,8 +110,8 @@ public class ChunkAccumulator {
private static ExecutionChunk mergeEnvelopes(ExecutionChunk older, ExecutionChunk newer) {
ExecutionChunk merged = new ExecutionChunk();
merged.setExchangeId(coalesce(newer.getExchangeId(), older.getExchangeId()));
merged.setApplicationName(coalesce(newer.getApplicationName(), older.getApplicationName()));
merged.setAgentId(coalesce(newer.getAgentId(), older.getAgentId()));
merged.setApplicationId(coalesce(newer.getApplicationId(), older.getApplicationId()));
merged.setInstanceId(coalesce(newer.getInstanceId(), older.getInstanceId()));
merged.setRouteId(coalesce(newer.getRouteId(), older.getRouteId()));
merged.setCorrelationId(coalesce(newer.getCorrelationId(), older.getCorrelationId()));
merged.setStatus(coalesce(newer.getStatus(), older.getStatus()));
@@ -146,7 +146,7 @@ public class ChunkAccumulator {
String diagramHash = "";
try {
diagramHash = diagramStore
.findContentHashForRoute(envelope.getRouteId(), envelope.getAgentId())
.findContentHashForRoute(envelope.getRouteId(), envelope.getInstanceId())
.orElse("");
} catch (Exception e) {
log.debug("Could not resolve diagram hash for route={}", envelope.getRouteId());
@@ -156,8 +156,8 @@ public class ChunkAccumulator {
1L,
envelope.getExchangeId(),
envelope.getRouteId(),
envelope.getAgentId(),
envelope.getApplicationName(),
envelope.getInstanceId(),
envelope.getApplicationId(),
envelope.getStatus() != null ? envelope.getStatus().name() : "RUNNING",
envelope.getCorrelationId(),
envelope.getExchangeId(),
@@ -207,7 +207,7 @@ public class ChunkAccumulator {
String tenantId,
String executionId,
String routeId,
String applicationName,
String applicationId,
Instant execStartTime,
List<FlatProcessorRecord> processors
) {}

View File

@@ -42,18 +42,18 @@ public class IngestionService {
this.bodySizeLimit = bodySizeLimit;
}
public void ingestExecution(String agentId, String applicationName, RouteExecution execution) {
ExecutionRecord record = toExecutionRecord(agentId, applicationName, execution);
public void ingestExecution(String instanceId, String applicationId, RouteExecution execution) {
ExecutionRecord record = toExecutionRecord(instanceId, applicationId, execution);
executionStore.upsert(record);
if (execution.getProcessors() != null && !execution.getProcessors().isEmpty()) {
List<ProcessorRecord> processors = flattenProcessors(
execution.getProcessors(), record.executionId(),
record.startTime(), applicationName, execution.getRouteId(),
record.startTime(), applicationId, execution.getRouteId(),
null, 0);
executionStore.upsertProcessors(
record.executionId(), record.startTime(),
applicationName, execution.getRouteId(), processors);
applicationId, execution.getRouteId(), processors);
}
eventPublisher.accept(new ExecutionUpdatedEvent(
@@ -76,10 +76,10 @@ public class IngestionService {
return metricsBuffer;
}
private ExecutionRecord toExecutionRecord(String agentId, String applicationName,
private ExecutionRecord toExecutionRecord(String instanceId, String applicationId,
RouteExecution exec) {
String diagramHash = diagramStore
.findContentHashForRoute(exec.getRouteId(), agentId)
.findContentHashForRoute(exec.getRouteId(), instanceId)
.orElse("");
// Extract route-level snapshots (critical for REGULAR mode where no processors are recorded)
@@ -109,7 +109,7 @@ public class IngestionService {
}
return new ExecutionRecord(
exec.getExchangeId(), exec.getRouteId(), agentId, applicationName,
exec.getExchangeId(), exec.getRouteId(), instanceId, applicationId,
exec.getStatus() != null ? exec.getStatus().name() : "RUNNING",
exec.getCorrelationId(), exec.getExchangeId(),
exec.getStartTime(), exec.getEndTime(),
@@ -138,13 +138,13 @@ public class IngestionService {
private List<ProcessorRecord> flattenProcessors(
List<ProcessorExecution> processors, String executionId,
java.time.Instant execStartTime, String applicationName, String routeId,
java.time.Instant execStartTime, String applicationId, String routeId,
String parentProcessorId, int depth) {
List<ProcessorRecord> flat = new ArrayList<>();
for (ProcessorExecution p : processors) {
flat.add(new ProcessorRecord(
executionId, p.getProcessorId(), p.getProcessorType(),
applicationName, routeId,
applicationId, routeId,
depth, parentProcessorId,
p.getStatus() != null ? p.getStatus().name() : "RUNNING",
p.getStartTime() != null ? p.getStartTime() : execStartTime,

View File

@@ -11,8 +11,8 @@ public record MergedExecution(
long version,
String executionId,
String routeId,
String agentId,
String applicationName,
String instanceId,
String applicationId,
String status,
String correlationId,
String exchangeId,

View File

@@ -8,4 +8,4 @@ import com.cameleer3.common.graph.RouteGraph;
* The instance ID is extracted from the SecurityContext in the controller layer
* and carried through the write buffer so the flush scheduler can persist it.
*/
public record TaggedDiagram(String agentId, String applicationName, RouteGraph graph) {}
public record TaggedDiagram(String instanceId, String applicationId, RouteGraph graph) {}

View File

@@ -8,4 +8,4 @@ import com.cameleer3.common.model.RouteExecution;
* The instance ID is extracted from the SecurityContext in the controller layer
* and carried through the write buffer so the flush scheduler can persist it.
*/
public record TaggedExecution(String agentId, RouteExecution execution) {}
public record TaggedExecution(String instanceId, RouteExecution execution) {}

View File

@@ -11,7 +11,7 @@ import java.util.Map;
*
* @param executionId unique execution identifier
* @param routeId Camel route ID
* @param agentId agent instance that reported the execution
* @param instanceId agent instance that reported the execution
* @param status execution status (COMPLETED, FAILED, RUNNING)
* @param startTime execution start time
* @param endTime execution end time (may be null for RUNNING)
@@ -23,8 +23,8 @@ import java.util.Map;
public record ExecutionSummary(
String executionId,
String routeId,
String agentId,
String applicationName,
String instanceId,
String applicationId,
String status,
Instant startTime,
Instant endTime,

View File

@@ -20,10 +20,10 @@ import java.util.List;
* @param textInHeaders full-text search scoped to exchange headers
* @param textInErrors full-text search scoped to error messages and stack traces
* @param routeId exact match on route_id
* @param agentId exact match on agent_id
* @param instanceId exact match on instance_id
* @param processorType matches processor_types array via has()
* @param application application name filter (resolved to agentIds server-side)
* @param agentIds list of agent IDs (resolved from group, used for IN clause)
* @param applicationId application ID filter (resolved to instanceIds server-side)
* @param instanceIds list of instance IDs (resolved from application, used for IN clause)
* @param offset pagination offset (0-based)
* @param limit page size (default 50, max 500)
* @param sortField column to sort by (default: startTime)
@@ -41,10 +41,10 @@ public record SearchRequest(
String textInHeaders,
String textInErrors,
String routeId,
String agentId,
String instanceId,
String processorType,
String application,
List<String> agentIds,
String applicationId,
List<String> instanceIds,
int offset,
int limit,
String sortField,
@@ -55,8 +55,8 @@ public record SearchRequest(
private static final int MAX_LIMIT = 500;
private static final java.util.Set<String> ALLOWED_SORT_FIELDS = java.util.Set.of(
"startTime", "status", "agentId", "routeId", "correlationId",
"durationMs", "executionId", "applicationName"
"startTime", "status", "instanceId", "routeId", "correlationId",
"durationMs", "executionId", "applicationId"
);
/** Maps camelCase API sort field names to OpenSearch field names.
@@ -65,11 +65,11 @@ public record SearchRequest(
java.util.Map.entry("startTime", "start_time"),
java.util.Map.entry("durationMs", "duration_ms"),
java.util.Map.entry("status", "status.keyword"),
java.util.Map.entry("agentId", "agent_id.keyword"),
java.util.Map.entry("instanceId", "instance_id.keyword"),
java.util.Map.entry("routeId", "route_id.keyword"),
java.util.Map.entry("correlationId", "correlation_id.keyword"),
java.util.Map.entry("executionId", "execution_id.keyword"),
java.util.Map.entry("applicationName", "application_name.keyword")
java.util.Map.entry("applicationId", "application_id.keyword")
);
public SearchRequest {
@@ -85,12 +85,12 @@ public record SearchRequest(
return SORT_FIELD_TO_COLUMN.getOrDefault(sortField, "start_time");
}
/** Create a copy with resolved agentIds (from application name lookup). */
public SearchRequest withAgentIds(List<String> resolvedAgentIds) {
/** Create a copy with resolved instanceIds (from application ID lookup). */
public SearchRequest withInstanceIds(List<String> resolvedInstanceIds) {
return new SearchRequest(
status, timeFrom, timeTo, durationMin, durationMax, correlationId,
text, textInBody, textInHeaders, textInErrors,
routeId, agentId, processorType, application, resolvedAgentIds,
routeId, instanceId, processorType, applicationId, resolvedInstanceIds,
offset, limit, sortField, sortDir
);
}

View File

@@ -29,8 +29,8 @@ public class SearchService {
return statsStore.stats(from, to);
}
public ExecutionStats statsForApp(Instant from, Instant to, String applicationName) {
return statsStore.statsForApp(from, to, applicationName);
public ExecutionStats statsForApp(Instant from, Instant to, String applicationId) {
return statsStore.statsForApp(from, to, applicationId);
}
public ExecutionStats stats(Instant from, Instant to, String routeId, List<String> agentIds) {
@@ -41,8 +41,8 @@ public class SearchService {
return statsStore.timeseries(from, to, bucketCount);
}
public StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationName) {
return statsStore.timeseriesForApp(from, to, bucketCount, applicationName);
public StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId) {
return statsStore.timeseriesForApp(from, to, bucketCount, applicationId);
}
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount,
@@ -57,13 +57,13 @@ public class SearchService {
}
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
int bucketCount, String applicationName) {
return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationName);
int bucketCount, String applicationId) {
return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationId);
}
public double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId) {
return statsStore.slaCompliance(from, to, thresholdMs, applicationName, routeId);
String applicationId, String routeId) {
return statsStore.slaCompliance(from, to, thresholdMs, applicationId, routeId);
}
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
@@ -71,20 +71,20 @@ public class SearchService {
}
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
String applicationName, int thresholdMs) {
return statsStore.slaCountsByRoute(from, to, applicationName, thresholdMs);
String applicationId, int thresholdMs) {
return statsStore.slaCountsByRoute(from, to, applicationId, thresholdMs);
}
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
public List<TopError> topErrors(Instant from, Instant to, String applicationId,
String routeId, int limit) {
return statsStore.topErrors(from, to, applicationName, routeId, limit);
return statsStore.topErrors(from, to, applicationId, routeId, limit);
}
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
return statsStore.activeErrorTypes(from, to, applicationName);
public int activeErrorTypes(Instant from, Instant to, String applicationId) {
return statsStore.activeErrorTypes(from, to, applicationId);
}
public List<StatsStore.PunchcardCell> punchcard(Instant from, Instant to, String applicationName) {
return statsStore.punchcard(from, to, applicationName);
public List<StatsStore.PunchcardCell> punchcard(Instant from, Instant to, String applicationId) {
return statsStore.punchcard(from, to, applicationId);
}
}

View File

@@ -13,9 +13,9 @@ public interface DiagramStore {
Optional<RouteGraph> findByContentHash(String contentHash);
Optional<String> findContentHashForRoute(String routeId, String agentId);
Optional<String> findContentHashForRoute(String routeId, String instanceId);
Optional<String> findContentHashForRouteByAgents(String routeId, List<String> agentIds);
Optional<String> findContentHashForRouteByAgents(String routeId, List<String> instanceIds);
Map<String, String> findProcessorRouteMapping(String applicationName);
Map<String, String> findProcessorRouteMapping(String applicationId);
}

View File

@@ -9,7 +9,7 @@ public interface ExecutionStore {
void upsert(ExecutionRecord execution);
void upsertProcessors(String executionId, Instant startTime,
String applicationName, String routeId,
String applicationId, String routeId,
List<ProcessorRecord> processors);
Optional<ExecutionRecord> findById(String executionId);
@@ -19,7 +19,7 @@ public interface ExecutionStore {
Optional<ProcessorRecord> findProcessorById(String executionId, String processorId);
record ExecutionRecord(
String executionId, String routeId, String agentId, String applicationName,
String executionId, String routeId, String instanceId, String applicationId,
String status, String correlationId, String exchangeId,
Instant startTime, Instant endTime, Long durationMs,
String errorMessage, String errorStacktrace, String diagramContentHash,
@@ -36,7 +36,7 @@ public interface ExecutionStore {
record ProcessorRecord(
String executionId, String processorId, String processorType,
String applicationName, String routeId,
String applicationId, String routeId,
int depth, String parentProcessorId, String status,
Instant startTime, Instant endTime, Long durationMs,
String errorMessage, String errorStacktrace,

View File

@@ -7,9 +7,9 @@ import java.util.List;
public interface LogIndex {
List<LogEntryResult> search(String application, String agentId, String level,
List<LogEntryResult> search(String applicationId, String instanceId, String level,
String query, String exchangeId,
Instant from, Instant to, int limit);
void indexBatch(String agentId, String application, List<LogEntry> entries);
void indexBatch(String instanceId, String applicationId, List<LogEntry> entries);
}

View File

@@ -9,6 +9,6 @@ import java.util.Map;
public interface MetricsQueryStore {
Map<String, List<MetricTimeSeries.Bucket>> queryTimeSeries(
String agentId, List<String> metricNames,
String instanceId, List<String> metricNames,
Instant from, Instant to, int buckets);
}

View File

@@ -14,7 +14,7 @@ public interface StatsStore {
ExecutionStats stats(Instant from, Instant to);
// Per-app stats (stats_1m_app)
ExecutionStats statsForApp(Instant from, Instant to, String applicationName);
ExecutionStats statsForApp(Instant from, Instant to, String applicationId);
// Per-route stats (stats_1m_route), optionally scoped to specific agents
ExecutionStats statsForRoute(Instant from, Instant to, String routeId, List<String> agentIds);
@@ -26,7 +26,7 @@ public interface StatsStore {
StatsTimeseries timeseries(Instant from, Instant to, int bucketCount);
// Per-app timeseries
StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationName);
StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId);
// Per-route timeseries, optionally scoped to specific agents
StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
@@ -41,28 +41,28 @@ public interface StatsStore {
// Grouped timeseries by route within an application (for L2 dashboard charts)
Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to, int bucketCount,
String applicationName);
String applicationId);
// SLA compliance: % of completed exchanges with duration <= thresholdMs
double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId);
String applicationId, String routeId);
// Batch SLA counts by app: {applicationId -> [compliant, total]}
Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs);
// Batch SLA counts by route within an app: {routeId -> [compliant, total]}
Map<String, long[]> slaCountsByRoute(Instant from, Instant to, String applicationName,
Map<String, long[]> slaCountsByRoute(Instant from, Instant to, String applicationId,
int thresholdMs);
// Top N errors with velocity trend
List<TopError> topErrors(Instant from, Instant to, String applicationName,
List<TopError> topErrors(Instant from, Instant to, String applicationId,
String routeId, int limit);
// Count of distinct error types in window
int activeErrorTypes(Instant from, Instant to, String applicationName);
int activeErrorTypes(Instant from, Instant to, String applicationId);
// Punchcard: aggregate by weekday (0=Sun..6=Sat) x hour (0-23) over last 7 days
List<PunchcardCell> punchcard(Instant from, Instant to, String applicationName);
List<PunchcardCell> punchcard(Instant from, Instant to, String applicationId);
record PunchcardCell(int weekday, int hour, long totalCount, long failedCount) {}
}

View File

@@ -4,7 +4,7 @@ import java.time.Instant;
import java.util.List;
public record ExecutionDocument(
String executionId, String routeId, String agentId, String applicationName,
String executionId, String routeId, String instanceId, String applicationId,
String status, String correlationId, String exchangeId,
Instant startTime, Instant endTime, Long durationMs,
String errorMessage, String errorStacktrace,

View File

@@ -7,7 +7,7 @@ import java.util.Map;
* A single metrics data point from an agent.
*/
public record MetricsSnapshot(
String agentId,
String instanceId,
Instant collectedAt,
String metricName,
double metricValue,

View File

@@ -30,9 +30,9 @@ class AgentRegistryServiceTest {
"1.0.0", List.of("route1", "route2"), Map.of("feature", "tracing"));
assertThat(agent).isNotNull();
assertThat(agent.id()).isEqualTo("agent-1");
assertThat(agent.name()).isEqualTo("Order Agent");
assertThat(agent.application()).isEqualTo("order-svc");
assertThat(agent.instanceId()).isEqualTo("agent-1");
assertThat(agent.displayName()).isEqualTo("Order Agent");
assertThat(agent.applicationId()).isEqualTo("order-svc");
assertThat(agent.version()).isEqualTo("1.0.0");
assertThat(agent.routeIds()).containsExactly("route1", "route2");
assertThat(agent.capabilities()).containsEntry("feature", "tracing");
@@ -50,9 +50,9 @@ class AgentRegistryServiceTest {
AgentInfo updated = registry.register("agent-1", "New Name", "new-group",
"2.0.0", List.of("route1", "route2"), Map.of("new", "cap"));
assertThat(updated.id()).isEqualTo("agent-1");
assertThat(updated.name()).isEqualTo("New Name");
assertThat(updated.application()).isEqualTo("new-group");
assertThat(updated.instanceId()).isEqualTo("agent-1");
assertThat(updated.displayName()).isEqualTo("New Name");
assertThat(updated.applicationId()).isEqualTo("new-group");
assertThat(updated.version()).isEqualTo("2.0.0");
assertThat(updated.routeIds()).containsExactly("route1", "route2");
assertThat(updated.capabilities()).containsEntry("new", "cap");
@@ -192,7 +192,7 @@ class AgentRegistryServiceTest {
List<AgentInfo> all = registry.findAll();
assertThat(all).hasSize(2);
assertThat(all).extracting(AgentInfo::id).containsExactlyInAnyOrder("agent-1", "agent-2");
assertThat(all).extracting(AgentInfo::instanceId).containsExactlyInAnyOrder("agent-1", "agent-2");
}
@Test
@@ -204,8 +204,8 @@ class AgentRegistryServiceTest {
List<AgentInfo> live = registry.findByState(AgentState.LIVE);
List<AgentInfo> stale = registry.findByState(AgentState.STALE);
assertThat(live).hasSize(1).extracting(AgentInfo::id).containsExactly("agent-1");
assertThat(stale).hasSize(1).extracting(AgentInfo::id).containsExactly("agent-2");
assertThat(live).hasSize(1).extracting(AgentInfo::instanceId).containsExactly("agent-1");
assertThat(stale).hasSize(1).extracting(AgentInfo::instanceId).containsExactly("agent-2");
}
@Test
@@ -222,7 +222,7 @@ class AgentRegistryServiceTest {
AgentInfo result = registry.findById("agent-1");
assertThat(result).isNotNull();
assertThat(result.id()).isEqualTo("agent-1");
assertThat(result.instanceId()).isEqualTo("agent-1");
}
}
@@ -239,7 +239,7 @@ class AgentRegistryServiceTest {
assertThat(cmd.id()).isNotNull();
assertThat(cmd.type()).isEqualTo(CommandType.CONFIG_UPDATE);
assertThat(cmd.payload()).isEqualTo("{\"key\":\"val\"}");
assertThat(cmd.targetAgentId()).isEqualTo("agent-1");
assertThat(cmd.targetInstanceId()).isEqualTo("agent-1");
assertThat(cmd.status()).isEqualTo(CommandStatus.PENDING);
assertThat(cmd.createdAt()).isNotNull();
}

View File

@@ -58,7 +58,7 @@ class ChunkAccumulatorTest {
assertThat(batch.tenantId()).isEqualTo("default");
assertThat(batch.executionId()).isEqualTo("ex-1");
assertThat(batch.routeId()).isEqualTo("route-1");
assertThat(batch.applicationName()).isEqualTo("order-service");
assertThat(batch.applicationId()).isEqualTo("order-service");
assertThat(batch.execStartTime()).isEqualTo(Instant.parse("2026-03-31T10:00:00Z"));
assertThat(batch.processors()).hasSize(1);
@@ -206,8 +206,8 @@ class ChunkAccumulatorTest {
int chunkSeq, boolean isFinal, List<FlatProcessorRecord> processors) {
ExecutionChunk c = new ExecutionChunk();
c.setExchangeId(exchangeId);
c.setApplicationName(exchangeId.equals("ex-1") ? "order-service" : "app");
c.setAgentId("agent-1");
c.setApplicationId(exchangeId.equals("ex-1") ? "order-service" : "app");
c.setInstanceId("agent-1");
c.setRouteId("route-1");
c.setCorrelationId(null);
c.setStatus(ExecutionStatus.valueOf(status));