feat: rename agent identity fields for protocol v2 + add SHUTDOWN lifecycle state
Align all internal naming with the agent team's protocol v2 identity rename:
- agentId → instanceId (unique per-JVM identifier)
- applicationName → applicationId (shared app identifier)
- AgentInfo: id → instanceId, name → displayName, application → applicationId
Add SHUTDOWN lifecycle state for graceful agent shutdowns:
- New POST /data/events endpoint receives agent lifecycle events
- AGENT_STOPPED event transitions agent to SHUTDOWN (skips STALE/DEAD)
- New POST /{id}/deregister endpoint removes agent from registry
- Server now distinguishes graceful shutdown from crash (heartbeat timeout)
Includes ClickHouse V9 and PostgreSQL V14 migrations for column renames.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ package com.cameleer3.server.core.admin;
|
||||
import java.time.Instant;
|
||||
|
||||
public record AppSettings(
|
||||
String appId,
|
||||
String applicationId,
|
||||
int slaThresholdMs,
|
||||
double healthErrorWarn,
|
||||
double healthErrorCrit,
|
||||
@@ -12,8 +12,8 @@ public record AppSettings(
|
||||
Instant createdAt,
|
||||
Instant updatedAt) {
|
||||
|
||||
public static AppSettings defaults(String appId) {
|
||||
public static AppSettings defaults(String applicationId) {
|
||||
Instant now = Instant.now();
|
||||
return new AppSettings(appId, 300, 1.0, 5.0, 99.0, 95.0, now, now);
|
||||
return new AppSettings(applicationId, 300, 1.0, 5.0, 99.0, 95.0, now, now);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@ import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
public interface AppSettingsRepository {
|
||||
Optional<AppSettings> findByAppId(String appId);
|
||||
Optional<AppSettings> findByApplicationId(String applicationId);
|
||||
List<AppSettings> findAll();
|
||||
AppSettings save(AppSettings settings);
|
||||
void delete(String appId);
|
||||
void delete(String applicationId);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import java.time.Instant;
|
||||
* @param id unique command identifier (UUID)
|
||||
* @param type command type
|
||||
* @param payload raw JSON payload
|
||||
* @param targetAgentId target agent identifier
|
||||
* @param targetInstanceId target agent instance identifier
|
||||
* @param createdAt when the command was created
|
||||
* @param status current delivery status
|
||||
*/
|
||||
@@ -16,12 +16,12 @@ public record AgentCommand(
|
||||
String id,
|
||||
CommandType type,
|
||||
String payload,
|
||||
String targetAgentId,
|
||||
String targetInstanceId,
|
||||
Instant createdAt,
|
||||
CommandStatus status
|
||||
) {
|
||||
|
||||
public AgentCommand withStatus(CommandStatus newStatus) {
|
||||
return new AgentCommand(id, type, payload, targetAgentId, createdAt, newStatus);
|
||||
return new AgentCommand(id, type, payload, targetInstanceId, createdAt, newStatus);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@ public interface AgentEventListener {
|
||||
/**
|
||||
* Called when a new command is ready to be delivered to an agent.
|
||||
*
|
||||
* @param agentId the target agent identifier
|
||||
* @param instanceId the target agent instance identifier
|
||||
* @param command the command to deliver
|
||||
*/
|
||||
void onCommandReady(String agentId, AgentCommand command);
|
||||
void onCommandReady(String instanceId, AgentCommand command);
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@ import java.time.Instant;
|
||||
|
||||
public record AgentEventRecord(
|
||||
long id,
|
||||
String agentId,
|
||||
String appId,
|
||||
String instanceId,
|
||||
String applicationId,
|
||||
String eventType,
|
||||
String detail,
|
||||
Instant timestamp
|
||||
|
||||
@@ -5,7 +5,7 @@ import java.util.List;
|
||||
|
||||
public interface AgentEventRepository {
|
||||
|
||||
void insert(String agentId, String appId, String eventType, String detail);
|
||||
void insert(String instanceId, String applicationId, String eventType, String detail);
|
||||
|
||||
List<AgentEventRecord> query(String appId, String agentId, Instant from, Instant to, int limit);
|
||||
List<AgentEventRecord> query(String applicationId, String instanceId, Instant from, Instant to, int limit);
|
||||
}
|
||||
|
||||
@@ -16,12 +16,12 @@ public class AgentEventService {
|
||||
this.repository = repository;
|
||||
}
|
||||
|
||||
public void recordEvent(String agentId, String appId, String eventType, String detail) {
|
||||
log.debug("Recording agent event: agent={}, app={}, type={}", agentId, appId, eventType);
|
||||
repository.insert(agentId, appId, eventType, detail);
|
||||
public void recordEvent(String instanceId, String applicationId, String eventType, String detail) {
|
||||
log.debug("Recording agent event: instance={}, app={}, type={}", instanceId, applicationId, eventType);
|
||||
repository.insert(instanceId, applicationId, eventType, detail);
|
||||
}
|
||||
|
||||
public List<AgentEventRecord> queryEvents(String appId, String agentId, Instant from, Instant to, int limit) {
|
||||
return repository.query(appId, agentId, from, to, limit);
|
||||
public List<AgentEventRecord> queryEvents(String applicationId, String instanceId, Instant from, Instant to, int limit) {
|
||||
return repository.query(applicationId, instanceId, from, to, limit);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,9 +11,9 @@ import java.util.Map;
|
||||
* via {@code computeIfPresent} for thread-safe state transitions. Wither-style methods
|
||||
* return new instances with the specified field changed.
|
||||
*
|
||||
* @param id agent-provided persistent identifier
|
||||
* @param name human-readable agent name
|
||||
* @param application application name (e.g., "order-service-prod")
|
||||
* @param instanceId agent-provided persistent identifier
|
||||
* @param displayName human-readable agent name
|
||||
* @param applicationId application identifier (e.g., "order-service-prod")
|
||||
* @param version agent software version
|
||||
* @param routeIds list of Camel route IDs managed by this agent
|
||||
* @param capabilities agent-declared capabilities (free-form)
|
||||
@@ -23,9 +23,9 @@ import java.util.Map;
|
||||
* @param staleTransitionTime when the agent transitioned to STALE (null if not STALE/DEAD)
|
||||
*/
|
||||
public record AgentInfo(
|
||||
String id,
|
||||
String name,
|
||||
String application,
|
||||
String instanceId,
|
||||
String displayName,
|
||||
String applicationId,
|
||||
String version,
|
||||
List<String> routeIds,
|
||||
Map<String, Object> capabilities,
|
||||
@@ -36,28 +36,28 @@ public record AgentInfo(
|
||||
) {
|
||||
|
||||
public AgentInfo withState(AgentState newState) {
|
||||
return new AgentInfo(id, name, application, version, routeIds, capabilities,
|
||||
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
|
||||
newState, registeredAt, lastHeartbeat, staleTransitionTime);
|
||||
}
|
||||
|
||||
public AgentInfo withLastHeartbeat(Instant newLastHeartbeat) {
|
||||
return new AgentInfo(id, name, application, version, routeIds, capabilities,
|
||||
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
|
||||
state, registeredAt, newLastHeartbeat, staleTransitionTime);
|
||||
}
|
||||
|
||||
public AgentInfo withRegisteredAt(Instant newRegisteredAt) {
|
||||
return new AgentInfo(id, name, application, version, routeIds, capabilities,
|
||||
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
|
||||
state, newRegisteredAt, lastHeartbeat, staleTransitionTime);
|
||||
}
|
||||
|
||||
public AgentInfo withStaleTransitionTime(Instant newStaleTransitionTime) {
|
||||
return new AgentInfo(id, name, application, version, routeIds, capabilities,
|
||||
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
|
||||
state, registeredAt, lastHeartbeat, newStaleTransitionTime);
|
||||
}
|
||||
|
||||
public AgentInfo withMetadata(String name, String application, String version,
|
||||
public AgentInfo withMetadata(String displayName, String applicationId, String version,
|
||||
List<String> routeIds, Map<String, Object> capabilities) {
|
||||
return new AgentInfo(id, name, application, version, routeIds, capabilities,
|
||||
return new AgentInfo(instanceId, displayName, applicationId, version, routeIds, capabilities,
|
||||
state, registeredAt, lastHeartbeat, staleTransitionTime);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,10 +106,39 @@ public class AgentRegistryService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Gracefully shut down an agent. Transitions to SHUTDOWN state,
|
||||
* which is excluded from the LIVE -> STALE -> DEAD lifecycle.
|
||||
*
|
||||
* @return true if the agent was found and transitioned
|
||||
*/
|
||||
public boolean shutdown(String id) {
|
||||
AgentInfo updated = agents.computeIfPresent(id, (key, existing) -> {
|
||||
log.info("Agent {} graceful shutdown ({} -> SHUTDOWN)", id, existing.state());
|
||||
return existing.withState(AgentState.SHUTDOWN);
|
||||
});
|
||||
return updated != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an agent from the registry entirely.
|
||||
*
|
||||
* @return true if the agent was found and removed
|
||||
*/
|
||||
public boolean deregister(String id) {
|
||||
AgentInfo removed = agents.remove(id);
|
||||
if (removed != null) {
|
||||
commands.remove(id);
|
||||
log.info("Agent {} deregistered (was {})", id, removed.state());
|
||||
}
|
||||
return removed != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check all agents and apply lifecycle transitions:
|
||||
* LIVE -> STALE when the time since lastHeartbeat exceeds staleThresholdMs,
|
||||
* STALE -> DEAD when the time since staleTransitionTime exceeds deadThresholdMs.
|
||||
* SHUTDOWN and DEAD agents are skipped.
|
||||
*/
|
||||
public void checkLifecycle() {
|
||||
Instant now = Instant.now();
|
||||
@@ -174,7 +203,7 @@ public class AgentRegistryService {
|
||||
*/
|
||||
public List<AgentInfo> findByApplication(String application) {
|
||||
return agents.values().stream()
|
||||
.filter(a -> application.equals(a.application()))
|
||||
.filter(a -> application.equals(a.applicationId()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@@ -6,5 +6,6 @@ package com.cameleer3.server.core.agent;
|
||||
public enum AgentState {
|
||||
LIVE,
|
||||
STALE,
|
||||
DEAD
|
||||
DEAD,
|
||||
SHUTDOWN
|
||||
}
|
||||
|
||||
@@ -35,8 +35,8 @@ public class DetailService {
|
||||
processors = buildTree(records);
|
||||
}
|
||||
return new ExecutionDetail(
|
||||
exec.executionId(), exec.routeId(), exec.agentId(),
|
||||
exec.applicationName(),
|
||||
exec.executionId(), exec.routeId(), exec.instanceId(),
|
||||
exec.applicationId(),
|
||||
exec.status(), exec.startTime(), exec.endTime(),
|
||||
exec.durationMs() != null ? exec.durationMs() : 0L,
|
||||
exec.correlationId(), exec.exchangeId(),
|
||||
|
||||
@@ -12,7 +12,7 @@ import java.util.Map;
|
||||
*
|
||||
* @param executionId unique execution identifier
|
||||
* @param routeId Camel route ID
|
||||
* @param agentId agent instance that reported the execution
|
||||
* @param instanceId agent instance that reported the execution
|
||||
* @param status execution status (COMPLETED, FAILED, RUNNING)
|
||||
* @param startTime execution start time
|
||||
* @param endTime execution end time (may be null for RUNNING)
|
||||
@@ -31,8 +31,8 @@ import java.util.Map;
|
||||
public record ExecutionDetail(
|
||||
String executionId,
|
||||
String routeId,
|
||||
String agentId,
|
||||
String applicationName,
|
||||
String instanceId,
|
||||
String applicationId,
|
||||
String status,
|
||||
Instant startTime,
|
||||
Instant endTime,
|
||||
|
||||
@@ -75,7 +75,7 @@ public class SearchIndexer implements SearchIndexerStats {
|
||||
.toList();
|
||||
|
||||
searchIndex.index(new ExecutionDocument(
|
||||
exec.executionId(), exec.routeId(), exec.agentId(), exec.applicationName(),
|
||||
exec.executionId(), exec.routeId(), exec.instanceId(), exec.applicationId(),
|
||||
exec.status(), exec.correlationId(), exec.exchangeId(),
|
||||
exec.startTime(), exec.endTime(), exec.durationMs(),
|
||||
exec.errorMessage(), exec.errorStacktrace(), processorDocs,
|
||||
|
||||
@@ -55,7 +55,7 @@ public class ChunkAccumulator {
|
||||
DEFAULT_TENANT,
|
||||
chunk.getExchangeId(),
|
||||
chunk.getRouteId(),
|
||||
chunk.getApplicationName(),
|
||||
chunk.getApplicationId(),
|
||||
chunk.getStartTime(),
|
||||
chunk.getProcessors()));
|
||||
}
|
||||
@@ -110,8 +110,8 @@ public class ChunkAccumulator {
|
||||
private static ExecutionChunk mergeEnvelopes(ExecutionChunk older, ExecutionChunk newer) {
|
||||
ExecutionChunk merged = new ExecutionChunk();
|
||||
merged.setExchangeId(coalesce(newer.getExchangeId(), older.getExchangeId()));
|
||||
merged.setApplicationName(coalesce(newer.getApplicationName(), older.getApplicationName()));
|
||||
merged.setAgentId(coalesce(newer.getAgentId(), older.getAgentId()));
|
||||
merged.setApplicationId(coalesce(newer.getApplicationId(), older.getApplicationId()));
|
||||
merged.setInstanceId(coalesce(newer.getInstanceId(), older.getInstanceId()));
|
||||
merged.setRouteId(coalesce(newer.getRouteId(), older.getRouteId()));
|
||||
merged.setCorrelationId(coalesce(newer.getCorrelationId(), older.getCorrelationId()));
|
||||
merged.setStatus(coalesce(newer.getStatus(), older.getStatus()));
|
||||
@@ -146,7 +146,7 @@ public class ChunkAccumulator {
|
||||
String diagramHash = "";
|
||||
try {
|
||||
diagramHash = diagramStore
|
||||
.findContentHashForRoute(envelope.getRouteId(), envelope.getAgentId())
|
||||
.findContentHashForRoute(envelope.getRouteId(), envelope.getInstanceId())
|
||||
.orElse("");
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not resolve diagram hash for route={}", envelope.getRouteId());
|
||||
@@ -156,8 +156,8 @@ public class ChunkAccumulator {
|
||||
1L,
|
||||
envelope.getExchangeId(),
|
||||
envelope.getRouteId(),
|
||||
envelope.getAgentId(),
|
||||
envelope.getApplicationName(),
|
||||
envelope.getInstanceId(),
|
||||
envelope.getApplicationId(),
|
||||
envelope.getStatus() != null ? envelope.getStatus().name() : "RUNNING",
|
||||
envelope.getCorrelationId(),
|
||||
envelope.getExchangeId(),
|
||||
@@ -207,7 +207,7 @@ public class ChunkAccumulator {
|
||||
String tenantId,
|
||||
String executionId,
|
||||
String routeId,
|
||||
String applicationName,
|
||||
String applicationId,
|
||||
Instant execStartTime,
|
||||
List<FlatProcessorRecord> processors
|
||||
) {}
|
||||
|
||||
@@ -42,18 +42,18 @@ public class IngestionService {
|
||||
this.bodySizeLimit = bodySizeLimit;
|
||||
}
|
||||
|
||||
public void ingestExecution(String agentId, String applicationName, RouteExecution execution) {
|
||||
ExecutionRecord record = toExecutionRecord(agentId, applicationName, execution);
|
||||
public void ingestExecution(String instanceId, String applicationId, RouteExecution execution) {
|
||||
ExecutionRecord record = toExecutionRecord(instanceId, applicationId, execution);
|
||||
executionStore.upsert(record);
|
||||
|
||||
if (execution.getProcessors() != null && !execution.getProcessors().isEmpty()) {
|
||||
List<ProcessorRecord> processors = flattenProcessors(
|
||||
execution.getProcessors(), record.executionId(),
|
||||
record.startTime(), applicationName, execution.getRouteId(),
|
||||
record.startTime(), applicationId, execution.getRouteId(),
|
||||
null, 0);
|
||||
executionStore.upsertProcessors(
|
||||
record.executionId(), record.startTime(),
|
||||
applicationName, execution.getRouteId(), processors);
|
||||
applicationId, execution.getRouteId(), processors);
|
||||
}
|
||||
|
||||
eventPublisher.accept(new ExecutionUpdatedEvent(
|
||||
@@ -76,10 +76,10 @@ public class IngestionService {
|
||||
return metricsBuffer;
|
||||
}
|
||||
|
||||
private ExecutionRecord toExecutionRecord(String agentId, String applicationName,
|
||||
private ExecutionRecord toExecutionRecord(String instanceId, String applicationId,
|
||||
RouteExecution exec) {
|
||||
String diagramHash = diagramStore
|
||||
.findContentHashForRoute(exec.getRouteId(), agentId)
|
||||
.findContentHashForRoute(exec.getRouteId(), instanceId)
|
||||
.orElse("");
|
||||
|
||||
// Extract route-level snapshots (critical for REGULAR mode where no processors are recorded)
|
||||
@@ -109,7 +109,7 @@ public class IngestionService {
|
||||
}
|
||||
|
||||
return new ExecutionRecord(
|
||||
exec.getExchangeId(), exec.getRouteId(), agentId, applicationName,
|
||||
exec.getExchangeId(), exec.getRouteId(), instanceId, applicationId,
|
||||
exec.getStatus() != null ? exec.getStatus().name() : "RUNNING",
|
||||
exec.getCorrelationId(), exec.getExchangeId(),
|
||||
exec.getStartTime(), exec.getEndTime(),
|
||||
@@ -138,13 +138,13 @@ public class IngestionService {
|
||||
|
||||
private List<ProcessorRecord> flattenProcessors(
|
||||
List<ProcessorExecution> processors, String executionId,
|
||||
java.time.Instant execStartTime, String applicationName, String routeId,
|
||||
java.time.Instant execStartTime, String applicationId, String routeId,
|
||||
String parentProcessorId, int depth) {
|
||||
List<ProcessorRecord> flat = new ArrayList<>();
|
||||
for (ProcessorExecution p : processors) {
|
||||
flat.add(new ProcessorRecord(
|
||||
executionId, p.getProcessorId(), p.getProcessorType(),
|
||||
applicationName, routeId,
|
||||
applicationId, routeId,
|
||||
depth, parentProcessorId,
|
||||
p.getStatus() != null ? p.getStatus().name() : "RUNNING",
|
||||
p.getStartTime() != null ? p.getStartTime() : execStartTime,
|
||||
|
||||
@@ -11,8 +11,8 @@ public record MergedExecution(
|
||||
long version,
|
||||
String executionId,
|
||||
String routeId,
|
||||
String agentId,
|
||||
String applicationName,
|
||||
String instanceId,
|
||||
String applicationId,
|
||||
String status,
|
||||
String correlationId,
|
||||
String exchangeId,
|
||||
|
||||
@@ -8,4 +8,4 @@ import com.cameleer3.common.graph.RouteGraph;
|
||||
* The instance ID is extracted from the SecurityContext in the controller layer
|
||||
* and carried through the write buffer so the flush scheduler can persist it.
|
||||
*/
|
||||
public record TaggedDiagram(String agentId, String applicationName, RouteGraph graph) {}
|
||||
public record TaggedDiagram(String instanceId, String applicationId, RouteGraph graph) {}
|
||||
|
||||
@@ -8,4 +8,4 @@ import com.cameleer3.common.model.RouteExecution;
|
||||
* The instance ID is extracted from the SecurityContext in the controller layer
|
||||
* and carried through the write buffer so the flush scheduler can persist it.
|
||||
*/
|
||||
public record TaggedExecution(String agentId, RouteExecution execution) {}
|
||||
public record TaggedExecution(String instanceId, RouteExecution execution) {}
|
||||
|
||||
@@ -11,7 +11,7 @@ import java.util.Map;
|
||||
*
|
||||
* @param executionId unique execution identifier
|
||||
* @param routeId Camel route ID
|
||||
* @param agentId agent instance that reported the execution
|
||||
* @param instanceId agent instance that reported the execution
|
||||
* @param status execution status (COMPLETED, FAILED, RUNNING)
|
||||
* @param startTime execution start time
|
||||
* @param endTime execution end time (may be null for RUNNING)
|
||||
@@ -23,8 +23,8 @@ import java.util.Map;
|
||||
public record ExecutionSummary(
|
||||
String executionId,
|
||||
String routeId,
|
||||
String agentId,
|
||||
String applicationName,
|
||||
String instanceId,
|
||||
String applicationId,
|
||||
String status,
|
||||
Instant startTime,
|
||||
Instant endTime,
|
||||
|
||||
@@ -20,10 +20,10 @@ import java.util.List;
|
||||
* @param textInHeaders full-text search scoped to exchange headers
|
||||
* @param textInErrors full-text search scoped to error messages and stack traces
|
||||
* @param routeId exact match on route_id
|
||||
* @param agentId exact match on agent_id
|
||||
* @param instanceId exact match on instance_id
|
||||
* @param processorType matches processor_types array via has()
|
||||
* @param application application name filter (resolved to agentIds server-side)
|
||||
* @param agentIds list of agent IDs (resolved from group, used for IN clause)
|
||||
* @param applicationId application ID filter (resolved to instanceIds server-side)
|
||||
* @param instanceIds list of instance IDs (resolved from application, used for IN clause)
|
||||
* @param offset pagination offset (0-based)
|
||||
* @param limit page size (default 50, max 500)
|
||||
* @param sortField column to sort by (default: startTime)
|
||||
@@ -41,10 +41,10 @@ public record SearchRequest(
|
||||
String textInHeaders,
|
||||
String textInErrors,
|
||||
String routeId,
|
||||
String agentId,
|
||||
String instanceId,
|
||||
String processorType,
|
||||
String application,
|
||||
List<String> agentIds,
|
||||
String applicationId,
|
||||
List<String> instanceIds,
|
||||
int offset,
|
||||
int limit,
|
||||
String sortField,
|
||||
@@ -55,8 +55,8 @@ public record SearchRequest(
|
||||
private static final int MAX_LIMIT = 500;
|
||||
|
||||
private static final java.util.Set<String> ALLOWED_SORT_FIELDS = java.util.Set.of(
|
||||
"startTime", "status", "agentId", "routeId", "correlationId",
|
||||
"durationMs", "executionId", "applicationName"
|
||||
"startTime", "status", "instanceId", "routeId", "correlationId",
|
||||
"durationMs", "executionId", "applicationId"
|
||||
);
|
||||
|
||||
/** Maps camelCase API sort field names to OpenSearch field names.
|
||||
@@ -65,11 +65,11 @@ public record SearchRequest(
|
||||
java.util.Map.entry("startTime", "start_time"),
|
||||
java.util.Map.entry("durationMs", "duration_ms"),
|
||||
java.util.Map.entry("status", "status.keyword"),
|
||||
java.util.Map.entry("agentId", "agent_id.keyword"),
|
||||
java.util.Map.entry("instanceId", "instance_id.keyword"),
|
||||
java.util.Map.entry("routeId", "route_id.keyword"),
|
||||
java.util.Map.entry("correlationId", "correlation_id.keyword"),
|
||||
java.util.Map.entry("executionId", "execution_id.keyword"),
|
||||
java.util.Map.entry("applicationName", "application_name.keyword")
|
||||
java.util.Map.entry("applicationId", "application_id.keyword")
|
||||
);
|
||||
|
||||
public SearchRequest {
|
||||
@@ -85,12 +85,12 @@ public record SearchRequest(
|
||||
return SORT_FIELD_TO_COLUMN.getOrDefault(sortField, "start_time");
|
||||
}
|
||||
|
||||
/** Create a copy with resolved agentIds (from application name lookup). */
|
||||
public SearchRequest withAgentIds(List<String> resolvedAgentIds) {
|
||||
/** Create a copy with resolved instanceIds (from application ID lookup). */
|
||||
public SearchRequest withInstanceIds(List<String> resolvedInstanceIds) {
|
||||
return new SearchRequest(
|
||||
status, timeFrom, timeTo, durationMin, durationMax, correlationId,
|
||||
text, textInBody, textInHeaders, textInErrors,
|
||||
routeId, agentId, processorType, application, resolvedAgentIds,
|
||||
routeId, instanceId, processorType, applicationId, resolvedInstanceIds,
|
||||
offset, limit, sortField, sortDir
|
||||
);
|
||||
}
|
||||
|
||||
@@ -29,8 +29,8 @@ public class SearchService {
|
||||
return statsStore.stats(from, to);
|
||||
}
|
||||
|
||||
public ExecutionStats statsForApp(Instant from, Instant to, String applicationName) {
|
||||
return statsStore.statsForApp(from, to, applicationName);
|
||||
public ExecutionStats statsForApp(Instant from, Instant to, String applicationId) {
|
||||
return statsStore.statsForApp(from, to, applicationId);
|
||||
}
|
||||
|
||||
public ExecutionStats stats(Instant from, Instant to, String routeId, List<String> agentIds) {
|
||||
@@ -41,8 +41,8 @@ public class SearchService {
|
||||
return statsStore.timeseries(from, to, bucketCount);
|
||||
}
|
||||
|
||||
public StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationName) {
|
||||
return statsStore.timeseriesForApp(from, to, bucketCount, applicationName);
|
||||
public StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId) {
|
||||
return statsStore.timeseriesForApp(from, to, bucketCount, applicationId);
|
||||
}
|
||||
|
||||
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount,
|
||||
@@ -57,13 +57,13 @@ public class SearchService {
|
||||
}
|
||||
|
||||
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
|
||||
int bucketCount, String applicationName) {
|
||||
return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationName);
|
||||
int bucketCount, String applicationId) {
|
||||
return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationId);
|
||||
}
|
||||
|
||||
public double slaCompliance(Instant from, Instant to, int thresholdMs,
|
||||
String applicationName, String routeId) {
|
||||
return statsStore.slaCompliance(from, to, thresholdMs, applicationName, routeId);
|
||||
String applicationId, String routeId) {
|
||||
return statsStore.slaCompliance(from, to, thresholdMs, applicationId, routeId);
|
||||
}
|
||||
|
||||
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
|
||||
@@ -71,20 +71,20 @@ public class SearchService {
|
||||
}
|
||||
|
||||
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
|
||||
String applicationName, int thresholdMs) {
|
||||
return statsStore.slaCountsByRoute(from, to, applicationName, thresholdMs);
|
||||
String applicationId, int thresholdMs) {
|
||||
return statsStore.slaCountsByRoute(from, to, applicationId, thresholdMs);
|
||||
}
|
||||
|
||||
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
|
||||
public List<TopError> topErrors(Instant from, Instant to, String applicationId,
|
||||
String routeId, int limit) {
|
||||
return statsStore.topErrors(from, to, applicationName, routeId, limit);
|
||||
return statsStore.topErrors(from, to, applicationId, routeId, limit);
|
||||
}
|
||||
|
||||
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
|
||||
return statsStore.activeErrorTypes(from, to, applicationName);
|
||||
public int activeErrorTypes(Instant from, Instant to, String applicationId) {
|
||||
return statsStore.activeErrorTypes(from, to, applicationId);
|
||||
}
|
||||
|
||||
public List<StatsStore.PunchcardCell> punchcard(Instant from, Instant to, String applicationName) {
|
||||
return statsStore.punchcard(from, to, applicationName);
|
||||
public List<StatsStore.PunchcardCell> punchcard(Instant from, Instant to, String applicationId) {
|
||||
return statsStore.punchcard(from, to, applicationId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ public interface DiagramStore {
|
||||
|
||||
Optional<RouteGraph> findByContentHash(String contentHash);
|
||||
|
||||
Optional<String> findContentHashForRoute(String routeId, String agentId);
|
||||
Optional<String> findContentHashForRoute(String routeId, String instanceId);
|
||||
|
||||
Optional<String> findContentHashForRouteByAgents(String routeId, List<String> agentIds);
|
||||
Optional<String> findContentHashForRouteByAgents(String routeId, List<String> instanceIds);
|
||||
|
||||
Map<String, String> findProcessorRouteMapping(String applicationName);
|
||||
Map<String, String> findProcessorRouteMapping(String applicationId);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ public interface ExecutionStore {
|
||||
void upsert(ExecutionRecord execution);
|
||||
|
||||
void upsertProcessors(String executionId, Instant startTime,
|
||||
String applicationName, String routeId,
|
||||
String applicationId, String routeId,
|
||||
List<ProcessorRecord> processors);
|
||||
|
||||
Optional<ExecutionRecord> findById(String executionId);
|
||||
@@ -19,7 +19,7 @@ public interface ExecutionStore {
|
||||
Optional<ProcessorRecord> findProcessorById(String executionId, String processorId);
|
||||
|
||||
record ExecutionRecord(
|
||||
String executionId, String routeId, String agentId, String applicationName,
|
||||
String executionId, String routeId, String instanceId, String applicationId,
|
||||
String status, String correlationId, String exchangeId,
|
||||
Instant startTime, Instant endTime, Long durationMs,
|
||||
String errorMessage, String errorStacktrace, String diagramContentHash,
|
||||
@@ -36,7 +36,7 @@ public interface ExecutionStore {
|
||||
|
||||
record ProcessorRecord(
|
||||
String executionId, String processorId, String processorType,
|
||||
String applicationName, String routeId,
|
||||
String applicationId, String routeId,
|
||||
int depth, String parentProcessorId, String status,
|
||||
Instant startTime, Instant endTime, Long durationMs,
|
||||
String errorMessage, String errorStacktrace,
|
||||
|
||||
@@ -7,9 +7,9 @@ import java.util.List;
|
||||
|
||||
public interface LogIndex {
|
||||
|
||||
List<LogEntryResult> search(String application, String agentId, String level,
|
||||
List<LogEntryResult> search(String applicationId, String instanceId, String level,
|
||||
String query, String exchangeId,
|
||||
Instant from, Instant to, int limit);
|
||||
|
||||
void indexBatch(String agentId, String application, List<LogEntry> entries);
|
||||
void indexBatch(String instanceId, String applicationId, List<LogEntry> entries);
|
||||
}
|
||||
|
||||
@@ -9,6 +9,6 @@ import java.util.Map;
|
||||
public interface MetricsQueryStore {
|
||||
|
||||
Map<String, List<MetricTimeSeries.Bucket>> queryTimeSeries(
|
||||
String agentId, List<String> metricNames,
|
||||
String instanceId, List<String> metricNames,
|
||||
Instant from, Instant to, int buckets);
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ public interface StatsStore {
|
||||
ExecutionStats stats(Instant from, Instant to);
|
||||
|
||||
// Per-app stats (stats_1m_app)
|
||||
ExecutionStats statsForApp(Instant from, Instant to, String applicationName);
|
||||
ExecutionStats statsForApp(Instant from, Instant to, String applicationId);
|
||||
|
||||
// Per-route stats (stats_1m_route), optionally scoped to specific agents
|
||||
ExecutionStats statsForRoute(Instant from, Instant to, String routeId, List<String> agentIds);
|
||||
@@ -26,7 +26,7 @@ public interface StatsStore {
|
||||
StatsTimeseries timeseries(Instant from, Instant to, int bucketCount);
|
||||
|
||||
// Per-app timeseries
|
||||
StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationName);
|
||||
StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId);
|
||||
|
||||
// Per-route timeseries, optionally scoped to specific agents
|
||||
StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
|
||||
@@ -41,28 +41,28 @@ public interface StatsStore {
|
||||
|
||||
// Grouped timeseries by route within an application (for L2 dashboard charts)
|
||||
Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to, int bucketCount,
|
||||
String applicationName);
|
||||
String applicationId);
|
||||
|
||||
// SLA compliance: % of completed exchanges with duration <= thresholdMs
|
||||
double slaCompliance(Instant from, Instant to, int thresholdMs,
|
||||
String applicationName, String routeId);
|
||||
String applicationId, String routeId);
|
||||
|
||||
// Batch SLA counts by app: {appId -> [compliant, total]}
|
||||
Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs);
|
||||
|
||||
// Batch SLA counts by route within an app: {routeId -> [compliant, total]}
|
||||
Map<String, long[]> slaCountsByRoute(Instant from, Instant to, String applicationName,
|
||||
Map<String, long[]> slaCountsByRoute(Instant from, Instant to, String applicationId,
|
||||
int thresholdMs);
|
||||
|
||||
// Top N errors with velocity trend
|
||||
List<TopError> topErrors(Instant from, Instant to, String applicationName,
|
||||
List<TopError> topErrors(Instant from, Instant to, String applicationId,
|
||||
String routeId, int limit);
|
||||
|
||||
// Count of distinct error types in window
|
||||
int activeErrorTypes(Instant from, Instant to, String applicationName);
|
||||
int activeErrorTypes(Instant from, Instant to, String applicationId);
|
||||
|
||||
// Punchcard: aggregate by weekday (0=Sun..6=Sat) x hour (0-23) over last 7 days
|
||||
List<PunchcardCell> punchcard(Instant from, Instant to, String applicationName);
|
||||
List<PunchcardCell> punchcard(Instant from, Instant to, String applicationId);
|
||||
|
||||
// One cell of the punchcard result returned by punchcard(...): counts for a single
// weekday/hour bucket. Per the punchcard comment, weekday is 0=Sun..6=Sat and hour is 0-23.
// NOTE(review): totalCount/failedCount are presumably exchange counts for the bucket — confirm
// against the store implementation.
record PunchcardCell(int weekday, int hour, long totalCount, long failedCount) {}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import java.time.Instant;
|
||||
import java.util.List;
|
||||
|
||||
public record ExecutionDocument(
|
||||
String executionId, String routeId, String agentId, String applicationName,
|
||||
String executionId, String routeId, String instanceId, String applicationId,
|
||||
String status, String correlationId, String exchangeId,
|
||||
Instant startTime, Instant endTime, Long durationMs,
|
||||
String errorMessage, String errorStacktrace,
|
||||
|
||||
@@ -7,7 +7,7 @@ import java.util.Map;
|
||||
* A single metrics data point from an agent.
|
||||
*/
|
||||
public record MetricsSnapshot(
|
||||
String agentId,
|
||||
String instanceId,
|
||||
Instant collectedAt,
|
||||
String metricName,
|
||||
double metricValue,
|
||||
|
||||
@@ -30,9 +30,9 @@ class AgentRegistryServiceTest {
|
||||
"1.0.0", List.of("route1", "route2"), Map.of("feature", "tracing"));
|
||||
|
||||
assertThat(agent).isNotNull();
|
||||
assertThat(agent.id()).isEqualTo("agent-1");
|
||||
assertThat(agent.name()).isEqualTo("Order Agent");
|
||||
assertThat(agent.application()).isEqualTo("order-svc");
|
||||
assertThat(agent.instanceId()).isEqualTo("agent-1");
|
||||
assertThat(agent.displayName()).isEqualTo("Order Agent");
|
||||
assertThat(agent.applicationId()).isEqualTo("order-svc");
|
||||
assertThat(agent.version()).isEqualTo("1.0.0");
|
||||
assertThat(agent.routeIds()).containsExactly("route1", "route2");
|
||||
assertThat(agent.capabilities()).containsEntry("feature", "tracing");
|
||||
@@ -50,9 +50,9 @@ class AgentRegistryServiceTest {
|
||||
AgentInfo updated = registry.register("agent-1", "New Name", "new-group",
|
||||
"2.0.0", List.of("route1", "route2"), Map.of("new", "cap"));
|
||||
|
||||
assertThat(updated.id()).isEqualTo("agent-1");
|
||||
assertThat(updated.name()).isEqualTo("New Name");
|
||||
assertThat(updated.application()).isEqualTo("new-group");
|
||||
assertThat(updated.instanceId()).isEqualTo("agent-1");
|
||||
assertThat(updated.displayName()).isEqualTo("New Name");
|
||||
assertThat(updated.applicationId()).isEqualTo("new-group");
|
||||
assertThat(updated.version()).isEqualTo("2.0.0");
|
||||
assertThat(updated.routeIds()).containsExactly("route1", "route2");
|
||||
assertThat(updated.capabilities()).containsEntry("new", "cap");
|
||||
@@ -192,7 +192,7 @@ class AgentRegistryServiceTest {
|
||||
List<AgentInfo> all = registry.findAll();
|
||||
|
||||
assertThat(all).hasSize(2);
|
||||
assertThat(all).extracting(AgentInfo::id).containsExactlyInAnyOrder("agent-1", "agent-2");
|
||||
assertThat(all).extracting(AgentInfo::instanceId).containsExactlyInAnyOrder("agent-1", "agent-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -204,8 +204,8 @@ class AgentRegistryServiceTest {
|
||||
List<AgentInfo> live = registry.findByState(AgentState.LIVE);
|
||||
List<AgentInfo> stale = registry.findByState(AgentState.STALE);
|
||||
|
||||
assertThat(live).hasSize(1).extracting(AgentInfo::id).containsExactly("agent-1");
|
||||
assertThat(stale).hasSize(1).extracting(AgentInfo::id).containsExactly("agent-2");
|
||||
assertThat(live).hasSize(1).extracting(AgentInfo::instanceId).containsExactly("agent-1");
|
||||
assertThat(stale).hasSize(1).extracting(AgentInfo::instanceId).containsExactly("agent-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -222,7 +222,7 @@ class AgentRegistryServiceTest {
|
||||
AgentInfo result = registry.findById("agent-1");
|
||||
|
||||
assertThat(result).isNotNull();
|
||||
assertThat(result.id()).isEqualTo("agent-1");
|
||||
assertThat(result.instanceId()).isEqualTo("agent-1");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,7 +239,7 @@ class AgentRegistryServiceTest {
|
||||
assertThat(cmd.id()).isNotNull();
|
||||
assertThat(cmd.type()).isEqualTo(CommandType.CONFIG_UPDATE);
|
||||
assertThat(cmd.payload()).isEqualTo("{\"key\":\"val\"}");
|
||||
assertThat(cmd.targetAgentId()).isEqualTo("agent-1");
|
||||
assertThat(cmd.targetInstanceId()).isEqualTo("agent-1");
|
||||
assertThat(cmd.status()).isEqualTo(CommandStatus.PENDING);
|
||||
assertThat(cmd.createdAt()).isNotNull();
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ class ChunkAccumulatorTest {
|
||||
assertThat(batch.tenantId()).isEqualTo("default");
|
||||
assertThat(batch.executionId()).isEqualTo("ex-1");
|
||||
assertThat(batch.routeId()).isEqualTo("route-1");
|
||||
assertThat(batch.applicationName()).isEqualTo("order-service");
|
||||
assertThat(batch.applicationId()).isEqualTo("order-service");
|
||||
assertThat(batch.execStartTime()).isEqualTo(Instant.parse("2026-03-31T10:00:00Z"));
|
||||
assertThat(batch.processors()).hasSize(1);
|
||||
|
||||
@@ -206,8 +206,8 @@ class ChunkAccumulatorTest {
|
||||
int chunkSeq, boolean isFinal, List<FlatProcessorRecord> processors) {
|
||||
ExecutionChunk c = new ExecutionChunk();
|
||||
c.setExchangeId(exchangeId);
|
||||
c.setApplicationName(exchangeId.equals("ex-1") ? "order-service" : "app");
|
||||
c.setAgentId("agent-1");
|
||||
c.setApplicationId(exchangeId.equals("ex-1") ? "order-service" : "app");
|
||||
c.setInstanceId("agent-1");
|
||||
c.setRouteId("route-1");
|
||||
c.setCorrelationId(null);
|
||||
c.setStatus(ExecutionStatus.valueOf(status));
|
||||
|
||||
Reference in New Issue
Block a user