diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/AgentRegistrationController.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/AgentRegistrationController.java index 5ab042f6..185d45f2 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/AgentRegistrationController.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/AgentRegistrationController.java @@ -329,6 +329,7 @@ public class AgentRegistrationController { // Enrich with runtime metrics from continuous aggregates Map agentMetrics = queryAgentMetrics(); + Map cpuByInstance = queryAgentCpuUsage(); final List finalAgents = agents; List response = finalAgents.stream() @@ -341,7 +342,11 @@ public class AgentRegistrationController { double agentTps = appAgentCount > 0 ? m[0] / appAgentCount : 0; double errorRate = m[1]; int activeRoutes = (int) m[2]; - return dto.withMetrics(agentTps, errorRate, activeRoutes); + dto = dto.withMetrics(agentTps, errorRate, activeRoutes); + } + Double cpu = cpuByInstance.get(a.instanceId()); + if (cpu != null) { + dto = dto.withCpuUsage(cpu); } return dto; }) @@ -377,6 +382,27 @@ public class AgentRegistrationController { return result; } + /** Query average CPU usage per agent instance over the last 2 minutes. */ + private Map queryAgentCpuUsage() { + Map result = new HashMap<>(); + Instant now = Instant.now(); + Instant from2m = now.minus(2, ChronoUnit.MINUTES); + try { + jdbc.query( + "SELECT instance_id, avg(metric_value) AS cpu_avg " + + "FROM agent_metrics " + + "WHERE metric_name = 'process.cpu.usage.value'" + + " AND collected_at >= " + lit(from2m) + " AND collected_at < " + lit(now) + + " GROUP BY instance_id", + rs -> { + result.put(rs.getString("instance_id"), rs.getDouble("cpu_avg")); + }); + } catch (Exception e) { + log.debug("Could not query agent CPU usage: {}", e.getMessage()); + } + return result; + } + /** Format an Instant as a ClickHouse DateTime literal. */ private static String lit(Instant instant) { return "'" + java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/dto/AgentInstanceResponse.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/dto/AgentInstanceResponse.java index c30846b5..97c361c8 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/dto/AgentInstanceResponse.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/dto/AgentInstanceResponse.java @@ -25,7 +25,8 @@ public record AgentInstanceResponse( double errorRate, int activeRoutes, int totalRoutes, - long uptimeSeconds + long uptimeSeconds, + @Schema(description = "Recent average CPU usage (0.0–1.0), -1 if unavailable") double cpuUsage ) { public static AgentInstanceResponse from(AgentInfo info) { long uptime = Duration.between(info.registeredAt(), Instant.now()).toSeconds(); @@ -37,7 +38,7 @@ public record AgentInstanceResponse( info.version(), info.capabilities(), 0.0, 0.0, 0, info.routeIds() != null ? info.routeIds().size() : 0, - uptime + uptime, -1 ); } @@ -46,7 +47,16 @@ public record AgentInstanceResponse( instanceId, displayName, applicationId, environmentId, status, routeIds, registeredAt, lastHeartbeat, version, capabilities, - tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds + tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds, cpuUsage + ); + } + + public AgentInstanceResponse withCpuUsage(double cpuUsage) { + return new AgentInstanceResponse( + instanceId, displayName, applicationId, environmentId, + status, routeIds, registeredAt, lastHeartbeat, + version, capabilities, + tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds, cpuUsage ); } } diff --git a/ui/src/api/schema.d.ts b/ui/src/api/schema.d.ts index 732d591a..2c9a051c 100644 --- a/ui/src/api/schema.d.ts +++ b/ui/src/api/schema.d.ts @@ -2065,6 +2065,8 @@ export interface components { totalRoutes: number; /** Format: int64 */ uptimeSeconds: number; + /** Format: double */ + cpuUsage: number; }; SseEmitter: { /** Format: int64 */ diff --git a/ui/src/pages/AgentHealth/AgentHealth.module.css b/ui/src/pages/AgentHealth/AgentHealth.module.css index afe43c5f..cd77cabc 100644 --- a/ui/src/pages/AgentHealth/AgentHealth.module.css +++ b/ui/src/pages/AgentHealth/AgentHealth.module.css @@ -321,6 +321,11 @@ color: var(--text-muted); } +.compactCardCpu { + font-family: var(--font-mono); + color: var(--text-muted); +} + .compactCardHeartbeat { color: var(--text-muted); } diff --git a/ui/src/pages/AgentHealth/AgentHealth.tsx b/ui/src/pages/AgentHealth/AgentHealth.tsx index ec9ecfae..60f3b35e 100644 --- a/ui/src/pages/AgentHealth/AgentHealth.tsx +++ b/ui/src/pages/AgentHealth/AgentHealth.tsx @@ -52,6 +52,7 @@ interface AppGroup { totalTps: number; totalActiveRoutes: number; totalRoutes: number; + maxCpu: number; } function groupByApp(agentList: AgentInstance[]): AppGroup[] { @@ -71,6 +72,7 @@ function groupByApp(agentList: AgentInstance[]): AppGroup[] { totalTps: instances.reduce((s, i) => s + (i.tps ?? 0), 0), totalActiveRoutes: instances.reduce((s, i) => s + (i.activeRoutes ?? 0), 0), totalRoutes: instances.reduce((s, i) => s + (i.totalRoutes ?? 0), 0), + maxCpu: Math.max(...instances.map((i) => (i as AgentInstance & { cpuUsage?: number }).cpuUsage ?? -1)), })); } @@ -141,6 +143,11 @@ function CompactAppCard({ group, onExpand, onNavigate }: { group: AppGroup; onEx {group.totalTps.toFixed(1)} tps + {group.maxCpu >= 0 && ( + + {(group.maxCpu * 100).toFixed(0)}% cpu + + )} {heartbeat ? timeAgo(heartbeat) : '\u2014'}