feat: add CPU usage to agent response and compact cards

Backend:
- Add cpuUsage field to AgentInstanceResponse (-1 if unavailable)
- Add queryAgentCpuUsage() to AgentRegistrationController — queries
  avg CPU per instance from agent_metrics over last 2 minutes
- Wire CPU into agent list response via withCpuUsage()

Frontend:
- Add cpuUsage to schema.d.ts
- Compute maxCpu per AppGroup (max across all instances)
- Show "X% cpu" on compact cards when available (hidden when -1)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-16 14:12:23 +02:00
parent b57fe875f3
commit 4b264b3308
5 changed files with 54 additions and 4 deletions

View File

@@ -329,6 +329,7 @@ public class AgentRegistrationController {
// Enrich with runtime metrics from continuous aggregates // Enrich with runtime metrics from continuous aggregates
Map<String, double[]> agentMetrics = queryAgentMetrics(); Map<String, double[]> agentMetrics = queryAgentMetrics();
Map<String, Double> cpuByInstance = queryAgentCpuUsage();
final List<AgentInfo> finalAgents = agents; final List<AgentInfo> finalAgents = agents;
List<AgentInstanceResponse> response = finalAgents.stream() List<AgentInstanceResponse> response = finalAgents.stream()
@@ -341,7 +342,11 @@ public class AgentRegistrationController {
double agentTps = appAgentCount > 0 ? m[0] / appAgentCount : 0; double agentTps = appAgentCount > 0 ? m[0] / appAgentCount : 0;
double errorRate = m[1]; double errorRate = m[1];
int activeRoutes = (int) m[2]; int activeRoutes = (int) m[2];
return dto.withMetrics(agentTps, errorRate, activeRoutes); dto = dto.withMetrics(agentTps, errorRate, activeRoutes);
}
Double cpu = cpuByInstance.get(a.instanceId());
if (cpu != null) {
dto = dto.withCpuUsage(cpu);
} }
return dto; return dto;
}) })
@@ -377,6 +382,27 @@ public class AgentRegistrationController {
return result; return result;
} }
/**
 * Query the average CPU usage per agent instance over the last 2 minutes.
 *
 * <p>Averages the {@code process.cpu.usage.value} samples in {@code agent_metrics}
 * per {@code instance_id} for the half-open window {@code [now - 2 min, now)}.
 * The lookup is best-effort: any failure is logged at debug level and whatever
 * was collected up to that point (possibly nothing) is returned.
 *
 * <p>Rows whose average is SQL NULL, NaN or infinite are skipped, so the
 * affected instance is simply absent from the result instead of being
 * reported with a misleading value.
 *
 * @return mutable map of instance id to average CPU usage; never {@code null}.
 *         NOTE(review): values are presumably fractions in the 0.0-1.0 range,
 *         as stored by the agents; confirm against the metric producer.
 */
private Map<String, Double> queryAgentCpuUsage() {
    Map<String, Double> result = new HashMap<>();
    Instant now = Instant.now();
    Instant from2m = now.minus(2, ChronoUnit.MINUTES);
    try {
        jdbc.query(
                "SELECT instance_id, avg(metric_value) AS cpu_avg " +
                "FROM agent_metrics " +
                "WHERE metric_name = 'process.cpu.usage.value'" +
                " AND collected_at >= " + lit(from2m) + " AND collected_at < " + lit(now) +
                " GROUP BY instance_id",
                rs -> {
                    String instanceId = rs.getString("instance_id");
                    double cpuAvg = rs.getDouble("cpu_avg");
                    // getDouble() maps SQL NULL to 0.0, which would be indistinguishable
                    // from a genuine 0% reading; wasNull() refers to the column read last.
                    if (rs.wasNull() || !Double.isFinite(cpuAvg)) {
                        return;
                    }
                    result.put(instanceId, cpuAvg);
                });
    } catch (Exception e) {
        // Deliberately non-fatal: callers treat a missing entry as "CPU unavailable".
        log.debug("Could not query agent CPU usage: {}", e.getMessage());
    }
    return result;
}
/** Format an Instant as a ClickHouse DateTime literal. */ /** Format an Instant as a ClickHouse DateTime literal. */
private static String lit(Instant instant) { private static String lit(Instant instant) {
return "'" + java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") return "'" + java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")

View File

@@ -25,7 +25,8 @@ public record AgentInstanceResponse(
double errorRate, double errorRate,
int activeRoutes, int activeRoutes,
int totalRoutes, int totalRoutes,
long uptimeSeconds long uptimeSeconds,
@Schema(description = "Recent average CPU usage (0.0-1.0), -1 if unavailable") double cpuUsage
) { ) {
public static AgentInstanceResponse from(AgentInfo info) { public static AgentInstanceResponse from(AgentInfo info) {
long uptime = Duration.between(info.registeredAt(), Instant.now()).toSeconds(); long uptime = Duration.between(info.registeredAt(), Instant.now()).toSeconds();
@@ -37,7 +38,7 @@ public record AgentInstanceResponse(
info.version(), info.capabilities(), info.version(), info.capabilities(),
0.0, 0.0, 0.0, 0.0,
0, info.routeIds() != null ? info.routeIds().size() : 0, 0, info.routeIds() != null ? info.routeIds().size() : 0,
uptime uptime, -1
); );
} }
@@ -46,7 +47,16 @@ public record AgentInstanceResponse(
instanceId, displayName, applicationId, environmentId, instanceId, displayName, applicationId, environmentId,
status, routeIds, registeredAt, lastHeartbeat, status, routeIds, registeredAt, lastHeartbeat,
version, capabilities, version, capabilities,
tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds, cpuUsage
);
}
/**
 * Returns a copy of this response with {@code cpuUsage} set to the given value.
 * Every other component is passed to the new instance unchanged.
 *
 * @param cpuUsage the CPU usage value to store in the copy
 * @return a new {@code AgentInstanceResponse} carrying the supplied CPU usage
 */
public AgentInstanceResponse withCpuUsage(double cpuUsage) {
// The trailing cpuUsage argument below is the method parameter, which shadows the record component.
return new AgentInstanceResponse(
instanceId, displayName, applicationId, environmentId,
status, routeIds, registeredAt, lastHeartbeat,
version, capabilities,
tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds, cpuUsage
); );
} }
} }

View File

@@ -2065,6 +2065,8 @@ export interface components {
totalRoutes: number; totalRoutes: number;
/** Format: int64 */ /** Format: int64 */
uptimeSeconds: number; uptimeSeconds: number;
/** Format: double */
cpuUsage: number;
}; };
SseEmitter: { SseEmitter: {
/** Format: int64 */ /** Format: int64 */

View File

@@ -321,6 +321,11 @@
color: var(--text-muted); color: var(--text-muted);
} }
/* CPU usage readout on compact app cards: muted, monospaced text. */
.compactCardCpu {
  color: var(--text-muted);
  font-family: var(--font-mono);
}
.compactCardHeartbeat { .compactCardHeartbeat {
color: var(--text-muted); color: var(--text-muted);
} }

View File

@@ -52,6 +52,7 @@ interface AppGroup {
totalTps: number; totalTps: number;
totalActiveRoutes: number; totalActiveRoutes: number;
totalRoutes: number; totalRoutes: number;
maxCpu: number;
} }
function groupByApp(agentList: AgentInstance[]): AppGroup[] { function groupByApp(agentList: AgentInstance[]): AppGroup[] {
@@ -71,6 +72,7 @@ function groupByApp(agentList: AgentInstance[]): AppGroup[] {
totalTps: instances.reduce((s, i) => s + (i.tps ?? 0), 0), totalTps: instances.reduce((s, i) => s + (i.tps ?? 0), 0),
totalActiveRoutes: instances.reduce((s, i) => s + (i.activeRoutes ?? 0), 0), totalActiveRoutes: instances.reduce((s, i) => s + (i.activeRoutes ?? 0), 0),
totalRoutes: instances.reduce((s, i) => s + (i.totalRoutes ?? 0), 0), totalRoutes: instances.reduce((s, i) => s + (i.totalRoutes ?? 0), 0),
maxCpu: Math.max(...instances.map((i) => (i as AgentInstance & { cpuUsage?: number }).cpuUsage ?? -1)),
})); }));
} }
@@ -141,6 +143,11 @@ function CompactAppCard({ group, onExpand, onNavigate }: { group: AppGroup; onEx
<span className={styles.compactCardTps}> <span className={styles.compactCardTps}>
{group.totalTps.toFixed(1)} tps {group.totalTps.toFixed(1)} tps
</span> </span>
{group.maxCpu >= 0 && (
<span className={styles.compactCardCpu}>
{(group.maxCpu * 100).toFixed(0)}% cpu
</span>
)}
<span className={isHealthy ? styles.compactCardHeartbeat : styles.compactCardHeartbeatWarn}> <span className={isHealthy ? styles.compactCardHeartbeat : styles.compactCardHeartbeatWarn}>
{heartbeat ? timeAgo(heartbeat) : '\u2014'} {heartbeat ? timeAgo(heartbeat) : '\u2014'}
</span> </span>