feat: add CPU usage to agent response and compact cards

Backend:
- Add cpuUsage field to AgentInstanceResponse (-1 if unavailable)
- Add queryAgentCpuUsage() to AgentRegistrationController — queries
  avg CPU per instance from agent_metrics over last 2 minutes
- Wire CPU into agent list response via withCpuUsage()

Frontend:
- Add cpuUsage to schema.d.ts
- Compute maxCpu per AppGroup (max across all instances)
- Show "X% cpu" on compact cards when available (hidden when -1)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-16 14:12:23 +02:00
parent b57fe875f3
commit 4b264b3308
5 changed files with 54 additions and 4 deletions

View File

@@ -329,6 +329,7 @@ public class AgentRegistrationController {
// Enrich with runtime metrics from continuous aggregates
Map<String, double[]> agentMetrics = queryAgentMetrics();
Map<String, Double> cpuByInstance = queryAgentCpuUsage();
final List<AgentInfo> finalAgents = agents;
List<AgentInstanceResponse> response = finalAgents.stream()
@@ -341,7 +342,11 @@ public class AgentRegistrationController {
double agentTps = appAgentCount > 0 ? m[0] / appAgentCount : 0;
double errorRate = m[1];
int activeRoutes = (int) m[2];
return dto.withMetrics(agentTps, errorRate, activeRoutes);
dto = dto.withMetrics(agentTps, errorRate, activeRoutes);
}
Double cpu = cpuByInstance.get(a.instanceId());
if (cpu != null) {
dto = dto.withCpuUsage(cpu);
}
return dto;
})
@@ -377,6 +382,27 @@ public class AgentRegistrationController {
return result;
}
/**
 * Query average CPU usage per agent instance over the last 2 minutes.
 *
 * <p>Averages the {@code process.cpu.usage.value} rows of {@code agent_metrics} per
 * {@code instance_id} over the half-open window {@code [now - 2 min, now)}.
 *
 * <p>Best-effort: a failing query is logged at debug level and whatever was collected
 * up to that point (possibly nothing) is returned. Instances whose average is SQL NULL
 * or not a finite number are omitted, so a missing map entry consistently means
 * "no usable CPU reading" for that instance.
 *
 * @return mutable map of instance id to average CPU usage; never {@code null}
 */
private Map<String, Double> queryAgentCpuUsage() {
    Map<String, Double> result = new HashMap<>();
    Instant now = Instant.now();
    Instant from2m = now.minus(2, ChronoUnit.MINUTES);
    try {
        jdbc.query(
                "SELECT instance_id, avg(metric_value) AS cpu_avg " +
                "FROM agent_metrics " +
                "WHERE metric_name = 'process.cpu.usage.value'" +
                " AND collected_at >= " + lit(from2m) + " AND collected_at < " + lit(now) +
                " GROUP BY instance_id",
                rs -> {
                    double cpuAvg = rs.getDouble("cpu_avg");
                    // getDouble() reports SQL NULL as 0.0, and an average over NaN samples is NaN.
                    // Neither is a real reading, so skip the row instead of publishing a bogus value.
                    if (!rs.wasNull() && Double.isFinite(cpuAvg)) {
                        result.put(rs.getString("instance_id"), cpuAvg);
                    }
                });
    } catch (Exception e) {
        // CPU figures are optional enrichment; a failure here must not break the caller.
        log.debug("Could not query agent CPU usage: {}", e.getMessage());
    }
    return result;
}
/** Format an Instant as a ClickHouse DateTime literal. */
private static String lit(Instant instant) {
return "'" + java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")

View File

@@ -25,7 +25,8 @@ public record AgentInstanceResponse(
double errorRate,
int activeRoutes,
int totalRoutes,
long uptimeSeconds
long uptimeSeconds,
@Schema(description = "Recent average CPU usage (0.0-1.0), -1 if unavailable") double cpuUsage
) {
public static AgentInstanceResponse from(AgentInfo info) {
long uptime = Duration.between(info.registeredAt(), Instant.now()).toSeconds();
@@ -37,7 +38,7 @@ public record AgentInstanceResponse(
info.version(), info.capabilities(),
0.0, 0.0,
0, info.routeIds() != null ? info.routeIds().size() : 0,
uptime
uptime, -1
);
}
@@ -46,7 +47,16 @@ public record AgentInstanceResponse(
instanceId, displayName, applicationId, environmentId,
status, routeIds, registeredAt, lastHeartbeat,
version, capabilities,
tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds
tps, errorRate, activeRoutes, totalRoutes, uptimeSeconds, cpuUsage
);
}
/**
 * Creates a copy of this response in which only the CPU usage differs.
 *
 * @param cpuUsage value to store in the copy's {@code cpuUsage} component
 * @return a new {@code AgentInstanceResponse}; every other component is carried over unchanged
 */
public AgentInstanceResponse withCpuUsage(double cpuUsage) {
    final AgentInstanceResponse copy = new AgentInstanceResponse(
            this.instanceId,
            this.displayName,
            this.applicationId,
            this.environmentId,
            this.status,
            this.routeIds,
            this.registeredAt,
            this.lastHeartbeat,
            this.version,
            this.capabilities,
            this.tps,
            this.errorRate,
            this.activeRoutes,
            this.totalRoutes,
            this.uptimeSeconds,
            cpuUsage); // the only component taken from the argument
    return copy;
}
}

View File

@@ -2065,6 +2065,8 @@ export interface components {
totalRoutes: number;
/** Format: int64 */
uptimeSeconds: number;
/** Format: double */
cpuUsage: number;
};
SseEmitter: {
/** Format: int64 */

View File

@@ -321,6 +321,11 @@
color: var(--text-muted);
}
/* CPU readout on compact cards: muted text in the monospace font. */
.compactCardCpu {
  color: var(--text-muted);
  font-family: var(--font-mono);
}
.compactCardHeartbeat {
color: var(--text-muted);
}

View File

@@ -52,6 +52,7 @@ interface AppGroup {
totalTps: number;
totalActiveRoutes: number;
totalRoutes: number;
maxCpu: number;
}
function groupByApp(agentList: AgentInstance[]): AppGroup[] {
@@ -71,6 +72,7 @@ function groupByApp(agentList: AgentInstance[]): AppGroup[] {
totalTps: instances.reduce((s, i) => s + (i.tps ?? 0), 0),
totalActiveRoutes: instances.reduce((s, i) => s + (i.activeRoutes ?? 0), 0),
totalRoutes: instances.reduce((s, i) => s + (i.totalRoutes ?? 0), 0),
maxCpu: Math.max(...instances.map((i) => (i as AgentInstance & { cpuUsage?: number }).cpuUsage ?? -1)),
}));
}
@@ -141,6 +143,11 @@ function CompactAppCard({ group, onExpand, onNavigate }: { group: AppGroup; onEx
<span className={styles.compactCardTps}>
{group.totalTps.toFixed(1)} tps
</span>
{group.maxCpu >= 0 && (
<span className={styles.compactCardCpu}>
{(group.maxCpu * 100).toFixed(0)}% cpu
</span>
)}
<span className={isHealthy ? styles.compactCardHeartbeat : styles.compactCardHeartbeatWarn}>
{heartbeat ? timeAgo(heartbeat) : '\u2014'}
</span>