From 1fbafbb16da4ce7b8e4d53a188ac6cc225b556f5 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Fri, 24 Apr 2026 14:02:57 +0200 Subject: [PATCH] feat: add vendor tenant metrics dashboard Fleet overview page at /vendor/metrics showing per-tenant operational metrics (agents, CPU, heap, HTTP requests, ingestion drops, uptime). Queries each tenant's server via the new POST /api/v1/admin/server-metrics/query REST API instead of direct ClickHouse access, supporting future per-tenant CH instances. Backend: TenantMetricsService fires 11 metric queries per tenant concurrently over a 5-minute window, assembles into a summary snapshot. ServerApiClient.queryServerMetrics() handles the M2M authenticated POST. Frontend: VendorMetricsPage with KPI strip (fleet totals) and per-tenant table with color-coded badges and heap usage bars. Auto-refreshes every 60s. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../saas/identity/ServerApiClient.java | 33 +++ .../saas/vendor/TenantMetricsController.java | 74 +++++++ .../saas/vendor/TenantMetricsService.java | 176 ++++++++++++++++ ui/src/api/vendor-hooks.ts | 12 +- ui/src/components/Layout.tsx | 8 + ui/src/pages/vendor/VendorMetricsPage.tsx | 194 ++++++++++++++++++ ui/src/router.tsx | 6 + ui/src/types/api.ts | 45 ++++ 8 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java create mode 100644 src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java create mode 100644 ui/src/pages/vendor/VendorMetricsPage.tsx diff --git a/src/main/java/net/siegeln/cameleer/saas/identity/ServerApiClient.java b/src/main/java/net/siegeln/cameleer/saas/identity/ServerApiClient.java index 7f45143..f8db0fc 100644 --- a/src/main/java/net/siegeln/cameleer/saas/identity/ServerApiClient.java +++ b/src/main/java/net/siegeln/cameleer/saas/identity/ServerApiClient.java @@ -8,6 +8,7 @@ import 
org.springframework.stereotype.Service; import org.springframework.web.client.RestClient; import java.time.Instant; +import java.util.List; import java.util.Map; /** @@ -171,6 +172,38 @@ public class ServerApiClient { public record ServerHealthResponse(boolean healthy, String status) {} + // --- Server metrics query (POST /api/v1/admin/server-metrics/query) --- + + public record MetricsQueryResponse( + String metric, + String statistic, + String aggregation, + String mode, + int stepSeconds, + List series + ) {} + + public record MetricsSeries(Map tags, List points) {} + + public record MetricsPoint(String t, double v) {} + + /** Execute a server-metrics query against a tenant's server. */ + public MetricsQueryResponse queryServerMetrics(String serverEndpoint, Map body) { + try { + return RestClient.create().post() + .uri(serverEndpoint + "/api/v1/admin/server-metrics/query") + .header("Authorization", "Bearer " + getAccessToken()) + .header("X-Cameleer-Protocol-Version", "1") + .contentType(MediaType.APPLICATION_JSON) + .body(body) + .retrieve() + .body(MetricsQueryResponse.class); + } catch (Exception e) { + log.warn("Metrics query failed for {}: {}", serverEndpoint, e.getMessage()); + return null; + } + } + private synchronized String getAccessToken() { if (cachedToken != null && Instant.now().isBefore(tokenExpiry.minusSeconds(60))) { return cachedToken; diff --git a/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java b/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java new file mode 100644 index 0000000..3b0d0fb --- /dev/null +++ b/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java @@ -0,0 +1,74 @@ +package net.siegeln.cameleer.saas.vendor; + +import net.siegeln.cameleer.saas.provisioning.ServerStatus; +import net.siegeln.cameleer.saas.tenant.TenantEntity; +import org.springframework.http.ResponseEntity; +import org.springframework.security.access.prepost.PreAuthorize; +import 
org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.List; +import java.util.concurrent.CompletableFuture; + +@RestController +@RequestMapping("/api/vendor/metrics") +@PreAuthorize("hasAuthority('SCOPE_platform:admin')") +public class TenantMetricsController { + + private final VendorTenantService vendorTenantService; + private final TenantMetricsService metricsService; + + public TenantMetricsController(VendorTenantService vendorTenantService, + TenantMetricsService metricsService) { + this.vendorTenantService = vendorTenantService; + this.metricsService = metricsService; + } + + public record TenantMetricsEntry( + String tenantId, + String tenantName, + String slug, + String tier, + String status, + String serverState, + TenantMetricsService.MetricsSummary metrics + ) {} + + @GetMapping + public ResponseEntity> all() { + List tenants = vendorTenantService.listAll(); + + List> futures = tenants.stream() + .map(tenant -> CompletableFuture.supplyAsync(() -> { + ServerStatus serverStatus = vendorTenantService.getServerStatus(tenant); + String state = serverStatus.state().name(); + + TenantMetricsService.MetricsSummary metrics = null; + String endpoint = tenant.getServerEndpoint(); + boolean isRunning = "ACTIVE".equals(tenant.getStatus().name()) + && endpoint != null && !endpoint.isBlank() + && "RUNNING".equals(state); + if (isRunning) { + metrics = metricsService.getMetricsSummary(endpoint); + } + + return new TenantMetricsEntry( + tenant.getId().toString(), + tenant.getName(), + tenant.getSlug(), + tenant.getTier().name(), + tenant.getStatus().name(), + state, + metrics + ); + })) + .toList(); + + List entries = futures.stream() + .map(CompletableFuture::join) + .toList(); + + return ResponseEntity.ok(entries); + } +} diff --git a/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java 
b/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java new file mode 100644 index 0000000..9ee4fda --- /dev/null +++ b/src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java @@ -0,0 +1,176 @@ +package net.siegeln.cameleer.saas.vendor; + +import net.siegeln.cameleer.saas.identity.ServerApiClient; +import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsQueryResponse; +import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsPoint; +import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsSeries; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; + +@Service +public class TenantMetricsService { + + private static final Logger log = LoggerFactory.getLogger(TenantMetricsService.class); + + private final ServerApiClient serverApiClient; + + public TenantMetricsService(ServerApiClient serverApiClient) { + this.serverApiClient = serverApiClient; + } + + // --- Response records --- + + public record MetricsSummary( + String collectedAt, + AgentMetrics agents, + IngestionMetrics ingestion, + ServerJvmMetrics server, + HttpMetrics http, + double authFailuresPerMinute + ) {} + + public record AgentMetrics(int live, int stale, int dead, int shutdown) {} + + public record IngestionMetrics(long bufferDepth, double dropsPerMinute) {} + + public record ServerJvmMetrics( + double cpuUsage, + long heapUsedBytes, + long heapMaxBytes, + long uptimeSeconds, + int threadCount + ) {} + + public record HttpMetrics(double requestsPerMinute, double errorRate) {} + + /** + * Query a tenant's server for key metrics and assemble a summary snapshot. + * Fires multiple queries concurrently (one per metric) over the last 5 minutes. 
+ */ + public MetricsSummary getMetricsSummary(String serverEndpoint) { + Instant to = Instant.now(); + Instant from = to.minus(5, ChronoUnit.MINUTES); + String fromStr = from.toString(); + String toStr = to.toString(); + + // Fire all queries concurrently + var agentsFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "cameleer.agents.connected", "value", fromStr, toStr, "avg", "raw", List.of("state"), null)); + var cpuFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "process.cpu.usage", "value", fromStr, toStr, "avg", "raw", null, null)); + var heapUsedFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "jvm.memory.used", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap"))); + var heapMaxFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "jvm.memory.max", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap"))); + var uptimeFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "process.uptime", "value", fromStr, toStr, "latest", "raw", null, null)); + var threadsFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "jvm.threads.live", "value", fromStr, toStr, "avg", "raw", null, null)); + var dropsFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "cameleer.ingestion.drops", "count", fromStr, toStr, "sum", "delta", null, null)); + var bufferFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "cameleer.ingestion.buffer.size", "value", fromStr, toStr, "sum", "raw", null, null)); + var httpTotalFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, null)); + var http5xxFuture = CompletableFuture.supplyAsync(() -> + query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, Map.of("outcome", "SERVER_ERROR"))); + var authFuture = CompletableFuture.supplyAsync(() -> + 
query(serverEndpoint, "cameleer.auth.failures", "count", fromStr, toStr, "sum", "delta", null, null)); + + try { + // Extract latest values from each response + var agentsResp = agentsFuture.join(); + int live = agentStateValue(agentsResp, "live"); + int stale = agentStateValue(agentsResp, "stale"); + int dead = agentStateValue(agentsResp, "dead"); + int shutdown = agentStateValue(agentsResp, "shutdown"); + + double cpu = latestValue(cpuFuture.join()); + long heapUsed = (long) latestValue(heapUsedFuture.join()); + long heapMax = (long) latestValue(heapMaxFuture.join()); + long uptimeMs = (long) latestValue(uptimeFuture.join()); + int threads = (int) latestValue(threadsFuture.join()); + + double dropsTotal = sumLatestValues(dropsFuture.join()); + long bufferDepth = (long) latestValue(bufferFuture.join()); + + double httpTotal = sumLatestValues(httpTotalFuture.join()); + double http5xx = sumLatestValues(http5xxFuture.join()); + double errorRate = httpTotal > 0 ? http5xx / httpTotal : 0.0; + // stepSeconds=300 (5min window), so total is per-5-min; convert to per-minute + double httpPerMin = httpTotal / 5.0; + + double authTotal = sumLatestValues(authFuture.join()); + double authPerMin = authTotal / 5.0; + + return new MetricsSummary( + toStr, + new AgentMetrics(live, stale, dead, shutdown), + new IngestionMetrics(bufferDepth, dropsTotal / 5.0), + new ServerJvmMetrics(cpu, heapUsed, heapMax, uptimeMs / 1000, threads), + new HttpMetrics(httpPerMin, errorRate), + authPerMin + ); + } catch (Exception e) { + log.warn("Failed to assemble metrics summary for {}: {}", serverEndpoint, e.getMessage()); + return null; + } + } + + private MetricsQueryResponse query(String endpoint, String metric, String statistic, + String from, String to, String aggregation, String mode, + List groupByTags, Map filterTags) { + Map body = new HashMap<>(); + body.put("metric", metric); + body.put("statistic", statistic); + body.put("from", from); + body.put("to", to); + body.put("stepSeconds", 
300); + body.put("aggregation", aggregation); + body.put("mode", mode); + if (groupByTags != null) body.put("groupByTags", groupByTags); + if (filterTags != null) body.put("filterTags", filterTags); + return serverApiClient.queryServerMetrics(endpoint, body); + } + + /** Extract the latest value from the first (or only) series. */ + private double latestValue(MetricsQueryResponse resp) { + if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0; + List points = resp.series().getFirst().points(); + if (points == null || points.isEmpty()) return 0.0; + return points.getLast().v(); + } + + /** Sum the latest value across all series (for metrics with groupByTags or multiple series). */ + private double sumLatestValues(MetricsQueryResponse resp) { + if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0; + double sum = 0.0; + for (MetricsSeries series : resp.series()) { + if (series.points() != null && !series.points().isEmpty()) { + sum += series.points().getLast().v(); + } + } + return sum; + } + + /** Extract the latest value for a specific agent state tag. 
*/ + private int agentStateValue(MetricsQueryResponse resp, String state) { + if (resp == null || resp.series() == null) return 0; + for (MetricsSeries series : resp.series()) { + if (series.tags() != null && state.equals(series.tags().get("state"))) { + if (series.points() != null && !series.points().isEmpty()) { + return (int) series.points().getLast().v(); + } + } + } + return 0; + } +} diff --git a/ui/src/api/vendor-hooks.ts b/ui/src/api/vendor-hooks.ts index 7bcacdd..6eefdad 100644 --- a/ui/src/api/vendor-hooks.ts +++ b/ui/src/api/vendor-hooks.ts @@ -1,6 +1,6 @@ import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; import { api } from './client'; -import type { VendorTenantSummary, VendorTenantDetail, CreateTenantRequest, TenantResponse, LicenseResponse, AuditLogPage, AuditLogFilters } from '../types/api'; +import type { VendorTenantSummary, VendorTenantDetail, CreateTenantRequest, TenantResponse, LicenseResponse, AuditLogPage, AuditLogFilters, TenantMetricsEntry } from '../types/api'; export function useVendorTenants() { return useQuery({ @@ -179,3 +179,13 @@ export function useInfraChDetail(tenantId: string) { enabled: !!tenantId, }); } + +// --- Tenant Metrics --- + +export function useTenantMetrics() { + return useQuery({ + queryKey: ['vendor', 'metrics'], + queryFn: () => api.get('/vendor/metrics'), + refetchInterval: 60_000, + }); +} diff --git a/ui/src/components/Layout.tsx b/ui/src/components/Layout.tsx index 9c06de3..2dbe7e9 100644 --- a/ui/src/components/Layout.tsx +++ b/ui/src/components/Layout.tsx @@ -109,6 +109,14 @@ export function Layout() { > Certificates +
navigate('/vendor/metrics')} + > + Metrics +
0) return `${d}d ${h}h`; + if (h > 0) return `${h}h ${m}m`; + return `${m}m`; +} + +function formatPct(v: number): string { + return `${(v * 100).toFixed(1)}%`; +} + +function formatRate(v: number): string { + if (v === 0) return '0'; + if (v < 0.1) return v.toFixed(3); + if (v < 10) return v.toFixed(1); + return Math.round(v).toLocaleString(); +} + +const thStyle: React.CSSProperties = { + textAlign: 'left', + padding: '8px 16px', + fontSize: 11, + fontWeight: 600, + color: 'var(--text-muted)', + textTransform: 'uppercase', + letterSpacing: '0.05em', + borderBottom: '1px solid var(--border)', +}; + +const tdStyle: React.CSSProperties = { + padding: '10px 16px', + fontSize: 13, + borderBottom: '1px solid var(--border)', + fontVariantNumeric: 'tabular-nums', +}; + +function AgentsBadges({ m }: { m: MetricsSummary }) { + const { live, stale, dead } = m.agents; + return ( + + + {stale > 0 && } + {dead > 0 && } + + ); +} + +function HeapBar({ used, max }: { used: number; max: number }) { + const pct = max > 0 ? (used / max) * 100 : 0; + const color = pct > 85 ? 'var(--error, #ef4444)' : pct > 70 ? 'var(--warning, #f59e0b)' : 'var(--success, #22c55e)'; + return ( +
+
+
+
+ + {formatBytes(used)} / {formatBytes(max)} + +
+ ); +} + +function DropsBadge({ rate }: { rate: number }) { + if (rate === 0) return 0; + return ; +} + +function TenantRow({ entry, onClick }: { entry: TenantMetricsEntry; onClick: () => void }) { + const m = entry.metrics; + const notRunning = entry.serverState !== 'RUNNING'; + + return ( + + +
+ {entry.tenantName} + +
+ + {notRunning || !m ? ( + + {notRunning ? `Server ${entry.serverState.toLowerCase()}` : 'No metrics available'} + + ) : ( + <> + + {formatPct(m.server.cpuUsage)} + + {formatRate(m.http.requestsPerMinute)}/min + + {formatUptime(m.server.uptimeSeconds)} + + )} + + ); +} + +function FleetKpis({ entries }: { entries: TenantMetricsEntry[] }) { + const withMetrics = entries.filter((e) => e.metrics != null); + const totalAgentsLive = withMetrics.reduce((s, e) => s + (e.metrics!.agents.live), 0); + const totalAgentsDead = withMetrics.reduce((s, e) => s + (e.metrics!.agents.dead), 0); + const totalDrops = withMetrics.reduce((s, e) => s + (e.metrics!.ingestion.dropsPerMinute), 0); + const running = entries.filter((e) => e.serverState === 'RUNNING').length; + const avgCpu = withMetrics.length > 0 + ? withMetrics.reduce((s, e) => s + e.metrics!.server.cpuUsage, 0) / withMetrics.length + : 0; + + return ( + + ); +} + +export function VendorMetricsPage() { + const { data, isLoading, isError } = useTenantMetrics(); + const navigate = useNavigate(); + + return ( +
+

Tenant Metrics

+ + +
+ +

Fleet Overview

+ {isLoading && } + {isError && Failed to load} +
+ + {data && ( + <> +
+ +
+ + + + + + + + + + + + + + + {data.length === 0 ? ( + + + + ) : ( + data.map((entry) => ( + navigate(`/vendor/tenants/${entry.tenantId}`)} + /> + )) + )} + +
TenantAgentsCPUHeapHTTP ReqDropsUptime
+ No tenants found +
+ + )} +
+
+ ); +} diff --git a/ui/src/router.tsx b/ui/src/router.tsx index cf3fcd1..ebf0fd9 100644 --- a/ui/src/router.tsx +++ b/ui/src/router.tsx @@ -14,6 +14,7 @@ import { TenantDetailPage } from './pages/vendor/TenantDetailPage'; import { VendorAuditPage } from './pages/vendor/VendorAuditPage'; import { CertificatesPage } from './pages/vendor/CertificatesPage'; import { InfrastructurePage } from './pages/vendor/InfrastructurePage'; +import { VendorMetricsPage } from './pages/vendor/VendorMetricsPage'; import { TenantDashboardPage } from './pages/tenant/TenantDashboardPage'; import { TenantLicensePage } from './pages/tenant/TenantLicensePage'; import { SsoPage } from './pages/tenant/SsoPage'; @@ -82,6 +83,11 @@ export function AppRouter() { } /> + }> + + + } /> }> diff --git a/ui/src/types/api.ts b/ui/src/types/api.ts index 21ec01a..ac6aabe 100644 --- a/ui/src/types/api.ts +++ b/ui/src/types/api.ts @@ -155,3 +155,48 @@ export interface AuditLogFilters { page?: number; size?: number; } + +// Tenant metrics (GET /api/vendor/metrics; backend builds each summary via POST /api/v1/admin/server-metrics/query on the tenant's server) +export interface AgentMetrics { + live: number; + stale: number; + dead: number; + shutdown: number; +} + +export interface IngestionMetrics { + bufferDepth: number; + dropsPerMinute: number; +} + +export interface ServerMetrics { + cpuUsage: number; + heapUsedBytes: number; + heapMaxBytes: number; + uptimeSeconds: number; + threadCount: number; +} + +export interface HttpMetrics { + requestsPerMinute: number; + errorRate: number; +} + +export interface MetricsSummary { + collectedAt: string; + agents: AgentMetrics; + ingestion: IngestionMetrics; + server: ServerMetrics; + http: HttpMetrics; + authFailuresPerMinute: number; +} + +export interface TenantMetricsEntry { + tenantId: string; + tenantName: string; + slug: string; + tier: string; + status: string; + serverState: string; + metrics: MetricsSummary | null; +}