feat: add vendor tenant metrics dashboard
All checks were successful
CI / build (push) Successful in 1m24s
CI / docker (push) Successful in 1m0s

Fleet overview page at /vendor/metrics showing per-tenant operational
metrics (agents, CPU, heap, HTTP requests, ingestion drops, uptime).
Queries each tenant's server via the new POST /api/v1/admin/server-metrics/query
REST API instead of direct ClickHouse access, supporting future per-tenant
CH instances.

Backend: TenantMetricsService fires 11 metric queries per tenant
concurrently over a 5-minute window, assembles into a summary snapshot.
ServerApiClient.queryServerMetrics() handles the M2M authenticated POST.

Frontend: VendorMetricsPage with KPI strip (fleet totals) and per-tenant
table with color-coded badges and heap usage bars. Auto-refreshes every 60s.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-24 14:02:57 +02:00
parent 6c1241ed89
commit 1fbafbb16d
8 changed files with 547 additions and 1 deletions

View File

@@ -8,6 +8,7 @@ import org.springframework.stereotype.Service;
import org.springframework.web.client.RestClient;
import java.time.Instant;
import java.util.List;
import java.util.Map;
/**
@@ -171,6 +172,38 @@ public class ServerApiClient {
public record ServerHealthResponse(boolean healthy, String status) {}
// --- Server metrics query (POST /api/v1/admin/server-metrics/query) ---
public record MetricsQueryResponse(
String metric,
String statistic,
String aggregation,
String mode,
int stepSeconds,
List<MetricsSeries> series
) {}
public record MetricsSeries(Map<String, String> tags, List<MetricsPoint> points) {}
public record MetricsPoint(String t, double v) {}
/** Execute a server-metrics query against a tenant's server. */
public MetricsQueryResponse queryServerMetrics(String serverEndpoint, Map<String, Object> body) {
try {
return RestClient.create().post()
.uri(serverEndpoint + "/api/v1/admin/server-metrics/query")
.header("Authorization", "Bearer " + getAccessToken())
.header("X-Cameleer-Protocol-Version", "1")
.contentType(MediaType.APPLICATION_JSON)
.body(body)
.retrieve()
.body(MetricsQueryResponse.class);
} catch (Exception e) {
log.warn("Metrics query failed for {}: {}", serverEndpoint, e.getMessage());
return null;
}
}
private synchronized String getAccessToken() {
if (cachedToken != null && Instant.now().isBefore(tokenExpiry.minusSeconds(60))) {
return cachedToken;

View File

@@ -0,0 +1,74 @@
package net.siegeln.cameleer.saas.vendor;
import net.siegeln.cameleer.saas.provisioning.ServerStatus;
import net.siegeln.cameleer.saas.tenant.TenantEntity;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
import java.util.concurrent.CompletableFuture;
@RestController
@RequestMapping("/api/vendor/metrics")
@PreAuthorize("hasAuthority('SCOPE_platform:admin')")
public class TenantMetricsController {
private final VendorTenantService vendorTenantService;
private final TenantMetricsService metricsService;
public TenantMetricsController(VendorTenantService vendorTenantService,
TenantMetricsService metricsService) {
this.vendorTenantService = vendorTenantService;
this.metricsService = metricsService;
}
public record TenantMetricsEntry(
String tenantId,
String tenantName,
String slug,
String tier,
String status,
String serverState,
TenantMetricsService.MetricsSummary metrics
) {}
@GetMapping
public ResponseEntity<List<TenantMetricsEntry>> all() {
List<TenantEntity> tenants = vendorTenantService.listAll();
List<CompletableFuture<TenantMetricsEntry>> futures = tenants.stream()
.map(tenant -> CompletableFuture.supplyAsync(() -> {
ServerStatus serverStatus = vendorTenantService.getServerStatus(tenant);
String state = serverStatus.state().name();
TenantMetricsService.MetricsSummary metrics = null;
String endpoint = tenant.getServerEndpoint();
boolean isRunning = "ACTIVE".equals(tenant.getStatus().name())
&& endpoint != null && !endpoint.isBlank()
&& "RUNNING".equals(state);
if (isRunning) {
metrics = metricsService.getMetricsSummary(endpoint);
}
return new TenantMetricsEntry(
tenant.getId().toString(),
tenant.getName(),
tenant.getSlug(),
tenant.getTier().name(),
tenant.getStatus().name(),
state,
metrics
);
}))
.toList();
List<TenantMetricsEntry> entries = futures.stream()
.map(CompletableFuture::join)
.toList();
return ResponseEntity.ok(entries);
}
}

View File

@@ -0,0 +1,176 @@
package net.siegeln.cameleer.saas.vendor;
import net.siegeln.cameleer.saas.identity.ServerApiClient;
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsQueryResponse;
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsPoint;
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsSeries;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
@Service
public class TenantMetricsService {
private static final Logger log = LoggerFactory.getLogger(TenantMetricsService.class);
private final ServerApiClient serverApiClient;
public TenantMetricsService(ServerApiClient serverApiClient) {
this.serverApiClient = serverApiClient;
}
// --- Response records ---
public record MetricsSummary(
String collectedAt,
AgentMetrics agents,
IngestionMetrics ingestion,
ServerJvmMetrics server,
HttpMetrics http,
double authFailuresPerMinute
) {}
public record AgentMetrics(int live, int stale, int dead, int shutdown) {}
public record IngestionMetrics(long bufferDepth, double dropsPerMinute) {}
public record ServerJvmMetrics(
double cpuUsage,
long heapUsedBytes,
long heapMaxBytes,
long uptimeSeconds,
int threadCount
) {}
public record HttpMetrics(double requestsPerMinute, double errorRate) {}
/**
* Query a tenant's server for key metrics and assemble a summary snapshot.
* Fires multiple queries concurrently (one per metric group) over the last 5 minutes.
*/
public MetricsSummary getMetricsSummary(String serverEndpoint) {
Instant to = Instant.now();
Instant from = to.minus(5, ChronoUnit.MINUTES);
String fromStr = from.toString();
String toStr = to.toString();
// Fire all queries concurrently
var agentsFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "cameleer.agents.connected", "value", fromStr, toStr, "avg", "raw", List.of("state"), null));
var cpuFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "process.cpu.usage", "value", fromStr, toStr, "avg", "raw", null, null));
var heapUsedFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "jvm.memory.used", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap")));
var heapMaxFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "jvm.memory.max", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap")));
var uptimeFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "process.uptime", "value", fromStr, toStr, "latest", "raw", null, null));
var threadsFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "jvm.threads.live", "value", fromStr, toStr, "avg", "raw", null, null));
var dropsFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "cameleer.ingestion.drops", "count", fromStr, toStr, "sum", "delta", null, null));
var bufferFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "cameleer.ingestion.buffer.size", "value", fromStr, toStr, "sum", "raw", null, null));
var httpTotalFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, null));
var http5xxFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, Map.of("outcome", "SERVER_ERROR")));
var authFuture = CompletableFuture.supplyAsync(() ->
query(serverEndpoint, "cameleer.auth.failures", "count", fromStr, toStr, "sum", "delta", null, null));
try {
// Extract latest values from each response
var agentsResp = agentsFuture.join();
int live = agentStateValue(agentsResp, "live");
int stale = agentStateValue(agentsResp, "stale");
int dead = agentStateValue(agentsResp, "dead");
int shutdown = agentStateValue(agentsResp, "shutdown");
double cpu = latestValue(cpuFuture.join());
long heapUsed = (long) latestValue(heapUsedFuture.join());
long heapMax = (long) latestValue(heapMaxFuture.join());
long uptimeMs = (long) latestValue(uptimeFuture.join());
int threads = (int) latestValue(threadsFuture.join());
double dropsTotal = sumLatestValues(dropsFuture.join());
long bufferDepth = (long) latestValue(bufferFuture.join());
double httpTotal = sumLatestValues(httpTotalFuture.join());
double http5xx = sumLatestValues(http5xxFuture.join());
double errorRate = httpTotal > 0 ? http5xx / httpTotal : 0.0;
// stepSeconds=300 (5min window), so total is per-5-min; convert to per-minute
double httpPerMin = httpTotal / 5.0;
double authTotal = sumLatestValues(authFuture.join());
double authPerMin = authTotal / 5.0;
return new MetricsSummary(
toStr,
new AgentMetrics(live, stale, dead, shutdown),
new IngestionMetrics(bufferDepth, dropsTotal / 5.0),
new ServerJvmMetrics(cpu, heapUsed, heapMax, uptimeMs / 1000, threads),
new HttpMetrics(httpPerMin, errorRate),
authPerMin
);
} catch (Exception e) {
log.warn("Failed to assemble metrics summary for {}: {}", serverEndpoint, e.getMessage());
return null;
}
}
private MetricsQueryResponse query(String endpoint, String metric, String statistic,
String from, String to, String aggregation, String mode,
List<String> groupByTags, Map<String, String> filterTags) {
Map<String, Object> body = new HashMap<>();
body.put("metric", metric);
body.put("statistic", statistic);
body.put("from", from);
body.put("to", to);
body.put("stepSeconds", 300);
body.put("aggregation", aggregation);
body.put("mode", mode);
if (groupByTags != null) body.put("groupByTags", groupByTags);
if (filterTags != null) body.put("filterTags", filterTags);
return serverApiClient.queryServerMetrics(endpoint, body);
}
/** Extract the latest value from the first (or only) series. */
private double latestValue(MetricsQueryResponse resp) {
if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0;
List<MetricsPoint> points = resp.series().getFirst().points();
if (points == null || points.isEmpty()) return 0.0;
return points.getLast().v();
}
/** Sum the latest value across all series (for metrics with groupByTags or multiple series). */
private double sumLatestValues(MetricsQueryResponse resp) {
if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0;
double sum = 0.0;
for (MetricsSeries series : resp.series()) {
if (series.points() != null && !series.points().isEmpty()) {
sum += series.points().getLast().v();
}
}
return sum;
}
/** Extract the latest value for a specific agent state tag. */
private int agentStateValue(MetricsQueryResponse resp, String state) {
if (resp == null || resp.series() == null) return 0;
for (MetricsSeries series : resp.series()) {
if (series.tags() != null && state.equals(series.tags().get("state"))) {
if (series.points() != null && !series.points().isEmpty()) {
return (int) series.points().getLast().v();
}
}
}
return 0;
}
}