feat: add vendor tenant metrics dashboard
Fleet overview page at /vendor/metrics showing per-tenant operational metrics (agents, CPU, heap, HTTP requests, ingestion drops, uptime). Queries each tenant's server via the new POST /api/v1/admin/server-metrics/query REST API instead of direct ClickHouse access, supporting future per-tenant CH instances. Backend: TenantMetricsService fires 11 metric queries per tenant concurrently over a 5-minute window, assembles into a summary snapshot. ServerApiClient.queryServerMetrics() handles the M2M authenticated POST. Frontend: VendorMetricsPage with KPI strip (fleet totals) and per-tenant table with color-coded badges and heap usage bars. Auto-refreshes every 60s. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@ import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestClient;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
@@ -171,6 +172,38 @@ public class ServerApiClient {
|
||||
|
||||
public record ServerHealthResponse(boolean healthy, String status) {}
|
||||
|
||||
// --- Server metrics query (POST /api/v1/admin/server-metrics/query) ---
|
||||
|
||||
public record MetricsQueryResponse(
|
||||
String metric,
|
||||
String statistic,
|
||||
String aggregation,
|
||||
String mode,
|
||||
int stepSeconds,
|
||||
List<MetricsSeries> series
|
||||
) {}
|
||||
|
||||
public record MetricsSeries(Map<String, String> tags, List<MetricsPoint> points) {}
|
||||
|
||||
public record MetricsPoint(String t, double v) {}
|
||||
|
||||
/** Execute a server-metrics query against a tenant's server. */
|
||||
public MetricsQueryResponse queryServerMetrics(String serverEndpoint, Map<String, Object> body) {
|
||||
try {
|
||||
return RestClient.create().post()
|
||||
.uri(serverEndpoint + "/api/v1/admin/server-metrics/query")
|
||||
.header("Authorization", "Bearer " + getAccessToken())
|
||||
.header("X-Cameleer-Protocol-Version", "1")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.body(body)
|
||||
.retrieve()
|
||||
.body(MetricsQueryResponse.class);
|
||||
} catch (Exception e) {
|
||||
log.warn("Metrics query failed for {}: {}", serverEndpoint, e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized String getAccessToken() {
|
||||
if (cachedToken != null && Instant.now().isBefore(tokenExpiry.minusSeconds(60))) {
|
||||
return cachedToken;
|
||||
|
||||
74
src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java
vendored
Normal file
74
src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsController.java
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
package net.siegeln.cameleer.saas.vendor;
|
||||
|
||||
import net.siegeln.cameleer.saas.provisioning.ServerStatus;
|
||||
import net.siegeln.cameleer.saas.tenant.TenantEntity;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/vendor/metrics")
|
||||
@PreAuthorize("hasAuthority('SCOPE_platform:admin')")
|
||||
public class TenantMetricsController {
|
||||
|
||||
private final VendorTenantService vendorTenantService;
|
||||
private final TenantMetricsService metricsService;
|
||||
|
||||
public TenantMetricsController(VendorTenantService vendorTenantService,
|
||||
TenantMetricsService metricsService) {
|
||||
this.vendorTenantService = vendorTenantService;
|
||||
this.metricsService = metricsService;
|
||||
}
|
||||
|
||||
public record TenantMetricsEntry(
|
||||
String tenantId,
|
||||
String tenantName,
|
||||
String slug,
|
||||
String tier,
|
||||
String status,
|
||||
String serverState,
|
||||
TenantMetricsService.MetricsSummary metrics
|
||||
) {}
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<List<TenantMetricsEntry>> all() {
|
||||
List<TenantEntity> tenants = vendorTenantService.listAll();
|
||||
|
||||
List<CompletableFuture<TenantMetricsEntry>> futures = tenants.stream()
|
||||
.map(tenant -> CompletableFuture.supplyAsync(() -> {
|
||||
ServerStatus serverStatus = vendorTenantService.getServerStatus(tenant);
|
||||
String state = serverStatus.state().name();
|
||||
|
||||
TenantMetricsService.MetricsSummary metrics = null;
|
||||
String endpoint = tenant.getServerEndpoint();
|
||||
boolean isRunning = "ACTIVE".equals(tenant.getStatus().name())
|
||||
&& endpoint != null && !endpoint.isBlank()
|
||||
&& "RUNNING".equals(state);
|
||||
if (isRunning) {
|
||||
metrics = metricsService.getMetricsSummary(endpoint);
|
||||
}
|
||||
|
||||
return new TenantMetricsEntry(
|
||||
tenant.getId().toString(),
|
||||
tenant.getName(),
|
||||
tenant.getSlug(),
|
||||
tenant.getTier().name(),
|
||||
tenant.getStatus().name(),
|
||||
state,
|
||||
metrics
|
||||
);
|
||||
}))
|
||||
.toList();
|
||||
|
||||
List<TenantMetricsEntry> entries = futures.stream()
|
||||
.map(CompletableFuture::join)
|
||||
.toList();
|
||||
|
||||
return ResponseEntity.ok(entries);
|
||||
}
|
||||
}
|
||||
176
src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java
vendored
Normal file
176
src/main/java/net/siegeln/cameleer/saas/vendor/TenantMetricsService.java
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
package net.siegeln.cameleer.saas.vendor;
|
||||
|
||||
import net.siegeln.cameleer.saas.identity.ServerApiClient;
|
||||
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsQueryResponse;
|
||||
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsPoint;
|
||||
import net.siegeln.cameleer.saas.identity.ServerApiClient.MetricsSeries;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
@Service
|
||||
public class TenantMetricsService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(TenantMetricsService.class);
|
||||
|
||||
private final ServerApiClient serverApiClient;
|
||||
|
||||
public TenantMetricsService(ServerApiClient serverApiClient) {
|
||||
this.serverApiClient = serverApiClient;
|
||||
}
|
||||
|
||||
// --- Response records ---
|
||||
|
||||
public record MetricsSummary(
|
||||
String collectedAt,
|
||||
AgentMetrics agents,
|
||||
IngestionMetrics ingestion,
|
||||
ServerJvmMetrics server,
|
||||
HttpMetrics http,
|
||||
double authFailuresPerMinute
|
||||
) {}
|
||||
|
||||
public record AgentMetrics(int live, int stale, int dead, int shutdown) {}
|
||||
|
||||
public record IngestionMetrics(long bufferDepth, double dropsPerMinute) {}
|
||||
|
||||
public record ServerJvmMetrics(
|
||||
double cpuUsage,
|
||||
long heapUsedBytes,
|
||||
long heapMaxBytes,
|
||||
long uptimeSeconds,
|
||||
int threadCount
|
||||
) {}
|
||||
|
||||
public record HttpMetrics(double requestsPerMinute, double errorRate) {}
|
||||
|
||||
/**
|
||||
* Query a tenant's server for key metrics and assemble a summary snapshot.
|
||||
* Fires multiple queries concurrently (one per metric group) over the last 5 minutes.
|
||||
*/
|
||||
public MetricsSummary getMetricsSummary(String serverEndpoint) {
|
||||
Instant to = Instant.now();
|
||||
Instant from = to.minus(5, ChronoUnit.MINUTES);
|
||||
String fromStr = from.toString();
|
||||
String toStr = to.toString();
|
||||
|
||||
// Fire all queries concurrently
|
||||
var agentsFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "cameleer.agents.connected", "value", fromStr, toStr, "avg", "raw", List.of("state"), null));
|
||||
var cpuFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "process.cpu.usage", "value", fromStr, toStr, "avg", "raw", null, null));
|
||||
var heapUsedFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "jvm.memory.used", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap")));
|
||||
var heapMaxFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "jvm.memory.max", "value", fromStr, toStr, "sum", "raw", null, Map.of("area", "heap")));
|
||||
var uptimeFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "process.uptime", "value", fromStr, toStr, "latest", "raw", null, null));
|
||||
var threadsFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "jvm.threads.live", "value", fromStr, toStr, "avg", "raw", null, null));
|
||||
var dropsFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "cameleer.ingestion.drops", "count", fromStr, toStr, "sum", "delta", null, null));
|
||||
var bufferFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "cameleer.ingestion.buffer.size", "value", fromStr, toStr, "sum", "raw", null, null));
|
||||
var httpTotalFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, null));
|
||||
var http5xxFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "http.server.requests", "count", fromStr, toStr, "sum", "delta", null, Map.of("outcome", "SERVER_ERROR")));
|
||||
var authFuture = CompletableFuture.supplyAsync(() ->
|
||||
query(serverEndpoint, "cameleer.auth.failures", "count", fromStr, toStr, "sum", "delta", null, null));
|
||||
|
||||
try {
|
||||
// Extract latest values from each response
|
||||
var agentsResp = agentsFuture.join();
|
||||
int live = agentStateValue(agentsResp, "live");
|
||||
int stale = agentStateValue(agentsResp, "stale");
|
||||
int dead = agentStateValue(agentsResp, "dead");
|
||||
int shutdown = agentStateValue(agentsResp, "shutdown");
|
||||
|
||||
double cpu = latestValue(cpuFuture.join());
|
||||
long heapUsed = (long) latestValue(heapUsedFuture.join());
|
||||
long heapMax = (long) latestValue(heapMaxFuture.join());
|
||||
long uptimeMs = (long) latestValue(uptimeFuture.join());
|
||||
int threads = (int) latestValue(threadsFuture.join());
|
||||
|
||||
double dropsTotal = sumLatestValues(dropsFuture.join());
|
||||
long bufferDepth = (long) latestValue(bufferFuture.join());
|
||||
|
||||
double httpTotal = sumLatestValues(httpTotalFuture.join());
|
||||
double http5xx = sumLatestValues(http5xxFuture.join());
|
||||
double errorRate = httpTotal > 0 ? http5xx / httpTotal : 0.0;
|
||||
// stepSeconds=300 (5min window), so total is per-5-min; convert to per-minute
|
||||
double httpPerMin = httpTotal / 5.0;
|
||||
|
||||
double authTotal = sumLatestValues(authFuture.join());
|
||||
double authPerMin = authTotal / 5.0;
|
||||
|
||||
return new MetricsSummary(
|
||||
toStr,
|
||||
new AgentMetrics(live, stale, dead, shutdown),
|
||||
new IngestionMetrics(bufferDepth, dropsTotal / 5.0),
|
||||
new ServerJvmMetrics(cpu, heapUsed, heapMax, uptimeMs / 1000, threads),
|
||||
new HttpMetrics(httpPerMin, errorRate),
|
||||
authPerMin
|
||||
);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to assemble metrics summary for {}: {}", serverEndpoint, e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private MetricsQueryResponse query(String endpoint, String metric, String statistic,
|
||||
String from, String to, String aggregation, String mode,
|
||||
List<String> groupByTags, Map<String, String> filterTags) {
|
||||
Map<String, Object> body = new HashMap<>();
|
||||
body.put("metric", metric);
|
||||
body.put("statistic", statistic);
|
||||
body.put("from", from);
|
||||
body.put("to", to);
|
||||
body.put("stepSeconds", 300);
|
||||
body.put("aggregation", aggregation);
|
||||
body.put("mode", mode);
|
||||
if (groupByTags != null) body.put("groupByTags", groupByTags);
|
||||
if (filterTags != null) body.put("filterTags", filterTags);
|
||||
return serverApiClient.queryServerMetrics(endpoint, body);
|
||||
}
|
||||
|
||||
/** Extract the latest value from the first (or only) series. */
|
||||
private double latestValue(MetricsQueryResponse resp) {
|
||||
if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0;
|
||||
List<MetricsPoint> points = resp.series().getFirst().points();
|
||||
if (points == null || points.isEmpty()) return 0.0;
|
||||
return points.getLast().v();
|
||||
}
|
||||
|
||||
/** Sum the latest value across all series (for metrics with groupByTags or multiple series). */
|
||||
private double sumLatestValues(MetricsQueryResponse resp) {
|
||||
if (resp == null || resp.series() == null || resp.series().isEmpty()) return 0.0;
|
||||
double sum = 0.0;
|
||||
for (MetricsSeries series : resp.series()) {
|
||||
if (series.points() != null && !series.points().isEmpty()) {
|
||||
sum += series.points().getLast().v();
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/** Extract the latest value for a specific agent state tag. */
|
||||
private int agentStateValue(MetricsQueryResponse resp, String state) {
|
||||
if (resp == null || resp.series() == null) return 0;
|
||||
for (MetricsSeries series : resp.series()) {
|
||||
if (series.tags() != null && state.equals(series.tags().get("state"))) {
|
||||
if (series.points() != null && !series.points().isEmpty()) {
|
||||
return (int) series.points().getLast().v();
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user