From 213aa86c47bb434aa67b68af1a8033cf47ef7d19 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:29:20 +0200 Subject: [PATCH] feat: progressive drill-down dashboard with RED metrics and SLA compliance (#94) Three-level dashboard driven by sidebar selection: - L1 (no selection): all-apps overview with health table, per-app charts - L2 (app selected): route performance table, error velocity, top errors - L3 (route selected): processor table, latency heatmap data, bottleneck KPI Backend: 3 new endpoints (timeseries/by-app, timeseries/by-route, errors/top), per-app SLA settings (app_settings table, V12 migration), exact SLA compliance from executions hypertable, error velocity with acceleration detection. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/controller/AppSettingsController.java | 79 ++++ .../controller/RouteMetricsController.java | 36 +- .../app/controller/SearchController.java | 64 ++- .../server/app/dto/AppSettingsRequest.java | 54 +++ .../server/app/dto/RouteMetrics.java | 3 +- .../PostgresAppSettingsRepository.java | 67 +++ .../app/storage/PostgresStatsStore.java | 215 +++++++++ .../db/migration/V12__app_settings.sql | 11 + .../server/core/admin/AppSettings.java | 19 + .../core/admin/AppSettingsRepository.java | 11 + .../server/core/search/ExecutionStats.java | 21 +- .../server/core/search/SearchService.java | 35 ++ .../server/core/search/TopError.java | 12 + .../server/core/storage/StatsStore.java | 27 ++ ui/src/api/queries/dashboard.ts | 142 ++++++ ui/src/pages/DashboardTab/DashboardL1.tsx | 442 ++++++++++++++++++ ui/src/pages/DashboardTab/DashboardL2.tsx | 421 +++++++++++++++++ ui/src/pages/DashboardTab/DashboardL3.tsx | 434 +++++++++++++++++ ui/src/pages/DashboardTab/DashboardPage.tsx | 16 +- .../DashboardTab/DashboardTab.module.css | 133 ++++++ ui/src/pages/DashboardTab/dashboard-utils.ts | 70 +++ 21 files changed, 2293 insertions(+), 19 deletions(-) create mode 100644 
cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/AppSettingsController.java create mode 100644 cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/AppSettingsRequest.java create mode 100644 cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresAppSettingsRepository.java create mode 100644 cameleer3-server-app/src/main/resources/db/migration/V12__app_settings.sql create mode 100644 cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettings.java create mode 100644 cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettingsRepository.java create mode 100644 cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/TopError.java create mode 100644 ui/src/api/queries/dashboard.ts create mode 100644 ui/src/pages/DashboardTab/DashboardL1.tsx create mode 100644 ui/src/pages/DashboardTab/DashboardL2.tsx create mode 100644 ui/src/pages/DashboardTab/DashboardL3.tsx create mode 100644 ui/src/pages/DashboardTab/DashboardTab.module.css create mode 100644 ui/src/pages/DashboardTab/dashboard-utils.ts diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/AppSettingsController.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/AppSettingsController.java new file mode 100644 index 00000000..ccbeffd0 --- /dev/null +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/AppSettingsController.java @@ -0,0 +1,79 @@ +package com.cameleer3.server.app.controller; + +import com.cameleer3.server.app.dto.AppSettingsRequest; +import com.cameleer3.server.core.admin.AppSettings; +import com.cameleer3.server.core.admin.AppSettingsRepository; +import com.cameleer3.server.core.admin.AuditCategory; +import com.cameleer3.server.core.admin.AuditResult; +import com.cameleer3.server.core.admin.AuditService; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import 
jakarta.servlet.http.HttpServletRequest; +import jakarta.validation.Valid; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PutMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; + +import java.util.List; +import java.util.Map; + +@RestController +@RequestMapping("/api/v1/admin/app-settings") +@PreAuthorize("hasAnyRole('ADMIN', 'OPERATOR')") +@Tag(name = "App Settings", description = "Per-application dashboard settings (ADMIN/OPERATOR)") +public class AppSettingsController { + + private final AppSettingsRepository repository; + private final AuditService auditService; + + public AppSettingsController(AppSettingsRepository repository, AuditService auditService) { + this.repository = repository; + this.auditService = auditService; + } + + @GetMapping + @Operation(summary = "List all application settings") + public ResponseEntity> getAll() { + return ResponseEntity.ok(repository.findAll()); + } + + @GetMapping("/{appId}") + @Operation(summary = "Get settings for a specific application (returns defaults if not configured)") + public ResponseEntity getByAppId(@PathVariable String appId) { + AppSettings settings = repository.findByAppId(appId).orElse(AppSettings.defaults(appId)); + return ResponseEntity.ok(settings); + } + + @PutMapping("/{appId}") + @Operation(summary = "Create or update settings for an application") + public ResponseEntity update(@PathVariable String appId, + @Valid @RequestBody AppSettingsRequest request, + 
HttpServletRequest httpRequest) { + List errors = request.validate(); + if (!errors.isEmpty()) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, String.join("; ", errors)); + } + + AppSettings saved = repository.save(request.toSettings(appId)); + auditService.log("update_app_settings", AuditCategory.CONFIG, appId, + Map.of("settings", saved), AuditResult.SUCCESS, httpRequest); + return ResponseEntity.ok(saved); + } + + @DeleteMapping("/{appId}") + @Operation(summary = "Delete application settings (reverts to defaults)") + public ResponseEntity delete(@PathVariable String appId, HttpServletRequest httpRequest) { + repository.delete(appId); + auditService.log("delete_app_settings", AuditCategory.CONFIG, appId, + Map.of(), AuditResult.SUCCESS, httpRequest); + return ResponseEntity.noContent().build(); + } +} diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/RouteMetricsController.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/RouteMetricsController.java index bff2b064..9f0d3048 100644 --- a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/RouteMetricsController.java +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/RouteMetricsController.java @@ -2,6 +2,9 @@ package com.cameleer3.server.app.controller; import com.cameleer3.server.app.dto.ProcessorMetrics; import com.cameleer3.server.app.dto.RouteMetrics; +import com.cameleer3.server.core.admin.AppSettings; +import com.cameleer3.server.core.admin.AppSettingsRepository; +import com.cameleer3.server.core.storage.StatsStore; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.responses.ApiResponse; import io.swagger.v3.oas.annotations.tags.Tag; @@ -18,6 +21,7 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.List; +import java.util.Map; @RestController @RequestMapping("/api/v1/routes") @@ -25,9 +29,14 @@ 
import java.util.List; public class RouteMetricsController { private final JdbcTemplate jdbc; + private final StatsStore statsStore; + private final AppSettingsRepository appSettingsRepository; - public RouteMetricsController(JdbcTemplate jdbc) { + public RouteMetricsController(JdbcTemplate jdbc, StatsStore statsStore, + AppSettingsRepository appSettingsRepository) { this.jdbc = jdbc; + this.statsStore = statsStore; + this.appSettingsRepository = appSettingsRepository; } @GetMapping("/metrics") @@ -78,7 +87,7 @@ public class RouteMetricsController { routeKeys.add(new RouteKey(applicationName, routeId)); return new RouteMetrics(routeId, applicationName, total, successRate, - avgDur, p99Dur, errorRate, tps, List.of()); + avgDur, p99Dur, errorRate, tps, List.of(), -1.0); }, params.toArray()); // Fetch sparklines (12 buckets over the time window) @@ -100,13 +109,34 @@ public class RouteMetricsController { m.appId(), m.routeId()); metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(), m.successRate(), m.avgDurationMs(), m.p99DurationMs(), - m.errorRate(), m.throughputPerSec(), sparkline)); + m.errorRate(), m.throughputPerSec(), sparkline, m.slaCompliance())); } catch (Exception e) { // Leave sparkline empty on error } } } + // Enrich with SLA compliance per route + if (!metrics.isEmpty()) { + // Determine SLA threshold (per-app or default) + String effectiveAppId = appId != null ? appId : (metrics.isEmpty() ? null : metrics.get(0).appId()); + int threshold = appSettingsRepository.findByAppId(effectiveAppId != null ? effectiveAppId : "") + .map(AppSettings::slaThresholdMs).orElse(300); + + Map slaCounts = statsStore.slaCountsByRoute(fromInstant, toInstant, + effectiveAppId, threshold); + + for (int i = 0; i < metrics.size(); i++) { + RouteMetrics m = metrics.get(i); + long[] counts = slaCounts.get(m.routeId()); + double sla = (counts != null && counts[1] > 0) + ? 
counts[0] * 100.0 / counts[1] : 100.0; + metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(), + m.successRate(), m.avgDurationMs(), m.p99DurationMs(), + m.errorRate(), m.throughputPerSec(), m.sparkline(), sla)); + } + } + return ResponseEntity.ok(metrics); } diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/SearchController.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/SearchController.java index d1045086..cdeeada1 100644 --- a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/SearchController.java +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/controller/SearchController.java @@ -1,5 +1,7 @@ package com.cameleer3.server.app.controller; +import com.cameleer3.server.core.admin.AppSettings; +import com.cameleer3.server.core.admin.AppSettingsRepository; import com.cameleer3.server.core.agent.AgentInfo; import com.cameleer3.server.core.agent.AgentRegistryService; import com.cameleer3.server.core.search.ExecutionStats; @@ -8,6 +10,7 @@ import com.cameleer3.server.core.search.SearchRequest; import com.cameleer3.server.core.search.SearchResult; import com.cameleer3.server.core.search.SearchService; import com.cameleer3.server.core.search.StatsTimeseries; +import com.cameleer3.server.core.search.TopError; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import org.springframework.http.ResponseEntity; @@ -20,6 +23,7 @@ import org.springframework.web.bind.annotation.RestController; import java.time.Instant; import java.util.List; +import java.util.Map; /** * Search endpoints for querying route executions. 
@@ -34,10 +38,13 @@ public class SearchController { private final SearchService searchService; private final AgentRegistryService registryService; + private final AppSettingsRepository appSettingsRepository; - public SearchController(SearchService searchService, AgentRegistryService registryService) { + public SearchController(SearchService searchService, AgentRegistryService registryService, + AppSettingsRepository appSettingsRepository) { this.searchService = searchService; this.registryService = registryService; + this.appSettingsRepository = appSettingsRepository; } @GetMapping("/executions") @@ -87,21 +94,29 @@ public class SearchController { } @GetMapping("/stats") - @Operation(summary = "Aggregate execution stats (P99 latency, active count)") + @Operation(summary = "Aggregate execution stats (P99 latency, active count, SLA compliance)") public ResponseEntity stats( @RequestParam Instant from, @RequestParam(required = false) Instant to, @RequestParam(required = false) String routeId, @RequestParam(required = false) String application) { Instant end = to != null ? to : Instant.now(); + ExecutionStats stats; if (routeId == null && application == null) { - return ResponseEntity.ok(searchService.stats(from, end)); + stats = searchService.stats(from, end); + } else if (routeId == null) { + stats = searchService.statsForApp(from, end, application); + } else { + List agentIds = resolveApplicationToAgentIds(application); + stats = searchService.stats(from, end, routeId, agentIds); } - if (routeId == null) { - return ResponseEntity.ok(searchService.statsForApp(from, end, application)); - } - List agentIds = resolveApplicationToAgentIds(application); - return ResponseEntity.ok(searchService.stats(from, end, routeId, agentIds)); + + // Enrich with SLA compliance + int threshold = appSettingsRepository + .findByAppId(application != null ? 
application : "") + .map(AppSettings::slaThresholdMs).orElse(300); + double sla = searchService.slaCompliance(from, end, threshold, application, routeId); + return ResponseEntity.ok(stats.withSlaCompliance(sla)); } @GetMapping("/stats/timeseries") @@ -126,6 +141,39 @@ public class SearchController { return ResponseEntity.ok(searchService.timeseries(from, end, buckets, routeId, agentIds)); } + @GetMapping("/stats/timeseries/by-app") + @Operation(summary = "Timeseries grouped by application") + public ResponseEntity> timeseriesByApp( + @RequestParam Instant from, + @RequestParam(required = false) Instant to, + @RequestParam(defaultValue = "24") int buckets) { + Instant end = to != null ? to : Instant.now(); + return ResponseEntity.ok(searchService.timeseriesGroupedByApp(from, end, buckets)); + } + + @GetMapping("/stats/timeseries/by-route") + @Operation(summary = "Timeseries grouped by route for an application") + public ResponseEntity> timeseriesByRoute( + @RequestParam Instant from, + @RequestParam(required = false) Instant to, + @RequestParam(defaultValue = "24") int buckets, + @RequestParam String application) { + Instant end = to != null ? to : Instant.now(); + return ResponseEntity.ok(searchService.timeseriesGroupedByRoute(from, end, buckets, application)); + } + + @GetMapping("/errors/top") + @Operation(summary = "Top N errors with velocity trend") + public ResponseEntity> topErrors( + @RequestParam Instant from, + @RequestParam(required = false) Instant to, + @RequestParam(required = false) String application, + @RequestParam(required = false) String routeId, + @RequestParam(defaultValue = "5") int limit) { + Instant end = to != null ? to : Instant.now(); + return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit)); + } + /** * Resolve an application name to agent IDs. * Returns null if application is null/blank (no filtering). 
diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/AppSettingsRequest.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/AppSettingsRequest.java new file mode 100644 index 00000000..630b8b16 --- /dev/null +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/AppSettingsRequest.java @@ -0,0 +1,54 @@ +package com.cameleer3.server.app.dto; + +import com.cameleer3.server.core.admin.AppSettings; +import io.swagger.v3.oas.annotations.media.Schema; +import jakarta.validation.constraints.Max; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotNull; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +@Schema(description = "Per-application dashboard settings") +public record AppSettingsRequest( + @NotNull @Min(1) + @Schema(description = "SLA duration threshold in milliseconds") + Integer slaThresholdMs, + + @NotNull @Min(0) @Max(100) + @Schema(description = "Error rate % threshold for warning (yellow) health dot") + Double healthErrorWarn, + + @NotNull @Min(0) @Max(100) + @Schema(description = "Error rate % threshold for critical (red) health dot") + Double healthErrorCrit, + + @NotNull @Min(0) @Max(100) + @Schema(description = "SLA compliance % threshold for warning (yellow) health dot") + Double healthSlaWarn, + + @NotNull @Min(0) @Max(100) + @Schema(description = "SLA compliance % threshold for critical (red) health dot") + Double healthSlaCrit +) { + + public AppSettings toSettings(String appId) { + Instant now = Instant.now(); + return new AppSettings(appId, slaThresholdMs, healthErrorWarn, healthErrorCrit, + healthSlaWarn, healthSlaCrit, now, now); + } + + public List validate() { + List errors = new ArrayList<>(); + if (healthErrorWarn != null && healthErrorCrit != null + && healthErrorWarn > healthErrorCrit) { + errors.add("healthErrorWarn must be <= healthErrorCrit"); + } + if (healthSlaWarn != null && healthSlaCrit != null + && 
healthSlaWarn < healthSlaCrit) { + errors.add("healthSlaWarn must be >= healthSlaCrit (higher SLA = healthier)"); + } + return errors; + } +} diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/RouteMetrics.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/RouteMetrics.java index 58b73d48..bcb78f13 100644 --- a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/RouteMetrics.java +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/dto/RouteMetrics.java @@ -15,5 +15,6 @@ public record RouteMetrics( @NotNull double p99DurationMs, @NotNull double errorRate, @NotNull double throughputPerSec, - @NotNull List sparkline + @NotNull List sparkline, + double slaCompliance ) {} diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresAppSettingsRepository.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresAppSettingsRepository.java new file mode 100644 index 00000000..63b80947 --- /dev/null +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresAppSettingsRepository.java @@ -0,0 +1,67 @@ +package com.cameleer3.server.app.storage; + +import com.cameleer3.server.core.admin.AppSettings; +import com.cameleer3.server.core.admin.AppSettingsRepository; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.stereotype.Repository; + +import java.util.List; +import java.util.Optional; + +@Repository +public class PostgresAppSettingsRepository implements AppSettingsRepository { + + private final JdbcTemplate jdbc; + + private static final RowMapper ROW_MAPPER = (rs, rowNum) -> new AppSettings( + rs.getString("app_id"), + rs.getInt("sla_threshold_ms"), + rs.getDouble("health_error_warn"), + rs.getDouble("health_error_crit"), + rs.getDouble("health_sla_warn"), + rs.getDouble("health_sla_crit"), + rs.getTimestamp("created_at").toInstant(), + 
rs.getTimestamp("updated_at").toInstant()); + + public PostgresAppSettingsRepository(JdbcTemplate jdbc) { + this.jdbc = jdbc; + } + + @Override + public Optional findByAppId(String appId) { + List results = jdbc.query( + "SELECT * FROM app_settings WHERE app_id = ?", ROW_MAPPER, appId); + return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0)); + } + + @Override + public List findAll() { + return jdbc.query("SELECT * FROM app_settings ORDER BY app_id", ROW_MAPPER); + } + + @Override + public AppSettings save(AppSettings settings) { + jdbc.update(""" + INSERT INTO app_settings (app_id, sla_threshold_ms, health_error_warn, + health_error_crit, health_sla_warn, health_sla_crit, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, now(), now()) + ON CONFLICT (app_id) DO UPDATE SET + sla_threshold_ms = EXCLUDED.sla_threshold_ms, + health_error_warn = EXCLUDED.health_error_warn, + health_error_crit = EXCLUDED.health_error_crit, + health_sla_warn = EXCLUDED.health_sla_warn, + health_sla_crit = EXCLUDED.health_sla_crit, + updated_at = now() + """, + settings.appId(), settings.slaThresholdMs(), + settings.healthErrorWarn(), settings.healthErrorCrit(), + settings.healthSlaWarn(), settings.healthSlaCrit()); + return findByAppId(settings.appId()).orElseThrow(); + } + + @Override + public void delete(String appId) { + jdbc.update("DELETE FROM app_settings WHERE app_id = ?", appId); + } +} diff --git a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresStatsStore.java b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresStatsStore.java index ec2500dc..ee1b70b9 100644 --- a/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresStatsStore.java +++ b/cameleer3-server-app/src/main/java/com/cameleer3/server/app/storage/PostgresStatsStore.java @@ -3,6 +3,7 @@ package com.cameleer3.server.app.storage; import com.cameleer3.server.core.search.ExecutionStats; import 
com.cameleer3.server.core.search.StatsTimeseries; import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket; +import com.cameleer3.server.core.search.TopError; import com.cameleer3.server.core.storage.StatsStore; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Repository; @@ -12,7 +13,9 @@ import java.time.Duration; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; @Repository public class PostgresStatsStore implements StatsStore { @@ -184,4 +187,216 @@ public class PostgresStatsStore implements StatsStore { return new StatsTimeseries(buckets); } + + // ── Grouped timeseries ──────────────────────────────────────────────── + + @Override + public Map timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) { + return queryGroupedTimeseries("stats_1m_app", "application_name", from, to, + bucketCount, List.of()); + } + + @Override + public Map timeseriesGroupedByRoute(Instant from, Instant to, + int bucketCount, String applicationName) { + return queryGroupedTimeseries("stats_1m_route", "route_id", from, to, + bucketCount, List.of(new Filter("application_name", applicationName))); + } + + private Map queryGroupedTimeseries( + String view, String groupCol, Instant from, Instant to, + int bucketCount, List filters) { + + long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1); + if (intervalSeconds < 60) intervalSeconds = 60; + + String sql = "SELECT time_bucket(? 
* INTERVAL '1 second', bucket) AS period, " + + groupCol + " AS group_key, " + + "COALESCE(SUM(total_count), 0) AS total_count, " + + "COALESCE(SUM(failed_count), 0) AS failed_count, " + + "CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum) / SUM(total_count) ELSE 0 END AS avg_duration, " + + "COALESCE(MAX(p99_duration), 0) AS p99_duration, " + + "COALESCE(SUM(running_count), 0) AS active_count " + + "FROM " + view + " WHERE bucket >= ? AND bucket < ?"; + + List params = new ArrayList<>(); + params.add(intervalSeconds); + params.add(Timestamp.from(from)); + params.add(Timestamp.from(to)); + for (Filter f : filters) { + sql += " AND " + f.column() + " = ?"; + params.add(f.value()); + } + sql += " GROUP BY period, group_key ORDER BY period, group_key"; + + Map> grouped = new LinkedHashMap<>(); + jdbc.query(sql, (rs) -> { + String key = rs.getString("group_key"); + TimeseriesBucket bucket = new TimeseriesBucket( + rs.getTimestamp("period").toInstant(), + rs.getLong("total_count"), rs.getLong("failed_count"), + rs.getLong("avg_duration"), rs.getLong("p99_duration"), + rs.getLong("active_count")); + grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(bucket); + }, params.toArray()); + + Map result = new LinkedHashMap<>(); + grouped.forEach((key, buckets) -> result.put(key, new StatsTimeseries(buckets))); + return result; + } + + // ── SLA compliance ──────────────────────────────────────────────────── + + @Override + public double slaCompliance(Instant from, Instant to, int thresholdMs, + String applicationName, String routeId) { + String sql = "SELECT " + + "COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " + + "COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " + + "FROM executions WHERE start_time >= ? 
AND start_time < ?"; + + List params = new ArrayList<>(); + params.add(thresholdMs); + params.add(Timestamp.from(from)); + params.add(Timestamp.from(to)); + if (applicationName != null) { + sql += " AND application_name = ?"; + params.add(applicationName); + } + if (routeId != null) { + sql += " AND route_id = ?"; + params.add(routeId); + } + + return jdbc.query(sql, (rs, rowNum) -> { + long total = rs.getLong("total"); + if (total == 0) return 1.0; + return rs.getLong("compliant") * 100.0 / total; + }, params.toArray()).stream().findFirst().orElse(1.0); + } + + @Override + public Map slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) { + String sql = "SELECT application_name, " + + "COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " + + "COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " + + "FROM executions WHERE start_time >= ? AND start_time < ? " + + "GROUP BY application_name"; + + Map result = new LinkedHashMap<>(); + jdbc.query(sql, (rs) -> { + result.put(rs.getString("application_name"), + new long[]{rs.getLong("compliant"), rs.getLong("total")}); + }, defaultThresholdMs, Timestamp.from(from), Timestamp.from(to)); + return result; + } + + @Override + public Map slaCountsByRoute(Instant from, Instant to, + String applicationName, int thresholdMs) { + String sql = "SELECT route_id, " + + "COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " + + "COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " + + "FROM executions WHERE start_time >= ? AND start_time < ? " + + "AND application_name = ? 
GROUP BY route_id"; + + Map result = new LinkedHashMap<>(); + jdbc.query(sql, (rs) -> { + result.put(rs.getString("route_id"), + new long[]{rs.getLong("compliant"), rs.getLong("total")}); + }, thresholdMs, Timestamp.from(from), Timestamp.from(to), applicationName); + return result; + } + + // ── Top errors ──────────────────────────────────────────────────────── + + @Override + public List topErrors(Instant from, Instant to, String applicationName, + String routeId, int limit) { + StringBuilder where = new StringBuilder( + "status = 'FAILED' AND start_time >= ? AND start_time < ?"); + List params = new ArrayList<>(); + params.add(Timestamp.from(from)); + params.add(Timestamp.from(to)); + if (applicationName != null) { + where.append(" AND application_name = ?"); + params.add(applicationName); + } + + String table; + String groupId; + if (routeId != null) { + // L3: attribute errors to processors + table = "processor_executions"; + groupId = "processor_id"; + where.append(" AND route_id = ?"); + params.add(routeId); + } else { + // L1/L2: attribute errors to routes + table = "executions"; + groupId = "route_id"; + } + + Instant fiveMinAgo = Instant.now().minus(5, ChronoUnit.MINUTES); + Instant tenMinAgo = Instant.now().minus(10, ChronoUnit.MINUTES); + + String sql = "WITH counted AS (" + + " SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " + + " " + groupId + " AS group_id, " + + " COUNT(*) AS cnt, MAX(start_time) AS last_seen " + + " FROM " + table + " WHERE " + where + + " GROUP BY error_key, group_id ORDER BY cnt DESC LIMIT ?" + + "), velocity AS (" + + " SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " + + " COUNT(*) FILTER (WHERE start_time >= ?) AS recent_5m, " + + " COUNT(*) FILTER (WHERE start_time >= ? AND start_time < ?) 
AS prev_5m " + + " FROM " + table + " WHERE " + where + + " GROUP BY error_key" + + ") SELECT c.error_key, c.group_id, c.cnt, c.last_seen, " + + " COALESCE(v.recent_5m, 0) / 5.0 AS velocity, " + + " CASE " + + " WHEN COALESCE(v.recent_5m, 0) > COALESCE(v.prev_5m, 0) * 1.2 THEN 'accelerating' " + + " WHEN COALESCE(v.recent_5m, 0) < COALESCE(v.prev_5m, 0) * 0.8 THEN 'decelerating' " + + " ELSE 'stable' END AS trend " + + "FROM counted c LEFT JOIN velocity v ON c.error_key = v.error_key " + + "ORDER BY c.cnt DESC"; + + // Build full params: counted-where params + limit + velocity timestamps + velocity-where params + List fullParams = new ArrayList<>(params); + fullParams.add(limit); + fullParams.add(Timestamp.from(fiveMinAgo)); + fullParams.add(Timestamp.from(tenMinAgo)); + fullParams.add(Timestamp.from(fiveMinAgo)); + fullParams.addAll(params); // same where clause for velocity CTE + + return jdbc.query(sql, (rs, rowNum) -> { + String errorKey = rs.getString("error_key"); + String gid = rs.getString("group_id"); + return new TopError( + errorKey, + routeId != null ? routeId : gid, // routeId + routeId != null ? gid : null, // processorId (only at L3) + rs.getLong("cnt"), + rs.getDouble("velocity"), + rs.getString("trend"), + rs.getTimestamp("last_seen").toInstant()); + }, fullParams.toArray()); + } + + @Override + public int activeErrorTypes(Instant from, Instant to, String applicationName) { + String sql = "SELECT COUNT(DISTINCT COALESCE(error_type, LEFT(error_message, 200))) " + + "FROM executions WHERE status = 'FAILED' AND start_time >= ? AND start_time < ?"; + + List params = new ArrayList<>(); + params.add(Timestamp.from(from)); + params.add(Timestamp.from(to)); + if (applicationName != null) { + sql += " AND application_name = ?"; + params.add(applicationName); + } + + Integer count = jdbc.queryForObject(sql, Integer.class, params.toArray()); + return count != null ? 
count : 0; + } } diff --git a/cameleer3-server-app/src/main/resources/db/migration/V12__app_settings.sql b/cameleer3-server-app/src/main/resources/db/migration/V12__app_settings.sql new file mode 100644 index 00000000..50f4c42b --- /dev/null +++ b/cameleer3-server-app/src/main/resources/db/migration/V12__app_settings.sql @@ -0,0 +1,11 @@ +-- Per-application dashboard settings (SLA thresholds, health dot thresholds) +CREATE TABLE app_settings ( + app_id TEXT PRIMARY KEY, + sla_threshold_ms INTEGER NOT NULL DEFAULT 300, + health_error_warn DOUBLE PRECISION NOT NULL DEFAULT 1.0, + health_error_crit DOUBLE PRECISION NOT NULL DEFAULT 5.0, + health_sla_warn DOUBLE PRECISION NOT NULL DEFAULT 99.0, + health_sla_crit DOUBLE PRECISION NOT NULL DEFAULT 95.0, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettings.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettings.java new file mode 100644 index 00000000..3be91cff --- /dev/null +++ b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettings.java @@ -0,0 +1,19 @@ +package com.cameleer3.server.core.admin; + +import java.time.Instant; + +public record AppSettings( + String appId, + int slaThresholdMs, + double healthErrorWarn, + double healthErrorCrit, + double healthSlaWarn, + double healthSlaCrit, + Instant createdAt, + Instant updatedAt) { + + public static AppSettings defaults(String appId) { + Instant now = Instant.now(); + return new AppSettings(appId, 300, 1.0, 5.0, 99.0, 95.0, now, now); + } +} diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettingsRepository.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettingsRepository.java new file mode 100644 index 00000000..73aa8130 --- /dev/null +++ 
b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/admin/AppSettingsRepository.java @@ -0,0 +1,11 @@ +package com.cameleer3.server.core.admin; + +import java.util.List; +import java.util.Optional; + +public interface AppSettingsRepository { + Optional findByAppId(String appId); + List findAll(); + AppSettings save(AppSettings settings); + void delete(String appId); +} diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/ExecutionStats.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/ExecutionStats.java index 1579fa62..2c13886d 100644 --- a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/ExecutionStats.java +++ b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/ExecutionStats.java @@ -14,4 +14,23 @@ public record ExecutionStats( long prevTotalCount, long prevFailedCount, long prevAvgDurationMs, - long prevP99LatencyMs) {} + long prevP99LatencyMs, + double slaCompliance) { + + /** Constructor without SLA compliance (backward-compatible, sets to -1). */ + public ExecutionStats(long totalCount, long failedCount, long avgDurationMs, + long p99LatencyMs, long activeCount, long totalToday, + long prevTotalCount, long prevFailedCount, + long prevAvgDurationMs, long prevP99LatencyMs) { + this(totalCount, failedCount, avgDurationMs, p99LatencyMs, activeCount, + totalToday, prevTotalCount, prevFailedCount, prevAvgDurationMs, + prevP99LatencyMs, -1.0); + } + + /** Return a copy with the given SLA compliance value. 
*/ + public ExecutionStats withSlaCompliance(double slaCompliance) { + return new ExecutionStats(totalCount, failedCount, avgDurationMs, p99LatencyMs, + activeCount, totalToday, prevTotalCount, prevFailedCount, + prevAvgDurationMs, prevP99LatencyMs, slaCompliance); + } +} diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/SearchService.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/SearchService.java index 7389bfe5..556160a1 100644 --- a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/SearchService.java +++ b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/SearchService.java @@ -5,6 +5,7 @@ import com.cameleer3.server.core.storage.StatsStore; import java.time.Instant; import java.util.List; +import java.util.Map; public class SearchService { @@ -48,4 +49,38 @@ public class SearchService { String routeId, List agentIds) { return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, agentIds); } + + // ── Dashboard-specific queries ──────────────────────────────────────── + + public Map timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) { + return statsStore.timeseriesGroupedByApp(from, to, bucketCount); + } + + public Map timeseriesGroupedByRoute(Instant from, Instant to, + int bucketCount, String applicationName) { + return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationName); + } + + public double slaCompliance(Instant from, Instant to, int thresholdMs, + String applicationName, String routeId) { + return statsStore.slaCompliance(from, to, thresholdMs, applicationName, routeId); + } + + public Map slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) { + return statsStore.slaCountsByApp(from, to, defaultThresholdMs); + } + + public Map slaCountsByRoute(Instant from, Instant to, + String applicationName, int thresholdMs) { + return statsStore.slaCountsByRoute(from, to, applicationName, thresholdMs); + } + + 
public List topErrors(Instant from, Instant to, String applicationName, + String routeId, int limit) { + return statsStore.topErrors(from, to, applicationName, routeId, limit); + } + + public int activeErrorTypes(Instant from, Instant to, String applicationName) { + return statsStore.activeErrorTypes(from, to, applicationName); + } } diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/TopError.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/TopError.java new file mode 100644 index 00000000..300df75b --- /dev/null +++ b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/search/TopError.java @@ -0,0 +1,12 @@ +package com.cameleer3.server.core.search; + +import java.time.Instant; + +public record TopError( + String errorType, + String routeId, + String processorId, + long count, + double velocity, + String trend, + Instant lastSeen) {} diff --git a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/storage/StatsStore.java b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/storage/StatsStore.java index 8c00b6e8..f3267d81 100644 --- a/cameleer3-server-core/src/main/java/com/cameleer3/server/core/storage/StatsStore.java +++ b/cameleer3-server-core/src/main/java/com/cameleer3/server/core/storage/StatsStore.java @@ -2,9 +2,11 @@ package com.cameleer3.server.core.storage; import com.cameleer3.server.core.search.ExecutionStats; import com.cameleer3.server.core.search.StatsTimeseries; +import com.cameleer3.server.core.search.TopError; import java.time.Instant; import java.util.List; +import java.util.Map; public interface StatsStore { @@ -33,4 +35,29 @@ public interface StatsStore { // Per-processor timeseries StatsTimeseries timeseriesForProcessor(Instant from, Instant to, int bucketCount, String routeId, String processorType); + + // Grouped timeseries by application (for L1 dashboard charts) + Map timeseriesGroupedByApp(Instant from, Instant to, int bucketCount); + + // Grouped 
timeseries by route within an application (for L2 dashboard charts) + Map timeseriesGroupedByRoute(Instant from, Instant to, int bucketCount, + String applicationName); + + // SLA compliance: % of completed exchanges with duration <= thresholdMs + double slaCompliance(Instant from, Instant to, int thresholdMs, + String applicationName, String routeId); + + // Batch SLA counts by app: {appId -> [compliant, total]} + Map slaCountsByApp(Instant from, Instant to, int defaultThresholdMs); + + // Batch SLA counts by route within an app: {routeId -> [compliant, total]} + Map slaCountsByRoute(Instant from, Instant to, String applicationName, + int thresholdMs); + + // Top N errors with velocity trend + List topErrors(Instant from, Instant to, String applicationName, + String routeId, int limit); + + // Count of distinct error types in window + int activeErrorTypes(Instant from, Instant to, String applicationName); } diff --git a/ui/src/api/queries/dashboard.ts b/ui/src/api/queries/dashboard.ts new file mode 100644 index 00000000..012e13fb --- /dev/null +++ b/ui/src/api/queries/dashboard.ts @@ -0,0 +1,142 @@ +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; +import { config } from '../../config'; +import { useAuthStore } from '../../auth/auth-store'; +import { useRefreshInterval } from './use-refresh-interval'; + +function authHeaders() { + const token = useAuthStore.getState().accessToken; + return { + Authorization: `Bearer ${token}`, + 'X-Cameleer-Protocol-Version': '1', + }; +} + +async function fetchJson(path: string, params?: Record): Promise { + const qs = new URLSearchParams(); + if (params) { + for (const [k, v] of Object.entries(params)) { + if (v != null) qs.set(k, v); + } + } + const url = `${config.apiBaseUrl}${path}${qs.toString() ? 
`?${qs}` : ''}`; + const res = await fetch(url, { headers: authHeaders() }); + if (!res.ok) throw new Error(`Failed to fetch ${path}`); + return res.json(); +} + +// ── Timeseries by app (L1 charts) ───────────────────────────────────── + +export interface TimeseriesBucket { + time: string; + totalCount: number; + failedCount: number; + avgDurationMs: number; + p99DurationMs: number; + activeCount: number; +} + +export interface GroupedTimeseries { + [key: string]: { buckets: TimeseriesBucket[] }; +} + +export function useTimeseriesByApp(from?: string, to?: string) { + const refetchInterval = useRefreshInterval(30_000); + return useQuery({ + queryKey: ['dashboard', 'timeseries-by-app', from, to], + queryFn: () => fetchJson('/search/stats/timeseries/by-app', { + from, to, buckets: '24', + }), + enabled: !!from, + placeholderData: (prev: GroupedTimeseries | undefined) => prev, + refetchInterval, + }); +} + +// ── Timeseries by route (L2 charts) ─────────────────────────────────── + +export function useTimeseriesByRoute(from?: string, to?: string, application?: string) { + const refetchInterval = useRefreshInterval(30_000); + return useQuery({ + queryKey: ['dashboard', 'timeseries-by-route', from, to, application], + queryFn: () => fetchJson('/search/stats/timeseries/by-route', { + from, to, application, buckets: '24', + }), + enabled: !!from && !!application, + placeholderData: (prev: GroupedTimeseries | undefined) => prev, + refetchInterval, + }); +} + +// ── Top errors (L2/L3) ──────────────────────────────────────────────── + +export interface TopError { + errorType: string; + routeId: string | null; + processorId: string | null; + count: number; + velocity: number; + trend: 'accelerating' | 'stable' | 'decelerating'; + lastSeen: string; +} + +export function useTopErrors(from?: string, to?: string, application?: string, routeId?: string) { + const refetchInterval = useRefreshInterval(10_000); + return useQuery({ + queryKey: ['dashboard', 'top-errors', from, to, 
application, routeId], + queryFn: () => fetchJson('/search/errors/top', { + from, to, application, routeId, limit: '5', + }), + enabled: !!from, + placeholderData: (prev: TopError[] | undefined) => prev, + refetchInterval, + }); +} + +// ── App settings ────────────────────────────────────────────────────── + +export interface AppSettings { + appId: string; + slaThresholdMs: number; + healthErrorWarn: number; + healthErrorCrit: number; + healthSlaWarn: number; + healthSlaCrit: number; + createdAt: string; + updatedAt: string; +} + +export function useAppSettings(appId?: string) { + return useQuery({ + queryKey: ['app-settings', appId], + queryFn: () => fetchJson(`/admin/app-settings/${appId}`), + enabled: !!appId, + staleTime: 60_000, + }); +} + +export function useAllAppSettings() { + return useQuery({ + queryKey: ['app-settings', 'all'], + queryFn: () => fetchJson('/admin/app-settings'), + staleTime: 60_000, + }); +} + +export function useUpdateAppSettings() { + const queryClient = useQueryClient(); + return useMutation({ + mutationFn: async ({ appId, settings }: { appId: string; settings: Omit }) => { + const token = useAuthStore.getState().accessToken; + const res = await fetch(`${config.apiBaseUrl}/admin/app-settings/${appId}`, { + method: 'PUT', + headers: { ...authHeaders(), 'Content-Type': 'application/json' }, + body: JSON.stringify(settings), + }); + if (!res.ok) throw new Error('Failed to update app settings'); + return res.json(); + }, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['app-settings'] }); + }, + }); +} diff --git a/ui/src/pages/DashboardTab/DashboardL1.tsx b/ui/src/pages/DashboardTab/DashboardL1.tsx new file mode 100644 index 00000000..a2aaa7a3 --- /dev/null +++ b/ui/src/pages/DashboardTab/DashboardL1.tsx @@ -0,0 +1,442 @@ +import { useMemo } from 'react'; +import { useNavigate } from 'react-router'; +import { + KpiStrip, + DataTable, + AreaChart, + LineChart, + Card, + Sparkline, + MonoText, + StatusDot, + Badge, +} from 
'@cameleer/design-system'; +import type { KpiItem, Column } from '@cameleer/design-system'; +import { useGlobalFilters } from '@cameleer/design-system'; +import { useRouteMetrics } from '../../api/queries/catalog'; +import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions'; +import { useTimeseriesByApp, useTopErrors, useAllAppSettings } from '../../api/queries/dashboard'; +import type { AppSettings } from '../../api/queries/dashboard'; +import type { RouteMetrics } from '../../api/types'; +import { + computeHealthDot, + formatThroughput, + formatSlaCompliance, + trendIndicator, + type HealthStatus, +} from './dashboard-utils'; +import styles from './DashboardTab.module.css'; + +// ── Row type for application health table ─────────────────────────────────── + +interface AppRow { + id: string; + appId: string; + health: HealthStatus; + throughput: number; + throughputLabel: string; + successRate: number; + p99DurationMs: number; + slaCompliance: number; + errorCount: number; + sparkline: number[]; +} + +// ── Table columns ─────────────────────────────────────────────────────────── + +const APP_COLUMNS: Column[] = [ + { + key: 'health', + header: '', + render: (_, row) => , + }, + { + key: 'appId', + header: 'Application', + sortable: true, + render: (_, row) => ( + {row.appId} + ), + }, + { + key: 'throughput', + header: 'Throughput', + sortable: true, + render: (_, row) => ( + {row.throughputLabel} + ), + }, + { + key: 'successRate', + header: 'Success %', + sortable: true, + render: (_, row) => { + const pct = row.successRate; + const cls = pct >= 99 ? styles.rateGood : pct >= 97 ? styles.rateWarn : styles.rateBad; + return {pct.toFixed(1)}%; + }, + }, + { + key: 'p99DurationMs', + header: 'P99', + sortable: true, + render: (_, row) => { + const cls = row.p99DurationMs > 300 ? styles.rateBad : row.p99DurationMs > 200 ? 
styles.rateWarn : styles.rateGood; + return {Math.round(row.p99DurationMs)}ms; + }, + }, + { + key: 'slaCompliance', + header: 'SLA %', + sortable: true, + render: (_, row) => { + const cls = row.slaCompliance >= 99 ? styles.rateGood : row.slaCompliance >= 95 ? styles.rateWarn : styles.rateBad; + return {formatSlaCompliance(row.slaCompliance)}; + }, + }, + { + key: 'errorCount', + header: 'Errors', + sortable: true, + render: (_, row) => { + const cls = row.errorCount > 10 ? styles.rateBad : row.errorCount > 0 ? styles.rateWarn : styles.rateGood; + return {row.errorCount.toLocaleString()}; + }, + }, + { + key: 'sparkline', + header: 'Trend', + render: (_, row) => ( + + ), + }, +]; + +// ── Aggregate RouteMetrics by appId ───────────────────────────────────────── + +function aggregateByApp( + metrics: RouteMetrics[], + windowSeconds: number, + settingsMap: Map, +): AppRow[] { + const grouped = new Map(); + for (const m of metrics) { + const list = grouped.get(m.appId) ?? []; + list.push(m); + grouped.set(m.appId, list); + } + + const rows: AppRow[] = []; + for (const [appId, routes] of grouped) { + const totalExchanges = routes.reduce((s, r) => s + r.exchangeCount, 0); + const totalFailed = routes.reduce((s, r) => s + r.exchangeCount * r.errorRate, 0); + const successRate = totalExchanges > 0 ? ((totalExchanges - totalFailed) / totalExchanges) * 100 : 100; + const errorRate = totalExchanges > 0 ? totalFailed / totalExchanges : 0; + + // Weighted average p99 by exchange count + const p99Sum = routes.reduce((s, r) => s + r.p99DurationMs * r.exchangeCount, 0); + const p99DurationMs = totalExchanges > 0 ? p99Sum / totalExchanges : 0; + + // SLA compliance: weighted average of per-route slaCompliance from backend + const appSettings = settingsMap.get(appId); + const slaWeightedSum = routes.reduce((s, r) => s + (r.slaCompliance ?? 100) * r.exchangeCount, 0); + const slaCompliance = totalExchanges > 0 ? 
slaWeightedSum / totalExchanges : 100; + + const errorCount = Math.round(totalFailed); + + // Merge sparklines: sum across routes per bucket position + const maxLen = Math.max(...routes.map((r) => (r.sparkline ?? []).length), 0); + const sparkline: number[] = []; + for (let i = 0; i < maxLen; i++) { + sparkline.push(routes.reduce((s, r) => s + ((r.sparkline ?? [])[i] ?? 0), 0)); + } + + rows.push({ + id: appId, + appId, + health: computeHealthDot(errorRate, slaCompliance, appSettings), + throughput: totalExchanges, + throughputLabel: formatThroughput(totalExchanges, windowSeconds), + successRate, + p99DurationMs, + slaCompliance, + errorCount, + sparkline, + }); + } + + return rows.sort((a, b) => { + const order: Record = { error: 0, warning: 1, success: 2 }; + return order[a.health] - order[b.health]; + }); +} + +// ── Build KPI items ───────────────────────────────────────────────────────── + +function buildKpiItems( + stats: { + totalCount: number; + failedCount: number; + p99LatencyMs: number; + prevTotalCount: number; + prevFailedCount: number; + prevP99LatencyMs: number; + } | undefined, + windowSeconds: number, + slaCompliance: number, + activeErrorCount: number, + throughputSparkline: number[], + successSparkline: number[], + latencySparkline: number[], + slaSparkline: number[], + errorSparkline: number[], +): KpiItem[] { + const totalCount = stats?.totalCount ?? 0; + const failedCount = stats?.failedCount ?? 0; + const prevTotalCount = stats?.prevTotalCount ?? 0; + const prevFailedCount = stats?.prevFailedCount ?? 0; + const p99Ms = stats?.p99LatencyMs ?? 0; + const prevP99Ms = stats?.prevP99LatencyMs ?? 0; + + // Throughput + const throughput = windowSeconds > 0 ? totalCount / windowSeconds : 0; + const prevThroughput = windowSeconds > 0 ? prevTotalCount / windowSeconds : 0; + const throughputTrend = trendIndicator(throughput, prevThroughput); + + // Success Rate + const successPct = totalCount > 0 ? 
((totalCount - failedCount) / totalCount) * 100 : 100; + const prevSuccessPct = prevTotalCount > 0 + ? ((prevTotalCount - prevFailedCount) / prevTotalCount) * 100 + : 100; + const successTrend = trendIndicator(successPct, prevSuccessPct); + + // P99 Latency + const p99Trend = trendIndicator(p99Ms, prevP99Ms); + + // SLA compliance trend — compared against a fixed 100 baseline rather than a previous-period value + const slaTrend = trendIndicator(slaCompliance, 100); + + // Active Errors + const prevErrorRate = prevTotalCount > 0 ? (prevFailedCount / prevTotalCount) * 100 : 0; + const currentErrorRate = totalCount > 0 ? (failedCount / totalCount) * 100 : 0; + const errorTrend = trendIndicator(currentErrorRate, prevErrorRate); + + return [ + { + label: 'Throughput', + value: formatThroughput(totalCount, windowSeconds), + trend: { + label: throughputTrend.label, + variant: throughputTrend.direction === 'up' ? 'success' as const : throughputTrend.direction === 'down' ? 'error' as const : 'muted' as const, + }, + subtitle: `${totalCount.toLocaleString()} msg total`, + sparkline: throughputSparkline, + borderColor: 'var(--amber)', + }, + { + label: 'Success Rate', + value: `${successPct.toFixed(1)}%`, + trend: { + label: successTrend.label, + variant: successPct >= 99 ? 'success' as const : successPct >= 97 ? 'warning' as const : 'error' as const, + }, + subtitle: `${(totalCount - failedCount).toLocaleString()} succeeded`, + sparkline: successSparkline, + borderColor: successPct >= 99 ? 'var(--success)' : 'var(--error)', + }, + { + label: 'P99 Latency', + value: `${Math.round(p99Ms)}ms`, + trend: { + label: p99Trend.label, + variant: p99Ms > 300 ? 'error' as const : p99Ms > 200 ? 'warning' as const : 'success' as const, + }, + subtitle: `prev ${Math.round(prevP99Ms)}ms`, + sparkline: latencySparkline, + borderColor: p99Ms > 300 ? 
'var(--warning)' : 'var(--success)', + }, + { + label: 'SLA Compliance', + value: formatSlaCompliance(slaCompliance), + trend: { + label: slaTrend.label, + variant: slaCompliance >= 99 ? 'success' as const : slaCompliance >= 95 ? 'warning' as const : 'error' as const, + }, + subtitle: 'P99 within threshold', + sparkline: slaSparkline, + borderColor: slaCompliance >= 99 ? 'var(--success)' : 'var(--warning)', + }, + { + label: 'Active Errors', + value: String(activeErrorCount), + trend: { + label: errorTrend.label, + variant: activeErrorCount === 0 ? 'success' as const : 'error' as const, + }, + subtitle: `${failedCount.toLocaleString()} failures total`, + sparkline: errorSparkline, + borderColor: activeErrorCount === 0 ? 'var(--success)' : 'var(--error)', + }, + ]; +} + +// ── Component ─────────────────────────────────────────────────────────────── + +export default function DashboardL1() { + const navigate = useNavigate(); + const { timeRange } = useGlobalFilters(); + const timeFrom = timeRange.start.toISOString(); + const timeTo = timeRange.end.toISOString(); + const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000; + + const { data: metrics } = useRouteMetrics(timeFrom, timeTo); + const { data: stats } = useExecutionStats(timeFrom, timeTo); + const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo); + const { data: timeseriesByApp } = useTimeseriesByApp(timeFrom, timeTo); + const { data: topErrors } = useTopErrors(timeFrom, timeTo); + const { data: allAppSettings } = useAllAppSettings(); + + // Build settings lookup map + const settingsMap = useMemo(() => { + const map = new Map(); + for (const s of allAppSettings ?? []) { + map.set(s.appId, s); + } + return map; + }, [allAppSettings]); + + // Aggregate route metrics by appId for the table + const appRows = useMemo( + () => aggregateByApp(metrics ?? 
[], windowSeconds, settingsMap), + [metrics, windowSeconds, settingsMap], + ); + + // Global SLA compliance from backend stats (exact calculation from executions table) + const globalSlaCompliance = (stats as Record)?.slaCompliance as number ?? -1; + const effectiveSlaCompliance = globalSlaCompliance >= 0 ? globalSlaCompliance : 100; + + // Active error count = length of the top-errors result (useTopErrors requests limit=5), not a full distinct-type count + const activeErrorCount = useMemo( + () => (topErrors ?? []).length, + [topErrors], + ); + + // KPI sparklines from timeseries buckets + const throughputSparkline = useMemo( + () => (timeseries?.buckets ?? []).map((b) => b.totalCount), + [timeseries], + ); + const successSparkline = useMemo( + () => (timeseries?.buckets ?? []).map((b) => + b.totalCount > 0 ? ((b.totalCount - b.failedCount) / b.totalCount) * 100 : 100, + ), + [timeseries], + ); + const latencySparkline = useMemo( + () => (timeseries?.buckets ?? []).map((b) => b.p99DurationMs), + [timeseries], + ); + const slaSparkline = useMemo( + () => (timeseries?.buckets ?? []).map((b) => + b.p99DurationMs <= 300 ? 100 : 0, + ), + [timeseries], + ); + const errorSparkline = useMemo( + () => (timeseries?.buckets ?? 
[]).map((b) => b.failedCount), + [timeseries], + ); + + const kpiItems = useMemo( + () => buildKpiItems( + stats, + windowSeconds, + effectiveSlaCompliance, + activeErrorCount, + throughputSparkline, + successSparkline, + latencySparkline, + slaSparkline, + errorSparkline, + ), + [stats, windowSeconds, effectiveSlaCompliance, activeErrorCount, + throughputSparkline, successSparkline, latencySparkline, slaSparkline, errorSparkline], + ); + + // ── Per-app chart series (throughput stacked area) ────────────────────── + const throughputByAppSeries = useMemo(() => { + if (!timeseriesByApp) return []; + return Object.entries(timeseriesByApp).map(([appId, { buckets }]) => ({ + label: appId, + data: buckets.map((b, i) => ({ + x: i as number, + y: b.totalCount, + })), + })); + }, [timeseriesByApp]); + + // ── Per-app chart series (error rate line) ───────────────────────────── + const errorRateByAppSeries = useMemo(() => { + if (!timeseriesByApp) return []; + return Object.entries(timeseriesByApp).map(([appId, { buckets }]) => ({ + label: appId, + data: buckets.map((b, i) => ({ + x: i as number, + y: b.totalCount > 0 ? (b.failedCount / b.totalCount) * 100 : 0, + })), + })); + }, [timeseriesByApp]); + + return ( +
+
+ + Auto-refresh: 30s +
+ + {/* KPI header cards */} + + + {/* Application Health table */} +
+
+ Application Health +
+ {appRows.length} applications + +
+
+ navigate(`/dashboard/${row.appId}`)} + /> +
+ + {/* Side-by-side charts */} + {throughputByAppSeries.length > 0 && ( +
+ + + + + + + +
+ )} +
+ ); +} diff --git a/ui/src/pages/DashboardTab/DashboardL2.tsx b/ui/src/pages/DashboardTab/DashboardL2.tsx new file mode 100644 index 00000000..68bc9bbe --- /dev/null +++ b/ui/src/pages/DashboardTab/DashboardL2.tsx @@ -0,0 +1,421 @@ +import { useMemo } from 'react'; +import { useParams, useNavigate } from 'react-router'; +import { + KpiStrip, + DataTable, + AreaChart, + LineChart, + Card, + Sparkline, + MonoText, + Badge, +} from '@cameleer/design-system'; +import type { KpiItem, Column } from '@cameleer/design-system'; +import { useGlobalFilters } from '@cameleer/design-system'; +import { useRouteMetrics } from '../../api/queries/catalog'; +import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions'; +import { + useTimeseriesByRoute, + useTopErrors, + useAppSettings, +} from '../../api/queries/dashboard'; +import type { TopError } from '../../api/queries/dashboard'; +import type { RouteMetrics } from '../../api/types'; +import { + trendArrow, + trendIndicator, + formatThroughput, + formatSlaCompliance, + formatRelativeTime, +} from './dashboard-utils'; +import styles from './DashboardTab.module.css'; + +// ── Route table row type ──────────────────────────────────────────────────── + +interface RouteRow { + id: string; + routeId: string; + exchangeCount: number; + successRate: number; + avgDurationMs: number; + p99DurationMs: number; + slaCompliance: number; + sparkline: number[]; +} + +// ── Route performance columns ─────────────────────────────────────────────── + +const ROUTE_COLUMNS: Column[] = [ + { + key: 'routeId', + header: 'Route ID', + sortable: true, + render: (_, row) => ( + {row.routeId} + ), + }, + { + key: 'exchangeCount', + header: 'Throughput', + sortable: true, + render: (_, row) => ( + {row.exchangeCount.toLocaleString()} + ), + }, + { + key: 'successRate', + header: 'Success%', + sortable: true, + render: (_, row) => { + const pct = row.successRate * 100; + const cls = pct >= 99 ? styles.rateGood : pct >= 97 ? 
styles.rateWarn : styles.rateBad; + return {pct.toFixed(1)}%; + }, + }, + { + key: 'avgDurationMs', + header: 'Avg(ms)', + sortable: true, + render: (_, row) => ( + {Math.round(row.avgDurationMs)} + ), + }, + { + key: 'p99DurationMs', + header: 'P99(ms)', + sortable: true, + render: (_, row) => { + const cls = row.p99DurationMs > 300 ? styles.rateBad : row.p99DurationMs > 200 ? styles.rateWarn : styles.rateGood; + return {Math.round(row.p99DurationMs)}; + }, + }, + { + key: 'slaCompliance', + header: 'SLA%', + sortable: true, + render: (_, row) => { + const cls = row.slaCompliance >= 99 ? styles.rateGood : row.slaCompliance >= 95 ? styles.rateWarn : styles.rateBad; + return {formatSlaCompliance(row.slaCompliance)}; + }, + }, + { + key: 'sparkline', + header: 'Sparkline', + render: (_, row) => ( + + ), + }, +]; + +// ── Top errors columns ────────────────────────────────────────────────────── + +const ERROR_COLUMNS: Column[] = [ + { + key: 'errorType', + header: 'Error Type', + sortable: true, + render: (_, row) => ( + {row.errorType} + ), + }, + { + key: 'routeId', + header: 'Route', + sortable: true, + render: (_, row) => ( + {row.routeId ?? '\u2014'} + ), + }, + { + key: 'count', + header: 'Count', + sortable: true, + render: (_, row) => ( + {row.count.toLocaleString()} + ), + }, + { + key: 'velocity', + header: 'Velocity', + sortable: true, + render: (_, row) => { + const arrow = trendArrow(row.trend); + const cls = row.trend === 'accelerating' ? styles.rateBad + : row.trend === 'decelerating' ? 
styles.rateGood + : styles.rateNeutral; + return {row.velocity.toFixed(1)}/min {arrow}; + }, + }, + { + key: 'lastSeen', + header: 'Last Seen', + sortable: true, + render: (_, row) => ( + {formatRelativeTime(row.lastSeen)} + ), + }, +]; + +// ── Build KPI items ───────────────────────────────────────────────────────── + +function buildKpiItems( + stats: { + totalCount: number; + failedCount: number; + p99LatencyMs: number; + prevTotalCount: number; + prevFailedCount: number; + prevP99LatencyMs: number; + } | undefined, + slaThresholdMs: number, + throughputSparkline: number[], + latencySparkline: number[], + errors: TopError[] | undefined, + windowSeconds: number, +): KpiItem[] { + const totalCount = stats?.totalCount ?? 0; + const failedCount = stats?.failedCount ?? 0; + const prevTotalCount = stats?.prevTotalCount ?? 0; + const prevFailedCount = stats?.prevFailedCount ?? 0; + const p99Ms = stats?.p99LatencyMs ?? 0; + const prevP99Ms = stats?.prevP99LatencyMs ?? 0; + + // Throughput + const throughputTrend = trendIndicator(totalCount, prevTotalCount); + + // Success Rate + const successRate = totalCount > 0 ? ((totalCount - failedCount) / totalCount) * 100 : 100; + const prevSuccessRate = prevTotalCount > 0 ? ((prevTotalCount - prevFailedCount) / prevTotalCount) * 100 : 100; + const successTrend = trendIndicator(successRate, prevSuccessRate); + + // P99 Latency + const latencyTrend = trendIndicator(p99Ms, prevP99Ms); + + // SLA Compliance — percentage of exchanges under threshold + // Approximate from p99: if p99 < threshold, ~99%+ are compliant + const slaCompliance = p99Ms <= slaThresholdMs ? 99.9 : Math.max(0, 100 - ((p99Ms - slaThresholdMs) / slaThresholdMs) * 10); + + // Error Velocity — aggregate from top errors + const errorList = errors ?? 
[]; + const totalVelocity = errorList.reduce((sum, e) => sum + e.velocity, 0); + const hasAccelerating = errorList.some((e) => e.trend === 'accelerating'); + const allDecelerating = errorList.length > 0 && errorList.every((e) => e.trend === 'decelerating'); + const velocityTrendLabel = hasAccelerating ? '\u25B2' : allDecelerating ? '\u25BC' : '\u2500\u2500'; + const velocityVariant = hasAccelerating ? 'error' as const : allDecelerating ? 'success' as const : 'muted' as const; + + return [ + { + label: 'Throughput', + value: formatThroughput(totalCount, windowSeconds), + trend: { + label: throughputTrend.label, + variant: throughputTrend.direction === 'up' ? 'success' as const : throughputTrend.direction === 'down' ? 'error' as const : 'muted' as const, + }, + sparkline: throughputSparkline, + borderColor: 'var(--amber)', + }, + { + label: 'Success Rate', + value: `${successRate.toFixed(2)}%`, + trend: { + label: successTrend.label, + variant: successTrend.direction === 'up' ? 'success' as const : successTrend.direction === 'down' ? 'error' as const : 'muted' as const, + }, + borderColor: successRate >= 99 ? 'var(--success)' : successRate >= 95 ? 'var(--warning)' : 'var(--error)', + }, + { + label: 'P99 Latency', + value: `${Math.round(p99Ms)}ms`, + trend: { + label: latencyTrend.label, + variant: latencyTrend.direction === 'up' ? 'error' as const : latencyTrend.direction === 'down' ? 'success' as const : 'muted' as const, + }, + sparkline: latencySparkline, + borderColor: p99Ms > slaThresholdMs ? 'var(--error)' : 'var(--success)', + }, + { + label: 'SLA Compliance', + value: formatSlaCompliance(slaCompliance), + trend: { + label: slaCompliance >= 99 ? 'OK' : 'BREACH', + variant: slaCompliance >= 99 ? 'success' as const : 'error' as const, + }, + subtitle: `Threshold: ${slaThresholdMs}ms`, + borderColor: slaCompliance >= 99 ? 'var(--success)' : slaCompliance >= 95 ? 
'var(--warning)' : 'var(--error)', + }, + { + label: 'Error Velocity', + value: `${totalVelocity.toFixed(1)}/min`, + trend: { + label: velocityTrendLabel, + variant: velocityVariant, + }, + subtitle: `${errorList.length} error type${errorList.length !== 1 ? 's' : ''} tracked`, + borderColor: hasAccelerating ? 'var(--error)' : allDecelerating ? 'var(--success)' : 'var(--text-muted)', + }, + ]; +} + +// ── Component ─────────────────────────────────────────────────────────────── + +export default function DashboardL2() { + const { appId } = useParams<{ appId: string }>(); + const navigate = useNavigate(); + const { timeRange } = useGlobalFilters(); + const timeFrom = timeRange.start.toISOString(); + const timeTo = timeRange.end.toISOString(); + const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000; + + // Data hooks + const { data: stats } = useExecutionStats(timeFrom, timeTo, undefined, appId); + const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo, undefined, appId); + const { data: metrics } = useRouteMetrics(timeFrom, timeTo, appId); + const { data: timeseriesByRoute } = useTimeseriesByRoute(timeFrom, timeTo, appId); + const { data: errors } = useTopErrors(timeFrom, timeTo, appId); + const { data: appSettings } = useAppSettings(appId); + + const slaThresholdMs = appSettings?.slaThresholdMs ?? 300; + + // Route performance table rows + const routeRows: RouteRow[] = useMemo(() => + (metrics || []).map((m: RouteMetrics) => { + const sla = m.p99DurationMs <= slaThresholdMs + ? 99.9 + : Math.max(0, 100 - ((m.p99DurationMs - slaThresholdMs) / slaThresholdMs) * 10); + return { + id: m.routeId, + routeId: m.routeId, + exchangeCount: m.exchangeCount, + successRate: m.successRate, + avgDurationMs: m.avgDurationMs, + p99DurationMs: m.p99DurationMs, + slaCompliance: sla, + sparkline: m.sparkline ?? 
[], + }; + }), + [metrics, slaThresholdMs], + ); + + // KPI sparklines from timeseries + const throughputSparkline = useMemo(() => + (timeseries?.buckets || []).map((b) => b.totalCount), + [timeseries], + ); + const latencySparkline = useMemo(() => + (timeseries?.buckets || []).map((b) => b.p99DurationMs), + [timeseries], + ); + + const kpiItems = useMemo(() => + buildKpiItems(stats, slaThresholdMs, throughputSparkline, latencySparkline, errors, windowSeconds), + [stats, slaThresholdMs, throughputSparkline, latencySparkline, errors, windowSeconds], + ); + + // Throughput by Route — stacked area chart series + const throughputByRouteSeries = useMemo(() => { + if (!timeseriesByRoute) return []; + return Object.entries(timeseriesByRoute).map(([routeId, data]) => ({ + label: routeId, + data: (data.buckets || []).map((b, i) => ({ + x: i as number, + y: b.totalCount, + })), + })); + }, [timeseriesByRoute]); + + // Latency chart — P99 and Avg lines from app-level timeseries + const latencyChartSeries = useMemo(() => { + const buckets = timeseries?.buckets || []; + return [ + { + label: 'P99', + data: buckets.map((b, i) => ({ + x: i as number, + y: b.p99DurationMs, + })), + }, + { + label: 'Avg', + data: buckets.map((b, i) => ({ + x: i as number, + y: b.avgDurationMs, + })), + }, + ]; + }, [timeseries]); + + // Error rows with a synthetic id built from errorType, routeId and list index + const errorRows = useMemo(() => + (errors ?? []).map((e, i) => ({ ...e, id: `${e.errorType}-${e.routeId}-${i}` })), + [errors], + ); + + return ( +
+
+ + Auto-refresh: 30s +
+ + {/* KPI Strip */} + + + {/* Route Performance Table */} +
+
+ Route Performance +
+ {routeRows.length} routes + +
+
+ navigate(`/dashboard/${appId}/${row.routeId}`)} + /> +
+ + {/* Charts: Throughput by Route + Latency Percentiles */} + {(timeseries?.buckets?.length ?? 0) > 0 && ( +
+ + + + + + + +
+ )} + + {/* Top 5 Errors — hidden when empty */} + {errorRows.length > 0 && ( +
+
+ Top Errors + {errorRows.length} error types +
+ +
+ )} +
+ ); +} diff --git a/ui/src/pages/DashboardTab/DashboardL3.tsx b/ui/src/pages/DashboardTab/DashboardL3.tsx new file mode 100644 index 00000000..83c94b75 --- /dev/null +++ b/ui/src/pages/DashboardTab/DashboardL3.tsx @@ -0,0 +1,434 @@ +import { useMemo } from 'react'; +import { useParams } from 'react-router'; +import { + KpiStrip, + DataTable, + AreaChart, + LineChart, + Card, + MonoText, + Badge, +} from '@cameleer/design-system'; +import type { KpiItem, Column } from '@cameleer/design-system'; +import { useGlobalFilters } from '@cameleer/design-system'; +import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions'; +import { useProcessorMetrics } from '../../api/queries/processor-metrics'; +import { useTopErrors, useAppSettings } from '../../api/queries/dashboard'; +import type { TopError } from '../../api/queries/dashboard'; +import { useDiagramByRoute } from '../../api/queries/diagrams'; +import { ProcessDiagram } from '../../components/ProcessDiagram'; +import { + formatRelativeTime, + trendArrow, + formatThroughput, + formatSlaCompliance, + trendIndicator, +} from './dashboard-utils'; +import styles from './DashboardTab.module.css'; + +// ── Row types ─────────────────────────────────────────────────────────────── + +interface ProcessorRow { + id: string; + processorId: string; + processorType: string; + totalCount: number; + avgDurationMs: number; + p99DurationMs: number; + errorRate: number; + pctTime: number; +} + +interface ErrorRow extends TopError { + id: string; +} + +// ── Processor table columns ───────────────────────────────────────────────── + +const PROCESSOR_COLUMNS: Column[] = [ + { + key: 'processorId', + header: 'Processor ID', + sortable: true, + render: (_, row) => {row.processorId}, + }, + { + key: 'processorType', + header: 'Type', + sortable: true, + render: (_, row) => , + }, + { + key: 'totalCount', + header: 'Invocations', + sortable: true, + render: (_, row) => ( + {row.totalCount.toLocaleString()} + ), + }, + 
{ + key: 'avgDurationMs', + header: 'Avg(ms)', + sortable: true, + render: (_, row) => ( + {Math.round(row.avgDurationMs)} + ), + }, + { + key: 'p99DurationMs', + header: 'P99(ms)', + sortable: true, + render: (_, row) => { + const cls = row.p99DurationMs > 300 + ? styles.rateBad + : row.p99DurationMs > 200 + ? styles.rateWarn + : styles.rateGood; + return {Math.round(row.p99DurationMs)}; + }, + }, + { + key: 'errorRate', + header: 'Error Rate(%)', + sortable: true, + render: (_, row) => { + const pct = row.errorRate * 100; + const cls = pct > 5 ? styles.rateBad : pct > 1 ? styles.rateWarn : styles.rateGood; + return {pct.toFixed(2)}%; + }, + }, + { + key: 'pctTime', + header: '% Time', + sortable: true, + render: (_, row) => ( + {row.pctTime.toFixed(1)}% + ), + }, +]; + +// ── Error table columns ───────────────────────────────────────────────────── + +const ERROR_COLUMNS: Column[] = [ + { + key: 'errorType', + header: 'Error Type', + sortable: true, + render: (_, row) => {row.errorType}, + }, + { + key: 'processorId', + header: 'Processor', + sortable: true, + render: (_, row) => ( + {row.processorId ?? 
'\u2014'} + ), + }, + { + key: 'count', + header: 'Count', + sortable: true, + render: (_, row) => ( + {row.count.toLocaleString()} + ), + }, + { + key: 'trend', + header: 'Velocity', + render: (_, row) => ( + {trendArrow(row.trend)} {row.trend} + ), + }, + { + key: 'lastSeen', + header: 'Last Seen', + sortable: true, + render: (_, row) => ( + {formatRelativeTime(row.lastSeen)} + ), + }, +];
+
+// ── Build KPI items ─────────────────────────────────────────────────────────
+
+/**
+ * Builds the five KPI cards shown at the top of the route-level dashboard:
+ * Throughput, Success Rate, P99 Latency, SLA Compliance and Bottleneck.
+ *
+ * @param stats               Aggregated execution stats; when undefined every
+ *                            count falls back to 0 and both rates to 100%.
+ * @param slaThresholdMs      Latency threshold the P99 card is compared against.
+ * @param bottleneck          Slowest processor, or null when there is no
+ *                            processor data (the card then shows an em dash).
+ * @param throughputSparkline Per-bucket counts drawn on the Throughput card.
+ * @param windowSeconds       Length of the selected time window, passed to
+ *                            formatThroughput together with the total count.
+ * @returns The KPI items in display order.
+ */
+function buildKpiItems(
+  stats: {
+    totalCount: number;
+    failedCount: number;
+    avgDurationMs: number;
+    p99LatencyMs: number;
+    activeCount: number;
+    prevTotalCount: number;
+    prevFailedCount: number;
+    prevP99LatencyMs: number;
+  } | undefined,
+  slaThresholdMs: number,
+  bottleneck: { processorId: string; avgMs: number; pct: number } | null,
+  throughputSparkline: number[],
+  windowSeconds: number,
+): KpiItem[] {
+  const totalCount = stats?.totalCount ?? 0;
+  const failedCount = stats?.failedCount ?? 0;
+  const prevTotalCount = stats?.prevTotalCount ?? 0;
+  const p99Ms = stats?.p99LatencyMs ?? 0;
+  const avgMs = stats?.avgDurationMs ?? 0;
+  const succeededCount = totalCount - failedCount;
+
+  // A window without any exchanges is reported as 100% successful.
+  const successRate = totalCount > 0 ? (succeededCount / totalCount) * 100 : 100;
+  // NOTE(review): this is the same value as successRate; slaThresholdMs is not
+  // taken into account here, only on the P99 card. Confirm whether the stats
+  // payload carries a dedicated SLA-compliance figure that should be used.
+  const slaCompliance = successRate;
+  const slaBreached = p99Ms > slaThresholdMs;
+
+  const throughputTrend = trendIndicator(totalCount, prevTotalCount);
+
+  return [
+    {
+      label: 'Throughput',
+      value: formatThroughput(totalCount, windowSeconds),
+      trend: {
+        label: throughputTrend.label,
+        variant: throughputTrend.direction === 'up'
+          ? 'success' as const
+          : throughputTrend.direction === 'down'
+            ? 'error' as const
+            : 'muted' as const,
+      },
+      subtitle: `${totalCount.toLocaleString()} total exchanges`,
+      sparkline: throughputSparkline,
+      borderColor: 'var(--amber)',
+    },
+    {
+      label: 'Success Rate',
+      value: `${successRate.toFixed(2)}%`,
+      trend: {
+        // Counts are locale-formatted so they match the total shown beside them.
+        label: failedCount > 0 ? `${failedCount.toLocaleString()} failed` : 'No errors',
+        variant: successRate >= 99
+          ? 'success' as const
+          : successRate >= 97
+            ? 'warning' as const
+            : 'error' as const,
+      },
+      subtitle: `${succeededCount.toLocaleString()} succeeded / ${totalCount.toLocaleString()} total`,
+      borderColor: successRate >= 99 ? 'var(--success)' : 'var(--error)',
+    },
+    {
+      label: 'P99 Latency',
+      value: `${Math.round(p99Ms)}ms`,
+      trend: {
+        label: slaBreached ? 'BREACH' : 'OK',
+        variant: slaBreached ? 'error' as const : 'success' as const,
+      },
+      subtitle: `SLA threshold: ${slaThresholdMs}ms \u00B7 Avg: ${Math.round(avgMs)}ms`,
+      borderColor: slaBreached ? 'var(--warning)' : 'var(--success)',
+    },
+    {
+      label: 'SLA Compliance',
+      value: formatSlaCompliance(slaCompliance),
+      trend: {
+        label: slaCompliance >= 99.9 ? 'Excellent' : slaCompliance >= 99 ? 'Good' : 'Degraded',
+        variant: slaCompliance >= 99
+          ? 'success' as const
+          : slaCompliance >= 95
+            ? 'warning' as const
+            : 'error' as const,
+      },
+      subtitle: `Target: 99.9%`,
+      borderColor: slaCompliance >= 99 ? 'var(--success)' : 'var(--warning)',
+    },
+    {
+      label: 'Bottleneck',
+      value: bottleneck ? `${Math.round(bottleneck.avgMs)}ms` : '\u2014',
+      trend: {
+        label: bottleneck ? `${bottleneck.pct.toFixed(1)}% of total` : '\u2014',
+        // Flag the card only when a single processor takes more than half the time.
+        variant: bottleneck && bottleneck.pct > 50 ? 'error' as const : 'muted' as const,
+      },
+      subtitle: bottleneck
+        ? `${bottleneck.processorId} \u00B7 ${Math.round(bottleneck.avgMs)}ms \u00B7 ${bottleneck.pct.toFixed(1)}% of total`
+        : 'No processor data',
+      borderColor: 'var(--running)',
+    },
+  ];
+}
+
+// ── Component ─────────────────────────────────────────────────────────────── + +export default function DashboardL3() { + const { appId, routeId } = useParams<{ appId: string; routeId: string }>(); + const { timeRange } = useGlobalFilters(); + const timeFrom = timeRange.start.toISOString(); + const timeTo = timeRange.end.toISOString(); + const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000; + + // ── Data hooks ────────────────────────────────────────────────────────── + const { data: stats } = useExecutionStats(timeFrom, timeTo, routeId, appId); + const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo, routeId, appId); + const { data: processorMetrics } = useProcessorMetrics(routeId ?? null, appId); + const { data: topErrors } = useTopErrors(timeFrom, timeTo, appId, routeId); + const { data: diagramLayout } = useDiagramByRoute(appId, routeId); + const { data: appSettings } = useAppSettings(appId); + + const slaThresholdMs = appSettings?.slaThresholdMs ?? 300; + + // ── Bottleneck (processor with highest avgDurationMs) ─────────────────── + const bottleneck = useMemo(() => { + if (!processorMetrics?.length) return null; + const routeAvg = stats?.avgDurationMs ?? 0; + const sorted = [...processorMetrics].sort( + (a: any, b: any) => b.avgDurationMs - a.avgDurationMs, + ); + const top = sorted[0]; + const pct = routeAvg > 0 ?
(top.avgDurationMs / routeAvg) * 100 : 0; + return { processorId: top.processorId, avgMs: top.avgDurationMs, pct }; + }, [processorMetrics, stats]); + + // ── Sparklines from timeseries ────────────────────────────────────────── + const throughputSparkline = useMemo( + () => (timeseries?.buckets || []).map((b: any) => b.totalCount), + [timeseries], + ); + + // ── KPI strip ─────────────────────────────────────────────────────────── + const kpiItems = useMemo( + () => buildKpiItems(stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds), + [stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds], + ); + + // ── Chart series ──────────────────────────────────────────────────────── + const throughputChartSeries = useMemo(() => [{ + label: 'Throughput', + data: (timeseries?.buckets || []).map((b: any, i: number) => ({ + x: i, + y: b.totalCount, + })), + }], [timeseries]); + + const latencyChartSeries = useMemo(() => [{ + label: 'P99', + data: (timeseries?.buckets || []).map((b: any, i: number) => ({ + x: i, + y: b.p99DurationMs, + })), + }], [timeseries]); + + const errorRateChartSeries = useMemo(() => [{ + label: 'Error Rate', + data: (timeseries?.buckets || []).map((b: any, i: number) => ({ + x: i, + y: b.totalCount > 0 ? (b.failedCount / b.totalCount) * 100 : 0, + })), + color: 'var(--error)', + }], [timeseries]); + + // ── Processor table rows ──────────────────────────────────────────────── + const processorRows: ProcessorRow[] = useMemo(() => { + if (!processorMetrics?.length) return []; + const routeAvg = stats?.avgDurationMs ?? 0; + return processorMetrics.map((m: any) => ({ + id: m.processorId, + processorId: m.processorId, + processorType: m.processorType, + totalCount: m.totalCount, + avgDurationMs: m.avgDurationMs, + p99DurationMs: m.p99DurationMs, + errorRate: m.errorRate, + pctTime: routeAvg > 0 ? 
(m.avgDurationMs / routeAvg) * 100 : 0, + })); + }, [processorMetrics, stats]); + + // ── Latency heatmap for ProcessDiagram ────────────────────────────────── + const latencyHeatmap = useMemo(() => { + if (!processorMetrics?.length) return new Map(); + const totalAvg = processorMetrics.reduce( + (sum: number, m: any) => sum + m.avgDurationMs, 0, + ); + const map = new Map(); + for (const m of processorMetrics) { + map.set(m.processorId, { + avgDurationMs: m.avgDurationMs, + p99DurationMs: m.p99DurationMs, + pctOfRoute: totalAvg > 0 ? (m.avgDurationMs / totalAvg) * 100 : 0, + }); + } + return map; + }, [processorMetrics]); + + // ── Error table rows ──────────────────────────────────────────────────── + const errorRows: ErrorRow[] = useMemo( + () => (topErrors || []).map((e, i) => ({ ...e, id: `${e.errorType}-${i}` })), + [topErrors], + ); + + return ( +
+
+ + Auto-refresh: 30s +
+ + {/* KPI Strip */} + + + {/* Charts — 3 in a row */} + {(timeseries?.buckets?.length ?? 0) > 0 && ( +
+ + + + + + + + + + + +
+ )} + + {/* Process Diagram with Latency Heatmap */} + {appId && routeId && ( +
+ +
+ )} + + {/* Processor Metrics Table */} +
+
+ Processor Metrics +
+ + {processorRows.length} processor{processorRows.length !== 1 ? 's' : ''} + +
+
+ +
+ + {/* Top 5 Errors — hidden if empty */} + {errorRows.length > 0 && ( +
+
+ Top 5 Errors + +
+ +
+ )} +
+ ); +} diff --git a/ui/src/pages/DashboardTab/DashboardPage.tsx b/ui/src/pages/DashboardTab/DashboardPage.tsx index ed6dcf0c..7c3e7908 100644 --- a/ui/src/pages/DashboardTab/DashboardPage.tsx +++ b/ui/src/pages/DashboardTab/DashboardPage.tsx @@ -2,16 +2,20 @@ import { useParams } from 'react-router'; import { lazy, Suspense } from 'react'; import { Spinner } from '@cameleer/design-system'; -const RoutesMetrics = lazy(() => import('../Routes/RoutesMetrics')); -const RouteDetail = lazy(() => import('../Routes/RouteDetail')); +const DashboardL1 = lazy(() => import('./DashboardL1')); +const DashboardL2 = lazy(() => import('./DashboardL2')); +const DashboardL3 = lazy(() => import('./DashboardL3')); const Fallback =
; export default function DashboardPage() { - const { routeId } = useParams<{ appId?: string; routeId?: string }>(); + const { appId, routeId } = useParams<{ appId?: string; routeId?: string }>(); - if (routeId) { - return ; + if (routeId && appId) { + return ; } - return ; + if (appId) { + return ; + } + return ; } diff --git a/ui/src/pages/DashboardTab/DashboardTab.module.css b/ui/src/pages/DashboardTab/DashboardTab.module.css new file mode 100644 index 00000000..0687f310 --- /dev/null +++ b/ui/src/pages/DashboardTab/DashboardTab.module.css @@ -0,0 +1,133 @@ +.content { + display: flex; + flex-direction: column; + gap: 20px; +} + +.refreshIndicator { + display: flex; + align-items: center; + gap: 6px; + justify-content: flex-end; +} + +.refreshDot { + width: 7px; + height: 7px; + border-radius: 50%; + background: var(--success); + box-shadow: 0 0 4px rgba(61, 124, 71, 0.5); + animation: pulse 2s ease-in-out infinite; +} + +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.5; } +} + +.refreshText { + font-size: 11px; + color: var(--text-muted); + font-family: var(--font-mono); +} + +/* Tables */ +.tableSection { + background: var(--bg-surface); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + box-shadow: var(--shadow-card); + overflow: hidden; +} + +.tableHeader { + display: flex; + align-items: center; + justify-content: space-between; + padding: 12px 16px; + border-bottom: 1px solid var(--border-subtle); +} + +.tableTitle { + font-size: 13px; + font-weight: 600; + color: var(--text-primary); +} + +.tableMeta { + font-size: 11px; + color: var(--text-muted); + font-family: var(--font-mono); +} + +/* Charts */ +.chartGrid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 16px; +} + +.chartRow { + display: grid; + grid-template-columns: 1fr 1fr 1fr; + gap: 16px; +} + +/* Cells */ +.monoCell { + font-size: 12px; + font-family: var(--font-mono); + color: var(--text-primary); +} + +.appNameCell { + display: flex; + 
align-items: center; + gap: 8px; + font-size: 12px; + font-weight: 500; + color: var(--text-primary); + font-family: var(--font-mono); + cursor: pointer; +} + +.appNameCell:hover { + text-decoration: underline; +} + +/* Rate coloring */ +.rateGood { color: var(--success); } +.rateWarn { color: var(--warning); } +.rateBad { color: var(--error); } +.rateNeutral { color: var(--text-secondary); } + +/* Diagram container */ +.diagramSection { + background: var(--bg-surface); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + box-shadow: var(--shadow-card); + overflow: hidden; + height: 280px; +} + +/* Table right side (meta + badge) */ +.tableRight { + display: flex; + align-items: center; + gap: 10px; +} + +/* Chart fill */ +.chart { + width: 100%; +} + +/* Errors section */ +.errorsSection { + background: var(--bg-surface); + border: 1px solid var(--border-subtle); + border-radius: var(--radius-lg); + box-shadow: var(--shadow-card); + overflow: hidden; +} diff --git a/ui/src/pages/DashboardTab/dashboard-utils.ts b/ui/src/pages/DashboardTab/dashboard-utils.ts new file mode 100644 index 00000000..b493a29f --- /dev/null +++ b/ui/src/pages/DashboardTab/dashboard-utils.ts @@ -0,0 +1,70 @@
+import type { AppSettings } from '../../api/queries/dashboard';
+
+export type HealthStatus = 'success' | 'warning' | 'error';
+
+// Placeholder shown wherever a value cannot be computed.
+const NO_VALUE = '—';
+
+// Fallback health thresholds, all expressed in percent.
+// NOTE(review): the generic arguments on Pick/Partial are reconstructed from
+// the four keys used below — confirm against the AppSettings declaration.
+const DEFAULT_SETTINGS: Pick<
+  AppSettings,
+  'healthErrorWarn' | 'healthErrorCrit' | 'healthSlaWarn' | 'healthSlaCrit'
+> = {
+  healthErrorWarn: 1.0,
+  healthErrorCrit: 5.0,
+  healthSlaWarn: 99.0,
+  healthSlaCrit: 95.0,
+};
+
+/**
+ * Derives the health status from an error rate and an SLA compliance value.
+ *
+ * @param errorRate     Error ratio (0..1); it is multiplied by 100 before being
+ *                      compared with the percent thresholds.
+ * @param slaCompliance SLA compliance in percent. A negative or NaN value is
+ *                      treated as "no data" and does not affect the result,
+ *                      matching formatSlaCompliance which renders it as "—".
+ * @param settings      Optional per-app overrides; any threshold that is
+ *                      missing, undefined or null falls back to the default.
+ * @returns 'error' when a critical threshold is crossed, 'warning' when a
+ *          warning threshold is crossed, otherwise 'success'.
+ */
+export function computeHealthDot(
+  errorRate: number,
+  slaCompliance: number,
+  settings?: Partial<AppSettings> | null,
+): HealthStatus {
+  // Resolve each threshold individually: spreading `settings` over the
+  // defaults would let an explicit undefined/null wipe out a default and
+  // silently disable that comparison.
+  const errorWarn = settings?.healthErrorWarn ?? DEFAULT_SETTINGS.healthErrorWarn;
+  const errorCrit = settings?.healthErrorCrit ?? DEFAULT_SETTINGS.healthErrorCrit;
+  const slaWarn = settings?.healthSlaWarn ?? DEFAULT_SETTINGS.healthSlaWarn;
+  const slaCrit = settings?.healthSlaCrit ?? DEFAULT_SETTINGS.healthSlaCrit;
+
+  const errorPct = errorRate * 100;
+  const slaKnown = slaCompliance >= 0;
+
+  if (errorPct > errorCrit || (slaKnown && slaCompliance < slaCrit)) return 'error';
+  if (errorPct > errorWarn || (slaKnown && slaCompliance < slaWarn)) return 'warning';
+  return 'success';
+}
+
+/**
+ * Formats `count / windowSeconds` as a per-second rate.
+ *
+ * @returns '0/s' for an empty, negative or non-finite window (or a non-finite
+ *          count); otherwise "N.Nk/s" from 1000/s upward, a whole number from
+ *          1/s upward, and two decimals below that.
+ */
+export function formatThroughput(count: number, windowSeconds: number): string {
+  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` check lets through.
+  if (!(windowSeconds > 0) || !Number.isFinite(count)) return '0/s';
+  const tps = count / windowSeconds;
+  if (tps >= 1000) return `${(tps / 1000).toFixed(1)}k/s`;
+  if (tps >= 1) return `${tps.toFixed(0)}/s`;
+  return `${tps.toFixed(2)}/s`;
+}
+
+/**
+ * Formats an SLA compliance percentage with one decimal.
+ *
+ * @returns "—" when the value is negative or NaN.
+ */
+export function formatSlaCompliance(pct: number): string {
+  if (!(pct >= 0)) return NO_VALUE;
+  return `${pct.toFixed(1)}%`;
+}
+
+/**
+ * Compares a value with its previous-period counterpart.
+ *
+ * @returns A signed percentage label and a direction. The result is flat
+ *          ("—") when there is no usable baseline (previous is 0 or either
+ *          input is non-finite) or when the change is below 0.5%.
+ */
+export function trendIndicator(current: number, previous: number): { label: string; direction: 'up' | 'down' | 'flat' } {
+  if (previous === 0 || !Number.isFinite(previous) || !Number.isFinite(current)) {
+    return { label: NO_VALUE, direction: 'flat' };
+  }
+  const delta = ((current - previous) / previous) * 100;
+  if (Math.abs(delta) < 0.5) return { label: NO_VALUE, direction: 'flat' };
+  return {
+    label: `${delta > 0 ? '+' : ''}${delta.toFixed(1)}%`,
+    direction: delta > 0 ? 'up' : 'down',
+  };
+}
+
+/** Maps an error-velocity trend to an arrow glyph (▲ / ▼ / ──). */
+export function trendArrow(trend: 'accelerating' | 'stable' | 'decelerating'): string {
+  switch (trend) {
+    case 'accelerating': return '\u25B2';
+    case 'decelerating': return '\u25BC';
+    default: return '\u2500\u2500';
+  }
+}
+
+/**
+ * Formats a duration given in milliseconds.
+ *
+ * @returns "—" for a non-finite value, "<1ms" below one millisecond, whole
+ *          milliseconds below one second, otherwise seconds with two decimals.
+ */
+export function formatDuration(ms: number): string {
+  if (!Number.isFinite(ms)) return NO_VALUE;
+  if (ms < 1) return '<1ms';
+  if (ms < 1000) return `${Math.round(ms)}ms`;
+  return `${(ms / 1000).toFixed(2)}s`;
+}
+
+/**
+ * Formats the time elapsed since `isoString` relative to now.
+ *
+ * @returns "—" when the input is empty or cannot be parsed as a date;
+ *          otherwise "just now", "N min ago", "N hr ago" or "N d ago".
+ */
+export function formatRelativeTime(isoString: string): string {
+  if (!isoString) return NO_VALUE;
+  const then = new Date(isoString).getTime();
+  // getTime() yields NaN for an unparsable date, which would otherwise
+  // fall through every comparison and render as "NaN d ago".
+  if (Number.isNaN(then)) return NO_VALUE;
+  const diff = Date.now() - then;
+  const minutes = Math.floor(diff / 60_000);
+  if (minutes < 1) return 'just now';
+  if (minutes < 60) return `${minutes} min ago`;
+  const hours = Math.floor(minutes / 60);
+  if (hours < 24) return `${hours} hr ago`;
+  return `${Math.floor(hours / 24)} d ago`;
+}