Compare commits

2 Commits

Author SHA1 Message Date
hsiegeln
b5c19b6774 feat: latency heatmap overlay on process diagram (#94)
Some checks failed
CI / cleanup-branch (push) Has been skipped
CI / build (push) Failing after 29s
CI / docker (push) Has been skipped
CI / deploy (push) Has been skipped
CI / deploy-feature (push) Has been skipped
SonarQube / sonarqube (push) Failing after 1m10s
Add latencyHeatmap prop to ProcessDiagram that colors nodes green→yellow→red
based on their relative contribution to route latency (pctOfRoute). Shows avg
duration label on each node. Threaded through CompoundNode for nested EIP
patterns. Heatmap is active only when no execution overlay is present.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 23:32:42 +02:00
hsiegeln
213aa86c47 feat: progressive drill-down dashboard with RED metrics and SLA compliance (#94)
Three-level dashboard driven by sidebar selection:
- L1 (no selection): all-apps overview with health table, per-app charts
- L2 (app selected): route performance table, error velocity, top errors
- L3 (route selected): processor table, latency heatmap data, bottleneck KPI

Backend: 3 new endpoints (timeseries/by-app, timeseries/by-route, errors/top),
per-app SLA settings (app_settings table, V12 migration), exact SLA compliance
from executions hypertable, error velocity with acceleration detection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 23:29:20 +02:00
25 changed files with 2348 additions and 25 deletions

View File

@@ -0,0 +1,79 @@
package com.cameleer3.server.app.controller;
import com.cameleer3.server.app.dto.AppSettingsRequest;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.admin.AuditCategory;
import com.cameleer3.server.core.admin.AuditResult;
import com.cameleer3.server.core.admin.AuditService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.validation.Valid;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.server.ResponseStatusException;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/v1/admin/app-settings")
@PreAuthorize("hasAnyRole('ADMIN', 'OPERATOR')")
@Tag(name = "App Settings", description = "Per-application dashboard settings (ADMIN/OPERATOR)")
public class AppSettingsController {
private final AppSettingsRepository repository;
private final AuditService auditService;
public AppSettingsController(AppSettingsRepository repository, AuditService auditService) {
this.repository = repository;
this.auditService = auditService;
}
@GetMapping
@Operation(summary = "List all application settings")
public ResponseEntity<List<AppSettings>> getAll() {
return ResponseEntity.ok(repository.findAll());
}
@GetMapping("/{appId}")
@Operation(summary = "Get settings for a specific application (returns defaults if not configured)")
public ResponseEntity<AppSettings> getByAppId(@PathVariable String appId) {
AppSettings settings = repository.findByAppId(appId).orElse(AppSettings.defaults(appId));
return ResponseEntity.ok(settings);
}
@PutMapping("/{appId}")
@Operation(summary = "Create or update settings for an application")
public ResponseEntity<AppSettings> update(@PathVariable String appId,
@Valid @RequestBody AppSettingsRequest request,
HttpServletRequest httpRequest) {
List<String> errors = request.validate();
if (!errors.isEmpty()) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, String.join("; ", errors));
}
AppSettings saved = repository.save(request.toSettings(appId));
auditService.log("update_app_settings", AuditCategory.CONFIG, appId,
Map.of("settings", saved), AuditResult.SUCCESS, httpRequest);
return ResponseEntity.ok(saved);
}
@DeleteMapping("/{appId}")
@Operation(summary = "Delete application settings (reverts to defaults)")
public ResponseEntity<Void> delete(@PathVariable String appId, HttpServletRequest httpRequest) {
repository.delete(appId);
auditService.log("delete_app_settings", AuditCategory.CONFIG, appId,
Map.of(), AuditResult.SUCCESS, httpRequest);
return ResponseEntity.noContent().build();
}
}

View File

@@ -2,6 +2,9 @@ package com.cameleer3.server.app.controller;
import com.cameleer3.server.app.dto.ProcessorMetrics;
import com.cameleer3.server.app.dto.RouteMetrics;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.storage.StatsStore;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
@@ -18,6 +21,7 @@ import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/v1/routes")
@@ -25,9 +29,14 @@ import java.util.List;
public class RouteMetricsController {
private final JdbcTemplate jdbc;
private final StatsStore statsStore;
private final AppSettingsRepository appSettingsRepository;
public RouteMetricsController(JdbcTemplate jdbc) {
public RouteMetricsController(JdbcTemplate jdbc, StatsStore statsStore,
AppSettingsRepository appSettingsRepository) {
this.jdbc = jdbc;
this.statsStore = statsStore;
this.appSettingsRepository = appSettingsRepository;
}
@GetMapping("/metrics")
@@ -78,7 +87,7 @@ public class RouteMetricsController {
routeKeys.add(new RouteKey(applicationName, routeId));
return new RouteMetrics(routeId, applicationName, total, successRate,
avgDur, p99Dur, errorRate, tps, List.of());
avgDur, p99Dur, errorRate, tps, List.of(), -1.0);
}, params.toArray());
// Fetch sparklines (12 buckets over the time window)
@@ -100,13 +109,34 @@ public class RouteMetricsController {
m.appId(), m.routeId());
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
m.errorRate(), m.throughputPerSec(), sparkline));
m.errorRate(), m.throughputPerSec(), sparkline, m.slaCompliance()));
} catch (Exception e) {
// Leave sparkline empty on error
}
}
}
// Enrich with SLA compliance per route
if (!metrics.isEmpty()) {
// Determine SLA threshold (per-app or default)
String effectiveAppId = appId != null ? appId : (metrics.isEmpty() ? null : metrics.get(0).appId());
int threshold = appSettingsRepository.findByAppId(effectiveAppId != null ? effectiveAppId : "")
.map(AppSettings::slaThresholdMs).orElse(300);
Map<String, long[]> slaCounts = statsStore.slaCountsByRoute(fromInstant, toInstant,
effectiveAppId, threshold);
for (int i = 0; i < metrics.size(); i++) {
RouteMetrics m = metrics.get(i);
long[] counts = slaCounts.get(m.routeId());
double sla = (counts != null && counts[1] > 0)
? counts[0] * 100.0 / counts[1] : 100.0;
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
m.errorRate(), m.throughputPerSec(), m.sparkline(), sla));
}
}
return ResponseEntity.ok(metrics);
}

View File

@@ -1,5 +1,7 @@
package com.cameleer3.server.app.controller;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.agent.AgentInfo;
import com.cameleer3.server.core.agent.AgentRegistryService;
import com.cameleer3.server.core.search.ExecutionStats;
@@ -8,6 +10,7 @@ import com.cameleer3.server.core.search.SearchRequest;
import com.cameleer3.server.core.search.SearchResult;
import com.cameleer3.server.core.search.SearchService;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.TopError;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.springframework.http.ResponseEntity;
@@ -20,6 +23,7 @@ import org.springframework.web.bind.annotation.RestController;
import java.time.Instant;
import java.util.List;
import java.util.Map;
/**
* Search endpoints for querying route executions.
@@ -34,10 +38,13 @@ public class SearchController {
private final SearchService searchService;
private final AgentRegistryService registryService;
private final AppSettingsRepository appSettingsRepository;
public SearchController(SearchService searchService, AgentRegistryService registryService) {
public SearchController(SearchService searchService, AgentRegistryService registryService,
AppSettingsRepository appSettingsRepository) {
this.searchService = searchService;
this.registryService = registryService;
this.appSettingsRepository = appSettingsRepository;
}
@GetMapping("/executions")
@@ -87,21 +94,29 @@ public class SearchController {
}
@GetMapping("/stats")
@Operation(summary = "Aggregate execution stats (P99 latency, active count)")
@Operation(summary = "Aggregate execution stats (P99 latency, active count, SLA compliance)")
public ResponseEntity<ExecutionStats> stats(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(required = false) String routeId,
@RequestParam(required = false) String application) {
Instant end = to != null ? to : Instant.now();
ExecutionStats stats;
if (routeId == null && application == null) {
return ResponseEntity.ok(searchService.stats(from, end));
stats = searchService.stats(from, end);
} else if (routeId == null) {
stats = searchService.statsForApp(from, end, application);
} else {
List<String> agentIds = resolveApplicationToAgentIds(application);
stats = searchService.stats(from, end, routeId, agentIds);
}
if (routeId == null) {
return ResponseEntity.ok(searchService.statsForApp(from, end, application));
}
List<String> agentIds = resolveApplicationToAgentIds(application);
return ResponseEntity.ok(searchService.stats(from, end, routeId, agentIds));
// Enrich with SLA compliance
int threshold = appSettingsRepository
.findByAppId(application != null ? application : "")
.map(AppSettings::slaThresholdMs).orElse(300);
double sla = searchService.slaCompliance(from, end, threshold, application, routeId);
return ResponseEntity.ok(stats.withSlaCompliance(sla));
}
@GetMapping("/stats/timeseries")
@@ -126,6 +141,39 @@ public class SearchController {
return ResponseEntity.ok(searchService.timeseries(from, end, buckets, routeId, agentIds));
}
@GetMapping("/stats/timeseries/by-app")
@Operation(summary = "Timeseries grouped by application")
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByApp(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(defaultValue = "24") int buckets) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.timeseriesGroupedByApp(from, end, buckets));
}
@GetMapping("/stats/timeseries/by-route")
@Operation(summary = "Timeseries grouped by route for an application")
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByRoute(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(defaultValue = "24") int buckets,
@RequestParam String application) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.timeseriesGroupedByRoute(from, end, buckets, application));
}
@GetMapping("/errors/top")
@Operation(summary = "Top N errors with velocity trend")
public ResponseEntity<List<TopError>> topErrors(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(required = false) String application,
@RequestParam(required = false) String routeId,
@RequestParam(defaultValue = "5") int limit) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit));
}
/**
* Resolve an application name to agent IDs.
* Returns null if application is null/blank (no filtering).

View File

@@ -0,0 +1,54 @@
package com.cameleer3.server.app.dto;
import com.cameleer3.server.core.admin.AppSettings;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotNull;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
@Schema(description = "Per-application dashboard settings")
public record AppSettingsRequest(
@NotNull @Min(1)
@Schema(description = "SLA duration threshold in milliseconds")
Integer slaThresholdMs,
@NotNull @Min(0) @Max(100)
@Schema(description = "Error rate % threshold for warning (yellow) health dot")
Double healthErrorWarn,
@NotNull @Min(0) @Max(100)
@Schema(description = "Error rate % threshold for critical (red) health dot")
Double healthErrorCrit,
@NotNull @Min(0) @Max(100)
@Schema(description = "SLA compliance % threshold for warning (yellow) health dot")
Double healthSlaWarn,
@NotNull @Min(0) @Max(100)
@Schema(description = "SLA compliance % threshold for critical (red) health dot")
Double healthSlaCrit
) {
public AppSettings toSettings(String appId) {
Instant now = Instant.now();
return new AppSettings(appId, slaThresholdMs, healthErrorWarn, healthErrorCrit,
healthSlaWarn, healthSlaCrit, now, now);
}
public List<String> validate() {
List<String> errors = new ArrayList<>();
if (healthErrorWarn != null && healthErrorCrit != null
&& healthErrorWarn > healthErrorCrit) {
errors.add("healthErrorWarn must be <= healthErrorCrit");
}
if (healthSlaWarn != null && healthSlaCrit != null
&& healthSlaWarn < healthSlaCrit) {
errors.add("healthSlaWarn must be >= healthSlaCrit (higher SLA = healthier)");
}
return errors;
}
}

View File

@@ -15,5 +15,6 @@ public record RouteMetrics(
@NotNull double p99DurationMs,
@NotNull double errorRate,
@NotNull double throughputPerSec,
@NotNull List<Double> sparkline
@NotNull List<Double> sparkline,
double slaCompliance
) {}

View File

@@ -0,0 +1,67 @@
package com.cameleer3.server.app.storage;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
@Repository
public class PostgresAppSettingsRepository implements AppSettingsRepository {
private final JdbcTemplate jdbc;
private static final RowMapper<AppSettings> ROW_MAPPER = (rs, rowNum) -> new AppSettings(
rs.getString("app_id"),
rs.getInt("sla_threshold_ms"),
rs.getDouble("health_error_warn"),
rs.getDouble("health_error_crit"),
rs.getDouble("health_sla_warn"),
rs.getDouble("health_sla_crit"),
rs.getTimestamp("created_at").toInstant(),
rs.getTimestamp("updated_at").toInstant());
public PostgresAppSettingsRepository(JdbcTemplate jdbc) {
this.jdbc = jdbc;
}
@Override
public Optional<AppSettings> findByAppId(String appId) {
List<AppSettings> results = jdbc.query(
"SELECT * FROM app_settings WHERE app_id = ?", ROW_MAPPER, appId);
return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
}
@Override
public List<AppSettings> findAll() {
return jdbc.query("SELECT * FROM app_settings ORDER BY app_id", ROW_MAPPER);
}
@Override
public AppSettings save(AppSettings settings) {
jdbc.update("""
INSERT INTO app_settings (app_id, sla_threshold_ms, health_error_warn,
health_error_crit, health_sla_warn, health_sla_crit, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, now(), now())
ON CONFLICT (app_id) DO UPDATE SET
sla_threshold_ms = EXCLUDED.sla_threshold_ms,
health_error_warn = EXCLUDED.health_error_warn,
health_error_crit = EXCLUDED.health_error_crit,
health_sla_warn = EXCLUDED.health_sla_warn,
health_sla_crit = EXCLUDED.health_sla_crit,
updated_at = now()
""",
settings.appId(), settings.slaThresholdMs(),
settings.healthErrorWarn(), settings.healthErrorCrit(),
settings.healthSlaWarn(), settings.healthSlaCrit());
return findByAppId(settings.appId()).orElseThrow();
}
@Override
public void delete(String appId) {
jdbc.update("DELETE FROM app_settings WHERE app_id = ?", appId);
}
}

View File

@@ -3,6 +3,7 @@ package com.cameleer3.server.app.storage;
import com.cameleer3.server.core.search.ExecutionStats;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket;
import com.cameleer3.server.core.search.TopError;
import com.cameleer3.server.core.storage.StatsStore;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
@@ -12,7 +13,9 @@ import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@Repository
public class PostgresStatsStore implements StatsStore {
@@ -184,4 +187,216 @@ public class PostgresStatsStore implements StatsStore {
return new StatsTimeseries(buckets);
}
// ── Grouped timeseries ────────────────────────────────────────────────
@Override
public Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) {
return queryGroupedTimeseries("stats_1m_app", "application_name", from, to,
bucketCount, List.of());
}
@Override
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
int bucketCount, String applicationName) {
return queryGroupedTimeseries("stats_1m_route", "route_id", from, to,
bucketCount, List.of(new Filter("application_name", applicationName)));
}
private Map<String, StatsTimeseries> queryGroupedTimeseries(
String view, String groupCol, Instant from, Instant to,
int bucketCount, List<Filter> filters) {
long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
if (intervalSeconds < 60) intervalSeconds = 60;
String sql = "SELECT time_bucket(? * INTERVAL '1 second', bucket) AS period, " +
groupCol + " AS group_key, " +
"COALESCE(SUM(total_count), 0) AS total_count, " +
"COALESCE(SUM(failed_count), 0) AS failed_count, " +
"CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum) / SUM(total_count) ELSE 0 END AS avg_duration, " +
"COALESCE(MAX(p99_duration), 0) AS p99_duration, " +
"COALESCE(SUM(running_count), 0) AS active_count " +
"FROM " + view + " WHERE bucket >= ? AND bucket < ?";
List<Object> params = new ArrayList<>();
params.add(intervalSeconds);
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
for (Filter f : filters) {
sql += " AND " + f.column() + " = ?";
params.add(f.value());
}
sql += " GROUP BY period, group_key ORDER BY period, group_key";
Map<String, List<TimeseriesBucket>> grouped = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
String key = rs.getString("group_key");
TimeseriesBucket bucket = new TimeseriesBucket(
rs.getTimestamp("period").toInstant(),
rs.getLong("total_count"), rs.getLong("failed_count"),
rs.getLong("avg_duration"), rs.getLong("p99_duration"),
rs.getLong("active_count"));
grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(bucket);
}, params.toArray());
Map<String, StatsTimeseries> result = new LinkedHashMap<>();
grouped.forEach((key, buckets) -> result.put(key, new StatsTimeseries(buckets)));
return result;
}
// ── SLA compliance ────────────────────────────────────────────────────
@Override
public double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId) {
String sql = "SELECT " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ?";
List<Object> params = new ArrayList<>();
params.add(thresholdMs);
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
sql += " AND application_name = ?";
params.add(applicationName);
}
if (routeId != null) {
sql += " AND route_id = ?";
params.add(routeId);
}
return jdbc.query(sql, (rs, rowNum) -> {
long total = rs.getLong("total");
if (total == 0) return 1.0;
return rs.getLong("compliant") * 100.0 / total;
}, params.toArray()).stream().findFirst().orElse(1.0);
}
@Override
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
String sql = "SELECT application_name, " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ? " +
"GROUP BY application_name";
Map<String, long[]> result = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
result.put(rs.getString("application_name"),
new long[]{rs.getLong("compliant"), rs.getLong("total")});
}, defaultThresholdMs, Timestamp.from(from), Timestamp.from(to));
return result;
}
@Override
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
String applicationName, int thresholdMs) {
String sql = "SELECT route_id, " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ? " +
"AND application_name = ? GROUP BY route_id";
Map<String, long[]> result = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
result.put(rs.getString("route_id"),
new long[]{rs.getLong("compliant"), rs.getLong("total")});
}, thresholdMs, Timestamp.from(from), Timestamp.from(to), applicationName);
return result;
}
// ── Top errors ────────────────────────────────────────────────────────
@Override
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
String routeId, int limit) {
StringBuilder where = new StringBuilder(
"status = 'FAILED' AND start_time >= ? AND start_time < ?");
List<Object> params = new ArrayList<>();
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
where.append(" AND application_name = ?");
params.add(applicationName);
}
String table;
String groupId;
if (routeId != null) {
// L3: attribute errors to processors
table = "processor_executions";
groupId = "processor_id";
where.append(" AND route_id = ?");
params.add(routeId);
} else {
// L1/L2: attribute errors to routes
table = "executions";
groupId = "route_id";
}
Instant fiveMinAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
Instant tenMinAgo = Instant.now().minus(10, ChronoUnit.MINUTES);
String sql = "WITH counted AS (" +
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
" " + groupId + " AS group_id, " +
" COUNT(*) AS cnt, MAX(start_time) AS last_seen " +
" FROM " + table + " WHERE " + where +
" GROUP BY error_key, group_id ORDER BY cnt DESC LIMIT ?" +
"), velocity AS (" +
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
" COUNT(*) FILTER (WHERE start_time >= ?) AS recent_5m, " +
" COUNT(*) FILTER (WHERE start_time >= ? AND start_time < ?) AS prev_5m " +
" FROM " + table + " WHERE " + where +
" GROUP BY error_key" +
") SELECT c.error_key, c.group_id, c.cnt, c.last_seen, " +
" COALESCE(v.recent_5m, 0) / 5.0 AS velocity, " +
" CASE " +
" WHEN COALESCE(v.recent_5m, 0) > COALESCE(v.prev_5m, 0) * 1.2 THEN 'accelerating' " +
" WHEN COALESCE(v.recent_5m, 0) < COALESCE(v.prev_5m, 0) * 0.8 THEN 'decelerating' " +
" ELSE 'stable' END AS trend " +
"FROM counted c LEFT JOIN velocity v ON c.error_key = v.error_key " +
"ORDER BY c.cnt DESC";
// Build full params: counted-where params + limit + velocity timestamps + velocity-where params
List<Object> fullParams = new ArrayList<>(params);
fullParams.add(limit);
fullParams.add(Timestamp.from(fiveMinAgo));
fullParams.add(Timestamp.from(tenMinAgo));
fullParams.add(Timestamp.from(fiveMinAgo));
fullParams.addAll(params); // same where clause for velocity CTE
return jdbc.query(sql, (rs, rowNum) -> {
String errorKey = rs.getString("error_key");
String gid = rs.getString("group_id");
return new TopError(
errorKey,
routeId != null ? routeId : gid, // routeId
routeId != null ? gid : null, // processorId (only at L3)
rs.getLong("cnt"),
rs.getDouble("velocity"),
rs.getString("trend"),
rs.getTimestamp("last_seen").toInstant());
}, fullParams.toArray());
}
@Override
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
String sql = "SELECT COUNT(DISTINCT COALESCE(error_type, LEFT(error_message, 200))) " +
"FROM executions WHERE status = 'FAILED' AND start_time >= ? AND start_time < ?";
List<Object> params = new ArrayList<>();
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
sql += " AND application_name = ?";
params.add(applicationName);
}
Integer count = jdbc.queryForObject(sql, Integer.class, params.toArray());
return count != null ? count : 0;
}
}

View File

@@ -0,0 +1,11 @@
-- Per-application dashboard settings (SLA thresholds, health dot thresholds)
CREATE TABLE app_settings (
app_id TEXT PRIMARY KEY,
sla_threshold_ms INTEGER NOT NULL DEFAULT 300,
health_error_warn DOUBLE PRECISION NOT NULL DEFAULT 1.0,
health_error_crit DOUBLE PRECISION NOT NULL DEFAULT 5.0,
health_sla_warn DOUBLE PRECISION NOT NULL DEFAULT 99.0,
health_sla_crit DOUBLE PRECISION NOT NULL DEFAULT 95.0,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

View File

@@ -0,0 +1,19 @@
package com.cameleer3.server.core.admin;
import java.time.Instant;
public record AppSettings(
String appId,
int slaThresholdMs,
double healthErrorWarn,
double healthErrorCrit,
double healthSlaWarn,
double healthSlaCrit,
Instant createdAt,
Instant updatedAt) {
public static AppSettings defaults(String appId) {
Instant now = Instant.now();
return new AppSettings(appId, 300, 1.0, 5.0, 99.0, 95.0, now, now);
}
}

View File

@@ -0,0 +1,11 @@
package com.cameleer3.server.core.admin;
import java.util.List;
import java.util.Optional;
public interface AppSettingsRepository {
Optional<AppSettings> findByAppId(String appId);
List<AppSettings> findAll();
AppSettings save(AppSettings settings);
void delete(String appId);
}

View File

@@ -14,4 +14,23 @@ public record ExecutionStats(
long prevTotalCount,
long prevFailedCount,
long prevAvgDurationMs,
long prevP99LatencyMs) {}
long prevP99LatencyMs,
double slaCompliance) {
/** Constructor without SLA compliance (backward-compatible, sets to -1). */
public ExecutionStats(long totalCount, long failedCount, long avgDurationMs,
long p99LatencyMs, long activeCount, long totalToday,
long prevTotalCount, long prevFailedCount,
long prevAvgDurationMs, long prevP99LatencyMs) {
this(totalCount, failedCount, avgDurationMs, p99LatencyMs, activeCount,
totalToday, prevTotalCount, prevFailedCount, prevAvgDurationMs,
prevP99LatencyMs, -1.0);
}
/** Return a copy with the given SLA compliance value. */
public ExecutionStats withSlaCompliance(double slaCompliance) {
return new ExecutionStats(totalCount, failedCount, avgDurationMs, p99LatencyMs,
activeCount, totalToday, prevTotalCount, prevFailedCount,
prevAvgDurationMs, prevP99LatencyMs, slaCompliance);
}
}

View File

@@ -5,6 +5,7 @@ import com.cameleer3.server.core.storage.StatsStore;
import java.time.Instant;
import java.util.List;
import java.util.Map;
public class SearchService {
@@ -48,4 +49,38 @@ public class SearchService {
String routeId, List<String> agentIds) {
return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, agentIds);
}
// ── Dashboard-specific queries ────────────────────────────────────────
public Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) {
return statsStore.timeseriesGroupedByApp(from, to, bucketCount);
}
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
int bucketCount, String applicationName) {
return statsStore.timeseriesGroupedByRoute(from, to, bucketCount, applicationName);
}
public double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId) {
return statsStore.slaCompliance(from, to, thresholdMs, applicationName, routeId);
}
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
return statsStore.slaCountsByApp(from, to, defaultThresholdMs);
}
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
String applicationName, int thresholdMs) {
return statsStore.slaCountsByRoute(from, to, applicationName, thresholdMs);
}
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
String routeId, int limit) {
return statsStore.topErrors(from, to, applicationName, routeId, limit);
}
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
return statsStore.activeErrorTypes(from, to, applicationName);
}
}

View File

@@ -0,0 +1,12 @@
package com.cameleer3.server.core.search;
import java.time.Instant;
public record TopError(
String errorType,
String routeId,
String processorId,
long count,
double velocity,
String trend,
Instant lastSeen) {}

View File

@@ -2,9 +2,11 @@ package com.cameleer3.server.core.storage;
import com.cameleer3.server.core.search.ExecutionStats;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.TopError;
import java.time.Instant;
import java.util.List;
import java.util.Map;
public interface StatsStore {
@@ -33,4 +35,29 @@ public interface StatsStore {
// Per-processor timeseries
StatsTimeseries timeseriesForProcessor(Instant from, Instant to, int bucketCount,
String routeId, String processorType);
// Grouped timeseries by application (for L1 dashboard charts)
Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount);
// Grouped timeseries by route within an application (for L2 dashboard charts)
Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to, int bucketCount,
String applicationName);
// SLA compliance: % of completed exchanges with duration <= thresholdMs
double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId);
// Batch SLA counts by app: {appId -> [compliant, total]}
Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs);
// Batch SLA counts by route within an app: {routeId -> [compliant, total]}
Map<String, long[]> slaCountsByRoute(Instant from, Instant to, String applicationName,
int thresholdMs);
// Top N errors with velocity trend
List<TopError> topErrors(Instant from, Instant to, String applicationName,
String routeId, int limit);
// Count of distinct error types in window
int activeErrorTypes(Instant from, Instant to, String applicationName);
}

View File

@@ -0,0 +1,142 @@
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { config } from '../../config';
import { useAuthStore } from '../../auth/auth-store';
import { useRefreshInterval } from './use-refresh-interval';
function authHeaders() {
const token = useAuthStore.getState().accessToken;
return {
Authorization: `Bearer ${token}`,
'X-Cameleer-Protocol-Version': '1',
};
}
async function fetchJson<T>(path: string, params?: Record<string, string | undefined>): Promise<T> {
const qs = new URLSearchParams();
if (params) {
for (const [k, v] of Object.entries(params)) {
if (v != null) qs.set(k, v);
}
}
const url = `${config.apiBaseUrl}${path}${qs.toString() ? `?${qs}` : ''}`;
const res = await fetch(url, { headers: authHeaders() });
if (!res.ok) throw new Error(`Failed to fetch ${path}`);
return res.json();
}
// ── Timeseries by app (L1 charts) ─────────────────────────────────────
export interface TimeseriesBucket {
time: string;
totalCount: number;
failedCount: number;
avgDurationMs: number;
p99DurationMs: number;
activeCount: number;
}
export interface GroupedTimeseries {
[key: string]: { buckets: TimeseriesBucket[] };
}
export function useTimeseriesByApp(from?: string, to?: string) {
const refetchInterval = useRefreshInterval(30_000);
return useQuery({
queryKey: ['dashboard', 'timeseries-by-app', from, to],
queryFn: () => fetchJson<GroupedTimeseries>('/search/stats/timeseries/by-app', {
from, to, buckets: '24',
}),
enabled: !!from,
placeholderData: (prev: GroupedTimeseries | undefined) => prev,
refetchInterval,
});
}
// ── Timeseries by route (L2 charts) ───────────────────────────────────
export function useTimeseriesByRoute(from?: string, to?: string, application?: string) {
const refetchInterval = useRefreshInterval(30_000);
return useQuery({
queryKey: ['dashboard', 'timeseries-by-route', from, to, application],
queryFn: () => fetchJson<GroupedTimeseries>('/search/stats/timeseries/by-route', {
from, to, application, buckets: '24',
}),
enabled: !!from && !!application,
placeholderData: (prev: GroupedTimeseries | undefined) => prev,
refetchInterval,
});
}
// ── Top errors (L2/L3) ────────────────────────────────────────────────
export interface TopError {
errorType: string;
routeId: string | null;
processorId: string | null;
count: number;
velocity: number;
trend: 'accelerating' | 'stable' | 'decelerating';
lastSeen: string;
}
export function useTopErrors(from?: string, to?: string, application?: string, routeId?: string) {
const refetchInterval = useRefreshInterval(10_000);
return useQuery({
queryKey: ['dashboard', 'top-errors', from, to, application, routeId],
queryFn: () => fetchJson<TopError[]>('/search/errors/top', {
from, to, application, routeId, limit: '5',
}),
enabled: !!from,
placeholderData: (prev: TopError[] | undefined) => prev,
refetchInterval,
});
}
// ── App settings ──────────────────────────────────────────────────────
export interface AppSettings {
appId: string;
slaThresholdMs: number;
healthErrorWarn: number;
healthErrorCrit: number;
healthSlaWarn: number;
healthSlaCrit: number;
createdAt: string;
updatedAt: string;
}
export function useAppSettings(appId?: string) {
return useQuery({
queryKey: ['app-settings', appId],
queryFn: () => fetchJson<AppSettings>(`/admin/app-settings/${appId}`),
enabled: !!appId,
staleTime: 60_000,
});
}
export function useAllAppSettings() {
return useQuery({
queryKey: ['app-settings', 'all'],
queryFn: () => fetchJson<AppSettings[]>('/admin/app-settings'),
staleTime: 60_000,
});
}
export function useUpdateAppSettings() {
const queryClient = useQueryClient();
return useMutation({
mutationFn: async ({ appId, settings }: { appId: string; settings: Omit<AppSettings, 'appId' | 'createdAt' | 'updatedAt'> }) => {
const token = useAuthStore.getState().accessToken;
const res = await fetch(`${config.apiBaseUrl}/admin/app-settings/${appId}`, {
method: 'PUT',
headers: { ...authHeaders(), 'Content-Type': 'application/json' },
body: JSON.stringify(settings),
});
if (!res.ok) throw new Error('Failed to update app settings');
return res.json();
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['app-settings'] });
},
});
}

View File

@@ -1,5 +1,5 @@
import type { DiagramNode as DiagramNodeType, DiagramEdge as DiagramEdgeType } from '../../api/queries/diagrams';
import type { NodeConfig } from './types';
import type { NodeConfig, LatencyHeatmapEntry } from './types';
import type { NodeExecutionState, IterationInfo } from '../ExecutionDiagram/types';
import { colorForType, isCompoundType, iconForType, type IconElement } from './node-colors';
import { DiagramNode } from './DiagramNode';
@@ -27,6 +27,7 @@ interface CompoundNodeProps {
iterationState?: Map<string, IterationInfo>;
/** Called when user changes iteration on a compound stepper */
onIterationChange?: (compoundNodeId: string, iterationIndex: number) => void;
latencyHeatmap?: Map<string, LatencyHeatmapEntry>;
onNodeClick: (nodeId: string) => void;
onNodeDoubleClick?: (nodeId: string) => void;
onNodeEnter: (nodeId: string) => void;
@@ -36,7 +37,7 @@ interface CompoundNodeProps {
export function CompoundNode({
node, edges, parentX = 0, parentY = 0,
selectedNodeId, hoveredNodeId, nodeConfigs, executionOverlay,
overlayActive, iterationState, onIterationChange,
overlayActive, iterationState, onIterationChange, latencyHeatmap,
onNodeClick, onNodeDoubleClick, onNodeEnter, onNodeLeave,
}: CompoundNodeProps) {
const x = (node.x ?? 0) - parentX;
@@ -61,7 +62,7 @@ export function CompoundNode({
const childProps = {
edges, selectedNodeId, hoveredNodeId, nodeConfigs, executionOverlay,
overlayActive, iterationState, onIterationChange,
overlayActive, iterationState, onIterationChange, latencyHeatmap,
onNodeClick, onNodeDoubleClick, onNodeEnter, onNodeLeave,
};
@@ -243,6 +244,7 @@ function renderChildren(
config={child.id ? props.nodeConfigs?.get(child.id) : undefined}
executionState={props.executionOverlay?.get(child.id ?? '')}
overlayActive={props.overlayActive}
heatmapEntry={child.id ? props.latencyHeatmap?.get(child.id) : undefined}
onClick={() => child.id && props.onNodeClick(child.id)}
onDoubleClick={() => child.id && props.onNodeDoubleClick?.(child.id)}
onMouseEnter={() => child.id && props.onNodeEnter(child.id)}

View File

@@ -1,5 +1,5 @@
import type { DiagramNode as DiagramNodeType } from '../../api/queries/diagrams';
import type { NodeConfig } from './types';
import type { NodeConfig, LatencyHeatmapEntry } from './types';
import type { NodeExecutionState } from '../ExecutionDiagram/types';
import { colorForType, iconForType, type IconElement } from './node-colors';
import { ConfigBadge } from './ConfigBadge';
@@ -16,12 +16,27 @@ interface DiagramNodeProps {
config?: NodeConfig;
executionState?: NodeExecutionState;
overlayActive?: boolean;
heatmapEntry?: LatencyHeatmapEntry;
onClick: () => void;
onDoubleClick?: () => void;
onMouseEnter: () => void;
onMouseLeave: () => void;
}
/** Interpolate green (120°) → yellow (60°) → red (0°) based on pctOfRoute */
function heatmapColor(pct: number): string {
const clamped = Math.max(0, Math.min(100, pct));
// 0% → hue 120 (green), 50% → hue 60 (yellow), 100% → hue 0 (red)
const hue = 120 - (clamped / 100) * 120;
return `hsl(${Math.round(hue)}, 70%, 92%)`;
}
function heatmapBorderColor(pct: number): string {
const clamped = Math.max(0, Math.min(100, pct));
const hue = 120 - (clamped / 100) * 120;
return `hsl(${Math.round(hue)}, 60%, 50%)`;
}
function formatDuration(ms: number): string {
if (ms < 1000) return `${ms}ms`;
return `${(ms / 1000).toFixed(1)}s`;
@@ -29,7 +44,7 @@ function formatDuration(ms: number): string {
export function DiagramNode({
node, isHovered, isSelected, config,
executionState, overlayActive,
executionState, overlayActive, heatmapEntry,
onClick, onDoubleClick, onMouseEnter, onMouseLeave,
}: DiagramNodeProps) {
const x = node.x ?? 0;
@@ -49,7 +64,7 @@ export function DiagramNode({
const isFailed = executionState?.status === 'FAILED';
const isSkipped = overlayActive && !executionState;
// Colors based on execution state
// Colors based on execution state (heatmap takes priority when no execution overlay)
let cardFill = isHovered ? '#F5F0EA' : 'white';
let borderStroke = isHovered || isSelected ? color : '#E4DFD8';
let borderWidth = isHovered || isSelected ? 1.5 : 1;
@@ -67,6 +82,11 @@ export function DiagramNode({
borderWidth = 2;
topBarColor = '#C0392B';
labelColor = '#C0392B';
} else if (heatmapEntry && !overlayActive) {
cardFill = heatmapColor(heatmapEntry.pctOfRoute);
borderStroke = heatmapBorderColor(heatmapEntry.pctOfRoute);
borderWidth = 1.5;
topBarColor = heatmapBorderColor(heatmapEntry.pctOfRoute);
}
const statusColor = isCompleted ? '#3D7C47' : isFailed ? '#C0392B' : undefined;
@@ -195,6 +215,20 @@ export function DiagramNode({
</text>
)}
{/* Heatmap: avg duration label at bottom-right */}
{heatmapEntry && !overlayActive && !executionState && (
<text
x={w - 6}
y={h - 4}
textAnchor="end"
fill={heatmapBorderColor(heatmapEntry.pctOfRoute)}
fontSize={9}
fontWeight={600}
>
{formatDuration(heatmapEntry.avgDurationMs)}
</text>
)}
{/* Sub-route failure: drill-down arrow at bottom-left */}
{isFailed && executionState?.subRouteFailed && (
<g transform={`translate(4, ${h - 14})`}>

View File

@@ -34,6 +34,7 @@ export function ProcessDiagram({
iterationState,
onIterationChange,
centerOnNodeId,
latencyHeatmap,
}: ProcessDiagramProps) {
// Route stack for drill-down navigation
const [routeStack, setRouteStack] = useState<string[]>([routeId]);
@@ -338,6 +339,7 @@ export function ProcessDiagram({
overlayActive={overlayActive}
iterationState={iterationState}
onIterationChange={onIterationChange}
latencyHeatmap={latencyHeatmap}
onNodeClick={handleNodeClick}
onNodeDoubleClick={handleNodeDoubleClick}
onNodeEnter={toolbar.onNodeEnter}
@@ -354,6 +356,7 @@ export function ProcessDiagram({
config={node.id ? nodeConfigs?.get(node.id) : undefined}
executionState={getNodeExecutionState(node.id, node.type)}
overlayActive={overlayActive}
heatmapEntry={node.id ? latencyHeatmap?.get(node.id) : undefined}
onClick={() => node.id && handleNodeClick(node.id)}
onDoubleClick={() => node.id && handleNodeDoubleClick(node.id)}
onMouseEnter={() => node.id && toolbar.onNodeEnter(node.id)}

View File

@@ -16,6 +16,13 @@ export interface DiagramSection {
variant?: 'error' | 'completion';
}
export interface LatencyHeatmapEntry {
avgDurationMs: number;
p99DurationMs: number;
/** Percentage of total route time this processor consumes (0-100) */
pctOfRoute: number;
}
export interface ProcessDiagramProps {
application: string;
routeId: string;
@@ -39,4 +46,7 @@ export interface ProcessDiagramProps {
onIterationChange?: (compoundNodeId: string, iterationIndex: number) => void;
/** When set, the diagram pans to center this node in the viewport */
centerOnNodeId?: string;
/** Latency heatmap: maps processor ID → aggregate performance data.
* When provided, nodes are colored green→yellow→red by relative latency. */
latencyHeatmap?: Map<string, LatencyHeatmapEntry>;
}

View File

@@ -0,0 +1,442 @@
import { useMemo } from 'react';
import { useNavigate } from 'react-router';
import {
KpiStrip,
DataTable,
AreaChart,
LineChart,
Card,
Sparkline,
MonoText,
StatusDot,
Badge,
} from '@cameleer/design-system';
import type { KpiItem, Column } from '@cameleer/design-system';
import { useGlobalFilters } from '@cameleer/design-system';
import { useRouteMetrics } from '../../api/queries/catalog';
import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions';
import { useTimeseriesByApp, useTopErrors, useAllAppSettings } from '../../api/queries/dashboard';
import type { AppSettings } from '../../api/queries/dashboard';
import type { RouteMetrics } from '../../api/types';
import {
computeHealthDot,
formatThroughput,
formatSlaCompliance,
trendIndicator,
type HealthStatus,
} from './dashboard-utils';
import styles from './DashboardTab.module.css';
// ── Row type for application health table ───────────────────────────────────
interface AppRow {
id: string;
appId: string;
health: HealthStatus;
throughput: number;
throughputLabel: string;
successRate: number;
p99DurationMs: number;
slaCompliance: number;
errorCount: number;
sparkline: number[];
}
// ── Table columns ───────────────────────────────────────────────────────────
const APP_COLUMNS: Column<AppRow>[] = [
{
key: 'health',
header: '',
render: (_, row) => <StatusDot status={row.health} />,
},
{
key: 'appId',
header: 'Application',
sortable: true,
render: (_, row) => (
<span className={styles.appNameCell}>{row.appId}</span>
),
},
{
key: 'throughput',
header: 'Throughput',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.throughputLabel}</MonoText>
),
},
{
key: 'successRate',
header: 'Success %',
sortable: true,
render: (_, row) => {
const pct = row.successRate;
const cls = pct >= 99 ? styles.rateGood : pct >= 97 ? styles.rateWarn : styles.rateBad;
return <MonoText size="sm" className={cls}>{pct.toFixed(1)}%</MonoText>;
},
},
{
key: 'p99DurationMs',
header: 'P99',
sortable: true,
render: (_, row) => {
const cls = row.p99DurationMs > 300 ? styles.rateBad : row.p99DurationMs > 200 ? styles.rateWarn : styles.rateGood;
return <MonoText size="sm" className={cls}>{Math.round(row.p99DurationMs)}ms</MonoText>;
},
},
{
key: 'slaCompliance',
header: 'SLA %',
sortable: true,
render: (_, row) => {
const cls = row.slaCompliance >= 99 ? styles.rateGood : row.slaCompliance >= 95 ? styles.rateWarn : styles.rateBad;
return <MonoText size="sm" className={cls}>{formatSlaCompliance(row.slaCompliance)}</MonoText>;
},
},
{
key: 'errorCount',
header: 'Errors',
sortable: true,
render: (_, row) => {
const cls = row.errorCount > 10 ? styles.rateBad : row.errorCount > 0 ? styles.rateWarn : styles.rateGood;
return <MonoText size="sm" className={cls}>{row.errorCount.toLocaleString()}</MonoText>;
},
},
{
key: 'sparkline',
header: 'Trend',
render: (_, row) => (
<Sparkline data={row.sparkline} width={80} height={24} />
),
},
];
// ── Aggregate RouteMetrics by appId ─────────────────────────────────────────
function aggregateByApp(
metrics: RouteMetrics[],
windowSeconds: number,
settingsMap: Map<string, AppSettings>,
): AppRow[] {
const grouped = new Map<string, RouteMetrics[]>();
for (const m of metrics) {
const list = grouped.get(m.appId) ?? [];
list.push(m);
grouped.set(m.appId, list);
}
const rows: AppRow[] = [];
for (const [appId, routes] of grouped) {
const totalExchanges = routes.reduce((s, r) => s + r.exchangeCount, 0);
const totalFailed = routes.reduce((s, r) => s + r.exchangeCount * r.errorRate, 0);
const successRate = totalExchanges > 0 ? ((totalExchanges - totalFailed) / totalExchanges) * 100 : 100;
const errorRate = totalExchanges > 0 ? totalFailed / totalExchanges : 0;
// Weighted average p99 by exchange count
const p99Sum = routes.reduce((s, r) => s + r.p99DurationMs * r.exchangeCount, 0);
const p99DurationMs = totalExchanges > 0 ? p99Sum / totalExchanges : 0;
// SLA compliance: weighted average of per-route slaCompliance from backend
const appSettings = settingsMap.get(appId);
const slaWeightedSum = routes.reduce((s, r) => s + (r.slaCompliance ?? 100) * r.exchangeCount, 0);
const slaCompliance = totalExchanges > 0 ? slaWeightedSum / totalExchanges : 100;
const errorCount = Math.round(totalFailed);
// Merge sparklines: sum across routes per bucket position
const maxLen = Math.max(...routes.map((r) => (r.sparkline ?? []).length), 0);
const sparkline: number[] = [];
for (let i = 0; i < maxLen; i++) {
sparkline.push(routes.reduce((s, r) => s + ((r.sparkline ?? [])[i] ?? 0), 0));
}
rows.push({
id: appId,
appId,
health: computeHealthDot(errorRate, slaCompliance, appSettings),
throughput: totalExchanges,
throughputLabel: formatThroughput(totalExchanges, windowSeconds),
successRate,
p99DurationMs,
slaCompliance,
errorCount,
sparkline,
});
}
return rows.sort((a, b) => {
const order: Record<HealthStatus, number> = { error: 0, warning: 1, success: 2 };
return order[a.health] - order[b.health];
});
}
// ── Build KPI items ─────────────────────────────────────────────────────────
function buildKpiItems(
stats: {
totalCount: number;
failedCount: number;
p99LatencyMs: number;
prevTotalCount: number;
prevFailedCount: number;
prevP99LatencyMs: number;
} | undefined,
windowSeconds: number,
slaCompliance: number,
activeErrorCount: number,
throughputSparkline: number[],
successSparkline: number[],
latencySparkline: number[],
slaSparkline: number[],
errorSparkline: number[],
): KpiItem[] {
const totalCount = stats?.totalCount ?? 0;
const failedCount = stats?.failedCount ?? 0;
const prevTotalCount = stats?.prevTotalCount ?? 0;
const prevFailedCount = stats?.prevFailedCount ?? 0;
const p99Ms = stats?.p99LatencyMs ?? 0;
const prevP99Ms = stats?.prevP99LatencyMs ?? 0;
// Throughput
const throughput = windowSeconds > 0 ? totalCount / windowSeconds : 0;
const prevThroughput = windowSeconds > 0 ? prevTotalCount / windowSeconds : 0;
const throughputTrend = trendIndicator(throughput, prevThroughput);
// Success Rate
const successPct = totalCount > 0 ? ((totalCount - failedCount) / totalCount) * 100 : 100;
const prevSuccessPct = prevTotalCount > 0
? ((prevTotalCount - prevFailedCount) / prevTotalCount) * 100
: 100;
const successTrend = trendIndicator(successPct, prevSuccessPct);
// P99 Latency
const p99Trend = trendIndicator(p99Ms, prevP99Ms);
// SLA compliance trend — higher is better, so invert the variant
const slaTrend = trendIndicator(slaCompliance, 100);
// Active Errors
const prevErrorRate = prevTotalCount > 0 ? (prevFailedCount / prevTotalCount) * 100 : 0;
const currentErrorRate = totalCount > 0 ? (failedCount / totalCount) * 100 : 0;
const errorTrend = trendIndicator(currentErrorRate, prevErrorRate);
return [
{
label: 'Throughput',
value: formatThroughput(totalCount, windowSeconds),
trend: {
label: throughputTrend.label,
variant: throughputTrend.direction === 'up' ? 'success' as const : throughputTrend.direction === 'down' ? 'error' as const : 'muted' as const,
},
subtitle: `${totalCount.toLocaleString()} msg total`,
sparkline: throughputSparkline,
borderColor: 'var(--amber)',
},
{
label: 'Success Rate',
value: `${successPct.toFixed(1)}%`,
trend: {
label: successTrend.label,
variant: successPct >= 99 ? 'success' as const : successPct >= 97 ? 'warning' as const : 'error' as const,
},
subtitle: `${(totalCount - failedCount).toLocaleString()} succeeded`,
sparkline: successSparkline,
borderColor: successPct >= 99 ? 'var(--success)' : 'var(--error)',
},
{
label: 'P99 Latency',
value: `${Math.round(p99Ms)}ms`,
trend: {
label: p99Trend.label,
variant: p99Ms > 300 ? 'error' as const : p99Ms > 200 ? 'warning' as const : 'success' as const,
},
subtitle: `prev ${Math.round(prevP99Ms)}ms`,
sparkline: latencySparkline,
borderColor: p99Ms > 300 ? 'var(--warning)' : 'var(--success)',
},
{
label: 'SLA Compliance',
value: formatSlaCompliance(slaCompliance),
trend: {
label: slaTrend.label,
variant: slaCompliance >= 99 ? 'success' as const : slaCompliance >= 95 ? 'warning' as const : 'error' as const,
},
subtitle: 'P99 within threshold',
sparkline: slaSparkline,
borderColor: slaCompliance >= 99 ? 'var(--success)' : 'var(--warning)',
},
{
label: 'Active Errors',
value: String(activeErrorCount),
trend: {
label: errorTrend.label,
variant: activeErrorCount === 0 ? 'success' as const : 'error' as const,
},
subtitle: `${failedCount.toLocaleString()} failures total`,
sparkline: errorSparkline,
borderColor: activeErrorCount === 0 ? 'var(--success)' : 'var(--error)',
},
];
}
// ── Component ───────────────────────────────────────────────────────────────
export default function DashboardL1() {
const navigate = useNavigate();
const { timeRange } = useGlobalFilters();
const timeFrom = timeRange.start.toISOString();
const timeTo = timeRange.end.toISOString();
const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000;
const { data: metrics } = useRouteMetrics(timeFrom, timeTo);
const { data: stats } = useExecutionStats(timeFrom, timeTo);
const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo);
const { data: timeseriesByApp } = useTimeseriesByApp(timeFrom, timeTo);
const { data: topErrors } = useTopErrors(timeFrom, timeTo);
const { data: allAppSettings } = useAllAppSettings();
// Build settings lookup map
const settingsMap = useMemo(() => {
const map = new Map<string, AppSettings>();
for (const s of allAppSettings ?? []) {
map.set(s.appId, s);
}
return map;
}, [allAppSettings]);
// Aggregate route metrics by appId for the table
const appRows = useMemo(
() => aggregateByApp(metrics ?? [], windowSeconds, settingsMap),
[metrics, windowSeconds, settingsMap],
);
// Global SLA compliance from backend stats (exact calculation from executions table)
const globalSlaCompliance = (stats as Record<string, unknown>)?.slaCompliance as number ?? -1;
const effectiveSlaCompliance = globalSlaCompliance >= 0 ? globalSlaCompliance : 100;
// Active error count = distinct error types
const activeErrorCount = useMemo(
() => (topErrors ?? []).length,
[topErrors],
);
// KPI sparklines from timeseries buckets
const throughputSparkline = useMemo(
() => (timeseries?.buckets ?? []).map((b) => b.totalCount),
[timeseries],
);
const successSparkline = useMemo(
() => (timeseries?.buckets ?? []).map((b) =>
b.totalCount > 0 ? ((b.totalCount - b.failedCount) / b.totalCount) * 100 : 100,
),
[timeseries],
);
const latencySparkline = useMemo(
() => (timeseries?.buckets ?? []).map((b) => b.p99DurationMs),
[timeseries],
);
const slaSparkline = useMemo(
() => (timeseries?.buckets ?? []).map((b) =>
b.p99DurationMs <= 300 ? 100 : 0,
),
[timeseries],
);
const errorSparkline = useMemo(
() => (timeseries?.buckets ?? []).map((b) => b.failedCount),
[timeseries],
);
const kpiItems = useMemo(
() => buildKpiItems(
stats,
windowSeconds,
effectiveSlaCompliance,
activeErrorCount,
throughputSparkline,
successSparkline,
latencySparkline,
slaSparkline,
errorSparkline,
),
[stats, windowSeconds, effectiveSlaCompliance, activeErrorCount,
throughputSparkline, successSparkline, latencySparkline, slaSparkline, errorSparkline],
);
// ── Per-app chart series (throughput stacked area) ──────────────────────
const throughputByAppSeries = useMemo(() => {
if (!timeseriesByApp) return [];
return Object.entries(timeseriesByApp).map(([appId, { buckets }]) => ({
label: appId,
data: buckets.map((b, i) => ({
x: i as number,
y: b.totalCount,
})),
}));
}, [timeseriesByApp]);
// ── Per-app chart series (error rate line) ─────────────────────────────
const errorRateByAppSeries = useMemo(() => {
if (!timeseriesByApp) return [];
return Object.entries(timeseriesByApp).map(([appId, { buckets }]) => ({
label: appId,
data: buckets.map((b, i) => ({
x: i as number,
y: b.totalCount > 0 ? (b.failedCount / b.totalCount) * 100 : 0,
})),
}));
}, [timeseriesByApp]);
return (
<div className={styles.content}>
<div className={styles.refreshIndicator}>
<span className={styles.refreshDot} />
<span className={styles.refreshText}>Auto-refresh: 30s</span>
</div>
{/* KPI header cards */}
<KpiStrip items={kpiItems} />
{/* Application Health table */}
<div className={styles.tableSection}>
<div className={styles.tableHeader}>
<span className={styles.tableTitle}>Application Health</span>
<div className={styles.tableRight}>
<span className={styles.tableMeta}>{appRows.length} applications</span>
<Badge label="ALL" color="muted" />
</div>
</div>
<DataTable
columns={APP_COLUMNS}
data={appRows}
sortable
onRowClick={(row) => navigate(`/dashboard/${row.appId}`)}
/>
</div>
{/* Side-by-side charts */}
{throughputByAppSeries.length > 0 && (
<div className={styles.chartGrid}>
<Card title="Throughput by Application (msg/s)">
<AreaChart
series={throughputByAppSeries}
yLabel="msg/s"
stacked
height={200}
className={styles.chart}
/>
</Card>
<Card title="Error Rate by Application (%)">
<LineChart
series={errorRateByAppSeries}
yLabel="%"
height={200}
className={styles.chart}
/>
</Card>
</div>
)}
</div>
);
}

View File

@@ -0,0 +1,421 @@
import { useMemo } from 'react';
import { useParams, useNavigate } from 'react-router';
import {
KpiStrip,
DataTable,
AreaChart,
LineChart,
Card,
Sparkline,
MonoText,
Badge,
} from '@cameleer/design-system';
import type { KpiItem, Column } from '@cameleer/design-system';
import { useGlobalFilters } from '@cameleer/design-system';
import { useRouteMetrics } from '../../api/queries/catalog';
import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions';
import {
useTimeseriesByRoute,
useTopErrors,
useAppSettings,
} from '../../api/queries/dashboard';
import type { TopError } from '../../api/queries/dashboard';
import type { RouteMetrics } from '../../api/types';
import {
trendArrow,
trendIndicator,
formatThroughput,
formatSlaCompliance,
formatRelativeTime,
} from './dashboard-utils';
import styles from './DashboardTab.module.css';
// ── Route table row type ────────────────────────────────────────────────────
interface RouteRow {
id: string;
routeId: string;
exchangeCount: number;
successRate: number;
avgDurationMs: number;
p99DurationMs: number;
slaCompliance: number;
sparkline: number[];
}
// ── Route performance columns ───────────────────────────────────────────────
const ROUTE_COLUMNS: Column<RouteRow>[] = [
{
key: 'routeId',
header: 'Route ID',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.routeId}</MonoText>
),
},
{
key: 'exchangeCount',
header: 'Throughput',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.exchangeCount.toLocaleString()}</MonoText>
),
},
{
key: 'successRate',
header: 'Success%',
sortable: true,
render: (_, row) => {
const pct = row.successRate * 100;
const cls = pct >= 99 ? styles.rateGood : pct >= 97 ? styles.rateWarn : styles.rateBad;
return <MonoText size="sm" className={cls}>{pct.toFixed(1)}%</MonoText>;
},
},
{
key: 'avgDurationMs',
header: 'Avg(ms)',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{Math.round(row.avgDurationMs)}</MonoText>
),
},
{
key: 'p99DurationMs',
header: 'P99(ms)',
sortable: true,
render: (_, row) => {
const cls = row.p99DurationMs > 300 ? styles.rateBad : row.p99DurationMs > 200 ? styles.rateWarn : styles.rateGood;
return <MonoText size="sm" className={cls}>{Math.round(row.p99DurationMs)}</MonoText>;
},
},
{
key: 'slaCompliance',
header: 'SLA%',
sortable: true,
render: (_, row) => {
const cls = row.slaCompliance >= 99 ? styles.rateGood : row.slaCompliance >= 95 ? styles.rateWarn : styles.rateBad;
return <MonoText size="sm" className={cls}>{formatSlaCompliance(row.slaCompliance)}</MonoText>;
},
},
{
key: 'sparkline',
header: 'Sparkline',
render: (_, row) => (
<Sparkline data={row.sparkline} width={80} height={24} />
),
},
];
// ── Top errors columns ──────────────────────────────────────────────────────
const ERROR_COLUMNS: Column<TopError>[] = [
{
key: 'errorType',
header: 'Error Type',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.errorType}</MonoText>
),
},
{
key: 'routeId',
header: 'Route',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.routeId ?? '\u2014'}</MonoText>
),
},
{
key: 'count',
header: 'Count',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.count.toLocaleString()}</MonoText>
),
},
{
key: 'velocity',
header: 'Velocity',
sortable: true,
render: (_, row) => {
const arrow = trendArrow(row.trend);
const cls = row.trend === 'accelerating' ? styles.rateBad
: row.trend === 'decelerating' ? styles.rateGood
: styles.rateNeutral;
return <MonoText size="sm" className={cls}>{row.velocity.toFixed(1)}/min {arrow}</MonoText>;
},
},
{
key: 'lastSeen',
header: 'Last Seen',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{formatRelativeTime(row.lastSeen)}</MonoText>
),
},
];
// ── Build KPI items ─────────────────────────────────────────────────────────
function buildKpiItems(
stats: {
totalCount: number;
failedCount: number;
p99LatencyMs: number;
prevTotalCount: number;
prevFailedCount: number;
prevP99LatencyMs: number;
} | undefined,
slaThresholdMs: number,
throughputSparkline: number[],
latencySparkline: number[],
errors: TopError[] | undefined,
windowSeconds: number,
): KpiItem[] {
const totalCount = stats?.totalCount ?? 0;
const failedCount = stats?.failedCount ?? 0;
const prevTotalCount = stats?.prevTotalCount ?? 0;
const prevFailedCount = stats?.prevFailedCount ?? 0;
const p99Ms = stats?.p99LatencyMs ?? 0;
const prevP99Ms = stats?.prevP99LatencyMs ?? 0;
// Throughput
const throughputTrend = trendIndicator(totalCount, prevTotalCount);
// Success Rate
const successRate = totalCount > 0 ? ((totalCount - failedCount) / totalCount) * 100 : 100;
const prevSuccessRate = prevTotalCount > 0 ? ((prevTotalCount - prevFailedCount) / prevTotalCount) * 100 : 100;
const successTrend = trendIndicator(successRate, prevSuccessRate);
// P99 Latency
const latencyTrend = trendIndicator(p99Ms, prevP99Ms);
// SLA Compliance — percentage of exchanges under threshold
// Approximate from p99: if p99 < threshold, ~99%+ are compliant
const slaCompliance = p99Ms <= slaThresholdMs ? 99.9 : Math.max(0, 100 - ((p99Ms - slaThresholdMs) / slaThresholdMs) * 10);
// Error Velocity — aggregate from top errors
const errorList = errors ?? [];
const totalVelocity = errorList.reduce((sum, e) => sum + e.velocity, 0);
const hasAccelerating = errorList.some((e) => e.trend === 'accelerating');
const allDecelerating = errorList.length > 0 && errorList.every((e) => e.trend === 'decelerating');
const velocityTrendLabel = hasAccelerating ? '\u25B2' : allDecelerating ? '\u25BC' : '\u2500\u2500';
const velocityVariant = hasAccelerating ? 'error' as const : allDecelerating ? 'success' as const : 'muted' as const;
return [
{
label: 'Throughput',
value: formatThroughput(totalCount, windowSeconds),
trend: {
label: throughputTrend.label,
variant: throughputTrend.direction === 'up' ? 'success' as const : throughputTrend.direction === 'down' ? 'error' as const : 'muted' as const,
},
sparkline: throughputSparkline,
borderColor: 'var(--amber)',
},
{
label: 'Success Rate',
value: `${successRate.toFixed(2)}%`,
trend: {
label: successTrend.label,
variant: successTrend.direction === 'up' ? 'success' as const : successTrend.direction === 'down' ? 'error' as const : 'muted' as const,
},
borderColor: successRate >= 99 ? 'var(--success)' : successRate >= 95 ? 'var(--warning)' : 'var(--error)',
},
{
label: 'P99 Latency',
value: `${Math.round(p99Ms)}ms`,
trend: {
label: latencyTrend.label,
variant: latencyTrend.direction === 'up' ? 'error' as const : latencyTrend.direction === 'down' ? 'success' as const : 'muted' as const,
},
sparkline: latencySparkline,
borderColor: p99Ms > slaThresholdMs ? 'var(--error)' : 'var(--success)',
},
{
label: 'SLA Compliance',
value: formatSlaCompliance(slaCompliance),
trend: {
label: slaCompliance >= 99 ? 'OK' : 'BREACH',
variant: slaCompliance >= 99 ? 'success' as const : 'error' as const,
},
subtitle: `Threshold: ${slaThresholdMs}ms`,
borderColor: slaCompliance >= 99 ? 'var(--success)' : slaCompliance >= 95 ? 'var(--warning)' : 'var(--error)',
},
{
label: 'Error Velocity',
value: `${totalVelocity.toFixed(1)}/min`,
trend: {
label: velocityTrendLabel,
variant: velocityVariant,
},
subtitle: `${errorList.length} error type${errorList.length !== 1 ? 's' : ''} tracked`,
borderColor: hasAccelerating ? 'var(--error)' : allDecelerating ? 'var(--success)' : 'var(--text-muted)',
},
];
}
// ── Component ───────────────────────────────────────────────────────────────
export default function DashboardL2() {
const { appId } = useParams<{ appId: string }>();
const navigate = useNavigate();
const { timeRange } = useGlobalFilters();
const timeFrom = timeRange.start.toISOString();
const timeTo = timeRange.end.toISOString();
const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000;
// Data hooks
const { data: stats } = useExecutionStats(timeFrom, timeTo, undefined, appId);
const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo, undefined, appId);
const { data: metrics } = useRouteMetrics(timeFrom, timeTo, appId);
const { data: timeseriesByRoute } = useTimeseriesByRoute(timeFrom, timeTo, appId);
const { data: errors } = useTopErrors(timeFrom, timeTo, appId);
const { data: appSettings } = useAppSettings(appId);
const slaThresholdMs = appSettings?.slaThresholdMs ?? 300;
// Route performance table rows
const routeRows: RouteRow[] = useMemo(() =>
(metrics || []).map((m: RouteMetrics) => {
const sla = m.p99DurationMs <= slaThresholdMs
? 99.9
: Math.max(0, 100 - ((m.p99DurationMs - slaThresholdMs) / slaThresholdMs) * 10);
return {
id: m.routeId,
routeId: m.routeId,
exchangeCount: m.exchangeCount,
successRate: m.successRate,
avgDurationMs: m.avgDurationMs,
p99DurationMs: m.p99DurationMs,
slaCompliance: sla,
sparkline: m.sparkline ?? [],
};
}),
[metrics, slaThresholdMs],
);
// KPI sparklines from timeseries
const throughputSparkline = useMemo(() =>
(timeseries?.buckets || []).map((b) => b.totalCount),
[timeseries],
);
const latencySparkline = useMemo(() =>
(timeseries?.buckets || []).map((b) => b.p99DurationMs),
[timeseries],
);
const kpiItems = useMemo(() =>
buildKpiItems(stats, slaThresholdMs, throughputSparkline, latencySparkline, errors, windowSeconds),
[stats, slaThresholdMs, throughputSparkline, latencySparkline, errors, windowSeconds],
);
// Throughput by Route — stacked area chart series
const throughputByRouteSeries = useMemo(() => {
if (!timeseriesByRoute) return [];
return Object.entries(timeseriesByRoute).map(([routeId, data]) => ({
label: routeId,
data: (data.buckets || []).map((b, i) => ({
x: i as number,
y: b.totalCount,
})),
}));
}, [timeseriesByRoute]);
// Latency percentiles chart — P99 line from app-level timeseries
const latencyChartSeries = useMemo(() => {
const buckets = timeseries?.buckets || [];
return [
{
label: 'P99',
data: buckets.map((b, i) => ({
x: i as number,
y: b.p99DurationMs,
})),
},
{
label: 'Avg',
data: buckets.map((b, i) => ({
x: i as number,
y: b.avgDurationMs,
})),
},
];
}, [timeseries]);
// Error rows with stable identity
const errorRows = useMemo(() =>
(errors ?? []).map((e, i) => ({ ...e, id: `${e.errorType}-${e.routeId}-${i}` })),
[errors],
);
return (
<div className={styles.content}>
<div className={styles.refreshIndicator}>
<span className={styles.refreshDot} />
<span className={styles.refreshText}>Auto-refresh: 30s</span>
</div>
{/* KPI Strip */}
<KpiStrip items={kpiItems} />
{/* Route Performance Table */}
<div className={styles.tableSection}>
<div className={styles.tableHeader}>
<span className={styles.tableTitle}>Route Performance</span>
<div className={styles.tableRight}>
<span className={styles.tableMeta}>{routeRows.length} routes</span>
<Badge label="LIVE" color="success" />
</div>
</div>
<DataTable
columns={ROUTE_COLUMNS}
data={routeRows}
sortable
onRowClick={(row) => navigate(`/dashboard/${appId}/${row.routeId}`)}
/>
</div>
{/* Charts: Throughput by Route + Latency Percentiles */}
{(timeseries?.buckets?.length ?? 0) > 0 && (
<div className={styles.chartGrid}>
<Card title="Throughput by Route">
<AreaChart
series={throughputByRouteSeries}
yLabel="msg/s"
stacked
height={200}
className={styles.chart}
/>
</Card>
<Card title="Latency Percentiles">
<LineChart
series={latencyChartSeries}
yLabel="ms"
threshold={{ value: slaThresholdMs, label: `SLA ${slaThresholdMs}ms` }}
height={200}
className={styles.chart}
/>
</Card>
</div>
)}
{/* Top 5 Errors — hidden when empty */}
{errorRows.length > 0 && (
<div className={styles.errorsSection}>
<div className={styles.tableHeader}>
<span className={styles.tableTitle}>Top Errors</span>
<span className={styles.tableMeta}>{errorRows.length} error types</span>
</div>
<DataTable
columns={ERROR_COLUMNS}
data={errorRows}
sortable
/>
</div>
)}
</div>
);
}

View File

@@ -0,0 +1,434 @@
import { useMemo } from 'react';
import { useParams } from 'react-router';
import {
KpiStrip,
DataTable,
AreaChart,
LineChart,
Card,
MonoText,
Badge,
} from '@cameleer/design-system';
import type { KpiItem, Column } from '@cameleer/design-system';
import { useGlobalFilters } from '@cameleer/design-system';
import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions';
import { useProcessorMetrics } from '../../api/queries/processor-metrics';
import { useTopErrors, useAppSettings } from '../../api/queries/dashboard';
import type { TopError } from '../../api/queries/dashboard';
import { useDiagramByRoute } from '../../api/queries/diagrams';
import { ProcessDiagram } from '../../components/ProcessDiagram';
import {
formatRelativeTime,
trendArrow,
formatThroughput,
formatSlaCompliance,
trendIndicator,
} from './dashboard-utils';
import styles from './DashboardTab.module.css';
// ── Row types ───────────────────────────────────────────────────────────────
interface ProcessorRow {
id: string;
processorId: string;
processorType: string;
totalCount: number;
avgDurationMs: number;
p99DurationMs: number;
errorRate: number;
pctTime: number;
}
interface ErrorRow extends TopError {
id: string;
}
// ── Processor table columns ─────────────────────────────────────────────────
const PROCESSOR_COLUMNS: Column<ProcessorRow>[] = [
{
key: 'processorId',
header: 'Processor ID',
sortable: true,
render: (_, row) => <MonoText size="sm">{row.processorId}</MonoText>,
},
{
key: 'processorType',
header: 'Type',
sortable: true,
render: (_, row) => <Badge label={row.processorType} color="muted" />,
},
{
key: 'totalCount',
header: 'Invocations',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.totalCount.toLocaleString()}</MonoText>
),
},
{
key: 'avgDurationMs',
header: 'Avg(ms)',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{Math.round(row.avgDurationMs)}</MonoText>
),
},
{
key: 'p99DurationMs',
header: 'P99(ms)',
sortable: true,
render: (_, row) => {
const cls = row.p99DurationMs > 300
? styles.rateBad
: row.p99DurationMs > 200
? styles.rateWarn
: styles.rateGood;
return <MonoText size="sm" className={cls}>{Math.round(row.p99DurationMs)}</MonoText>;
},
},
{
key: 'errorRate',
header: 'Error Rate(%)',
sortable: true,
render: (_, row) => {
const pct = row.errorRate * 100;
const cls = pct > 5 ? styles.rateBad : pct > 1 ? styles.rateWarn : styles.rateGood;
return <MonoText size="sm" className={cls}>{pct.toFixed(2)}%</MonoText>;
},
},
{
key: 'pctTime',
header: '% Time',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.pctTime.toFixed(1)}%</MonoText>
),
},
];
// ── Error table columns ─────────────────────────────────────────────────────
const ERROR_COLUMNS: Column<ErrorRow>[] = [
{
key: 'errorType',
header: 'Error Type',
sortable: true,
render: (_, row) => <MonoText size="sm">{row.errorType}</MonoText>,
},
{
key: 'processorId',
header: 'Processor',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.processorId ?? '\u2014'}</MonoText>
),
},
{
key: 'count',
header: 'Count',
sortable: true,
render: (_, row) => (
<MonoText size="sm">{row.count.toLocaleString()}</MonoText>
),
},
{
key: 'trend',
header: 'Velocity',
render: (_, row) => (
<span>{trendArrow(row.trend)} {row.trend}</span>
),
},
{
key: 'lastSeen',
header: 'Last Seen',
sortable: true,
render: (_, row) => (
<span>{formatRelativeTime(row.lastSeen)}</span>
),
},
];
// ── Build KPI items ─────────────────────────────────────────────────────────
function buildKpiItems(
stats: {
totalCount: number;
failedCount: number;
avgDurationMs: number;
p99LatencyMs: number;
activeCount: number;
prevTotalCount: number;
prevFailedCount: number;
prevP99LatencyMs: number;
} | undefined,
slaThresholdMs: number,
bottleneck: { processorId: string; avgMs: number; pct: number } | null,
throughputSparkline: number[],
windowSeconds: number,
): KpiItem[] {
const totalCount = stats?.totalCount ?? 0;
const failedCount = stats?.failedCount ?? 0;
const prevTotalCount = stats?.prevTotalCount ?? 0;
const p99Ms = stats?.p99LatencyMs ?? 0;
const avgMs = stats?.avgDurationMs ?? 0;
const successRate = totalCount > 0 ? ((totalCount - failedCount) / totalCount) * 100 : 100;
const slaCompliance = totalCount > 0
? ((totalCount - failedCount) / totalCount) * 100
: 100;
const throughputTrend = trendIndicator(totalCount, prevTotalCount);
return [
{
label: 'Throughput',
value: formatThroughput(totalCount, windowSeconds),
trend: {
label: throughputTrend.label,
variant: throughputTrend.direction === 'up' ? 'success' as const : throughputTrend.direction === 'down' ? 'error' as const : 'muted' as const,
},
subtitle: `${totalCount.toLocaleString()} total exchanges`,
sparkline: throughputSparkline,
borderColor: 'var(--amber)',
},
{
label: 'Success Rate',
value: `${successRate.toFixed(2)}%`,
trend: {
label: failedCount > 0 ? `${failedCount} failed` : 'No errors',
variant: successRate >= 99 ? 'success' as const : successRate >= 97 ? 'warning' as const : 'error' as const,
},
subtitle: `${totalCount - failedCount} succeeded / ${totalCount.toLocaleString()} total`,
borderColor: successRate >= 99 ? 'var(--success)' : 'var(--error)',
},
{
label: 'P99 Latency',
value: `${Math.round(p99Ms)}ms`,
trend: {
label: p99Ms > slaThresholdMs ? 'BREACH' : 'OK',
variant: p99Ms > slaThresholdMs ? 'error' as const : 'success' as const,
},
subtitle: `SLA threshold: ${slaThresholdMs}ms \u00B7 Avg: ${Math.round(avgMs)}ms`,
borderColor: p99Ms > slaThresholdMs ? 'var(--warning)' : 'var(--success)',
},
{
label: 'SLA Compliance',
value: formatSlaCompliance(slaCompliance),
trend: {
label: slaCompliance >= 99.9 ? 'Excellent' : slaCompliance >= 99 ? 'Good' : 'Degraded',
variant: slaCompliance >= 99 ? 'success' as const : slaCompliance >= 95 ? 'warning' as const : 'error' as const,
},
subtitle: `Target: 99.9%`,
borderColor: slaCompliance >= 99 ? 'var(--success)' : 'var(--warning)',
},
{
label: 'Bottleneck',
value: bottleneck ? `${Math.round(bottleneck.avgMs)}ms` : '\u2014',
trend: {
label: bottleneck ? `${bottleneck.pct.toFixed(1)}% of total` : '\u2014',
variant: bottleneck && bottleneck.pct > 50 ? 'error' as const : 'muted' as const,
},
subtitle: bottleneck
? `${bottleneck.processorId} \u00B7 ${Math.round(bottleneck.avgMs)}ms \u00B7 ${bottleneck.pct.toFixed(1)}% of total`
: 'No processor data',
borderColor: 'var(--running)',
},
];
}
// ── Component ───────────────────────────────────────────────────────────────
export default function DashboardL3() {
const { appId, routeId } = useParams<{ appId: string; routeId: string }>();
const { timeRange } = useGlobalFilters();
const timeFrom = timeRange.start.toISOString();
const timeTo = timeRange.end.toISOString();
const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000;
// ── Data hooks ──────────────────────────────────────────────────────────
const { data: stats } = useExecutionStats(timeFrom, timeTo, routeId, appId);
const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo, routeId, appId);
const { data: processorMetrics } = useProcessorMetrics(routeId ?? null, appId);
const { data: topErrors } = useTopErrors(timeFrom, timeTo, appId, routeId);
const { data: diagramLayout } = useDiagramByRoute(appId, routeId);
const { data: appSettings } = useAppSettings(appId);
const slaThresholdMs = appSettings?.slaThresholdMs ?? 300;
// ── Bottleneck (processor with highest avgDurationMs) ───────────────────
const bottleneck = useMemo(() => {
if (!processorMetrics?.length) return null;
const routeAvg = stats?.avgDurationMs ?? 0;
const sorted = [...processorMetrics].sort(
(a: any, b: any) => b.avgDurationMs - a.avgDurationMs,
);
const top = sorted[0];
const pct = routeAvg > 0 ? (top.avgDurationMs / routeAvg) * 100 : 0;
return { processorId: top.processorId, avgMs: top.avgDurationMs, pct };
}, [processorMetrics, stats]);
// ── Sparklines from timeseries ──────────────────────────────────────────
const throughputSparkline = useMemo(
() => (timeseries?.buckets || []).map((b: any) => b.totalCount),
[timeseries],
);
// ── KPI strip ───────────────────────────────────────────────────────────
const kpiItems = useMemo(
() => buildKpiItems(stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds),
[stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds],
);
// ── Chart series ────────────────────────────────────────────────────────
const throughputChartSeries = useMemo(() => [{
label: 'Throughput',
data: (timeseries?.buckets || []).map((b: any, i: number) => ({
x: i,
y: b.totalCount,
})),
}], [timeseries]);
const latencyChartSeries = useMemo(() => [{
label: 'P99',
data: (timeseries?.buckets || []).map((b: any, i: number) => ({
x: i,
y: b.p99DurationMs,
})),
}], [timeseries]);
const errorRateChartSeries = useMemo(() => [{
label: 'Error Rate',
data: (timeseries?.buckets || []).map((b: any, i: number) => ({
x: i,
y: b.totalCount > 0 ? (b.failedCount / b.totalCount) * 100 : 0,
})),
color: 'var(--error)',
}], [timeseries]);
// ── Processor table rows ────────────────────────────────────────────────
const processorRows: ProcessorRow[] = useMemo(() => {
if (!processorMetrics?.length) return [];
const routeAvg = stats?.avgDurationMs ?? 0;
return processorMetrics.map((m: any) => ({
id: m.processorId,
processorId: m.processorId,
processorType: m.processorType,
totalCount: m.totalCount,
avgDurationMs: m.avgDurationMs,
p99DurationMs: m.p99DurationMs,
errorRate: m.errorRate,
pctTime: routeAvg > 0 ? (m.avgDurationMs / routeAvg) * 100 : 0,
}));
}, [processorMetrics, stats]);
// ── Latency heatmap for ProcessDiagram ──────────────────────────────────
const latencyHeatmap = useMemo(() => {
if (!processorMetrics?.length) return new Map();
const totalAvg = processorMetrics.reduce(
(sum: number, m: any) => sum + m.avgDurationMs, 0,
);
const map = new Map<string, { avgDurationMs: number; p99DurationMs: number; pctOfRoute: number }>();
for (const m of processorMetrics) {
map.set(m.processorId, {
avgDurationMs: m.avgDurationMs,
p99DurationMs: m.p99DurationMs,
pctOfRoute: totalAvg > 0 ? (m.avgDurationMs / totalAvg) * 100 : 0,
});
}
return map;
}, [processorMetrics]);
// ── Error table rows ────────────────────────────────────────────────────
const errorRows: ErrorRow[] = useMemo(
() => (topErrors || []).map((e, i) => ({ ...e, id: `${e.errorType}-${i}` })),
[topErrors],
);
return (
<div className={styles.content}>
<div className={styles.refreshIndicator}>
<span className={styles.refreshDot} />
<span className={styles.refreshText}>Auto-refresh: 30s</span>
</div>
{/* KPI Strip */}
<KpiStrip items={kpiItems} />
{/* Charts — 3 in a row */}
{(timeseries?.buckets?.length ?? 0) > 0 && (
<div className={styles.chartRow}>
<Card title="Throughput">
<AreaChart
series={throughputChartSeries}
yLabel="msg/s"
height={200}
/>
</Card>
<Card title="Latency Percentiles">
<LineChart
series={latencyChartSeries}
yLabel="ms"
threshold={{ value: slaThresholdMs, label: `SLA ${slaThresholdMs}ms` }}
height={200}
/>
</Card>
<Card title="Error Rate">
<AreaChart
series={errorRateChartSeries}
yLabel="%"
height={200}
/>
</Card>
</div>
)}
{/* Process Diagram with Latency Heatmap */}
{appId && routeId && (
<div className={styles.diagramSection}>
<ProcessDiagram
application={appId}
routeId={routeId}
diagramLayout={diagramLayout}
latencyHeatmap={latencyHeatmap}
/>
</div>
)}
{/* Processor Metrics Table */}
<div className={styles.tableSection}>
<div className={styles.tableHeader}>
<span className={styles.tableTitle}>Processor Metrics</span>
<div>
<span className={styles.tableMeta}>
{processorRows.length} processor{processorRows.length !== 1 ? 's' : ''}
</span>
</div>
</div>
<DataTable
columns={PROCESSOR_COLUMNS}
data={processorRows}
sortable
defaultSort={{ key: 'p99DurationMs', direction: 'desc' }}
/>
</div>
{/* Top 5 Errors — hidden if empty */}
{errorRows.length > 0 && (
<div className={styles.errorsSection}>
<div className={styles.tableHeader}>
<span className={styles.tableTitle}>Top 5 Errors</span>
<Badge label={`${errorRows.length}`} color="error" />
</div>
<DataTable
columns={ERROR_COLUMNS}
data={errorRows}
sortable
/>
</div>
)}
</div>
);
}

View File

@@ -2,16 +2,20 @@ import { useParams } from 'react-router';
import { lazy, Suspense } from 'react';
import { Spinner } from '@cameleer/design-system';
const RoutesMetrics = lazy(() => import('../Routes/RoutesMetrics'));
const RouteDetail = lazy(() => import('../Routes/RouteDetail'));
const DashboardL1 = lazy(() => import('./DashboardL1'));
const DashboardL2 = lazy(() => import('./DashboardL2'));
const DashboardL3 = lazy(() => import('./DashboardL3'));
const Fallback = <div style={{ display: 'flex', justifyContent: 'center', padding: '4rem' }}><Spinner size="lg" /></div>;
export default function DashboardPage() {
const { routeId } = useParams<{ appId?: string; routeId?: string }>();
const { appId, routeId } = useParams<{ appId?: string; routeId?: string }>();
if (routeId) {
return <Suspense fallback={Fallback}><RouteDetail /></Suspense>;
if (routeId && appId) {
return <Suspense fallback={Fallback}><DashboardL3 /></Suspense>;
}
return <Suspense fallback={Fallback}><RoutesMetrics /></Suspense>;
if (appId) {
return <Suspense fallback={Fallback}><DashboardL2 /></Suspense>;
}
return <Suspense fallback={Fallback}><DashboardL1 /></Suspense>;
}

View File

@@ -0,0 +1,133 @@
.content {
display: flex;
flex-direction: column;
gap: 20px;
}
.refreshIndicator {
display: flex;
align-items: center;
gap: 6px;
justify-content: flex-end;
}
.refreshDot {
width: 7px;
height: 7px;
border-radius: 50%;
background: var(--success);
box-shadow: 0 0 4px rgba(61, 124, 71, 0.5);
animation: pulse 2s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.refreshText {
font-size: 11px;
color: var(--text-muted);
font-family: var(--font-mono);
}
/* Tables */
.tableSection {
background: var(--bg-surface);
border: 1px solid var(--border-subtle);
border-radius: var(--radius-lg);
box-shadow: var(--shadow-card);
overflow: hidden;
}
.tableHeader {
display: flex;
align-items: center;
justify-content: space-between;
padding: 12px 16px;
border-bottom: 1px solid var(--border-subtle);
}
.tableTitle {
font-size: 13px;
font-weight: 600;
color: var(--text-primary);
}
.tableMeta {
font-size: 11px;
color: var(--text-muted);
font-family: var(--font-mono);
}
/* Charts */
.chartGrid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px;
}
.chartRow {
display: grid;
grid-template-columns: 1fr 1fr 1fr;
gap: 16px;
}
/* Cells */
.monoCell {
font-size: 12px;
font-family: var(--font-mono);
color: var(--text-primary);
}
.appNameCell {
display: flex;
align-items: center;
gap: 8px;
font-size: 12px;
font-weight: 500;
color: var(--text-primary);
font-family: var(--font-mono);
cursor: pointer;
}
.appNameCell:hover {
text-decoration: underline;
}
/* Rate coloring */
.rateGood { color: var(--success); }
.rateWarn { color: var(--warning); }
.rateBad { color: var(--error); }
.rateNeutral { color: var(--text-secondary); }
/* Diagram container */
.diagramSection {
background: var(--bg-surface);
border: 1px solid var(--border-subtle);
border-radius: var(--radius-lg);
box-shadow: var(--shadow-card);
overflow: hidden;
height: 280px;
}
/* Table right side (meta + badge) */
.tableRight {
display: flex;
align-items: center;
gap: 10px;
}
/* Chart fill */
.chart {
width: 100%;
}
/* Errors section */
.errorsSection {
background: var(--bg-surface);
border: 1px solid var(--border-subtle);
border-radius: var(--radius-lg);
box-shadow: var(--shadow-card);
overflow: hidden;
}

View File

@@ -0,0 +1,70 @@
import type { AppSettings } from '../../api/queries/dashboard';
export type HealthStatus = 'success' | 'warning' | 'error';
const DEFAULT_SETTINGS: Pick<AppSettings, 'healthErrorWarn' | 'healthErrorCrit' | 'healthSlaWarn' | 'healthSlaCrit'> = {
healthErrorWarn: 1.0,
healthErrorCrit: 5.0,
healthSlaWarn: 99.0,
healthSlaCrit: 95.0,
};
export function computeHealthDot(
errorRate: number,
slaCompliance: number,
settings?: Partial<AppSettings> | null,
): HealthStatus {
const s = { ...DEFAULT_SETTINGS, ...settings };
const errorPct = errorRate * 100;
if (errorPct > s.healthErrorCrit || slaCompliance < s.healthSlaCrit) return 'error';
if (errorPct > s.healthErrorWarn || slaCompliance < s.healthSlaWarn) return 'warning';
return 'success';
}
export function formatThroughput(count: number, windowSeconds: number): string {
if (windowSeconds <= 0) return '0/s';
const tps = count / windowSeconds;
if (tps >= 1000) return `${(tps / 1000).toFixed(1)}k/s`;
if (tps >= 1) return `${tps.toFixed(0)}/s`;
return `${tps.toFixed(2)}/s`;
}
export function formatSlaCompliance(pct: number): string {
if (pct < 0) return '—';
return `${pct.toFixed(1)}%`;
}
export function trendIndicator(current: number, previous: number): { label: string; direction: 'up' | 'down' | 'flat' } {
if (previous === 0) return { label: '—', direction: 'flat' };
const delta = ((current - previous) / previous) * 100;
if (Math.abs(delta) < 0.5) return { label: '—', direction: 'flat' };
return {
label: `${delta > 0 ? '+' : ''}${delta.toFixed(1)}%`,
direction: delta > 0 ? 'up' : 'down',
};
}
export function trendArrow(trend: 'accelerating' | 'stable' | 'decelerating'): string {
switch (trend) {
case 'accelerating': return '\u25B2';
case 'decelerating': return '\u25BC';
default: return '\u2500\u2500';
}
}
export function formatDuration(ms: number): string {
if (ms < 1) return '<1ms';
if (ms < 1000) return `${Math.round(ms)}ms`;
return `${(ms / 1000).toFixed(2)}s`;
}
export function formatRelativeTime(isoString: string): string {
const diff = Date.now() - new Date(isoString).getTime();
const minutes = Math.floor(diff / 60_000);
if (minutes < 1) return 'just now';
if (minutes < 60) return `${minutes} min ago`;
const hours = Math.floor(minutes / 60);
if (hours < 24) return `${hours} hr ago`;
return `${Math.floor(hours / 24)} d ago`;
}