feat: progressive drill-down dashboard with RED metrics and SLA compliance (#94)

Three-level dashboard driven by sidebar selection:
- L1 (no selection): all-apps overview with health table, per-app charts
- L2 (app selected): route performance table, error velocity, top errors
- L3 (route selected): processor table, latency heatmap data, bottleneck KPI

Backend: 3 new endpoints (timeseries/by-app, timeseries/by-route, errors/top),
per-app SLA settings (app_settings table, V12 migration), exact SLA compliance
from executions hypertable, error velocity with acceleration detection.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-03-29 23:29:20 +02:00
parent b2ae37637d
commit 213aa86c47
21 changed files with 2293 additions and 19 deletions

View File

@@ -0,0 +1,79 @@
package com.cameleer3.server.app.controller;
import com.cameleer3.server.app.dto.AppSettingsRequest;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.admin.AuditCategory;
import com.cameleer3.server.core.admin.AuditResult;
import com.cameleer3.server.core.admin.AuditService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.validation.Valid;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.server.ResponseStatusException;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/v1/admin/app-settings")
@PreAuthorize("hasAnyRole('ADMIN', 'OPERATOR')")
@Tag(name = "App Settings", description = "Per-application dashboard settings (ADMIN/OPERATOR)")
public class AppSettingsController {
private final AppSettingsRepository repository;
private final AuditService auditService;
public AppSettingsController(AppSettingsRepository repository, AuditService auditService) {
this.repository = repository;
this.auditService = auditService;
}
@GetMapping
@Operation(summary = "List all application settings")
public ResponseEntity<List<AppSettings>> getAll() {
return ResponseEntity.ok(repository.findAll());
}
@GetMapping("/{appId}")
@Operation(summary = "Get settings for a specific application (returns defaults if not configured)")
public ResponseEntity<AppSettings> getByAppId(@PathVariable String appId) {
AppSettings settings = repository.findByAppId(appId).orElse(AppSettings.defaults(appId));
return ResponseEntity.ok(settings);
}
@PutMapping("/{appId}")
@Operation(summary = "Create or update settings for an application")
public ResponseEntity<AppSettings> update(@PathVariable String appId,
@Valid @RequestBody AppSettingsRequest request,
HttpServletRequest httpRequest) {
List<String> errors = request.validate();
if (!errors.isEmpty()) {
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, String.join("; ", errors));
}
AppSettings saved = repository.save(request.toSettings(appId));
auditService.log("update_app_settings", AuditCategory.CONFIG, appId,
Map.of("settings", saved), AuditResult.SUCCESS, httpRequest);
return ResponseEntity.ok(saved);
}
@DeleteMapping("/{appId}")
@Operation(summary = "Delete application settings (reverts to defaults)")
public ResponseEntity<Void> delete(@PathVariable String appId, HttpServletRequest httpRequest) {
repository.delete(appId);
auditService.log("delete_app_settings", AuditCategory.CONFIG, appId,
Map.of(), AuditResult.SUCCESS, httpRequest);
return ResponseEntity.noContent().build();
}
}

View File

@@ -2,6 +2,9 @@ package com.cameleer3.server.app.controller;
import com.cameleer3.server.app.dto.ProcessorMetrics;
import com.cameleer3.server.app.dto.RouteMetrics;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.storage.StatsStore;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
@@ -18,6 +21,7 @@ import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/v1/routes")
@@ -25,9 +29,14 @@ import java.util.List;
public class RouteMetricsController {
private final JdbcTemplate jdbc;
private final StatsStore statsStore;
private final AppSettingsRepository appSettingsRepository;
public RouteMetricsController(JdbcTemplate jdbc) {
public RouteMetricsController(JdbcTemplate jdbc, StatsStore statsStore,
AppSettingsRepository appSettingsRepository) {
this.jdbc = jdbc;
this.statsStore = statsStore;
this.appSettingsRepository = appSettingsRepository;
}
@GetMapping("/metrics")
@@ -78,7 +87,7 @@ public class RouteMetricsController {
routeKeys.add(new RouteKey(applicationName, routeId));
return new RouteMetrics(routeId, applicationName, total, successRate,
avgDur, p99Dur, errorRate, tps, List.of());
avgDur, p99Dur, errorRate, tps, List.of(), -1.0);
}, params.toArray());
// Fetch sparklines (12 buckets over the time window)
@@ -100,13 +109,34 @@ public class RouteMetricsController {
m.appId(), m.routeId());
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
m.errorRate(), m.throughputPerSec(), sparkline));
m.errorRate(), m.throughputPerSec(), sparkline, m.slaCompliance()));
} catch (Exception e) {
// Leave sparkline empty on error
}
}
}
// Enrich with SLA compliance per route
if (!metrics.isEmpty()) {
// Determine SLA threshold (per-app or default)
String effectiveAppId = appId != null ? appId : (metrics.isEmpty() ? null : metrics.get(0).appId());
int threshold = appSettingsRepository.findByAppId(effectiveAppId != null ? effectiveAppId : "")
.map(AppSettings::slaThresholdMs).orElse(300);
Map<String, long[]> slaCounts = statsStore.slaCountsByRoute(fromInstant, toInstant,
effectiveAppId, threshold);
for (int i = 0; i < metrics.size(); i++) {
RouteMetrics m = metrics.get(i);
long[] counts = slaCounts.get(m.routeId());
double sla = (counts != null && counts[1] > 0)
? counts[0] * 100.0 / counts[1] : 100.0;
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
m.errorRate(), m.throughputPerSec(), m.sparkline(), sla));
}
}
return ResponseEntity.ok(metrics);
}

View File

@@ -1,5 +1,7 @@
package com.cameleer3.server.app.controller;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import com.cameleer3.server.core.agent.AgentInfo;
import com.cameleer3.server.core.agent.AgentRegistryService;
import com.cameleer3.server.core.search.ExecutionStats;
@@ -8,6 +10,7 @@ import com.cameleer3.server.core.search.SearchRequest;
import com.cameleer3.server.core.search.SearchResult;
import com.cameleer3.server.core.search.SearchService;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.TopError;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.springframework.http.ResponseEntity;
@@ -20,6 +23,7 @@ import org.springframework.web.bind.annotation.RestController;
import java.time.Instant;
import java.util.List;
import java.util.Map;
/**
* Search endpoints for querying route executions.
@@ -34,10 +38,13 @@ public class SearchController {
private final SearchService searchService;
private final AgentRegistryService registryService;
private final AppSettingsRepository appSettingsRepository;
public SearchController(SearchService searchService, AgentRegistryService registryService) {
public SearchController(SearchService searchService, AgentRegistryService registryService,
AppSettingsRepository appSettingsRepository) {
this.searchService = searchService;
this.registryService = registryService;
this.appSettingsRepository = appSettingsRepository;
}
@GetMapping("/executions")
@@ -87,21 +94,29 @@ public class SearchController {
}
@GetMapping("/stats")
@Operation(summary = "Aggregate execution stats (P99 latency, active count)")
@Operation(summary = "Aggregate execution stats (P99 latency, active count, SLA compliance)")
public ResponseEntity<ExecutionStats> stats(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(required = false) String routeId,
@RequestParam(required = false) String application) {
Instant end = to != null ? to : Instant.now();
ExecutionStats stats;
if (routeId == null && application == null) {
return ResponseEntity.ok(searchService.stats(from, end));
stats = searchService.stats(from, end);
} else if (routeId == null) {
stats = searchService.statsForApp(from, end, application);
} else {
List<String> agentIds = resolveApplicationToAgentIds(application);
stats = searchService.stats(from, end, routeId, agentIds);
}
if (routeId == null) {
return ResponseEntity.ok(searchService.statsForApp(from, end, application));
}
List<String> agentIds = resolveApplicationToAgentIds(application);
return ResponseEntity.ok(searchService.stats(from, end, routeId, agentIds));
// Enrich with SLA compliance
int threshold = appSettingsRepository
.findByAppId(application != null ? application : "")
.map(AppSettings::slaThresholdMs).orElse(300);
double sla = searchService.slaCompliance(from, end, threshold, application, routeId);
return ResponseEntity.ok(stats.withSlaCompliance(sla));
}
@GetMapping("/stats/timeseries")
@@ -126,6 +141,39 @@ public class SearchController {
return ResponseEntity.ok(searchService.timeseries(from, end, buckets, routeId, agentIds));
}
@GetMapping("/stats/timeseries/by-app")
@Operation(summary = "Timeseries grouped by application")
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByApp(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(defaultValue = "24") int buckets) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.timeseriesGroupedByApp(from, end, buckets));
}
@GetMapping("/stats/timeseries/by-route")
@Operation(summary = "Timeseries grouped by route for an application")
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByRoute(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(defaultValue = "24") int buckets,
@RequestParam String application) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.timeseriesGroupedByRoute(from, end, buckets, application));
}
@GetMapping("/errors/top")
@Operation(summary = "Top N errors with velocity trend")
public ResponseEntity<List<TopError>> topErrors(
@RequestParam Instant from,
@RequestParam(required = false) Instant to,
@RequestParam(required = false) String application,
@RequestParam(required = false) String routeId,
@RequestParam(defaultValue = "5") int limit) {
Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit));
}
/**
* Resolve an application name to agent IDs.
* Returns null if application is null/blank (no filtering).

View File

@@ -0,0 +1,54 @@
package com.cameleer3.server.app.dto;
import com.cameleer3.server.core.admin.AppSettings;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotNull;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
@Schema(description = "Per-application dashboard settings")
public record AppSettingsRequest(
@NotNull @Min(1)
@Schema(description = "SLA duration threshold in milliseconds")
Integer slaThresholdMs,
@NotNull @Min(0) @Max(100)
@Schema(description = "Error rate % threshold for warning (yellow) health dot")
Double healthErrorWarn,
@NotNull @Min(0) @Max(100)
@Schema(description = "Error rate % threshold for critical (red) health dot")
Double healthErrorCrit,
@NotNull @Min(0) @Max(100)
@Schema(description = "SLA compliance % threshold for warning (yellow) health dot")
Double healthSlaWarn,
@NotNull @Min(0) @Max(100)
@Schema(description = "SLA compliance % threshold for critical (red) health dot")
Double healthSlaCrit
) {
public AppSettings toSettings(String appId) {
Instant now = Instant.now();
return new AppSettings(appId, slaThresholdMs, healthErrorWarn, healthErrorCrit,
healthSlaWarn, healthSlaCrit, now, now);
}
public List<String> validate() {
List<String> errors = new ArrayList<>();
if (healthErrorWarn != null && healthErrorCrit != null
&& healthErrorWarn > healthErrorCrit) {
errors.add("healthErrorWarn must be <= healthErrorCrit");
}
if (healthSlaWarn != null && healthSlaCrit != null
&& healthSlaWarn < healthSlaCrit) {
errors.add("healthSlaWarn must be >= healthSlaCrit (higher SLA = healthier)");
}
return errors;
}
}

View File

@@ -15,5 +15,6 @@ public record RouteMetrics(
@NotNull double p99DurationMs,
@NotNull double errorRate,
@NotNull double throughputPerSec,
@NotNull List<Double> sparkline
@NotNull List<Double> sparkline,
double slaCompliance
) {}

View File

@@ -0,0 +1,67 @@
package com.cameleer3.server.app.storage;
import com.cameleer3.server.core.admin.AppSettings;
import com.cameleer3.server.core.admin.AppSettingsRepository;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
@Repository
public class PostgresAppSettingsRepository implements AppSettingsRepository {
private final JdbcTemplate jdbc;
private static final RowMapper<AppSettings> ROW_MAPPER = (rs, rowNum) -> new AppSettings(
rs.getString("app_id"),
rs.getInt("sla_threshold_ms"),
rs.getDouble("health_error_warn"),
rs.getDouble("health_error_crit"),
rs.getDouble("health_sla_warn"),
rs.getDouble("health_sla_crit"),
rs.getTimestamp("created_at").toInstant(),
rs.getTimestamp("updated_at").toInstant());
public PostgresAppSettingsRepository(JdbcTemplate jdbc) {
this.jdbc = jdbc;
}
@Override
public Optional<AppSettings> findByAppId(String appId) {
List<AppSettings> results = jdbc.query(
"SELECT * FROM app_settings WHERE app_id = ?", ROW_MAPPER, appId);
return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
}
@Override
public List<AppSettings> findAll() {
return jdbc.query("SELECT * FROM app_settings ORDER BY app_id", ROW_MAPPER);
}
@Override
public AppSettings save(AppSettings settings) {
jdbc.update("""
INSERT INTO app_settings (app_id, sla_threshold_ms, health_error_warn,
health_error_crit, health_sla_warn, health_sla_crit, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, now(), now())
ON CONFLICT (app_id) DO UPDATE SET
sla_threshold_ms = EXCLUDED.sla_threshold_ms,
health_error_warn = EXCLUDED.health_error_warn,
health_error_crit = EXCLUDED.health_error_crit,
health_sla_warn = EXCLUDED.health_sla_warn,
health_sla_crit = EXCLUDED.health_sla_crit,
updated_at = now()
""",
settings.appId(), settings.slaThresholdMs(),
settings.healthErrorWarn(), settings.healthErrorCrit(),
settings.healthSlaWarn(), settings.healthSlaCrit());
return findByAppId(settings.appId()).orElseThrow();
}
@Override
public void delete(String appId) {
jdbc.update("DELETE FROM app_settings WHERE app_id = ?", appId);
}
}

View File

@@ -3,6 +3,7 @@ package com.cameleer3.server.app.storage;
import com.cameleer3.server.core.search.ExecutionStats;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket;
import com.cameleer3.server.core.search.TopError;
import com.cameleer3.server.core.storage.StatsStore;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
@@ -12,7 +13,9 @@ import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@Repository
public class PostgresStatsStore implements StatsStore {
@@ -184,4 +187,216 @@ public class PostgresStatsStore implements StatsStore {
return new StatsTimeseries(buckets);
}
// ── Grouped timeseries ────────────────────────────────────────────────
@Override
public Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) {
return queryGroupedTimeseries("stats_1m_app", "application_name", from, to,
bucketCount, List.of());
}
@Override
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
int bucketCount, String applicationName) {
return queryGroupedTimeseries("stats_1m_route", "route_id", from, to,
bucketCount, List.of(new Filter("application_name", applicationName)));
}
private Map<String, StatsTimeseries> queryGroupedTimeseries(
String view, String groupCol, Instant from, Instant to,
int bucketCount, List<Filter> filters) {
long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
if (intervalSeconds < 60) intervalSeconds = 60;
String sql = "SELECT time_bucket(? * INTERVAL '1 second', bucket) AS period, " +
groupCol + " AS group_key, " +
"COALESCE(SUM(total_count), 0) AS total_count, " +
"COALESCE(SUM(failed_count), 0) AS failed_count, " +
"CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum) / SUM(total_count) ELSE 0 END AS avg_duration, " +
"COALESCE(MAX(p99_duration), 0) AS p99_duration, " +
"COALESCE(SUM(running_count), 0) AS active_count " +
"FROM " + view + " WHERE bucket >= ? AND bucket < ?";
List<Object> params = new ArrayList<>();
params.add(intervalSeconds);
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
for (Filter f : filters) {
sql += " AND " + f.column() + " = ?";
params.add(f.value());
}
sql += " GROUP BY period, group_key ORDER BY period, group_key";
Map<String, List<TimeseriesBucket>> grouped = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
String key = rs.getString("group_key");
TimeseriesBucket bucket = new TimeseriesBucket(
rs.getTimestamp("period").toInstant(),
rs.getLong("total_count"), rs.getLong("failed_count"),
rs.getLong("avg_duration"), rs.getLong("p99_duration"),
rs.getLong("active_count"));
grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(bucket);
}, params.toArray());
Map<String, StatsTimeseries> result = new LinkedHashMap<>();
grouped.forEach((key, buckets) -> result.put(key, new StatsTimeseries(buckets)));
return result;
}
// ── SLA compliance ────────────────────────────────────────────────────
@Override
public double slaCompliance(Instant from, Instant to, int thresholdMs,
String applicationName, String routeId) {
String sql = "SELECT " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ?";
List<Object> params = new ArrayList<>();
params.add(thresholdMs);
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
sql += " AND application_name = ?";
params.add(applicationName);
}
if (routeId != null) {
sql += " AND route_id = ?";
params.add(routeId);
}
return jdbc.query(sql, (rs, rowNum) -> {
long total = rs.getLong("total");
if (total == 0) return 1.0;
return rs.getLong("compliant") * 100.0 / total;
}, params.toArray()).stream().findFirst().orElse(1.0);
}
@Override
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
String sql = "SELECT application_name, " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ? " +
"GROUP BY application_name";
Map<String, long[]> result = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
result.put(rs.getString("application_name"),
new long[]{rs.getLong("compliant"), rs.getLong("total")});
}, defaultThresholdMs, Timestamp.from(from), Timestamp.from(to));
return result;
}
@Override
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
String applicationName, int thresholdMs) {
String sql = "SELECT route_id, " +
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
"FROM executions WHERE start_time >= ? AND start_time < ? " +
"AND application_name = ? GROUP BY route_id";
Map<String, long[]> result = new LinkedHashMap<>();
jdbc.query(sql, (rs) -> {
result.put(rs.getString("route_id"),
new long[]{rs.getLong("compliant"), rs.getLong("total")});
}, thresholdMs, Timestamp.from(from), Timestamp.from(to), applicationName);
return result;
}
// ── Top errors ────────────────────────────────────────────────────────
@Override
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
String routeId, int limit) {
StringBuilder where = new StringBuilder(
"status = 'FAILED' AND start_time >= ? AND start_time < ?");
List<Object> params = new ArrayList<>();
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
where.append(" AND application_name = ?");
params.add(applicationName);
}
String table;
String groupId;
if (routeId != null) {
// L3: attribute errors to processors
table = "processor_executions";
groupId = "processor_id";
where.append(" AND route_id = ?");
params.add(routeId);
} else {
// L1/L2: attribute errors to routes
table = "executions";
groupId = "route_id";
}
Instant fiveMinAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
Instant tenMinAgo = Instant.now().minus(10, ChronoUnit.MINUTES);
String sql = "WITH counted AS (" +
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
" " + groupId + " AS group_id, " +
" COUNT(*) AS cnt, MAX(start_time) AS last_seen " +
" FROM " + table + " WHERE " + where +
" GROUP BY error_key, group_id ORDER BY cnt DESC LIMIT ?" +
"), velocity AS (" +
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
" COUNT(*) FILTER (WHERE start_time >= ?) AS recent_5m, " +
" COUNT(*) FILTER (WHERE start_time >= ? AND start_time < ?) AS prev_5m " +
" FROM " + table + " WHERE " + where +
" GROUP BY error_key" +
") SELECT c.error_key, c.group_id, c.cnt, c.last_seen, " +
" COALESCE(v.recent_5m, 0) / 5.0 AS velocity, " +
" CASE " +
" WHEN COALESCE(v.recent_5m, 0) > COALESCE(v.prev_5m, 0) * 1.2 THEN 'accelerating' " +
" WHEN COALESCE(v.recent_5m, 0) < COALESCE(v.prev_5m, 0) * 0.8 THEN 'decelerating' " +
" ELSE 'stable' END AS trend " +
"FROM counted c LEFT JOIN velocity v ON c.error_key = v.error_key " +
"ORDER BY c.cnt DESC";
// Build full params: counted-where params + limit + velocity timestamps + velocity-where params
List<Object> fullParams = new ArrayList<>(params);
fullParams.add(limit);
fullParams.add(Timestamp.from(fiveMinAgo));
fullParams.add(Timestamp.from(tenMinAgo));
fullParams.add(Timestamp.from(fiveMinAgo));
fullParams.addAll(params); // same where clause for velocity CTE
return jdbc.query(sql, (rs, rowNum) -> {
String errorKey = rs.getString("error_key");
String gid = rs.getString("group_id");
return new TopError(
errorKey,
routeId != null ? routeId : gid, // routeId
routeId != null ? gid : null, // processorId (only at L3)
rs.getLong("cnt"),
rs.getDouble("velocity"),
rs.getString("trend"),
rs.getTimestamp("last_seen").toInstant());
}, fullParams.toArray());
}
@Override
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
String sql = "SELECT COUNT(DISTINCT COALESCE(error_type, LEFT(error_message, 200))) " +
"FROM executions WHERE status = 'FAILED' AND start_time >= ? AND start_time < ?";
List<Object> params = new ArrayList<>();
params.add(Timestamp.from(from));
params.add(Timestamp.from(to));
if (applicationName != null) {
sql += " AND application_name = ?";
params.add(applicationName);
}
Integer count = jdbc.queryForObject(sql, Integer.class, params.toArray());
return count != null ? count : 0;
}
}

View File

@@ -0,0 +1,11 @@
-- Per-application dashboard settings (SLA thresholds, health dot thresholds)
CREATE TABLE app_settings (
app_id TEXT PRIMARY KEY,
sla_threshold_ms INTEGER NOT NULL DEFAULT 300,
health_error_warn DOUBLE PRECISION NOT NULL DEFAULT 1.0,
health_error_crit DOUBLE PRECISION NOT NULL DEFAULT 5.0,
health_sla_warn DOUBLE PRECISION NOT NULL DEFAULT 99.0,
health_sla_crit DOUBLE PRECISION NOT NULL DEFAULT 95.0,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);