feat: progressive drill-down dashboard with RED metrics and SLA compliance (#94)
Three-level dashboard driven by sidebar selection: - L1 (no selection): all-apps overview with health table, per-app charts - L2 (app selected): route performance table, error velocity, top errors - L3 (route selected): processor table, latency heatmap data, bottleneck KPI Backend: 3 new endpoints (timeseries/by-app, timeseries/by-route, errors/top), per-app SLA settings (app_settings table, V12 migration), exact SLA compliance from executions hypertable, error velocity with acceleration detection. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.app.dto.AppSettingsRequest;
|
||||
import com.cameleer3.server.core.admin.AppSettings;
|
||||
import com.cameleer3.server.core.admin.AppSettingsRepository;
|
||||
import com.cameleer3.server.core.admin.AuditCategory;
|
||||
import com.cameleer3.server.core.admin.AuditResult;
|
||||
import com.cameleer3.server.core.admin.AuditService;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import jakarta.validation.Valid;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PutMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/admin/app-settings")
|
||||
@PreAuthorize("hasAnyRole('ADMIN', 'OPERATOR')")
|
||||
@Tag(name = "App Settings", description = "Per-application dashboard settings (ADMIN/OPERATOR)")
|
||||
public class AppSettingsController {
|
||||
|
||||
private final AppSettingsRepository repository;
|
||||
private final AuditService auditService;
|
||||
|
||||
public AppSettingsController(AppSettingsRepository repository, AuditService auditService) {
|
||||
this.repository = repository;
|
||||
this.auditService = auditService;
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
@Operation(summary = "List all application settings")
|
||||
public ResponseEntity<List<AppSettings>> getAll() {
|
||||
return ResponseEntity.ok(repository.findAll());
|
||||
}
|
||||
|
||||
@GetMapping("/{appId}")
|
||||
@Operation(summary = "Get settings for a specific application (returns defaults if not configured)")
|
||||
public ResponseEntity<AppSettings> getByAppId(@PathVariable String appId) {
|
||||
AppSettings settings = repository.findByAppId(appId).orElse(AppSettings.defaults(appId));
|
||||
return ResponseEntity.ok(settings);
|
||||
}
|
||||
|
||||
@PutMapping("/{appId}")
|
||||
@Operation(summary = "Create or update settings for an application")
|
||||
public ResponseEntity<AppSettings> update(@PathVariable String appId,
|
||||
@Valid @RequestBody AppSettingsRequest request,
|
||||
HttpServletRequest httpRequest) {
|
||||
List<String> errors = request.validate();
|
||||
if (!errors.isEmpty()) {
|
||||
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, String.join("; ", errors));
|
||||
}
|
||||
|
||||
AppSettings saved = repository.save(request.toSettings(appId));
|
||||
auditService.log("update_app_settings", AuditCategory.CONFIG, appId,
|
||||
Map.of("settings", saved), AuditResult.SUCCESS, httpRequest);
|
||||
return ResponseEntity.ok(saved);
|
||||
}
|
||||
|
||||
@DeleteMapping("/{appId}")
|
||||
@Operation(summary = "Delete application settings (reverts to defaults)")
|
||||
public ResponseEntity<Void> delete(@PathVariable String appId, HttpServletRequest httpRequest) {
|
||||
repository.delete(appId);
|
||||
auditService.log("delete_app_settings", AuditCategory.CONFIG, appId,
|
||||
Map.of(), AuditResult.SUCCESS, httpRequest);
|
||||
return ResponseEntity.noContent().build();
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,9 @@ package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.app.dto.ProcessorMetrics;
|
||||
import com.cameleer3.server.app.dto.RouteMetrics;
|
||||
import com.cameleer3.server.core.admin.AppSettings;
|
||||
import com.cameleer3.server.core.admin.AppSettingsRepository;
|
||||
import com.cameleer3.server.core.storage.StatsStore;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.responses.ApiResponse;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
@@ -18,6 +21,7 @@ import java.time.Instant;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/routes")
|
||||
@@ -25,9 +29,14 @@ import java.util.List;
|
||||
public class RouteMetricsController {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
private final StatsStore statsStore;
|
||||
private final AppSettingsRepository appSettingsRepository;
|
||||
|
||||
public RouteMetricsController(JdbcTemplate jdbc) {
|
||||
public RouteMetricsController(JdbcTemplate jdbc, StatsStore statsStore,
|
||||
AppSettingsRepository appSettingsRepository) {
|
||||
this.jdbc = jdbc;
|
||||
this.statsStore = statsStore;
|
||||
this.appSettingsRepository = appSettingsRepository;
|
||||
}
|
||||
|
||||
@GetMapping("/metrics")
|
||||
@@ -78,7 +87,7 @@ public class RouteMetricsController {
|
||||
|
||||
routeKeys.add(new RouteKey(applicationName, routeId));
|
||||
return new RouteMetrics(routeId, applicationName, total, successRate,
|
||||
avgDur, p99Dur, errorRate, tps, List.of());
|
||||
avgDur, p99Dur, errorRate, tps, List.of(), -1.0);
|
||||
}, params.toArray());
|
||||
|
||||
// Fetch sparklines (12 buckets over the time window)
|
||||
@@ -100,13 +109,34 @@ public class RouteMetricsController {
|
||||
m.appId(), m.routeId());
|
||||
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
|
||||
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
|
||||
m.errorRate(), m.throughputPerSec(), sparkline));
|
||||
m.errorRate(), m.throughputPerSec(), sparkline, m.slaCompliance()));
|
||||
} catch (Exception e) {
|
||||
// Leave sparkline empty on error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Enrich with SLA compliance per route
|
||||
if (!metrics.isEmpty()) {
|
||||
// Determine SLA threshold (per-app or default)
|
||||
String effectiveAppId = appId != null ? appId : (metrics.isEmpty() ? null : metrics.get(0).appId());
|
||||
int threshold = appSettingsRepository.findByAppId(effectiveAppId != null ? effectiveAppId : "")
|
||||
.map(AppSettings::slaThresholdMs).orElse(300);
|
||||
|
||||
Map<String, long[]> slaCounts = statsStore.slaCountsByRoute(fromInstant, toInstant,
|
||||
effectiveAppId, threshold);
|
||||
|
||||
for (int i = 0; i < metrics.size(); i++) {
|
||||
RouteMetrics m = metrics.get(i);
|
||||
long[] counts = slaCounts.get(m.routeId());
|
||||
double sla = (counts != null && counts[1] > 0)
|
||||
? counts[0] * 100.0 / counts[1] : 100.0;
|
||||
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
|
||||
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
|
||||
m.errorRate(), m.throughputPerSec(), m.sparkline(), sla));
|
||||
}
|
||||
}
|
||||
|
||||
return ResponseEntity.ok(metrics);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.core.admin.AppSettings;
|
||||
import com.cameleer3.server.core.admin.AppSettingsRepository;
|
||||
import com.cameleer3.server.core.agent.AgentInfo;
|
||||
import com.cameleer3.server.core.agent.AgentRegistryService;
|
||||
import com.cameleer3.server.core.search.ExecutionStats;
|
||||
@@ -8,6 +10,7 @@ import com.cameleer3.server.core.search.SearchRequest;
|
||||
import com.cameleer3.server.core.search.SearchResult;
|
||||
import com.cameleer3.server.core.search.SearchService;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries;
|
||||
import com.cameleer3.server.core.search.TopError;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@@ -20,6 +23,7 @@ import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Search endpoints for querying route executions.
|
||||
@@ -34,10 +38,13 @@ public class SearchController {
|
||||
|
||||
private final SearchService searchService;
|
||||
private final AgentRegistryService registryService;
|
||||
private final AppSettingsRepository appSettingsRepository;
|
||||
|
||||
public SearchController(SearchService searchService, AgentRegistryService registryService) {
|
||||
public SearchController(SearchService searchService, AgentRegistryService registryService,
|
||||
AppSettingsRepository appSettingsRepository) {
|
||||
this.searchService = searchService;
|
||||
this.registryService = registryService;
|
||||
this.appSettingsRepository = appSettingsRepository;
|
||||
}
|
||||
|
||||
@GetMapping("/executions")
|
||||
@@ -87,21 +94,29 @@ public class SearchController {
|
||||
}
|
||||
|
||||
@GetMapping("/stats")
|
||||
@Operation(summary = "Aggregate execution stats (P99 latency, active count)")
|
||||
@Operation(summary = "Aggregate execution stats (P99 latency, active count, SLA compliance)")
|
||||
public ResponseEntity<ExecutionStats> stats(
|
||||
@RequestParam Instant from,
|
||||
@RequestParam(required = false) Instant to,
|
||||
@RequestParam(required = false) String routeId,
|
||||
@RequestParam(required = false) String application) {
|
||||
Instant end = to != null ? to : Instant.now();
|
||||
ExecutionStats stats;
|
||||
if (routeId == null && application == null) {
|
||||
return ResponseEntity.ok(searchService.stats(from, end));
|
||||
stats = searchService.stats(from, end);
|
||||
} else if (routeId == null) {
|
||||
stats = searchService.statsForApp(from, end, application);
|
||||
} else {
|
||||
List<String> agentIds = resolveApplicationToAgentIds(application);
|
||||
stats = searchService.stats(from, end, routeId, agentIds);
|
||||
}
|
||||
if (routeId == null) {
|
||||
return ResponseEntity.ok(searchService.statsForApp(from, end, application));
|
||||
}
|
||||
List<String> agentIds = resolveApplicationToAgentIds(application);
|
||||
return ResponseEntity.ok(searchService.stats(from, end, routeId, agentIds));
|
||||
|
||||
// Enrich with SLA compliance
|
||||
int threshold = appSettingsRepository
|
||||
.findByAppId(application != null ? application : "")
|
||||
.map(AppSettings::slaThresholdMs).orElse(300);
|
||||
double sla = searchService.slaCompliance(from, end, threshold, application, routeId);
|
||||
return ResponseEntity.ok(stats.withSlaCompliance(sla));
|
||||
}
|
||||
|
||||
@GetMapping("/stats/timeseries")
|
||||
@@ -126,6 +141,39 @@ public class SearchController {
|
||||
return ResponseEntity.ok(searchService.timeseries(from, end, buckets, routeId, agentIds));
|
||||
}
|
||||
|
||||
@GetMapping("/stats/timeseries/by-app")
|
||||
@Operation(summary = "Timeseries grouped by application")
|
||||
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByApp(
|
||||
@RequestParam Instant from,
|
||||
@RequestParam(required = false) Instant to,
|
||||
@RequestParam(defaultValue = "24") int buckets) {
|
||||
Instant end = to != null ? to : Instant.now();
|
||||
return ResponseEntity.ok(searchService.timeseriesGroupedByApp(from, end, buckets));
|
||||
}
|
||||
|
||||
@GetMapping("/stats/timeseries/by-route")
|
||||
@Operation(summary = "Timeseries grouped by route for an application")
|
||||
public ResponseEntity<Map<String, StatsTimeseries>> timeseriesByRoute(
|
||||
@RequestParam Instant from,
|
||||
@RequestParam(required = false) Instant to,
|
||||
@RequestParam(defaultValue = "24") int buckets,
|
||||
@RequestParam String application) {
|
||||
Instant end = to != null ? to : Instant.now();
|
||||
return ResponseEntity.ok(searchService.timeseriesGroupedByRoute(from, end, buckets, application));
|
||||
}
|
||||
|
||||
@GetMapping("/errors/top")
|
||||
@Operation(summary = "Top N errors with velocity trend")
|
||||
public ResponseEntity<List<TopError>> topErrors(
|
||||
@RequestParam Instant from,
|
||||
@RequestParam(required = false) Instant to,
|
||||
@RequestParam(required = false) String application,
|
||||
@RequestParam(required = false) String routeId,
|
||||
@RequestParam(defaultValue = "5") int limit) {
|
||||
Instant end = to != null ? to : Instant.now();
|
||||
return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit));
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve an application name to agent IDs.
|
||||
* Returns null if application is null/blank (no filtering).
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.cameleer3.server.app.dto;
|
||||
|
||||
import com.cameleer3.server.core.admin.AppSettings;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.Max;
|
||||
import jakarta.validation.constraints.Min;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Schema(description = "Per-application dashboard settings")
|
||||
public record AppSettingsRequest(
|
||||
@NotNull @Min(1)
|
||||
@Schema(description = "SLA duration threshold in milliseconds")
|
||||
Integer slaThresholdMs,
|
||||
|
||||
@NotNull @Min(0) @Max(100)
|
||||
@Schema(description = "Error rate % threshold for warning (yellow) health dot")
|
||||
Double healthErrorWarn,
|
||||
|
||||
@NotNull @Min(0) @Max(100)
|
||||
@Schema(description = "Error rate % threshold for critical (red) health dot")
|
||||
Double healthErrorCrit,
|
||||
|
||||
@NotNull @Min(0) @Max(100)
|
||||
@Schema(description = "SLA compliance % threshold for warning (yellow) health dot")
|
||||
Double healthSlaWarn,
|
||||
|
||||
@NotNull @Min(0) @Max(100)
|
||||
@Schema(description = "SLA compliance % threshold for critical (red) health dot")
|
||||
Double healthSlaCrit
|
||||
) {
|
||||
|
||||
public AppSettings toSettings(String appId) {
|
||||
Instant now = Instant.now();
|
||||
return new AppSettings(appId, slaThresholdMs, healthErrorWarn, healthErrorCrit,
|
||||
healthSlaWarn, healthSlaCrit, now, now);
|
||||
}
|
||||
|
||||
public List<String> validate() {
|
||||
List<String> errors = new ArrayList<>();
|
||||
if (healthErrorWarn != null && healthErrorCrit != null
|
||||
&& healthErrorWarn > healthErrorCrit) {
|
||||
errors.add("healthErrorWarn must be <= healthErrorCrit");
|
||||
}
|
||||
if (healthSlaWarn != null && healthSlaCrit != null
|
||||
&& healthSlaWarn < healthSlaCrit) {
|
||||
errors.add("healthSlaWarn must be >= healthSlaCrit (higher SLA = healthier)");
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
}
|
||||
@@ -15,5 +15,6 @@ public record RouteMetrics(
|
||||
@NotNull double p99DurationMs,
|
||||
@NotNull double errorRate,
|
||||
@NotNull double throughputPerSec,
|
||||
@NotNull List<Double> sparkline
|
||||
@NotNull List<Double> sparkline,
|
||||
double slaCompliance
|
||||
) {}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.admin.AppSettings;
|
||||
import com.cameleer3.server.core.admin.AppSettingsRepository;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.jdbc.core.RowMapper;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
@Repository
|
||||
public class PostgresAppSettingsRepository implements AppSettingsRepository {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
private static final RowMapper<AppSettings> ROW_MAPPER = (rs, rowNum) -> new AppSettings(
|
||||
rs.getString("app_id"),
|
||||
rs.getInt("sla_threshold_ms"),
|
||||
rs.getDouble("health_error_warn"),
|
||||
rs.getDouble("health_error_crit"),
|
||||
rs.getDouble("health_sla_warn"),
|
||||
rs.getDouble("health_sla_crit"),
|
||||
rs.getTimestamp("created_at").toInstant(),
|
||||
rs.getTimestamp("updated_at").toInstant());
|
||||
|
||||
public PostgresAppSettingsRepository(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<AppSettings> findByAppId(String appId) {
|
||||
List<AppSettings> results = jdbc.query(
|
||||
"SELECT * FROM app_settings WHERE app_id = ?", ROW_MAPPER, appId);
|
||||
return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AppSettings> findAll() {
|
||||
return jdbc.query("SELECT * FROM app_settings ORDER BY app_id", ROW_MAPPER);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppSettings save(AppSettings settings) {
|
||||
jdbc.update("""
|
||||
INSERT INTO app_settings (app_id, sla_threshold_ms, health_error_warn,
|
||||
health_error_crit, health_sla_warn, health_sla_crit, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, now(), now())
|
||||
ON CONFLICT (app_id) DO UPDATE SET
|
||||
sla_threshold_ms = EXCLUDED.sla_threshold_ms,
|
||||
health_error_warn = EXCLUDED.health_error_warn,
|
||||
health_error_crit = EXCLUDED.health_error_crit,
|
||||
health_sla_warn = EXCLUDED.health_sla_warn,
|
||||
health_sla_crit = EXCLUDED.health_sla_crit,
|
||||
updated_at = now()
|
||||
""",
|
||||
settings.appId(), settings.slaThresholdMs(),
|
||||
settings.healthErrorWarn(), settings.healthErrorCrit(),
|
||||
settings.healthSlaWarn(), settings.healthSlaCrit());
|
||||
return findByAppId(settings.appId()).orElseThrow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void delete(String appId) {
|
||||
jdbc.update("DELETE FROM app_settings WHERE app_id = ?", appId);
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package com.cameleer3.server.app.storage;
|
||||
import com.cameleer3.server.core.search.ExecutionStats;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket;
|
||||
import com.cameleer3.server.core.search.TopError;
|
||||
import com.cameleer3.server.core.storage.StatsStore;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Repository;
|
||||
@@ -12,7 +13,9 @@ import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Repository
|
||||
public class PostgresStatsStore implements StatsStore {
|
||||
@@ -184,4 +187,216 @@ public class PostgresStatsStore implements StatsStore {
|
||||
|
||||
return new StatsTimeseries(buckets);
|
||||
}
|
||||
|
||||
// ── Grouped timeseries ────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) {
|
||||
return queryGroupedTimeseries("stats_1m_app", "application_name", from, to,
|
||||
bucketCount, List.of());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
|
||||
int bucketCount, String applicationName) {
|
||||
return queryGroupedTimeseries("stats_1m_route", "route_id", from, to,
|
||||
bucketCount, List.of(new Filter("application_name", applicationName)));
|
||||
}
|
||||
|
||||
private Map<String, StatsTimeseries> queryGroupedTimeseries(
|
||||
String view, String groupCol, Instant from, Instant to,
|
||||
int bucketCount, List<Filter> filters) {
|
||||
|
||||
long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
|
||||
if (intervalSeconds < 60) intervalSeconds = 60;
|
||||
|
||||
String sql = "SELECT time_bucket(? * INTERVAL '1 second', bucket) AS period, " +
|
||||
groupCol + " AS group_key, " +
|
||||
"COALESCE(SUM(total_count), 0) AS total_count, " +
|
||||
"COALESCE(SUM(failed_count), 0) AS failed_count, " +
|
||||
"CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum) / SUM(total_count) ELSE 0 END AS avg_duration, " +
|
||||
"COALESCE(MAX(p99_duration), 0) AS p99_duration, " +
|
||||
"COALESCE(SUM(running_count), 0) AS active_count " +
|
||||
"FROM " + view + " WHERE bucket >= ? AND bucket < ?";
|
||||
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(intervalSeconds);
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
for (Filter f : filters) {
|
||||
sql += " AND " + f.column() + " = ?";
|
||||
params.add(f.value());
|
||||
}
|
||||
sql += " GROUP BY period, group_key ORDER BY period, group_key";
|
||||
|
||||
Map<String, List<TimeseriesBucket>> grouped = new LinkedHashMap<>();
|
||||
jdbc.query(sql, (rs) -> {
|
||||
String key = rs.getString("group_key");
|
||||
TimeseriesBucket bucket = new TimeseriesBucket(
|
||||
rs.getTimestamp("period").toInstant(),
|
||||
rs.getLong("total_count"), rs.getLong("failed_count"),
|
||||
rs.getLong("avg_duration"), rs.getLong("p99_duration"),
|
||||
rs.getLong("active_count"));
|
||||
grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(bucket);
|
||||
}, params.toArray());
|
||||
|
||||
Map<String, StatsTimeseries> result = new LinkedHashMap<>();
|
||||
grouped.forEach((key, buckets) -> result.put(key, new StatsTimeseries(buckets)));
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── SLA compliance ────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public double slaCompliance(Instant from, Instant to, int thresholdMs,
|
||||
String applicationName, String routeId) {
|
||||
String sql = "SELECT " +
|
||||
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
|
||||
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
|
||||
"FROM executions WHERE start_time >= ? AND start_time < ?";
|
||||
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(thresholdMs);
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
if (applicationName != null) {
|
||||
sql += " AND application_name = ?";
|
||||
params.add(applicationName);
|
||||
}
|
||||
if (routeId != null) {
|
||||
sql += " AND route_id = ?";
|
||||
params.add(routeId);
|
||||
}
|
||||
|
||||
return jdbc.query(sql, (rs, rowNum) -> {
|
||||
long total = rs.getLong("total");
|
||||
if (total == 0) return 1.0;
|
||||
return rs.getLong("compliant") * 100.0 / total;
|
||||
}, params.toArray()).stream().findFirst().orElse(1.0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
|
||||
String sql = "SELECT application_name, " +
|
||||
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
|
||||
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
|
||||
"FROM executions WHERE start_time >= ? AND start_time < ? " +
|
||||
"GROUP BY application_name";
|
||||
|
||||
Map<String, long[]> result = new LinkedHashMap<>();
|
||||
jdbc.query(sql, (rs) -> {
|
||||
result.put(rs.getString("application_name"),
|
||||
new long[]{rs.getLong("compliant"), rs.getLong("total")});
|
||||
}, defaultThresholdMs, Timestamp.from(from), Timestamp.from(to));
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
|
||||
String applicationName, int thresholdMs) {
|
||||
String sql = "SELECT route_id, " +
|
||||
"COUNT(*) FILTER (WHERE duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
|
||||
"COUNT(*) FILTER (WHERE status != 'RUNNING') AS total " +
|
||||
"FROM executions WHERE start_time >= ? AND start_time < ? " +
|
||||
"AND application_name = ? GROUP BY route_id";
|
||||
|
||||
Map<String, long[]> result = new LinkedHashMap<>();
|
||||
jdbc.query(sql, (rs) -> {
|
||||
result.put(rs.getString("route_id"),
|
||||
new long[]{rs.getLong("compliant"), rs.getLong("total")});
|
||||
}, thresholdMs, Timestamp.from(from), Timestamp.from(to), applicationName);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── Top errors ────────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public List<TopError> topErrors(Instant from, Instant to, String applicationName,
|
||||
String routeId, int limit) {
|
||||
StringBuilder where = new StringBuilder(
|
||||
"status = 'FAILED' AND start_time >= ? AND start_time < ?");
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
if (applicationName != null) {
|
||||
where.append(" AND application_name = ?");
|
||||
params.add(applicationName);
|
||||
}
|
||||
|
||||
String table;
|
||||
String groupId;
|
||||
if (routeId != null) {
|
||||
// L3: attribute errors to processors
|
||||
table = "processor_executions";
|
||||
groupId = "processor_id";
|
||||
where.append(" AND route_id = ?");
|
||||
params.add(routeId);
|
||||
} else {
|
||||
// L1/L2: attribute errors to routes
|
||||
table = "executions";
|
||||
groupId = "route_id";
|
||||
}
|
||||
|
||||
Instant fiveMinAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
|
||||
Instant tenMinAgo = Instant.now().minus(10, ChronoUnit.MINUTES);
|
||||
|
||||
String sql = "WITH counted AS (" +
|
||||
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
|
||||
" " + groupId + " AS group_id, " +
|
||||
" COUNT(*) AS cnt, MAX(start_time) AS last_seen " +
|
||||
" FROM " + table + " WHERE " + where +
|
||||
" GROUP BY error_key, group_id ORDER BY cnt DESC LIMIT ?" +
|
||||
"), velocity AS (" +
|
||||
" SELECT COALESCE(error_type, LEFT(error_message, 200)) AS error_key, " +
|
||||
" COUNT(*) FILTER (WHERE start_time >= ?) AS recent_5m, " +
|
||||
" COUNT(*) FILTER (WHERE start_time >= ? AND start_time < ?) AS prev_5m " +
|
||||
" FROM " + table + " WHERE " + where +
|
||||
" GROUP BY error_key" +
|
||||
") SELECT c.error_key, c.group_id, c.cnt, c.last_seen, " +
|
||||
" COALESCE(v.recent_5m, 0) / 5.0 AS velocity, " +
|
||||
" CASE " +
|
||||
" WHEN COALESCE(v.recent_5m, 0) > COALESCE(v.prev_5m, 0) * 1.2 THEN 'accelerating' " +
|
||||
" WHEN COALESCE(v.recent_5m, 0) < COALESCE(v.prev_5m, 0) * 0.8 THEN 'decelerating' " +
|
||||
" ELSE 'stable' END AS trend " +
|
||||
"FROM counted c LEFT JOIN velocity v ON c.error_key = v.error_key " +
|
||||
"ORDER BY c.cnt DESC";
|
||||
|
||||
// Build full params: counted-where params + limit + velocity timestamps + velocity-where params
|
||||
List<Object> fullParams = new ArrayList<>(params);
|
||||
fullParams.add(limit);
|
||||
fullParams.add(Timestamp.from(fiveMinAgo));
|
||||
fullParams.add(Timestamp.from(tenMinAgo));
|
||||
fullParams.add(Timestamp.from(fiveMinAgo));
|
||||
fullParams.addAll(params); // same where clause for velocity CTE
|
||||
|
||||
return jdbc.query(sql, (rs, rowNum) -> {
|
||||
String errorKey = rs.getString("error_key");
|
||||
String gid = rs.getString("group_id");
|
||||
return new TopError(
|
||||
errorKey,
|
||||
routeId != null ? routeId : gid, // routeId
|
||||
routeId != null ? gid : null, // processorId (only at L3)
|
||||
rs.getLong("cnt"),
|
||||
rs.getDouble("velocity"),
|
||||
rs.getString("trend"),
|
||||
rs.getTimestamp("last_seen").toInstant());
|
||||
}, fullParams.toArray());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
|
||||
String sql = "SELECT COUNT(DISTINCT COALESCE(error_type, LEFT(error_message, 200))) " +
|
||||
"FROM executions WHERE status = 'FAILED' AND start_time >= ? AND start_time < ?";
|
||||
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
if (applicationName != null) {
|
||||
sql += " AND application_name = ?";
|
||||
params.add(applicationName);
|
||||
}
|
||||
|
||||
Integer count = jdbc.queryForObject(sql, Integer.class, params.toArray());
|
||||
return count != null ? count : 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
-- Per-application dashboard settings (SLA thresholds, health dot thresholds)
|
||||
CREATE TABLE app_settings (
|
||||
app_id TEXT PRIMARY KEY,
|
||||
sla_threshold_ms INTEGER NOT NULL DEFAULT 300,
|
||||
health_error_warn DOUBLE PRECISION NOT NULL DEFAULT 1.0,
|
||||
health_error_crit DOUBLE PRECISION NOT NULL DEFAULT 5.0,
|
||||
health_sla_warn DOUBLE PRECISION NOT NULL DEFAULT 99.0,
|
||||
health_sla_crit DOUBLE PRECISION NOT NULL DEFAULT 95.0,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
Reference in New Issue
Block a user