From 657dc2d407318bcb9c4a7e63649e237e1b874ce9 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Sun, 19 Apr 2026 19:58:12 +0200 Subject: [PATCH] feat(alerting): AlertingProperties + AlertStateTransitions state machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AlertingProperties @ConfigurationProperties with effective*() accessors and 5000 ms floor clamp on evaluatorTickIntervalMs; warn logged at startup - AlertStateTransitions pure static state machine: Clear/Firing/Batch/Error branches, PENDING→FIRING promotion on forDuration elapsed; Batch delegated to job - AlertInstance wither helpers: withState, withFiredAt, withResolvedAt, withAck, withSilenced, withTitleMessage, withLastNotifiedAt, withContext - AlertingBeanConfig gains @EnableConfigurationProperties(AlertingProperties), alertingInstanceId bean (hostname:pid), alertingClock bean, PerKindCircuitBreaker bean wired from props - 12 unit tests in AlertStateTransitionsTest covering all transitions Co-Authored-By: Claude Sonnet 4.6 --- .../alerting/config/AlertingBeanConfig.java | 39 ++++ .../alerting/config/AlertingProperties.java | 73 ++++++++ .../alerting/eval/AlertStateTransitions.java | 123 +++++++++++++ .../eval/AlertStateTransitionsTest.java | 168 ++++++++++++++++++ .../server/core/alerting/AlertInstance.java | 58 ++++++ 5 files changed, 461 insertions(+) create mode 100644 cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingProperties.java create mode 100644 cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/eval/AlertStateTransitions.java create mode 100644 cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/eval/AlertStateTransitionsTest.java diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingBeanConfig.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingBeanConfig.java index 
55ef6537..f41e0e58 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingBeanConfig.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingBeanConfig.java @@ -1,15 +1,25 @@ package com.cameleer.server.app.alerting.config; +import com.cameleer.server.app.alerting.eval.PerKindCircuitBreaker; import com.cameleer.server.app.alerting.storage.*; import com.cameleer.server.core.alerting.*; import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.jdbc.core.JdbcTemplate; +import java.net.InetAddress; +import java.time.Clock; + @Configuration +@EnableConfigurationProperties(AlertingProperties.class) public class AlertingBeanConfig { + private static final Logger log = LoggerFactory.getLogger(AlertingBeanConfig.class); + @Bean public AlertRuleRepository alertRuleRepository(JdbcTemplate jdbc, ObjectMapper om) { return new PostgresAlertRuleRepository(jdbc, om); @@ -34,4 +44,33 @@ public class AlertingBeanConfig { public AlertReadRepository alertReadRepository(JdbcTemplate jdbc) { return new PostgresAlertReadRepository(jdbc); } + + @Bean + public Clock alertingClock() { + return Clock.systemDefaultZone(); + } + + @Bean("alertingInstanceId") + public String alertingInstanceId() { + String hostname; + try { + hostname = InetAddress.getLocalHost().getHostName(); + } catch (Exception e) { + hostname = "unknown"; + } + return hostname + ":" + ProcessHandle.current().pid(); + } + + @Bean + public PerKindCircuitBreaker perKindCircuitBreaker(AlertingProperties props) { + if (props.evaluatorTickIntervalMs() != null + && props.evaluatorTickIntervalMs() < 5000) { + log.warn("cameleer.server.alerting.evaluatorTickIntervalMs={} is below the 
5000 ms floor; clamping to 5000 ms", + props.evaluatorTickIntervalMs()); + } + return new PerKindCircuitBreaker( + props.cbFailThreshold(), + props.cbWindowSeconds(), + props.cbCooldownSeconds()); + } } diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingProperties.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingProperties.java new file mode 100644 index 00000000..66c74803 --- /dev/null +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/config/AlertingProperties.java @@ -0,0 +1,73 @@ +package com.cameleer.server.app.alerting.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; + +@ConfigurationProperties("cameleer.server.alerting") +public record AlertingProperties( + Integer evaluatorTickIntervalMs, + Integer evaluatorBatchSize, + Integer claimTtlSeconds, + Integer notificationTickIntervalMs, + Integer notificationBatchSize, + Boolean inTickCacheEnabled, + Integer circuitBreakerFailThreshold, + Integer circuitBreakerWindowSeconds, + Integer circuitBreakerCooldownSeconds, + Integer eventRetentionDays, + Integer notificationRetentionDays, + Integer webhookTimeoutMs, + Integer webhookMaxAttempts) { + + public int effectiveEvaluatorTickIntervalMs() { + int raw = evaluatorTickIntervalMs == null ? 5000 : evaluatorTickIntervalMs; + return Math.max(5000, raw); // floor: no faster than 5 s + } + + public int effectiveEvaluatorBatchSize() { + return evaluatorBatchSize == null ? 20 : evaluatorBatchSize; + } + + public int effectiveClaimTtlSeconds() { + return claimTtlSeconds == null ? 30 : claimTtlSeconds; + } + + public int effectiveNotificationTickIntervalMs() { + return notificationTickIntervalMs == null ? 5000 : notificationTickIntervalMs; + } + + public int effectiveNotificationBatchSize() { + return notificationBatchSize == null ? 
50 : notificationBatchSize; + } + + public boolean effectiveInTickCacheEnabled() { + return inTickCacheEnabled == null || inTickCacheEnabled; + } + + public int effectiveEventRetentionDays() { + return eventRetentionDays == null ? 90 : eventRetentionDays; + } + + public int effectiveNotificationRetentionDays() { + return notificationRetentionDays == null ? 30 : notificationRetentionDays; + } + + public int effectiveWebhookTimeoutMs() { + return webhookTimeoutMs == null ? 5000 : webhookTimeoutMs; + } + + public int effectiveWebhookMaxAttempts() { + return webhookMaxAttempts == null ? 3 : webhookMaxAttempts; + } + + public int cbFailThreshold() { + return circuitBreakerFailThreshold == null ? 5 : circuitBreakerFailThreshold; + } + + public int cbWindowSeconds() { + return circuitBreakerWindowSeconds == null ? 30 : circuitBreakerWindowSeconds; + } + + public int cbCooldownSeconds() { + return circuitBreakerCooldownSeconds == null ? 60 : circuitBreakerCooldownSeconds; + } +} diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/eval/AlertStateTransitions.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/eval/AlertStateTransitions.java new file mode 100644 index 00000000..44453595 --- /dev/null +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/alerting/eval/AlertStateTransitions.java @@ -0,0 +1,123 @@ +package com.cameleer.server.app.alerting.eval; + +import com.cameleer.server.core.alerting.AlertInstance; +import com.cameleer.server.core.alerting.AlertRule; +import com.cameleer.server.core.alerting.AlertSeverity; +import com.cameleer.server.core.alerting.AlertState; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; + +/** + * Pure, stateless state-machine for alert instance transitions. + *

+ * Given the current open instance (nullable) and an EvalResult, returns the new/updated + * AlertInstance or {@link Optional#empty()} when no action is needed. + *

+ * Batch results must be handled directly in the job; this helper returns empty for them. + */ +public final class AlertStateTransitions { + + private AlertStateTransitions() {} + + /** + * Apply an EvalResult to the current open AlertInstance. + * + * @param current the open instance for this rule (PENDING / FIRING / ACKNOWLEDGED), or null if none + * @param result the evaluator outcome + * @param rule the rule being evaluated + * @param now wall-clock instant for the current tick + * @return the new or updated AlertInstance, or empty when nothing should change + */ + public static Optional<AlertInstance> apply( + AlertInstance current, EvalResult result, AlertRule rule, Instant now) { + + if (result instanceof EvalResult.Clear) return onClear(current, now); + if (result instanceof EvalResult.Firing f) return onFiring(current, f, rule, now); + // EvalResult.Error and EvalResult.Batch — no action (Batch handled by the job directly) + return Optional.empty(); + } + + // ------------------------------------------------------------------------- + // Clear branch + // ------------------------------------------------------------------------- + + private static Optional<AlertInstance> onClear(AlertInstance current, Instant now) { + if (current == null) return Optional.empty(); // no open instance — no-op + if (current.state() == AlertState.RESOLVED) return Optional.empty(); // already resolved + // Any open state (PENDING / FIRING / ACKNOWLEDGED) → RESOLVED + return Optional.of(current + .withState(AlertState.RESOLVED) + .withResolvedAt(now)); + } + + // ------------------------------------------------------------------------- + // Firing branch + // ------------------------------------------------------------------------- + + private static Optional<AlertInstance> onFiring( + AlertInstance current, EvalResult.Firing f, AlertRule rule, Instant now) { + + if (current == null) { + // No open instance — create a new one + AlertState initial = rule.forDurationSeconds() > 0 + ? 
AlertState.PENDING + : AlertState.FIRING; + return Optional.of(newInstance(rule, f, initial, now)); + } + + return switch (current.state()) { + case PENDING -> { + // Check whether the forDuration window has elapsed + Instant promoteAt = current.firedAt().plusSeconds(rule.forDurationSeconds()); + if (!promoteAt.isAfter(now)) { + // Promote to FIRING; firedAt is reset to the promotion instant (now), not the PENDING start + yield Optional.of(current + .withState(AlertState.FIRING) + .withFiredAt(now)); + } + // Still within forDuration — stay PENDING, nothing to persist + yield Optional.empty(); + } + // FIRING / ACKNOWLEDGED — re-notification cadence handled by the dispatcher + case FIRING, ACKNOWLEDGED -> Optional.empty(); + // RESOLVED should never appear as the "current open" instance, but guard anyway + case RESOLVED -> Optional.empty(); + }; + } + + // ------------------------------------------------------------------------- + // Factory helpers + // ------------------------------------------------------------------------- + + /** + * Creates a brand-new AlertInstance from a rule + Firing result. + * title/message are left empty here; the job enriches them via MustacheRenderer after. + */ + static AlertInstance newInstance(AlertRule rule, EvalResult.Firing f, AlertState state, Instant now) { + return new AlertInstance( + UUID.randomUUID(), + rule.id(), + Map.of(), // ruleSnapshot — caller (job) fills in via ObjectMapper + rule.environmentId(), + state, + rule.severity() != null ? rule.severity() : AlertSeverity.WARNING, + now, // firedAt + null, // ackedAt + null, // ackedBy + null, // resolvedAt + null, // lastNotifiedAt + false, // silenced + f.currentValue(), + f.threshold(), + f.context() != null ? 
f.context() : Map.of(), + "", // title — rendered by job + "", // message — rendered by job + List.of(), + List.of(), + List.of()); + } +} diff --git a/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/eval/AlertStateTransitionsTest.java b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/eval/AlertStateTransitionsTest.java new file mode 100644 index 00000000..29d07a81 --- /dev/null +++ b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/eval/AlertStateTransitionsTest.java @@ -0,0 +1,168 @@ +package com.cameleer.server.app.alerting.eval; + +import com.cameleer.server.core.alerting.*; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; + +class AlertStateTransitionsTest { + + private static final Instant NOW = Instant.parse("2026-04-19T12:00:00Z"); + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private AlertRule ruleWith(int forDurationSeconds) { + return new AlertRule( + UUID.randomUUID(), UUID.randomUUID(), "test-rule", null, + AlertSeverity.WARNING, true, ConditionKind.AGENT_STATE, + new AgentStateCondition(new AlertScope(null, null, null), "DEAD", 60), + 60, forDurationSeconds, 60, + "{{rule.name}} fired", "Alert: {{alert.state}}", + List.of(), List.of(), + NOW, null, null, Map.of(), + NOW, "u1", NOW, "u1"); + } + + private AlertInstance openInstance(AlertState state, Instant firedAt, String ackedBy) { + return new AlertInstance( + UUID.randomUUID(), UUID.randomUUID(), Map.of(), UUID.randomUUID(), + state, AlertSeverity.WARNING, + firedAt, null, ackedBy, null, null, false, + 1.0, null, Map.of(), "title", "msg", + List.of(), List.of(), List.of()); + } + + private static final EvalResult.Firing FIRING_RESULT = + new EvalResult.Firing(2500.0, 
2000.0, Map.of()); + + // ------------------------------------------------------------------------- + // Clear branch + // ------------------------------------------------------------------------- + + @Test + void clearWithNoOpenInstanceIsNoOp() { + var next = AlertStateTransitions.apply(null, EvalResult.Clear.INSTANCE, ruleWith(0), NOW); + assertThat(next).isEmpty(); + } + + @Test + void clearWithAlreadyResolvedInstanceIsNoOp() { + var resolved = openInstance(AlertState.RESOLVED, NOW.minusSeconds(120), null); + var next = AlertStateTransitions.apply(resolved, EvalResult.Clear.INSTANCE, ruleWith(0), NOW); + assertThat(next).isEmpty(); + } + + @Test + void firingClearTransitionsToResolved() { + var firing = openInstance(AlertState.FIRING, NOW.minusSeconds(90), null); + var next = AlertStateTransitions.apply(firing, EvalResult.Clear.INSTANCE, ruleWith(0), NOW); + assertThat(next).hasValueSatisfying(i -> { + assertThat(i.state()).isEqualTo(AlertState.RESOLVED); + assertThat(i.resolvedAt()).isEqualTo(NOW); + }); + } + + @Test + void ackedInstanceClearsToResolved() { + var acked = openInstance(AlertState.ACKNOWLEDGED, NOW.minusSeconds(30), "alice"); + var next = AlertStateTransitions.apply(acked, EvalResult.Clear.INSTANCE, ruleWith(0), NOW); + assertThat(next).hasValueSatisfying(i -> { + assertThat(i.state()).isEqualTo(AlertState.RESOLVED); + assertThat(i.resolvedAt()).isEqualTo(NOW); + assertThat(i.ackedBy()).isEqualTo("alice"); // preserves acked_by + }); + } + + // ------------------------------------------------------------------------- + // Firing branch — no open instance + // ------------------------------------------------------------------------- + + @Test + void firingWithNoOpenInstanceCreatesPendingIfForDuration() { + var rule = ruleWith(60); + var next = AlertStateTransitions.apply(null, FIRING_RESULT, rule, NOW); + assertThat(next).hasValueSatisfying(i -> { + assertThat(i.state()).isEqualTo(AlertState.PENDING); + assertThat(i.firedAt()).isEqualTo(NOW); + 
assertThat(i.ruleId()).isEqualTo(rule.id()); + }); + } + + @Test + void firingWithNoForDurationGoesStraightToFiring() { + var rule = ruleWith(0); + var next = AlertStateTransitions.apply(null, new EvalResult.Firing(1.0, null, Map.of()), rule, NOW); + assertThat(next).hasValueSatisfying(i -> { + assertThat(i.state()).isEqualTo(AlertState.FIRING); + assertThat(i.firedAt()).isEqualTo(NOW); + }); + } + + // ------------------------------------------------------------------------- + // Firing branch — PENDING current + // ------------------------------------------------------------------------- + + @Test + void pendingStaysWhenForDurationNotElapsed() { + var rule = ruleWith(60); + // firedAt = NOW-10s, forDuration=60s → promoteAt = NOW+50s → still in window + var pending = openInstance(AlertState.PENDING, NOW.minusSeconds(10), null); + var next = AlertStateTransitions.apply(pending, FIRING_RESULT, rule, NOW); + assertThat(next).isEmpty(); // no change + } + + @Test + void pendingPromotesToFiringAfterForDuration() { + var rule = ruleWith(60); + // firedAt = NOW-120s, forDuration=60s → promoteAt = NOW-60s → elapsed + var pending = openInstance(AlertState.PENDING, NOW.minusSeconds(120), null); + var next = AlertStateTransitions.apply(pending, FIRING_RESULT, rule, NOW); + assertThat(next).hasValueSatisfying(i -> { + assertThat(i.state()).isEqualTo(AlertState.FIRING); + assertThat(i.firedAt()).isEqualTo(NOW); + }); + } + + // ------------------------------------------------------------------------- + // Firing branch — already open FIRING / ACKNOWLEDGED + // ------------------------------------------------------------------------- + + @Test + void firingWhenAlreadyFiringIsNoOp() { + var firing = openInstance(AlertState.FIRING, NOW.minusSeconds(120), null); + var next = AlertStateTransitions.apply(firing, FIRING_RESULT, ruleWith(0), NOW); + assertThat(next).isEmpty(); + } + + @Test + void firingWhenAcknowledgedIsNoOp() { + var acked = openInstance(AlertState.ACKNOWLEDGED, 
NOW.minusSeconds(30), "alice"); + var next = AlertStateTransitions.apply(acked, FIRING_RESULT, ruleWith(0), NOW); + assertThat(next).isEmpty(); + } + + // ------------------------------------------------------------------------- + // Batch + Error → always empty + // ------------------------------------------------------------------------- + + @Test + void batchResultAlwaysEmpty() { + var batch = new EvalResult.Batch(List.of(FIRING_RESULT)); + var next = AlertStateTransitions.apply(null, batch, ruleWith(0), NOW); + assertThat(next).isEmpty(); + } + + @Test + void errorResultAlwaysEmpty() { + var next = AlertStateTransitions.apply(null, + new EvalResult.Error(new RuntimeException("fail")), ruleWith(0), NOW); + assertThat(next).isEmpty(); + } +} diff --git a/cameleer-server-core/src/main/java/com/cameleer/server/core/alerting/AlertInstance.java b/cameleer-server-core/src/main/java/com/cameleer/server/core/alerting/AlertInstance.java index 4f59060e..cf319124 100644 --- a/cameleer-server-core/src/main/java/com/cameleer/server/core/alerting/AlertInstance.java +++ b/cameleer-server-core/src/main/java/com/cameleer/server/core/alerting/AlertInstance.java @@ -34,4 +34,62 @@ public record AlertInstance( targetGroupIds = targetGroupIds == null ? List.of() : List.copyOf(targetGroupIds); targetRoleNames = targetRoleNames == null ? 
List.of() : List.copyOf(targetRoleNames); } + + // --- Wither helpers (return a new record with one field changed) --- + + public AlertInstance withState(AlertState s) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + s, severity, firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withFiredAt(Instant i) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, i, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withResolvedAt(Instant i) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, i, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withAck(String ackedBy, Instant ackedAt) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withSilenced(boolean silenced) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withTitleMessage(String title, String message) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, 
targetRoleNames); + } + + public AlertInstance withLastNotifiedAt(Instant instant) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, resolvedAt, instant, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } + + public AlertInstance withContext(Map context) { + return new AlertInstance(id, ruleId, ruleSnapshot, environmentId, + state, severity, firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced, + currentValue, threshold, context, title, message, + targetUserIds, targetGroupIds, targetRoleNames); + } }