feat(alerting): AlertingProperties + AlertStateTransitions state machine
- AlertingProperties: @ConfigurationProperties record with effective*() accessors and a 5000 ms floor clamp on evaluatorTickIntervalMs; a warning is logged at startup when the configured value is below the floor
- AlertStateTransitions: pure static state machine with Clear/Firing/Batch/Error branches and PENDING→FIRING promotion once forDuration has elapsed; Batch is delegated to the job
- AlertInstance: wither helpers withState, withFiredAt, withResolvedAt, withAck, withSilenced, withTitleMessage, withLastNotifiedAt, withContext
- AlertingBeanConfig: gains @EnableConfigurationProperties(AlertingProperties), an alertingInstanceId bean (hostname:pid), an alertingClock bean, and a PerKindCircuitBreaker bean wired from props
- 12 unit tests in AlertStateTransitionsTest covering all transitions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,15 +1,25 @@
|
||||
package com.cameleer.server.app.alerting.config;
|
||||
|
||||
import com.cameleer.server.app.alerting.eval.PerKindCircuitBreaker;
|
||||
import com.cameleer.server.app.alerting.storage.*;
|
||||
import com.cameleer.server.core.alerting.*;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.net.InetAddress;
|
||||
import java.time.Clock;
|
||||
|
||||
@Configuration
|
||||
@EnableConfigurationProperties(AlertingProperties.class)
|
||||
public class AlertingBeanConfig {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(AlertingBeanConfig.class);
|
||||
|
||||
@Bean
|
||||
public AlertRuleRepository alertRuleRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||
return new PostgresAlertRuleRepository(jdbc, om);
|
||||
@@ -34,4 +44,33 @@ public class AlertingBeanConfig {
|
||||
public AlertReadRepository alertReadRepository(JdbcTemplate jdbc) {
|
||||
return new PostgresAlertReadRepository(jdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public Clock alertingClock() {
|
||||
return Clock.systemDefaultZone();
|
||||
}
|
||||
|
||||
@Bean("alertingInstanceId")
|
||||
public String alertingInstanceId() {
|
||||
String hostname;
|
||||
try {
|
||||
hostname = InetAddress.getLocalHost().getHostName();
|
||||
} catch (Exception e) {
|
||||
hostname = "unknown";
|
||||
}
|
||||
return hostname + ":" + ProcessHandle.current().pid();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public PerKindCircuitBreaker perKindCircuitBreaker(AlertingProperties props) {
|
||||
if (props.evaluatorTickIntervalMs() != null
|
||||
&& props.evaluatorTickIntervalMs() < 5000) {
|
||||
log.warn("cameleer.server.alerting.evaluatorTickIntervalMs={} is below the 5000 ms floor; clamping to 5000 ms",
|
||||
props.evaluatorTickIntervalMs());
|
||||
}
|
||||
return new PerKindCircuitBreaker(
|
||||
props.cbFailThreshold(),
|
||||
props.cbWindowSeconds(),
|
||||
props.cbCooldownSeconds());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
package com.cameleer.server.app.alerting.config;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@ConfigurationProperties("cameleer.server.alerting")
|
||||
public record AlertingProperties(
|
||||
Integer evaluatorTickIntervalMs,
|
||||
Integer evaluatorBatchSize,
|
||||
Integer claimTtlSeconds,
|
||||
Integer notificationTickIntervalMs,
|
||||
Integer notificationBatchSize,
|
||||
Boolean inTickCacheEnabled,
|
||||
Integer circuitBreakerFailThreshold,
|
||||
Integer circuitBreakerWindowSeconds,
|
||||
Integer circuitBreakerCooldownSeconds,
|
||||
Integer eventRetentionDays,
|
||||
Integer notificationRetentionDays,
|
||||
Integer webhookTimeoutMs,
|
||||
Integer webhookMaxAttempts) {
|
||||
|
||||
public int effectiveEvaluatorTickIntervalMs() {
|
||||
int raw = evaluatorTickIntervalMs == null ? 5000 : evaluatorTickIntervalMs;
|
||||
return Math.max(5000, raw); // floor: no faster than 5 s
|
||||
}
|
||||
|
||||
public int effectiveEvaluatorBatchSize() {
|
||||
return evaluatorBatchSize == null ? 20 : evaluatorBatchSize;
|
||||
}
|
||||
|
||||
public int effectiveClaimTtlSeconds() {
|
||||
return claimTtlSeconds == null ? 30 : claimTtlSeconds;
|
||||
}
|
||||
|
||||
public int effectiveNotificationTickIntervalMs() {
|
||||
return notificationTickIntervalMs == null ? 5000 : notificationTickIntervalMs;
|
||||
}
|
||||
|
||||
public int effectiveNotificationBatchSize() {
|
||||
return notificationBatchSize == null ? 50 : notificationBatchSize;
|
||||
}
|
||||
|
||||
public boolean effectiveInTickCacheEnabled() {
|
||||
return inTickCacheEnabled == null || inTickCacheEnabled;
|
||||
}
|
||||
|
||||
public int effectiveEventRetentionDays() {
|
||||
return eventRetentionDays == null ? 90 : eventRetentionDays;
|
||||
}
|
||||
|
||||
public int effectiveNotificationRetentionDays() {
|
||||
return notificationRetentionDays == null ? 30 : notificationRetentionDays;
|
||||
}
|
||||
|
||||
public int effectiveWebhookTimeoutMs() {
|
||||
return webhookTimeoutMs == null ? 5000 : webhookTimeoutMs;
|
||||
}
|
||||
|
||||
public int effectiveWebhookMaxAttempts() {
|
||||
return webhookMaxAttempts == null ? 3 : webhookMaxAttempts;
|
||||
}
|
||||
|
||||
public int cbFailThreshold() {
|
||||
return circuitBreakerFailThreshold == null ? 5 : circuitBreakerFailThreshold;
|
||||
}
|
||||
|
||||
public int cbWindowSeconds() {
|
||||
return circuitBreakerWindowSeconds == null ? 30 : circuitBreakerWindowSeconds;
|
||||
}
|
||||
|
||||
public int cbCooldownSeconds() {
|
||||
return circuitBreakerCooldownSeconds == null ? 60 : circuitBreakerCooldownSeconds;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package com.cameleer.server.app.alerting.eval;
|
||||
|
||||
import com.cameleer.server.core.alerting.AlertInstance;
|
||||
import com.cameleer.server.core.alerting.AlertRule;
|
||||
import com.cameleer.server.core.alerting.AlertSeverity;
|
||||
import com.cameleer.server.core.alerting.AlertState;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Pure, stateless state-machine for alert instance transitions.
|
||||
* <p>
|
||||
* Given the current open instance (nullable) and an EvalResult, returns the new/updated
|
||||
* AlertInstance or {@link Optional#empty()} when no action is needed.
|
||||
* <p>
|
||||
* Batch results must be handled directly in the job; this helper returns empty for them.
|
||||
*/
|
||||
public final class AlertStateTransitions {
|
||||
|
||||
private AlertStateTransitions() {}
|
||||
|
||||
/**
|
||||
* Apply an EvalResult to the current open AlertInstance.
|
||||
*
|
||||
* @param current the open instance for this rule (PENDING / FIRING / ACKNOWLEDGED), or null if none
|
||||
* @param result the evaluator outcome
|
||||
* @param rule the rule being evaluated
|
||||
* @param now wall-clock instant for the current tick
|
||||
* @return the new or updated AlertInstance, or empty when nothing should change
|
||||
*/
|
||||
public static Optional<AlertInstance> apply(
|
||||
AlertInstance current, EvalResult result, AlertRule rule, Instant now) {
|
||||
|
||||
if (result instanceof EvalResult.Clear) return onClear(current, now);
|
||||
if (result instanceof EvalResult.Firing f) return onFiring(current, f, rule, now);
|
||||
// EvalResult.Error and EvalResult.Batch — no action (Batch handled by the job directly)
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Clear branch
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static Optional<AlertInstance> onClear(AlertInstance current, Instant now) {
|
||||
if (current == null) return Optional.empty(); // no open instance — no-op
|
||||
if (current.state() == AlertState.RESOLVED) return Optional.empty(); // already resolved
|
||||
// Any open state (PENDING / FIRING / ACKNOWLEDGED) → RESOLVED
|
||||
return Optional.of(current
|
||||
.withState(AlertState.RESOLVED)
|
||||
.withResolvedAt(now));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Firing branch
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static Optional<AlertInstance> onFiring(
|
||||
AlertInstance current, EvalResult.Firing f, AlertRule rule, Instant now) {
|
||||
|
||||
if (current == null) {
|
||||
// No open instance — create a new one
|
||||
AlertState initial = rule.forDurationSeconds() > 0
|
||||
? AlertState.PENDING
|
||||
: AlertState.FIRING;
|
||||
return Optional.of(newInstance(rule, f, initial, now));
|
||||
}
|
||||
|
||||
return switch (current.state()) {
|
||||
case PENDING -> {
|
||||
// Check whether the forDuration window has elapsed
|
||||
Instant promoteAt = current.firedAt().plusSeconds(rule.forDurationSeconds());
|
||||
if (!promoteAt.isAfter(now)) {
|
||||
// Promote to FIRING; keep the original firedAt (that's when it first appeared)
|
||||
yield Optional.of(current
|
||||
.withState(AlertState.FIRING)
|
||||
.withFiredAt(now));
|
||||
}
|
||||
// Still within forDuration — stay PENDING, nothing to persist
|
||||
yield Optional.empty();
|
||||
}
|
||||
// FIRING / ACKNOWLEDGED — re-notification cadence handled by the dispatcher
|
||||
case FIRING, ACKNOWLEDGED -> Optional.empty();
|
||||
// RESOLVED should never appear as the "current open" instance, but guard anyway
|
||||
case RESOLVED -> Optional.empty();
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Factory helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Creates a brand-new AlertInstance from a rule + Firing result.
|
||||
* title/message are left empty here; the job enriches them via MustacheRenderer after.
|
||||
*/
|
||||
static AlertInstance newInstance(AlertRule rule, EvalResult.Firing f, AlertState state, Instant now) {
|
||||
return new AlertInstance(
|
||||
UUID.randomUUID(),
|
||||
rule.id(),
|
||||
Map.of(), // ruleSnapshot — caller (job) fills in via ObjectMapper
|
||||
rule.environmentId(),
|
||||
state,
|
||||
rule.severity() != null ? rule.severity() : AlertSeverity.WARNING,
|
||||
now, // firedAt
|
||||
null, // ackedAt
|
||||
null, // ackedBy
|
||||
null, // resolvedAt
|
||||
null, // lastNotifiedAt
|
||||
false, // silenced
|
||||
f.currentValue(),
|
||||
f.threshold(),
|
||||
f.context() != null ? f.context() : Map.of(),
|
||||
"", // title — rendered by job
|
||||
"", // message — rendered by job
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user