fix(alerting): populate AlertInstance.rule_snapshot so history survives rule delete

- Add withRuleSnapshot(Map) wither to AlertInstance (same pattern as other withers)
- Call snapshotRule(rule) + withRuleSnapshot in both applyResult (single-firing) and
  applyBatchFiring paths so every persisted instance carries a non-empty JSONB snapshot
- Strip null values from the Jackson-serialized map before wrapping in the immutable
  snapshot so Map.copyOf in the compact ctor does not throw NPE on nullable rule fields
- Add ruleSnapshotIsPersistedOnInstanceCreation IT: asserts name/severity/conditionKind
  appear in the rule_snapshot column after a tick fires an instance
- Add historySurvivesRuleDelete IT: fires an instance, deletes the rule, asserts
  rule_id IS NULL and rule_snapshot still contains the rule name (spec §5 guarantee)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-19 20:09:28 +02:00
parent 15c0a8273c
commit bf178ba141
3 changed files with 66 additions and 3 deletions

View File

@@ -161,7 +161,8 @@ public class AlertEvaluatorJob implements SchedulingConfigurer {
&& current.state() == AlertState.PENDING && current.state() == AlertState.PENDING
&& next.state() == AlertState.FIRING; && next.state() == AlertState.FIRING;
AlertInstance enriched = enrichTitleMessage(rule, next); AlertInstance withSnapshot = next.withRuleSnapshot(snapshotRule(rule));
AlertInstance enriched = enrichTitleMessage(rule, withSnapshot);
AlertInstance persisted = instanceRepo.save(enriched); AlertInstance persisted = instanceRepo.save(enriched);
if (isFirstFire || promotedFromPending) { if (isFirstFire || promotedFromPending) {
@@ -176,7 +177,8 @@ public class AlertEvaluatorJob implements SchedulingConfigurer {
*/ */
private void applyBatchFiring(AlertRule rule, EvalResult.Firing f) { private void applyBatchFiring(AlertRule rule, EvalResult.Firing f) {
Instant now = Instant.now(clock); Instant now = Instant.now(clock);
AlertInstance instance = AlertStateTransitions.newInstance(rule, f, AlertState.FIRING, now); AlertInstance instance = AlertStateTransitions.newInstance(rule, f, AlertState.FIRING, now)
.withRuleSnapshot(snapshotRule(rule));
AlertInstance enriched = enrichTitleMessage(rule, instance); AlertInstance enriched = enrichTitleMessage(rule, instance);
AlertInstance persisted = instanceRepo.save(enriched); AlertInstance persisted = instanceRepo.save(enriched);
enqueueNotifications(rule, persisted, now); enqueueNotifications(rule, persisted, now);
@@ -236,7 +238,12 @@ public class AlertEvaluatorJob implements SchedulingConfigurer {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
Map<String, Object> snapshotRule(AlertRule rule) { Map<String, Object> snapshotRule(AlertRule rule) {
try { try {
return objectMapper.convertValue(rule, Map.class); Map<String, Object> raw = objectMapper.convertValue(rule, Map.class);
// Map.copyOf (used in AlertInstance compact ctor) rejects null values —
// strip them so the snapshot is safe to store.
Map<String, Object> safe = new java.util.LinkedHashMap<>();
raw.forEach((k, v) -> { if (v != null) safe.put(k, v); });
return safe;
} catch (Exception e) { } catch (Exception e) {
log.warn("Failed to snapshot rule {}: {}", rule.id(), e.getMessage()); log.warn("Failed to snapshot rule {}: {}", rule.id(), e.getMessage());
return Map.of("id", rule.id().toString(), "name", rule.name()); return Map.of("id", rule.id().toString(), "name", rule.name());

View File

@@ -196,4 +196,53 @@ class AlertEvaluatorJobIT extends AbstractPostgresIT {
jdbcTemplate.update("DELETE FROM alert_instances WHERE rule_id = ?", ruleId2); jdbcTemplate.update("DELETE FROM alert_instances WHERE rule_id = ?", ruleId2);
jdbcTemplate.update("DELETE FROM alert_rules WHERE id = ?", ruleId2); jdbcTemplate.update("DELETE FROM alert_rules WHERE id = ?", ruleId2);
} }
/**
 * Checks that running one evaluator tick stores a rule snapshot on the alert instance row.
 *
 * <p>The snapshot is read back as text straight from the {@code rule_snapshot} column and must
 * mention the rule's name, severity and condition kind.
 */
@Test
void ruleSnapshotIsPersistedOnInstanceCreation() {
    // Arrange: the registry reports a single agent built by deadAgent(...) from a
    // timestamp 120 seconds in the past (expected to make the rule fire).
    Instant twoMinutesAgo = Instant.now().minusSeconds(120);
    when(agentRegistryService.findAll()).thenReturn(List.of(deadAgent(twoMinutesAgo)));

    // Act: one evaluation pass.
    job.tick();

    // Assert: query the column directly rather than going through the repository.
    String snapshotSql = "SELECT rule_snapshot::text FROM alert_instances WHERE rule_id = ?";
    String storedSnapshot = jdbcTemplate.queryForObject(snapshotSql, String.class, ruleId);

    // NOTE(review): the expected fragments carry a space after the colon, which presumably
    // matches how the database renders the JSON column as text — confirm if the column type changes.
    assertThat(storedSnapshot)
            .isNotNull()
            .contains("\"name\": \"dead-agent-rule\"")
            .contains("\"severity\": \"WARNING\"")
            .contains("\"conditionKind\": \"AGENT_STATE\"");
}
/**
 * Checks that an alert instance keeps its rule snapshot after the originating rule is deleted.
 *
 * <p>Flow: fire an instance, confirm its snapshot names the rule, delete the rule, then confirm
 * exactly one instance row has a NULL {@code rule_id} while its snapshot still names the rule
 * and its severity.
 */
@Test
void historySurvivesRuleDelete() {
    String snapshotByRuleSql =
            "SELECT rule_snapshot::text FROM alert_instances WHERE rule_id = ?";
    String orphanCountSql =
            "SELECT count(*) FROM alert_instances WHERE rule_id IS NULL AND rule_snapshot::text LIKE '%dead-agent-rule%'";
    String orphanSnapshotSql =
            "SELECT rule_snapshot::text FROM alert_instances WHERE rule_id IS NULL AND rule_snapshot::text LIKE '%dead-agent-rule%'";

    // Seed: a single agent built by deadAgent(...) from a timestamp 120 seconds in the past,
    // followed by one evaluation pass (expected to create a firing instance).
    Instant twoMinutesAgo = Instant.now().minusSeconds(120);
    when(agentRegistryService.findAll()).thenReturn(List.of(deadAgent(twoMinutesAgo)));
    job.tick();

    // Before deletion the instance is still linked to the rule and already carries the snapshot.
    String snapshotBefore = jdbcTemplate.queryForObject(snapshotByRuleSql, String.class, ruleId);
    assertThat(snapshotBefore).contains("\"name\": \"dead-agent-rule\"");

    // Remove the rule; the instance row is expected to stay with rule_id nulled
    // (presumably via ON DELETE SET NULL on the foreign key — verify against the schema).
    ruleRepo.delete(ruleId);

    // Exactly one orphaned row must reference the rule by name inside its snapshot.
    Long orphanedRows = jdbcTemplate.queryForObject(orphanCountSql, Long.class);
    assertThat(orphanedRows).isEqualTo(1L);

    // The snapshot content itself is unchanged by the delete.
    String snapshotAfter = jdbcTemplate.queryForObject(orphanSnapshotSql, String.class);
    assertThat(snapshotAfter)
            .contains("\"name\": \"dead-agent-rule\"")
            .contains("\"severity\": \"WARNING\"");
}
} }

View File

@@ -92,4 +92,11 @@ public record AlertInstance(
currentValue, threshold, context, title, message, currentValue, threshold, context, title, message,
targetUserIds, targetGroupIds, targetRoleNames); targetUserIds, targetGroupIds, targetRoleNames);
} }
/**
 * Returns a copy of this instance with the given rule snapshot.
 *
 * <p>Every other component is passed through unchanged; only the third constructor argument
 * is replaced by {@code snapshot}.
 *
 * <p>NOTE(review): the accompanying change description says the compact constructor applies
 * {@code Map.copyOf}, which would reject null values — confirm before passing maps that may
 * contain nulls.
 *
 * @param snapshot the rule snapshot to attach to the copy
 * @return a new {@code AlertInstance} carrying {@code snapshot}
 */
public AlertInstance withRuleSnapshot(Map<String, Object> snapshot) {
    AlertInstance copy = new AlertInstance(
            id, ruleId, snapshot, environmentId,
            state, severity,
            firedAt, ackedAt, ackedBy, resolvedAt, lastNotifiedAt, silenced,
            currentValue, threshold, context,
            title, message,
            targetUserIds, targetGroupIds, targetRoleNames);
    return copy;
}
} }