fix(alerting): allow multiple open alert_instances per rule for PER_EXCHANGE
V13 added a partial unique index on alert_instances(rule_id) WHERE state
IN (PENDING,FIRING,ACKNOWLEDGED). Correct for scalar condition kinds
(ROUTE_METRIC / AGENT_STATE / DEPLOYMENT_STATE / LOG_PATTERN / JVM_METRIC
/ EXCHANGE_MATCH in COUNT_IN_WINDOW) but wrong for EXCHANGE_MATCH /
PER_EXCHANGE, which by design emits one alert_instance per matching
exchange. Under V13 every PER_EXCHANGE tick with >1 match logged
"Skipped duplicate open alert_instance for rule …" at evaluator cadence
and silently lost alert fidelity — only the first matching exchange per
tick got an AlertInstance + webhook dispatch.
V15 drops the rule_id-only constraint and recreates it with a
discriminator on context->'exchange'->>'id'. Scalar kinds emit
Map.of() as context, so their expression resolves to '' — "one open per
rule" preserved. ExchangeMatchEvaluator.evaluatePerExchange always
populates exchange.id, so per-exchange instances coexist cleanly.
Two new PostgresAlertInstanceRepositoryIT tests:
- multiple open instances for same rule + distinct exchanges all land
- second open for identical (rule, exchange) still dedups via the
DuplicateKeyException fallback in save() — defense-in-depth kept
Also fixes pre-existing PostgresAlertReadRepositoryIT brokenness: its
setup() inserted 3 open instances sharing one rule_id, which V13 blocked
on arrival. Migrate to one rule_id per instance (pattern already used
across other storage ITs).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
-- V15 — Discriminate open-alert_instance uniqueness by exchange id for PER_EXCHANGE rules.
|
||||
--
|
||||
-- V13 enforced "one open alert_instance per rule" via a partial unique on
|
||||
-- (rule_id). That is correct for every scalar condition kind (ROUTE_METRIC,
|
||||
-- AGENT_STATE, DEPLOYMENT_STATE, LOG_PATTERN, JVM_METRIC, and EXCHANGE_MATCH
|
||||
-- in COUNT_IN_WINDOW mode) but wrong for EXCHANGE_MATCH / PER_EXCHANGE, which
|
||||
-- by design emits one alert_instance per matching exchange. Under V13 every
|
||||
-- PER_EXCHANGE tick with >1 match logged "Skipped duplicate open alert_instance
|
||||
-- for rule …" at evaluator cadence and silently lost alert fidelity — only the
|
||||
-- first matching exchange per tick got an AlertInstance + webhook dispatch.
|
||||
--
|
||||
-- New discriminator: (rule_id, COALESCE(context->'exchange'->>'id', '')).
|
||||
-- Scalar evaluators emit Map.of() (no exchange subtree), the expression
|
||||
-- resolves to '' for all of them, so they continue to get "one open per rule".
|
||||
-- ExchangeMatchEvaluator.evaluatePerExchange emits {exchange.id = <execId>}
|
||||
-- per firing, so PER_EXCHANGE instances for distinct exchanges coexist.
|
||||
-- Two attempts to open an instance for the same (rule, exchange) still collapse
|
||||
-- to one — the repo's DuplicateKeyException fallback preserves defense-in-depth.
|
||||
DROP INDEX IF EXISTS alert_instances_open_rule_uq;
|
||||
|
||||
CREATE UNIQUE INDEX alert_instances_open_rule_uq
|
||||
ON alert_instances (rule_id, (COALESCE(context->'exchange'->>'id', '')))
|
||||
WHERE rule_id IS NOT NULL
|
||||
AND state IN ('PENDING','FIRING','ACKNOWLEDGED');
|
||||
@@ -243,6 +243,61 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT {
|
||||
// Extra rules are cleaned up by @AfterEach via env-scoped DELETE
|
||||
}
|
||||
|
||||
@Test
|
||||
void save_allowsMultipleOpenInstancesForSameRuleWithDifferentExchangeIds() {
|
||||
// PER_EXCHANGE mode (EXCHANGE_MATCH rules) emits one alert_instance per
|
||||
// matching exchange — the design requires many open instances per rule,
|
||||
// each carrying a distinct exchange.id in its context. The open-rule
|
||||
// partial unique must therefore discriminate on (rule_id, exchange.id),
|
||||
// not on rule_id alone. Regression: V13's original (rule_id)-only
|
||||
// constraint masked this by catching DuplicateKeyException and silently
|
||||
// dropping every per-exchange firing after the first — logs filled with
|
||||
// "Skipped duplicate open alert_instance for rule …" at evaluator cadence
|
||||
// and PER_EXCHANGE notifications were never dispatched.
|
||||
|
||||
var exchangeA = newInstanceWithExchange(ruleId, "exchange-aaa");
|
||||
var exchangeB = newInstanceWithExchange(ruleId, "exchange-bbb");
|
||||
var exchangeC = newInstanceWithExchange(ruleId, "exchange-ccc");
|
||||
|
||||
repo.save(exchangeA);
|
||||
repo.save(exchangeB);
|
||||
repo.save(exchangeC);
|
||||
|
||||
assertThat(repo.findById(exchangeA.id())).isPresent();
|
||||
assertThat(repo.findById(exchangeB.id())).isPresent();
|
||||
assertThat(repo.findById(exchangeC.id())).isPresent();
|
||||
|
||||
Long count = jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) FROM alert_instances " +
|
||||
" WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')",
|
||||
Long.class, ruleId);
|
||||
assertThat(count).isEqualTo(3L);
|
||||
}
|
||||
|
||||
@Test
|
||||
void save_rejectsSecondOpenInstanceForSameRuleAndExchange() {
|
||||
// The partial unique still prevents genuine duplicates: two open instances
|
||||
// for the same (rule, exchange) pair must collapse to one. The repo's
|
||||
// DuplicateKeyException fallback returns the already-persisted instance.
|
||||
|
||||
var first = newInstanceWithExchange(ruleId, "exchange-dup");
|
||||
var second = newInstanceWithExchange(ruleId, "exchange-dup");
|
||||
|
||||
repo.save(first);
|
||||
var returned = repo.save(second);
|
||||
|
||||
// Second insert was suppressed; the repo returned the existing instance.
|
||||
assertThat(returned.id()).isEqualTo(first.id());
|
||||
assertThat(repo.findById(first.id())).isPresent();
|
||||
assertThat(repo.findById(second.id())).isEmpty();
|
||||
|
||||
Long count = jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) FROM alert_instances " +
|
||||
" WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')",
|
||||
Long.class, ruleId);
|
||||
assertThat(count).isEqualTo(1L);
|
||||
}
|
||||
|
||||
@Test
|
||||
void markSilenced_togglesToTrue() {
|
||||
var inst = newInstance(ruleId, List.of(userId), List.of(), List.of());
|
||||
@@ -276,6 +331,23 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT {
|
||||
userIds, groupIds, roleNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an open FIRING instance carrying a PER_EXCHANGE-shaped context:
|
||||
* {@code context.exchange.id = <exchangeId>}. Mirrors what
|
||||
* {@code ExchangeMatchEvaluator.evaluatePerExchange} emits for each matching
|
||||
* exchange in a Batch result.
|
||||
*/
|
||||
private AlertInstance newInstanceWithExchange(UUID ruleId, String exchangeId) {
|
||||
return new AlertInstance(
|
||||
UUID.randomUUID(), ruleId, Map.of(), envId,
|
||||
AlertState.FIRING, AlertSeverity.WARNING,
|
||||
Instant.now(), null, null, null, null,
|
||||
false, null, null,
|
||||
Map.of("exchange", Map.of("id", exchangeId)),
|
||||
"title", "message",
|
||||
List.of(userId), List.of(), List.of());
|
||||
}
|
||||
|
||||
/** Inserts a minimal alert_rule with the given severity. */
|
||||
private UUID seedRuleWithSeverity(String name, AlertSeverity severity) {
|
||||
UUID id = UUID.randomUUID();
|
||||
|
||||
@@ -31,7 +31,6 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT {
|
||||
instanceId1 = UUID.randomUUID();
|
||||
instanceId2 = UUID.randomUUID();
|
||||
instanceId3 = UUID.randomUUID();
|
||||
UUID ruleId = UUID.randomUUID();
|
||||
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO environments (id, slug, display_name) VALUES (?, ?, ?)",
|
||||
@@ -41,18 +40,23 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT {
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO users (user_id, provider, email) VALUES (?, 'local', ?) ON CONFLICT (user_id) DO NOTHING",
|
||||
userId, userId + "@example.com");
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " +
|
||||
"notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " +
|
||||
"VALUES (?, ?, 'rule', 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')",
|
||||
ruleId, envId);
|
||||
|
||||
for (UUID id : List.of(instanceId1, instanceId2, instanceId3)) {
|
||||
// Each open alert_instance needs its own rule_id — the alert_instances_open_rule_uq
|
||||
// partial unique forbids multiple open instances sharing the same rule_id + exchange
|
||||
// discriminator (V13/V15). Three separate rules let all three instances coexist
|
||||
// in FIRING state so alert_reads tests can target each one independently.
|
||||
for (UUID instanceId : List.of(instanceId1, instanceId2, instanceId3)) {
|
||||
UUID ruleId = UUID.randomUUID();
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " +
|
||||
"notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " +
|
||||
"VALUES (?, ?, ?, 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')",
|
||||
ruleId, envId, "rule-" + instanceId);
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO alert_instances (id, rule_id, rule_snapshot, environment_id, state, severity, " +
|
||||
"fired_at, context, title, message) VALUES (?, ?, '{}'::jsonb, ?, 'FIRING', 'WARNING', " +
|
||||
"now(), '{}'::jsonb, 'title', 'msg')",
|
||||
id, ruleId, envId);
|
||||
instanceId, ruleId, envId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user