fix(alerting): allow multiple open alert_instances per rule for PER_EXCHANGE
V13 added a partial unique index on alert_instances(rule_id) WHERE state
IN (PENDING,FIRING,ACKNOWLEDGED). Correct for scalar condition kinds
(ROUTE_METRIC / AGENT_STATE / DEPLOYMENT_STATE / LOG_PATTERN / JVM_METRIC
/ EXCHANGE_MATCH in COUNT_IN_WINDOW) but wrong for EXCHANGE_MATCH /
PER_EXCHANGE, which by design emits one alert_instance per matching
exchange. Under V13 every PER_EXCHANGE tick with >1 match logged
"Skipped duplicate open alert_instance for rule …" at evaluator cadence
and silently lost alert fidelity — only the first matching exchange per
tick got an AlertInstance + webhook dispatch.
V15 drops the rule_id-only constraint and recreates it with a
discriminator on context->'exchange'->>'id'. Scalar kinds emit
Map.of() as context, so their expression resolves to '' — "one open per
rule" preserved. ExchangeMatchEvaluator.evaluatePerExchange always
populates exchange.id, so per-exchange instances coexist cleanly.
Two new PostgresAlertInstanceRepositoryIT tests:
- multiple open instances for same rule + distinct exchanges all land
- second open for identical (rule, exchange) still dedups via the
DuplicateKeyException fallback in save() — defense-in-depth kept
Also fixes pre-existing PostgresAlertReadRepositoryIT brokenness: its
setup() inserted 3 open instances sharing one rule_id, which V13 blocked
on arrival. Migrate to one rule_id per instance (pattern already used
across other storage ITs).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,24 @@
|
|||||||
|
-- V15 — Discriminate open-alert_instance uniqueness by exchange id for PER_EXCHANGE rules.
|
||||||
|
--
|
||||||
|
-- V13 enforced "one open alert_instance per rule" via a partial unique on
|
||||||
|
-- (rule_id). That is correct for every scalar condition kind (ROUTE_METRIC,
|
||||||
|
-- AGENT_STATE, DEPLOYMENT_STATE, LOG_PATTERN, JVM_METRIC, and EXCHANGE_MATCH
|
||||||
|
-- in COUNT_IN_WINDOW mode) but wrong for EXCHANGE_MATCH / PER_EXCHANGE, which
|
||||||
|
-- by design emits one alert_instance per matching exchange. Under V13 every
|
||||||
|
-- PER_EXCHANGE tick with >1 match logged "Skipped duplicate open alert_instance
|
||||||
|
-- for rule …" at evaluator cadence and silently lost alert fidelity — only the
|
||||||
|
-- first matching exchange per tick got an AlertInstance + webhook dispatch.
|
||||||
|
--
|
||||||
|
-- New discriminator: (rule_id, COALESCE(context->'exchange'->>'id', '')).
|
||||||
|
-- Scalar evaluators emit Map.of() (no exchange subtree), the expression
|
||||||
|
-- resolves to '' for all of them, so they continue to get "one open per rule".
|
||||||
|
-- ExchangeMatchEvaluator.evaluatePerExchange emits {exchange.id = <execId>}
|
||||||
|
-- per firing, so PER_EXCHANGE instances for distinct exchanges coexist.
|
||||||
|
-- Two attempts to open an instance for the same (rule, exchange) still collapse
|
||||||
|
-- to one — the repo's DuplicateKeyException fallback preserves defense-in-depth.
|
||||||
|
DROP INDEX IF EXISTS alert_instances_open_rule_uq;
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX alert_instances_open_rule_uq
|
||||||
|
ON alert_instances (rule_id, (COALESCE(context->'exchange'->>'id', '')))
|
||||||
|
WHERE rule_id IS NOT NULL
|
||||||
|
AND state IN ('PENDING','FIRING','ACKNOWLEDGED');
|
||||||
@@ -243,6 +243,61 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT {
|
|||||||
// Extra rules are cleaned up by @AfterEach via env-scoped DELETE
|
// Extra rules are cleaned up by @AfterEach via env-scoped DELETE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void save_allowsMultipleOpenInstancesForSameRuleWithDifferentExchangeIds() {
|
||||||
|
// PER_EXCHANGE mode (EXCHANGE_MATCH rules) emits one alert_instance per
|
||||||
|
// matching exchange — the design requires many open instances per rule,
|
||||||
|
// each carrying a distinct exchange.id in its context. The open-rule
|
||||||
|
// partial unique must therefore discriminate on (rule_id, exchange.id),
|
||||||
|
// not on rule_id alone. Regression: V13's original (rule_id)-only
|
||||||
|
// constraint masked this by catching DuplicateKeyException and silently
|
||||||
|
// dropping every per-exchange firing after the first — logs filled with
|
||||||
|
// "Skipped duplicate open alert_instance for rule …" at evaluator cadence
|
||||||
|
// and PER_EXCHANGE notifications were never dispatched.
|
||||||
|
|
||||||
|
var exchangeA = newInstanceWithExchange(ruleId, "exchange-aaa");
|
||||||
|
var exchangeB = newInstanceWithExchange(ruleId, "exchange-bbb");
|
||||||
|
var exchangeC = newInstanceWithExchange(ruleId, "exchange-ccc");
|
||||||
|
|
||||||
|
repo.save(exchangeA);
|
||||||
|
repo.save(exchangeB);
|
||||||
|
repo.save(exchangeC);
|
||||||
|
|
||||||
|
assertThat(repo.findById(exchangeA.id())).isPresent();
|
||||||
|
assertThat(repo.findById(exchangeB.id())).isPresent();
|
||||||
|
assertThat(repo.findById(exchangeC.id())).isPresent();
|
||||||
|
|
||||||
|
Long count = jdbcTemplate.queryForObject(
|
||||||
|
"SELECT COUNT(*) FROM alert_instances " +
|
||||||
|
" WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')",
|
||||||
|
Long.class, ruleId);
|
||||||
|
assertThat(count).isEqualTo(3L);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void save_rejectsSecondOpenInstanceForSameRuleAndExchange() {
|
||||||
|
// The partial unique still prevents genuine duplicates: two open instances
|
||||||
|
// for the same (rule, exchange) pair must collapse to one. The repo's
|
||||||
|
// DuplicateKeyException fallback returns the already-persisted instance.
|
||||||
|
|
||||||
|
var first = newInstanceWithExchange(ruleId, "exchange-dup");
|
||||||
|
var second = newInstanceWithExchange(ruleId, "exchange-dup");
|
||||||
|
|
||||||
|
repo.save(first);
|
||||||
|
var returned = repo.save(second);
|
||||||
|
|
||||||
|
// Second insert was suppressed; the repo returned the existing instance.
|
||||||
|
assertThat(returned.id()).isEqualTo(first.id());
|
||||||
|
assertThat(repo.findById(first.id())).isPresent();
|
||||||
|
assertThat(repo.findById(second.id())).isEmpty();
|
||||||
|
|
||||||
|
Long count = jdbcTemplate.queryForObject(
|
||||||
|
"SELECT COUNT(*) FROM alert_instances " +
|
||||||
|
" WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')",
|
||||||
|
Long.class, ruleId);
|
||||||
|
assertThat(count).isEqualTo(1L);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void markSilenced_togglesToTrue() {
|
void markSilenced_togglesToTrue() {
|
||||||
var inst = newInstance(ruleId, List.of(userId), List.of(), List.of());
|
var inst = newInstance(ruleId, List.of(userId), List.of(), List.of());
|
||||||
@@ -276,6 +331,23 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT {
|
|||||||
userIds, groupIds, roleNames);
|
userIds, groupIds, roleNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an open FIRING instance carrying a PER_EXCHANGE-shaped context:
|
||||||
|
* {@code context.exchange.id = <exchangeId>}. Mirrors what
|
||||||
|
* {@code ExchangeMatchEvaluator.evaluatePerExchange} emits for each matching
|
||||||
|
* exchange in a Batch result.
|
||||||
|
*/
|
||||||
|
private AlertInstance newInstanceWithExchange(UUID ruleId, String exchangeId) {
|
||||||
|
return new AlertInstance(
|
||||||
|
UUID.randomUUID(), ruleId, Map.of(), envId,
|
||||||
|
AlertState.FIRING, AlertSeverity.WARNING,
|
||||||
|
Instant.now(), null, null, null, null,
|
||||||
|
false, null, null,
|
||||||
|
Map.of("exchange", Map.of("id", exchangeId)),
|
||||||
|
"title", "message",
|
||||||
|
List.of(userId), List.of(), List.of());
|
||||||
|
}
|
||||||
|
|
||||||
/** Inserts a minimal alert_rule with the given severity. */
|
/** Inserts a minimal alert_rule with the given severity. */
|
||||||
private UUID seedRuleWithSeverity(String name, AlertSeverity severity) {
|
private UUID seedRuleWithSeverity(String name, AlertSeverity severity) {
|
||||||
UUID id = UUID.randomUUID();
|
UUID id = UUID.randomUUID();
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT {
|
|||||||
instanceId1 = UUID.randomUUID();
|
instanceId1 = UUID.randomUUID();
|
||||||
instanceId2 = UUID.randomUUID();
|
instanceId2 = UUID.randomUUID();
|
||||||
instanceId3 = UUID.randomUUID();
|
instanceId3 = UUID.randomUUID();
|
||||||
UUID ruleId = UUID.randomUUID();
|
|
||||||
|
|
||||||
jdbcTemplate.update(
|
jdbcTemplate.update(
|
||||||
"INSERT INTO environments (id, slug, display_name) VALUES (?, ?, ?)",
|
"INSERT INTO environments (id, slug, display_name) VALUES (?, ?, ?)",
|
||||||
@@ -41,18 +40,23 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT {
|
|||||||
jdbcTemplate.update(
|
jdbcTemplate.update(
|
||||||
"INSERT INTO users (user_id, provider, email) VALUES (?, 'local', ?) ON CONFLICT (user_id) DO NOTHING",
|
"INSERT INTO users (user_id, provider, email) VALUES (?, 'local', ?) ON CONFLICT (user_id) DO NOTHING",
|
||||||
userId, userId + "@example.com");
|
userId, userId + "@example.com");
|
||||||
|
|
||||||
|
// Each open alert_instance needs its own rule_id — the alert_instances_open_rule_uq
|
||||||
|
// partial unique forbids multiple open instances sharing the same rule_id + exchange
|
||||||
|
// discriminator (V13/V15). Three separate rules let all three instances coexist
|
||||||
|
// in FIRING state so alert_reads tests can target each one independently.
|
||||||
|
for (UUID instanceId : List.of(instanceId1, instanceId2, instanceId3)) {
|
||||||
|
UUID ruleId = UUID.randomUUID();
|
||||||
jdbcTemplate.update(
|
jdbcTemplate.update(
|
||||||
"INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " +
|
"INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " +
|
||||||
"notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " +
|
"notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " +
|
||||||
"VALUES (?, ?, 'rule', 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')",
|
"VALUES (?, ?, ?, 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')",
|
||||||
ruleId, envId);
|
ruleId, envId, "rule-" + instanceId);
|
||||||
|
|
||||||
for (UUID id : List.of(instanceId1, instanceId2, instanceId3)) {
|
|
||||||
jdbcTemplate.update(
|
jdbcTemplate.update(
|
||||||
"INSERT INTO alert_instances (id, rule_id, rule_snapshot, environment_id, state, severity, " +
|
"INSERT INTO alert_instances (id, rule_id, rule_snapshot, environment_id, state, severity, " +
|
||||||
"fired_at, context, title, message) VALUES (?, ?, '{}'::jsonb, ?, 'FIRING', 'WARNING', " +
|
"fired_at, context, title, message) VALUES (?, ?, '{}'::jsonb, ?, 'FIRING', 'WARNING', " +
|
||||||
"now(), '{}'::jsonb, 'title', 'msg')",
|
"now(), '{}'::jsonb, 'title', 'msg')",
|
||||||
id, ruleId, envId);
|
instanceId, ruleId, envId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user