From 037a27d405e784d1fc4669c84b10506ad4545d99 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:26:19 +0200 Subject: [PATCH] fix(alerting): allow multiple open alert_instances per rule for PER_EXCHANGE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V13 added a partial unique index on alert_instances(rule_id) WHERE state IN (PENDING,FIRING,ACKNOWLEDGED). Correct for scalar condition kinds (ROUTE_METRIC / AGENT_STATE / DEPLOYMENT_STATE / LOG_PATTERN / JVM_METRIC / EXCHANGE_MATCH in COUNT_IN_WINDOW) but wrong for EXCHANGE_MATCH / PER_EXCHANGE, which by design emits one alert_instance per matching exchange. Under V13 every PER_EXCHANGE tick with >1 match logged "Skipped duplicate open alert_instance for rule …" at evaluator cadence and silently lost alert fidelity — only the first matching exchange per tick got an AlertInstance + webhook dispatch. V15 drops the rule_id-only constraint and recreates it with a discriminator on context->'exchange'->>'id'. Scalar kinds emit Map.of() as context, so their expression resolves to '' — "one open per rule" preserved. ExchangeMatchEvaluator.evaluatePerExchange always populates exchange.id, so per-exchange instances coexist cleanly. Two new PostgresAlertInstanceRepositoryIT tests: - multiple open instances for same rule + distinct exchanges all land - second open for identical (rule, exchange) still dedups via the DuplicateKeyException fallback in save() — defense-in-depth kept Also fixes pre-existing PostgresAlertReadRepositoryIT brokenness: its setup() inserted 3 open instances sharing one rule_id, which V13 blocked on arrival. Migrate to one rule_id per instance (pattern already used across other storage ITs). Co-Authored-By: Claude Opus 4.7 (1M context) --- ...rt_instances_open_rule_exchange_unique.sql | 24 +++++++ .../PostgresAlertInstanceRepositoryIT.java | 72 +++++++++++++++++++ .../PostgresAlertReadRepositoryIT.java | 20 +++--- 3 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 cameleer-server-app/src/main/resources/db/migration/V15__alert_instances_open_rule_exchange_unique.sql diff --git a/cameleer-server-app/src/main/resources/db/migration/V15__alert_instances_open_rule_exchange_unique.sql b/cameleer-server-app/src/main/resources/db/migration/V15__alert_instances_open_rule_exchange_unique.sql new file mode 100644 index 00000000..544ed196 --- /dev/null +++ b/cameleer-server-app/src/main/resources/db/migration/V15__alert_instances_open_rule_exchange_unique.sql @@ -0,0 +1,24 @@ +-- V15 — Discriminate open-alert_instance uniqueness by exchange id for PER_EXCHANGE rules. +-- +-- V13 enforced "one open alert_instance per rule" via a partial unique on +-- (rule_id). That is correct for every scalar condition kind (ROUTE_METRIC, +-- AGENT_STATE, DEPLOYMENT_STATE, LOG_PATTERN, JVM_METRIC, and EXCHANGE_MATCH +-- in COUNT_IN_WINDOW mode) but wrong for EXCHANGE_MATCH / PER_EXCHANGE, which +-- by design emits one alert_instance per matching exchange. Under V13 every +-- PER_EXCHANGE tick with >1 match logged "Skipped duplicate open alert_instance +-- for rule …" at evaluator cadence and silently lost alert fidelity — only the +-- first matching exchange per tick got an AlertInstance + webhook dispatch. +-- +-- New discriminator: (rule_id, COALESCE(context->'exchange'->>'id', '')). +-- Scalar evaluators emit Map.of() (no exchange subtree), the expression +-- resolves to '' for all of them, so they continue to get "one open per rule". +-- ExchangeMatchEvaluator.evaluatePerExchange emits {exchange.id = } +-- per firing, so PER_EXCHANGE instances for distinct exchanges coexist. +-- Two attempts to open an instance for the same (rule, exchange) still collapse +-- to one — the repo's DuplicateKeyException fallback preserves defense-in-depth. +DROP INDEX IF EXISTS alert_instances_open_rule_uq; + +CREATE UNIQUE INDEX alert_instances_open_rule_uq + ON alert_instances (rule_id, (COALESCE(context->'exchange'->>'id', ''))) + WHERE rule_id IS NOT NULL + AND state IN ('PENDING','FIRING','ACKNOWLEDGED'); diff --git a/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertInstanceRepositoryIT.java b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertInstanceRepositoryIT.java index d55e2820..35997a9f 100644 --- a/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertInstanceRepositoryIT.java +++ b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertInstanceRepositoryIT.java @@ -243,6 +243,61 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT { // Extra rules are cleaned up by @AfterEach via env-scoped DELETE } + @Test + void save_allowsMultipleOpenInstancesForSameRuleWithDifferentExchangeIds() { + // PER_EXCHANGE mode (EXCHANGE_MATCH rules) emits one alert_instance per + // matching exchange — the design requires many open instances per rule, + // each carrying a distinct exchange.id in its context. The open-rule + // partial unique must therefore discriminate on (rule_id, exchange.id), + // not on rule_id alone. Regression: V13's original (rule_id)-only + // constraint masked this by catching DuplicateKeyException and silently + // dropping every per-exchange firing after the first — logs filled with + // "Skipped duplicate open alert_instance for rule …" at evaluator cadence + // and PER_EXCHANGE notifications were never dispatched. + + var exchangeA = newInstanceWithExchange(ruleId, "exchange-aaa"); + var exchangeB = newInstanceWithExchange(ruleId, "exchange-bbb"); + var exchangeC = newInstanceWithExchange(ruleId, "exchange-ccc"); + + repo.save(exchangeA); + repo.save(exchangeB); + repo.save(exchangeC); + + assertThat(repo.findById(exchangeA.id())).isPresent(); + assertThat(repo.findById(exchangeB.id())).isPresent(); + assertThat(repo.findById(exchangeC.id())).isPresent(); + + Long count = jdbcTemplate.queryForObject( + "SELECT COUNT(*) FROM alert_instances " + + " WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')", + Long.class, ruleId); + assertThat(count).isEqualTo(3L); + } + + @Test + void save_rejectsSecondOpenInstanceForSameRuleAndExchange() { + // The partial unique still prevents genuine duplicates: two open instances + // for the same (rule, exchange) pair must collapse to one. The repo's + // DuplicateKeyException fallback returns the already-persisted instance. + + var first = newInstanceWithExchange(ruleId, "exchange-dup"); + var second = newInstanceWithExchange(ruleId, "exchange-dup"); + + repo.save(first); + var returned = repo.save(second); + + // Second insert was suppressed; the repo returned the existing instance. + assertThat(returned.id()).isEqualTo(first.id()); + assertThat(repo.findById(first.id())).isPresent(); + assertThat(repo.findById(second.id())).isEmpty(); + + Long count = jdbcTemplate.queryForObject( + "SELECT COUNT(*) FROM alert_instances " + + " WHERE rule_id = ? AND state IN ('PENDING','FIRING','ACKNOWLEDGED')", + Long.class, ruleId); + assertThat(count).isEqualTo(1L); + } + @Test void markSilenced_togglesToTrue() { var inst = newInstance(ruleId, List.of(userId), List.of(), List.of()); @@ -276,6 +331,23 @@ class PostgresAlertInstanceRepositoryIT extends AbstractPostgresIT { userIds, groupIds, roleNames); } + /** + * Creates an open FIRING instance carrying a PER_EXCHANGE-shaped context: + * {@code context.exchange.id = }. Mirrors what + * {@code ExchangeMatchEvaluator.evaluatePerExchange} emits for each matching + * exchange in a Batch result. + */ + private AlertInstance newInstanceWithExchange(UUID ruleId, String exchangeId) { + return new AlertInstance( + UUID.randomUUID(), ruleId, Map.of(), envId, + AlertState.FIRING, AlertSeverity.WARNING, + Instant.now(), null, null, null, null, + false, null, null, + Map.of("exchange", Map.of("id", exchangeId)), + "title", "message", + List.of(userId), List.of(), List.of()); + } + /** Inserts a minimal alert_rule with the given severity. */ private UUID seedRuleWithSeverity(String name, AlertSeverity severity) { UUID id = UUID.randomUUID(); diff --git a/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertReadRepositoryIT.java b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertReadRepositoryIT.java index 0a616aaa..6d8d372f 100644 --- a/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertReadRepositoryIT.java +++ b/cameleer-server-app/src/test/java/com/cameleer/server/app/alerting/storage/PostgresAlertReadRepositoryIT.java @@ -31,7 +31,6 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT { instanceId1 = UUID.randomUUID(); instanceId2 = UUID.randomUUID(); instanceId3 = UUID.randomUUID(); - UUID ruleId = UUID.randomUUID(); jdbcTemplate.update( "INSERT INTO environments (id, slug, display_name) VALUES (?, ?, ?)", @@ -41,18 +40,23 @@ class PostgresAlertReadRepositoryIT extends AbstractPostgresIT { jdbcTemplate.update( "INSERT INTO users (user_id, provider, email) VALUES (?, 'local', ?) ON CONFLICT (user_id) DO NOTHING", userId, userId + "@example.com"); - jdbcTemplate.update( - "INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " + - "notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " + - "VALUES (?, ?, 'rule', 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')", - ruleId, envId); - for (UUID id : List.of(instanceId1, instanceId2, instanceId3)) { + // Each open alert_instance needs its own rule_id — the alert_instances_open_rule_uq + // partial unique forbids multiple open instances sharing the same rule_id + exchange + // discriminator (V13/V15). Three separate rules let all three instances coexist + // in FIRING state so alert_reads tests can target each one independently. + for (UUID instanceId : List.of(instanceId1, instanceId2, instanceId3)) { + UUID ruleId = UUID.randomUUID(); + jdbcTemplate.update( + "INSERT INTO alert_rules (id, environment_id, name, severity, condition_kind, condition, " + + "notification_title_tmpl, notification_message_tmpl, created_by, updated_by) " + + "VALUES (?, ?, ?, 'WARNING', 'AGENT_STATE', '{}'::jsonb, 't', 'm', 'sys-user', 'sys-user')", + ruleId, envId, "rule-" + instanceId); jdbcTemplate.update( "INSERT INTO alert_instances (id, rule_id, rule_snapshot, environment_id, state, severity, " + "fired_at, context, title, message) VALUES (?, ?, '{}'::jsonb, ?, 'FIRING', 'WARNING', " + "now(), '{}'::jsonb, 'title', 'msg')", - id, ruleId, envId); + instanceId, ruleId, envId); } }