Merge pull request 'feat(alerting): Plan 03 — UI + backfills (SSRF guard, metrics caching, docker stack)' (#144) from feat/alerting-03-ui into main
Reviewed-on: #144
This commit was merged in pull request #144.
This commit is contained in:
@@ -9,12 +9,20 @@ import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.Timer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* Micrometer-based metrics for the alerting subsystem.
|
||||
@@ -30,10 +38,11 @@ import java.util.concurrent.ConcurrentMap;
|
||||
* <li>{@code alerting_eval_duration_seconds{kind}} — per-kind evaluation latency</li>
|
||||
* <li>{@code alerting_webhook_delivery_duration_seconds} — webhook POST latency</li>
|
||||
* </ul>
|
||||
* Gauges (read from PostgreSQL on each scrape; low scrape frequency = low DB load):
|
||||
* Gauges (read from PostgreSQL, cached for {@link #DEFAULT_GAUGE_TTL} to amortise
|
||||
* Prometheus scrapes that may fire every few seconds):
|
||||
* <ul>
|
||||
* <li>{@code alerting_rules_total{state=enabled|disabled}} — rule counts from {@code alert_rules}</li>
|
||||
* <li>{@code alerting_instances_total{state,severity}} — instance counts grouped from {@code alert_instances}</li>
|
||||
* <li>{@code alerting_instances_total{state}} — instance counts grouped from {@code alert_instances}</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Component
|
||||
@@ -41,11 +50,13 @@ public class AlertingMetrics {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(AlertingMetrics.class);
|
||||
|
||||
/** Default time-to-live for the gauge-supplier caches. */
|
||||
static final Duration DEFAULT_GAUGE_TTL = Duration.ofSeconds(30);
|
||||
|
||||
private final MeterRegistry registry;
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
// Cached counters per kind (lazy-initialized)
|
||||
private final ConcurrentMap<String, Counter> evalErrorCounters = new ConcurrentHashMap<>();
|
||||
private final ConcurrentMap<String, Counter> evalErrorCounters = new ConcurrentHashMap<>();
|
||||
private final ConcurrentMap<String, Counter> circuitOpenCounters = new ConcurrentHashMap<>();
|
||||
private final ConcurrentMap<String, Timer> evalDurationTimers = new ConcurrentHashMap<>();
|
||||
|
||||
@@ -55,33 +66,81 @@ public class AlertingMetrics {
|
||||
// Shared delivery timer
|
||||
private final Timer webhookDeliveryTimer;
|
||||
|
||||
// TTL-cached gauge suppliers registered so tests can force a read cycle.
|
||||
private final TtlCache enabledRulesCache;
|
||||
private final TtlCache disabledRulesCache;
|
||||
private final Map<AlertState, TtlCache> instancesByStateCaches;
|
||||
|
||||
/**
|
||||
* Production constructor: wraps the Postgres-backed gauge suppliers in a
|
||||
* 30-second TTL cache so Prometheus scrapes don't cause per-scrape DB queries.
|
||||
*/
|
||||
@Autowired
|
||||
public AlertingMetrics(MeterRegistry registry, JdbcTemplate jdbc) {
|
||||
this(registry,
|
||||
() -> countRules(jdbc, true),
|
||||
() -> countRules(jdbc, false),
|
||||
state -> countInstances(jdbc, state),
|
||||
DEFAULT_GAUGE_TTL,
|
||||
Instant::now);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test-friendly constructor accepting the three gauge suppliers that are
|
||||
* exercised in the {@link AlertingMetricsCachingTest} plan sketch. The
|
||||
* {@code instancesSupplier} is used for every {@link AlertState}.
|
||||
*/
|
||||
AlertingMetrics(MeterRegistry registry,
|
||||
Supplier<Long> enabledRulesSupplier,
|
||||
Supplier<Long> disabledRulesSupplier,
|
||||
Supplier<Long> instancesSupplier,
|
||||
Duration gaugeTtl,
|
||||
Supplier<Instant> clock) {
|
||||
this(registry,
|
||||
enabledRulesSupplier,
|
||||
disabledRulesSupplier,
|
||||
state -> instancesSupplier.get(),
|
||||
gaugeTtl,
|
||||
clock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Core constructor: accepts per-state instance supplier so production can
|
||||
* query PostgreSQL with a different value per {@link AlertState}.
|
||||
*/
|
||||
private AlertingMetrics(MeterRegistry registry,
|
||||
Supplier<Long> enabledRulesSupplier,
|
||||
Supplier<Long> disabledRulesSupplier,
|
||||
java.util.function.Function<AlertState, Long> instancesSupplier,
|
||||
Duration gaugeTtl,
|
||||
Supplier<Instant> clock) {
|
||||
this.registry = registry;
|
||||
this.jdbc = jdbc;
|
||||
|
||||
// ── Static timers ───────────────────────────────────────────────
|
||||
this.webhookDeliveryTimer = Timer.builder("alerting_webhook_delivery_duration_seconds")
|
||||
.description("Latency of outbound webhook POST requests")
|
||||
.register(registry);
|
||||
|
||||
// ── Gauge: rules by enabled/disabled ────────────────────────────
|
||||
Gauge.builder("alerting_rules_total", this, m -> m.countRules(true))
|
||||
// ── Gauge: rules by enabled/disabled (cached) ───────────────────
|
||||
this.enabledRulesCache = new TtlCache(enabledRulesSupplier, gaugeTtl, clock);
|
||||
this.disabledRulesCache = new TtlCache(disabledRulesSupplier, gaugeTtl, clock);
|
||||
|
||||
Gauge.builder("alerting_rules_total", enabledRulesCache, TtlCache::getAsDouble)
|
||||
.tag("state", "enabled")
|
||||
.description("Number of enabled alert rules")
|
||||
.register(registry);
|
||||
Gauge.builder("alerting_rules_total", this, m -> m.countRules(false))
|
||||
Gauge.builder("alerting_rules_total", disabledRulesCache, TtlCache::getAsDouble)
|
||||
.tag("state", "disabled")
|
||||
.description("Number of disabled alert rules")
|
||||
.register(registry);
|
||||
|
||||
// ── Gauges: alert instances by state × severity ─────────────────
|
||||
// ── Gauges: alert instances by state (cached) ───────────────────
|
||||
this.instancesByStateCaches = new EnumMap<>(AlertState.class);
|
||||
for (AlertState state : AlertState.values()) {
|
||||
// Capture state as effectively-final for lambda
|
||||
AlertState capturedState = state;
|
||||
// We register one gauge per state (summed across severities) for simplicity;
|
||||
// per-severity breakdown would require a dynamic MultiGauge.
|
||||
Gauge.builder("alerting_instances_total", this,
|
||||
m -> m.countInstances(capturedState))
|
||||
AlertState captured = state;
|
||||
TtlCache cache = new TtlCache(() -> instancesSupplier.apply(captured), gaugeTtl, clock);
|
||||
this.instancesByStateCaches.put(state, cache);
|
||||
Gauge.builder("alerting_instances_total", cache, TtlCache::getAsDouble)
|
||||
.tag("state", state.name().toLowerCase())
|
||||
.description("Number of alert instances by state")
|
||||
.register(registry);
|
||||
@@ -148,28 +207,73 @@ public class AlertingMetrics {
|
||||
.increment();
|
||||
}
|
||||
|
||||
// ── Gauge suppliers (called on each Prometheus scrape) ──────────────
|
||||
|
||||
private double countRules(boolean enabled) {
|
||||
try {
|
||||
Long count = jdbc.queryForObject(
|
||||
"SELECT COUNT(*) FROM alert_rules WHERE enabled = ?", Long.class, enabled);
|
||||
return count == null ? 0.0 : count.doubleValue();
|
||||
} catch (Exception e) {
|
||||
log.debug("alerting_rules gauge query failed: {}", e.getMessage());
|
||||
return 0.0;
|
||||
/**
|
||||
* Force a read of every TTL-cached gauge supplier. Used by tests to simulate
|
||||
* a Prometheus scrape without needing a real registry scrape pipeline.
|
||||
*/
|
||||
void snapshotAllGauges() {
|
||||
List<TtlCache> all = new ArrayList<>();
|
||||
all.add(enabledRulesCache);
|
||||
all.add(disabledRulesCache);
|
||||
all.addAll(instancesByStateCaches.values());
|
||||
for (TtlCache c : all) {
|
||||
c.getAsDouble();
|
||||
}
|
||||
}
|
||||
|
||||
private double countInstances(AlertState state) {
|
||||
// ── Gauge suppliers (queried at most once per TTL) ──────────────────
|
||||
|
||||
private static long countRules(JdbcTemplate jdbc, boolean enabled) {
|
||||
try {
|
||||
Long count = jdbc.queryForObject(
|
||||
"SELECT COUNT(*) FROM alert_rules WHERE enabled = ?", Long.class, enabled);
|
||||
return count == null ? 0L : count;
|
||||
} catch (Exception e) {
|
||||
log.debug("alerting_rules gauge query failed: {}", e.getMessage());
|
||||
return 0L;
|
||||
}
|
||||
}
|
||||
|
||||
private static long countInstances(JdbcTemplate jdbc, AlertState state) {
|
||||
try {
|
||||
Long count = jdbc.queryForObject(
|
||||
"SELECT COUNT(*) FROM alert_instances WHERE state = ?::alert_state_enum",
|
||||
Long.class, state.name());
|
||||
return count == null ? 0.0 : count.doubleValue();
|
||||
return count == null ? 0L : count;
|
||||
} catch (Exception e) {
|
||||
log.debug("alerting_instances gauge query failed: {}", e.getMessage());
|
||||
return 0.0;
|
||||
return 0L;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lightweight TTL cache around a {@code Supplier<Long>}. Every call to
|
||||
* {@link #getAsDouble()} either returns the cached value (if {@code clock.get()
|
||||
* - lastRead < ttl}) or invokes the delegate and refreshes the cache.
|
||||
*
|
||||
* <p>Used to amortise Postgres queries behind Prometheus gauges over a
|
||||
* 30-second TTL (see {@link AlertingMetrics#DEFAULT_GAUGE_TTL}).
|
||||
*/
|
||||
static final class TtlCache {
|
||||
private final Supplier<Long> delegate;
|
||||
private final Duration ttl;
|
||||
private final Supplier<Instant> clock;
|
||||
private volatile Instant lastRead = Instant.MIN;
|
||||
private volatile long cached = 0L;
|
||||
|
||||
TtlCache(Supplier<Long> delegate, Duration ttl, Supplier<Instant> clock) {
|
||||
this.delegate = delegate;
|
||||
this.ttl = ttl;
|
||||
this.clock = clock;
|
||||
}
|
||||
|
||||
synchronized double getAsDouble() {
|
||||
Instant now = clock.get();
|
||||
if (lastRead == Instant.MIN || Duration.between(lastRead, now).compareTo(ttl) >= 0) {
|
||||
cached = delegate.get();
|
||||
lastRead = now;
|
||||
}
|
||||
return cached;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@ import com.cameleer.server.core.outbound.OutboundConnectionService;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
@@ -15,20 +17,24 @@ public class OutboundConnectionServiceImpl implements OutboundConnectionService
|
||||
|
||||
private final OutboundConnectionRepository repo;
|
||||
private final AlertRuleRepository ruleRepo;
|
||||
private final SsrfGuard ssrfGuard;
|
||||
private final String tenantId;
|
||||
|
||||
public OutboundConnectionServiceImpl(
|
||||
OutboundConnectionRepository repo,
|
||||
AlertRuleRepository ruleRepo,
|
||||
SsrfGuard ssrfGuard,
|
||||
String tenantId) {
|
||||
this.repo = repo;
|
||||
this.ruleRepo = ruleRepo;
|
||||
this.ssrfGuard = ssrfGuard;
|
||||
this.tenantId = tenantId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OutboundConnection create(OutboundConnection draft, String actingUserId) {
|
||||
assertNameUnique(draft.name(), null);
|
||||
validateUrl(draft.url());
|
||||
OutboundConnection c = new OutboundConnection(
|
||||
UUID.randomUUID(), tenantId, draft.name(), draft.description(),
|
||||
draft.url(), draft.method(), draft.defaultHeaders(), draft.defaultBodyTmpl(),
|
||||
@@ -46,6 +52,7 @@ public class OutboundConnectionServiceImpl implements OutboundConnectionService
|
||||
if (!existing.name().equals(draft.name())) {
|
||||
assertNameUnique(draft.name(), id);
|
||||
}
|
||||
validateUrl(draft.url());
|
||||
|
||||
// Narrowing allowed-envs guard: if the new draft restricts to a non-empty set of envs,
|
||||
// find any envs that existed before but are absent in the draft.
|
||||
@@ -107,4 +114,23 @@ public class OutboundConnectionServiceImpl implements OutboundConnectionService
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate the webhook URL against SSRF pitfalls. Translates the guard's
|
||||
* {@link IllegalArgumentException} into a 400 Bad Request with the guard's
|
||||
* message preserved, so the client sees e.g. "private or loopback".
|
||||
*/
|
||||
private void validateUrl(String url) {
|
||||
URI uri;
|
||||
try {
|
||||
uri = new URI(url);
|
||||
} catch (URISyntaxException e) {
|
||||
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "Invalid URL: " + url);
|
||||
}
|
||||
try {
|
||||
ssrfGuard.validate(uri);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package com.cameleer.server.app.outbound;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.net.Inet4Address;
|
||||
import java.net.Inet6Address;
|
||||
import java.net.InetAddress;
|
||||
import java.net.URI;
|
||||
import java.net.UnknownHostException;
|
||||
|
||||
/**
|
||||
* Validates outbound webhook URLs against SSRF pitfalls: rejects hosts that resolve to
|
||||
* loopback, link-local, or RFC-1918 private ranges (and IPv6 equivalents).
|
||||
*
|
||||
* Per spec §17. The `cameleer.server.outbound-http.allow-private-targets` flag bypasses
|
||||
* the check for dev environments where webhooks legitimately point at local services.
|
||||
*/
|
||||
@Component
|
||||
public class SsrfGuard {
|
||||
|
||||
private final boolean allowPrivate;
|
||||
|
||||
public SsrfGuard(
|
||||
@Value("${cameleer.server.outbound-http.allow-private-targets:false}") boolean allowPrivate
|
||||
) {
|
||||
this.allowPrivate = allowPrivate;
|
||||
}
|
||||
|
||||
public void validate(URI uri) {
|
||||
if (allowPrivate) return;
|
||||
String host = uri.getHost();
|
||||
if (host == null || host.isBlank()) {
|
||||
throw new IllegalArgumentException("URL must include a host: " + uri);
|
||||
}
|
||||
if ("localhost".equalsIgnoreCase(host)) {
|
||||
throw new IllegalArgumentException("URL host resolves to private or loopback range: " + host);
|
||||
}
|
||||
InetAddress[] addrs;
|
||||
try {
|
||||
addrs = InetAddress.getAllByName(host);
|
||||
} catch (UnknownHostException e) {
|
||||
throw new IllegalArgumentException("URL host does not resolve: " + host, e);
|
||||
}
|
||||
for (InetAddress addr : addrs) {
|
||||
if (isPrivate(addr)) {
|
||||
throw new IllegalArgumentException("URL host resolves to private or loopback range: " + host + " -> " + addr.getHostAddress());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isPrivate(InetAddress addr) {
|
||||
if (addr.isLoopbackAddress()) return true;
|
||||
if (addr.isLinkLocalAddress()) return true;
|
||||
if (addr.isSiteLocalAddress()) return true; // 10/8, 172.16/12, 192.168/16
|
||||
if (addr.isAnyLocalAddress()) return true; // 0.0.0.0, ::
|
||||
if (addr instanceof Inet6Address ip6) {
|
||||
byte[] raw = ip6.getAddress();
|
||||
// fc00::/7 unique-local
|
||||
if ((raw[0] & 0xfe) == 0xfc) return true;
|
||||
}
|
||||
if (addr instanceof Inet4Address ip4) {
|
||||
byte[] raw = ip4.getAddress();
|
||||
// 169.254.0.0/16 link-local (also matches isLinkLocalAddress but doubled-up for safety)
|
||||
if ((raw[0] & 0xff) == 169 && (raw[1] & 0xff) == 254) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.cameleer.server.app.outbound.config;
|
||||
|
||||
import com.cameleer.server.app.outbound.OutboundConnectionServiceImpl;
|
||||
import com.cameleer.server.app.outbound.SsrfGuard;
|
||||
import com.cameleer.server.app.outbound.crypto.SecretCipher;
|
||||
import com.cameleer.server.app.outbound.storage.PostgresOutboundConnectionRepository;
|
||||
import com.cameleer.server.core.alerting.AlertRuleRepository;
|
||||
@@ -31,7 +32,8 @@ public class OutboundBeanConfig {
|
||||
public OutboundConnectionService outboundConnectionService(
|
||||
OutboundConnectionRepository repo,
|
||||
AlertRuleRepository ruleRepo,
|
||||
SsrfGuard ssrfGuard,
|
||||
@Value("${cameleer.server.tenant.id:default}") String tenantId) {
|
||||
return new OutboundConnectionServiceImpl(repo, ruleRepo, tenantId);
|
||||
return new OutboundConnectionServiceImpl(repo, ruleRepo, ssrfGuard, tenantId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
package com.cameleer.server.app.alerting.metrics;
|
||||
|
||||
import com.cameleer.server.core.alerting.AlertState;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Verifies that {@link AlertingMetrics} caches gauge values for a configurable TTL,
|
||||
* so that Prometheus scrapes do not cause one Postgres query per scrape.
|
||||
*/
|
||||
class AlertingMetricsCachingTest {
|
||||
|
||||
@Test
|
||||
void gaugeSupplierIsCalledAtMostOncePerTtl() {
|
||||
// The instances supplier is shared across every AlertState gauge, so each
|
||||
// full gauge snapshot invokes it once per AlertState (one cache per state).
|
||||
final int stateCount = AlertState.values().length;
|
||||
|
||||
AtomicInteger enabledRulesCalls = new AtomicInteger();
|
||||
AtomicInteger disabledRulesCalls = new AtomicInteger();
|
||||
AtomicInteger instancesCalls = new AtomicInteger();
|
||||
AtomicReference<Instant> now = new AtomicReference<>(Instant.parse("2026-04-20T00:00:00Z"));
|
||||
Supplier<Instant> clock = now::get;
|
||||
|
||||
MeterRegistry registry = new SimpleMeterRegistry();
|
||||
|
||||
Supplier<Long> enabledRulesSupplier = () -> { enabledRulesCalls.incrementAndGet(); return 7L; };
|
||||
Supplier<Long> disabledRulesSupplier = () -> { disabledRulesCalls.incrementAndGet(); return 3L; };
|
||||
Supplier<Long> instancesSupplier = () -> { instancesCalls.incrementAndGet(); return 5L; };
|
||||
|
||||
AlertingMetrics metrics = new AlertingMetrics(
|
||||
registry,
|
||||
enabledRulesSupplier,
|
||||
disabledRulesSupplier,
|
||||
instancesSupplier,
|
||||
Duration.ofSeconds(30),
|
||||
clock
|
||||
);
|
||||
|
||||
// First scrape — each supplier invoked exactly once per gauge.
|
||||
metrics.snapshotAllGauges();
|
||||
assertThat(enabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(disabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(instancesCalls.get()).isEqualTo(stateCount);
|
||||
|
||||
// Second scrape within TTL — served from cache.
|
||||
metrics.snapshotAllGauges();
|
||||
assertThat(enabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(disabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(instancesCalls.get()).isEqualTo(stateCount);
|
||||
|
||||
// Third scrape still within TTL (29 s later) — still cached.
|
||||
now.set(now.get().plusSeconds(29));
|
||||
metrics.snapshotAllGauges();
|
||||
assertThat(enabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(disabledRulesCalls.get()).isEqualTo(1);
|
||||
assertThat(instancesCalls.get()).isEqualTo(stateCount);
|
||||
|
||||
// Advance past TTL — next scrape re-queries the delegate.
|
||||
now.set(Instant.parse("2026-04-20T00:00:31Z"));
|
||||
metrics.snapshotAllGauges();
|
||||
assertThat(enabledRulesCalls.get()).isEqualTo(2);
|
||||
assertThat(disabledRulesCalls.get()).isEqualTo(2);
|
||||
assertThat(instancesCalls.get()).isEqualTo(stateCount * 2);
|
||||
|
||||
// Immediate follow-up — back in cache.
|
||||
metrics.snapshotAllGauges();
|
||||
assertThat(enabledRulesCalls.get()).isEqualTo(2);
|
||||
assertThat(disabledRulesCalls.get()).isEqualTo(2);
|
||||
assertThat(instancesCalls.get()).isEqualTo(stateCount * 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void gaugeValueReflectsCachedResult() {
|
||||
AtomicReference<Long> enabledValue = new AtomicReference<>(10L);
|
||||
AtomicReference<Instant> now = new AtomicReference<>(Instant.parse("2026-04-20T00:00:00Z"));
|
||||
|
||||
MeterRegistry registry = new SimpleMeterRegistry();
|
||||
AlertingMetrics metrics = new AlertingMetrics(
|
||||
registry,
|
||||
enabledValue::get,
|
||||
() -> 0L,
|
||||
() -> 0L,
|
||||
Duration.ofSeconds(30),
|
||||
now::get
|
||||
);
|
||||
|
||||
// Read once — value cached at 10.
|
||||
metrics.snapshotAllGauges();
|
||||
|
||||
// Mutate the underlying supplier output; cache should shield it.
|
||||
enabledValue.set(99L);
|
||||
double cached = registry.find("alerting_rules_total").tag("state", "enabled").gauge().value();
|
||||
assertThat(cached).isEqualTo(10.0);
|
||||
|
||||
// After TTL, new value surfaces.
|
||||
now.set(now.get().plusSeconds(31));
|
||||
metrics.snapshotAllGauges();
|
||||
double refreshed = registry.find("alerting_rules_total").tag("state", "enabled").gauge().value();
|
||||
assertThat(refreshed).isEqualTo(99.0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
package com.cameleer.server.app.outbound;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.net.URI;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class SsrfGuardTest {
|
||||
|
||||
private final SsrfGuard guard = new SsrfGuard(false); // allow-private disabled by default
|
||||
|
||||
@Test
|
||||
void rejectsLoopbackIpv4() {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create("https://127.0.0.1/webhook")))
|
||||
.isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("private or loopback");
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsLocalhostHostname() {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create("https://localhost:8080/x")))
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsRfc1918Ranges() {
|
||||
for (String url : Set.of(
|
||||
"https://10.0.0.1/x",
|
||||
"https://172.16.5.6/x",
|
||||
"https://192.168.1.1/x"
|
||||
)) {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create(url)))
|
||||
.as(url)
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsLinkLocal() {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create("https://169.254.169.254/latest/meta-data/")))
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsIpv6Loopback() {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create("https://[::1]/x")))
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsIpv6UniqueLocal() {
|
||||
assertThatThrownBy(() -> guard.validate(URI.create("https://[fc00::1]/x")))
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void acceptsPublicHttps() {
|
||||
// DNS resolution happens inside validate(); this test relies on a public hostname.
|
||||
// Use a literal public IP to avoid network flakiness.
|
||||
// 8.8.8.8 is a public Google DNS IP — not in any private range.
|
||||
assertThat(new SsrfGuard(false)).isNotNull();
|
||||
guard.validate(URI.create("https://8.8.8.8/")); // does not throw
|
||||
}
|
||||
|
||||
@Test
|
||||
void allowPrivateFlagBypassesCheck() {
|
||||
SsrfGuard permissive = new SsrfGuard(true);
|
||||
permissive.validate(URI.create("https://127.0.0.1/")); // must not throw
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
package com.cameleer.server.app.outbound.controller;
|
||||
|
||||
import com.cameleer.server.app.AbstractPostgresIT;
|
||||
import com.cameleer.server.app.TestSecurityHelper;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.web.client.TestRestTemplate;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.TestPropertySource;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Dedicated IT that overrides the test-profile default `allow-private-targets=true`
|
||||
* back to `false` so the SSRF guard's production behavior (reject loopback) is
|
||||
* exercised end-to-end through the admin controller.
|
||||
*
|
||||
* Uses {@link DirtiesContext} to avoid polluting the shared context used by the
|
||||
* other ITs which rely on the flag being `true` to hit WireMock on localhost.
|
||||
*/
|
||||
@TestPropertySource(properties = "cameleer.server.outbound-http.allow-private-targets=false")
|
||||
@DirtiesContext
|
||||
class OutboundConnectionSsrfIT extends AbstractPostgresIT {
|
||||
|
||||
@Autowired private TestRestTemplate restTemplate;
|
||||
@Autowired private TestSecurityHelper securityHelper;
|
||||
|
||||
private String adminJwt;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
adminJwt = securityHelper.adminToken();
|
||||
// Seed admin user row since users(user_id) is an FK target.
|
||||
jdbcTemplate.update(
|
||||
"INSERT INTO users (user_id, provider, email, display_name) VALUES (?, 'test', ?, ?) ON CONFLICT (user_id) DO NOTHING",
|
||||
"test-admin", "test-admin@example.com", "test-admin");
|
||||
jdbcTemplate.update("DELETE FROM outbound_connections WHERE tenant_id = 'default'");
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
jdbcTemplate.update("DELETE FROM outbound_connections WHERE tenant_id = 'default'");
|
||||
jdbcTemplate.update("DELETE FROM users WHERE user_id = 'test-admin'");
|
||||
}
|
||||
|
||||
@Test
|
||||
void rejectsLoopbackUrlOnCreate() {
|
||||
String body = """
|
||||
{"name":"evil","url":"https://127.0.0.1/abuse","method":"POST",
|
||||
"tlsTrustMode":"SYSTEM_DEFAULT","auth":{}}""";
|
||||
|
||||
ResponseEntity<String> resp = restTemplate.exchange(
|
||||
"/api/v1/admin/outbound-connections", HttpMethod.POST,
|
||||
new HttpEntity<>(body, securityHelper.authHeaders(adminJwt)),
|
||||
String.class);
|
||||
|
||||
assertThat(resp.getStatusCode()).isEqualTo(HttpStatus.BAD_REQUEST);
|
||||
assertThat(resp.getBody()).isNotNull();
|
||||
assertThat(resp.getBody()).contains("private or loopback");
|
||||
}
|
||||
}
|
||||
@@ -17,3 +17,5 @@ cameleer:
|
||||
bootstraptokenprevious: old-bootstrap-token
|
||||
infrastructureendpoints: true
|
||||
jwtsecret: test-jwt-secret-for-integration-tests-only
|
||||
outbound-http:
|
||||
allow-private-targets: true
|
||||
|
||||
Reference in New Issue
Block a user