Compare commits
29 Commits
58009d7c23
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c5b6f2bbad | ||
| 83c3ac3ef3 | |||
| 7dd7317cb8 | |||
| 2654271494 | |||
|
|
888f589934 | ||
|
|
9aad2f3871 | ||
|
|
cbaac2bfa5 | ||
|
|
7529a9ce99 | ||
|
|
09309de982 | ||
|
|
56c41814fc | ||
|
|
68704e15b4 | ||
|
|
510206c752 | ||
|
|
58e9695b4c | ||
|
|
f27a0044f1 | ||
|
|
5c9323cfed | ||
|
|
2dcbd5a772 | ||
|
|
f9b5f235cc | ||
|
|
0b419db9f1 | ||
|
|
5f6f9e523d | ||
|
|
35319dc666 | ||
|
|
3c2409ed6e | ||
|
|
ca401363ec | ||
|
|
b5ee9e1d1f | ||
|
|
75a41929c4 | ||
|
|
d58c8cde2e | ||
|
|
64608a7677 | ||
|
|
48ce75bf38 | ||
|
|
0bbe5d6623 | ||
|
|
e1ac896a6e |
@@ -57,7 +57,7 @@ Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `Ale
|
||||
- `DeploymentController` — `/api/v1/environments/{envSlug}/apps/{appSlug}/deployments`. GET list / POST create (body `{ appVersionId }`) / POST `{id}/stop` / POST `{id}/promote` (body `{ targetEnvironment: slug }` — target app slug must exist in target env) / GET `{id}/logs`. All lifecycle ops (`POST /` deploy, `POST /{id}/stop`, `POST /{id}/promote`) audited under `AuditCategory.DEPLOYMENT`. Action codes: `deploy_app`, `stop_deployment`, `promote_deployment`. Acting user resolved via the `user:` prefix-strip convention; both SUCCESS and FAILURE branches write audit rows. `created_by` (TEXT, nullable) populated from `SecurityContextHolder` and surfaced on the `Deployment` DTO.
|
||||
- `ApplicationConfigController` — `/api/v1/environments/{envSlug}`. GET `/config` (list), GET/PUT `/apps/{appSlug}/config`, GET `/apps/{appSlug}/processor-routes`, POST `/apps/{appSlug}/config/test-expression`. PUT accepts `?apply=staged|live` (default `live`). `live` saves to DB and pushes `CONFIG_UPDATE` SSE to live agents in this env (existing behavior); `staged` saves to DB only, skipping the SSE push — used by the unified app deployment page. Audit action is `stage_app_config` for staged writes, `update_app_config` for live. Invalid `apply` values return 400.
|
||||
- `AppSettingsController` — `/api/v1/environments/{envSlug}`. GET `/app-settings` (list), GET/PUT/DELETE `/apps/{appSlug}/settings`. ADMIN/OPERATOR only.
|
||||
- `SearchController` — `/api/v1/environments/{envSlug}`. GET `/executions`, POST `/executions/search`, GET `/stats`, `/stats/timeseries`, `/stats/timeseries/by-app`, `/stats/timeseries/by-route`, `/stats/punchcard`, `/attributes/keys`, `/errors/top`.
|
||||
- `SearchController` — `/api/v1/environments/{envSlug}`. GET `/executions`, POST `/executions/search`, GET `/stats`, `/stats/timeseries`, `/stats/timeseries/by-app`, `/stats/timeseries/by-route`, `/stats/punchcard`, `/attributes/keys`, `/errors/top`. GET `/executions` accepts repeat `attr` query params: `attr=order` (key-exists), `attr=order:47` (exact), `attr=order:4*` (wildcard — `*` maps to SQL LIKE `%`). First `:` splits key/value; later colons stay in the value. Invalid keys → 400. POST `/executions/search` accepts the same filters via `SearchRequest.attributeFilters` in the body.
|
||||
- `LogQueryController` — GET `/api/v1/environments/{envSlug}/logs` (filters: source (multi, comma-split, OR-joined), level (multi, comma-split, OR-joined), application, agentId, exchangeId, logger, q, time range, instanceIds (multi, comma-split, AND-joined as WHERE instance_id IN (...) — used by the Checkpoint detail drawer to scope logs to a deployment's replicas); sort asc/desc). Cursor-paginated, returns `{ data, nextCursor, hasMore, levelCounts }`; cursor is base64url of `"{timestampIso}|{insert_id_uuid}"` — same-millisecond tiebreak via the `insert_id` UUID column on `logs`.
|
||||
- `RouteCatalogController` — GET `/api/v1/environments/{envSlug}/routes` (merged route catalog from registry + ClickHouse; env filter unconditional).
|
||||
- `RouteMetricsController` — GET `/api/v1/environments/{envSlug}/routes/metrics`, GET `/api/v1/environments/{envSlug}/routes/metrics/processors`.
|
||||
@@ -109,6 +109,7 @@ Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `Ale
|
||||
- `UsageAnalyticsController` — GET `/api/v1/admin/usage` (ClickHouse `usage_events`).
|
||||
- `ClickHouseAdminController` — GET `/api/v1/admin/clickhouse/**` (conditional on `infrastructureendpoints` flag).
|
||||
- `DatabaseAdminController` — GET `/api/v1/admin/database/**` (conditional on `infrastructureendpoints` flag).
|
||||
- `ServerMetricsAdminController` — `/api/v1/admin/server-metrics/**`. GET `/catalog`, GET `/instances`, POST `/query`. Generic read API over the `server_metrics` ClickHouse table so SaaS dashboards don't need direct CH access. Delegates to `ServerMetricsQueryStore` (impl `ClickHouseServerMetricsQueryStore`). Visibility matches ClickHouse/Database admin: `@ConditionalOnProperty(infrastructureendpoints, matchIfMissing=true)` + class-level `@PreAuthorize("hasRole('ADMIN')")`. Validation: metric/tag regex `^[a-zA-Z0-9._]+$`, statistic regex `^[a-z_]+$`, `to - from ≤ 31 days`, stepSeconds ∈ [10, 3600], response capped at 500 series. `IllegalArgumentException` → 400. `/query` supports `raw` + `delta` modes (delta does per-`server_instance_id` positive-clipped differences, then aggregates across instances). Derived `statistic=mean` for timers computes `sum(total|total_time)/sum(count)` per bucket.
|
||||
|
||||
### Other (flat)
|
||||
|
||||
@@ -129,6 +130,8 @@ Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `Ale
|
||||
## metrics/ — Prometheus observability
|
||||
|
||||
- `ServerMetrics` — centralized business metrics: gauges (agents by state, SSE connections, buffer depths), counters (ingestion drops, agent transitions, deployment outcomes, auth failures), timers (flush duration, deployment duration). Exposed via `/api/v1/prometheus`.
|
||||
- `ServerInstanceIdConfig` — `@Configuration`, exposes `@Bean("serverInstanceId") String`. Resolution precedence: `cameleer.server.instance-id` property → `HOSTNAME` env → `InetAddress.getLocalHost()` → random UUID. Fixed at boot; rotates across restarts so counters restart cleanly.
|
||||
- `ServerMetricsSnapshotScheduler` — `@Scheduled(fixedDelayString = "${cameleer.server.self-metrics.interval-ms:60000}")`. Walks `MeterRegistry.getMeters()` each tick, emits one `ServerMetricSample` per `Measurement` (Timer/DistributionSummary produce multiple rows per meter — one per Micrometer `Statistic`). Skips non-finite values; logs and swallows store failures. Disabled via `cameleer.server.self-metrics.enabled=false` (`@ConditionalOnProperty`). Write-only — the read side is `ServerMetricsAdminController` (`/api/v1/admin/server-metrics/**`); ad-hoc inspection is also possible via `/api/v1/admin/clickhouse/query`.
|
||||
|
||||
## storage/ — PostgreSQL repositories (JdbcTemplate)
|
||||
|
||||
@@ -145,6 +148,8 @@ Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `Ale
|
||||
- `ClickHouseDiagramStore`, `ClickHouseAgentEventRepository`
|
||||
- `ClickHouseUsageTracker` — usage_events for billing
|
||||
- `ClickHouseRouteCatalogStore` — persistent route catalog with first_seen cache, warm-loaded on startup
|
||||
- `ClickHouseServerMetricsStore` — periodic dumps of the server's own Micrometer registry into the `server_metrics` table. Tenant-stamped (bound at the scheduler, not the bean); no `environment` column (server straddles envs). Batch-insert via `JdbcTemplate.batchUpdate` with `Map(String, String)` tag binding. Written by `ServerMetricsSnapshotScheduler`.
|
||||
- `ClickHouseServerMetricsQueryStore` — read side of `server_metrics` for dashboards. Implements `ServerMetricsQueryStore`. `catalog(from,to)` returns name+type+statistics+tagKeys, `listInstances(from,to)` returns server_instance_ids with first/last seen, `query(request)` builds bucketed time-series with `raw` or `delta` mode and supports a derived `mean` statistic for timers. All identifier inputs regex-validated; tenant_id always bound; max range 31 days; series count capped at 500. Exposed via `ServerMetricsAdminController`.
|
||||
|
||||
## search/ — ClickHouse search and log stores
|
||||
|
||||
|
||||
@@ -47,7 +47,8 @@ paths:
|
||||
## search/ — Execution search and stats
|
||||
|
||||
- `SearchService` — search, count, stats, statsForApp, statsForRoute, timeseries, timeseriesForApp, timeseriesForRoute, timeseriesGroupedByApp, timeseriesGroupedByRoute, slaCompliance, slaCountsByApp, slaCountsByRoute, topErrors, activeErrorTypes, punchcard, distinctAttributeKeys. `statsForRoute`/`timeseriesForRoute` take `(routeId, applicationId)` — app filter is applied to `stats_1m_route`.
|
||||
- `SearchRequest` / `SearchResult` — search DTOs
|
||||
- `SearchRequest` / `SearchResult` — search DTOs. `SearchRequest.attributeFilters: List<AttributeFilter>` carries structured facet filters for execution attributes — key-only (exists), exact (key=value), or wildcard (`*` in value). The 21-arg legacy ctor is preserved to avoid call-site churn; the compact ctor normalises null → `List.of()`.
|
||||
- `AttributeFilter(key, value)` — record with key regex `^[a-zA-Z0-9._-]+$` (inlined into SQL, same constraint as alerting), `value == null` means key-exists, `value` containing `*` becomes a SQL LIKE pattern via `toLikePattern()`.
|
||||
- `ExecutionStats`, `ExecutionSummary` — stats aggregation records
|
||||
- `StatsTimeseries`, `TopError` — timeseries and error DTOs
|
||||
- `LogSearchRequest` / `LogSearchResponse` — log search DTOs. `LogSearchRequest.sources` / `levels` are `List<String>` (null-normalized, multi-value OR); `cursor` + `limit` + `sort` drive keyset pagination. Response carries `nextCursor` + `hasMore` + per-level `levelCounts`.
|
||||
|
||||
@@ -8,7 +8,9 @@ paths:
|
||||
|
||||
# Prometheus Metrics
|
||||
|
||||
Server exposes `/api/v1/prometheus` (unauthenticated, Prometheus text format). Spring Boot Actuator provides JVM, GC, thread pool, and `http.server.requests` metrics automatically. Business metrics via `ServerMetrics` component:
|
||||
Server exposes `/api/v1/prometheus` (unauthenticated, Prometheus text format). Spring Boot Actuator provides JVM, GC, thread pool, and `http.server.requests` metrics automatically. Business metrics via `ServerMetrics` component.
|
||||
|
||||
The same `MeterRegistry` is also snapshotted to ClickHouse every 60 s by `ServerMetricsSnapshotScheduler` (see "Server self-metrics persistence" at the bottom of this file) — so historical server-health data survives restarts without an external Prometheus.
|
||||
|
||||
## Gauges (auto-polled)
|
||||
|
||||
@@ -83,3 +85,23 @@ Mean processing time = `camel.route.policy.total_time / camel.route.policy.count
|
||||
| `cameleer.sse.reconnects.count` | counter | `instanceId` |
|
||||
| `cameleer.taps.evaluated.count` | counter | `instanceId` |
|
||||
| `cameleer.metrics.exported.count` | counter | `instanceId` |
|
||||
|
||||
## Server self-metrics persistence
|
||||
|
||||
`ServerMetricsSnapshotScheduler` walks `MeterRegistry.getMeters()` every 60 s (configurable via `cameleer.server.self-metrics.interval-ms`) and writes one row per Micrometer `Measurement` to the ClickHouse `server_metrics` table. Full registry is captured — Spring Boot Actuator series (`jvm.*`, `process.*`, `http.server.requests`, `hikaricp.*`, `jdbc.*`, `tomcat.*`, `logback.events`, `system.*`) plus `cameleer.*` and `alerting_*`.
|
||||
|
||||
**Table** (`cameleer-server-app/src/main/resources/clickhouse/init.sql`):
|
||||
|
||||
```
|
||||
server_metrics(tenant_id, collected_at, server_instance_id,
|
||||
metric_name, metric_type, statistic, metric_value,
|
||||
tags Map(String,String), server_received_at)
|
||||
```
|
||||
|
||||
- `metric_type` — lowercase Micrometer `Meter.Type` (counter, gauge, timer, distribution_summary, long_task_timer, other)
|
||||
- `statistic` — Micrometer `Statistic.getTagValueRepresentation()` (value, count, total, total_time, max, mean, active_tasks, duration). Timers emit 3 rows per tick (count + total_time + max); gauges/counters emit 1 (`statistic='value'` or `'count'`).
|
||||
- No `environment` column — the server is env-agnostic.
|
||||
- `tenant_id` threaded from `cameleer.server.tenant.id` (single-tenant per server).
|
||||
- `server_instance_id` resolved once at boot by `ServerInstanceIdConfig` (property → HOSTNAME → localhost → UUID fallback). Rotates across restarts so counter resets are unambiguous.
|
||||
- TTL: 90 days (vs 365 for `agent_metrics`). Read back through the ADMIN-only `/api/v1/admin/server-metrics/{catalog,instances,query}` API (which backs the Server Metrics admin page), via ClickHouse admin (`/api/v1/admin/clickhouse/query`), or with direct SQL.
|
||||
- Toggle off entirely with `cameleer.server.self-metrics.enabled=false` (uses `@ConditionalOnProperty`).
|
||||
|
||||
@@ -21,6 +21,7 @@ The UI has 4 main tabs: **Exchanges**, **Dashboard**, **Runtime**, **Deployments
|
||||
|
||||
**Admin pages** (ADMIN-only, under `/admin/`):
|
||||
- **Sensitive Keys** (`ui/src/pages/Admin/SensitiveKeysPage.tsx`) — global sensitive key masking config. Shows agent built-in defaults as outlined Badge reference, editable Tag pills for custom keys, amber-highlighted push-to-agents toggle. Keys add to (not replace) agent defaults. Per-app sensitive key additions managed via `ApplicationConfigController` API. Note: `AppConfigDetailPage.tsx` exists but is not routed in `router.tsx`.
|
||||
- **Server Metrics** (`ui/src/pages/Admin/ServerMetricsAdminPage.tsx`) — dashboard over the `server_metrics` ClickHouse table. Visibility matches Database/ClickHouse pages: gated on `capabilities.infrastructureEndpoints` in `buildAdminTreeNodes`; backend is `@ConditionalOnProperty(infrastructureendpoints) + @PreAuthorize('hasRole(ADMIN)')`. Uses the generic `/api/v1/admin/server-metrics/{catalog,instances,query}` API via `ui/src/api/queries/admin/serverMetrics.ts` hooks (`useServerMetricsCatalog`, `useServerMetricsInstances`, `useServerMetricsSeries`), all three of which take a `ServerMetricsRange = { from: Date; to: Date }`. Time range is driven by the global TopBar picker via `useGlobalFilters()` — no page-local selector; bucket size auto-scales through `stepSecondsFor(windowSeconds)` (10 s up to 1 h buckets). Toolbar is just server-instance badges. Sections: Server health (agents/ingestion/auth), JVM (memory/CPU/GC/threads), HTTP & DB pools, Alerting (conditional on catalog), Deployments (conditional on catalog). Each panel is a `ThemedChart` with `Line`/`Area` children from the design system; multi-series responses are flattened into overlap rows by bucket timestamp. Alerting and Deployments rows are hidden when their metrics aren't in the catalog (zero-deploy / alerting-disabled installs).
|
||||
|
||||
## Key UI Files
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<!-- gitnexus:start -->
|
||||
# GitNexus — Code Intelligence
|
||||
|
||||
This project is indexed by GitNexus as **cameleer-server** (9548 symbols, 24461 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||
This project is indexed by GitNexus as **cameleer-server** (9731 symbols, 24987 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||
|
||||
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ When adding, removing, or renaming classes, controllers, endpoints, UI component
|
||||
<!-- gitnexus:start -->
|
||||
# GitNexus — Code Intelligence
|
||||
|
||||
This project is indexed by GitNexus as **cameleer-server** (9548 symbols, 24461 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||
This project is indexed by GitNexus as **cameleer-server** (9731 symbols, 24987 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||
|
||||
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@ import com.cameleer.server.app.storage.ClickHouseRouteCatalogStore;
|
||||
import com.cameleer.server.core.storage.RouteCatalogStore;
|
||||
import com.cameleer.server.app.storage.ClickHouseMetricsQueryStore;
|
||||
import com.cameleer.server.app.storage.ClickHouseMetricsStore;
|
||||
import com.cameleer.server.app.storage.ClickHouseServerMetricsQueryStore;
|
||||
import com.cameleer.server.app.storage.ClickHouseServerMetricsStore;
|
||||
import com.cameleer.server.app.storage.ClickHouseStatsStore;
|
||||
import com.cameleer.server.core.admin.AuditRepository;
|
||||
import com.cameleer.server.core.admin.AuditService;
|
||||
@@ -67,6 +69,19 @@ public class StorageBeanConfig {
|
||||
return new ClickHouseMetricsQueryStore(tenantProperties.getId(), clickHouseJdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ServerMetricsStore clickHouseServerMetricsStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseServerMetricsStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ServerMetricsQueryStore clickHouseServerMetricsQueryStore(
|
||||
TenantProperties tenantProperties,
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseServerMetricsQueryStore(tenantProperties.getId(), clickHouseJdbc);
|
||||
}
|
||||
|
||||
// ── Execution Store ──────────────────────────────────────────────────
|
||||
|
||||
@Bean
|
||||
|
||||
@@ -4,6 +4,7 @@ import com.cameleer.server.app.web.EnvPath;
|
||||
import com.cameleer.server.core.admin.AppSettings;
|
||||
import com.cameleer.server.core.admin.AppSettingsRepository;
|
||||
import com.cameleer.server.core.runtime.Environment;
|
||||
import com.cameleer.server.core.search.AttributeFilter;
|
||||
import com.cameleer.server.core.search.ExecutionStats;
|
||||
import com.cameleer.server.core.search.ExecutionSummary;
|
||||
import com.cameleer.server.core.search.SearchRequest;
|
||||
@@ -14,6 +15,7 @@ import com.cameleer.server.core.search.TopError;
|
||||
import com.cameleer.server.core.storage.StatsStore;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
@@ -21,8 +23,10 @@ import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.server.ResponseStatusException;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@@ -57,11 +61,19 @@ public class SearchController {
|
||||
@RequestParam(name = "agentId", required = false) String instanceId,
|
||||
@RequestParam(required = false) String processorType,
|
||||
@RequestParam(required = false) String application,
|
||||
@RequestParam(name = "attr", required = false) List<String> attr,
|
||||
@RequestParam(defaultValue = "0") int offset,
|
||||
@RequestParam(defaultValue = "50") int limit,
|
||||
@RequestParam(required = false) String sortField,
|
||||
@RequestParam(required = false) String sortDir) {
|
||||
|
||||
List<AttributeFilter> attributeFilters;
|
||||
try {
|
||||
attributeFilters = parseAttrParams(attr);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new ResponseStatusException(HttpStatus.BAD_REQUEST, e.getMessage(), e);
|
||||
}
|
||||
|
||||
SearchRequest request = new SearchRequest(
|
||||
status, timeFrom, timeTo,
|
||||
null, null,
|
||||
@@ -72,12 +84,36 @@ public class SearchController {
|
||||
offset, limit,
|
||||
sortField, sortDir,
|
||||
null,
|
||||
env.slug()
|
||||
env.slug(),
|
||||
attributeFilters
|
||||
);
|
||||
|
||||
return ResponseEntity.ok(searchService.search(request));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses {@code attr} query params of the form {@code key} (key-only) or {@code key:value}
|
||||
* (exact or wildcard via {@code *}). Splits on the first {@code :}; later colons are part of
|
||||
* the value. Blank / null list → empty result. Key validation is delegated to
|
||||
* {@link AttributeFilter}'s compact constructor, which throws {@link IllegalArgumentException}
|
||||
* on invalid keys (mapped to 400 by the caller).
|
||||
*/
|
||||
static List<AttributeFilter> parseAttrParams(List<String> raw) {
|
||||
if (raw == null || raw.isEmpty()) return List.of();
|
||||
List<AttributeFilter> out = new ArrayList<>(raw.size());
|
||||
for (String entry : raw) {
|
||||
if (entry == null || entry.isBlank()) continue;
|
||||
int colon = entry.indexOf(':');
|
||||
if (colon < 0) {
|
||||
out.add(new AttributeFilter(entry.trim(), null));
|
||||
} else {
|
||||
out.add(new AttributeFilter(entry.substring(0, colon).trim(),
|
||||
entry.substring(colon + 1)));
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
@PostMapping("/executions/search")
|
||||
@Operation(summary = "Advanced search with all filters",
|
||||
description = "Env from the path overrides any environment field in the body.")
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
package com.cameleer.server.app.controller;
|
||||
|
||||
import com.cameleer.server.core.storage.ServerMetricsQueryStore;
|
||||
import com.cameleer.server.core.storage.model.ServerInstanceInfo;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricCatalogEntry;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryRequest;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryResponse;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Generic read API over the ClickHouse {@code server_metrics} table. Lets
|
||||
* SaaS control planes build server-health dashboards without requiring direct
|
||||
* ClickHouse access.
|
||||
*
|
||||
* <p>Three endpoints cover all 17 panels in {@code docs/server-self-metrics.md}:
|
||||
* <ul>
|
||||
* <li>{@code GET /catalog} — discover available metric names, types, statistics, and tags</li>
|
||||
* <li>{@code POST /query} — generic time-series query with aggregation, grouping, filtering, and counter-delta mode</li>
|
||||
* <li>{@code GET /instances} — list server instances (useful for partitioning counter math)</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>Visibility matches {@code ClickHouseAdminController} / {@code DatabaseAdminController}:
|
||||
* <ul>
|
||||
* <li>Conditional on {@code cameleer.server.security.infrastructureendpoints=true} (default).</li>
|
||||
* <li>Class-level {@code @PreAuthorize("hasRole('ADMIN')")} on top of the
|
||||
* {@code /api/v1/admin/**} catch-all in {@code SecurityConfig}.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@ConditionalOnProperty(
|
||||
name = "cameleer.server.security.infrastructureendpoints",
|
||||
havingValue = "true",
|
||||
matchIfMissing = true
|
||||
)
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/admin/server-metrics")
|
||||
@PreAuthorize("hasRole('ADMIN')")
|
||||
@Tag(name = "Server Self-Metrics",
|
||||
description = "Read API over the server's own Micrometer registry snapshots (ADMIN only)")
|
||||
public class ServerMetricsAdminController {
|
||||
|
||||
/** Default lookback window for catalog/instances when from/to are omitted. */
|
||||
private static final long DEFAULT_LOOKBACK_SECONDS = 3_600L;
|
||||
|
||||
private final ServerMetricsQueryStore store;
|
||||
|
||||
public ServerMetricsAdminController(ServerMetricsQueryStore store) {
|
||||
this.store = store;
|
||||
}
|
||||
|
||||
@GetMapping("/catalog")
|
||||
@Operation(summary = "List metric names observed in the window",
|
||||
description = "For each metric_name, returns metric_type, the set of statistics emitted, and the union of tag keys.")
|
||||
public ResponseEntity<List<ServerMetricCatalogEntry>> catalog(
|
||||
@RequestParam(required = false) String from,
|
||||
@RequestParam(required = false) String to) {
|
||||
Instant[] window = resolveWindow(from, to);
|
||||
return ResponseEntity.ok(store.catalog(window[0], window[1]));
|
||||
}
|
||||
|
||||
@GetMapping("/instances")
|
||||
@Operation(summary = "List server_instance_id values observed in the window",
|
||||
description = "Returns first/last seen timestamps — use to partition counter-delta computations.")
|
||||
public ResponseEntity<List<ServerInstanceInfo>> instances(
|
||||
@RequestParam(required = false) String from,
|
||||
@RequestParam(required = false) String to) {
|
||||
Instant[] window = resolveWindow(from, to);
|
||||
return ResponseEntity.ok(store.listInstances(window[0], window[1]));
|
||||
}
|
||||
|
||||
@PostMapping("/query")
|
||||
@Operation(summary = "Generic time-series query",
|
||||
description = "Returns bucketed series for a single metric_name. Supports aggregation (avg/sum/max/min/latest), group-by-tag, filter-by-tag, counter delta mode, and a derived 'mean' statistic for timers.")
|
||||
public ResponseEntity<ServerMetricQueryResponse> query(@RequestBody QueryBody body) {
|
||||
ServerMetricQueryRequest request = new ServerMetricQueryRequest(
|
||||
body.metric(),
|
||||
body.statistic(),
|
||||
parseInstant(body.from(), "from"),
|
||||
parseInstant(body.to(), "to"),
|
||||
body.stepSeconds(),
|
||||
body.groupByTags(),
|
||||
body.filterTags(),
|
||||
body.aggregation(),
|
||||
body.mode(),
|
||||
body.serverInstanceIds());
|
||||
return ResponseEntity.ok(store.query(request));
|
||||
}
|
||||
|
||||
@ExceptionHandler(IllegalArgumentException.class)
|
||||
public ResponseEntity<Map<String, String>> handleBadRequest(IllegalArgumentException e) {
|
||||
return ResponseEntity.badRequest().body(Map.of("error", e.getMessage()));
|
||||
}
|
||||
|
||||
private static Instant[] resolveWindow(String from, String to) {
|
||||
Instant toI = to != null ? parseInstant(to, "to") : Instant.now();
|
||||
Instant fromI = from != null
|
||||
? parseInstant(from, "from")
|
||||
: toI.minusSeconds(DEFAULT_LOOKBACK_SECONDS);
|
||||
if (!fromI.isBefore(toI)) {
|
||||
throw new IllegalArgumentException("from must be strictly before to");
|
||||
}
|
||||
return new Instant[]{fromI, toI};
|
||||
}
|
||||
|
||||
private static Instant parseInstant(String raw, String field) {
|
||||
if (raw == null || raw.isBlank()) {
|
||||
throw new IllegalArgumentException(field + " is required");
|
||||
}
|
||||
try {
|
||||
return Instant.parse(raw);
|
||||
} catch (Exception e) {
|
||||
throw new IllegalArgumentException(
|
||||
field + " must be an ISO-8601 instant (e.g. 2026-04-23T10:00:00Z)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Request body for {@link #query(QueryBody)}. Uses ISO-8601 strings on
|
||||
* the wire so the OpenAPI schema stays language-neutral.
|
||||
*/
|
||||
public record QueryBody(
|
||||
String metric,
|
||||
String statistic,
|
||||
String from,
|
||||
String to,
|
||||
Integer stepSeconds,
|
||||
List<String> groupByTags,
|
||||
Map<String, String> filterTags,
|
||||
String aggregation,
|
||||
String mode,
|
||||
List<String> serverInstanceIds
|
||||
) {
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
package com.cameleer.server.app.metrics;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Resolves a stable identifier for this server process, used as the
|
||||
* {@code server_instance_id} on every server_metrics sample. The value is
|
||||
* fixed at boot, so counters restart cleanly whenever the id rotates.
|
||||
*
|
||||
* <p>Precedence:
|
||||
* <ol>
|
||||
* <li>{@code cameleer.server.instance-id} property / {@code CAMELEER_SERVER_INSTANCE_ID} env
|
||||
* <li>{@code HOSTNAME} env (populated by Docker/Kubernetes)
|
||||
* <li>{@link InetAddress#getLocalHost()} hostname
|
||||
* <li>Random UUID (fallback — only hit when DNS and env are both silent)
|
||||
* </ol>
|
||||
*/
|
||||
@Configuration
|
||||
public class ServerInstanceIdConfig {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ServerInstanceIdConfig.class);
|
||||
|
||||
@Bean("serverInstanceId")
|
||||
public String serverInstanceId(
|
||||
@Value("${cameleer.server.instance-id:}") String configuredId) {
|
||||
if (!isBlank(configuredId)) {
|
||||
log.info("Server instance id resolved from configuration: {}", configuredId);
|
||||
return configuredId;
|
||||
}
|
||||
|
||||
String hostnameEnv = System.getenv("HOSTNAME");
|
||||
if (!isBlank(hostnameEnv)) {
|
||||
log.info("Server instance id resolved from HOSTNAME env: {}", hostnameEnv);
|
||||
return hostnameEnv;
|
||||
}
|
||||
|
||||
try {
|
||||
String localHost = InetAddress.getLocalHost().getHostName();
|
||||
if (!isBlank(localHost)) {
|
||||
log.info("Server instance id resolved from localhost lookup: {}", localHost);
|
||||
return localHost;
|
||||
}
|
||||
} catch (UnknownHostException e) {
|
||||
log.debug("InetAddress.getLocalHost() failed, falling back to UUID: {}", e.getMessage());
|
||||
}
|
||||
|
||||
String fallback = UUID.randomUUID().toString();
|
||||
log.warn("Server instance id could not be resolved; using random UUID {}", fallback);
|
||||
return fallback;
|
||||
}
|
||||
|
||||
private static boolean isBlank(String s) {
|
||||
return s == null || s.isBlank();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
package com.cameleer.server.app.metrics;
|
||||
|
||||
import com.cameleer.server.core.storage.ServerMetricsStore;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSample;
|
||||
import io.micrometer.core.instrument.Measurement;
|
||||
import io.micrometer.core.instrument.Meter;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.Tag;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Periodically snapshots every meter in the server's {@link MeterRegistry}
|
||||
* and writes the result to ClickHouse via {@link ServerMetricsStore}. This
|
||||
* gives us historical server-health data (buffer depths, agent transitions,
|
||||
* flush latency, JVM memory, HTTP response counts, etc.) without requiring
|
||||
* an external Prometheus.
|
||||
*
|
||||
* <p>Each Micrometer {@link Meter#measure() measurement} becomes one row, so
|
||||
* a single Timer produces rows for {@code count}, {@code total_time}, and
|
||||
* {@code max} each tick. Counter values are cumulative since meter
|
||||
* registration (Prometheus convention) — callers compute rate() themselves.
|
||||
*
|
||||
* <p>Disabled via {@code cameleer.server.self-metrics.enabled=false}.
|
||||
*/
|
||||
@Component
|
||||
@ConditionalOnProperty(
|
||||
prefix = "cameleer.server.self-metrics",
|
||||
name = "enabled",
|
||||
havingValue = "true",
|
||||
matchIfMissing = true)
|
||||
public class ServerMetricsSnapshotScheduler {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ServerMetricsSnapshotScheduler.class);
|
||||
|
||||
private final MeterRegistry registry;
|
||||
private final ServerMetricsStore store;
|
||||
private final String tenantId;
|
||||
private final String serverInstanceId;
|
||||
|
||||
public ServerMetricsSnapshotScheduler(
|
||||
MeterRegistry registry,
|
||||
ServerMetricsStore store,
|
||||
@Value("${cameleer.server.tenant.id:default}") String tenantId,
|
||||
@Qualifier("serverInstanceId") String serverInstanceId) {
|
||||
this.registry = registry;
|
||||
this.store = store;
|
||||
this.tenantId = tenantId;
|
||||
this.serverInstanceId = serverInstanceId;
|
||||
}
|
||||
|
||||
@Scheduled(fixedDelayString = "${cameleer.server.self-metrics.interval-ms:60000}",
|
||||
initialDelayString = "${cameleer.server.self-metrics.interval-ms:60000}")
|
||||
public void snapshot() {
|
||||
try {
|
||||
Instant now = Instant.now();
|
||||
List<ServerMetricSample> batch = new ArrayList<>();
|
||||
|
||||
for (Meter meter : registry.getMeters()) {
|
||||
Meter.Id id = meter.getId();
|
||||
Map<String, String> tags = flattenTags(id.getTagsAsIterable());
|
||||
String type = id.getType().name().toLowerCase();
|
||||
|
||||
for (Measurement m : meter.measure()) {
|
||||
double v = m.getValue();
|
||||
if (!Double.isFinite(v)) continue;
|
||||
batch.add(new ServerMetricSample(
|
||||
tenantId,
|
||||
now,
|
||||
serverInstanceId,
|
||||
id.getName(),
|
||||
type,
|
||||
m.getStatistic().getTagValueRepresentation(),
|
||||
v,
|
||||
tags));
|
||||
}
|
||||
}
|
||||
|
||||
if (!batch.isEmpty()) {
|
||||
store.insertBatch(batch);
|
||||
log.debug("Persisted {} server self-metric samples", batch.size());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Server self-metrics snapshot failed: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, String> flattenTags(Iterable<Tag> tags) {
|
||||
Map<String, String> out = new LinkedHashMap<>();
|
||||
for (Tag t : tags) {
|
||||
out.put(t.getKey(), t.getValue());
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.cameleer.server.app.search;
|
||||
|
||||
import com.cameleer.server.core.alerting.AlertMatchSpec;
|
||||
import com.cameleer.server.core.search.AttributeFilter;
|
||||
import com.cameleer.server.core.search.ExecutionSummary;
|
||||
import com.cameleer.server.core.search.SearchRequest;
|
||||
import com.cameleer.server.core.search.SearchResult;
|
||||
@@ -256,6 +257,23 @@ public class ClickHouseSearchIndex implements SearchIndex {
|
||||
params.add(likeTerm);
|
||||
}
|
||||
|
||||
// Structured attribute filters. Keys were validated at AttributeFilter construction
|
||||
// time against ^[a-zA-Z0-9._-]+$ so they are safe to single-quote-inline; the JSON path
|
||||
// argument of JSONExtractString does not accept a ? placeholder in ClickHouse JDBC
|
||||
// (same constraint as countExecutionsForAlerting below). Values are parameter-bound.
|
||||
for (AttributeFilter filter : request.attributeFilters()) {
|
||||
String escapedKey = filter.key().replace("'", "\\'");
|
||||
if (filter.isKeyOnly()) {
|
||||
conditions.add("JSONHas(attributes, '" + escapedKey + "')");
|
||||
} else if (filter.isWildcard()) {
|
||||
conditions.add("JSONExtractString(attributes, '" + escapedKey + "') LIKE ?");
|
||||
params.add(filter.toLikePattern());
|
||||
} else {
|
||||
conditions.add("JSONExtractString(attributes, '" + escapedKey + "') = ?");
|
||||
params.add(filter.value());
|
||||
}
|
||||
}
|
||||
|
||||
return String.join(" AND ", conditions);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,408 @@
|
||||
package com.cameleer.server.app.storage;
|
||||
|
||||
import com.cameleer.server.core.storage.ServerMetricsQueryStore;
|
||||
import com.cameleer.server.core.storage.model.ServerInstanceInfo;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricCatalogEntry;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricPoint;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryRequest;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryResponse;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSeries;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Array;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* ClickHouse-backed {@link ServerMetricsQueryStore}.
|
||||
*
|
||||
* <p>Safety rules for every query:
|
||||
* <ul>
|
||||
* <li>tenant_id always bound as a parameter — no cross-tenant reads.</li>
|
||||
* <li>Identifier-like inputs (metric name, statistic, tag keys,
|
||||
* aggregation, mode) are regex-validated. Tag keys flow through the
|
||||
* query as JDBC parameter-bound values of {@code tags[?]} map lookups,
|
||||
* so even with a "safe" regex they cannot inject SQL.</li>
|
||||
* <li>Literal values ({@code from}, {@code to}, tag filter values,
|
||||
* server_instance_id allow-list) always go through {@code ?}.</li>
|
||||
* <li>The time range is capped at {@link #MAX_RANGE}.</li>
|
||||
* <li>Result cardinality is capped at {@link #MAX_SERIES} series.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class ClickHouseServerMetricsQueryStore implements ServerMetricsQueryStore {
|
||||
|
||||
private static final Pattern SAFE_IDENTIFIER = Pattern.compile("^[a-zA-Z0-9._]+$");
|
||||
private static final Pattern SAFE_STATISTIC = Pattern.compile("^[a-z_]+$");
|
||||
|
||||
private static final Set<String> AGGREGATIONS = Set.of("avg", "sum", "max", "min", "latest");
|
||||
private static final Set<String> MODES = Set.of("raw", "delta");
|
||||
|
||||
/** Maximum {@code to - from} window accepted by the API. */
|
||||
static final Duration MAX_RANGE = Duration.ofDays(31);
|
||||
|
||||
/** Accepted bounds (values outside them are rejected, not clamped) and default for {@code stepSeconds}. */
|
||||
static final int MIN_STEP = 10;
|
||||
static final int MAX_STEP = 3600;
|
||||
static final int DEFAULT_STEP = 60;
|
||||
|
||||
/** Defence against group-by explosion — limit the series count per response. */
|
||||
static final int MAX_SERIES = 500;
|
||||
|
||||
private final String tenantId;
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
/**
 * Creates a query store bound to one tenant.
 *
 * @param tenantId tenant id bound into the {@code tenant_id = ?} predicate of every query
 * @param jdbc     template used to execute the queries
 */
public ClickHouseServerMetricsQueryStore(String tenantId, JdbcTemplate jdbc) {
    this.jdbc = jdbc;
    this.tenantId = tenantId;
}
|
||||
|
||||
// ── catalog ─────────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public List<ServerMetricCatalogEntry> catalog(Instant from, Instant to) {
|
||||
requireRange(from, to);
|
||||
String sql = """
|
||||
SELECT
|
||||
metric_name,
|
||||
any(metric_type) AS metric_type,
|
||||
arraySort(groupUniqArray(statistic)) AS statistics,
|
||||
arraySort(arrayDistinct(arrayFlatten(groupArray(mapKeys(tags))))) AS tag_keys
|
||||
FROM server_metrics
|
||||
WHERE tenant_id = ?
|
||||
AND collected_at >= ?
|
||||
AND collected_at < ?
|
||||
GROUP BY metric_name
|
||||
ORDER BY metric_name
|
||||
""";
|
||||
return jdbc.query(sql, (rs, n) -> new ServerMetricCatalogEntry(
|
||||
rs.getString("metric_name"),
|
||||
rs.getString("metric_type"),
|
||||
arrayToStringList(rs.getArray("statistics")),
|
||||
arrayToStringList(rs.getArray("tag_keys"))
|
||||
), tenantId, Timestamp.from(from), Timestamp.from(to));
|
||||
}
|
||||
|
||||
// ── instances ───────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public List<ServerInstanceInfo> listInstances(Instant from, Instant to) {
|
||||
requireRange(from, to);
|
||||
String sql = """
|
||||
SELECT
|
||||
server_instance_id,
|
||||
min(collected_at) AS first_seen,
|
||||
max(collected_at) AS last_seen
|
||||
FROM server_metrics
|
||||
WHERE tenant_id = ?
|
||||
AND collected_at >= ?
|
||||
AND collected_at < ?
|
||||
GROUP BY server_instance_id
|
||||
ORDER BY last_seen DESC
|
||||
""";
|
||||
return jdbc.query(sql, (rs, n) -> new ServerInstanceInfo(
|
||||
rs.getString("server_instance_id"),
|
||||
rs.getTimestamp("first_seen").toInstant(),
|
||||
rs.getTimestamp("last_seen").toInstant()
|
||||
), tenantId, Timestamp.from(from), Timestamp.from(to));
|
||||
}
|
||||
|
||||
// ── query ───────────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public ServerMetricQueryResponse query(ServerMetricQueryRequest request) {
|
||||
if (request == null) throw new IllegalArgumentException("request is required");
|
||||
String metric = requireSafeIdentifier(request.metric(), "metric");
|
||||
requireRange(request.from(), request.to());
|
||||
|
||||
String aggregation = request.aggregation() != null ? request.aggregation().toLowerCase() : "avg";
|
||||
if (!AGGREGATIONS.contains(aggregation)) {
|
||||
throw new IllegalArgumentException("aggregation must be one of " + AGGREGATIONS);
|
||||
}
|
||||
|
||||
String mode = request.mode() != null ? request.mode().toLowerCase() : "raw";
|
||||
if (!MODES.contains(mode)) {
|
||||
throw new IllegalArgumentException("mode must be one of " + MODES);
|
||||
}
|
||||
|
||||
int step = request.stepSeconds() != null ? request.stepSeconds() : DEFAULT_STEP;
|
||||
if (step < MIN_STEP || step > MAX_STEP) {
|
||||
throw new IllegalArgumentException(
|
||||
"stepSeconds must be in [" + MIN_STEP + "," + MAX_STEP + "]");
|
||||
}
|
||||
|
||||
String statistic = request.statistic();
|
||||
if (statistic != null && !SAFE_STATISTIC.matcher(statistic).matches()) {
|
||||
throw new IllegalArgumentException("statistic contains unsafe characters");
|
||||
}
|
||||
|
||||
List<String> groupByTags = request.groupByTags() != null
|
||||
? request.groupByTags() : List.of();
|
||||
for (String t : groupByTags) requireSafeIdentifier(t, "groupByTag");
|
||||
|
||||
Map<String, String> filterTags = request.filterTags() != null
|
||||
? request.filterTags() : Map.of();
|
||||
for (String t : filterTags.keySet()) requireSafeIdentifier(t, "filterTag key");
|
||||
|
||||
List<String> instanceAllowList = request.serverInstanceIds() != null
|
||||
? request.serverInstanceIds() : List.of();
|
||||
|
||||
boolean isDelta = "delta".equals(mode);
|
||||
boolean isMean = "mean".equals(statistic);
|
||||
|
||||
String sql = isDelta
|
||||
? buildDeltaSql(step, groupByTags, filterTags, instanceAllowList, statistic, isMean)
|
||||
: buildRawSql(step, groupByTags, filterTags, instanceAllowList,
|
||||
statistic, aggregation, isMean);
|
||||
|
||||
List<Object> params = buildParams(groupByTags, metric, statistic, isMean,
|
||||
request.from(), request.to(),
|
||||
filterTags, instanceAllowList);
|
||||
|
||||
List<Row> rows = jdbc.query(sql, (rs, n) -> {
|
||||
int idx = 1;
|
||||
Instant bucket = rs.getTimestamp(idx++).toInstant();
|
||||
List<String> tagValues = new ArrayList<>(groupByTags.size());
|
||||
for (int g = 0; g < groupByTags.size(); g++) {
|
||||
tagValues.add(rs.getString(idx++));
|
||||
}
|
||||
double value = rs.getDouble(idx);
|
||||
return new Row(bucket, tagValues, value);
|
||||
}, params.toArray());
|
||||
|
||||
return assembleSeries(rows, metric, statistic, aggregation, mode, step, groupByTags);
|
||||
}
|
||||
|
||||
// ── SQL builders ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Builds a single-pass SQL for raw mode:
|
||||
* <pre>{@code
|
||||
* SELECT bucket, tag0, ..., <agg>(metric_value) AS value
|
||||
* FROM server_metrics WHERE ...
|
||||
* GROUP BY bucket, tag0, ...
|
||||
* ORDER BY bucket, tag0, ...
|
||||
* }</pre>
|
||||
* For {@code statistic=mean}, replaces the aggregate with
|
||||
* {@code sumIf(value, statistic IN ('total','total_time')) / nullIf(sumIf(value, statistic='count'), 0)}.
|
||||
*/
|
||||
private String buildRawSql(int step, List<String> groupByTags,
|
||||
Map<String, String> filterTags,
|
||||
List<String> instanceAllowList,
|
||||
String statistic, String aggregation, boolean isMean) {
|
||||
StringBuilder s = new StringBuilder(512);
|
||||
s.append("SELECT\n toDateTime64(toStartOfInterval(collected_at, INTERVAL ")
|
||||
.append(step).append(" SECOND), 3) AS bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) {
|
||||
s.append(",\n tags[?] AS tag").append(i);
|
||||
}
|
||||
s.append(",\n ").append(isMean ? meanExpr() : scalarAggExpr(aggregation))
|
||||
.append(" AS value\nFROM server_metrics\n");
|
||||
appendWhereClause(s, filterTags, instanceAllowList, statistic, isMean);
|
||||
s.append("GROUP BY bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append("\nORDER BY bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a three-level SQL for delta mode. Inner fills one
|
||||
* (bucket, instance, tag-group) row via {@code max(metric_value)};
|
||||
* middle computes positive-clipped per-instance differences via a
|
||||
* window function; outer sums across instances.
|
||||
*/
|
||||
private String buildDeltaSql(int step, List<String> groupByTags,
|
||||
Map<String, String> filterTags,
|
||||
List<String> instanceAllowList,
|
||||
String statistic, boolean isMean) {
|
||||
StringBuilder s = new StringBuilder(1024);
|
||||
s.append("SELECT bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append(", sum(delta) AS value FROM (\n");
|
||||
|
||||
// Middle: per-instance positive-clipped delta using window.
|
||||
s.append(" SELECT bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append(", server_instance_id, greatest(0, value - coalesce(any(value) OVER (")
|
||||
.append("PARTITION BY server_instance_id");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append(" ORDER BY bucket ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING), value)) AS delta FROM (\n");
|
||||
|
||||
// Inner: one representative value per (bucket, instance, tag-group).
|
||||
s.append(" SELECT\n toDateTime64(toStartOfInterval(collected_at, INTERVAL ")
|
||||
.append(step).append(" SECOND), 3) AS bucket,\n server_instance_id");
|
||||
for (int i = 0; i < groupByTags.size(); i++) {
|
||||
s.append(",\n tags[?] AS tag").append(i);
|
||||
}
|
||||
s.append(",\n ").append(isMean ? meanExpr() : "max(metric_value)")
|
||||
.append(" AS value\n FROM server_metrics\n");
|
||||
appendWhereClause(s, filterTags, instanceAllowList, statistic, isMean);
|
||||
s.append(" GROUP BY bucket, server_instance_id");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append("\n ) AS bucketed\n) AS deltas\n");
|
||||
|
||||
s.append("GROUP BY bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
s.append("\nORDER BY bucket");
|
||||
for (int i = 0; i < groupByTags.size(); i++) s.append(", tag").append(i);
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* WHERE clause shared by both raw and delta SQL shapes. Appended at the
|
||||
* correct indent under either the single {@code FROM server_metrics}
|
||||
* (raw) or the innermost one (delta).
|
||||
*/
|
||||
private void appendWhereClause(StringBuilder s, Map<String, String> filterTags,
|
||||
List<String> instanceAllowList,
|
||||
String statistic, boolean isMean) {
|
||||
s.append(" WHERE tenant_id = ?\n")
|
||||
.append(" AND metric_name = ?\n");
|
||||
if (isMean) {
|
||||
s.append(" AND statistic IN ('count', 'total', 'total_time')\n");
|
||||
} else if (statistic != null) {
|
||||
s.append(" AND statistic = ?\n");
|
||||
}
|
||||
s.append(" AND collected_at >= ?\n")
|
||||
.append(" AND collected_at < ?\n");
|
||||
for (int i = 0; i < filterTags.size(); i++) {
|
||||
s.append(" AND tags[?] = ?\n");
|
||||
}
|
||||
if (!instanceAllowList.isEmpty()) {
|
||||
s.append(" AND server_instance_id IN (")
|
||||
.append("?,".repeat(instanceAllowList.size() - 1)).append("?)\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* SQL-positional params for both raw and delta queries (same relative
|
||||
* order because the WHERE clause is emitted by {@link #appendWhereClause}
|
||||
* only once, with the {@code tags[?]} select-list placeholders appearing
|
||||
* earlier in the SQL text).
|
||||
*/
|
||||
private List<Object> buildParams(List<String> groupByTags, String metric,
|
||||
String statistic, boolean isMean,
|
||||
Instant from, Instant to,
|
||||
Map<String, String> filterTags,
|
||||
List<String> instanceAllowList) {
|
||||
List<Object> params = new ArrayList<>();
|
||||
// SELECT-list tags[?] placeholders
|
||||
params.addAll(groupByTags);
|
||||
// WHERE
|
||||
params.add(tenantId);
|
||||
params.add(metric);
|
||||
if (!isMean && statistic != null) params.add(statistic);
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
for (Map.Entry<String, String> e : filterTags.entrySet()) {
|
||||
params.add(e.getKey());
|
||||
params.add(e.getValue());
|
||||
}
|
||||
params.addAll(instanceAllowList);
|
||||
return params;
|
||||
}
|
||||
|
||||
private static String scalarAggExpr(String aggregation) {
|
||||
return switch (aggregation) {
|
||||
case "avg" -> "avg(metric_value)";
|
||||
case "sum" -> "sum(metric_value)";
|
||||
case "max" -> "max(metric_value)";
|
||||
case "min" -> "min(metric_value)";
|
||||
case "latest" -> "argMax(metric_value, collected_at)";
|
||||
default -> throw new IllegalStateException("unreachable: " + aggregation);
|
||||
};
|
||||
}
|
||||
|
||||
private static String meanExpr() {
|
||||
return "sumIf(metric_value, statistic IN ('total', 'total_time'))"
|
||||
+ " / nullIf(sumIf(metric_value, statistic = 'count'), 0)";
|
||||
}
|
||||
|
||||
// ── response assembly ───────────────────────────────────────────────
|
||||
|
||||
private ServerMetricQueryResponse assembleSeries(
|
||||
List<Row> rows, String metric, String statistic,
|
||||
String aggregation, String mode, int step, List<String> groupByTags) {
|
||||
|
||||
Map<List<String>, List<ServerMetricPoint>> bySignature = new LinkedHashMap<>();
|
||||
for (Row r : rows) {
|
||||
if (Double.isNaN(r.value) || Double.isInfinite(r.value)) continue;
|
||||
bySignature.computeIfAbsent(r.tagValues, k -> new ArrayList<>())
|
||||
.add(new ServerMetricPoint(r.bucket, r.value));
|
||||
}
|
||||
|
||||
if (bySignature.size() > MAX_SERIES) {
|
||||
throw new IllegalArgumentException(
|
||||
"query produced " + bySignature.size()
|
||||
+ " series; reduce groupByTags or tighten filterTags (max "
|
||||
+ MAX_SERIES + ")");
|
||||
}
|
||||
|
||||
List<ServerMetricSeries> series = new ArrayList<>(bySignature.size());
|
||||
for (Map.Entry<List<String>, List<ServerMetricPoint>> e : bySignature.entrySet()) {
|
||||
Map<String, String> tags = new LinkedHashMap<>();
|
||||
for (int i = 0; i < groupByTags.size(); i++) {
|
||||
tags.put(groupByTags.get(i), e.getKey().get(i));
|
||||
}
|
||||
series.add(new ServerMetricSeries(Collections.unmodifiableMap(tags), e.getValue()));
|
||||
}
|
||||
|
||||
return new ServerMetricQueryResponse(metric,
|
||||
statistic != null ? statistic : "value",
|
||||
aggregation, mode, step, series);
|
||||
}
|
||||
|
||||
// ── helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
private static void requireRange(Instant from, Instant to) {
|
||||
if (from == null || to == null) {
|
||||
throw new IllegalArgumentException("from and to are required");
|
||||
}
|
||||
if (!from.isBefore(to)) {
|
||||
throw new IllegalArgumentException("from must be strictly before to");
|
||||
}
|
||||
if (Duration.between(from, to).compareTo(MAX_RANGE) > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"time range exceeds maximum of " + MAX_RANGE.toDays() + " days");
|
||||
}
|
||||
}
|
||||
|
||||
private static String requireSafeIdentifier(String value, String field) {
|
||||
if (value == null || value.isBlank()) {
|
||||
throw new IllegalArgumentException(field + " is required");
|
||||
}
|
||||
if (!SAFE_IDENTIFIER.matcher(value).matches()) {
|
||||
throw new IllegalArgumentException(
|
||||
field + " contains unsafe characters (allowed: [a-zA-Z0-9._])");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
private static List<String> arrayToStringList(Array array) {
|
||||
if (array == null) return List.of();
|
||||
try {
|
||||
Object[] values = (Object[]) array.getArray();
|
||||
Set<String> sorted = new TreeSet<>();
|
||||
for (Object v : values) {
|
||||
if (v != null) sorted.add(v.toString());
|
||||
}
|
||||
return List.copyOf(sorted);
|
||||
} catch (Exception e) {
|
||||
return List.of();
|
||||
} finally {
|
||||
try { array.free(); } catch (Exception ignore) { }
|
||||
}
|
||||
}
|
||||
|
||||
private record Row(Instant bucket, List<String> tagValues, double value) {
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package com.cameleer.server.app.storage;
|
||||
|
||||
import com.cameleer.server.core.storage.ServerMetricsStore;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSample;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ClickHouseServerMetricsStore implements ServerMetricsStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseServerMetricsStore(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void insertBatch(List<ServerMetricSample> samples) {
|
||||
if (samples.isEmpty()) return;
|
||||
|
||||
jdbc.batchUpdate("""
|
||||
INSERT INTO server_metrics
|
||||
(tenant_id, collected_at, server_instance_id, metric_name,
|
||||
metric_type, statistic, metric_value, tags)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
samples.stream().map(s -> new Object[]{
|
||||
s.tenantId(),
|
||||
Timestamp.from(s.collectedAt()),
|
||||
s.serverInstanceId(),
|
||||
s.metricName(),
|
||||
s.metricType(),
|
||||
s.statistic(),
|
||||
s.value(),
|
||||
tagsToClickHouseMap(s.tags())
|
||||
}).toList());
|
||||
}
|
||||
|
||||
private Map<String, String> tagsToClickHouseMap(Map<String, String> tags) {
|
||||
if (tags == null || tags.isEmpty()) return new HashMap<>();
|
||||
return new HashMap<>(tags);
|
||||
}
|
||||
}
|
||||
@@ -112,6 +112,10 @@ cameleer:
|
||||
url: ${CAMELEER_SERVER_CLICKHOUSE_URL:jdbc:clickhouse://localhost:8123/cameleer}
|
||||
username: ${CAMELEER_SERVER_CLICKHOUSE_USERNAME:default}
|
||||
password: ${CAMELEER_SERVER_CLICKHOUSE_PASSWORD:}
|
||||
self-metrics:
|
||||
enabled: ${CAMELEER_SERVER_SELFMETRICS_ENABLED:true}
|
||||
interval-ms: ${CAMELEER_SERVER_SELFMETRICS_INTERVALMS:60000}
|
||||
instance-id: ${CAMELEER_SERVER_INSTANCE_ID:}
|
||||
|
||||
springdoc:
|
||||
api-docs:
|
||||
|
||||
@@ -401,6 +401,29 @@ CREATE TABLE IF NOT EXISTS route_catalog (
|
||||
ENGINE = ReplacingMergeTree(last_seen)
|
||||
ORDER BY (tenant_id, environment, application_id, route_id);
|
||||
|
||||
-- ── Server Self-Metrics ────────────────────────────────────────────────
|
||||
-- Periodic snapshot of the server's own Micrometer registry (written by
|
||||
-- ServerMetricsSnapshotScheduler). No `environment` column — the server
|
||||
-- straddles environments. `statistic` distinguishes Timer/DistributionSummary
|
||||
-- sub-measurements (count, total, total_time, max) from plain counter/gauge values; `mean` is not stored but derived at query time.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS server_metrics (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
collected_at DateTime64(3),
|
||||
server_instance_id LowCardinality(String),
|
||||
metric_name LowCardinality(String),
|
||||
metric_type LowCardinality(String),
|
||||
statistic LowCardinality(String) DEFAULT 'value',
|
||||
metric_value Float64,
|
||||
tags Map(String, String) DEFAULT map(),
|
||||
server_received_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(collected_at))
|
||||
ORDER BY (tenant_id, collected_at, server_instance_id, metric_name, statistic)
|
||||
TTL toDateTime(collected_at) + INTERVAL 90 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- insert_id tiebreak for keyset pagination (fixes same-millisecond cursor collision).
|
||||
-- IF NOT EXISTS on ADD COLUMN is idempotent. MATERIALIZE COLUMN is a background mutation,
|
||||
-- effectively a no-op once all parts are already materialized.
|
||||
|
||||
@@ -166,6 +166,42 @@ class SearchControllerIT extends AbstractPostgresIT {
|
||||
""", i, i, i, i, i));
|
||||
}
|
||||
|
||||
// Executions 11-12: carry structured attributes used by the attribute-filter tests.
|
||||
ingest("""
|
||||
{
|
||||
"exchangeId": "ex-search-attr-1",
|
||||
"applicationId": "test-group",
|
||||
"instanceId": "test-agent-search-it",
|
||||
"routeId": "search-route-attr-1",
|
||||
"correlationId": "corr-attr-alpha",
|
||||
"status": "COMPLETED",
|
||||
"startTime": "2026-03-12T10:00:00Z",
|
||||
"endTime": "2026-03-12T10:00:00.050Z",
|
||||
"durationMs": 50,
|
||||
"attributes": {"order": "12345", "tenant": "acme"},
|
||||
"chunkSeq": 0,
|
||||
"final": true,
|
||||
"processors": []
|
||||
}
|
||||
""");
|
||||
ingest("""
|
||||
{
|
||||
"exchangeId": "ex-search-attr-2",
|
||||
"applicationId": "test-group",
|
||||
"instanceId": "test-agent-search-it",
|
||||
"routeId": "search-route-attr-2",
|
||||
"correlationId": "corr-attr-beta",
|
||||
"status": "COMPLETED",
|
||||
"startTime": "2026-03-12T10:01:00Z",
|
||||
"endTime": "2026-03-12T10:01:00.050Z",
|
||||
"durationMs": 50,
|
||||
"attributes": {"order": "99999"},
|
||||
"chunkSeq": 0,
|
||||
"final": true,
|
||||
"processors": []
|
||||
}
|
||||
""");
|
||||
|
||||
// Wait for async ingestion + search indexing via REST (no raw SQL).
|
||||
// Probe the last seeded execution to avoid false positives from
|
||||
// other test classes that may have written into the shared CH tables.
|
||||
@@ -174,6 +210,11 @@ class SearchControllerIT extends AbstractPostgresIT {
|
||||
JsonNode body = objectMapper.readTree(r.getBody());
|
||||
assertThat(body.get("total").asLong()).isGreaterThanOrEqualTo(1);
|
||||
});
|
||||
await().atMost(30, SECONDS).untilAsserted(() -> {
|
||||
ResponseEntity<String> r = searchGet("?correlationId=corr-attr-beta");
|
||||
JsonNode body = objectMapper.readTree(r.getBody());
|
||||
assertThat(body.get("total").asLong()).isGreaterThanOrEqualTo(1);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -371,6 +412,69 @@ class SearchControllerIT extends AbstractPostgresIT {
|
||||
assertThat(body.get("limit").asInt()).isEqualTo(50);
|
||||
}
|
||||
|
||||
@Test
|
||||
void attrParam_exactMatch_filtersToMatchingExecution() throws Exception {
|
||||
ResponseEntity<String> response = searchGet("?attr=order:12345&correlationId=corr-attr-alpha");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = objectMapper.readTree(response.getBody());
|
||||
assertThat(body.get("total").asLong()).isEqualTo(1);
|
||||
assertThat(body.get("data").get(0).get("correlationId").asText()).isEqualTo("corr-attr-alpha");
|
||||
}
|
||||
|
||||
@Test
|
||||
void attrParam_keyOnly_matchesAnyExecutionCarryingTheKey() throws Exception {
|
||||
ResponseEntity<String> response = searchGet("?attr=tenant&correlationId=corr-attr-alpha");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = objectMapper.readTree(response.getBody());
|
||||
assertThat(body.get("total").asLong()).isEqualTo(1);
|
||||
assertThat(body.get("data").get(0).get("correlationId").asText()).isEqualTo("corr-attr-alpha");
|
||||
}
|
||||
|
||||
@Test
|
||||
void attrParam_multipleValues_produceIntersection() throws Exception {
|
||||
// order:99999 AND tenant=* should yield zero — exec-attr-2 has order=99999 but no tenant.
|
||||
ResponseEntity<String> response = searchGet("?attr=order:99999&attr=tenant");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = objectMapper.readTree(response.getBody());
|
||||
assertThat(body.get("total").asLong()).isZero();
|
||||
}
|
||||
|
||||
@Test
|
||||
void attrParam_invalidKey_returns400() throws Exception {
|
||||
ResponseEntity<String> response = searchGet("?attr=bad%20key:x");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
@Test
|
||||
void attributeFilters_inPostBody_filtersCorrectly() throws Exception {
|
||||
ResponseEntity<String> response = searchPost("""
|
||||
{
|
||||
"attributeFilters": [
|
||||
{"key": "order", "value": "12345"}
|
||||
],
|
||||
"correlationId": "corr-attr-alpha"
|
||||
}
|
||||
""");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = objectMapper.readTree(response.getBody());
|
||||
assertThat(body.get("total").asLong()).isEqualTo(1);
|
||||
assertThat(body.get("data").get(0).get("correlationId").asText()).isEqualTo("corr-attr-alpha");
|
||||
}
|
||||
|
||||
@Test
|
||||
void attrParam_wildcardValue_matchesOnPrefix() throws Exception {
|
||||
ResponseEntity<String> response = searchGet("?attr=order:1*&correlationId=corr-attr-alpha");
|
||||
assertThat(response.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = objectMapper.readTree(response.getBody());
|
||||
assertThat(body.get("total").asLong()).isEqualTo(1);
|
||||
assertThat(body.get("data").get(0).get("correlationId").asText()).isEqualTo("corr-attr-alpha");
|
||||
}
|
||||
|
||||
// --- Helper methods ---
|
||||
|
||||
private void ingest(String json) {
|
||||
|
||||
@@ -0,0 +1,314 @@
|
||||
package com.cameleer.server.app.controller;
|
||||
|
||||
import com.cameleer.server.app.AbstractPostgresIT;
|
||||
import com.cameleer.server.app.TestSecurityHelper;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.web.client.TestRestTemplate;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class ServerMetricsAdminControllerIT extends AbstractPostgresIT {
|
||||
|
||||
@Autowired
|
||||
private TestRestTemplate restTemplate;
|
||||
|
||||
@Autowired
|
||||
private TestSecurityHelper securityHelper;
|
||||
|
||||
private final ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
private HttpHeaders adminJson;
|
||||
private HttpHeaders adminGet;
|
||||
private HttpHeaders viewerGet;
|
||||
|
||||
@BeforeEach
|
||||
void seedAndAuth() {
|
||||
adminJson = securityHelper.adminHeaders();
|
||||
adminGet = securityHelper.authHeadersNoBody(securityHelper.adminToken());
|
||||
viewerGet = securityHelper.authHeadersNoBody(securityHelper.viewerToken());
|
||||
|
||||
// Fresh rows for each test. The Spring-context ClickHouse JdbcTemplate
|
||||
// lives in a different bean; reach for it here by executing through
|
||||
// the same JdbcTemplate used by the store via the ClickHouseConfig bean.
|
||||
org.springframework.jdbc.core.JdbcTemplate ch = clickhouseJdbc();
|
||||
ch.execute("TRUNCATE TABLE server_metrics");
|
||||
|
||||
Instant t0 = Instant.parse("2026-04-23T10:00:00Z");
|
||||
// Gauge: cameleer.agents.connected, two states, two buckets.
|
||||
insert(ch, "default", t0, "srv-A", "cameleer.agents.connected", "gauge", "value", 3.0,
|
||||
Map.of("state", "live"));
|
||||
insert(ch, "default", t0.plusSeconds(60), "srv-A", "cameleer.agents.connected", "gauge", "value", 4.0,
|
||||
Map.of("state", "live"));
|
||||
insert(ch, "default", t0, "srv-A", "cameleer.agents.connected", "gauge", "value", 1.0,
|
||||
Map.of("state", "stale"));
|
||||
insert(ch, "default", t0.plusSeconds(60), "srv-A", "cameleer.agents.connected", "gauge", "value", 0.0,
|
||||
Map.of("state", "stale"));
|
||||
|
||||
// Counter: cumulative drops, +5 per minute on srv-A.
|
||||
insert(ch, "default", t0, "srv-A", "cameleer.ingestion.drops", "counter", "count", 0.0, Map.of("reason", "buffer_full"));
|
||||
insert(ch, "default", t0.plusSeconds(60), "srv-A", "cameleer.ingestion.drops", "counter", "count", 5.0, Map.of("reason", "buffer_full"));
|
||||
insert(ch, "default", t0.plusSeconds(120), "srv-A", "cameleer.ingestion.drops", "counter", "count", 10.0, Map.of("reason", "buffer_full"));
|
||||
// Simulated restart to srv-B: counter resets to 0, then climbs to 2.
|
||||
insert(ch, "default", t0.plusSeconds(180), "srv-B", "cameleer.ingestion.drops", "counter", "count", 0.0, Map.of("reason", "buffer_full"));
|
||||
insert(ch, "default", t0.plusSeconds(240), "srv-B", "cameleer.ingestion.drops", "counter", "count", 2.0, Map.of("reason", "buffer_full"));
|
||||
|
||||
// Timer mean inputs: two buckets, 2 samples each (count=2, total_time=30).
|
||||
insert(ch, "default", t0, "srv-A", "cameleer.ingestion.flush.duration", "timer", "count", 2.0, Map.of("type", "execution"));
|
||||
insert(ch, "default", t0, "srv-A", "cameleer.ingestion.flush.duration", "timer", "total_time", 30.0, Map.of("type", "execution"));
|
||||
insert(ch, "default", t0.plusSeconds(60), "srv-A", "cameleer.ingestion.flush.duration", "timer", "count", 4.0, Map.of("type", "execution"));
|
||||
insert(ch, "default", t0.plusSeconds(60), "srv-A", "cameleer.ingestion.flush.duration", "timer", "total_time", 100.0, Map.of("type", "execution"));
|
||||
}
|
||||
|
||||
// ── catalog ─────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void catalog_listsSeededMetricsWithStatisticsAndTagKeys() throws Exception {
|
||||
ResponseEntity<String> r = restTemplate.exchange(
|
||||
"/api/v1/admin/server-metrics/catalog?from=2026-04-23T09:00:00Z&to=2026-04-23T11:00:00Z",
|
||||
HttpMethod.GET, new HttpEntity<>(adminGet), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = mapper.readTree(r.getBody());
|
||||
assertThat(body.isArray()).isTrue();
|
||||
|
||||
JsonNode drops = findByField(body, "metricName", "cameleer.ingestion.drops");
|
||||
assertThat(drops.get("metricType").asText()).isEqualTo("counter");
|
||||
assertThat(asStringList(drops.get("statistics"))).contains("count");
|
||||
assertThat(asStringList(drops.get("tagKeys"))).contains("reason");
|
||||
|
||||
JsonNode timer = findByField(body, "metricName", "cameleer.ingestion.flush.duration");
|
||||
assertThat(asStringList(timer.get("statistics"))).contains("count", "total_time");
|
||||
}
|
||||
|
||||
// ── instances ───────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void instances_listsDistinctServerInstanceIdsWithFirstAndLastSeen() throws Exception {
|
||||
ResponseEntity<String> r = restTemplate.exchange(
|
||||
"/api/v1/admin/server-metrics/instances?from=2026-04-23T09:00:00Z&to=2026-04-23T11:00:00Z",
|
||||
HttpMethod.GET, new HttpEntity<>(adminGet), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = mapper.readTree(r.getBody());
|
||||
assertThat(body.isArray()).isTrue();
|
||||
assertThat(body.size()).isEqualTo(2);
|
||||
// Ordered by last_seen DESC — srv-B saw a later row.
|
||||
assertThat(body.get(0).get("serverInstanceId").asText()).isEqualTo("srv-B");
|
||||
assertThat(body.get(1).get("serverInstanceId").asText()).isEqualTo("srv-A");
|
||||
}
|
||||
|
||||
// ── query — gauge with group-by-tag ─────────────────────────────────
|
||||
|
||||
@Test
|
||||
void query_gaugeWithGroupByTag_returnsSeriesPerTagValue() throws Exception {
|
||||
String requestBody = """
|
||||
{
|
||||
"metric": "cameleer.agents.connected",
|
||||
"statistic": "value",
|
||||
"from": "2026-04-23T09:59:00Z",
|
||||
"to": "2026-04-23T10:02:00Z",
|
||||
"stepSeconds": 60,
|
||||
"groupByTags": ["state"],
|
||||
"aggregation": "avg",
|
||||
"mode": "raw"
|
||||
}
|
||||
""";
|
||||
|
||||
ResponseEntity<String> r = restTemplate.postForEntity(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
new HttpEntity<>(requestBody, adminJson), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = mapper.readTree(r.getBody());
|
||||
assertThat(body.get("metric").asText()).isEqualTo("cameleer.agents.connected");
|
||||
assertThat(body.get("statistic").asText()).isEqualTo("value");
|
||||
assertThat(body.get("mode").asText()).isEqualTo("raw");
|
||||
assertThat(body.get("stepSeconds").asInt()).isEqualTo(60);
|
||||
|
||||
JsonNode series = body.get("series");
|
||||
assertThat(series.isArray()).isTrue();
|
||||
assertThat(series.size()).isEqualTo(2);
|
||||
|
||||
JsonNode live = findByTag(series, "state", "live");
|
||||
assertThat(live.get("points").size()).isEqualTo(2);
|
||||
assertThat(live.get("points").get(0).get("v").asDouble()).isEqualTo(3.0);
|
||||
assertThat(live.get("points").get(1).get("v").asDouble()).isEqualTo(4.0);
|
||||
}
|
||||
|
||||
// ── query — counter delta across instance rotation ──────────────────
|
||||
|
||||
@Test
|
||||
void query_counterDelta_clipsNegativesAcrossInstanceRotation() throws Exception {
|
||||
String requestBody = """
|
||||
{
|
||||
"metric": "cameleer.ingestion.drops",
|
||||
"statistic": "count",
|
||||
"from": "2026-04-23T09:59:00Z",
|
||||
"to": "2026-04-23T10:05:00Z",
|
||||
"stepSeconds": 60,
|
||||
"groupByTags": ["reason"],
|
||||
"aggregation": "sum",
|
||||
"mode": "delta"
|
||||
}
|
||||
""";
|
||||
|
||||
ResponseEntity<String> r = restTemplate.postForEntity(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
new HttpEntity<>(requestBody, adminJson), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = mapper.readTree(r.getBody());
|
||||
JsonNode reason = findByTag(body.get("series"), "reason", "buffer_full");
|
||||
// Deltas: 0 (first bucket on srv-A), 5, 5, 0 (first on srv-B, clipped), 2.
|
||||
// Sum across the window should be 12 if we tally all positive deltas.
|
||||
double sum = 0;
|
||||
for (JsonNode p : reason.get("points")) sum += p.get("v").asDouble();
|
||||
assertThat(sum).isEqualTo(12.0);
|
||||
// No individual point may be negative.
|
||||
for (JsonNode p : reason.get("points")) {
|
||||
assertThat(p.get("v").asDouble()).isGreaterThanOrEqualTo(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
// ── query — derived 'mean' statistic for timers ─────────────────────
|
||||
|
||||
@Test
|
||||
void query_timerMeanStatistic_computesTotalOverCountPerBucket() throws Exception {
|
||||
String requestBody = """
|
||||
{
|
||||
"metric": "cameleer.ingestion.flush.duration",
|
||||
"statistic": "mean",
|
||||
"from": "2026-04-23T09:59:00Z",
|
||||
"to": "2026-04-23T10:02:00Z",
|
||||
"stepSeconds": 60,
|
||||
"groupByTags": ["type"],
|
||||
"aggregation": "avg",
|
||||
"mode": "raw"
|
||||
}
|
||||
""";
|
||||
|
||||
ResponseEntity<String> r = restTemplate.postForEntity(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
new HttpEntity<>(requestBody, adminJson), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.OK);
|
||||
|
||||
JsonNode body = mapper.readTree(r.getBody());
|
||||
JsonNode points = findByTag(body.get("series"), "type", "execution").get("points");
|
||||
// Bucket 0: 30 / 2 = 15.0
|
||||
// Bucket 1: 100 / 4 = 25.0
|
||||
assertThat(points.get(0).get("v").asDouble()).isEqualTo(15.0);
|
||||
assertThat(points.get(1).get("v").asDouble()).isEqualTo(25.0);
|
||||
}
|
||||
|
||||
// ── query — input validation ────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void query_rejectsUnsafeMetricName() {
|
||||
String requestBody = """
|
||||
{
|
||||
"metric": "cameleer.agents; DROP TABLE server_metrics",
|
||||
"from": "2026-04-23T09:59:00Z",
|
||||
"to": "2026-04-23T10:02:00Z"
|
||||
}
|
||||
""";
|
||||
|
||||
ResponseEntity<String> r = restTemplate.postForEntity(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
new HttpEntity<>(requestBody, adminJson), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_rejectsRangeBeyondMax() {
|
||||
String requestBody = """
|
||||
{
|
||||
"metric": "cameleer.agents.connected",
|
||||
"from": "2026-01-01T00:00:00Z",
|
||||
"to": "2026-04-23T00:00:00Z"
|
||||
}
|
||||
""";
|
||||
|
||||
ResponseEntity<String> r = restTemplate.postForEntity(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
new HttpEntity<>(requestBody, adminJson), String.class);
|
||||
assertThat(r.getStatusCode()).isEqualTo(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
// ── authorization ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void allEndpoints_requireAdminRole() {
|
||||
ResponseEntity<String> catalog = restTemplate.exchange(
|
||||
"/api/v1/admin/server-metrics/catalog",
|
||||
HttpMethod.GET, new HttpEntity<>(viewerGet), String.class);
|
||||
assertThat(catalog.getStatusCode()).isEqualTo(HttpStatus.FORBIDDEN);
|
||||
|
||||
ResponseEntity<String> instances = restTemplate.exchange(
|
||||
"/api/v1/admin/server-metrics/instances",
|
||||
HttpMethod.GET, new HttpEntity<>(viewerGet), String.class);
|
||||
assertThat(instances.getStatusCode()).isEqualTo(HttpStatus.FORBIDDEN);
|
||||
|
||||
HttpHeaders viewerPost = securityHelper.authHeaders(securityHelper.viewerToken());
|
||||
ResponseEntity<String> query = restTemplate.exchange(
|
||||
"/api/v1/admin/server-metrics/query",
|
||||
HttpMethod.POST, new HttpEntity<>("{}", viewerPost), String.class);
|
||||
assertThat(query.getStatusCode()).isEqualTo(HttpStatus.FORBIDDEN);
|
||||
}
|
||||
|
||||
// ── helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
private org.springframework.jdbc.core.JdbcTemplate clickhouseJdbc() {
|
||||
return org.springframework.test.util.AopTestUtils.getTargetObject(
|
||||
applicationContext.getBean("clickHouseJdbcTemplate"));
|
||||
}
|
||||
|
||||
@Autowired
|
||||
private org.springframework.context.ApplicationContext applicationContext;
|
||||
|
||||
private static void insert(org.springframework.jdbc.core.JdbcTemplate jdbc,
|
||||
String tenantId, Instant collectedAt, String serverInstanceId,
|
||||
String metricName, String metricType, String statistic,
|
||||
double value, Map<String, String> tags) {
|
||||
jdbc.update("""
|
||||
INSERT INTO server_metrics
|
||||
(tenant_id, collected_at, server_instance_id,
|
||||
metric_name, metric_type, statistic, metric_value, tags)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
tenantId, Timestamp.from(collectedAt), serverInstanceId,
|
||||
metricName, metricType, statistic, value, tags);
|
||||
}
|
||||
|
||||
private static JsonNode findByField(JsonNode array, String field, String value) {
|
||||
for (JsonNode n : array) {
|
||||
if (value.equals(n.path(field).asText())) return n;
|
||||
}
|
||||
throw new AssertionError("no element with " + field + "=" + value);
|
||||
}
|
||||
|
||||
private static JsonNode findByTag(JsonNode seriesArray, String tagKey, String tagValue) {
|
||||
for (JsonNode s : seriesArray) {
|
||||
if (tagValue.equals(s.path("tags").path(tagKey).asText())) return s;
|
||||
}
|
||||
throw new AssertionError("no series with tag " + tagKey + "=" + tagValue);
|
||||
}
|
||||
|
||||
private static java.util.List<String> asStringList(JsonNode arr) {
|
||||
java.util.List<String> out = new java.util.ArrayList<>();
|
||||
if (arr != null) for (JsonNode n : arr) out.add(n.asText());
|
||||
return out;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
package com.cameleer.server.app.metrics;
|
||||
|
||||
import com.cameleer.server.core.storage.ServerMetricsStore;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSample;
|
||||
import io.micrometer.core.instrument.Counter;
|
||||
import io.micrometer.core.instrument.Gauge;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.Timer;
|
||||
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class ServerMetricsSnapshotSchedulerTest {
|
||||
|
||||
@Test
|
||||
void snapshot_capturesCounterGaugeAndTimerMeasurements() {
|
||||
MeterRegistry registry = new SimpleMeterRegistry();
|
||||
|
||||
Counter counter = Counter.builder("cameleer.test.counter")
|
||||
.tag("env", "dev")
|
||||
.register(registry);
|
||||
counter.increment(3);
|
||||
|
||||
AtomicInteger gaugeSource = new AtomicInteger(42);
|
||||
Gauge.builder("cameleer.test.gauge", gaugeSource, AtomicInteger::doubleValue)
|
||||
.register(registry);
|
||||
|
||||
Timer timer = Timer.builder("cameleer.test.timer").register(registry);
|
||||
timer.record(Duration.ofMillis(5));
|
||||
timer.record(Duration.ofMillis(15));
|
||||
|
||||
RecordingStore store = new RecordingStore();
|
||||
ServerMetricsSnapshotScheduler scheduler =
|
||||
new ServerMetricsSnapshotScheduler(registry, store, "tenant-7", "server-A");
|
||||
|
||||
scheduler.snapshot();
|
||||
|
||||
assertThat(store.batches).hasSize(1);
|
||||
List<ServerMetricSample> samples = store.batches.get(0);
|
||||
|
||||
// Every sample is stamped with tenant + instance + finite value
|
||||
assertThat(samples).allSatisfy(s -> {
|
||||
assertThat(s.tenantId()).isEqualTo("tenant-7");
|
||||
assertThat(s.serverInstanceId()).isEqualTo("server-A");
|
||||
assertThat(Double.isFinite(s.value())).isTrue();
|
||||
assertThat(s.collectedAt()).isNotNull();
|
||||
});
|
||||
|
||||
// Counter -> 1 row with statistic=count, value=3, tag propagated
|
||||
List<ServerMetricSample> counterRows = samples.stream()
|
||||
.filter(s -> s.metricName().equals("cameleer.test.counter"))
|
||||
.toList();
|
||||
assertThat(counterRows).hasSize(1);
|
||||
assertThat(counterRows.get(0).statistic()).isEqualTo("count");
|
||||
assertThat(counterRows.get(0).metricType()).isEqualTo("counter");
|
||||
assertThat(counterRows.get(0).value()).isEqualTo(3.0);
|
||||
assertThat(counterRows.get(0).tags()).containsEntry("env", "dev");
|
||||
|
||||
// Gauge -> 1 row with statistic=value
|
||||
List<ServerMetricSample> gaugeRows = samples.stream()
|
||||
.filter(s -> s.metricName().equals("cameleer.test.gauge"))
|
||||
.toList();
|
||||
assertThat(gaugeRows).hasSize(1);
|
||||
assertThat(gaugeRows.get(0).statistic()).isEqualTo("value");
|
||||
assertThat(gaugeRows.get(0).metricType()).isEqualTo("gauge");
|
||||
assertThat(gaugeRows.get(0).value()).isEqualTo(42.0);
|
||||
|
||||
// Timer -> emits multiple statistics (count, total_time, max)
|
||||
List<ServerMetricSample> timerRows = samples.stream()
|
||||
.filter(s -> s.metricName().equals("cameleer.test.timer"))
|
||||
.toList();
|
||||
assertThat(timerRows).isNotEmpty();
|
||||
// SimpleMeterRegistry emits Statistic.TOTAL ("total"); other registries (Prometheus)
|
||||
// emit TOTAL_TIME ("total_time"). Accept either so the test isn't registry-coupled.
|
||||
assertThat(timerRows).extracting(ServerMetricSample::statistic)
|
||||
.contains("count", "max");
|
||||
assertThat(timerRows).extracting(ServerMetricSample::statistic)
|
||||
.containsAnyOf("total_time", "total");
|
||||
assertThat(timerRows).allSatisfy(s ->
|
||||
assertThat(s.metricType()).isEqualTo("timer"));
|
||||
ServerMetricSample count = timerRows.stream()
|
||||
.filter(s -> s.statistic().equals("count"))
|
||||
.findFirst().orElseThrow();
|
||||
assertThat(count.value()).isEqualTo(2.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void snapshot_withEmptyRegistry_doesNotWriteBatch() {
|
||||
MeterRegistry registry = new SimpleMeterRegistry();
|
||||
// Force removal of any auto-registered meters (SimpleMeterRegistry has none by default).
|
||||
RecordingStore store = new RecordingStore();
|
||||
ServerMetricsSnapshotScheduler scheduler =
|
||||
new ServerMetricsSnapshotScheduler(registry, store, "t", "s");
|
||||
|
||||
scheduler.snapshot();
|
||||
|
||||
assertThat(store.batches).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void snapshot_swallowsStoreFailures() {
|
||||
MeterRegistry registry = new SimpleMeterRegistry();
|
||||
Counter.builder("cameleer.test").register(registry).increment();
|
||||
|
||||
ServerMetricsStore throwingStore = batch -> {
|
||||
throw new RuntimeException("clickhouse down");
|
||||
};
|
||||
|
||||
ServerMetricsSnapshotScheduler scheduler =
|
||||
new ServerMetricsSnapshotScheduler(registry, throwingStore, "t", "s");
|
||||
|
||||
// Must not propagate — the scheduler thread would otherwise die.
|
||||
scheduler.snapshot();
|
||||
}
|
||||
|
||||
private static final class RecordingStore implements ServerMetricsStore {
|
||||
final List<List<ServerMetricSample>> batches = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public void insertBatch(List<ServerMetricSample> samples) {
|
||||
batches.add(List.copyOf(samples));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package com.cameleer.server.app.search;
|
||||
|
||||
import com.cameleer.server.app.storage.ClickHouseExecutionStore;
|
||||
import com.cameleer.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer.server.core.search.AttributeFilter;
|
||||
import com.cameleer.server.core.search.ExecutionSummary;
|
||||
import com.cameleer.server.core.search.SearchRequest;
|
||||
import com.cameleer.server.core.search.SearchResult;
|
||||
@@ -62,7 +63,7 @@ class ClickHouseSearchIndexIT {
|
||||
500L,
|
||||
"", "", "", "", "", "",
|
||||
"hash-abc", "FULL",
|
||||
"{\"order\":\"12345\"}", "", "", "", "", "", "{\"env\":\"prod\"}",
|
||||
"", "", "", "", "", "", "{\"order\":\"12345\",\"tenant\":\"acme\"}",
|
||||
"", "",
|
||||
false, false,
|
||||
null, null
|
||||
@@ -79,7 +80,7 @@ class ClickHouseSearchIndexIT {
|
||||
"java.lang.NPE\n at Foo.bar(Foo.java:42)",
|
||||
"NullPointerException", "RUNTIME", "", "",
|
||||
"", "FULL",
|
||||
"", "", "", "", "", "", "",
|
||||
"", "", "", "", "", "", "{\"order\":\"99999\"}",
|
||||
"", "",
|
||||
false, false,
|
||||
null, null
|
||||
@@ -309,4 +310,59 @@ class ClickHouseSearchIndexIT {
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byAttributeFilter_exactMatch_matchesExec1() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
List.of(new AttributeFilter("order", "12345")));
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byAttributeFilter_keyOnly_matchesExec1AndExec2() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
List.of(new AttributeFilter("order", null)));
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(2);
|
||||
assertThat(result.data()).extracting(ExecutionSummary::executionId)
|
||||
.containsExactlyInAnyOrder("exec-1", "exec-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byAttributeFilter_wildcardValue_matchesExec1Only() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
List.of(new AttributeFilter("order", "123*")));
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byAttributeFilter_multipleFiltersAreAnded() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
List.of(
|
||||
new AttributeFilter("order", "12345"),
|
||||
new AttributeFilter("tenant", "acme")));
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,117 @@
|
||||
package com.cameleer.server.app.storage;
|
||||
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSample;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseServerMetricsStoreIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseServerMetricsStore store;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
jdbc.execute("""
|
||||
CREATE TABLE IF NOT EXISTS server_metrics (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
collected_at DateTime64(3),
|
||||
server_instance_id LowCardinality(String),
|
||||
metric_name LowCardinality(String),
|
||||
metric_type LowCardinality(String),
|
||||
statistic LowCardinality(String) DEFAULT 'value',
|
||||
metric_value Float64,
|
||||
tags Map(String, String) DEFAULT map(),
|
||||
server_received_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (tenant_id, collected_at, server_instance_id, metric_name, statistic)
|
||||
""");
|
||||
|
||||
jdbc.execute("TRUNCATE TABLE server_metrics");
|
||||
|
||||
store = new ClickHouseServerMetricsStore(jdbc);
|
||||
}
|
||||
|
||||
@Test
|
||||
void insertBatch_roundTripsAllColumns() {
|
||||
Instant ts = Instant.parse("2026-04-23T12:00:00Z");
|
||||
store.insertBatch(List.of(
|
||||
new ServerMetricSample("tenant-a", ts, "srv-1",
|
||||
"cameleer.ingestion.drops", "counter", "count", 17.0,
|
||||
Map.of("reason", "buffer_full")),
|
||||
new ServerMetricSample("tenant-a", ts, "srv-1",
|
||||
"jvm.memory.used", "gauge", "value", 1_048_576.0,
|
||||
Map.of("area", "heap", "id", "G1 Eden Space"))
|
||||
));
|
||||
|
||||
Integer count = jdbc.queryForObject(
|
||||
"SELECT count() FROM server_metrics WHERE tenant_id = 'tenant-a'",
|
||||
Integer.class);
|
||||
assertThat(count).isEqualTo(2);
|
||||
|
||||
Double dropsValue = jdbc.queryForObject(
|
||||
"""
|
||||
SELECT metric_value FROM server_metrics
|
||||
WHERE tenant_id = 'tenant-a'
|
||||
AND server_instance_id = 'srv-1'
|
||||
AND metric_name = 'cameleer.ingestion.drops'
|
||||
AND statistic = 'count'
|
||||
""",
|
||||
Double.class);
|
||||
assertThat(dropsValue).isEqualTo(17.0);
|
||||
|
||||
String heapArea = jdbc.queryForObject(
|
||||
"""
|
||||
SELECT tags['area'] FROM server_metrics
|
||||
WHERE tenant_id = 'tenant-a'
|
||||
AND metric_name = 'jvm.memory.used'
|
||||
""",
|
||||
String.class);
|
||||
assertThat(heapArea).isEqualTo("heap");
|
||||
}
|
||||
|
||||
@Test
|
||||
void insertBatch_emptyList_doesNothing() {
|
||||
store.insertBatch(List.of());
|
||||
|
||||
Integer count = jdbc.queryForObject(
|
||||
"SELECT count() FROM server_metrics", Integer.class);
|
||||
assertThat(count).isEqualTo(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void insertBatch_nullTags_storesEmptyMap() {
|
||||
store.insertBatch(List.of(
|
||||
new ServerMetricSample("default", Instant.parse("2026-04-23T12:00:00Z"),
|
||||
"srv-2", "process.cpu.usage", "gauge", "value", 0.12, null)
|
||||
));
|
||||
|
||||
Integer count = jdbc.queryForObject(
|
||||
"SELECT count() FROM server_metrics WHERE server_instance_id = 'srv-2'",
|
||||
Integer.class);
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
}
|
||||
@@ -23,8 +23,13 @@ import java.util.UUID;
|
||||
*/
|
||||
public class DirtyStateCalculator {
|
||||
|
||||
// Live-pushed fields are excluded from the deploy diff: changes to them take effect
|
||||
// via SSE config-update without a redeploy, so they are not "pending deploy" when they
|
||||
// differ from the last successful deployment snapshot. See ui/rules: the Traces & Taps
|
||||
// and Route Recording tabs apply with ?apply=live and "never mark dirty".
|
||||
private static final Set<String> AGENT_CONFIG_IGNORED_KEYS = Set.of(
|
||||
"version", "updatedAt", "updatedBy", "environment", "application"
|
||||
"version", "updatedAt", "updatedBy", "environment", "application",
|
||||
"taps", "tapVersion", "tracedProcessors", "routeRecording"
|
||||
);
|
||||
|
||||
private final ObjectMapper mapper;
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
package com.cameleer.server.core.search;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * A single structured attribute criterion used by execution search.
 * <p>
 * How {@code value} is interpreted:
 * <ul>
 *   <li>{@code null} or blank: the filter only checks that the key exists
 *       (blank values are normalised to {@code null} on construction)</li>
 *   <li>contains {@code *}: wildcard match, translated to a SQL LIKE pattern
 *       by {@link #toLikePattern()}</li>
 *   <li>anything else: exact match</li>
 * </ul>
 * <p>
 * Keys are restricted to {@code ^[a-zA-Z0-9._-]+$} because they are later inlined into
 * ClickHouse SQL via {@code JSONExtractString}, which does not accept a parameter
 * placeholder for the JSON path. Values are always parameter-bound.
 */
public record AttributeFilter(String key, String value) {

    private static final Pattern KEY_PATTERN = Pattern.compile("^[a-zA-Z0-9._-]+$");

    /**
     * Validates the key and normalises a blank value to {@code null}.
     *
     * @throws IllegalArgumentException if {@code key} is {@code null} or does not match the key pattern
     */
    public AttributeFilter {
        boolean keyIsValid = key != null && KEY_PATTERN.matcher(key).matches();
        if (!keyIsValid) {
            throw new IllegalArgumentException(
                    "Invalid attribute key: must match " + KEY_PATTERN.pattern() + ", got: " + key);
        }
        value = (value == null || value.isBlank()) ? null : value;
    }

    /** @return {@code true} when no value was given, i.e. this is a key-exists check */
    public boolean isKeyOnly() {
        return value == null;
    }

    /** @return {@code true} when the value contains at least one {@code *} */
    public boolean isWildcard() {
        return value != null && value.contains("*");
    }

    /**
     * Translates a wildcard value into a SQL LIKE pattern: each {@code *} becomes {@code %},
     * while literal {@code %}, {@code _} and {@code \} are prefixed with a backslash.
     *
     * @return the LIKE pattern, or {@code null} for exact / key-only filters
     */
    public String toLikePattern() {
        if (!isWildcard()) {
            return null;
        }
        StringBuilder like = new StringBuilder(value.length() + 4);
        for (char ch : value.toCharArray()) {
            if (ch == '*') {
                like.append('%');
                continue;
            }
            // LIKE metacharacters (and the escape character itself) are taken literally.
            if (ch == '\\' || ch == '%' || ch == '_') {
                like.append('\\');
            }
            like.append(ch);
        }
        return like.toString();
    }
}
|
||||
@@ -54,7 +54,8 @@ public record SearchRequest(
|
||||
String sortField,
|
||||
String sortDir,
|
||||
String afterExecutionId,
|
||||
String environment
|
||||
String environment,
|
||||
List<AttributeFilter> attributeFilters
|
||||
) {
|
||||
|
||||
private static final int DEFAULT_LIMIT = 50;
|
||||
@@ -83,6 +84,24 @@ public record SearchRequest(
|
||||
if (offset < 0) offset = 0;
|
||||
if (sortField == null || !ALLOWED_SORT_FIELDS.contains(sortField)) sortField = "startTime";
|
||||
if (!"asc".equalsIgnoreCase(sortDir)) sortDir = "desc";
|
||||
if (attributeFilters == null) attributeFilters = List.of();
|
||||
}
|
||||
|
||||
/**
 * Legacy 21-argument constructor kept so existing call sites compile unchanged.
 * Delegates to the canonical constructor with an empty {@code attributeFilters} list.
 */
public SearchRequest(
        String status,
        Instant timeFrom,
        Instant timeTo,
        Long durationMin,
        Long durationMax,
        String correlationId,
        String text,
        String textInBody,
        String textInHeaders,
        String textInErrors,
        String routeId,
        String instanceId,
        String processorType,
        String applicationId,
        List<String> instanceIds,
        int offset,
        int limit,
        String sortField,
        String sortDir,
        String afterExecutionId,
        String environment
) {
    this(
            status, timeFrom, timeTo,
            durationMin, durationMax, correlationId,
            text, textInBody, textInHeaders, textInErrors,
            routeId, instanceId, processorType,
            applicationId, instanceIds,
            offset, limit, sortField, sortDir,
            afterExecutionId, environment,
            List.of());
}
|
||||
|
||||
/** Returns the snake_case column name for ORDER BY. */
|
||||
@@ -96,7 +115,8 @@ public record SearchRequest(
|
||||
status, timeFrom, timeTo, durationMin, durationMax, correlationId,
|
||||
text, textInBody, textInHeaders, textInErrors,
|
||||
routeId, instanceId, processorType, applicationId, resolvedInstanceIds,
|
||||
offset, limit, sortField, sortDir, afterExecutionId, environment
|
||||
offset, limit, sortField, sortDir, afterExecutionId, environment,
|
||||
attributeFilters
|
||||
);
|
||||
}
|
||||
|
||||
@@ -106,7 +126,8 @@ public record SearchRequest(
|
||||
status, timeFrom, timeTo, durationMin, durationMax, correlationId,
|
||||
text, textInBody, textInHeaders, textInErrors,
|
||||
routeId, instanceId, processorType, applicationId, instanceIds,
|
||||
offset, limit, sortField, sortDir, afterExecutionId, env
|
||||
offset, limit, sortField, sortDir, afterExecutionId, env,
|
||||
attributeFilters
|
||||
);
|
||||
}
|
||||
|
||||
@@ -122,7 +143,8 @@ public record SearchRequest(
|
||||
status, ts, timeTo, durationMin, durationMax, correlationId,
|
||||
text, textInBody, textInHeaders, textInErrors,
|
||||
routeId, instanceId, processorType, applicationId, instanceIds,
|
||||
offset, limit, sortField, sortDir, afterExecutionId, environment
|
||||
offset, limit, sortField, sortDir, afterExecutionId, environment,
|
||||
attributeFilters
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.cameleer.server.core.storage;
|
||||
|
||||
import com.cameleer.server.core.storage.model.ServerInstanceInfo;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricCatalogEntry;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryRequest;
|
||||
import com.cameleer.server.core.storage.model.ServerMetricQueryResponse;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Read-side access to the ClickHouse {@code server_metrics} table. Exposed
|
||||
* to dashboards through {@code /api/v1/admin/server-metrics/**} so SaaS
|
||||
* control planes don't need direct ClickHouse access.
|
||||
*/
|
||||
public interface ServerMetricsQueryStore {
|
||||
|
||||
/**
|
||||
* Catalog of metric names observed in {@code [from, to)} along with their
|
||||
* type, the set of statistics emitted, and the union of tag keys seen.
|
||||
*/
|
||||
List<ServerMetricCatalogEntry> catalog(Instant from, Instant to);
|
||||
|
||||
/**
|
||||
* Distinct {@code server_instance_id} values that wrote at least one
|
||||
* sample in {@code [from, to)}, with first/last seen timestamps.
|
||||
*/
|
||||
List<ServerInstanceInfo> listInstances(Instant from, Instant to);
|
||||
|
||||
/**
|
||||
* Generic time-series query. See {@link ServerMetricQueryRequest} for
|
||||
* request semantics. Implementations must enforce input validation and
|
||||
* reject unsafe inputs with {@link IllegalArgumentException}.
|
||||
*/
|
||||
ServerMetricQueryResponse query(ServerMetricQueryRequest request);
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.cameleer.server.core.storage;
|
||||
|
||||
import com.cameleer.server.core.storage.model.ServerMetricSample;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sink for periodic snapshots of the server's own Micrometer meter registry.
|
||||
* Implementations persist the samples (e.g. to ClickHouse) so server
|
||||
* self-metrics survive restarts and can be queried historically without an
|
||||
* external Prometheus.
|
||||
*/
|
||||
public interface ServerMetricsStore {
|
||||
|
||||
void insertBatch(List<ServerMetricSample> samples);
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
/**
|
||||
* One row of the {@code /api/v1/admin/server-metrics/instances} response.
|
||||
* Used by dashboards to partition counter-delta computations across server
|
||||
* process boundaries (each boot rotates the id).
|
||||
*/
|
||||
public record ServerInstanceInfo(
|
||||
String serverInstanceId,
|
||||
Instant firstSeen,
|
||||
Instant lastSeen
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* One row of the {@code /api/v1/admin/server-metrics/catalog} response.
|
||||
* Surfaces the set of statistics and tag keys observed for a metric across
|
||||
* the requested window, so dashboards can build selectors without ClickHouse
|
||||
* access.
|
||||
*/
|
||||
public record ServerMetricCatalogEntry(
|
||||
String metricName,
|
||||
String metricType,
|
||||
List<String> statistics,
|
||||
List<String> tagKeys
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
/** One {@code (bucket, value)} point of a server-metrics series. */
|
||||
public record ServerMetricPoint(
|
||||
Instant t,
|
||||
double v
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Request contract for the generic server-metrics time-series query.
|
||||
*
|
||||
* <p>{@code aggregation} controls how multiple samples within a bucket
|
||||
* collapse: {@code avg|sum|max|min|latest}. {@code mode} controls counter
|
||||
* handling: {@code raw} returns values as stored (cumulative for counters),
|
||||
* {@code delta} returns per-bucket positive-clipped differences computed
|
||||
* per {@code server_instance_id}.
|
||||
*
|
||||
* <p>{@code statistic} filters which Micrometer sub-measurement to read
|
||||
* ({@code value} / {@code count} / {@code total_time} / {@code total} /
|
||||
* {@code max} / {@code mean}). {@code mean} is a derived statistic for
|
||||
* timers: {@code sum(total_time|total) / sum(count)} per bucket.
|
||||
*
|
||||
* <p>{@code groupByTags} splits the output into one series per unique tag
|
||||
* combination. {@code filterTags} narrows the input to samples whose tag
|
||||
* map matches every entry.
|
||||
*
|
||||
* <p>{@code serverInstanceIds} is an optional allow-list. When null or
|
||||
* empty all instances observed in the window are included.
|
||||
*/
|
||||
public record ServerMetricQueryRequest(
|
||||
String metric,
|
||||
String statistic,
|
||||
Instant from,
|
||||
Instant to,
|
||||
Integer stepSeconds,
|
||||
List<String> groupByTags,
|
||||
Map<String, String> filterTags,
|
||||
String aggregation,
|
||||
String mode,
|
||||
List<String> serverInstanceIds
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/** Response of the generic server-metrics time-series query. */
|
||||
public record ServerMetricQueryResponse(
|
||||
String metric,
|
||||
String statistic,
|
||||
String aggregation,
|
||||
String mode,
|
||||
int stepSeconds,
|
||||
List<ServerMetricSeries> series
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A single sample of the server's own Micrometer registry, captured by a
|
||||
* scheduled snapshot and destined for the ClickHouse {@code server_metrics}
|
||||
* table. One {@code ServerMetricSample} per Micrometer {@code Measurement},
|
||||
* so Timers and DistributionSummaries produce multiple samples per tick
|
||||
* (distinguished by {@link #statistic()}).
|
||||
*/
|
||||
public record ServerMetricSample(
|
||||
String tenantId,
|
||||
Instant collectedAt,
|
||||
String serverInstanceId,
|
||||
String metricName,
|
||||
String metricType,
|
||||
String statistic,
|
||||
double value,
|
||||
Map<String, String> tags
|
||||
) {
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.cameleer.server.core.storage.model;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* One series of the server-metrics query response, identified by its
|
||||
* {@link #tags} group (empty map when the query had no {@code groupByTags}).
|
||||
*/
|
||||
public record ServerMetricSeries(
|
||||
Map<String, String> tags,
|
||||
List<ServerMetricPoint> points
|
||||
) {
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
@@ -114,9 +115,9 @@ class DirtyStateCalculatorTest {
|
||||
DirtyStateCalculator calc = CALC;
|
||||
|
||||
ApplicationConfig deployed = new ApplicationConfig();
|
||||
deployed.setTracedProcessors(Map.of("proc-1", "DEBUG"));
|
||||
deployed.setSensitiveKeys(List.of("password", "token"));
|
||||
ApplicationConfig desired = new ApplicationConfig();
|
||||
desired.setTracedProcessors(Map.of("proc-1", "TRACE"));
|
||||
desired.setSensitiveKeys(List.of("password", "token", "secret"));
|
||||
UUID jarId = UUID.randomUUID();
|
||||
DeploymentConfigSnapshot snap = new DeploymentConfigSnapshot(jarId, deployed, Map.of(), null);
|
||||
|
||||
@@ -124,7 +125,29 @@ class DirtyStateCalculatorTest {
|
||||
|
||||
assertThat(result.dirty()).isTrue();
|
||||
assertThat(result.differences()).extracting(DirtyStateResult.Difference::field)
|
||||
.contains("agentConfig.tracedProcessors.proc-1");
|
||||
.anyMatch(f -> f.startsWith("agentConfig.sensitiveKeys"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void livePushedFields_doNotMarkDirty() {
|
||||
// Taps, tracedProcessors, and routeRecording apply via live SSE push (never redeploy),
|
||||
// so they must not appear as "pending deploy" when they differ from the last deploy snapshot.
|
||||
ApplicationConfig deployed = new ApplicationConfig();
|
||||
deployed.setTracedProcessors(Map.of("proc-1", "DEBUG"));
|
||||
deployed.setRouteRecording(Map.of("route-a", true));
|
||||
deployed.setTapVersion(1);
|
||||
|
||||
ApplicationConfig desired = new ApplicationConfig();
|
||||
desired.setTracedProcessors(Map.of("proc-1", "TRACE", "proc-2", "DEBUG"));
|
||||
desired.setRouteRecording(Map.of("route-a", false, "route-b", true));
|
||||
desired.setTapVersion(5);
|
||||
|
||||
UUID jarId = UUID.randomUUID();
|
||||
DeploymentConfigSnapshot snap = new DeploymentConfigSnapshot(jarId, deployed, Map.of(), null);
|
||||
DirtyStateResult result = CALC.compute(jarId, desired, Map.of(), snap);
|
||||
|
||||
assertThat(result.dirty()).isFalse();
|
||||
assertThat(result.differences()).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
package com.cameleer.server.core.search;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
class AttributeFilterTest {
|
||||
|
||||
@Test
|
||||
void keyOnly_blankValue_normalizesToNull() {
|
||||
AttributeFilter f = new AttributeFilter("order", "");
|
||||
assertThat(f.value()).isNull();
|
||||
assertThat(f.isKeyOnly()).isTrue();
|
||||
assertThat(f.isWildcard()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void keyOnly_nullValue_isKeyOnly() {
|
||||
AttributeFilter f = new AttributeFilter("order", null);
|
||||
assertThat(f.isKeyOnly()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void exactValue_isNotWildcard() {
|
||||
AttributeFilter f = new AttributeFilter("order", "47");
|
||||
assertThat(f.isKeyOnly()).isFalse();
|
||||
assertThat(f.isWildcard()).isFalse();
|
||||
}
|
||||
|
||||
@Test
|
||||
void starInValue_isWildcard() {
|
||||
AttributeFilter f = new AttributeFilter("order", "47*");
|
||||
assertThat(f.isWildcard()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
void invalidKey_throws() {
|
||||
assertThatThrownBy(() -> new AttributeFilter("bad key", "x"))
|
||||
.isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("attribute key");
|
||||
}
|
||||
|
||||
@Test
|
||||
void blankKey_throws() {
|
||||
assertThatThrownBy(() -> new AttributeFilter(" ", null))
|
||||
.isInstanceOf(IllegalArgumentException.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
void wildcardPattern_escapesLikeMetaCharacters() {
|
||||
AttributeFilter f = new AttributeFilter("order", "a_b%c\\d*");
|
||||
assertThat(f.toLikePattern()).isEqualTo("a\\_b\\%c\\\\d%");
|
||||
}
|
||||
|
||||
@Test
|
||||
void exactValue_toLikePattern_returnsNull() {
|
||||
AttributeFilter f = new AttributeFilter("order", "47");
|
||||
assertThat(f.toLikePattern()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchRequest_canonicalCtor_acceptsAttributeFilters() {
|
||||
SearchRequest r = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
java.util.List.of(new AttributeFilter("order", "47")));
|
||||
assertThat(r.attributeFilters()).hasSize(1);
|
||||
assertThat(r.attributeFilters().get(0).key()).isEqualTo("order");
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchRequest_legacyCtor_defaultsAttributeFiltersToEmpty() {
|
||||
SearchRequest r = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null);
|
||||
assertThat(r.attributeFilters()).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void searchRequest_compactCtor_normalizesNullAttributeFilters() {
|
||||
SearchRequest r = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null, null, null,
|
||||
null);
|
||||
assertThat(r.attributeFilters()).isNotNull().isEmpty();
|
||||
}
|
||||
}
|
||||
@@ -204,6 +204,21 @@ All query endpoints require JWT with `VIEWER` role or higher.
|
||||
| `GET /api/v1/agents/events-log` | Agent lifecycle event history |
|
||||
| `GET /api/v1/agents/{id}/metrics` | Agent-level metrics time series |
|
||||
|
||||
### Server Self-Metrics
|
||||
|
||||
The server snapshots its own Micrometer registry into ClickHouse every 60 s (table `server_metrics`) — JVM, HTTP, DB pools, agent/ingestion business metrics, and alerting metrics. Use this instead of running an external Prometheus when building a server-health dashboard. The live scrape endpoint `/api/v1/prometheus` remains available for traditional scraping.
|
||||
|
||||
Two ways to consume:
|
||||
|
||||
| Consumer | How |
|
||||
|---|---|
|
||||
| Web UI (built-in) | `/admin/server-metrics` — 17 panels across Server Health / JVM / HTTP & DB / Alerting / Deployments with a 15 min–7 d time picker. ADMIN-only, hidden when `cameleer.server.security.infrastructureendpoints=false`. |
|
||||
| Programmatic | Generic REST API under `/api/v1/admin/server-metrics/{catalog,instances,query}`. Same visibility rules. Designed for SaaS control planes that embed server health in their own console. |
|
||||
|
||||
Persistence can be disabled entirely with `cameleer.server.self-metrics.enabled=false`. Snapshot cadence via `cameleer.server.self-metrics.interval-ms` (default `60000`).
|
||||
|
||||
See [`docs/server-self-metrics.md`](./server-self-metrics.md) for the full metric catalog, API contract, and ready-to-paste query bodies for each panel.
|
||||
|
||||
---
|
||||
|
||||
## Application Configuration
|
||||
|
||||
522
docs/server-self-metrics.md
Normal file
522
docs/server-self-metrics.md
Normal file
@@ -0,0 +1,522 @@
|
||||
# Server Self-Metrics — Reference for Dashboard Builders
|
||||
|
||||
This is the reference for anyone building a server-health dashboard on top of the Cameleer server. It documents the `server_metrics` ClickHouse table, every series you can expect to find in it, and the queries we recommend for each dashboard panel.
|
||||
|
||||
> **tl;dr** — Every 60 s, every meter in the server's Micrometer registry (all `cameleer.*`, all `alerting_*`, and the full Spring Boot Actuator set) is written into ClickHouse as one row per `(meter, statistic)` pair. No external Prometheus required.
|
||||
|
||||
---
|
||||
|
||||
## Built-in admin dashboard
|
||||
|
||||
The server ships a ready-to-use dashboard at **`/admin/server-metrics`** in the web UI. It renders the 17 panels listed below using `ThemedChart` from the design system. The window is driven by the app-wide time-range control in the TopBar (same one used by Exchanges, Dashboard, and Runtime), so every panel automatically reflects the range you've selected globally. Visibility mirrors the Database and ClickHouse admin pages:
|
||||
|
||||
- Requires the `ADMIN` role.
|
||||
- Hidden when `cameleer.server.security.infrastructureendpoints=false` (both the backend endpoints and the sidebar entry disappear).
|
||||
|
||||
Use this page for single-tenant installs and dev/staging — it's the fastest path to "is the server healthy right now?". For multi-tenant control planes, cross-environment rollups, or embedding metrics inside an existing operations console, call the REST API below instead.
|
||||
|
||||
---
|
||||
|
||||
## Table schema
|
||||
|
||||
```sql
|
||||
server_metrics (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
collected_at DateTime64(3),
|
||||
server_instance_id LowCardinality(String),
|
||||
metric_name LowCardinality(String),
|
||||
metric_type LowCardinality(String), -- counter|gauge|timer|distribution_summary|long_task_timer|other
|
||||
statistic LowCardinality(String) DEFAULT 'value',
|
||||
metric_value Float64,
|
||||
tags Map(String, String) DEFAULT map(),
|
||||
server_received_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(collected_at))
|
||||
ORDER BY (tenant_id, collected_at, server_instance_id, metric_name, statistic)
|
||||
TTL toDateTime(collected_at) + INTERVAL 90 DAY DELETE
|
||||
```
|
||||
|
||||
### What each column means
|
||||
|
||||
| Column | Notes |
|
||||
|---|---|
|
||||
| `tenant_id` | Always filter by this. One tenant per server deployment. |
|
||||
| `server_instance_id` | Stable id per server process: property → `HOSTNAME` env → DNS → random UUID. **Rotates on restart**, so counters restart cleanly. |
|
||||
| `metric_name` | Raw Micrometer meter name. Dots, not underscores. |
|
||||
| `metric_type` | Lowercase Micrometer `Meter.Type`. |
|
||||
| `statistic` | Which `Measurement` this row is. Counters/gauges → `value` or `count`. Timers → three rows per tick: `count`, `total_time` (or `total`), `max`. Distribution summaries → same shape. |
|
||||
| `metric_value` | `Float64`. Non-finite values (NaN / ±∞) are dropped before insert. |
|
||||
| `tags` | `Map(String, String)`. Micrometer tags copied verbatim. |
|
||||
|
||||
### Counter semantics (important)
|
||||
|
||||
Counters are **cumulative totals since meter registration**, same convention as Prometheus. To get a rate, compute a delta within a `server_instance_id`:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfMinute(collected_at) AS minute,
|
||||
metric_value - any(metric_value) OVER (
|
||||
PARTITION BY server_instance_id, metric_name, tags
|
||||
ORDER BY collected_at
|
||||
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
|
||||
) AS per_minute_delta
|
||||
FROM server_metrics
|
||||
WHERE tenant_id = 'default'
AND metric_name = 'cameleer.ingestion.drops'
|
||||
AND statistic = 'count'
|
||||
ORDER BY minute;
|
||||
```
|
||||
|
||||
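On restart the `server_instance_id` rotates, so partitioning the previous-row lookup by `server_instance_id` (the `any(...) OVER (... ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING)` window above, which plays the role of `LAG()`) gives monotonic segments without fighting counter resets.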
|
||||
|
||||
### Retention
|
||||
|
||||
90 days, TTL-enforced. Long-term trend analysis is out of scope — ship raw data to an external warehouse if you need more.
|
||||
|
||||
---
|
||||
|
||||
## How to query
|
||||
|
||||
Use the REST API — `/api/v1/admin/server-metrics/**`. It does the tenant filter, range bounding, counter-delta math, and input validation for you, so the dashboard never needs direct ClickHouse access. ADMIN role required (standard `/api/v1/admin/**` RBAC gate).
|
||||
|
||||
### `GET /catalog`
|
||||
|
||||
Enumerate every `metric_name` observed in a window, with its `metric_type`, the set of statistics emitted, and the union of tag keys.
|
||||
|
||||
```
|
||||
GET /api/v1/admin/server-metrics/catalog?from=2026-04-22T00:00:00Z&to=2026-04-23T00:00:00Z
|
||||
Authorization: Bearer <admin-jwt>
|
||||
```
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"metricName": "cameleer.agents.connected",
|
||||
"metricType": "gauge",
|
||||
"statistics": ["value"],
|
||||
"tagKeys": ["state"]
|
||||
},
|
||||
{
|
||||
"metricName": "cameleer.ingestion.drops",
|
||||
"metricType": "counter",
|
||||
"statistics": ["count"],
|
||||
"tagKeys": ["reason"]
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
`from`/`to` are optional; default is the last 1 h.
|
||||
|
||||
### `GET /instances`
|
||||
|
||||
Enumerate the `server_instance_id` values that wrote at least one sample in the window, with `firstSeen` / `lastSeen`. Use this when you need to annotate restarts on a graph or reason about counter-delta partitions.
|
||||
|
||||
```
|
||||
GET /api/v1/admin/server-metrics/instances?from=2026-04-22T00:00:00Z&to=2026-04-23T00:00:00Z
|
||||
```
|
||||
|
||||
```json
|
||||
[
|
||||
{ "serverInstanceId": "srv-prod-b", "firstSeen": "2026-04-22T14:30:00Z", "lastSeen": "2026-04-23T00:00:00Z" },
|
||||
{ "serverInstanceId": "srv-prod-a", "firstSeen": "2026-04-22T00:00:00Z", "lastSeen": "2026-04-22T14:25:00Z" }
|
||||
]
|
||||
```
|
||||
|
||||
### `POST /query` — generic time-series
|
||||
|
||||
The workhorse. One endpoint covers every panel in the dashboard.
|
||||
|
||||
```
|
||||
POST /api/v1/admin/server-metrics/query
|
||||
Authorization: Bearer <admin-jwt>
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
Request body:
|
||||
|
||||
```json
|
||||
{
|
||||
"metric": "cameleer.ingestion.drops",
|
||||
"statistic": "count",
|
||||
"from": "2026-04-22T00:00:00Z",
|
||||
"to": "2026-04-23T00:00:00Z",
|
||||
"stepSeconds": 60,
|
||||
"groupByTags": ["reason"],
|
||||
"filterTags": { },
|
||||
"aggregation": "sum",
|
||||
"mode": "delta",
|
||||
"serverInstanceIds": null
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"metric": "cameleer.ingestion.drops",
|
||||
"statistic": "count",
|
||||
"aggregation": "sum",
|
||||
"mode": "delta",
|
||||
"stepSeconds": 60,
|
||||
"series": [
|
||||
{
|
||||
"tags": { "reason": "buffer_full" },
|
||||
"points": [
|
||||
{ "t": "2026-04-22T00:00:00.000Z", "v": 0.0 },
|
||||
{ "t": "2026-04-22T00:01:00.000Z", "v": 5.0 },
|
||||
{ "t": "2026-04-22T00:02:00.000Z", "v": 5.0 }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### Request field reference
|
||||
|
||||
| Field | Type | Required | Description |
|
||||
|---|---|---|---|
|
||||
| `metric` | string | yes | Metric name. Regex `^[a-zA-Z0-9._]+$`. |
|
||||
| `statistic` | string | no | `value` / `count` / `total` / `total_time` / `max` / `mean`. `mean` is a derived statistic for timers: `sum(total_time \| total) / sum(count)` per bucket. |
|
||||
| `from`, `to` | ISO-8601 instant | yes | Half-open window. `to - from ≤ 31 days`. |
|
||||
| `stepSeconds` | int | no | Bucket size. Clamped to [10, 3600]. Default 60. |
|
||||
| `groupByTags` | string[] | no | Emit one series per unique combination of these tag values. Tag keys regex `^[a-zA-Z0-9._]+$`. |
|
||||
| `filterTags` | map<string,string> | no | Narrow to samples whose tag map contains every entry. Values bound via parameter — no injection. |
|
||||
| `aggregation` | string | no | Within-bucket reducer for raw mode: `avg` (default), `sum`, `max`, `min`, `latest`. For `mode=delta` this controls cross-instance aggregation (defaults to `sum` of per-instance deltas). |
|
||||
| `mode` | string | no | `raw` (default) or `delta`. Delta mode computes per-`server_instance_id` positive-clipped differences and then aggregates across instances — so you get a rate-like time series that survives server restarts. |
|
||||
| `serverInstanceIds` | string[] | no | Allow-list. When null or empty, every instance in the window is included. |
|
||||
|
||||
#### Validation errors
|
||||
|
||||
Any `IllegalArgumentException` surfaces as `400 Bad Request` with `{"error": "…"}`. Triggers:
|
||||
- unsafe characters in identifiers
|
||||
- `from ≥ to` or range > 31 days
|
||||
- `stepSeconds` outside [10, 3600]
|
||||
- result cardinality > 500 series (reduce `groupByTags` or tighten `filterTags`)
|
||||
|
||||
### Direct ClickHouse (fallback)
|
||||
|
||||
If you need something the generic query can't express (complex joins, percentile aggregates, materialized-view rollups), reach for `/api/v1/admin/clickhouse/query` (`infrastructureendpoints=true`, ADMIN) or a dedicated read-only CH user scoped to `server_metrics`. All direct queries must filter by `tenant_id`.
|
||||
|
||||
---
|
||||
|
||||
## Metric catalog
|
||||
|
||||
Every series below is populated. Names follow Micrometer conventions (dots, not underscores). Use these as the starting point for dashboard panels — pick the handful you care about, ignore the rest.
|
||||
|
||||
### Cameleer business metrics — agent + ingestion
|
||||
|
||||
Source: `cameleer-server-app/.../metrics/ServerMetrics.java`.
|
||||
|
||||
| Metric | Type | Statistic | Tags | Meaning |
|
||||
|---|---|---|---|---|
|
||||
| `cameleer.agents.connected` | gauge | `value` | `state` (live/stale/dead/shutdown) | Count of agents in each lifecycle state |
|
||||
| `cameleer.agents.sse.active` | gauge | `value` | — | Active SSE connections (command channel) |
|
||||
| `cameleer.agents.transitions` | counter | `count` | `transition` (went_stale/went_dead/recovered) | Cumulative lifecycle transitions |
|
||||
| `cameleer.ingestion.buffer.size` | gauge | `value` | `type` (execution/processor/log/metrics) | Write buffer depth — spikes mean ingestion is lagging |
|
||||
| `cameleer.ingestion.accumulator.pending` | gauge | `value` | — | Unfinalized execution chunks in the accumulator |
|
||||
| `cameleer.ingestion.drops` | counter | `count` | `reason` (buffer_full/no_agent/no_identity) | Dropped payloads. Any non-zero rate here is bad. |
|
||||
| `cameleer.ingestion.flush.duration` | timer | `count`, `total_time`/`total`, `max` | `type` (execution/processor/log) | Flush latency per type |
|
||||
|
||||
### Cameleer business metrics — deploy + auth
|
||||
|
||||
| Metric | Type | Statistic | Tags | Meaning |
|
||||
|---|---|---|---|---|
|
||||
| `cameleer.deployments.outcome` | counter | `count` | `status` (running/failed/degraded) | Deploy outcome tally since boot |
|
||||
| `cameleer.deployments.duration` | timer | `count`, `total_time`/`total`, `max` | — | End-to-end deploy latency |
|
||||
| `cameleer.auth.failures` | counter | `count` | `reason` (invalid_token/revoked/oidc_rejected) | Auth failure breakdown — watch for spikes |
|
||||
|
||||
### Alerting subsystem metrics
|
||||
|
||||
Source: `cameleer-server-app/.../alerting/metrics/AlertingMetrics.java`.
|
||||
|
||||
| Metric | Type | Statistic | Tags | Meaning |
|
||||
|---|---|---|---|---|
|
||||
| `alerting_rules_total` | gauge | `value` | `state` (enabled/disabled) | Cached 30 s from PostgreSQL `alert_rules` |
|
||||
| `alerting_instances_total` | gauge | `value` | `state` (firing/resolved/ack'd etc.) | Cached 30 s from PostgreSQL `alert_instances` |
|
||||
| `alerting_eval_errors_total` | counter | `count` | `kind` (condition kind) | Evaluator exceptions per kind |
|
||||
| `alerting_circuit_opened_total` | counter | `count` | `kind` | Circuit-breaker open transitions per kind |
|
||||
| `alerting_eval_duration_seconds` | timer | `count`, `total_time`/`total`, `max` | `kind` | Per-kind evaluation latency |
|
||||
| `alerting_webhook_delivery_duration_seconds` | timer | `count`, `total_time`/`total`, `max` | — | Outbound webhook POST latency |
|
||||
| `alerting_notifications_total` | counter | `count` | `status` (sent/failed/retry/giving_up) | Notification outcomes |
|
||||
|
||||
### JVM — memory, GC, threads, classes
|
||||
|
||||
From Spring Boot Actuator (`JvmMemoryMetrics`, `JvmGcMetrics`, `JvmThreadMetrics`, `ClassLoaderMetrics`).
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `jvm.memory.used` | gauge | `area` (heap/nonheap), `id` (pool name) | Bytes used per pool |
|
||||
| `jvm.memory.committed` | gauge | `area`, `id` | Bytes committed per pool |
|
||||
| `jvm.memory.max` | gauge | `area`, `id` | Pool max |
|
||||
| `jvm.memory.usage.after.gc` | gauge | `area`, `id` | Usage right after the last collection |
|
||||
| `jvm.buffer.memory.used` | gauge | `id` (direct/mapped) | NIO buffer bytes |
|
||||
| `jvm.buffer.count` | gauge | `id` | NIO buffer count |
|
||||
| `jvm.buffer.total.capacity` | gauge | `id` | NIO buffer capacity |
|
||||
| `jvm.threads.live` | gauge | — | Current live thread count |
|
||||
| `jvm.threads.daemon` | gauge | — | Current daemon thread count |
|
||||
| `jvm.threads.peak` | gauge | — | Peak thread count since start |
|
||||
| `jvm.threads.started` | counter | — | Cumulative threads started |
|
||||
| `jvm.threads.states` | gauge | `state` (runnable/blocked/waiting/…) | Threads per state |
|
||||
| `jvm.classes.loaded` | gauge | — | Currently-loaded classes |
|
||||
| `jvm.classes.unloaded` | counter | — | Cumulative unloaded classes |
|
||||
| `jvm.gc.pause` | timer | `action`, `cause` | Stop-the-world pause times — watch `max` |
|
||||
| `jvm.gc.concurrent.phase.time` | timer | `action`, `cause` | Concurrent-phase durations (G1/ZGC) |
|
||||
| `jvm.gc.memory.allocated` | counter | — | Bytes allocated in the young gen |
|
||||
| `jvm.gc.memory.promoted` | counter | — | Bytes promoted to old gen |
|
||||
| `jvm.gc.overhead` | gauge | — | Fraction of CPU spent in GC (0–1) |
|
||||
| `jvm.gc.live.data.size` | gauge | — | Live data after last collection |
|
||||
| `jvm.gc.max.data.size` | gauge | — | Max old-gen size |
|
||||
| `jvm.info` | gauge | `vendor`, `runtime`, `version` | Constant `1.0`; tags carry the real info |
|
||||
|
||||
### Process and system
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `process.cpu.usage` | gauge | — | CPU share consumed by this JVM (0–1) |
|
||||
| `process.cpu.time` | gauge | — | Cumulative CPU time (ns) |
|
||||
| `process.uptime` | gauge | — | ms since start |
|
||||
| `process.start.time` | gauge | — | Epoch start |
|
||||
| `process.files.open` | gauge | — | Open FDs |
|
||||
| `process.files.max` | gauge | — | FD ulimit |
|
||||
| `system.cpu.count` | gauge | — | Cores visible to the JVM |
|
||||
| `system.cpu.usage` | gauge | — | System-wide CPU (0–1) |
|
||||
| `system.load.average.1m` | gauge | — | 1-min load (Unix only) |
|
||||
| `disk.free` | gauge | `path` | Free bytes on the mount that holds the JAR |
|
||||
| `disk.total` | gauge | `path` | Total bytes |
|
||||
|
||||
### HTTP server
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `http.server.requests` | timer | `method`, `uri`, `status`, `outcome`, `exception` | Inbound HTTP: count, total_time/total, max |
|
||||
| `http.server.requests.active` | long_task_timer | `method`, `uri` | In-flight requests — `active_tasks` statistic |
|
||||
|
||||
`uri` is the Spring-templated path (`/api/v1/environments/{envSlug}/apps/{appSlug}`), not the raw URL — cardinality stays bounded.
|
||||
|
||||
### Tomcat
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `tomcat.sessions.active.current` | gauge | — | Currently active sessions |
|
||||
| `tomcat.sessions.active.max` | gauge | — | Max concurrent sessions observed |
|
||||
| `tomcat.sessions.alive.max` | gauge | — | Longest session lifetime (s) |
|
||||
| `tomcat.sessions.created` | counter | — | Cumulative session creates |
|
||||
| `tomcat.sessions.expired` | counter | — | Cumulative expirations |
|
||||
| `tomcat.sessions.rejected` | counter | — | Session creates refused |
|
||||
| `tomcat.threads.current` | gauge | `name` | Connector thread count |
|
||||
| `tomcat.threads.busy` | gauge | `name` | Connector threads currently serving a request |
|
||||
| `tomcat.threads.config.max` | gauge | `name` | Configured max |
|
||||
|
||||
### HikariCP (PostgreSQL pool)
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `hikaricp.connections` | gauge | `pool` | Total connections |
|
||||
| `hikaricp.connections.active` | gauge | `pool` | In-use |
|
||||
| `hikaricp.connections.idle` | gauge | `pool` | Idle |
|
||||
| `hikaricp.connections.pending` | gauge | `pool` | Threads waiting for a connection |
|
||||
| `hikaricp.connections.min` | gauge | `pool` | Configured min |
|
||||
| `hikaricp.connections.max` | gauge | `pool` | Configured max |
|
||||
| `hikaricp.connections.creation` | timer | `pool` | Time to open a new connection |
|
||||
| `hikaricp.connections.acquire` | timer | `pool` | Time to acquire from the pool |
|
||||
| `hikaricp.connections.usage` | timer | `pool` | Time a connection was in use |
|
||||
| `hikaricp.connections.timeout` | counter | `pool` | Pool acquisition timeouts — any non-zero rate is a problem |
|
||||
|
||||
Pools are named. You'll see `HikariPool-1` (PostgreSQL) and a separate pool for ClickHouse (`clickHouseJdbcTemplate`).
|
||||
|
||||
### JDBC generic
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `jdbc.connections.min` | gauge | `name` | Same data as Hikari, surfaced generically |
|
||||
| `jdbc.connections.max` | gauge | `name` | |
|
||||
| `jdbc.connections.active` | gauge | `name` | |
|
||||
| `jdbc.connections.idle` | gauge | `name` | |
|
||||
|
||||
### Logging
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `logback.events` | counter | `level` (error/warn/info/debug/trace) | Log events emitted since start — `{level=error}` is a useful panel |
|
||||
|
||||
### Spring Boot lifecycle
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `application.started.time` | timer | `main.application.class` | Cold-start duration |
|
||||
| `application.ready.time` | timer | `main.application.class` | Time to ready |
|
||||
|
||||
### Flyway
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `flyway.migrations` | gauge | — | Number of migrations applied (current schema) |
|
||||
|
||||
### Executor pools (if any `@Async` executors exist)
|
||||
|
||||
When a `ThreadPoolTaskExecutor` bean is registered and tagged, Micrometer adds:
|
||||
|
||||
| Metric | Type | Tags | Meaning |
|
||||
|---|---|---|---|
|
||||
| `executor.active` | gauge | `name` | Currently-running tasks |
|
||||
| `executor.queued` | gauge | `name` | Queued tasks |
|
||||
| `executor.queue.remaining` | gauge | `name` | Queue headroom |
|
||||
| `executor.pool.size` | gauge | `name` | Current pool size |
|
||||
| `executor.pool.core` | gauge | `name` | Core size |
|
||||
| `executor.pool.max` | gauge | `name` | Max size |
|
||||
| `executor.completed` | counter | `name` | Completed tasks |
|
||||
|
||||
---
|
||||
|
||||
## Suggested dashboard panels
|
||||
|
||||
Below are 17 panels, each expressed as a single `POST /api/v1/admin/server-metrics/query` body. Tenant is implicit in the JWT — the server filters by tenant server-side. `{from}` and `{to}` are dashboard variables.
|
||||
|
||||
### Row: server health (top of dashboard)
|
||||
|
||||
1. **Agents by state** — stacked area.
|
||||
```json
|
||||
{ "metric": "cameleer.agents.connected", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["state"], "aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
|
||||
2. **Ingestion buffer depth by type** — line chart.
|
||||
```json
|
||||
{ "metric": "cameleer.ingestion.buffer.size", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["type"], "aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
|
||||
3. **Ingestion drops per minute** — bar chart.
|
||||
```json
|
||||
{ "metric": "cameleer.ingestion.drops", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["reason"], "mode": "delta" }
|
||||
```
|
||||
|
||||
4. **Auth failures per minute** — same shape as drops, grouped by `reason`.
|
||||
```json
|
||||
{ "metric": "cameleer.auth.failures", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["reason"], "mode": "delta" }
|
||||
```
|
||||
|
||||
### Row: JVM
|
||||
|
||||
5. **Heap used vs committed vs max** — area chart (three overlay queries).
|
||||
```json
|
||||
{ "metric": "jvm.memory.used", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"filterTags": { "area": "heap" }, "aggregation": "sum", "mode": "raw" }
|
||||
```
|
||||
Repeat with `"metric": "jvm.memory.committed"` and `"metric": "jvm.memory.max"`.
|
||||
|
||||
6. **CPU %** — line.
|
||||
```json
|
||||
{ "metric": "process.cpu.usage", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60, "aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
Overlay with `"metric": "system.cpu.usage"`.
|
||||
|
||||
7. **GC pause — max per cause**.
|
||||
```json
|
||||
{ "metric": "jvm.gc.pause", "statistic": "max",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["cause"], "aggregation": "max", "mode": "raw" }
|
||||
```
|
||||
|
||||
8. **Thread count** — three overlay lines: `jvm.threads.live`, `jvm.threads.daemon`, `jvm.threads.peak` each with `statistic=value, aggregation=avg, mode=raw`.
|
||||
|
||||
### Row: HTTP + DB
|
||||
|
||||
9. **HTTP mean latency by URI** — top-N URIs.
|
||||
```json
|
||||
{ "metric": "http.server.requests", "statistic": "mean",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["uri"], "filterTags": { "outcome": "SUCCESS" },
|
||||
"aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
As a rough proxy for p99, repeat with `"statistic": "max"`.
|
||||
|
||||
10. **HTTP error rate** — two queries, divide client-side: total requests and 5xx requests.
|
||||
```json
|
||||
{ "metric": "http.server.requests", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"mode": "delta", "aggregation": "sum" }
|
||||
```
|
||||
For the 5xx series, repeat the query with `"filterTags": { "outcome": "SERVER_ERROR" }` added, then divide the 5xx count by the total count bucket by bucket to get the error rate.
|
||||
|
||||
11. **HikariCP pool saturation** — overlay two queries.
|
||||
```json
|
||||
{ "metric": "hikaricp.connections.active", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["pool"], "aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
Overlay with `"metric": "hikaricp.connections.pending"`.
|
||||
|
||||
12. **Hikari acquire timeouts per minute**.
|
||||
```json
|
||||
{ "metric": "hikaricp.connections.timeout", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["pool"], "mode": "delta" }
|
||||
```
|
||||
|
||||
### Row: alerting (collapsible)
|
||||
|
||||
13. **Alerting instances by state** — stacked.
|
||||
```json
|
||||
{ "metric": "alerting_instances_total", "statistic": "value",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["state"], "aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
|
||||
14. **Eval errors per minute by kind**.
|
||||
```json
|
||||
{ "metric": "alerting_eval_errors_total", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"groupByTags": ["kind"], "mode": "delta" }
|
||||
```
|
||||
|
||||
15. **Webhook delivery — max per minute**.
|
||||
```json
|
||||
{ "metric": "alerting_webhook_delivery_duration_seconds", "statistic": "max",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 60,
|
||||
"aggregation": "max", "mode": "raw" }
|
||||
```
|
||||
|
||||
### Row: deployments (runtime-enabled only)
|
||||
|
||||
16. **Deploy outcomes per hour**.
|
||||
```json
|
||||
{ "metric": "cameleer.deployments.outcome", "statistic": "count",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 3600,
|
||||
"groupByTags": ["status"], "mode": "delta" }
|
||||
```
|
||||
|
||||
17. **Deploy duration mean**.
|
||||
```json
|
||||
{ "metric": "cameleer.deployments.duration", "statistic": "mean",
|
||||
"from": "{from}", "to": "{to}", "stepSeconds": 300,
|
||||
"aggregation": "avg", "mode": "raw" }
|
||||
```
|
||||
As a rough proxy for p99, repeat with `"statistic": "max"`.
|
||||
|
||||
---
|
||||
|
||||
## Notes for the dashboard implementer
|
||||
|
||||
- **Use the REST API.** The server handles tenant filtering, counter deltas, range bounds, and input validation. Direct ClickHouse is a fallback for the handful of cases the generic query can't express.
|
||||
- **`total_time` vs `total`.** SimpleMeterRegistry and PrometheusMeterRegistry disagree on the tag value for Timer cumulative duration. The server uses PrometheusMeterRegistry in production, so expect `total_time`. The derived `statistic=mean` handles both transparently.
|
||||
- **Cardinality warning:** `http.server.requests` tags include `uri` and `status`. The server templates URIs, but if someone adds an endpoint that embeds a high-cardinality path segment without `@PathVariable`, you'll see a series explosion here. The API caps responses at 500 series; a query that exceeds the cap returns a 400.
|
||||
- **The dashboard is read-only.** There's no write path — only the server writes into `server_metrics`.
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
- 2026-04-23 — initial write. Write-only backend.
|
||||
- 2026-04-23 — added generic REST API (`/api/v1/admin/server-metrics/{catalog,instances,query}`) so dashboards don't need direct ClickHouse access. All 17 suggested panels now expressed as single-endpoint queries.
|
||||
- 2026-04-24 — shipped the built-in `/admin/server-metrics` UI dashboard. Gated by `infrastructureendpoints` + ADMIN, identical visibility to `/admin/{database,clickhouse}`. Source: `ui/src/pages/Admin/ServerMetricsAdminPage.tsx`.
|
||||
- 2026-04-24 — dashboard now uses the global time-range control (`useGlobalFilters`) instead of a page-local picker. Bucket size auto-scales with the selected window (10 s → 1 h). Query hooks now take a `ServerMetricsRange = { from: Date; to: Date }` instead of a `windowSeconds` number so they work for any absolute or rolling range the TopBar supplies.
|
||||
File diff suppressed because one or more lines are too long
125
ui/src/api/queries/admin/serverMetrics.ts
Normal file
125
ui/src/api/queries/admin/serverMetrics.ts
Normal file
@@ -0,0 +1,125 @@
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { adminFetch } from './admin-api';
|
||||
import { useRefreshInterval } from '../use-refresh-interval';
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * One entry of the `/server-metrics/catalog` response: a metric observed in
 * the queried window, with its type, the statistics emitted for it and the
 * union of its tag keys.
 */
export interface ServerMetricCatalogEntry {
  /** Name of the metric. */
  metricName: string;
  /** Metric type as reported by the server. */
  metricType: string;
  /** Statistics emitted for this metric. */
  statistics: Array<string>;
  /** Union of the tag keys seen on this metric. */
  tagKeys: Array<string>;
}
|
||||
|
||||
/**
 * One entry of the `/server-metrics/instances` response: a server instance
 * id observed in the queried window, with its first/last seen timestamps.
 */
export interface ServerInstanceInfo {
  /** The `server_instance_id` value. */
  serverInstanceId: string;
  /** Date-time string of the first sample seen for this instance. */
  firstSeen: string;
  /** Date-time string of the last sample seen for this instance. */
  lastSeen: string;
}
|
||||
|
||||
/** A single bucket of a time series returned by the query endpoint. */
export interface ServerMetricPoint {
  /** Bucket timestamp as a date-time string. */
  t: string;
  /** Value for the bucket. */
  v: number;
}
|
||||
|
||||
/** One series of a query response: a tag combination plus its bucketed points. */
export interface ServerMetricSeries {
  /** Tag key → tag value identifying this series. */
  tags: { [tagKey: string]: string };
  /** Bucketed values of the series. */
  points: Array<ServerMetricPoint>;
}
|
||||
|
||||
/**
 * Response of `POST /server-metrics/query`: the query parameters the response
 * was computed with, plus the resulting series.
 */
export interface ServerMetricQueryResponse {
  /** Metric the series belong to. */
  metric: string;
  /** Statistic reported in the response. */
  statistic: string;
  /** Aggregation reported in the response. */
  aggregation: string;
  /** Query mode reported in the response. */
  mode: string;
  /** Bucket width in seconds. */
  stepSeconds: number;
  /** Resulting series. */
  series: Array<ServerMetricSeries>;
}
|
||||
|
||||
/**
 * Body of `POST /server-metrics/query`. Only `metric`, `from` and `to` are
 * required by this type; every other field may be omitted or sent as `null`.
 */
export interface ServerMetricQueryRequest {
  /** Metric to query. */
  metric: string;
  /** Statistic to read. */
  statistic?: string | null;
  /** Window start as a serialized timestamp. */
  from: string;
  /** Window end as a serialized timestamp. */
  to: string;
  /** Bucket width in seconds. */
  stepSeconds?: number | null;
  /** Tag keys to group the result by. */
  groupByTags?: Array<string> | null;
  /** Tag key → value filter. */
  filterTags?: { [tagKey: string]: string } | null;
  /** Aggregation to apply. */
  aggregation?: string | null;
  /** Query mode. */
  mode?: string | null;
  /** Server instance ids to query. */
  serverInstanceIds?: Array<string> | null;
}
|
||||
|
||||
// ── Range helper ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Time window that drives every hook in this module.
 *
 * The hooks never compute their own "now": callers pass in the window they
 * want rendered, and choosing that window is the job of the global
 * time-range control.
 */
export interface ServerMetricsRange {
  /** Start of the window. */
  from: Date;
  /** End of the window. */
  to: Date;
}
|
||||
|
||||
/**
 * Convert a Date-based range into its ISO-8601 string form.
 *
 * The resulting strings are what the hooks put into their query keys and
 * send to the server.
 *
 * @param range window to serialize
 * @returns `{ from, to }` as `Date.prototype.toISOString()` strings
 */
function serializeRange(range: ServerMetricsRange) {
  const { from: start, to: end } = range;
  const from = start.toISOString();
  const to = end.toISOString();
  return { from, to };
}
|
||||
|
||||
// ── Query Hooks ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * React Query hook that loads the metric catalog for `range` from
 * `/server-metrics/catalog`.
 *
 * The serialized window is part of the query key, so a different range is a
 * different cache entry. The refetch interval is whatever
 * `useRefreshInterval(60_000)` returns.
 */
export function useServerMetricsCatalog(range: ServerMetricsRange) {
  const refetchInterval = useRefreshInterval(60_000);
  const isoRange = serializeRange(range);

  const fetchCatalog = () => {
    const search = new URLSearchParams({ from: isoRange.from, to: isoRange.to });
    return adminFetch<ServerMetricCatalogEntry[]>(`/server-metrics/catalog?${search}`);
  };

  return useQuery({
    queryKey: ['admin', 'server-metrics', 'catalog', isoRange.from, isoRange.to],
    queryFn: fetchCatalog,
    refetchInterval,
  });
}
|
||||
|
||||
/**
 * React Query hook that loads the server instances observed in `range` from
 * `/server-metrics/instances`.
 *
 * The serialized window is part of the query key, so a different range is a
 * different cache entry. The refetch interval is whatever
 * `useRefreshInterval(60_000)` returns.
 */
export function useServerMetricsInstances(range: ServerMetricsRange) {
  const refetchInterval = useRefreshInterval(60_000);
  const isoRange = serializeRange(range);

  const fetchInstances = () => {
    const search = new URLSearchParams({ from: isoRange.from, to: isoRange.to });
    return adminFetch<ServerInstanceInfo[]>(`/server-metrics/instances?${search}`);
  };

  return useQuery({
    queryKey: ['admin', 'server-metrics', 'instances', isoRange.from, isoRange.to],
    queryFn: fetchInstances,
    refetchInterval,
  });
}
|
||||
|
||||
/**
 * Generic time-series query against the server_metrics table, issued as
 * `POST /server-metrics/query`.
 *
 * The window is supplied by the caller rather than derived from "now".
 * Passing the globally selected range keeps all panels aligned with the
 * app-wide time control and makes historical windows inspectable.
 *
 * @param request query parameters without the window (`from`/`to` are filled in from `range`)
 * @param range   window to query
 * @param opts    `enabled: false` disables the query; defaults to enabled
 */
export function useServerMetricsSeries(
  request: Omit<ServerMetricQueryRequest, 'from' | 'to'>,
  range: ServerMetricsRange,
  opts?: { enabled?: boolean },
) {
  const refetchInterval = useRefreshInterval(30_000);
  const isoRange = serializeRange(range);
  const enabled = opts?.enabled ?? true;

  const runQuery = () => {
    // The window is appended after the caller's fields, so it always wins.
    const payload: ServerMetricQueryRequest = {
      ...request,
      from: isoRange.from,
      to: isoRange.to,
    };
    return adminFetch<ServerMetricQueryResponse>('/server-metrics/query', {
      method: 'POST',
      body: JSON.stringify(payload),
    });
  };

  return useQuery({
    queryKey: ['admin', 'server-metrics', 'query', request, isoRange.from, isoRange.to],
    queryFn: runQuery,
    refetchInterval,
    enabled,
  });
}
|
||||
200
ui/src/api/schema.d.ts
vendored
200
ui/src/api/schema.d.ts
vendored
@@ -1037,6 +1037,26 @@ export interface paths {
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/server-metrics/query": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Generic time-series query
|
||||
* @description Returns bucketed series for a single metric_name. Supports aggregation (avg/sum/max/min/latest), group-by-tag, filter-by-tag, counter delta mode, and a derived 'mean' statistic for timers.
|
||||
*/
|
||||
post: operations["query"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/roles": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
@@ -1556,7 +1576,7 @@ export interface paths {
|
||||
};
|
||||
/**
|
||||
* Find the latest diagram for this app's route in this environment
|
||||
* @description Resolves agents in this env for this app, then looks up the latest diagram for the route they reported. Env scope prevents a dev route from returning a prod diagram.
|
||||
* @description Returns the most recently stored diagram for (app, env, route). Independent of the agent registry, so routes removed from the current app version still resolve.
|
||||
*/
|
||||
get: operations["findByAppAndRoute"];
|
||||
put?: never;
|
||||
@@ -1912,6 +1932,46 @@ export interface paths {
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/server-metrics/instances": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* List server_instance_id values observed in the window
|
||||
* @description Returns first/last seen timestamps — use to partition counter-delta computations.
|
||||
*/
|
||||
get: operations["instances"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/server-metrics/catalog": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* List metric names observed in the window
|
||||
* @description For each metric_name, returns metric_type, the set of statistics emitted, and the union of tag keys.
|
||||
*/
|
||||
get: operations["catalog"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/rbac/stats": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
@@ -2209,6 +2269,17 @@ export interface components {
|
||||
[key: string]: number;
|
||||
};
|
||||
sensitiveKeys?: string[];
|
||||
/** Format: int32 */
|
||||
exportBatchSize?: number;
|
||||
/** Format: int32 */
|
||||
exportQueueSize?: number;
|
||||
/** Format: int64 */
|
||||
exportFlushIntervalMs?: number;
|
||||
exportOverflowMode?: string;
|
||||
/** Format: int64 */
|
||||
exportBlockTimeoutMs?: number;
|
||||
/** Format: int32 */
|
||||
flushRecordThreshold?: number;
|
||||
};
|
||||
TapDefinition: {
|
||||
tapId?: string;
|
||||
@@ -2630,6 +2701,12 @@ export interface components {
|
||||
/** Format: date-time */
|
||||
createdAt?: string;
|
||||
};
|
||||
AttributeFilter: {
|
||||
key?: string;
|
||||
value?: string;
|
||||
keyOnly?: boolean;
|
||||
wildcard?: boolean;
|
||||
};
|
||||
SearchRequest: {
|
||||
status?: string;
|
||||
/** Format: date-time */
|
||||
@@ -2658,6 +2735,7 @@ export interface components {
|
||||
sortDir?: string;
|
||||
afterExecutionId?: string;
|
||||
environment?: string;
|
||||
attributeFilters?: components["schemas"]["AttributeFilter"][];
|
||||
};
|
||||
ExecutionSummary: {
|
||||
executionId: string;
|
||||
@@ -2967,6 +3045,42 @@ export interface components {
|
||||
SetPasswordRequest: {
|
||||
password?: string;
|
||||
};
|
||||
QueryBody: {
|
||||
metric?: string;
|
||||
statistic?: string;
|
||||
from?: string;
|
||||
to?: string;
|
||||
/** Format: int32 */
|
||||
stepSeconds?: number;
|
||||
groupByTags?: string[];
|
||||
filterTags?: {
|
||||
[key: string]: string;
|
||||
};
|
||||
aggregation?: string;
|
||||
mode?: string;
|
||||
serverInstanceIds?: string[];
|
||||
};
|
||||
ServerMetricPoint: {
|
||||
/** Format: date-time */
|
||||
t?: string;
|
||||
/** Format: double */
|
||||
v?: number;
|
||||
};
|
||||
ServerMetricQueryResponse: {
|
||||
metric?: string;
|
||||
statistic?: string;
|
||||
aggregation?: string;
|
||||
mode?: string;
|
||||
/** Format: int32 */
|
||||
stepSeconds?: number;
|
||||
series?: components["schemas"]["ServerMetricSeries"][];
|
||||
};
|
||||
ServerMetricSeries: {
|
||||
tags?: {
|
||||
[key: string]: string;
|
||||
};
|
||||
points?: components["schemas"]["ServerMetricPoint"][];
|
||||
};
|
||||
CreateRoleRequest: {
|
||||
name?: string;
|
||||
description?: string;
|
||||
@@ -3491,6 +3605,19 @@ export interface components {
|
||||
/** Format: int64 */
|
||||
avgDurationMs?: number;
|
||||
};
|
||||
ServerInstanceInfo: {
|
||||
serverInstanceId?: string;
|
||||
/** Format: date-time */
|
||||
firstSeen?: string;
|
||||
/** Format: date-time */
|
||||
lastSeen?: string;
|
||||
};
|
||||
ServerMetricCatalogEntry: {
|
||||
metricName?: string;
|
||||
metricType?: string;
|
||||
statistics?: string[];
|
||||
tagKeys?: string[];
|
||||
};
|
||||
SensitiveKeysConfig: {
|
||||
keys?: string[];
|
||||
};
|
||||
@@ -6246,6 +6373,30 @@ export interface operations {
|
||||
};
|
||||
};
|
||||
};
|
||||
query: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["QueryBody"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description OK */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"*/*": components["schemas"]["ServerMetricQueryResponse"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
listRoles: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
@@ -7068,6 +7219,7 @@ export interface operations {
|
||||
agentId?: string;
|
||||
processorType?: string;
|
||||
application?: string;
|
||||
attr?: string[];
|
||||
offset?: number;
|
||||
limit?: number;
|
||||
sortField?: string;
|
||||
@@ -7822,6 +7974,52 @@ export interface operations {
|
||||
};
|
||||
};
|
||||
};
|
||||
instances: {
|
||||
parameters: {
|
||||
query?: {
|
||||
from?: string;
|
||||
to?: string;
|
||||
};
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description OK */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"*/*": components["schemas"]["ServerInstanceInfo"][];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
catalog: {
|
||||
parameters: {
|
||||
query?: {
|
||||
from?: string;
|
||||
to?: string;
|
||||
};
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description OK */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"*/*": components["schemas"]["ServerMetricCatalogEntry"][];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
getStats: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
|
||||
@@ -44,6 +44,7 @@ import { EnvironmentSwitcherModal } from './EnvironmentSwitcherModal';
|
||||
import { envColorVar } from './env-colors';
|
||||
import { useScope } from '../hooks/useScope';
|
||||
import { formatDuration } from '../utils/format-utils';
|
||||
import { parseFacetQuery, formatAttrParam } from '../utils/attribute-filter';
|
||||
import {
|
||||
buildAppTreeNodes,
|
||||
buildAdminTreeNodes,
|
||||
@@ -111,7 +112,11 @@ function buildSearchData(
|
||||
id: `attr-key-${key}`,
|
||||
category: 'attribute',
|
||||
title: key,
|
||||
meta: 'attribute key',
|
||||
meta: 'attribute key — filter list',
|
||||
// Path carries the facet in query-string form; handlePaletteSelect routes
|
||||
// attribute results to the current scope, so the leading segment below is
|
||||
// only used as a fallback when no scope is active.
|
||||
path: `/exchanges?attr=${encodeURIComponent(key)}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -690,7 +695,19 @@ function LayoutContent() {
|
||||
}
|
||||
}
|
||||
|
||||
return [...catalogRef.current, ...exchangeItems, ...attributeItems, ...alertingSearchData];
|
||||
const facet = parseFacetQuery(debouncedQuery ?? '');
|
||||
const facetItems: SearchResult[] =
|
||||
facet
|
||||
? [{
|
||||
id: `facet-${formatAttrParam(facet)}`,
|
||||
category: 'attribute' as const,
|
||||
title: `Filter: ${facet.key} = "${facet.value}"${facet.value?.includes('*') ? ' (wildcard)' : ''}`,
|
||||
meta: 'apply attribute filter',
|
||||
path: `/exchanges?attr=${encodeURIComponent(formatAttrParam(facet))}`,
|
||||
}]
|
||||
: [];
|
||||
|
||||
return [...facetItems, ...catalogRef.current, ...exchangeItems, ...attributeItems, ...alertingSearchData];
|
||||
}, [isAdminPage, catalogRef.current, exchangeResults, debouncedQuery, alertingSearchData]);
|
||||
|
||||
const searchData = isAdminPage ? adminSearchData : operationalSearchData;
|
||||
@@ -705,6 +722,7 @@ function LayoutContent() {
|
||||
oidc: 'OIDC',
|
||||
database: 'Database',
|
||||
clickhouse: 'ClickHouse',
|
||||
'server-metrics': 'Server Metrics',
|
||||
appconfig: 'App Config',
|
||||
};
|
||||
const parts = location.pathname.split('/').filter(Boolean);
|
||||
@@ -743,6 +761,32 @@ function LayoutContent() {
|
||||
setPaletteOpen(false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.category === 'attribute') {
|
||||
// Three sources feed 'attribute' results:
|
||||
// - buildSearchData → id `attr-key-<key>` (key-only)
|
||||
// - operationalSearchData per-exchange → id `<execId>-attr-<key>`, title `key = "value"`
|
||||
// - synthetic facet (Task 9) → id `facet-<serialized>` where <serialized> is already
|
||||
// the URL `attr=` form (`key` or `key:value`)
|
||||
let attrParam: string | null = null;
|
||||
if (typeof result.id === 'string' && result.id.startsWith('attr-key-')) {
|
||||
attrParam = result.id.substring('attr-key-'.length);
|
||||
} else if (typeof result.id === 'string' && result.id.startsWith('facet-')) {
|
||||
attrParam = result.id.substring('facet-'.length);
|
||||
} else if (typeof result.title === 'string') {
|
||||
const m = /^([a-zA-Z0-9._-]+)\s*=\s*"([^"]*)"/.exec(result.title);
|
||||
if (m) attrParam = `${m[1]}:${m[2]}`;
|
||||
}
|
||||
if (attrParam) {
|
||||
const base = ['/exchanges'];
|
||||
if (scope.appId) base.push(scope.appId);
|
||||
if (scope.routeId) base.push(scope.routeId);
|
||||
navigate(`${base.join('/')}?attr=${encodeURIComponent(attrParam)}`);
|
||||
}
|
||||
setPaletteOpen(false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.path) {
|
||||
if (ADMIN_CATEGORIES.has(result.category)) {
|
||||
const itemId = result.id.split(':').slice(1).join(':');
|
||||
@@ -751,7 +795,7 @@ function LayoutContent() {
|
||||
});
|
||||
} else {
|
||||
const state: Record<string, unknown> = { sidebarReveal: result.path };
|
||||
if (result.category === 'exchange' || result.category === 'attribute') {
|
||||
if (result.category === 'exchange') {
|
||||
const parts = result.path.split('/').filter(Boolean);
|
||||
if (parts.length === 4 && parts[0] === 'exchanges') {
|
||||
state.selectedExchange = {
|
||||
@@ -765,7 +809,7 @@ function LayoutContent() {
|
||||
}
|
||||
}
|
||||
setPaletteOpen(false);
|
||||
}, [navigate, setPaletteOpen]);
|
||||
}, [navigate, setPaletteOpen, scope.appId, scope.routeId]);
|
||||
|
||||
const handlePaletteSubmit = useCallback((query: string) => {
|
||||
if (isAdminPage) {
|
||||
@@ -779,12 +823,18 @@ function LayoutContent() {
|
||||
} else {
|
||||
navigate('/admin/rbac');
|
||||
}
|
||||
} else {
|
||||
const baseParts = ['/exchanges'];
|
||||
if (scope.appId) baseParts.push(scope.appId);
|
||||
if (scope.routeId) baseParts.push(scope.routeId);
|
||||
navigate(`${baseParts.join('/')}?text=${encodeURIComponent(query)}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const facet = parseFacetQuery(query);
|
||||
const baseParts = ['/exchanges'];
|
||||
if (scope.appId) baseParts.push(scope.appId);
|
||||
if (scope.routeId) baseParts.push(scope.routeId);
|
||||
if (facet) {
|
||||
navigate(`${baseParts.join('/')}?attr=${encodeURIComponent(formatAttrParam(facet))}`);
|
||||
return;
|
||||
}
|
||||
navigate(`${baseParts.join('/')}?text=${encodeURIComponent(query)}`);
|
||||
}, [isAdminPage, adminSearchData, handlePaletteSelect, navigate, scope.appId, scope.routeId]);
|
||||
|
||||
const handleSidebarNavigate = useCallback((path: string) => {
|
||||
|
||||
@@ -110,6 +110,7 @@ export function buildAdminTreeNodes(opts?: { infrastructureEndpoints?: boolean }
|
||||
{ id: 'admin:oidc', label: 'OIDC', path: '/admin/oidc' },
|
||||
{ id: 'admin:outbound-connections', label: 'Outbound Connections', path: '/admin/outbound-connections' },
|
||||
{ id: 'admin:sensitive-keys', label: 'Sensitive Keys', path: '/admin/sensitive-keys' },
|
||||
...(showInfra ? [{ id: 'admin:server-metrics', label: 'Server Metrics', path: '/admin/server-metrics' }] : []),
|
||||
{ id: 'admin:rbac', label: 'Users & Roles', path: '/admin/rbac' },
|
||||
];
|
||||
return nodes;
|
||||
|
||||
81
ui/src/pages/Admin/ServerMetricsAdminPage.module.css
Normal file
81
ui/src/pages/Admin/ServerMetricsAdminPage.module.css
Normal file
@@ -0,0 +1,81 @@
|
||||
/* Page shell: vertical stack with 24px between children. */
.page {
  display: flex;
  flex-direction: column;
  gap: 24px;
}

/* Toolbar: children pushed to opposite ends, wrapping when space runs out. */
.toolbar {
  display: flex;
  flex-wrap: wrap;
  align-items: center;
  justify-content: space-between;
  gap: 12px;
}

/* Instance strip: tightly spaced, wrapping row. */
.instanceStrip {
  display: flex;
  flex-wrap: wrap;
  gap: 6px;
}

/* Two equal-width columns. */
.row {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 14px;
}

/* Three equal-width columns. */
.rowTriple {
  display: grid;
  grid-template-columns: 1fr 1fr 1fr;
  gap: 14px;
}

/* Section heading: small uppercase label, baseline-aligned with its subtitle. */
.sectionTitle {
  display: flex;
  align-items: baseline;
  gap: 10px;
  margin: 4px 0 4px 2px;
  font-size: 13px;
  font-weight: 600;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  color: var(--text-primary);
}

/* Subtitle resets the heading's uppercase, tracking and weight. */
.sectionSubtitle {
  font-size: 12px;
  font-weight: 400;
  letter-spacing: 0;
  text-transform: none;
  color: var(--text-muted);
}

/* Chart card header: title on the left, meta text on the right. */
.chartHeader {
  display: flex;
  align-items: center;
  justify-content: space-between;
  margin-bottom: 8px;
}

.chartTitle {
  font-size: 13px;
  font-weight: 600;
  color: var(--text-primary);
}

.chartMeta {
  font-size: 11px;
  color: var(--text-muted);
}

/* Tighten chart card internals for a denser grid. */
.compactCard {
  padding: 14px;
}

/* At 1100px wide and below, both grid variants collapse to a single column. */
@media (max-width: 1100px) {
  .rowTriple,
  .row {
    grid-template-columns: 1fr;
  }
}
|
||||
478
ui/src/pages/Admin/ServerMetricsAdminPage.tsx
Normal file
478
ui/src/pages/Admin/ServerMetricsAdminPage.tsx
Normal file
@@ -0,0 +1,478 @@
|
||||
import { useMemo } from 'react';
|
||||
import {
|
||||
ThemedChart, Area, Line, CHART_COLORS,
|
||||
Badge, EmptyState, Spinner, useGlobalFilters,
|
||||
} from '@cameleer/design-system';
|
||||
import {
|
||||
useServerMetricsCatalog,
|
||||
useServerMetricsInstances,
|
||||
useServerMetricsSeries,
|
||||
type ServerMetricQueryResponse,
|
||||
type ServerMetricSeries,
|
||||
type ServerMetricsRange,
|
||||
} from '../../api/queries/admin/serverMetrics';
|
||||
import chartCardStyles from '../../styles/chart-card.module.css';
|
||||
import styles from './ServerMetricsAdminPage.module.css';
|
||||
|
||||
// ── Step picker ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Pick the bucket width (in seconds) for a window of the given length, so the
 * rendered series stays readable whatever window the global time-range
 * control hands us.
 *
 * Wider windows get coarser buckets. At the upper edge of the first four
 * tiers this gives roughly 120–190 points per series (for example
 * 48 h / 15 min = 192); very short windows produce fewer. The result always
 * lies in [10, 3600], the range the backend accepts.
 *
 * @param windowSeconds length of the selected window in seconds
 * @returns bucket width in seconds: 10, 60, 300, 900 or 3600
 */
function stepSecondsFor(windowSeconds: number): number {
  const MINUTE = 60;
  const HOUR = 60 * MINUTE;

  // [largest window covered by the tier, bucket width], ordered narrowest first.
  const tiers: Array<[number, number]> = [
    [30 * MINUTE, 10],          // ≤ 30 min → 10 s buckets
    [2 * HOUR, MINUTE],         // ≤ 2 h    → 1 min
    [12 * HOUR, 5 * MINUTE],    // ≤ 12 h   → 5 min
    [48 * HOUR, 15 * MINUTE],   // ≤ 48 h   → 15 min
  ];

  for (const [maxWindow, step] of tiers) {
    if (windowSeconds <= maxWindow) return step;
  }
  return HOUR;                  // longer   → 1 h
}
|
||||
|
||||
// ── Panel component ────────────────────────────────────────────────────
|
||||
|
||||
/** Props of a single dashboard panel: its header texts, its query and how to draw it. */
interface PanelProps {
  /** Text shown in the chart header. */
  title: string;
  /** Optional secondary text shown next to the title. */
  subtitle?: string;

  // ── Query (forwarded to useServerMetricsSeries) ──
  metric: string;
  statistic?: string;
  groupByTags?: Array<string>;
  filterTags?: { [tagKey: string]: string };
  aggregation?: string;
  /** Query mode; the panel uses 'raw' when omitted. */
  mode?: 'raw' | 'delta';
  /** Window to query. */
  range: ServerMetricsRange;
  /** Bucket width in seconds. */
  stepSeconds: number;

  // ── Rendering ──
  /** Label passed to the chart's y axis. */
  yLabel?: string;
  /** Draw series as areas instead of lines; the panel uses lines when omitted. */
  asArea?: boolean;
  /** Formatter for y-axis tick values. */
  formatValue?: (v: number) => string;
}
|
||||
|
||||
/**
 * A single dashboard card: header (title plus optional subtitle) above the
 * result of one server-metrics series query.
 *
 * The query parameters come straight from the props; `mode` falls back to
 * 'raw' and `asArea` to false when omitted.
 */
function Panel(props: PanelProps) {
  const {
    title, subtitle, metric, statistic, groupByTags, filterTags,
    aggregation, mode = 'raw', yLabel, asArea = false,
    range, stepSeconds, formatValue,
  } = props;

  const query = useServerMetricsSeries(
    { metric, statistic, groupByTags, filterTags, aggregation, mode, stepSeconds },
    range,
  );

  // Only surface a message when the query actually failed.
  const errorMessage = query.isError
    ? (query.error as Error | null)?.message ?? 'query failed'
    : null;
  const cardClassName = `${chartCardStyles.chartCard} ${styles.compactCard}`;

  return (
    <div className={cardClassName}>
      <div className={styles.chartHeader}>
        <span className={styles.chartTitle}>{title}</span>
        {subtitle && <span className={styles.chartMeta}>{subtitle}</span>}
      </div>
      <PanelBody
        data={query.data}
        loading={query.isLoading}
        error={errorMessage}
        yLabel={yLabel}
        asArea={asArea}
        formatValue={formatValue}
      />
    </div>
  );
}
|
||||
|
||||
/**
 * Body of a chart card. Shows, in priority order: a spinner while loading,
 * a "Query failed" state when `error` is set, a "No data" state when there is
 * nothing to plot, and otherwise one Area or Line per series.
 */
function PanelBody({
  data, loading, error, yLabel, asArea, formatValue,
}: {
  data: ServerMetricQueryResponse | undefined;
  loading: boolean;
  error: string | null;
  yLabel?: string;
  asArea?: boolean;
  formatValue?: (v: number) => string;
}) {
  // Hooks must run on every render, so the flattening happens before any early return.
  const { rows, seriesKeys } = useMemo(() => flatten(data?.series ?? []), [data]);

  if (loading) {
    return (
      <div style={{ minHeight: 160, display: 'grid', placeItems: 'center' }}>
        <Spinner />
      </div>
    );
  }

  if (error) {
    return <EmptyState title="Query failed" description={error} />;
  }

  const nothingToPlot = !data || data.series.length === 0 || rows.length === 0;
  if (nothingToPlot) {
    return <EmptyState title="No data" description="No samples in the selected window" />;
  }

  // Colours cycle through the palette when there are more series than colours.
  const renderSeries = (seriesKey: string, position: number) => {
    const color = CHART_COLORS[position % CHART_COLORS.length];
    if (asArea) {
      return (
        <Area
          key={seriesKey}
          dataKey={seriesKey}
          name={seriesKey}
          stroke={color}
          fill={color}
          fillOpacity={0.18}
          strokeWidth={2}
          dot={false}
        />
      );
    }
    return (
      <Line
        key={seriesKey}
        dataKey={seriesKey}
        name={seriesKey}
        stroke={color}
        strokeWidth={2}
        dot={false}
      />
    );
  };

  return (
    <ThemedChart
      data={rows}
      height={180}
      xDataKey="t"
      xTickFormatter={formatTime}
      yLabel={yLabel}
      yTickFormatter={formatValue}
    >
      {seriesKeys.map(renderSeries)}
    </ThemedChart>
  );
}
|
||||
|
||||
/**
 * Collapses ServerMetricSeries[] into one table with a row per distinct `t`.
 * Each series contributes a column named after its label, so several series
 * end up as overlapping lines on a shared time axis that Recharts can render
 * as a single dataset.
 *
 * @param series series to merge; an empty array yields empty rows and keys
 * @returns `rows` ordered by their `t` string, and `seriesKeys` holding the
 *          column name of each input series, in input order
 */
function flatten(series: ServerMetricSeries[]): { rows: Array<Record<string, number | string>>; seriesKeys: string[] } {
  if (series.length === 0) return { rows: [], seriesKeys: [] };

  const seriesKeys = series.map(seriesLabel);
  const byTimestamp = new Map<string, Record<string, number | string>>();

  for (let index = 0; index < series.length; index += 1) {
    const column = seriesKeys[index];
    for (const point of series[index].points) {
      const existing = byTimestamp.get(point.t);
      if (existing) {
        existing[column] = point.v;
      } else {
        // First sample seen for this bucket: start a new row.
        byTimestamp.set(point.t, { t: point.t, [column]: point.v });
      }
    }
  }

  // Rows are ordered by comparing the `t` strings.
  // NOTE(review): presumably `t` is an ISO-8601 timestamp, so string order matches time order — confirm.
  const rows = Array.from(byTimestamp.values());
  rows.sort((left, right) => (left.t as string).localeCompare(right.t as string));

  return { rows, seriesKeys };
}
|
||||
|
||||
/**
 * Builds the display label of a series from its tags, e.g. `state=LIVE · type=x`.
 * A series without tags is labelled `value`.
 */
function seriesLabel(s: ServerMetricSeries): string {
  const parts: string[] = [];
  for (const [tagName, tagValue] of Object.entries(s.tags)) {
    parts.push(`${tagName}=${tagValue}`);
  }
  return parts.length > 0 ? parts.join(' · ') : 'value';
}
|
||||
|
||||
/**
 * Formats an axis tick as a locale-specific hour:minute string.
 * Numbers are passed to `Date` unchanged; anything else is parsed from its string form.
 */
function formatTime(iso: string | number): string {
  const source = typeof iso === 'number' ? iso : String(iso);
  const instant = new Date(source);
  return instant.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit' });
}
|
||||
|
||||
/** Renders a byte count as whole mebibytes (1024 × 1024 bytes) with an ` MB` suffix. */
function formatMB(bytes: number): string {
  const BYTES_PER_MB = 1024 * 1024;
  const megabytes = bytes / BYTES_PER_MB;
  return megabytes.toFixed(0) + ' MB';
}
|
||||
|
||||
/** Renders a fraction (e.g. 0.5) as a whole-number percentage (e.g. `50%`). */
function formatPct(frac: number): string {
  const percent = frac * 100;
  return percent.toFixed(0) + '%';
}
|
||||
|
||||
// ── Page ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Admin page that charts server metrics in sections: server health, JVM,
 * HTTP & DB pools, plus Alerting and Deployments sections that only appear
 * when the metrics catalog lists the corresponding metrics.
 * The query window comes from the global time-range filter; the bucket width
 * is derived from the window length.
 */
export default function ServerMetricsAdminPage() {
  // The global time-range control in the TopBar drives the whole page.
  const { timeRange } = useGlobalFilters();
  const range: ServerMetricsRange = useMemo(
    () => ({ from: timeRange.start, to: timeRange.end }),
    [timeRange.start, timeRange.end],
  );

  // Window length in whole seconds, never below 1.
  const elapsedMs = range.to.getTime() - range.from.getTime();
  const windowSeconds = Math.max(1, Math.round(elapsedMs / 1000));
  const stepSeconds = stepSecondsFor(windowSeconds);

  const { data: catalog } = useServerMetricsCatalog(range);
  const { data: instances } = useServerMetricsInstances(range);

  // Catalog lookups that gate the optional sections and their panels.
  const catalogEntries = catalog ?? [];
  const has = (metricName: string) =>
    catalogEntries.some((entry) => entry.metricName === metricName);
  const showAlertInstances = has('alerting_instances_total');
  const showEvalErrors = has('alerting_eval_errors_total');
  const showWebhookDelivery = has('alerting_webhook_delivery_duration_seconds');
  const showDeployOutcome = has('cameleer.deployments.outcome');
  const showDeployDuration = has('cameleer.deployments.duration');

  // At most 8 instance badges are listed; the rest collapse into a "+N" badge.
  const instanceList = instances ?? [];
  const listedInstances = instanceList.slice(0, 8);

  // Every panel shares the same window and bucket width.
  const timing = { range, stepSeconds };
  const followUpRowStyle = { marginTop: 14 };

  return (
    <div className={styles.page}>
      {/* Toolbar — just server-instance badges. Time range is driven by
          the global time-range control in the TopBar. */}
      <div className={styles.toolbar}>
        <div className={styles.instanceStrip}>
          {listedInstances.map((i) => (
            <Badge key={i.serverInstanceId} label={i.serverInstanceId} variant="outlined" />
          ))}
          {instanceList.length > 8 && (
            <Badge label={`+${instanceList.length - 8}`} variant="outlined" />
          )}
          {instanceList.length === 0 && (
            <Badge label="no samples in window" variant="outlined" />
          )}
        </div>
      </div>

      {/* Row 1: Server health */}
      <section>
        <div className={styles.sectionTitle}>
          Server health
          <span className={styles.sectionSubtitle}>agents, ingestion, auth</span>
        </div>
        <div className={styles.row}>
          <Panel
            {...timing}
            title="Agents by state"
            subtitle="stacked area"
            metric="cameleer.agents.connected"
            statistic="value"
            groupByTags={['state']}
            aggregation="avg"
            asArea
          />
          <Panel
            {...timing}
            title="Ingestion buffer depth"
            subtitle="by type"
            metric="cameleer.ingestion.buffer.size"
            statistic="value"
            groupByTags={['type']}
            aggregation="avg"
          />
        </div>
        <div className={styles.row} style={followUpRowStyle}>
          <Panel
            {...timing}
            title="Ingestion drops / interval"
            subtitle="per-bucket delta"
            metric="cameleer.ingestion.drops"
            statistic="count"
            groupByTags={['reason']}
            mode="delta"
          />
          <Panel
            {...timing}
            title="Auth failures / interval"
            subtitle="per-bucket delta"
            metric="cameleer.auth.failures"
            statistic="count"
            groupByTags={['reason']}
            mode="delta"
          />
        </div>
      </section>

      {/* Row 2: JVM */}
      <section>
        <div className={styles.sectionTitle}>
          JVM
          <span className={styles.sectionSubtitle}>memory, CPU, threads, GC</span>
        </div>
        <div className={styles.rowTriple}>
          <Panel
            {...timing}
            title="Heap used"
            subtitle="sum across pools"
            metric="jvm.memory.used"
            statistic="value"
            filterTags={{ area: 'heap' }}
            aggregation="sum"
            asArea
            yLabel="MB"
            formatValue={formatMB}
          />
          <Panel
            {...timing}
            title="CPU usage"
            subtitle="process + system"
            metric="process.cpu.usage"
            statistic="value"
            aggregation="avg"
            asArea
            yLabel="%"
            formatValue={formatPct}
          />
          <Panel
            {...timing}
            title="GC pause max"
            subtitle="by cause"
            metric="jvm.gc.pause"
            statistic="max"
            groupByTags={['cause']}
            aggregation="max"
          />
        </div>
        <div className={styles.row} style={followUpRowStyle}>
          <Panel
            {...timing}
            title="Thread count"
            subtitle="live threads"
            metric="jvm.threads.live"
            statistic="value"
            aggregation="avg"
          />
          <Panel
            {...timing}
            title="Heap committed vs max"
            subtitle="sum across pools"
            metric="jvm.memory.committed"
            statistic="value"
            filterTags={{ area: 'heap' }}
            aggregation="sum"
            yLabel="MB"
            formatValue={formatMB}
          />
        </div>
      </section>

      {/* Row 3: HTTP + DB */}
      <section>
        <div className={styles.sectionTitle}>
          HTTP & DB pools
          <span className={styles.sectionSubtitle}>requests, Hikari saturation</span>
        </div>
        <div className={styles.rowTriple}>
          <Panel
            {...timing}
            title="HTTP latency — mean by URI"
            subtitle="SUCCESS only"
            metric="http.server.requests"
            statistic="mean"
            groupByTags={['uri']}
            filterTags={{ outcome: 'SUCCESS' }}
            aggregation="avg"
            yLabel="s"
          />
          <Panel
            {...timing}
            title="HTTP requests / interval"
            subtitle="all outcomes"
            metric="http.server.requests"
            statistic="count"
            mode="delta"
            aggregation="sum"
          />
          <Panel
            {...timing}
            title="Hikari pool — active vs pending"
            subtitle="by pool"
            metric="hikaricp.connections.active"
            statistic="value"
            groupByTags={['pool']}
            aggregation="avg"
          />
        </div>
        <div className={styles.row} style={followUpRowStyle}>
          <Panel
            {...timing}
            title="Hikari acquire timeouts"
            subtitle="per-bucket delta"
            metric="hikaricp.connections.timeout"
            statistic="count"
            groupByTags={['pool']}
            mode="delta"
          />
          <Panel
            {...timing}
            title="Log events by level"
            subtitle="per-bucket delta"
            metric="logback.events"
            statistic="count"
            groupByTags={['level']}
            mode="delta"
          />
        </div>
      </section>

      {/* Row 4: Alerting */}
      {(showAlertInstances || showEvalErrors || showWebhookDelivery) && (
        <section>
          <div className={styles.sectionTitle}>
            Alerting
            <span className={styles.sectionSubtitle}>instances, eval errors, webhook delivery</span>
          </div>
          <div className={styles.rowTriple}>
            {showAlertInstances && (
              <Panel
                {...timing}
                title="Alert instances by state"
                subtitle="stacked"
                metric="alerting_instances_total"
                statistic="value"
                groupByTags={['state']}
                aggregation="avg"
                asArea
              />
            )}
            {showEvalErrors && (
              <Panel
                {...timing}
                title="Eval errors / interval"
                subtitle="by kind"
                metric="alerting_eval_errors_total"
                statistic="count"
                groupByTags={['kind']}
                mode="delta"
              />
            )}
            {showWebhookDelivery && (
              <Panel
                {...timing}
                title="Webhook delivery max"
                subtitle="max latency per bucket"
                metric="alerting_webhook_delivery_duration_seconds"
                statistic="max"
                aggregation="max"
                yLabel="s"
              />
            )}
          </div>
        </section>
      )}

      {/* Row 5: Deployments (only when runtime orchestration is enabled) */}
      {(showDeployOutcome || showDeployDuration) && (
        <section>
          <div className={styles.sectionTitle}>
            Deployments
            <span className={styles.sectionSubtitle}>outcomes, duration</span>
          </div>
          <div className={styles.row}>
            {showDeployOutcome && (
              <Panel
                {...timing}
                title="Deploy outcomes / interval"
                subtitle="by status"
                metric="cameleer.deployments.outcome"
                statistic="count"
                groupByTags={['status']}
                mode="delta"
              />
            )}
            {showDeployDuration && (
              <Panel
                {...timing}
                title="Deploy duration mean"
                subtitle="total_time / count"
                metric="cameleer.deployments.duration"
                statistic="mean"
                aggregation="avg"
                yLabel="s"
              />
            )}
          </div>
        </section>
      )}
    </div>
  );
}
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState, useEffect, useRef } from 'react';
|
||||
import { useParams, useLocation, useNavigate } from 'react-router';
|
||||
import { useQueryClient } from '@tanstack/react-query';
|
||||
import { AlertDialog, Badge, Button, Tabs, useToast } from '@cameleer/design-system';
|
||||
import { AlertDialog, Badge, Button, StatusDot, Tabs, useToast } from '@cameleer/design-system';
|
||||
import { useEnvironmentStore } from '../../../api/environment-store';
|
||||
import { useEnvironments } from '../../../api/queries/admin/environments';
|
||||
import {
|
||||
@@ -39,6 +39,16 @@ import styles from './AppDeploymentPage.module.css';
|
||||
|
||||
type TabKey = 'monitoring' | 'resources' | 'variables' | 'sensitive-keys' | 'deployment' | 'traces' | 'recording';
|
||||
|
||||
/** Badge colour per deployment status; lookups for unlisted statuses yield `undefined`. */
const STATUS_COLORS: Record<string, 'success' | 'warning' | 'error' | 'auto' | 'running'> = {
  RUNNING: 'running',
  STARTING: 'warning',
  FAILED: 'error',
  STOPPED: 'auto',
  DEGRADED: 'warning',
  STOPPING: 'auto',
};
|
||||
|
||||
/** StatusDot variant per deployment status; lookups for unlisted statuses yield `undefined`. */
const DEPLOY_STATUS_DOT: Record<string, 'live' | 'stale' | 'dead' | 'success' | 'warning' | 'error' | 'running'> = {
  RUNNING: 'live',
  STARTING: 'running',
  DEGRADED: 'stale',
  STOPPING: 'stale',
  STOPPED: 'dead',
  FAILED: 'error',
};
|
||||
|
||||
/**
 * Derives a URL-safe slug from a display name: lower-cases it, collapses every
 * run of characters outside `a-z0-9` into a single hyphen, removes leading and
 * trailing hyphens, and caps the result at 100 characters.
 *
 * The trailing-hyphen strip runs after the length cap, so a cut that lands
 * right after a separator cannot leave the slug ending in `-`.
 *
 * @param name free-form display name
 * @returns the slug; empty when `name` has no ASCII letters or digits
 */
function slugify(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .substring(0, 100)
    .replace(/-+$/, '');
}
|
||||
@@ -393,9 +403,35 @@ export default function AppDeploymentPage() {
|
||||
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', gap: 8 }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 10 }}>
|
||||
<h2 style={{ margin: 0 }}>{app ? app.displayName : 'Create Application'}</h2>
|
||||
{app && !deploymentInProgress && (dirty.anyLocalEdit || serverDirtyAgainstDeploy) && (
|
||||
<Badge label="Pending deploy" color="warning" />
|
||||
{app && latestDeployment && (
|
||||
<span style={{ display: 'inline-flex', alignItems: 'center', gap: 5 }}>
|
||||
<StatusDot variant={DEPLOY_STATUS_DOT[latestDeployment.status] ?? 'dead'} />
|
||||
<Badge
|
||||
label={latestDeployment.status}
|
||||
color={STATUS_COLORS[latestDeployment.status] ?? 'auto'}
|
||||
/>
|
||||
</span>
|
||||
)}
|
||||
{app && !deploymentInProgress && (dirty.anyLocalEdit || serverDirtyAgainstDeploy) && (() => {
|
||||
const diffs = dirtyState?.differences ?? [];
|
||||
const noSnapshot = diffs.length === 1 && diffs[0].field === 'snapshot';
|
||||
const tooltip = dirty.anyLocalEdit
|
||||
? 'Local edits not yet saved — see tabs marked with *.'
|
||||
: noSnapshot
|
||||
? 'No successful deployment recorded for this app yet.'
|
||||
: diffs.length > 0
|
||||
? `Differs from last successful deploy:\n` +
|
||||
diffs.map((d) => `• ${d.field}\n staged: ${d.staged}\n deployed: ${d.deployed}`).join('\n')
|
||||
: 'Server reports config differs from last successful deploy.';
|
||||
return (
|
||||
<span title={tooltip} style={{ display: 'inline-flex' }}>
|
||||
<Badge
|
||||
label={dirty.anyLocalEdit ? 'Pending deploy' : `Pending deploy (${diffs.length})`}
|
||||
color="warning"
|
||||
/>
|
||||
</span>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
|
||||
{dirty.anyLocalEdit && (
|
||||
|
||||
@@ -139,3 +139,23 @@
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
/* Small pill that displays one attribute chip inline with the surrounding text. */
.attrChip {
  /* Box */
  display: inline-flex;
  align-items: center;
  gap: 4px;
  margin-left: 8px;
  padding: 2px 8px;
  border: 1px solid var(--border);
  border-radius: 10px;
  background: var(--bg-hover);

  /* Text */
  color: var(--text-primary);
  font-family: var(--font-mono);
  font-size: 11px;
}
|
||||
|
||||
/* <code> inside a chip takes the chip's font and colour and drops its own background. */
.attrChip code {
  color: var(--text-primary);
  font-family: inherit;
  background: transparent;
}
|
||||
|
||||
|
||||
@@ -15,6 +15,8 @@ import {
|
||||
import { useEnvironmentStore } from '../../api/environment-store'
|
||||
import type { ExecutionSummary } from '../../api/types'
|
||||
import { attributeBadgeColor } from '../../utils/attribute-color'
|
||||
import { parseAttrParam, formatAttrParam } from '../../utils/attribute-filter';
|
||||
import type { AttributeFilter } from '../../utils/attribute-filter';
|
||||
import { formatDuration, statusLabel } from '../../utils/format-utils'
|
||||
import styles from './Dashboard.module.css'
|
||||
import tableStyles from '../../styles/table-section.module.css'
|
||||
@@ -84,7 +86,7 @@ function buildColumns(hasAttributes: boolean): Column<Row>[] {
|
||||
<div className={styles.attrCell}>
|
||||
{shown.map(([k, v]) => (
|
||||
<span key={k} title={k}>
|
||||
<Badge label={String(v)} color={attributeBadgeColor(String(v))} />
|
||||
<Badge label={String(v)} color={attributeBadgeColor(k)} />
|
||||
</span>
|
||||
))}
|
||||
{overflow > 0 && <span className={styles.attrOverflow}>+{overflow}</span>}
|
||||
@@ -147,6 +149,12 @@ export default function Dashboard({ onExchangeSelect, activeExchangeId }: Dashbo
|
||||
const navigate = useNavigate()
|
||||
const [searchParams, setSearchParams] = useSearchParams()
|
||||
const textFilter = searchParams.get('text') || undefined
|
||||
const attributeFilters = useMemo<AttributeFilter[]>(
|
||||
() => searchParams.getAll('attr')
|
||||
.map(parseAttrParam)
|
||||
.filter((f): f is AttributeFilter => f != null),
|
||||
[searchParams],
|
||||
);
|
||||
const [selectedId, setSelectedId] = useState<string | undefined>(activeExchangeId)
|
||||
const [sortField, setSortField] = useState<string>('startTime')
|
||||
const [sortDir, setSortDir] = useState<'asc' | 'desc'>('desc')
|
||||
@@ -180,12 +188,13 @@ export default function Dashboard({ onExchangeSelect, activeExchangeId }: Dashbo
|
||||
environment: selectedEnv,
|
||||
status: statusParam,
|
||||
text: textFilter,
|
||||
attributeFilters: attributeFilters.length > 0 ? attributeFilters : undefined,
|
||||
sortField,
|
||||
sortDir,
|
||||
offset: 0,
|
||||
limit: textFilter ? 200 : 50,
|
||||
limit: textFilter || attributeFilters.length > 0 ? 200 : 50,
|
||||
},
|
||||
!textFilter,
|
||||
!textFilter && attributeFilters.length === 0,
|
||||
)
|
||||
|
||||
// ─── Rows ────────────────────────────────────────────────────────────────
|
||||
@@ -221,17 +230,46 @@ export default function Dashboard({ onExchangeSelect, activeExchangeId }: Dashbo
|
||||
<div className={`${tableStyles.tableSection} ${styles.tableWrap}`}>
|
||||
<div className={tableStyles.tableHeader}>
|
||||
<span className={tableStyles.tableTitle}>
|
||||
{textFilter ? (
|
||||
{textFilter || attributeFilters.length > 0 ? (
|
||||
<>
|
||||
<Search size={14} style={{ marginRight: 4, verticalAlign: -2 }} />
|
||||
Search: “{textFilter}”
|
||||
<button
|
||||
className={styles.clearSearch}
|
||||
onClick={() => setSearchParams({})}
|
||||
title="Clear search"
|
||||
>
|
||||
<X size={12} />
|
||||
</button>
|
||||
{textFilter && (
|
||||
<>
|
||||
Search: “{textFilter}”
|
||||
<button
|
||||
className={styles.clearSearch}
|
||||
onClick={() => {
|
||||
const next = new URLSearchParams(searchParams);
|
||||
next.delete('text');
|
||||
setSearchParams(next);
|
||||
}}
|
||||
title="Clear text search"
|
||||
>
|
||||
<X size={12} />
|
||||
</button>
|
||||
</>
|
||||
)}
|
||||
{attributeFilters.map((f, i) => (
|
||||
<span key={`${f.key}:${f.value ?? ''}:${i}`} className={styles.attrChip}>
|
||||
{f.value === undefined
|
||||
? <>has <code>{f.key}</code></>
|
||||
: <><code>{f.key}</code> = <code>{f.value}</code></>}
|
||||
<button
|
||||
className={styles.clearSearch}
|
||||
onClick={() => {
|
||||
const next = new URLSearchParams(searchParams);
|
||||
const remaining = next.getAll('attr')
|
||||
.filter(a => a !== formatAttrParam(f));
|
||||
next.delete('attr');
|
||||
remaining.forEach(a => next.append('attr', a));
|
||||
setSearchParams(next);
|
||||
}}
|
||||
title="Remove filter"
|
||||
>
|
||||
<X size={12} />
|
||||
</button>
|
||||
</span>
|
||||
))}
|
||||
</>
|
||||
) : 'Recent Exchanges'}
|
||||
</span>
|
||||
@@ -239,7 +277,7 @@ export default function Dashboard({ onExchangeSelect, activeExchangeId }: Dashbo
|
||||
<span className={tableStyles.tableMeta}>
|
||||
{rows.length.toLocaleString()} of {(searchResult?.total ?? 0).toLocaleString()} exchanges
|
||||
</span>
|
||||
{!textFilter && <Badge label="AUTO" color="success" />}
|
||||
{!textFilter && attributeFilters.length === 0 && <Badge label="AUTO" color="success" />}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ const AuditLogPage = lazy(() => import('./pages/Admin/AuditLogPage'));
|
||||
const OidcConfigPage = lazy(() => import('./pages/Admin/OidcConfigPage'));
|
||||
const DatabaseAdminPage = lazy(() => import('./pages/Admin/DatabaseAdminPage'));
|
||||
const ClickHouseAdminPage = lazy(() => import('./pages/Admin/ClickHouseAdminPage'));
|
||||
const ServerMetricsAdminPage = lazy(() => import('./pages/Admin/ServerMetricsAdminPage'));
|
||||
const EnvironmentsPage = lazy(() => import('./pages/Admin/EnvironmentsPage'));
|
||||
const OutboundConnectionsPage = lazy(() => import('./pages/Admin/OutboundConnectionsPage'));
|
||||
const OutboundConnectionEditor = lazy(() => import('./pages/Admin/OutboundConnectionEditor'));
|
||||
@@ -105,6 +106,7 @@ export const router = createBrowserRouter([
|
||||
{ path: 'sensitive-keys', element: <SuspenseWrapper><SensitiveKeysPage /></SuspenseWrapper> },
|
||||
{ path: 'database', element: <SuspenseWrapper><DatabaseAdminPage /></SuspenseWrapper> },
|
||||
{ path: 'clickhouse', element: <SuspenseWrapper><ClickHouseAdminPage /></SuspenseWrapper> },
|
||||
{ path: 'server-metrics', element: <SuspenseWrapper><ServerMetricsAdminPage /></SuspenseWrapper> },
|
||||
{ path: 'environments', element: <SuspenseWrapper><EnvironmentsPage /></SuspenseWrapper> },
|
||||
],
|
||||
}],
|
||||
|
||||
69
ui/src/utils/attribute-filter.test.ts
Normal file
69
ui/src/utils/attribute-filter.test.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseAttrParam, formatAttrParam, parseFacetQuery } from './attribute-filter';
|
||||
|
||||
// URL `?attr=` parsing: key-only and key:value forms, plus the inputs that must be rejected.
describe('parseAttrParam', () => {
  it('returns key-only for input without colon', () => {
    const parsed = parseAttrParam('order');
    expect(parsed).toEqual({ key: 'order' });
  });

  it('splits on first colon, trims key, preserves value as-is', () => {
    const parsed = parseAttrParam('order:47');
    expect(parsed).toEqual({ key: 'order', value: '47' });
  });

  it('treats a value containing colons as a single value', () => {
    const parsed = parseAttrParam('trace-id:abc:123');
    expect(parsed).toEqual({ key: 'trace-id', value: 'abc:123' });
  });

  it('returns null for blank input', () => {
    for (const blank of ['', ' ']) {
      expect(parseAttrParam(blank)).toBeNull();
    }
  });

  it('returns null for missing key', () => {
    const parsed = parseAttrParam(':x');
    expect(parsed).toBeNull();
  });

  it('returns null when the key contains invalid characters', () => {
    const parsed = parseAttrParam('bad key:1');
    expect(parsed).toBeNull();
  });
});
|
||||
|
||||
// Serialisation back to a `?attr=` value: bare key, key:value, and the empty-value case.
describe('formatAttrParam', () => {
  it('returns bare key for key-only filter', () => {
    const formatted = formatAttrParam({ key: 'order' });
    expect(formatted).toBe('order');
  });

  it('joins with colon when value is present', () => {
    const formatted = formatAttrParam({ key: 'order', value: '47' });
    expect(formatted).toBe('order:47');
  });

  it('joins with colon when value is empty string', () => {
    const formatted = formatAttrParam({ key: 'order', value: '' });
    expect(formatted).toBe('order:');
  });
});
|
||||
|
||||
// Free-text `key: value` queries: accepted spellings first, then the rejected ones.
describe('parseFacetQuery', () => {
  it('matches `key: value`', () => {
    const facet = parseFacetQuery('order: 47');
    expect(facet).toEqual({ key: 'order', value: '47' });
  });

  it('matches `key:value` without spaces', () => {
    const facet = parseFacetQuery('order:47');
    expect(facet).toEqual({ key: 'order', value: '47' });
  });

  it('matches wildcard values', () => {
    const facet = parseFacetQuery('order: 4*');
    expect(facet).toEqual({ key: 'order', value: '4*' });
  });

  it('returns null when the key contains invalid characters', () => {
    const facet = parseFacetQuery('bad key: 1');
    expect(facet).toBeNull();
  });

  it('returns null without a colon', () => {
    const facet = parseFacetQuery('order');
    expect(facet).toBeNull();
  });

  it('returns null with an empty value side', () => {
    const facet = parseFacetQuery('order: ');
    expect(facet).toBeNull();
  });
});
|
||||
37
ui/src/utils/attribute-filter.ts
Normal file
37
ui/src/utils/attribute-filter.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
 * One attribute filter as carried in a `?attr=` URL value.
 * Without `value` the filter names a key only; with `value` it pairs the key
 * with that value.
 */
export interface AttributeFilter {
  /** Attribute name. */
  key: string;
  /** Value paired with the key; omitted for key-only filters. */
  value?: string;
}
|
||||
|
||||
/** A valid attribute key: one or more ASCII letters, digits, dots, underscores or hyphens. */
const KEY_REGEX = /^[a-zA-Z0-9._-]+$/;

/**
 * Parses a single `?attr=` URL value.
 *
 * - `key` alone yields a key-only filter.
 * - `key:value` splits on the first colon; the key is trimmed, while the
 *   value is everything after that colon in the input, kept as-is.
 *
 * @param raw one `attr` query-parameter value
 * @returns the parsed filter, or null when the input is blank or the key is
 *          missing or contains characters outside the allowed set
 */
export function parseAttrParam(raw: string): AttributeFilter | null {
  const trimmed = raw ? raw.trim() : '';
  if (trimmed === '') return null;

  const separatorAt = trimmed.indexOf(':');
  const hasValue = separatorAt >= 0;

  const key = hasValue ? trimmed.slice(0, separatorAt).trim() : trimmed;
  if (!KEY_REGEX.test(key)) return null;
  if (!hasValue) return { key };

  // The value is cut from the untrimmed input so that it is preserved verbatim.
  return { key, value: raw.slice(raw.indexOf(':') + 1) };
}
|
||||
|
||||
/**
 * Serialises an AttributeFilter back to a URL `?attr=` value.
 * A filter without a value becomes the bare key; otherwise key and value are
 * joined with a colon (an empty-string value still produces the colon).
 */
export function formatAttrParam(f: AttributeFilter): string {
  if (f.value === undefined) {
    return f.key;
  }
  return f.key + ':' + f.value;
}
|
||||
|
||||
/**
 * `key : value` with optional whitespace around each part.
 * Group 1 is the key (same character set as attribute keys); group 2 is the
 * value, which must start and end with a non-whitespace character.
 */
const FACET_REGEX = /^\s*([a-zA-Z0-9._-]+)\s*:\s*(\S(?:.*\S)?)\s*$/;

/**
 * Parses a cmd-k query like `order: 47` into a facet descriptor.
 *
 * @param query text typed by the user
 * @returns the key/value pair, or null when the text is not of the form
 *          `key: value` with a valid key and a non-empty value
 */
export function parseFacetQuery(query: string): AttributeFilter | null {
  const match = FACET_REGEX.exec(query);
  if (match === null) {
    return null;
  }
  const [, key, value] = match;
  return { key, value };
}
|
||||
Reference in New Issue
Block a user