diff --git a/.claude/rules/app-classes.md b/.claude/rules/app-classes.md index 971643fb..033bd33f 100644 --- a/.claude/rules/app-classes.md +++ b/.claude/rules/app-classes.md @@ -109,7 +109,7 @@ Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `Ale - `UsageAnalyticsController` — GET `/api/v1/admin/usage` (ClickHouse `usage_events`). - `ClickHouseAdminController` — GET `/api/v1/admin/clickhouse/**` (conditional on `infrastructureendpoints` flag). - `DatabaseAdminController` — GET `/api/v1/admin/database/**` (conditional on `infrastructureendpoints` flag). -- `ServerMetricsAdminController` — `/api/v1/admin/server-metrics/**`. GET `/catalog`, GET `/instances`, POST `/query`. Generic read API over the `server_metrics` ClickHouse table so SaaS dashboards don't need direct CH access. Delegates to `ServerMetricsQueryStore` (impl `ClickHouseServerMetricsQueryStore`). Validation: metric/tag regex `^[a-zA-Z0-9._]+$`, statistic regex `^[a-z_]+$`, `to - from ≤ 31 days`, stepSeconds ∈ [10, 3600], response capped at 500 series. `IllegalArgumentException` → 400. `/query` supports `raw` + `delta` modes (delta does per-`server_instance_id` positive-clipped differences, then aggregates across instances). Derived `statistic=mean` for timers computes `sum(total|total_time)/sum(count)` per bucket. +- `ServerMetricsAdminController` — `/api/v1/admin/server-metrics/**`. GET `/catalog`, GET `/instances`, POST `/query`. Generic read API over the `server_metrics` ClickHouse table so SaaS dashboards don't need direct CH access. Delegates to `ServerMetricsQueryStore` (impl `ClickHouseServerMetricsQueryStore`). Visibility matches ClickHouse/Database admin: `@ConditionalOnProperty(infrastructureendpoints, matchIfMissing=true)` + class-level `@PreAuthorize("hasRole('ADMIN')")`. Validation: metric/tag regex `^[a-zA-Z0-9._]+$`, statistic regex `^[a-z_]+$`, `to - from ≤ 31 days`, stepSeconds ∈ [10, 3600], response capped at 500 series. `IllegalArgumentException` → 400. 
`/query` supports `raw` + `delta` modes (delta does per-`server_instance_id` positive-clipped differences, then aggregates across instances). Derived `statistic=mean` for timers computes `sum(total|total_time)/sum(count)` per bucket. ### Other (flat) diff --git a/.claude/rules/ui.md b/.claude/rules/ui.md index f4597100..6ebc15cf 100644 --- a/.claude/rules/ui.md +++ b/.claude/rules/ui.md @@ -21,6 +21,7 @@ The UI has 4 main tabs: **Exchanges**, **Dashboard**, **Runtime**, **Deployments **Admin pages** (ADMIN-only, under `/admin/`): - **Sensitive Keys** (`ui/src/pages/Admin/SensitiveKeysPage.tsx`) — global sensitive key masking config. Shows agent built-in defaults as outlined Badge reference, editable Tag pills for custom keys, amber-highlighted push-to-agents toggle. Keys add to (not replace) agent defaults. Per-app sensitive key additions managed via `ApplicationConfigController` API. Note: `AppConfigDetailPage.tsx` exists but is not routed in `router.tsx`. +- **Server Metrics** (`ui/src/pages/Admin/ServerMetricsAdminPage.tsx`) — dashboard over the `server_metrics` ClickHouse table. Visibility matches Database/ClickHouse pages: gated on `capabilities.infrastructureEndpoints` in `buildAdminTreeNodes`; backend is `@ConditionalOnProperty(infrastructureendpoints, matchIfMissing=true)` + class-level `@PreAuthorize("hasRole('ADMIN')")`. Uses the generic `/api/v1/admin/server-metrics/{catalog,instances,query}` API via `ui/src/api/queries/admin/serverMetrics.ts` hooks (`useServerMetricsCatalog`, `useServerMetricsInstances`, `useServerMetricsSeries`). Toolbar: server-instance badges + DS `Select` window picker (15 min / 1 h / 6 h / 24 h / 7 d). Sections: Server health (agents/ingestion/auth), JVM (memory/CPU/GC/threads), HTTP & DB pools, Alerting (conditional on catalog), Deployments (conditional on catalog). Each panel is a `ThemedChart` with `Line`/`Area` children from the design system; multi-series responses are merged into one row per bucket timestamp so the series overlap on a shared time axis. 
Alerting and Deployments rows are hidden when their metrics aren't in the catalog (zero-deploy / alerting-disabled installs). ## Key UI Files diff --git a/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/ServerMetricsAdminController.java b/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/ServerMetricsAdminController.java index 676dbd8c..a8049d59 100644 --- a/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/ServerMetricsAdminController.java +++ b/cameleer-server-app/src/main/java/com/cameleer/server/app/controller/ServerMetricsAdminController.java @@ -7,7 +7,9 @@ import com.cameleer.server.core.storage.model.ServerMetricQueryRequest; import com.cameleer.server.core.storage.model.ServerMetricQueryResponse; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.http.ResponseEntity; +import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; @@ -32,12 +34,23 @@ import java.util.Map; *
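   <li>{@code GET /instances} — list server instances (useful for partitioning counter math)</li>
  * </ul>
  *
- * <p>Protected by the {@code /api/v1/admin/**} catch-all in {@code SecurityConfig} — requires ADMIN role.
+ * <p>Visibility matches {@code ClickHouseAdminController} / {@code DatabaseAdminController}:
+ * <ul>
+ *   <li>registered only when {@code cameleer.server.security.infrastructureendpoints} is
+ *       {@code true} or unset ({@code matchIfMissing = true})</li>
+ *   <li>class-level {@code @PreAuthorize("hasRole('ADMIN')")} — requires ADMIN role</li>
+ * </ul>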
    */ +@ConditionalOnProperty( + name = "cameleer.server.security.infrastructureendpoints", + havingValue = "true", + matchIfMissing = true +) @RestController @RequestMapping("/api/v1/admin/server-metrics") +@PreAuthorize("hasRole('ADMIN')") @Tag(name = "Server Self-Metrics", - description = "Read API over the server's own Micrometer registry snapshots for dashboards") + description = "Read API over the server's own Micrometer registry snapshots (ADMIN only)") public class ServerMetricsAdminController { /** Default lookback window for catalog/instances when from/to are omitted. */ diff --git a/ui/src/api/queries/admin/serverMetrics.ts b/ui/src/api/queries/admin/serverMetrics.ts new file mode 100644 index 00000000..4207a51e --- /dev/null +++ b/ui/src/api/queries/admin/serverMetrics.ts @@ -0,0 +1,113 @@ +import { useQuery } from '@tanstack/react-query'; +import { adminFetch } from './admin-api'; +import { useRefreshInterval } from '../use-refresh-interval'; + +// ── Types ────────────────────────────────────────────────────────────── + +export interface ServerMetricCatalogEntry { + metricName: string; + metricType: string; + statistics: string[]; + tagKeys: string[]; +} + +export interface ServerInstanceInfo { + serverInstanceId: string; + firstSeen: string; + lastSeen: string; +} + +export interface ServerMetricPoint { + t: string; + v: number; +} + +export interface ServerMetricSeries { + tags: Record; + points: ServerMetricPoint[]; +} + +export interface ServerMetricQueryResponse { + metric: string; + statistic: string; + aggregation: string; + mode: string; + stepSeconds: number; + series: ServerMetricSeries[]; +} + +export interface ServerMetricQueryRequest { + metric: string; + statistic?: string | null; + from: string; + to: string; + stepSeconds?: number | null; + groupByTags?: string[] | null; + filterTags?: Record | null; + aggregation?: string | null; + mode?: string | null; + serverInstanceIds?: string[] | null; +} + +// ── Query Hooks 
──────────────────────────────────────────────────────── + +export function useServerMetricsCatalog(windowSeconds = 3600) { + const refetchInterval = useRefreshInterval(60_000); + return useQuery({ + queryKey: ['admin', 'server-metrics', 'catalog', windowSeconds], + queryFn: async () => { + const to = new Date(); + const from = new Date(to.getTime() - windowSeconds * 1000); + const params = new URLSearchParams({ from: from.toISOString(), to: to.toISOString() }); + return adminFetch(`/server-metrics/catalog?${params}`); + }, + refetchInterval, + }); +} + +export function useServerMetricsInstances(windowSeconds = 3600) { + const refetchInterval = useRefreshInterval(60_000); + return useQuery({ + queryKey: ['admin', 'server-metrics', 'instances', windowSeconds], + queryFn: async () => { + const to = new Date(); + const from = new Date(to.getTime() - windowSeconds * 1000); + const params = new URLSearchParams({ from: from.toISOString(), to: to.toISOString() }); + return adminFetch(`/server-metrics/instances?${params}`); + }, + refetchInterval, + }); +} + +/** + * Run a time-series query against the server_metrics table. + * + * The window [from, to) is supplied in seconds of "now minus N" so the panel + * refreshes automatically at the polling interval without the caller + * recomputing timestamps. + */ +export function useServerMetricsSeries( + request: Omit, + windowSeconds: number, + opts?: { enabled?: boolean }, +) { + const refetchInterval = useRefreshInterval(30_000); + return useQuery({ + queryKey: ['admin', 'server-metrics', 'query', request, windowSeconds], + queryFn: async () => { + const to = new Date(); + const from = new Date(to.getTime() - windowSeconds * 1000); + const body: ServerMetricQueryRequest = { + ...request, + from: from.toISOString(), + to: to.toISOString(), + }; + return adminFetch('/server-metrics/query', { + method: 'POST', + body: JSON.stringify(body), + }); + }, + refetchInterval, + enabled: opts?.enabled ?? 
true, + }); +} diff --git a/ui/src/components/LayoutShell.tsx b/ui/src/components/LayoutShell.tsx index bf767dac..6742857a 100644 --- a/ui/src/components/LayoutShell.tsx +++ b/ui/src/components/LayoutShell.tsx @@ -705,6 +705,7 @@ function LayoutContent() { oidc: 'OIDC', database: 'Database', clickhouse: 'ClickHouse', + 'server-metrics': 'Server Metrics', appconfig: 'App Config', }; const parts = location.pathname.split('/').filter(Boolean); diff --git a/ui/src/components/sidebar-utils.ts b/ui/src/components/sidebar-utils.ts index 9da2e922..bb610b75 100644 --- a/ui/src/components/sidebar-utils.ts +++ b/ui/src/components/sidebar-utils.ts @@ -110,6 +110,7 @@ export function buildAdminTreeNodes(opts?: { infrastructureEndpoints?: boolean } { id: 'admin:oidc', label: 'OIDC', path: '/admin/oidc' }, { id: 'admin:outbound-connections', label: 'Outbound Connections', path: '/admin/outbound-connections' }, { id: 'admin:sensitive-keys', label: 'Sensitive Keys', path: '/admin/sensitive-keys' }, + ...(showInfra ? 
[{ id: 'admin:server-metrics', label: 'Server Metrics', path: '/admin/server-metrics' }] : []), { id: 'admin:rbac', label: 'Users & Roles', path: '/admin/rbac' }, ]; return nodes; diff --git a/ui/src/pages/Admin/ServerMetricsAdminPage.module.css b/ui/src/pages/Admin/ServerMetricsAdminPage.module.css new file mode 100644 index 00000000..7cbcc0d3 --- /dev/null +++ b/ui/src/pages/Admin/ServerMetricsAdminPage.module.css @@ -0,0 +1,81 @@ +.page { + display: flex; + flex-direction: column; + gap: 24px; +} + +.toolbar { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + flex-wrap: wrap; +} + +.instanceStrip { + display: flex; + gap: 6px; + flex-wrap: wrap; +} + +.row { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 14px; +} + +.rowTriple { + display: grid; + grid-template-columns: 1fr 1fr 1fr; + gap: 14px; +} + +.sectionTitle { + display: flex; + align-items: baseline; + gap: 10px; + margin: 4px 0 4px 2px; + color: var(--text-primary); + font-size: 13px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.sectionSubtitle { + color: var(--text-muted); + font-weight: 400; + font-size: 12px; + text-transform: none; + letter-spacing: 0; +} + +.chartHeader { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 8px; +} + +.chartTitle { + font-size: 13px; + font-weight: 600; + color: var(--text-primary); +} + +.chartMeta { + font-size: 11px; + color: var(--text-muted); +} + +/* Tighten chart card internals for denser grid */ +.compactCard { + padding: 14px; +} + +@media (max-width: 1100px) { + .rowTriple, + .row { + grid-template-columns: 1fr; + } +} diff --git a/ui/src/pages/Admin/ServerMetricsAdminPage.tsx b/ui/src/pages/Admin/ServerMetricsAdminPage.tsx new file mode 100644 index 00000000..16099336 --- /dev/null +++ b/ui/src/pages/Admin/ServerMetricsAdminPage.tsx @@ -0,0 +1,466 @@ +import { useMemo, useState } from 'react'; +import { + ThemedChart, Area, Line, 
CHART_COLORS, + Badge, EmptyState, Spinner, Select, +} from '@cameleer/design-system'; +import { + useServerMetricsCatalog, + useServerMetricsInstances, + useServerMetricsSeries, + type ServerMetricQueryResponse, + type ServerMetricSeries, +} from '../../api/queries/admin/serverMetrics'; +import chartCardStyles from '../../styles/chart-card.module.css'; +import styles from './ServerMetricsAdminPage.module.css'; + +// ── Window options ───────────────────────────────────────────────────── + +const WINDOWS: { label: string; seconds: number; step: number }[] = [ + { label: 'Last 15 min', seconds: 15 * 60, step: 60 }, + { label: 'Last 1 h', seconds: 60 * 60, step: 60 }, + { label: 'Last 6 h', seconds: 6 * 60 * 60, step: 300 }, + { label: 'Last 24 h', seconds: 24 * 60 * 60, step: 300 }, + { label: 'Last 7 d', seconds: 7 * 24 * 60 * 60, step: 3600 }, +]; + +// ── Panel component ──────────────────────────────────────────────────── + +interface PanelProps { + title: string; + subtitle?: string; + metric: string; + statistic?: string; + groupByTags?: string[]; + filterTags?: Record; + aggregation?: string; + mode?: 'raw' | 'delta'; + yLabel?: string; + asArea?: boolean; + windowSeconds: number; + stepSeconds: number; + formatValue?: (v: number) => string; +} + +function Panel({ + title, subtitle, metric, statistic, groupByTags, filterTags, + aggregation, mode = 'raw', yLabel, asArea = false, + windowSeconds, stepSeconds, formatValue, +}: PanelProps) { + const { data, isLoading, isError, error } = useServerMetricsSeries( + { metric, statistic, groupByTags, filterTags, aggregation, mode, stepSeconds }, + windowSeconds, + ); + + return ( +
    +
    + {title} + {subtitle && {subtitle}} +
    + +
    + ); +} + +function PanelBody({ + data, loading, error, yLabel, asArea, formatValue, +}: { + data: ServerMetricQueryResponse | undefined; + loading: boolean; + error: string | null; + yLabel?: string; + asArea?: boolean; + formatValue?: (v: number) => string; +}) { + const points = useMemo(() => flatten(data?.series ?? []), [data]); + + if (loading) { + return
    + +
    ; + } + if (error) { + return ; + } + if (!data || data.series.length === 0 || points.rows.length === 0) { + return ; + } + + return ( + + {points.seriesKeys.map((key, idx) => { + const color = CHART_COLORS[idx % CHART_COLORS.length]; + return asArea ? ( + + ) : ( + + ); + })} + + ); +} + +/** + * Turn ServerMetricSeries[] into a single array of rows keyed by series label. + * Multiple series become overlapping lines on the same time axis; buckets are + * merged on `t` so Recharts can render them as one dataset. + */ +function flatten(series: ServerMetricSeries[]): { rows: Array>; seriesKeys: string[] } { + if (series.length === 0) return { rows: [], seriesKeys: [] }; + + const seriesKeys = series.map(seriesLabel); + const rowsByTime = new Map>(); + series.forEach((s, i) => { + const key = seriesKeys[i]; + for (const p of s.points) { + let row = rowsByTime.get(p.t); + if (!row) { + row = { t: p.t }; + rowsByTime.set(p.t, row); + } + row[key] = p.v; + } + }); + const rows = Array.from(rowsByTime.values()).sort((a, b) => + (a.t as string).localeCompare(b.t as string)); + return { rows, seriesKeys }; +} + +function seriesLabel(s: ServerMetricSeries): string { + const entries = Object.entries(s.tags); + if (entries.length === 0) return 'value'; + return entries.map(([k, v]) => `${k}=${v}`).join(' · '); +} + +function formatTime(iso: string | number): string { + const d = typeof iso === 'number' ? 
new Date(iso) : new Date(String(iso)); + return d.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit' }); +} + +function formatMB(bytes: number): string { + return `${(bytes / (1024 * 1024)).toFixed(0)} MB`; +} + +function formatPct(frac: number): string { + return `${(frac * 100).toFixed(0)}%`; +} + +// ── Page ─────────────────────────────────────────────────────────────── + +export default function ServerMetricsAdminPage() { + const [windowIdx, setWindowIdx] = useState(1); // default: last 1 h + const windowOpt = WINDOWS[windowIdx]; + const windowSeconds = windowOpt.seconds; + const stepSeconds = windowOpt.step; + + const { data: catalog } = useServerMetricsCatalog(windowSeconds); + const { data: instances } = useServerMetricsInstances(windowSeconds); + + const has = (metricName: string) => + (catalog ?? []).some((c) => c.metricName === metricName); + + return ( +
    + {/* Toolbar */} +
    +
    + {(instances ?? []).slice(0, 8).map((i) => ( + + ))} + {(instances ?? []).length > 8 && ( + + )} + {(instances ?? []).length === 0 && ( + + )} +
    +