Files
cameleer-server/ui/src/pages/AgentHealth/AgentHealth.tsx
hsiegeln b0484459a2
Some checks failed
CI / cleanup-branch (push) Has been skipped
CI / build (push) Failing after 22s
CI / docker (push) Has been skipped
CI / deploy (push) Has been skipped
CI / deploy-feature (push) Has been skipped
feat: add application config overview and inline editing
Add admin page at /admin/appconfig with a DataTable showing all
application configurations. Inline dropdowns allow editing log level,
engine level, payload capture mode, and metrics toggle directly from
the table. Changes push to agents via SSE immediately.

Also adds a config bar on the AgentHealth page (/agents/:appId) for
per-application config management with the same 4 settings.

Backend: GET /api/v1/config list endpoint, findAll() on repository,
sensible defaults for logForwardingLevel/engineLevel/payloadCaptureMode.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 12:51:07 +01:00

711 lines
26 KiB
TypeScript

import { useState, useMemo, useCallback } from 'react';
import { useParams, useNavigate } from 'react-router';
import {
StatCard, StatusDot, Badge, MonoText, ProgressBar,
GroupCard, DataTable, LineChart, EventFeed, DetailPanel,
LogViewer, ButtonGroup, SectionHeader, useToast,
} from '@cameleer/design-system';
import type { Column, FeedEvent, LogEntry, ButtonGroupItem } from '@cameleer/design-system';
import styles from './AgentHealth.module.css';
import { useAgents, useAgentEvents } from '../../api/queries/agents';
import { useApplicationLogs } from '../../api/queries/logs';
import { useAgentMetrics } from '../../api/queries/agent-metrics';
import { useApplicationConfig, useUpdateApplicationConfig } from '../../api/queries/commands';
import type { AgentInstance } from '../../api/types';
// ── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Formats an ISO timestamp as a coarse relative age ("12s ago", "5m ago",
 * "3h ago", "2d ago").
 *
 * @param iso - Timestamp string accepted by the `Date` constructor.
 * @returns The relative age, or an em dash when `iso` is missing or cannot be
 *          parsed into a valid date.
 */
function timeAgo(iso?: string): string {
  if (!iso) return '\u2014';
  const then = new Date(iso).getTime();
  // An unparseable string yields NaN, which would otherwise fall through every
  // comparison below and render as "NaNd ago".
  if (Number.isNaN(then)) return '\u2014';
  // Clamp at zero so a timestamp slightly in the future (clock skew between
  // the producer of the timestamp and this browser) never shows a negative age.
  const secs = Math.max(0, Math.floor((Date.now() - then) / 1000));
  if (secs < 60) return `${secs}s ago`;
  const mins = Math.floor(secs / 60);
  if (mins < 60) return `${mins}m ago`;
  const hours = Math.floor(mins / 60);
  if (hours < 24) return `${hours}h ago`;
  return `${Math.floor(hours / 24)}d ago`;
}
/**
 * Renders an uptime in seconds using its two most significant units
 * ("2d 3h", "4h 12m") or minutes alone ("7m").
 *
 * @param seconds - Uptime in seconds.
 * @returns The formatted uptime, or an em dash when `seconds` is falsy
 *          (undefined, 0 or NaN).
 */
function formatUptime(seconds?: number): string {
  if (!seconds) return '\u2014';

  const wholeDays = Math.floor(seconds / 86400);
  const wholeHours = Math.floor((seconds % 86400) / 3600);
  const wholeMinutes = Math.floor((seconds % 3600) / 60);

  let parts: string[];
  if (wholeDays > 0) {
    parts = [`${wholeDays}d`, `${wholeHours}h`];
  } else if (wholeHours > 0) {
    parts = [`${wholeHours}h`, `${wholeMinutes}m`];
  } else {
    parts = [`${wholeMinutes}m`];
  }
  return parts.join(' ');
}
/**
 * Formats a fractional error rate as a percentage with one decimal place.
 *
 * @param rate - Error rate as a fraction (the value is multiplied by 100).
 * @returns For example "12.3%", or an em dash when `rate` is null/undefined.
 */
function formatErrorRate(rate?: number): string {
  if (rate === undefined || rate === null) {
    return '\u2014';
  }
  const percent = rate * 100;
  return percent.toFixed(1) + '%';
}
/** Lower-case agent status values used by the helpers and components in this file. */
type NormStatus = 'live' | 'stale' | 'dead';

/**
 * Lower-cases a raw agent status string and types it as {@link NormStatus}.
 *
 * NOTE(review): the result is cast, not validated — a status outside
 * live/stale/dead is passed through unchanged. Confirm the backend only ever
 * sends these three values.
 *
 * @param status - Raw status string in any letter case.
 * @returns The lower-cased status.
 */
function normalizeStatus(status: string): NormStatus {
  const lowered = status.toLowerCase();
  return lowered as NormStatus;
}
/**
 * Maps a normalized status to a semantic colour name.
 *
 * @param s - Normalized agent status.
 * @returns 'success' for live, 'warning' for stale, 'error' for anything else.
 */
function statusColor(s: NormStatus): 'success' | 'warning' | 'error' {
  switch (s) {
    case 'live':
      return 'success';
    case 'stale':
      return 'warning';
    default:
      return 'error';
  }
}
// ── Data grouping ────────────────────────────────────────────────────────────
/** Agent instances of one application together with aggregates computed by `groupByApp`. */
interface AppGroup {
/** Application identifier; taken from the `application` field of the grouped instances. */
appId: string;
/** All instances that share this application, in the order they appeared in the input list. */
instances: AgentInstance[];
/** Number of instances whose normalized status is 'live'. */
liveCount: number;
/** Number of instances whose normalized status is 'stale'. */
staleCount: number;
/** Number of instances whose normalized status is 'dead'. */
deadCount: number;
/** Sum of `tps` over all instances (a missing value counts as 0). */
totalTps: number;
/** Sum of `activeRoutes` over all instances (a missing value counts as 0). */
totalActiveRoutes: number;
/** Sum of `totalRoutes` over all instances (a missing value counts as 0). */
totalRoutes: number;
}
/**
 * Buckets agent instances by their `application` field and computes the
 * per-application aggregates in a single pass.
 *
 * @param agentList - Instances to group.
 * @returns One group per distinct application, ordered by first appearance
 *          in `agentList`.
 */
function groupByApp(agentList: AgentInstance[]): AppGroup[] {
  // Map preserves insertion order, so groups come out in first-seen order.
  const byApp = new Map<string, AppGroup>();

  for (const agent of agentList) {
    let group = byApp.get(agent.application);
    if (!group) {
      group = {
        appId: agent.application,
        instances: [],
        liveCount: 0,
        staleCount: 0,
        deadCount: 0,
        totalTps: 0,
        totalActiveRoutes: 0,
        totalRoutes: 0,
      };
      byApp.set(agent.application, group);
    }

    group.instances.push(agent);

    // A status outside the three known values increments no counter.
    switch (normalizeStatus(agent.status)) {
      case 'live':
        group.liveCount += 1;
        break;
      case 'stale':
        group.staleCount += 1;
        break;
      case 'dead':
        group.deadCount += 1;
        break;
    }

    group.totalTps += agent.tps ?? 0;
    group.totalActiveRoutes += agent.activeRoutes ?? 0;
    group.totalRoutes += agent.totalRoutes ?? 0;
  }

  return Array.from(byApp.values());
}
/**
 * Derives the overall health colour of an application group.
 *
 * @param group - Group with precomputed status counters.
 * @returns 'error' if any instance is dead, otherwise 'warning' if any is
 *          stale, otherwise 'success'.
 */
function appHealth(group: AppGroup): 'success' | 'warning' | 'error' {
  const { deadCount, staleCount } = group;
  return deadCount > 0 ? 'error' : staleCount > 0 ? 'warning' : 'success';
}
// ── Detail sub-components ────────────────────────────────────────────────────
/**
 * "Overview" tab content of the instance detail panel.
 *
 * Shows status, application, uptime, last heartbeat, throughput, error rate
 * and route counts taken from the `agent` prop, plus heap and CPU gauges
 * built from the first data point returned by `useAgentMetrics`.
 */
function AgentOverviewContent({ agent }: { agent: AgentInstance }) {
  const memoryQuery = useAgentMetrics(
    agent.id,
    ['jvm.memory.heap.used', 'jvm.memory.heap.max'],
    1,
  );
  const cpuQuery = useAgentMetrics(agent.id, ['jvm.cpu.process'], 1);

  const heapUsed = memoryQuery.data?.metrics?.['jvm.memory.heap.used']?.[0]?.value;
  const heapMax = memoryQuery.data?.metrics?.['jvm.memory.heap.max']?.[0]?.value;
  const cpuFraction = cpuQuery.data?.metrics?.['jvm.cpu.process']?.[0]?.value;

  // Only compute a heap percentage when both values exist and max is positive.
  let heapPercent: number | undefined;
  if (heapUsed != null && heapMax != null && heapMax > 0) {
    heapPercent = Math.round((heapUsed / heapMax) * 100);
  }

  // The CPU metric is multiplied by 100 to obtain a percentage.
  let cpuPercent: number | undefined;
  if (cpuFraction != null) {
    cpuPercent = Math.round(cpuFraction * 100);
  }

  const normalized = normalizeStatus(agent.status);
  const throughputText = agent.tps != null ? `${agent.tps.toFixed(1)}/s` : '\u2014';
  const errorClass = agent.errorRate ? styles.instanceError : undefined;

  // Renders either a progress bar with its percentage or an "N/A" placeholder.
  const renderGauge = (percent: number | undefined, errorAbove: number, warningAbove: number) => {
    if (percent == null) {
      return <MonoText size="xs">N/A</MonoText>;
    }
    const variant = percent > errorAbove ? 'error' : percent > warningAbove ? 'warning' : 'success';
    return (
      <div className={styles.detailProgress}>
        <ProgressBar value={percent} variant={variant} size="sm" />
        <MonoText size="xs">{percent}%</MonoText>
      </div>
    );
  };

  return (
    <div className={styles.detailContent}>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Status</span>
        <Badge label={agent.status} color={statusColor(normalized)} variant="filled" />
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Application</span>
        <MonoText size="xs">{agent.application}</MonoText>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Uptime</span>
        <MonoText size="xs">{formatUptime(agent.uptimeSeconds)}</MonoText>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Last Seen</span>
        <MonoText size="xs">{timeAgo(agent.lastHeartbeat)}</MonoText>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Throughput</span>
        <MonoText size="xs">{throughputText}</MonoText>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Errors</span>
        <MonoText size="xs" className={errorClass}>
          {formatErrorRate(agent.errorRate)}
        </MonoText>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Routes</span>
        <span>{agent.activeRoutes ?? 0}/{agent.totalRoutes ?? 0} active</span>
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>Heap Memory</span>
        {renderGauge(heapPercent, 85, 70)}
      </div>
      <div className={styles.detailRow}>
        <span className={styles.detailLabel}>CPU</span>
        {renderGauge(cpuPercent, 80, 60)}
      </div>
    </div>
  );
}
/**
 * "Performance" tab content of the instance detail panel.
 *
 * Plots the `cameleer.tps` and `cameleer.error.rate` series returned by
 * `useAgentMetrics` as two line charts; error-rate values are multiplied by
 * 100 for display. Each chart falls back to a placeholder when its series
 * has no points.
 */
function AgentPerformanceContent({ agent }: { agent: AgentInstance }) {
  const { data: throughput } = useAgentMetrics(agent.id, ['cameleer.tps'], 60);
  const { data: errors } = useAgentMetrics(agent.id, ['cameleer.error.rate'], 60);

  const tpsSeries = useMemo(() => {
    const points = throughput?.metrics?.['cameleer.tps'] ?? [];
    const data = points.map((point) => ({ x: new Date(point.time), y: point.value }));
    return [{ label: 'TPS', data }];
  }, [throughput]);

  const errSeries = useMemo(() => {
    const points = errors?.metrics?.['cameleer.error.rate'] ?? [];
    const data = points.map((point) => ({ x: new Date(point.time), y: point.value * 100 }));
    return [{ label: 'Error Rate', data, color: 'var(--error)' }];
  }, [errors]);

  const hasThroughput = tpsSeries[0].data.length > 0;
  const hasErrors = errSeries[0].data.length > 0;
  const emptyChart = <div className={styles.emptyChart}>No data available</div>;

  return (
    <div className={styles.detailContent}>
      <div className={styles.chartPanel}>
        <div className={styles.chartTitle}>Throughput (msg/s)</div>
        {hasThroughput ? <LineChart series={tpsSeries} height={160} yLabel="msg/s" /> : emptyChart}
      </div>
      <div className={styles.chartPanel}>
        <div className={styles.chartTitle}>Error Rate (%)</div>
        {hasErrors ? <LineChart series={errSeries} height={160} yLabel="%" /> : emptyChart}
      </div>
    </div>
  );
}
/**
 * Items of the log-level filter button group in the application log toolbar.
 * Each `value` matches one of the levels produced by `mapLogLevel`, which is
 * what the log filter compares against.
 */
const LOG_LEVEL_ITEMS: ButtonGroupItem[] = [
{ value: 'error', label: 'Error', color: 'var(--error)' },
{ value: 'warn', label: 'Warn', color: 'var(--warning)' },
{ value: 'info', label: 'Info', color: 'var(--success)' },
{ value: 'debug', label: 'Debug', color: 'var(--running)' },
];
/**
 * Maps a raw log level name (case-insensitive) onto the levels used by the
 * log viewer entries.
 *
 * @param level - Raw level name; a missing value is tolerated.
 * @returns 'error' for ERROR, 'warn' for WARN/WARNING, 'debug' for
 *          DEBUG/TRACE, and 'info' for everything else.
 */
function mapLogLevel(level: string): LogEntry['level'] {
  const upper = level?.toUpperCase();
  if (upper === 'ERROR') return 'error';
  if (upper === 'WARN' || upper === 'WARNING') return 'warn';
  if (upper === 'DEBUG' || upper === 'TRACE') return 'debug';
  return 'info';
}
// ── AgentHealth page ─────────────────────────────────────────────────────────
/**
 * Agent health page.
 *
 * Renders, top to bottom: a strip of aggregate stat cards, a live-count badge,
 * an inline application config bar (only when the route has an `appId` param
 * and a config has been loaded), one group card per application with a table
 * of its instances, an application log viewer next to an agent event
 * timeline, and a detail panel for the instance selected in a table.
 *
 * The optional `appId` route param is forwarded to the agents, config, events
 * and log queries.
 */
export default function AgentHealth() {
  const { appId } = useParams();
  const navigate = useNavigate();
  const { toast } = useToast();
  const { data: agents } = useAgents(undefined, appId);
  const { data: appConfig } = useApplicationConfig(appId);
  const updateConfig = useUpdateApplicationConfig();

  // Sends the full config with a single field replaced and reports the result via toast.
  const handleConfigChange = useCallback((field: string, value: string | boolean) => {
    if (!appConfig) return;
    const updated = { ...appConfig, [field]: value };
    updateConfig.mutate(updated, {
      onSuccess: (saved) => {
        // Separate field and value so the message reads "engineLevel: COMPLETE (v3)".
        toast({ title: 'Config updated', description: `${field}: ${value} (v${saved.version})`, variant: 'success' });
      },
      onError: () => {
        toast({ title: 'Config update failed', variant: 'error' });
      },
    });
  }, [appConfig, updateConfig, toast]);

  // ── Timeline state ──
  const [eventSortAsc, setEventSortAsc] = useState(false);
  // Set to "now" by the refresh button and passed to the events query.
  const [eventRefreshTo, setEventRefreshTo] = useState<string | undefined>();
  const { data: events } = useAgentEvents(appId, undefined, 50, eventRefreshTo);

  // ── Log state ──
  const [logSearch, setLogSearch] = useState('');
  const [logLevels, setLogLevels] = useState<Set<string>>(new Set());
  const [logSortAsc, setLogSortAsc] = useState(false);
  // Set to "now" by the refresh button and passed to the logs query.
  const [logRefreshTo, setLogRefreshTo] = useState<string | undefined>();
  const { data: rawLogs } = useApplicationLogs(appId, undefined, { toOverride: logRefreshTo });

  const logEntries = useMemo<LogEntry[]>(() => {
    const mapped = (rawLogs || []).map((l) => ({
      timestamp: l.timestamp ?? '',
      level: mapLogLevel(l.level),
      message: l.message ?? '',
    }));
    // The "ascending" toggle reverses the order in which the query returned the entries.
    return logSortAsc ? mapped.toReversed() : mapped;
  }, [rawLogs, logSortAsc]);

  // An empty level set means "no level filter"; the search is a case-insensitive substring match.
  const logSearchLower = logSearch.toLowerCase();
  const filteredLogs = logEntries
    .filter((l) => logLevels.size === 0 || logLevels.has(l.level))
    .filter((l) => !logSearchLower || l.message.toLowerCase().includes(logSearchLower));

  const [selectedInstance, setSelectedInstance] = useState<AgentInstance | null>(null);
  const [panelOpen, setPanelOpen] = useState(false);

  // Memoized so the fallback `[]` keeps a stable identity while `agents` is
  // undefined; otherwise the `groups` memo below would recompute on every render.
  const agentList = useMemo(() => agents ?? [], [agents]);
  const groups = useMemo(() => groupByApp(agentList), [agentList]);

  // Aggregate stats
  const totalInstances = agentList.length;
  const liveCount = agentList.filter((a) => normalizeStatus(a.status) === 'live').length;
  const staleCount = agentList.filter((a) => normalizeStatus(a.status) === 'stale').length;
  const deadCount = agentList.filter((a) => normalizeStatus(a.status) === 'dead').length;
  const totalTps = agentList.reduce((s, a) => s + (a.tps ?? 0), 0);
  const totalActiveRoutes = agentList.reduce((s, a) => s + (a.activeRoutes ?? 0), 0);
  const totalRoutes = agentList.reduce((s, a) => s + (a.totalRoutes ?? 0), 0);

  // Map events to FeedEvent
  const feedEvents: FeedEvent[] = useMemo(() => {
    const mapped = (events ?? []).map((e: { id: number; agentId: string; eventType: string; detail: string; timestamp: string }) => ({
      id: String(e.id),
      severity:
        e.eventType === 'WENT_DEAD'
          ? ('error' as const)
          : e.eventType === 'WENT_STALE'
            ? ('warning' as const)
            : e.eventType === 'RECOVERED'
              ? ('success' as const)
              : ('running' as const),
      message: `${e.agentId}: ${e.eventType}${e.detail ? ' \u2014 ' + e.detail : ''}`,
      timestamp: new Date(e.timestamp),
    }));
    return eventSortAsc ? mapped.toReversed() : mapped;
  }, [events, eventSortAsc]);

  // Column definitions for the instance DataTable
  const instanceColumns: Column<AgentInstance>[] = useMemo(
    () => [
      {
        key: 'status',
        header: '',
        width: '12px',
        render: (_val, row) => <StatusDot variant={normalizeStatus(row.status)} />,
      },
      {
        key: '_inspect',
        header: '',
        width: '36px',
        render: (_val, row) => (
          <button
            className={styles.inspectLink}
            title="Open instance page"
            onClick={(e) => {
              // Keep the click from also triggering the row's onRowClick handler.
              e.stopPropagation();
              navigate(`/agents/${row.application}/${row.id}`);
            }}
          >
            &#x2197;
          </button>
        ),
      },
      {
        key: 'name',
        header: 'Instance',
        render: (_val, row) => (
          <MonoText size="sm" className={styles.instanceName}>{row.name ?? row.id}</MonoText>
        ),
      },
      {
        key: 'state',
        header: 'State',
        render: (_val, row) => {
          const ns = normalizeStatus(row.status);
          return <Badge label={row.status} color={statusColor(ns)} variant="filled" />;
        },
      },
      {
        key: 'uptime',
        header: 'Uptime',
        render: (_val, row) => (
          <MonoText size="xs" className={styles.instanceMeta}>{formatUptime(row.uptimeSeconds)}</MonoText>
        ),
      },
      {
        key: 'tps',
        header: 'TPS',
        render: (_val, row) => (
          <MonoText size="xs" className={styles.instanceMeta}>
            {row.tps != null ? `${row.tps.toFixed(1)}/s` : '\u2014'}
          </MonoText>
        ),
      },
      {
        key: 'errorRate',
        header: 'Errors',
        render: (_val, row) => (
          <MonoText size="xs" className={row.errorRate ? styles.instanceError : styles.instanceMeta}>
            {formatErrorRate(row.errorRate)}
          </MonoText>
        ),
      },
      {
        key: 'lastHeartbeat',
        header: 'Heartbeat',
        render: (_val, row) => {
          const ns = normalizeStatus(row.status);
          return (
            <MonoText
              size="xs"
              className={
                ns === 'dead'
                  ? styles.instanceHeartbeatDead
                  : ns === 'stale'
                    ? styles.instanceHeartbeatStale
                    : styles.instanceMeta
              }
            >
              {timeAgo(row.lastHeartbeat)}
            </MonoText>
          );
        },
      },
    ],
    // `navigate` is captured by the inspect button's click handler.
    [navigate],
  );

  function handleInstanceClick(inst: AgentInstance) {
    setSelectedInstance(inst);
    setPanelOpen(true);
  }

  // Detail panel tabs
  const detailTabs = selectedInstance
    ? [
        {
          label: 'Overview',
          value: 'overview',
          content: <AgentOverviewContent agent={selectedInstance} />,
        },
        {
          label: 'Performance',
          value: 'performance',
          content: <AgentPerformanceContent agent={selectedInstance} />,
        },
      ]
    : [];

  // With an appId in the route the group grid uses the single-column style.
  const isFullWidth = !!appId;

  return (
    <div className={styles.content}>
      {/* Stat strip */}
      <div className={styles.statStrip}>
        <StatCard
          label="Total Agents"
          value={String(totalInstances)}
          accent={deadCount > 0 ? 'warning' : 'amber'}
          detail={
            <span className={styles.breakdown}>
              <span className={styles.bpLive}><StatusDot variant="live" /> {liveCount} live</span>
              <span className={styles.bpStale}><StatusDot variant="stale" /> {staleCount} stale</span>
              <span className={styles.bpDead}><StatusDot variant="dead" /> {deadCount} dead</span>
            </span>
          }
        />
        <StatCard
          label="Applications"
          value={String(groups.length)}
          accent="running"
          detail={
            <span className={styles.breakdown}>
              <span className={styles.bpLive}>
                <StatusDot variant="live" /> {groups.filter((g) => g.deadCount === 0 && g.staleCount === 0).length} healthy
              </span>
              <span className={styles.bpStale}>
                <StatusDot variant="stale" /> {groups.filter((g) => g.staleCount > 0 && g.deadCount === 0).length} degraded
              </span>
              <span className={styles.bpDead}>
                <StatusDot variant="dead" /> {groups.filter((g) => g.deadCount > 0).length} critical
              </span>
            </span>
          }
        />
        <StatCard
          label="Active Routes"
          value={
            <span
              className={
                styles[
                  totalActiveRoutes === 0
                    ? 'routesError'
                    : totalActiveRoutes < totalRoutes
                      ? 'routesWarning'
                      : 'routesSuccess'
                ]
              }
            >
              {totalActiveRoutes}/{totalRoutes}
            </span>
          }
          accent={totalActiveRoutes === 0 ? 'error' : totalActiveRoutes < totalRoutes ? 'warning' : 'success'}
          detail={totalActiveRoutes < totalRoutes ? `${totalRoutes - totalActiveRoutes} suspended` : 'all routes active'}
        />
        <StatCard
          label="Total TPS"
          value={totalTps.toFixed(1)}
          accent="amber"
          detail="msg/s"
        />
        <StatCard
          label="Dead"
          value={String(deadCount)}
          accent={deadCount > 0 ? 'error' : 'success'}
          detail={deadCount > 0 ? 'requires attention' : 'all healthy'}
        />
      </div>
      <div style={{ marginBottom: 12 }}>
        <Badge
          label={`${liveCount}/${totalInstances} live`}
          color={deadCount > 0 ? 'error' : staleCount > 0 ? 'warning' : 'success'}
          variant="filled"
        />
      </div>
      {/* Application config bar */}
      {appId && appConfig && (
        <div className={styles.configBar}>
          <div className={styles.configField}>
            <span className={styles.configLabel}>Log Level</span>
            <select
              className={styles.configSelect}
              value={appConfig.logForwardingLevel ?? 'INFO'}
              onChange={(e) => handleConfigChange('logForwardingLevel', e.target.value)}
              disabled={updateConfig.isPending}
            >
              <option value="ERROR">ERROR</option>
              <option value="WARN">WARN</option>
              <option value="INFO">INFO</option>
              <option value="DEBUG">DEBUG</option>
            </select>
          </div>
          <div className={styles.configField}>
            <span className={styles.configLabel}>Engine Level</span>
            <select
              className={styles.configSelect}
              value={appConfig.engineLevel ?? 'REGULAR'}
              onChange={(e) => handleConfigChange('engineLevel', e.target.value)}
              disabled={updateConfig.isPending}
            >
              <option value="NONE">None</option>
              <option value="MINIMAL">Minimal</option>
              <option value="REGULAR">Regular</option>
              <option value="COMPLETE">Complete</option>
            </select>
          </div>
          <div className={styles.configField}>
            <span className={styles.configLabel}>Payload Capture</span>
            <select
              className={styles.configSelect}
              value={appConfig.payloadCaptureMode ?? 'NONE'}
              onChange={(e) => handleConfigChange('payloadCaptureMode', e.target.value)}
              disabled={updateConfig.isPending}
            >
              <option value="NONE">None</option>
              <option value="INPUT">Input</option>
              <option value="OUTPUT">Output</option>
              <option value="BOTH">Both</option>
            </select>
          </div>
          <div className={styles.configField}>
            <span className={styles.configLabel}>Metrics</span>
            <label className={styles.configToggle}>
              <input
                type="checkbox"
                checked={appConfig.metricsEnabled}
                onChange={(e) => handleConfigChange('metricsEnabled', e.target.checked)}
                disabled={updateConfig.isPending}
              />
              <span>{appConfig.metricsEnabled ? 'Enabled' : 'Disabled'}</span>
            </label>
          </div>
        </div>
      )}
      {/* Group cards grid */}
      <div className={isFullWidth ? styles.groupGridSingle : styles.groupGrid}>
        {groups.map((group) => (
          <GroupCard
            key={group.appId}
            title={group.appId}
            accent={appHealth(group)}
            headerRight={
              <Badge
                label={`${group.liveCount}/${group.instances.length} LIVE`}
                color={appHealth(group)}
                variant="filled"
              />
            }
            meta={
              <div className={styles.groupMeta}>
                <span><strong>{group.totalTps.toFixed(1)}</strong> msg/s</span>
                <span><strong>{group.totalActiveRoutes}</strong>/{group.totalRoutes} routes</span>
                <span>
                  <StatusDot
                    variant={
                      appHealth(group) === 'success'
                        ? 'live'
                        : appHealth(group) === 'warning'
                          ? 'stale'
                          : 'dead'
                    }
                  />
                </span>
              </div>
            }
            footer={
              group.deadCount > 0 ? (
                <div className={styles.alertBanner}>
                  <span className={styles.alertIcon}>&#9888;</span>
                  <span>
                    Single point of failure &mdash;{' '}
                    {group.deadCount === group.instances.length
                      ? 'no redundancy'
                      : `${group.deadCount} dead instance${group.deadCount > 1 ? 's' : ''}`}
                  </span>
                </div>
              ) : undefined
            }
          >
            <DataTable<AgentInstance>
              columns={instanceColumns}
              data={group.instances}
              onRowClick={handleInstanceClick}
              selectedId={panelOpen ? selectedInstance?.id : undefined}
              pageSize={50}
              flush
            />
          </GroupCard>
        ))}
      </div>
      {/* Log + Timeline side by side */}
      <div className={styles.bottomRow}>
        <div className={styles.logCard}>
          <div className={styles.logHeader}>
            <SectionHeader>Application Log</SectionHeader>
            <div className={styles.headerActions}>
              <span className={styles.sectionMeta}>{logEntries.length} entries</span>
              <button className={styles.sortBtn} onClick={() => setLogSortAsc((v) => !v)} title={logSortAsc ? 'Oldest first' : 'Newest first'}>
                {logSortAsc ? '\u2191' : '\u2193'}
              </button>
              <button className={styles.refreshBtn} onClick={() => setLogRefreshTo(new Date().toISOString())} title="Refresh">
                &#x21bb;
              </button>
            </div>
          </div>
          <div className={styles.logToolbar}>
            <div className={styles.logSearchWrap}>
              <input
                type="text"
                className={styles.logSearchInput}
                // JSX attribute strings do not process JS escapes, so the
                // ellipsis must come from a JS string expression.
                placeholder={'Search logs\u2026'}
                value={logSearch}
                onChange={(e) => setLogSearch(e.target.value)}
                aria-label="Search logs"
              />
              {logSearch && (
                <button
                  type="button"
                  className={styles.logSearchClear}
                  onClick={() => setLogSearch('')}
                  aria-label="Clear search"
                >
                  &times;
                </button>
              )}
            </div>
            <ButtonGroup items={LOG_LEVEL_ITEMS} value={logLevels} onChange={setLogLevels} />
            {logLevels.size > 0 && (
              <button className={styles.logClearFilters} onClick={() => setLogLevels(new Set())}>
                Clear
              </button>
            )}
          </div>
          {filteredLogs.length > 0 ? (
            <LogViewer entries={filteredLogs} maxHeight={360} />
          ) : (
            <div className={styles.logEmpty}>
              {logSearch || logLevels.size > 0 ? 'No matching log entries' : 'No log entries available'}
            </div>
          )}
        </div>
        <div className={styles.eventCard}>
          <div className={styles.eventCardHeader}>
            <span className={styles.sectionTitle}>Timeline</span>
            <div className={styles.headerActions}>
              <span className={styles.sectionMeta}>{feedEvents.length} events</span>
              <button className={styles.sortBtn} onClick={() => setEventSortAsc((v) => !v)} title={eventSortAsc ? 'Oldest first' : 'Newest first'}>
                {eventSortAsc ? '\u2191' : '\u2193'}
              </button>
              <button className={styles.refreshBtn} onClick={() => setEventRefreshTo(new Date().toISOString())} title="Refresh">
                &#x21bb;
              </button>
            </div>
          </div>
          {feedEvents.length > 0 ? (
            <EventFeed events={feedEvents} maxItems={100} />
          ) : (
            <div className={styles.logEmpty}>No events in the selected time range.</div>
          )}
        </div>
      </div>
      {/* Detail panel — auto-portals to AppShell level via design system */}
      {selectedInstance && (
        <DetailPanel
          open={panelOpen}
          onClose={() => { setPanelOpen(false); setSelectedInstance(null); }}
          title={selectedInstance.name ?? selectedInstance.id}
          tabs={detailTabs}
        />
      )}
    </div>
  );
}