Files
cameleer-server/ui/src/pages/AgentHealth/AgentHealth.tsx

521 lines
18 KiB
TypeScript
Raw Normal View History

import { useState, useMemo } from 'react';
import { useParams, Link } from 'react-router';
import {
StatCard, StatusDot, Badge, MonoText, ProgressBar,
GroupCard, DataTable, LineChart, EventFeed, DetailPanel,
} from '@cameleer/design-system';
import type { Column, FeedEvent } from '@cameleer/design-system';
import styles from './AgentHealth.module.css';
import { useAgents, useAgentEvents } from '../../api/queries/agents';
import { useAgentMetrics } from '../../api/queries/agent-metrics';
import type { AgentInstance } from '../../api/types';
// ── Helpers ──────────────────────────────────────────────────────────────────
function timeAgo(iso?: string): string {
if (!iso) return '\u2014';
const diff = Date.now() - new Date(iso).getTime();
const secs = Math.floor(diff / 1000);
if (secs < 60) return `${secs}s ago`;
const mins = Math.floor(secs / 60);
if (mins < 60) return `${mins}m ago`;
const hours = Math.floor(mins / 60);
if (hours < 24) return `${hours}h ago`;
return `${Math.floor(hours / 24)}d ago`;
}
function formatUptime(seconds?: number): string {
if (!seconds) return '\u2014';
const days = Math.floor(seconds / 86400);
const hours = Math.floor((seconds % 86400) / 3600);
const mins = Math.floor((seconds % 3600) / 60);
if (days > 0) return `${days}d ${hours}h`;
if (hours > 0) return `${hours}h ${mins}m`;
return `${mins}m`;
}
function formatErrorRate(rate?: number): string {
if (rate == null) return '\u2014';
return `${(rate * 100).toFixed(1)}%`;
}
type NormStatus = 'live' | 'stale' | 'dead';
function normalizeStatus(status: string): NormStatus {
return status.toLowerCase() as NormStatus;
}
function statusColor(s: NormStatus): 'success' | 'warning' | 'error' {
if (s === 'live') return 'success';
if (s === 'stale') return 'warning';
return 'error';
}
// ── Data grouping ────────────────────────────────────────────────────────────
interface AppGroup {
appId: string;
instances: AgentInstance[];
liveCount: number;
staleCount: number;
deadCount: number;
totalTps: number;
totalActiveRoutes: number;
totalRoutes: number;
}
function groupByApp(agentList: AgentInstance[]): AppGroup[] {
const map = new Map<string, AgentInstance[]>();
for (const a of agentList) {
const app = a.application;
const list = map.get(app) ?? [];
list.push(a);
map.set(app, list);
}
return Array.from(map.entries()).map(([appId, instances]) => ({
appId,
instances,
liveCount: instances.filter((i) => normalizeStatus(i.status) === 'live').length,
staleCount: instances.filter((i) => normalizeStatus(i.status) === 'stale').length,
deadCount: instances.filter((i) => normalizeStatus(i.status) === 'dead').length,
totalTps: instances.reduce((s, i) => s + (i.tps ?? 0), 0),
totalActiveRoutes: instances.reduce((s, i) => s + (i.activeRoutes ?? 0), 0),
totalRoutes: instances.reduce((s, i) => s + (i.totalRoutes ?? 0), 0),
}));
}
function appHealth(group: AppGroup): 'success' | 'warning' | 'error' {
if (group.deadCount > 0) return 'error';
if (group.staleCount > 0) return 'warning';
return 'success';
}
// ── Detail sub-components ────────────────────────────────────────────────────
function AgentOverviewContent({ agent }: { agent: AgentInstance }) {
const { data: memMetrics } = useAgentMetrics(
agent.id,
['jvm.memory.heap.used', 'jvm.memory.heap.max'],
1,
);
const { data: cpuMetrics } = useAgentMetrics(agent.id, ['jvm.cpu.process'], 1);
const cpuValue = cpuMetrics?.metrics?.['jvm.cpu.process']?.[0]?.value;
const heapUsed = memMetrics?.metrics?.['jvm.memory.heap.used']?.[0]?.value;
const heapMax = memMetrics?.metrics?.['jvm.memory.heap.max']?.[0]?.value;
const heapPercent =
heapUsed != null && heapMax != null && heapMax > 0
? Math.round((heapUsed / heapMax) * 100)
: undefined;
const cpuPercent = cpuValue != null ? Math.round(cpuValue * 100) : undefined;
const ns = normalizeStatus(agent.status);
return (
<div className={styles.detailContent}>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Status</span>
<Badge label={agent.status} color={statusColor(ns)} variant="filled" />
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Application</span>
<MonoText size="xs">{agent.application}</MonoText>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Uptime</span>
<MonoText size="xs">{formatUptime(agent.uptimeSeconds)}</MonoText>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Last Seen</span>
<MonoText size="xs">{timeAgo(agent.lastHeartbeat)}</MonoText>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Throughput</span>
<MonoText size="xs">{agent.tps != null ? `${agent.tps.toFixed(1)}/s` : '\u2014'}</MonoText>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Errors</span>
<MonoText size="xs" className={agent.errorRate ? styles.instanceError : undefined}>
{formatErrorRate(agent.errorRate)}
</MonoText>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Routes</span>
<span>{agent.activeRoutes ?? 0}/{agent.totalRoutes ?? 0} active</span>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>Heap Memory</span>
<div className={styles.detailProgress}>
<ProgressBar
value={heapPercent}
variant={heapPercent == null ? 'primary' : heapPercent > 85 ? 'error' : heapPercent > 70 ? 'warning' : 'success'}
indeterminate={heapPercent == null}
size="sm"
/>
<MonoText size="xs">{heapPercent != null ? `${heapPercent}%` : '\u2014'}</MonoText>
</div>
</div>
<div className={styles.detailRow}>
<span className={styles.detailLabel}>CPU</span>
<div className={styles.detailProgress}>
<ProgressBar
value={cpuPercent}
variant={cpuPercent == null ? 'primary' : cpuPercent > 80 ? 'error' : cpuPercent > 60 ? 'warning' : 'success'}
indeterminate={cpuPercent == null}
size="sm"
/>
<MonoText size="xs">{cpuPercent != null ? `${cpuPercent}%` : '\u2014'}</MonoText>
</div>
</div>
</div>
);
}
function AgentPerformanceContent({ agent }: { agent: AgentInstance }) {
const { data: tpsMetrics } = useAgentMetrics(agent.id, ['cameleer.tps'], 60);
const { data: errMetrics } = useAgentMetrics(agent.id, ['cameleer.error.rate'], 60);
const tpsSeries = useMemo(() => {
const raw = tpsMetrics?.metrics?.['cameleer.tps'] ?? [];
return [{ label: 'TPS', data: raw.map((p) => ({ x: new Date(p.time), y: p.value })) }];
}, [tpsMetrics]);
const errSeries = useMemo(() => {
const raw = errMetrics?.metrics?.['cameleer.error.rate'] ?? [];
return [{
label: 'Error Rate',
data: raw.map((p) => ({ x: new Date(p.time), y: p.value * 100 })),
color: 'var(--error)',
}];
}, [errMetrics]);
return (
<div className={styles.detailContent}>
<div className={styles.chartPanel}>
<div className={styles.chartTitle}>Throughput (msg/s)</div>
{tpsSeries[0].data.length > 0 ? (
<LineChart series={tpsSeries} height={160} yLabel="msg/s" />
) : (
<div className={styles.emptyChart}>No data available</div>
)}
</div>
<div className={styles.chartPanel}>
<div className={styles.chartTitle}>Error Rate (%)</div>
{errSeries[0].data.length > 0 ? (
<LineChart series={errSeries} height={160} yLabel="%" />
) : (
<div className={styles.emptyChart}>No data available</div>
)}
</div>
</div>
);
}
// ── AgentHealth page ─────────────────────────────────────────────────────────
export default function AgentHealth() {
const { appId } = useParams();
const { data: agents } = useAgents(undefined, appId);
const { data: events } = useAgentEvents(appId);
const [selectedInstance, setSelectedInstance] = useState<AgentInstance | null>(null);
const [panelOpen, setPanelOpen] = useState(false);
const agentList = agents ?? [];
const groups = useMemo(() => groupByApp(agentList), [agentList]);
// Aggregate stats
const totalInstances = agentList.length;
const liveCount = agentList.filter((a) => normalizeStatus(a.status) === 'live').length;
const staleCount = agentList.filter((a) => normalizeStatus(a.status) === 'stale').length;
const deadCount = agentList.filter((a) => normalizeStatus(a.status) === 'dead').length;
const totalTps = agentList.reduce((s, a) => s + (a.tps ?? 0), 0);
const totalActiveRoutes = agentList.reduce((s, a) => s + (a.activeRoutes ?? 0), 0);
const totalRoutes = agentList.reduce((s, a) => s + (a.totalRoutes ?? 0), 0);
// Map events to FeedEvent
const feedEvents: FeedEvent[] = useMemo(
() =>
(events ?? []).map((e: { id: number; agentId: string; eventType: string; detail: string; timestamp: string }) => ({
id: String(e.id),
severity:
e.eventType === 'WENT_DEAD'
? ('error' as const)
: e.eventType === 'WENT_STALE'
? ('warning' as const)
: e.eventType === 'RECOVERED'
? ('success' as const)
: ('running' as const),
message: `${e.agentId}: ${e.eventType}${e.detail ? ' \u2014 ' + e.detail : ''}`,
timestamp: new Date(e.timestamp),
})),
[events],
);
// Column definitions for the instance DataTable
const instanceColumns: Column<AgentInstance>[] = useMemo(
() => [
{
key: 'status',
header: '',
width: '12px',
render: (_val, row) => <StatusDot variant={normalizeStatus(row.status)} />,
},
{
key: 'name',
header: 'Instance',
render: (_val, row) => (
<MonoText size="sm" className={styles.instanceName}>{row.name ?? row.id}</MonoText>
),
},
{
key: 'state',
header: 'State',
render: (_val, row) => {
const ns = normalizeStatus(row.status);
return <Badge label={row.status} color={statusColor(ns)} variant="filled" />;
},
},
{
key: 'uptime',
header: 'Uptime',
render: (_val, row) => (
<MonoText size="xs" className={styles.instanceMeta}>{formatUptime(row.uptimeSeconds)}</MonoText>
),
},
{
key: 'tps',
header: 'TPS',
render: (_val, row) => (
<MonoText size="xs" className={styles.instanceMeta}>
{row.tps != null ? `${row.tps.toFixed(1)}/s` : '\u2014'}
</MonoText>
),
},
{
key: 'errorRate',
header: 'Errors',
render: (_val, row) => (
<MonoText size="xs" className={row.errorRate ? styles.instanceError : styles.instanceMeta}>
{formatErrorRate(row.errorRate)}
</MonoText>
),
},
{
key: 'lastHeartbeat',
header: 'Heartbeat',
render: (_val, row) => {
const ns = normalizeStatus(row.status);
return (
<MonoText
size="xs"
className={
ns === 'dead'
? styles.instanceHeartbeatDead
: ns === 'stale'
? styles.instanceHeartbeatStale
: styles.instanceMeta
}
>
{timeAgo(row.lastHeartbeat)}
</MonoText>
);
},
},
],
[],
);
function handleInstanceClick(inst: AgentInstance) {
setSelectedInstance(inst);
setPanelOpen(true);
}
// Detail panel tabs
const detailTabs = selectedInstance
? [
{
label: 'Overview',
value: 'overview',
content: <AgentOverviewContent agent={selectedInstance} />,
},
{
label: 'Performance',
value: 'performance',
content: <AgentPerformanceContent agent={selectedInstance} />,
},
]
: [];
const isFullWidth = !!appId;
return (
<div className={styles.content}>
{/* Stat strip */}
<div className={styles.statStrip}>
<StatCard
label="Total Agents"
value={String(totalInstances)}
accent={deadCount > 0 ? 'warning' : 'amber'}
detail={
<span className={styles.breakdown}>
<span className={styles.bpLive}><StatusDot variant="live" /> {liveCount} live</span>
<span className={styles.bpStale}><StatusDot variant="stale" /> {staleCount} stale</span>
<span className={styles.bpDead}><StatusDot variant="dead" /> {deadCount} dead</span>
</span>
}
/>
<StatCard
label="Applications"
value={String(groups.length)}
accent="running"
detail={
<span className={styles.breakdown}>
<span className={styles.bpLive}>
<StatusDot variant="live" /> {groups.filter((g) => g.deadCount === 0 && g.staleCount === 0).length} healthy
</span>
<span className={styles.bpStale}>
<StatusDot variant="stale" /> {groups.filter((g) => g.staleCount > 0 && g.deadCount === 0).length} degraded
</span>
<span className={styles.bpDead}>
<StatusDot variant="dead" /> {groups.filter((g) => g.deadCount > 0).length} critical
</span>
</span>
}
/>
<StatCard
label="Active Routes"
value={
<span
className={
styles[
totalActiveRoutes === 0
? 'routesError'
: totalActiveRoutes < totalRoutes
? 'routesWarning'
: 'routesSuccess'
]
}
>
{totalActiveRoutes}/{totalRoutes}
</span>
}
accent={totalActiveRoutes === 0 ? 'error' : totalActiveRoutes < totalRoutes ? 'warning' : 'success'}
detail={totalActiveRoutes < totalRoutes ? `${totalRoutes - totalActiveRoutes} suspended` : 'all routes active'}
/>
<StatCard
label="Total TPS"
value={totalTps.toFixed(1)}
accent="amber"
detail="msg/s"
/>
<StatCard
label="Dead"
value={String(deadCount)}
accent={deadCount > 0 ? 'error' : 'success'}
detail={deadCount > 0 ? 'requires attention' : 'all healthy'}
/>
</div>
{/* Scope trail + badges */}
<div className={styles.scopeTrail}>
{appId && (
<>
<Link to="/agents" className={styles.scopeLink}>All Agents</Link>
<span className={styles.scopeSep}>&#9656;</span>
<span className={styles.scopeCurrent}>{appId}</span>
</>
)}
<Badge
label={`${liveCount}/${totalInstances} live`}
color={deadCount > 0 ? 'error' : staleCount > 0 ? 'warning' : 'success'}
variant="filled"
/>
</div>
{/* Group cards grid */}
<div className={isFullWidth ? styles.groupGridSingle : styles.groupGrid}>
{groups.map((group) => (
<GroupCard
key={group.appId}
title={group.appId}
accent={appHealth(group)}
headerRight={
<Badge
label={`${group.liveCount}/${group.instances.length} LIVE`}
color={appHealth(group)}
variant="filled"
/>
}
meta={
<div className={styles.groupMeta}>
<span><strong>{group.totalTps.toFixed(1)}</strong> msg/s</span>
<span><strong>{group.totalActiveRoutes}</strong>/{group.totalRoutes} routes</span>
<span>
<StatusDot
variant={
appHealth(group) === 'success'
? 'live'
: appHealth(group) === 'warning'
? 'stale'
: 'dead'
}
/>
</span>
</div>
}
footer={
group.deadCount > 0 ? (
<div className={styles.alertBanner}>
<span className={styles.alertIcon}>&#9888;</span>
<span>
Single point of failure &mdash;{' '}
{group.deadCount === group.instances.length
? 'no redundancy'
: `${group.deadCount} dead instance${group.deadCount > 1 ? 's' : ''}`}
</span>
</div>
) : undefined
}
>
<DataTable<AgentInstance>
columns={instanceColumns}
data={group.instances}
onRowClick={handleInstanceClick}
selectedId={panelOpen ? selectedInstance?.id : undefined}
pageSize={50}
flush
/>
</GroupCard>
))}
</div>
{/* EventFeed */}
{feedEvents.length > 0 && (
<div className={styles.eventCard}>
<div className={styles.eventCardHeader}>
<span className={styles.sectionTitle}>Timeline</span>
<span className={styles.sectionMeta}>{feedEvents.length} events</span>
</div>
<EventFeed events={feedEvents} maxItems={100} />
</div>
)}
{/* Detail panel */}
{selectedInstance && (
<DetailPanel
open={panelOpen}
onClose={() => {
setPanelOpen(false);
setSelectedInstance(null);
}}
title={selectedInstance.name ?? selectedInstance.id}
tabs={detailTabs}
/>
)}
</div>
);
}