Runtime page (AgentInstance): - Rearrange charts: CPU, Memory, GC (top); Threads, Chunks Exported, Chunks Dropped (bottom). Removes throughput/error charts (belong on Dashboard, not Runtime). - Pass global time range (from/to) to useAgentMetrics — charts now respect the time filter instead of always showing last 60 minutes. - Bottom row (logs + timeline) fills remaining vertical space. Dashboard L3: - Processor metrics section fills remaining vertical space. - Chart x-axis uses timestamps instead of bucket indices. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
440 lines
17 KiB
TypeScript
440 lines
17 KiB
TypeScript
import { useMemo, useState } from 'react';
|
|
import { useParams } from 'react-router';
|
|
import {
|
|
KpiStrip,
|
|
DataTable,
|
|
ThemedChart,
|
|
Area,
|
|
Line,
|
|
ReferenceLine,
|
|
CHART_COLORS,
|
|
Card,
|
|
MonoText,
|
|
Badge,
|
|
} from '@cameleer/design-system';
|
|
import type { KpiItem, Column } from '@cameleer/design-system';
|
|
import { useGlobalFilters } from '@cameleer/design-system';
|
|
import { useExecutionStats, useStatsTimeseries } from '../../api/queries/executions';
|
|
import { useProcessorMetrics } from '../../api/queries/processor-metrics';
|
|
import { useTopErrors, useAppSettings } from '../../api/queries/dashboard';
|
|
import { useEnvironmentStore } from '../../api/environment-store';
|
|
import type { TopError } from '../../api/queries/dashboard';
|
|
import { useDiagramByRoute } from '../../api/queries/diagrams';
|
|
import { ProcessDiagram } from '../../components/ProcessDiagram';
|
|
import {
|
|
formatRelativeTime,
|
|
trendArrow,
|
|
formatThroughput,
|
|
formatSlaCompliance,
|
|
trendIndicator,
|
|
} from './dashboard-utils';
|
|
import styles from './DashboardTab.module.css';
|
|
import tableStyles from '../../styles/table-section.module.css';
|
|
import refreshStyles from '../../styles/refresh-indicator.module.css';
|
|
import rateStyles from '../../styles/rate-colors.module.css';
|
|
|
|
// ── Row types ───────────────────────────────────────────────────────────────
|
|
|
|
/** Shape of one row in the processor metrics table. */
interface ProcessorRow {
  /** Row identifier; this file fills it with the processor id. */
  id: string;
  /** Processor identifier, rendered in the "Processor ID" column. */
  processorId: string;
  /** Processor type, rendered as a badge. */
  processorType: string;
  /** Invocation count, rendered with locale digit grouping. */
  totalCount: number;
  /** Average duration in milliseconds (rounded for display). */
  avgDurationMs: number;
  /** P99 duration in milliseconds (rounded for display, color-coded). */
  p99DurationMs: number;
  /** Error rate as a fraction; the table multiplies it by 100 before display. */
  errorRate: number;
  /** Processor average duration as a percentage of the route average duration. */
  pctTime: number;
}
|
|
|
|
/** A `TopError` entry plus the `id` string carried by each row of the errors table. */
interface ErrorRow extends TopError {
  id: string;
}
|
|
|
|
// ── Processor table columns ─────────────────────────────────────────────────
|
|
|
|
/** Rate color class for a P99 latency in ms: above 300 is bad, above 200 warns, otherwise good. */
const p99RateClass = (p99Ms: number) => {
  if (p99Ms > 300) return rateStyles.rateBad;
  if (p99Ms > 200) return rateStyles.rateWarn;
  return rateStyles.rateGood;
};

/** Rate color class for an error percentage: above 5 is bad, above 1 warns, otherwise good. */
const errorRateClass = (pct: number) => {
  if (pct > 5) return rateStyles.rateBad;
  if (pct > 1) return rateStyles.rateWarn;
  return rateStyles.rateGood;
};

/** Column definitions for the processor metrics table; every column is sortable. */
const PROCESSOR_COLUMNS: Column<ProcessorRow>[] = [
  {
    key: 'processorId',
    header: 'Processor ID',
    sortable: true,
    render: (_value, { processorId }) => <MonoText size="sm">{processorId}</MonoText>,
  },
  {
    key: 'processorType',
    header: 'Type',
    sortable: true,
    render: (_value, { processorType }) => <Badge label={processorType} color="auto" />,
  },
  {
    key: 'totalCount',
    header: 'Invocations',
    sortable: true,
    render: (_value, { totalCount }) => (
      <MonoText size="sm">{totalCount.toLocaleString()}</MonoText>
    ),
  },
  {
    key: 'avgDurationMs',
    header: 'Avg(ms)',
    sortable: true,
    render: (_value, { avgDurationMs }) => (
      <MonoText size="sm">{Math.round(avgDurationMs)}</MonoText>
    ),
  },
  {
    key: 'p99DurationMs',
    header: 'P99(ms)',
    sortable: true,
    render: (_value, { p99DurationMs }) => (
      <MonoText size="sm" className={p99RateClass(p99DurationMs)}>{Math.round(p99DurationMs)}</MonoText>
    ),
  },
  {
    key: 'errorRate',
    header: 'Error Rate(%)',
    sortable: true,
    render: (_value, { errorRate }) => {
      // errorRate arrives as a fraction; convert to percent for both threshold and display.
      const pct = errorRate * 100;
      return <MonoText size="sm" className={errorRateClass(pct)}>{pct.toFixed(2)}%</MonoText>;
    },
  },
  {
    key: 'pctTime',
    header: '% Time',
    sortable: true,
    render: (_value, { pctTime }) => (
      <MonoText size="sm">{pctTime.toFixed(1)}%</MonoText>
    ),
  },
];
|
|
|
|
// ── Error table columns ─────────────────────────────────────────────────────
|
|
|
|
/** Column definitions for the top-errors table; all columns except "Velocity" are sortable. */
const ERROR_COLUMNS: Column<ErrorRow>[] = [
  {
    key: 'errorType',
    header: 'Error Type',
    sortable: true,
    render: (_value, { errorType }) => <MonoText size="sm">{errorType}</MonoText>,
  },
  {
    key: 'processorId',
    header: 'Processor',
    sortable: true,
    // Falls back to an em dash when the error has no processor id.
    render: (_value, { processorId }) => (
      <MonoText size="sm">{processorId ?? '\u2014'}</MonoText>
    ),
  },
  {
    key: 'count',
    header: 'Count',
    sortable: true,
    render: (_value, { count }) => (
      <MonoText size="sm">{count.toLocaleString()}</MonoText>
    ),
  },
  {
    key: 'trend',
    header: 'Velocity',
    render: (_value, { trend }) => (
      <span>{trendArrow(trend)} {trend}</span>
    ),
  },
  {
    key: 'lastSeen',
    header: 'Last Seen',
    sortable: true,
    render: (_value, { lastSeen }) => (
      <span>{formatRelativeTime(lastSeen)}</span>
    ),
  },
];
|
|
|
|
// ── Build KPI items ─────────────────────────────────────────────────────────
|
|
|
|
/**
 * Builds the five KPI cards shown in the strip: Throughput, Success Rate,
 * P99 Latency, SLA Compliance and Bottleneck.
 *
 * @param stats Aggregated execution stats; when undefined every count and
 *   duration falls back to 0 and the success rate to 100%.
 * @param slaThresholdMs Latency threshold the P99 value is compared against.
 * @param bottleneck Slowest processor summary, or null when there is no processor data.
 * @param throughputSparkline Series attached to the Throughput card as its sparkline.
 * @param windowSeconds Length of the selected window, forwarded to `formatThroughput`.
 * @returns The KPI items in display order.
 */
function buildKpiItems(
  stats: {
    totalCount: number;
    failedCount: number;
    avgDurationMs: number;
    p99LatencyMs: number;
    activeCount: number;
    prevTotalCount: number;
    prevFailedCount: number;
    prevP99LatencyMs: number;
  } | undefined,
  slaThresholdMs: number,
  bottleneck: { processorId: string; avgMs: number; pct: number } | null,
  throughputSparkline: number[],
  windowSeconds: number,
): KpiItem[] {
  const totalCount = stats?.totalCount ?? 0;
  const failedCount = stats?.failedCount ?? 0;
  const prevTotalCount = stats?.prevTotalCount ?? 0;
  const p99Ms = stats?.p99LatencyMs ?? 0;
  const avgMs = stats?.avgDurationMs ?? 0;
  const succeededCount = totalCount - failedCount;

  // With no exchanges in the window there is nothing to fail, so report 100%.
  const successRate = totalCount > 0 ? (succeededCount / totalCount) * 100 : 100;
  // NOTE(review): SLA compliance is the same failure-based ratio as the success
  // rate; slaThresholdMs does not feed into it. Confirm this is intended.
  const slaCompliance = successRate;

  const throughputTrend = trendIndicator(totalCount, prevTotalCount);
  const slaBreached = p99Ms > slaThresholdMs;

  return [
    {
      label: 'Throughput',
      value: formatThroughput(totalCount, windowSeconds),
      trend: {
        label: throughputTrend.label,
        variant: throughputTrend.direction === 'up'
          ? 'success' as const
          : throughputTrend.direction === 'down'
            ? 'error' as const
            : 'muted' as const,
      },
      subtitle: `${totalCount.toLocaleString()} total exchanges`,
      sparkline: throughputSparkline,
      borderColor: 'var(--amber)',
    },
    {
      label: 'Success Rate',
      value: `${successRate.toFixed(2)}%`,
      trend: {
        // Counts are locale-formatted so they match the total shown next to them.
        label: failedCount > 0 ? `${failedCount.toLocaleString()} failed` : 'No errors',
        variant: successRate >= 99
          ? 'success' as const
          : successRate >= 97
            ? 'warning' as const
            : 'error' as const,
      },
      subtitle: `${succeededCount.toLocaleString()} succeeded / ${totalCount.toLocaleString()} total`,
      borderColor: successRate >= 99 ? 'var(--success)' : 'var(--error)',
    },
    {
      label: 'P99 Latency',
      value: `${Math.round(p99Ms)}ms`,
      trend: {
        label: slaBreached ? 'BREACH' : 'OK',
        variant: slaBreached ? 'error' as const : 'success' as const,
      },
      subtitle: `SLA threshold: ${slaThresholdMs}ms \u00B7 Avg: ${Math.round(avgMs)}ms`,
      borderColor: slaBreached ? 'var(--warning)' : 'var(--success)',
    },
    {
      label: 'SLA Compliance',
      value: formatSlaCompliance(slaCompliance),
      trend: {
        label: slaCompliance >= 99.9 ? 'Excellent' : slaCompliance >= 99 ? 'Good' : 'Degraded',
        variant: slaCompliance >= 99
          ? 'success' as const
          : slaCompliance >= 95
            ? 'warning' as const
            : 'error' as const,
      },
      subtitle: `Target: 99.9%`,
      borderColor: slaCompliance >= 99 ? 'var(--success)' : 'var(--warning)',
    },
    {
      label: 'Bottleneck',
      value: bottleneck ? `${Math.round(bottleneck.avgMs)}ms` : '\u2014',
      trend: {
        label: bottleneck ? `${bottleneck.pct.toFixed(1)}% of total` : '\u2014',
        variant: bottleneck && bottleneck.pct > 50 ? 'error' as const : 'muted' as const,
      },
      subtitle: bottleneck
        ? `${bottleneck.processorId} \u00B7 ${Math.round(bottleneck.avgMs)}ms \u00B7 ${bottleneck.pct.toFixed(1)}% of total`
        : 'No processor data',
      borderColor: 'var(--running)',
    },
  ];
}
|
|
|
|
// ── Component ───────────────────────────────────────────────────────────────
|
|
|
|
/** Per-processor figures stored in the map passed to ProcessDiagram's `latencyHeatmap` prop. */
interface LatencyHeatmapEntry {
  avgDurationMs: number;
  p99DurationMs: number;
  pctOfRoute: number;
  processorType?: string;
  totalCount?: number;
  errorRate?: number;
}

/** Formats a bucket timestamp as a two-digit hour:minute label for chart x-axis ticks. */
const formatBucketTime = (t: string) =>
  new Date(t).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });

/**
 * Dashboard view for a single route (app and route ids come from the URL params).
 *
 * Renders, top to bottom: the KPI strip, a row of three time-series charts
 * (throughput, P99 latency with the SLA reference line, error rate), the
 * processor metrics section (diagram or table, user-toggled) and the top
 * errors table, which is hidden when there are no errors.
 */
export default function DashboardL3() {
  const [processorView, setProcessorView] = useState<'diagram' | 'table'>('diagram');
  const { appId, routeId } = useParams<{ appId: string; routeId: string }>();
  const selectedEnv = useEnvironmentStore((s) => s.environment);
  const { timeRange } = useGlobalFilters();
  const timeFrom = timeRange.start.toISOString();
  const timeTo = timeRange.end.toISOString();
  const windowSeconds = (timeRange.end.getTime() - timeRange.start.getTime()) / 1000;

  // ── Data hooks ──────────────────────────────────────────────────────────
  const { data: stats } = useExecutionStats(timeFrom, timeTo, routeId, appId, selectedEnv);
  const { data: timeseries } = useStatsTimeseries(timeFrom, timeTo, routeId, appId, selectedEnv);
  // NOTE(review): unlike the other queries this one is not given timeFrom/timeTo — confirm intended.
  const { data: processorMetrics } = useProcessorMetrics(routeId ?? null, appId, selectedEnv);
  const { data: topErrors } = useTopErrors(timeFrom, timeTo, appId, routeId, selectedEnv);
  const { data: diagramLayout } = useDiagramByRoute(appId, routeId);
  const { data: appSettings } = useAppSettings(appId);

  // Default SLA threshold of 300 ms until app settings provide one.
  const slaThresholdMs = appSettings?.slaThresholdMs ?? 300;

  // ── Bottleneck (processor with highest avgDurationMs) ───────────────────
  const bottleneck = useMemo(() => {
    if (!processorMetrics?.length) return null;
    const routeAvg = stats?.avgDurationMs ?? 0;
    // Single pass instead of copy + sort; strict `>` keeps the first maximum,
    // which is the element a stable descending sort would have put first.
    const top = processorMetrics.reduce(
      (slowest: any, m: any) => (m.avgDurationMs > slowest.avgDurationMs ? m : slowest),
    );
    const pct = routeAvg > 0 ? (top.avgDurationMs / routeAvg) * 100 : 0;
    return { processorId: top.processorId, avgMs: top.avgDurationMs, pct };
  }, [processorMetrics, stats]);

  // ── Sparklines from timeseries ──────────────────────────────────────────
  const throughputSparkline = useMemo(
    () => (timeseries?.buckets || []).map((b: any) => b.totalCount),
    [timeseries],
  );

  // ── KPI strip ───────────────────────────────────────────────────────────
  const kpiItems = useMemo(
    () => buildKpiItems(stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds),
    [stats, slaThresholdMs, bottleneck, throughputSparkline, windowSeconds],
  );

  // ── Chart data ───────────────────────────────────────────────────────────
  // NOTE(review): `throughput` is the raw per-bucket count while the chart is
  // labelled "msg/s" — confirm the bucket width or adjust the label.
  const chartData = useMemo(
    () =>
      (timeseries?.buckets || []).map((b: any) => ({
        time: b.time,
        throughput: b.totalCount,
        p99: b.p99DurationMs,
        errorRate: b.totalCount > 0 ? (b.failedCount / b.totalCount) * 100 : 0,
      })),
    [timeseries],
  );

  // ── Processor table rows ────────────────────────────────────────────────
  const processorRows: ProcessorRow[] = useMemo(() => {
    if (!processorMetrics?.length) return [];
    const routeAvg = stats?.avgDurationMs ?? 0;
    return processorMetrics.map((m: any) => ({
      id: m.processorId,
      processorId: m.processorId,
      processorType: m.processorType,
      totalCount: m.totalCount,
      avgDurationMs: m.avgDurationMs,
      p99DurationMs: m.p99DurationMs,
      errorRate: m.errorRate,
      pctTime: routeAvg > 0 ? (m.avgDurationMs / routeAvg) * 100 : 0,
    }));
  }, [processorMetrics, stats]);

  // ── Latency heatmap for ProcessDiagram ──────────────────────────────────
  // NOTE(review): pctOfRoute is relative to the sum of processor averages,
  // whereas the table's pctTime is relative to the route average — confirm.
  const latencyHeatmap = useMemo(() => {
    const map = new Map<string, LatencyHeatmapEntry>();
    if (!processorMetrics?.length) return map;
    const totalAvg = processorMetrics.reduce(
      (sum: number, m: any) => sum + m.avgDurationMs, 0,
    );
    for (const m of processorMetrics) {
      map.set(m.processorId, {
        avgDurationMs: m.avgDurationMs,
        p99DurationMs: m.p99DurationMs,
        pctOfRoute: totalAvg > 0 ? (m.avgDurationMs / totalAvg) * 100 : 0,
        processorType: m.processorType,
        totalCount: m.totalCount,
        errorRate: m.errorRate,
      });
    }
    return map;
  }, [processorMetrics]);

  // ── Error table rows ────────────────────────────────────────────────────
  // The index suffix keeps ids distinct when the same error type appears more than once.
  const errorRows: ErrorRow[] = useMemo(
    () => (topErrors || []).map((e, i) => ({ ...e, id: `${e.errorType}-${i}` })),
    [topErrors],
  );

  return (
    <div className={styles.content}>
      <div className={refreshStyles.refreshIndicator}>
        <span className={refreshStyles.refreshDot} />
        <span className={refreshStyles.refreshText}>Auto-refresh: 30s</span>
      </div>

      {/* KPI Strip */}
      <KpiStrip items={kpiItems} />

      {/* Charts — 3 in a row */}
      {(timeseries?.buckets?.length ?? 0) > 0 && (
        <div className={styles.chartRow}>
          <Card title="Throughput">
            <ThemedChart data={chartData} height={200} xDataKey="time" xTickFormatter={formatBucketTime} yLabel="msg/s">
              <Area dataKey="throughput" name="Throughput" stroke={CHART_COLORS[0]}
                fill={CHART_COLORS[0]} fillOpacity={0.1} strokeWidth={2} dot={false} />
            </ThemedChart>
          </Card>

          <Card title="Latency Percentiles">
            <ThemedChart data={chartData} height={200} xDataKey="time" xTickFormatter={formatBucketTime} yLabel="ms">
              <Line dataKey="p99" name="P99" stroke={CHART_COLORS[0]} strokeWidth={2} dot={false} />
              <ReferenceLine y={slaThresholdMs} stroke="var(--error)" strokeDasharray="5 3"
                label={{ value: `SLA ${slaThresholdMs}ms`, position: 'right', fill: 'var(--error)', fontSize: 9 }} />
            </ThemedChart>
          </Card>

          <Card title="Error Rate">
            <ThemedChart data={chartData} height={200} xDataKey="time" xTickFormatter={formatBucketTime} yLabel="%">
              <Area dataKey="errorRate" name="Error Rate" stroke={CHART_COLORS[1]}
                fill={CHART_COLORS[1]} fillOpacity={0.1} strokeWidth={2} dot={false} />
            </ThemedChart>
          </Card>
        </div>
      )}

      {/* Processor Metrics — toggle between diagram and table */}
      <div className={`${tableStyles.tableSection} ${styles.processorSection}`}>
        <div className={tableStyles.tableHeader}>
          <span className={tableStyles.tableTitle}>Processor Metrics</span>
          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
            <span className={tableStyles.tableMeta}>
              {processorRows.length} processor{processorRows.length !== 1 ? 's' : ''}
            </span>
            <div className={styles.toggleRow}>
              {/* type="button" prevents an implicit form submit; aria-pressed exposes the toggle state. */}
              <button
                type="button"
                aria-pressed={processorView === 'diagram'}
                className={`${styles.toggleBtn} ${processorView === 'diagram' ? styles.toggleActive : ''}`}
                onClick={() => setProcessorView('diagram')}
              >Diagram</button>
              <button
                type="button"
                aria-pressed={processorView === 'table'}
                className={`${styles.toggleBtn} ${processorView === 'table' ? styles.toggleActive : ''}`}
                onClick={() => setProcessorView('table')}
              >Table</button>
            </div>
          </div>
        </div>
        {/* The diagram needs both ids; without them the table is shown instead. */}
        {processorView === 'diagram' && appId && routeId ? (
          <div className={styles.diagramHeight}>
            <ProcessDiagram
              application={appId}
              routeId={routeId}
              diagramLayout={diagramLayout}
              latencyHeatmap={latencyHeatmap}
            />
          </div>
        ) : (
          <DataTable
            columns={PROCESSOR_COLUMNS}
            data={processorRows}
            sortable
          />
        )}
      </div>

      {/* Top 5 Errors — hidden if empty */}
      {errorRows.length > 0 && (
        <div className={tableStyles.tableSection}>
          <div className={tableStyles.tableHeader}>
            <span className={tableStyles.tableTitle}>Top 5 Errors</span>
            <Badge label={`${errorRows.length}`} color="error" />
          </div>
          <DataTable
            columns={ERROR_COLUMNS}
            data={errorRows}
            sortable
          />
        </div>
      )}
    </div>
  );
}
|