feat(clickhouse): add ClickHouseStatsStore with -Merge aggregate queries
Implements StatsStore interface for ClickHouse using AggregatingMergeTree tables with -Merge combinators (countMerge, countIfMerge, sumMerge, quantileMerge). Uses literal SQL for aggregate table queries to avoid ClickHouse JDBC driver PreparedStatement issues with AggregateFunction columns. Raw table queries (SLA, topErrors, activeErrorTypes) use normal prepared statements. Includes 14 integration tests covering stats, timeseries, grouped timeseries, SLA compliance, SLA counts by app/route, top errors, active error types, punchcard, and processor stats. Also fixes AggregateFunction type signatures in V4 DDL (count() takes no args, countIf takes UInt8). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,375 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.search.ExecutionStats;
import com.cameleer3.server.core.search.StatsTimeseries;
import com.cameleer3.server.core.search.TopError;
import com.cameleer3.server.core.storage.StatsStore.PunchcardCell;
import com.zaxxer.hikari.HikariDataSource;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.testcontainers.clickhouse.ClickHouseContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;

import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.time.Instant;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseStatsStoreIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseStatsStore store;
|
||||
|
||||
// base time: 2026-03-31T10:00:00Z (a Tuesday)
|
||||
private static final Instant BASE = Instant.parse("2026-03-31T10:00:00Z");
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
// Load DDL from classpath resources
|
||||
String executionsDdl = new ClassPathResource("clickhouse/V2__executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String processorsDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String statsDdl = new ClassPathResource("clickhouse/V4__stats_tables_and_mvs.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
|
||||
jdbc.execute(executionsDdl);
|
||||
jdbc.execute(processorsDdl);
|
||||
|
||||
// Drop MVs first (they reference the stats tables), then recreate everything
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_all_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_app_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_route_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_detail_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_all");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_app");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_route");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_detail");
|
||||
|
||||
// Strip SQL line comments first (they may contain semicolons),
|
||||
// then split by ';' and execute non-empty statements.
|
||||
String cleanedDdl = statsDdl.replaceAll("--[^\n]*", "");
|
||||
for (String stmt : cleanedDdl.split(";")) {
|
||||
String trimmed = stmt.trim();
|
||||
if (!trimmed.isEmpty()) {
|
||||
jdbc.execute(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate base tables
|
||||
jdbc.execute("TRUNCATE TABLE executions");
|
||||
jdbc.execute("TRUNCATE TABLE processor_executions");
|
||||
|
||||
seedTestData();
|
||||
|
||||
// Try the failing query to capture it in query_log, then check
|
||||
try {
|
||||
jdbc.queryForMap(
|
||||
"SELECT countMerge(total_count) AS tc, countIfMerge(failed_count) AS fc, " +
|
||||
"sumMerge(duration_sum) / greatest(countMerge(total_count), 1) AS avg, " +
|
||||
"quantileMerge(0.99)(p99_duration) AS p99, " +
|
||||
"countIfMerge(running_count) AS rc " +
|
||||
"FROM stats_1m_all WHERE tenant_id = 'default' " +
|
||||
"AND bucket >= '2026-03-31 09:59:00' AND bucket < '2026-03-31 10:05:00'");
|
||||
} catch (Exception e) {
|
||||
System.out.println("Expected error: " + e.getMessage().substring(0, 80));
|
||||
}
|
||||
|
||||
jdbc.execute("SYSTEM FLUSH LOGS");
|
||||
// Get ALL recent queries to see what the driver sends
|
||||
var queryLog = jdbc.queryForList(
|
||||
"SELECT type, substring(query, 1, 200) AS q " +
|
||||
"FROM system.query_log WHERE event_time > now() - 30 " +
|
||||
"AND query NOT LIKE '%system.query_log%' AND query NOT LIKE '%FLUSH%' " +
|
||||
"ORDER BY event_time DESC LIMIT 20");
|
||||
for (var entry : queryLog) {
|
||||
System.out.println("LOG: " + entry.get("type") + " | " + entry.get("q"));
|
||||
}
|
||||
|
||||
store = new ClickHouseStatsStore(jdbc);
|
||||
}
|
||||
|
||||
private void seedTestData() {
|
||||
// 10 executions across 2 apps, 2 routes, spanning 5 minutes
|
||||
// app-1, route-a: 4 COMPLETED (200ms, 300ms, 400ms, 500ms)
|
||||
insertExecution("exec-01", BASE.plusSeconds(0), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 200L, "", "");
|
||||
insertExecution("exec-02", BASE.plusSeconds(60), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 300L, "", "");
|
||||
insertExecution("exec-03", BASE.plusSeconds(120), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 400L, "", "");
|
||||
insertExecution("exec-04", BASE.plusSeconds(180), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 500L, "", "");
|
||||
|
||||
// app-1, route-a: 2 FAILED (100ms, 150ms) with error_type="NPE"
|
||||
insertExecution("exec-05", BASE.plusSeconds(60), "app-1", "route-a", "agent-1",
|
||||
"FAILED", 100L, "NPE", "null ref");
|
||||
insertExecution("exec-06", BASE.plusSeconds(120), "app-1", "route-a", "agent-1",
|
||||
"FAILED", 150L, "NPE", "null ref");
|
||||
|
||||
// app-1, route-b: 2 COMPLETED (50ms, 60ms)
|
||||
insertExecution("exec-07", BASE.plusSeconds(60), "app-1", "route-b", "agent-1",
|
||||
"COMPLETED", 50L, "", "");
|
||||
insertExecution("exec-08", BASE.plusSeconds(120), "app-1", "route-b", "agent-1",
|
||||
"COMPLETED", 60L, "", "");
|
||||
|
||||
// app-2, route-c: 1 COMPLETED (1000ms)
|
||||
insertExecution("exec-09", BASE.plusSeconds(60), "app-2", "route-c", "agent-2",
|
||||
"COMPLETED", 1000L, "", "");
|
||||
|
||||
// app-2, route-c: 1 RUNNING (null duration)
|
||||
insertExecution("exec-10", BASE.plusSeconds(180), "app-2", "route-c", "agent-2",
|
||||
"RUNNING", null, "", "");
|
||||
|
||||
// 5 processor records for processor stats testing
|
||||
// app-1, route-a, processor_type="to": 3 COMPLETED
|
||||
insertProcessor("exec-01", 1, "proc-to-1", "to", BASE.plusSeconds(0),
|
||||
"app-1", "route-a", "COMPLETED", 50L);
|
||||
insertProcessor("exec-02", 1, "proc-to-2", "to", BASE.plusSeconds(60),
|
||||
"app-1", "route-a", "COMPLETED", 80L);
|
||||
insertProcessor("exec-03", 1, "proc-to-3", "to", BASE.plusSeconds(120),
|
||||
"app-1", "route-a", "COMPLETED", 90L);
|
||||
|
||||
// app-1, route-a, processor_type="log": 2 COMPLETED
|
||||
insertProcessor("exec-01", 2, "proc-log-1", "log", BASE.plusSeconds(1),
|
||||
"app-1", "route-a", "COMPLETED", 10L);
|
||||
insertProcessor("exec-02", 2, "proc-log-2", "log", BASE.plusSeconds(61),
|
||||
"app-1", "route-a", "COMPLETED", 15L);
|
||||
}
|
||||
|
||||
private void insertExecution(String executionId, Instant startTime, String appName,
|
||||
String routeId, String agentId, String status,
|
||||
Long durationMs, String errorType, String errorMessage) {
|
||||
jdbc.update(
|
||||
"INSERT INTO executions (tenant_id, execution_id, start_time, route_id, " +
|
||||
"agent_id, application_name, status, duration_ms, error_type, error_message) " +
|
||||
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
executionId, Timestamp.from(startTime), routeId, agentId, appName,
|
||||
status, durationMs, errorType, errorMessage);
|
||||
}
|
||||
|
||||
private void insertProcessor(String executionId, int seq, String processorId,
|
||||
String processorType, Instant startTime,
|
||||
String appName, String routeId, String status,
|
||||
Long durationMs) {
|
||||
jdbc.update(
|
||||
"INSERT INTO processor_executions (tenant_id, execution_id, seq, processor_id, " +
|
||||
"processor_type, start_time, route_id, application_name, status, duration_ms) " +
|
||||
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
executionId, seq, processorId, processorType, Timestamp.from(startTime),
|
||||
routeId, appName, status, durationMs);
|
||||
}
|
||||
|
||||
// ── Stats Tests ──────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void stats_returnsCorrectGlobalTotals() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats stats = store.stats(from, to);
|
||||
|
||||
assertThat(stats.totalCount()).isEqualTo(10);
|
||||
assertThat(stats.failedCount()).isEqualTo(2);
|
||||
assertThat(stats.activeCount()).isEqualTo(1);
|
||||
assertThat(stats.avgDurationMs()).isGreaterThan(0);
|
||||
assertThat(stats.p99LatencyMs()).isGreaterThan(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void statsForApp_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats app1 = store.statsForApp(from, to, "app-1");
|
||||
assertThat(app1.totalCount()).isEqualTo(8);
|
||||
|
||||
ExecutionStats app2 = store.statsForApp(from, to, "app-2");
|
||||
assertThat(app2.totalCount()).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void statsForRoute_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats routeA = store.statsForRoute(from, to, "route-a", List.of());
|
||||
assertThat(routeA.totalCount()).isEqualTo(6);
|
||||
}
|
||||
|
||||
// ── Timeseries Tests ─────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void timeseries_returnsBuckets() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
StatsTimeseries ts = store.timeseries(from, to, 5);
|
||||
|
||||
assertThat(ts.buckets()).isNotEmpty();
|
||||
long totalAcrossBuckets = ts.buckets().stream()
|
||||
.mapToLong(StatsTimeseries.TimeseriesBucket::totalCount).sum();
|
||||
assertThat(totalAcrossBuckets).isEqualTo(10);
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesForApp_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
StatsTimeseries ts = store.timeseriesForApp(from, to, 5, "app-1");
|
||||
|
||||
long totalAcrossBuckets = ts.buckets().stream()
|
||||
.mapToLong(StatsTimeseries.TimeseriesBucket::totalCount).sum();
|
||||
assertThat(totalAcrossBuckets).isEqualTo(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesGroupedByApp_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, StatsTimeseries> grouped = store.timeseriesGroupedByApp(from, to, 5);
|
||||
|
||||
assertThat(grouped).containsKeys("app-1", "app-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesGroupedByRoute_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, StatsTimeseries> grouped = store.timeseriesGroupedByRoute(from, to, 5, "app-1");
|
||||
|
||||
assertThat(grouped).containsKeys("route-a", "route-b");
|
||||
}
|
||||
|
||||
// ── SLA Tests ────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void slaCompliance_calculatesCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
// threshold=250ms: among 9 non-RUNNING executions:
|
||||
// compliant (<=250ms): exec-01(200), exec-05(100), exec-06(150), exec-07(50), exec-08(60) = 5
|
||||
// total non-running: 9
|
||||
// compliance = 5/9 * 100 ~ 55.56%
|
||||
double sla = store.slaCompliance(from, to, 250, null, null);
|
||||
assertThat(sla).isBetween(55.0, 56.0);
|
||||
}
|
||||
|
||||
// ── Top Errors Tests ─────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void topErrors_returnsRankedErrors() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
List<TopError> errors = store.topErrors(from, to, null, null, 10);
|
||||
|
||||
assertThat(errors).isNotEmpty();
|
||||
assertThat(errors.get(0).errorType()).isEqualTo("NPE");
|
||||
assertThat(errors.get(0).count()).isEqualTo(2);
|
||||
}
|
||||
|
||||
// ── Active Error Types Test ──────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void activeErrorTypes_countsDistinct() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
int count = store.activeErrorTypes(from, to, "app-1");
|
||||
|
||||
assertThat(count).isEqualTo(1); // only "NPE"
|
||||
}
|
||||
|
||||
// ── Punchcard Test ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void punchcard_returnsWeekdayHourCells() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
List<PunchcardCell> cells = store.punchcard(from, to, null);
|
||||
|
||||
assertThat(cells).isNotEmpty();
|
||||
long totalCount = cells.stream().mapToLong(PunchcardCell::totalCount).sum();
|
||||
assertThat(totalCount).isEqualTo(10);
|
||||
}
|
||||
|
||||
@Test
|
||||
void slaCountsByApp_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
// threshold=250ms
|
||||
Map<String, long[]> counts = store.slaCountsByApp(from, to, 250);
|
||||
|
||||
assertThat(counts).containsKeys("app-1", "app-2");
|
||||
// app-1: 8 total executions, all non-RUNNING
|
||||
// compliant (<=250ms): exec-01(200), exec-05(100), exec-06(150), exec-07(50), exec-08(60) = 5
|
||||
long[] app1 = counts.get("app-1");
|
||||
assertThat(app1[0]).isEqualTo(5); // compliant
|
||||
assertThat(app1[1]).isEqualTo(8); // total non-running
|
||||
// app-2: 1 COMPLETED(1000ms) + 1 RUNNING → 1 non-RUNNING, 0 compliant
|
||||
long[] app2 = counts.get("app-2");
|
||||
assertThat(app2[0]).isEqualTo(0); // compliant
|
||||
assertThat(app2[1]).isEqualTo(1); // total non-running
|
||||
}
|
||||
|
||||
@Test
|
||||
void slaCountsByRoute_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, long[]> counts = store.slaCountsByRoute(from, to, "app-1", 250);
|
||||
|
||||
assertThat(counts).containsKeys("route-a", "route-b");
|
||||
// route-a: exec-01(200)OK, exec-02(300)NO, exec-03(400)NO, exec-04(500)NO,
|
||||
// exec-05(100)OK, exec-06(150)OK → 3 compliant, 6 total
|
||||
long[] routeA = counts.get("route-a");
|
||||
assertThat(routeA[0]).isEqualTo(3); // compliant
|
||||
assertThat(routeA[1]).isEqualTo(6); // total
|
||||
// route-b: exec-07(50)OK, exec-08(60)OK → 2 compliant, 2 total
|
||||
long[] routeB = counts.get("route-b");
|
||||
assertThat(routeB[0]).isEqualTo(2);
|
||||
assertThat(routeB[1]).isEqualTo(2);
|
||||
}
|
||||
|
||||
// ── Processor Stats Test ─────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void statsForProcessor_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats toStats = store.statsForProcessor(from, to, "route-a", "to");
|
||||
assertThat(toStats.totalCount()).isEqualTo(3);
|
||||
assertThat(toStats.activeCount()).isEqualTo(0); // processor stats have no running_count
|
||||
|
||||
ExecutionStats logStats = store.statsForProcessor(from, to, "route-a", "log");
|
||||
assertThat(logStats.totalCount()).isEqualTo(2);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user