fix(stats): close 8 ClickHouseStatsStoreIT TZ failures (bucket DateTime('UTC') + JVM UTC pin)
Two-layer fix for the TZ drift that caused stats reads to miss every row
when the JVM default TZ and CH session TZ disagreed:
- Insert side: ClickHouse JDBC 0.9.7 formats java.sql.Timestamp via
Timestamp.toString(), which uses the JVM default TZ. A CEST JVM writing
to a UTC CH server therefore stored Unix timestamps shifted by the TZ
offset (the triage report's original symptom). The fix pins the JVM
default TZ to UTC in CameleerServerApplication.main() — standard practice
for observability servers that push to time-series stores.
- Read side: stats_1m_* tables now declare bucket as DateTime('UTC'),
MV SELECTs wrap toStartOfMinute(start_time) in toDateTime(..., 'UTC')
so projections match column type, and ClickHouseStatsStore.lit(Instant)
emits toDateTime('...', 'UTC') rather than a bare literal — defence
in depth against future refactors.
The test class pins its own JVM TZ because the store IT builds its own
HikariDataSource and never goes through the main() path. The debug
scaffolding from the triage investigation has been removed.
The ClickHouse deployment is greenfield, so no migration is needed.
Verified: 14/14 ClickHouseStatsStoreIT green, plus 84/84 across all
ClickHouse IT classes (no regression from the JVM TZ default change).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,8 @@ import org.springframework.boot.context.properties.EnableConfigurationProperties
|
||||
import org.springframework.scheduling.annotation.EnableAsync;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
import java.util.TimeZone;
|
||||
|
||||
/**
|
||||
* Main entry point for the Cameleer Server application.
|
||||
* <p>
|
||||
@@ -23,6 +25,11 @@ import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
public class CameleerServerApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
// Pin JVM default TZ to UTC. The ClickHouse JDBC driver formats
|
||||
// java.sql.Timestamp via toString() which uses JVM default TZ; a
|
||||
// non-UTC JVM would then send CH timestamps off by the TZ offset.
|
||||
// Standard practice for observability servers.
|
||||
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
|
||||
SpringApplication.run(CameleerServerApplication.class, args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,15 +338,15 @@ public class ClickHouseStatsStore implements StatsStore {
|
||||
private record Filter(String column, String value) {}
|
||||
|
||||
/**
|
||||
* Format an Instant as a ClickHouse DateTime literal.
|
||||
* Uses java.sql.Timestamp to match the JVM-ClickHouse timezone convention
|
||||
* used by the JDBC driver, then truncates to second precision for DateTime
|
||||
* column compatibility.
|
||||
* Format an Instant as a ClickHouse DateTime literal explicitly typed in UTC.
|
||||
* The explicit `toDateTime(..., 'UTC')` cast avoids depending on the session
|
||||
* timezone matching the `bucket DateTime('UTC')` column type.
|
||||
*/
|
||||
private static String lit(Instant instant) {
|
||||
return "'" + java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
String raw = java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
.withZone(java.time.ZoneOffset.UTC)
|
||||
.format(instant.truncatedTo(ChronoUnit.SECONDS)) + "'";
|
||||
.format(instant.truncatedTo(ChronoUnit.SECONDS));
|
||||
return "toDateTime('" + raw + "', 'UTC')";
|
||||
}
|
||||
|
||||
/** Format a string as a ClickHouse SQL literal with backslash + quote escaping. */
|
||||
|
||||
@@ -132,7 +132,7 @@ SETTINGS index_granularity = 8192;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_all (
|
||||
tenant_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
bucket DateTime('UTC'),
|
||||
environment LowCardinality(String) DEFAULT 'default',
|
||||
total_count AggregateFunction(uniq, String),
|
||||
failed_count AggregateFunction(uniqIf, String, UInt8),
|
||||
@@ -149,7 +149,7 @@ TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_all_mv TO stats_1m_all AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
toDateTime(toStartOfMinute(start_time), 'UTC') AS bucket,
|
||||
environment,
|
||||
uniqState(execution_id) AS total_count,
|
||||
uniqIfState(execution_id, status = 'FAILED') AS failed_count,
|
||||
@@ -165,7 +165,7 @@ GROUP BY tenant_id, bucket, environment;
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_app (
|
||||
tenant_id LowCardinality(String),
|
||||
application_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
bucket DateTime('UTC'),
|
||||
environment LowCardinality(String) DEFAULT 'default',
|
||||
total_count AggregateFunction(uniq, String),
|
||||
failed_count AggregateFunction(uniqIf, String, UInt8),
|
||||
@@ -183,7 +183,7 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_app_mv TO stats_1m_app AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
toDateTime(toStartOfMinute(start_time), 'UTC') AS bucket,
|
||||
environment,
|
||||
uniqState(execution_id) AS total_count,
|
||||
uniqIfState(execution_id, status = 'FAILED') AS failed_count,
|
||||
@@ -200,7 +200,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_route (
|
||||
tenant_id LowCardinality(String),
|
||||
application_id LowCardinality(String),
|
||||
route_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
bucket DateTime('UTC'),
|
||||
environment LowCardinality(String) DEFAULT 'default',
|
||||
total_count AggregateFunction(uniq, String),
|
||||
failed_count AggregateFunction(uniqIf, String, UInt8),
|
||||
@@ -219,7 +219,7 @@ SELECT
|
||||
tenant_id,
|
||||
application_id,
|
||||
route_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
toDateTime(toStartOfMinute(start_time), 'UTC') AS bucket,
|
||||
environment,
|
||||
uniqState(execution_id) AS total_count,
|
||||
uniqIfState(execution_id, status = 'FAILED') AS failed_count,
|
||||
@@ -236,7 +236,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor (
|
||||
tenant_id LowCardinality(String),
|
||||
application_id LowCardinality(String),
|
||||
processor_type LowCardinality(String),
|
||||
bucket DateTime,
|
||||
bucket DateTime('UTC'),
|
||||
environment LowCardinality(String) DEFAULT 'default',
|
||||
total_count AggregateFunction(uniq, String),
|
||||
failed_count AggregateFunction(uniqIf, String, UInt8),
|
||||
@@ -254,7 +254,7 @@ SELECT
|
||||
tenant_id,
|
||||
application_id,
|
||||
processor_type,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
toDateTime(toStartOfMinute(start_time), 'UTC') AS bucket,
|
||||
environment,
|
||||
uniqState(concat(execution_id, toString(seq))) AS total_count,
|
||||
uniqIfState(concat(execution_id, toString(seq)), status = 'FAILED') AS failed_count,
|
||||
@@ -272,7 +272,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor_detail (
|
||||
route_id LowCardinality(String),
|
||||
processor_id String,
|
||||
processor_type LowCardinality(String),
|
||||
bucket DateTime,
|
||||
bucket DateTime('UTC'),
|
||||
environment LowCardinality(String) DEFAULT 'default',
|
||||
total_count AggregateFunction(uniq, String),
|
||||
failed_count AggregateFunction(uniqIf, String, UInt8),
|
||||
@@ -292,7 +292,7 @@ SELECT
|
||||
route_id,
|
||||
processor_id,
|
||||
processor_type,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
toDateTime(toStartOfMinute(start_time), 'UTC') AS bucket,
|
||||
environment,
|
||||
uniqState(concat(execution_id, toString(seq))) AS total_count,
|
||||
uniqIfState(concat(execution_id, toString(seq)), status = 'FAILED') AS failed_count,
|
||||
|
||||
@@ -5,6 +5,7 @@ import com.cameleer.server.core.search.StatsTimeseries;
|
||||
import com.cameleer.server.core.search.TopError;
|
||||
import com.cameleer.server.core.storage.StatsStore.PunchcardCell;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import com.cameleer.server.app.ClickHouseTestHelper;
|
||||
@@ -13,7 +14,6 @@ import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
@@ -34,10 +34,22 @@ class ClickHouseStatsStoreIT {
|
||||
// base time: 2026-03-31T10:00:00Z (a Tuesday)
|
||||
private static final Instant BASE = Instant.parse("2026-03-31T10:00:00Z");
|
||||
|
||||
@BeforeAll
|
||||
static void pinJvmUtc() {
|
||||
// ClickHouse JDBC driver 0.9.x formats java.sql.Timestamp via its
|
||||
// toString(), which uses JVM default TZ. On a non-UTC dev JVM
|
||||
// (e.g. CEST), timestamps were being sent to CH off by the TZ offset
|
||||
// even though the CH server TZ is UTC. Pinning JVM default to UTC
|
||||
// for this test class makes inserts round-trip to the UTC-typed
|
||||
// bucket column predictably.
|
||||
java.util.TimeZone.setDefault(java.util.TimeZone.getTimeZone("UTC"));
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
// Pin driver to UTC so Timestamp binding doesn't depend on JVM default TZ.
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl() + "?use_server_time_zone=false&use_time_zone=UTC");
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
@@ -51,30 +63,6 @@ class ClickHouseStatsStoreIT {
|
||||
|
||||
seedTestData();
|
||||
|
||||
// Try the failing query to capture it in query_log, then check
|
||||
try {
|
||||
jdbc.queryForMap(
|
||||
"SELECT countMerge(total_count) AS tc, countIfMerge(failed_count) AS fc, " +
|
||||
"sumMerge(duration_sum) / greatest(countMerge(total_count), 1) AS avg, " +
|
||||
"quantileMerge(0.99)(p99_duration) AS p99, " +
|
||||
"countIfMerge(running_count) AS rc " +
|
||||
"FROM stats_1m_all WHERE tenant_id = 'default' " +
|
||||
"AND bucket >= '2026-03-31 09:59:00' AND bucket < '2026-03-31 10:05:00'");
|
||||
} catch (Exception e) {
|
||||
System.out.println("Expected error: " + e.getMessage().substring(0, 80));
|
||||
}
|
||||
|
||||
jdbc.execute("SYSTEM FLUSH LOGS");
|
||||
// Get ALL recent queries to see what the driver sends
|
||||
var queryLog = jdbc.queryForList(
|
||||
"SELECT type, substring(query, 1, 200) AS q " +
|
||||
"FROM system.query_log WHERE event_time > now() - 30 " +
|
||||
"AND query NOT LIKE '%system.query_log%' AND query NOT LIKE '%FLUSH%' " +
|
||||
"ORDER BY event_time DESC LIMIT 20");
|
||||
for (var entry : queryLog) {
|
||||
System.out.println("LOG: " + entry.get("type") + " | " + entry.get("q"));
|
||||
}
|
||||
|
||||
store = new ClickHouseStatsStore("default", jdbc);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user