Compare commits
60 Commits
v0.0.2
...
d4dbfa7ae6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d4dbfa7ae6 | ||
|
|
59374482bc | ||
|
|
43e187a023 | ||
|
|
bc1c71277c | ||
|
|
520181d241 | ||
|
|
95b9dea5c4 | ||
|
|
151b96a680 | ||
|
|
0661fd995f | ||
|
|
190ae2797d | ||
|
|
968117c41a | ||
|
|
7d7eb52afb | ||
|
|
c73e4abf68 | ||
|
|
cd63d300b3 | ||
|
|
f7daadaaa9 | ||
|
|
af080337f5 | ||
|
|
606f81a970 | ||
|
|
154bce366a | ||
|
|
a669df08bd | ||
|
|
af18fc4142 | ||
|
|
1a00eed389 | ||
|
|
0423518f72 | ||
|
|
9df00fdde0 | ||
|
|
052990bb59 | ||
|
|
eb0d26814f | ||
|
|
c8e6bbe059 | ||
|
|
a9eabe97f7 | ||
|
|
e724607a66 | ||
|
|
07f215b0fd | ||
|
|
38551eac9d | ||
|
|
31f7113b3f | ||
|
|
6052407c82 | ||
|
|
776f2ce90d | ||
|
|
62420cf0c2 | ||
|
|
81f7f8afe1 | ||
|
|
b30dfa39f4 | ||
|
|
20c8e17843 | ||
| a96fe59840 | |||
|
|
7cf849269f | ||
| 76afcaa637 | |||
|
|
b1c5cc0616 | ||
| 8838077eff | |||
|
|
8eeaecf6f3 | ||
| b54bef302d | |||
|
|
f8505401d7 | ||
| a0f1a4aba4 | |||
|
|
aa5fc1b830 | ||
|
|
c42e13932b | ||
|
|
59dd629b0e | ||
|
|
697c689192 | ||
|
|
7a2a0ee649 | ||
|
|
1b991f99a3 | ||
|
|
21991b6cf8 | ||
|
|
53766aeb56 | ||
|
|
bf0e9ea418 | ||
|
|
6e30b7ec65 | ||
|
|
08934376df | ||
|
|
23f901279a | ||
|
|
6171827243 | ||
|
|
c77d8a7af0 | ||
|
|
e7eda7a7b3 |
@@ -222,12 +222,21 @@ jobs:
|
||||
--from-literal=AUTHENTIK_SECRET_KEY="${AUTHENTIK_SECRET_KEY}" \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
kubectl create secret generic clickhouse-credentials \
|
||||
--namespace=cameleer \
|
||||
--from-literal=CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" \
|
||||
--from-literal=CLICKHOUSE_PASSWORD="$CLICKHOUSE_PASSWORD" \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
kubectl apply -f deploy/postgres.yaml
|
||||
kubectl -n cameleer rollout status statefulset/postgres --timeout=120s
|
||||
|
||||
kubectl apply -f deploy/opensearch.yaml
|
||||
kubectl -n cameleer rollout status statefulset/opensearch --timeout=180s
|
||||
|
||||
kubectl apply -f deploy/clickhouse.yaml
|
||||
kubectl -n cameleer rollout status statefulset/clickhouse --timeout=180s
|
||||
|
||||
kubectl apply -f deploy/authentik.yaml
|
||||
kubectl -n cameleer rollout status deployment/authentik-server --timeout=180s
|
||||
|
||||
@@ -253,6 +262,8 @@ jobs:
|
||||
AUTHENTIK_PG_USER: ${{ secrets.AUTHENTIK_PG_USER }}
|
||||
AUTHENTIK_PG_PASSWORD: ${{ secrets.AUTHENTIK_PG_PASSWORD }}
|
||||
AUTHENTIK_SECRET_KEY: ${{ secrets.AUTHENTIK_SECRET_KEY }}
|
||||
CLICKHOUSE_USER: ${{ secrets.CLICKHOUSE_USER }}
|
||||
CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_PASSWORD }}
|
||||
|
||||
deploy-feature:
|
||||
needs: docker
|
||||
@@ -292,7 +303,7 @@ jobs:
|
||||
run: kubectl create namespace "$BRANCH_NS" --dry-run=client -o yaml | kubectl apply -f -
|
||||
- name: Copy secrets from cameleer namespace
|
||||
run: |
|
||||
for SECRET in gitea-registry postgres-credentials opensearch-credentials cameleer-auth; do
|
||||
for SECRET in gitea-registry postgres-credentials opensearch-credentials clickhouse-credentials cameleer-auth; do
|
||||
kubectl get secret "$SECRET" -n cameleer -o json \
|
||||
| jq 'del(.metadata.namespace, .metadata.resourceVersion, .metadata.uid, .metadata.creationTimestamp, .metadata.managedFields)' \
|
||||
| kubectl apply -n "$BRANCH_NS" -f -
|
||||
|
||||
@@ -57,6 +57,12 @@
|
||||
<artifactId>opensearch-rest-client</artifactId>
|
||||
<version>2.19.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.clickhouse</groupId>
|
||||
<artifactId>clickhouse-jdbc</artifactId>
|
||||
<version>0.9.7</version>
|
||||
<classifier>all</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
@@ -126,6 +132,11 @@
|
||||
<version>2.1.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.testcontainers</groupId>
|
||||
<artifactId>testcontainers-clickhouse</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.awaitility</groupId>
|
||||
<artifactId>awaitility</artifactId>
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.cameleer3.server.app.config;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
|
||||
@Configuration
|
||||
@EnableConfigurationProperties(ClickHouseProperties.class)
|
||||
@ConditionalOnProperty(name = "clickhouse.enabled", havingValue = "true")
|
||||
public class ClickHouseConfig {
|
||||
|
||||
/**
|
||||
* Explicit primary PG DataSource. Required because adding a second DataSource
|
||||
* (ClickHouse) prevents Spring Boot auto-configuration from creating the default one.
|
||||
*/
|
||||
@Bean
|
||||
@Primary
|
||||
public DataSource dataSource(DataSourceProperties properties) {
|
||||
return properties.initializeDataSourceBuilder().build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public JdbcTemplate jdbcTemplate(@Qualifier("dataSource") DataSource dataSource) {
|
||||
return new JdbcTemplate(dataSource);
|
||||
}
|
||||
|
||||
@Bean(name = "clickHouseDataSource")
|
||||
public DataSource clickHouseDataSource(ClickHouseProperties props) {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(props.getUrl());
|
||||
ds.setUsername(props.getUsername());
|
||||
ds.setPassword(props.getPassword());
|
||||
ds.setMaximumPoolSize(10);
|
||||
ds.setPoolName("clickhouse-pool");
|
||||
return ds;
|
||||
}
|
||||
|
||||
@Bean(name = "clickHouseJdbcTemplate")
|
||||
public JdbcTemplate clickHouseJdbcTemplate(
|
||||
@Qualifier("clickHouseDataSource") DataSource ds) {
|
||||
return new JdbcTemplate(ds);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.cameleer3.server.app.config;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@ConfigurationProperties(prefix = "clickhouse")
|
||||
public class ClickHouseProperties {
|
||||
|
||||
private String url = "jdbc:clickhouse://localhost:8123/cameleer";
|
||||
private String username = "default";
|
||||
private String password = "";
|
||||
|
||||
public String getUrl() { return url; }
|
||||
public void setUrl(String url) { this.url = url; }
|
||||
|
||||
public String getUsername() { return username; }
|
||||
public void setUsername(String username) { this.username = username; }
|
||||
|
||||
public String getPassword() { return password; }
|
||||
public void setPassword(String password) { this.password = password; }
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package com.cameleer3.server.app.config;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.context.event.ApplicationReadyEvent;
|
||||
import org.springframework.context.event.EventListener;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
@Component
|
||||
@ConditionalOnProperty(name = "clickhouse.enabled", havingValue = "true")
|
||||
public class ClickHouseSchemaInitializer {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ClickHouseSchemaInitializer.class);
|
||||
|
||||
private final JdbcTemplate clickHouseJdbc;
|
||||
|
||||
public ClickHouseSchemaInitializer(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
this.clickHouseJdbc = clickHouseJdbc;
|
||||
}
|
||||
|
||||
@EventListener(ApplicationReadyEvent.class)
|
||||
public void initializeSchema() {
|
||||
try {
|
||||
PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
|
||||
Resource[] scripts = resolver.getResources("classpath:clickhouse/*.sql");
|
||||
|
||||
Arrays.sort(scripts, Comparator.comparing(Resource::getFilename));
|
||||
|
||||
for (Resource script : scripts) {
|
||||
String sql = script.getContentAsString(StandardCharsets.UTF_8);
|
||||
log.info("Executing ClickHouse schema script: {}", script.getFilename());
|
||||
for (String statement : sql.split(";")) {
|
||||
String trimmed = statement.trim();
|
||||
// Skip empty segments and comment-only segments
|
||||
String withoutComments = trimmed.lines()
|
||||
.filter(line -> !line.stripLeading().startsWith("--"))
|
||||
.map(String::trim)
|
||||
.filter(line -> !line.isEmpty())
|
||||
.reduce("", (a, b) -> a + b);
|
||||
if (!withoutComments.isEmpty()) {
|
||||
clickHouseJdbc.execute(trimmed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.info("ClickHouse schema initialization complete ({} scripts)", scripts.length);
|
||||
} catch (Exception e) {
|
||||
log.error("ClickHouse schema initialization failed — server will continue but ClickHouse features may not work", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,10 @@
|
||||
package com.cameleer3.server.app.config;
|
||||
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.ingestion.WriteBuffer;
|
||||
import com.cameleer3.server.core.storage.model.MetricsSnapshot;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@@ -19,4 +22,16 @@ public class IngestionBeanConfig {
|
||||
public WriteBuffer<MetricsSnapshot> metricsBuffer(IngestionConfig config) {
|
||||
return new WriteBuffer<>(config.getBufferCapacity());
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "clickhouse.enabled", havingValue = "true")
|
||||
public WriteBuffer<MergedExecution> executionBuffer(IngestionConfig config) {
|
||||
return new WriteBuffer<>(config.getBufferCapacity());
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "clickhouse.enabled", havingValue = "true")
|
||||
public WriteBuffer<ChunkAccumulator.ProcessorBatch> processorBatchBuffer(IngestionConfig config) {
|
||||
return new WriteBuffer<>(config.getBufferCapacity());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,36 @@
|
||||
package com.cameleer3.server.app.config;
|
||||
|
||||
import com.cameleer3.server.app.search.ClickHouseLogStore;
|
||||
import com.cameleer3.server.app.storage.ClickHouseAgentEventRepository;
|
||||
import com.cameleer3.server.app.storage.ClickHouseDiagramStore;
|
||||
import com.cameleer3.server.app.storage.ClickHouseMetricsQueryStore;
|
||||
import com.cameleer3.server.app.storage.ClickHouseMetricsStore;
|
||||
import com.cameleer3.server.app.storage.ClickHouseStatsStore;
|
||||
import com.cameleer3.server.app.storage.PostgresExecutionStore;
|
||||
import com.cameleer3.server.app.storage.PostgresMetricsQueryStore;
|
||||
import com.cameleer3.server.app.storage.PostgresMetricsStore;
|
||||
import com.cameleer3.server.core.admin.AuditRepository;
|
||||
import com.cameleer3.server.core.admin.AuditService;
|
||||
import com.cameleer3.server.core.agent.AgentEventRepository;
|
||||
import com.cameleer3.server.core.detail.DetailService;
|
||||
import com.cameleer3.server.core.indexing.SearchIndexer;
|
||||
import com.cameleer3.server.app.ingestion.ExecutionFlushScheduler;
|
||||
import com.cameleer3.server.app.search.ClickHouseSearchIndex;
|
||||
import com.cameleer3.server.app.storage.ClickHouseExecutionStore;
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.server.core.ingestion.IngestionService;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.ingestion.WriteBuffer;
|
||||
import com.cameleer3.server.core.storage.*;
|
||||
import com.cameleer3.server.core.storage.LogIndex;
|
||||
import com.cameleer3.server.core.storage.StatsStore;
|
||||
import com.cameleer3.server.core.storage.model.MetricsSnapshot;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
@Configuration
|
||||
public class StorageBeanConfig {
|
||||
@@ -41,4 +61,113 @@ public class StorageBeanConfig {
|
||||
return new IngestionService(executionStore, diagramStore, metricsBuffer,
|
||||
searchIndexer::onExecutionUpdated, bodySizeLimit);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.metrics", havingValue = "clickhouse")
|
||||
public MetricsStore clickHouseMetricsStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseMetricsStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.metrics", havingValue = "postgres", matchIfMissing = true)
|
||||
public MetricsStore postgresMetricsStore(JdbcTemplate jdbc) {
|
||||
return new PostgresMetricsStore(jdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.metrics", havingValue = "clickhouse")
|
||||
public MetricsQueryStore clickHouseMetricsQueryStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseMetricsQueryStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.metrics", havingValue = "postgres", matchIfMissing = true)
|
||||
public MetricsQueryStore postgresMetricsQueryStore(JdbcTemplate jdbc) {
|
||||
return new PostgresMetricsQueryStore(jdbc);
|
||||
}
|
||||
|
||||
// ── Execution Store ──────────────────────────────────────────────────
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.executions", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public ClickHouseExecutionStore clickHouseExecutionStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseExecutionStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.executions", havingValue = "postgres")
|
||||
public ExecutionStore executionStorePostgres(JdbcTemplate jdbc) {
|
||||
return new PostgresExecutionStore(jdbc);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.executions", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public ChunkAccumulator chunkAccumulator(
|
||||
WriteBuffer<MergedExecution> executionBuffer,
|
||||
WriteBuffer<ChunkAccumulator.ProcessorBatch> processorBatchBuffer,
|
||||
DiagramStore diagramStore) {
|
||||
return new ChunkAccumulator(
|
||||
executionBuffer::offer,
|
||||
processorBatchBuffer::offer,
|
||||
diagramStore,
|
||||
java.time.Duration.ofMinutes(5));
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.executions", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public ExecutionFlushScheduler executionFlushScheduler(
|
||||
WriteBuffer<MergedExecution> executionBuffer,
|
||||
WriteBuffer<ChunkAccumulator.ProcessorBatch> processorBatchBuffer,
|
||||
ClickHouseExecutionStore executionStore,
|
||||
ChunkAccumulator accumulator,
|
||||
IngestionConfig config) {
|
||||
return new ExecutionFlushScheduler(executionBuffer, processorBatchBuffer,
|
||||
executionStore, accumulator, config);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.search", havingValue = "clickhouse")
|
||||
public SearchIndex clickHouseSearchIndex(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseSearchIndex(clickHouseJdbc);
|
||||
}
|
||||
|
||||
// ── ClickHouse Stats Store ─────────────────────────────────────────
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.stats", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public StatsStore clickHouseStatsStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseStatsStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
// ── ClickHouse Diagram Store ──────────────────────────────────────
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.diagrams", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public DiagramStore clickHouseDiagramStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseDiagramStore(clickHouseJdbc);
|
||||
}
|
||||
|
||||
// ── ClickHouse Agent Event Repository ─────────────────────────────
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.events", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public AgentEventRepository clickHouseAgentEventRepository(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseAgentEventRepository(clickHouseJdbc);
|
||||
}
|
||||
|
||||
// ── ClickHouse Log Store ──────────────────────────────────────────
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(name = "cameleer.storage.logs", havingValue = "clickhouse", matchIfMissing = true)
|
||||
public LogIndex clickHouseLogStore(
|
||||
@Qualifier("clickHouseJdbcTemplate") JdbcTemplate clickHouseJdbc) {
|
||||
return new ClickHouseLogStore(clickHouseJdbc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,22 +2,23 @@ package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.app.dto.AgentMetricsResponse;
|
||||
import com.cameleer3.server.app.dto.MetricBucket;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import com.cameleer3.server.core.storage.MetricsQueryStore;
|
||||
import com.cameleer3.server.core.storage.model.MetricTimeSeries;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/agents/{agentId}/metrics")
|
||||
public class AgentMetricsController {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
private final MetricsQueryStore metricsQueryStore;
|
||||
|
||||
public AgentMetricsController(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
public AgentMetricsController(MetricsQueryStore metricsQueryStore) {
|
||||
this.metricsQueryStore = metricsQueryStore;
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
@@ -32,34 +33,18 @@ public class AgentMetricsController {
|
||||
if (to == null) to = Instant.now();
|
||||
|
||||
List<String> metricNames = Arrays.asList(names.split(","));
|
||||
long intervalMs = (to.toEpochMilli() - from.toEpochMilli()) / Math.max(buckets, 1);
|
||||
String intervalStr = intervalMs + " milliseconds";
|
||||
|
||||
Map<String, List<MetricBucket>> result = new LinkedHashMap<>();
|
||||
for (String name : metricNames) {
|
||||
result.put(name.trim(), new ArrayList<>());
|
||||
}
|
||||
Map<String, List<MetricTimeSeries.Bucket>> raw =
|
||||
metricsQueryStore.queryTimeSeries(agentId, metricNames, from, to, buckets);
|
||||
|
||||
String sql = """
|
||||
SELECT time_bucket(CAST(? AS interval), collected_at) AS bucket,
|
||||
metric_name,
|
||||
AVG(metric_value) AS avg_value
|
||||
FROM agent_metrics
|
||||
WHERE agent_id = ?
|
||||
AND collected_at >= ? AND collected_at < ?
|
||||
AND metric_name = ANY(?)
|
||||
GROUP BY bucket, metric_name
|
||||
ORDER BY bucket
|
||||
""";
|
||||
|
||||
String[] namesArray = metricNames.stream().map(String::trim).toArray(String[]::new);
|
||||
jdbc.query(sql, rs -> {
|
||||
String metricName = rs.getString("metric_name");
|
||||
Instant bucket = rs.getTimestamp("bucket").toInstant();
|
||||
double value = rs.getDouble("avg_value");
|
||||
result.computeIfAbsent(metricName, k -> new ArrayList<>())
|
||||
.add(new MetricBucket(bucket, value));
|
||||
}, intervalStr, agentId, Timestamp.from(from), Timestamp.from(to), namesArray);
|
||||
Map<String, List<MetricBucket>> result = raw.entrySet().stream()
|
||||
.collect(Collectors.toMap(
|
||||
Map.Entry::getKey,
|
||||
e -> e.getValue().stream()
|
||||
.map(b -> new MetricBucket(b.time(), b.value()))
|
||||
.toList(),
|
||||
(a, b) -> a,
|
||||
LinkedHashMap::new));
|
||||
|
||||
return new AgentMetricsResponse(result);
|
||||
}
|
||||
|
||||
@@ -255,13 +255,15 @@ public class AgentRegistrationController {
|
||||
Instant now = Instant.now();
|
||||
Instant from1m = now.minus(1, ChronoUnit.MINUTES);
|
||||
try {
|
||||
// Literal SQL — ClickHouse JDBC driver wraps prepared statements in sub-queries
|
||||
// that strip AggregateFunction column types, breaking -Merge combinators
|
||||
jdbc.query(
|
||||
"SELECT application_name, " +
|
||||
"SUM(total_count) AS total, " +
|
||||
"SUM(failed_count) AS failed, " +
|
||||
"countMerge(total_count) AS total, " +
|
||||
"countIfMerge(failed_count) AS failed, " +
|
||||
"COUNT(DISTINCT route_id) AS active_routes " +
|
||||
"FROM stats_1m_route WHERE bucket >= ? AND bucket < ? " +
|
||||
"GROUP BY application_name",
|
||||
"FROM stats_1m_route WHERE bucket >= " + lit(from1m) + " AND bucket < " + lit(now) +
|
||||
" GROUP BY application_name",
|
||||
rs -> {
|
||||
long total = rs.getLong("total");
|
||||
long failed = rs.getLong("failed");
|
||||
@@ -269,11 +271,18 @@ public class AgentRegistrationController {
|
||||
double errorRate = total > 0 ? (double) failed / total : 0.0;
|
||||
int activeRoutes = rs.getInt("active_routes");
|
||||
result.put(rs.getString("application_name"), new double[]{tps, errorRate, activeRoutes});
|
||||
},
|
||||
Timestamp.from(from1m), Timestamp.from(now));
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not query agent metrics: {}", e.getMessage());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Format an Instant as a ClickHouse DateTime literal. */
|
||||
private static String lit(Instant instant) {
|
||||
Instant truncated = instant.truncatedTo(ChronoUnit.SECONDS);
|
||||
String ts = new Timestamp(truncated.toEpochMilli()).toString();
|
||||
if (ts.endsWith(".0")) ts = ts.substring(0, ts.length() - 2);
|
||||
return "'" + ts + "'";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.common.model.ExecutionChunk;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Ingestion endpoint for execution chunk data (ClickHouse pipeline).
|
||||
* <p>
|
||||
* Accepts single or array {@link ExecutionChunk} payloads and feeds them
|
||||
* into the {@link ChunkAccumulator}. Only active when
|
||||
* {@code clickhouse.enabled=true} (conditional on the accumulator bean).
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/data")
|
||||
@ConditionalOnBean(ChunkAccumulator.class)
|
||||
@Tag(name = "Ingestion", description = "Data ingestion endpoints")
|
||||
public class ChunkIngestionController {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ChunkIngestionController.class);
|
||||
|
||||
private final ChunkAccumulator accumulator;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
public ChunkIngestionController(ChunkAccumulator accumulator) {
|
||||
this.accumulator = accumulator;
|
||||
this.objectMapper = new ObjectMapper();
|
||||
this.objectMapper.registerModule(new JavaTimeModule());
|
||||
this.objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
}
|
||||
|
||||
@PostMapping("/executions")
|
||||
@Operation(summary = "Ingest execution chunk")
|
||||
public ResponseEntity<Void> ingestChunks(@RequestBody String body) {
|
||||
try {
|
||||
String trimmed = body.strip();
|
||||
List<ExecutionChunk> chunks;
|
||||
if (trimmed.startsWith("[")) {
|
||||
chunks = objectMapper.readValue(trimmed, new TypeReference<List<ExecutionChunk>>() {});
|
||||
} else {
|
||||
ExecutionChunk single = objectMapper.readValue(trimmed, ExecutionChunk.class);
|
||||
chunks = List.of(single);
|
||||
}
|
||||
|
||||
for (ExecutionChunk chunk : chunks) {
|
||||
accumulator.onChunk(chunk);
|
||||
}
|
||||
|
||||
return ResponseEntity.accepted().build();
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to parse execution chunk payload: {}", e.getMessage());
|
||||
return ResponseEntity.badRequest().build();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -81,4 +81,16 @@ public class DetailController {
|
||||
.map(ResponseEntity::ok)
|
||||
.orElse(ResponseEntity.notFound().build());
|
||||
}
|
||||
|
||||
@GetMapping("/{executionId}/processors/by-seq/{seq}/snapshot")
|
||||
@Operation(summary = "Get exchange snapshot for a processor by seq number")
|
||||
@ApiResponse(responseCode = "200", description = "Snapshot data")
|
||||
@ApiResponse(responseCode = "404", description = "Snapshot not found")
|
||||
public ResponseEntity<Map<String, String>> processorSnapshotBySeq(
|
||||
@PathVariable String executionId,
|
||||
@PathVariable int seq) {
|
||||
return detailService.getProcessorSnapshotBySeq(executionId, seq)
|
||||
.map(ResponseEntity::ok)
|
||||
.orElse(ResponseEntity.notFound().build());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.cameleer3.server.app.controller;
|
||||
import com.cameleer3.common.model.RouteExecution;
|
||||
import com.cameleer3.server.core.agent.AgentInfo;
|
||||
import com.cameleer3.server.core.agent.AgentRegistryService;
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.server.core.ingestion.IngestionService;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
@@ -12,6 +13,7 @@ import io.swagger.v3.oas.annotations.responses.ApiResponse;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.security.core.Authentication;
|
||||
import org.springframework.security.core.context.SecurityContextHolder;
|
||||
@@ -23,13 +25,17 @@ import org.springframework.web.bind.annotation.RestController;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Ingestion endpoint for route execution data.
|
||||
* Legacy ingestion endpoint for route execution data (PostgreSQL path).
|
||||
* <p>
|
||||
* Accepts both single {@link RouteExecution} and arrays. Data is written
|
||||
* synchronously to PostgreSQL via {@link IngestionService}.
|
||||
* <p>
|
||||
* Only active when ClickHouse is disabled — when ClickHouse is enabled,
|
||||
* {@link ChunkIngestionController} takes over the {@code /executions} mapping.
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/data")
|
||||
@ConditionalOnMissingBean(ChunkAccumulator.class)
|
||||
@Tag(name = "Ingestion", description = "Data ingestion endpoints")
|
||||
public class ExecutionController {
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.common.model.LogBatch;
|
||||
import com.cameleer3.server.app.search.OpenSearchLogIndex;
|
||||
import com.cameleer3.server.core.storage.LogIndex;
|
||||
import com.cameleer3.server.core.agent.AgentInfo;
|
||||
import com.cameleer3.server.core.agent.AgentRegistryService;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
@@ -24,10 +24,10 @@ public class LogIngestionController {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(LogIngestionController.class);
|
||||
|
||||
private final OpenSearchLogIndex logIndex;
|
||||
private final LogIndex logIndex;
|
||||
private final AgentRegistryService registryService;
|
||||
|
||||
public LogIngestionController(OpenSearchLogIndex logIndex,
|
||||
public LogIngestionController(LogIndex logIndex,
|
||||
AgentRegistryService registryService) {
|
||||
this.logIndex = logIndex;
|
||||
this.registryService = registryService;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
package com.cameleer3.server.app.controller;
|
||||
|
||||
import com.cameleer3.server.app.dto.LogEntryResponse;
|
||||
import com.cameleer3.server.app.search.OpenSearchLogIndex;
|
||||
import com.cameleer3.server.core.storage.LogEntryResult;
|
||||
import com.cameleer3.server.core.storage.LogIndex;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@@ -18,9 +19,9 @@ import java.util.List;
|
||||
@Tag(name = "Application Logs", description = "Query application logs stored in OpenSearch")
|
||||
public class LogQueryController {
|
||||
|
||||
private final OpenSearchLogIndex logIndex;
|
||||
private final LogIndex logIndex;
|
||||
|
||||
public LogQueryController(OpenSearchLogIndex logIndex) {
|
||||
public LogQueryController(LogIndex logIndex) {
|
||||
this.logIndex = logIndex;
|
||||
}
|
||||
|
||||
@@ -42,9 +43,14 @@ public class LogQueryController {
|
||||
Instant fromInstant = from != null ? Instant.parse(from) : null;
|
||||
Instant toInstant = to != null ? Instant.parse(to) : null;
|
||||
|
||||
List<LogEntryResponse> entries = logIndex.search(
|
||||
List<LogEntryResult> results = logIndex.search(
|
||||
application, agentId, level, query, exchangeId, fromInstant, toInstant, limit);
|
||||
|
||||
List<LogEntryResponse> entries = results.stream()
|
||||
.map(r -> new LogEntryResponse(r.timestamp(), r.level(), r.loggerName(),
|
||||
r.message(), r.threadName(), r.stackTrace()))
|
||||
.toList();
|
||||
|
||||
return ResponseEntity.ok(entries);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,38 +78,37 @@ public class RouteCatalogController {
|
||||
Instant rangeTo = to != null ? Instant.parse(to) : now;
|
||||
Instant from1m = now.minus(1, ChronoUnit.MINUTES);
|
||||
|
||||
// Route exchange counts from continuous aggregate
|
||||
// Route exchange counts from AggregatingMergeTree (literal SQL — ClickHouse JDBC driver
|
||||
// wraps prepared statements in sub-queries that strip AggregateFunction column types)
|
||||
Map<String, Long> routeExchangeCounts = new LinkedHashMap<>();
|
||||
Map<String, Instant> routeLastSeen = new LinkedHashMap<>();
|
||||
try {
|
||||
jdbc.query(
|
||||
"SELECT application_name, route_id, SUM(total_count) AS cnt, MAX(bucket) AS last_seen " +
|
||||
"FROM stats_1m_route WHERE bucket >= ? AND bucket < ? " +
|
||||
"GROUP BY application_name, route_id",
|
||||
"SELECT application_name, route_id, countMerge(total_count) AS cnt, MAX(bucket) AS last_seen " +
|
||||
"FROM stats_1m_route WHERE bucket >= " + lit(rangeFrom) + " AND bucket < " + lit(rangeTo) +
|
||||
" GROUP BY application_name, route_id",
|
||||
rs -> {
|
||||
String key = rs.getString("application_name") + "/" + rs.getString("route_id");
|
||||
routeExchangeCounts.put(key, rs.getLong("cnt"));
|
||||
Timestamp ts = rs.getTimestamp("last_seen");
|
||||
if (ts != null) routeLastSeen.put(key, ts.toInstant());
|
||||
},
|
||||
Timestamp.from(rangeFrom), Timestamp.from(rangeTo));
|
||||
});
|
||||
} catch (Exception e) {
|
||||
// Continuous aggregate may not exist yet
|
||||
// AggregatingMergeTree table may not exist yet
|
||||
}
|
||||
|
||||
// Per-agent TPS from the last minute
|
||||
Map<String, Double> agentTps = new LinkedHashMap<>();
|
||||
try {
|
||||
jdbc.query(
|
||||
"SELECT application_name, SUM(total_count) AS cnt " +
|
||||
"FROM stats_1m_route WHERE bucket >= ? AND bucket < ? " +
|
||||
"GROUP BY application_name",
|
||||
"SELECT application_name, countMerge(total_count) AS cnt " +
|
||||
"FROM stats_1m_route WHERE bucket >= " + lit(from1m) + " AND bucket < " + lit(now) +
|
||||
" GROUP BY application_name",
|
||||
rs -> {
|
||||
// This gives per-app TPS; we'll distribute among agents below
|
||||
},
|
||||
Timestamp.from(from1m), Timestamp.from(now));
|
||||
});
|
||||
} catch (Exception e) {
|
||||
// Continuous aggregate may not exist yet
|
||||
// AggregatingMergeTree table may not exist yet
|
||||
}
|
||||
|
||||
// Build catalog entries
|
||||
@@ -158,6 +157,14 @@ public class RouteCatalogController {
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
/** Format an Instant as a ClickHouse DateTime literal. */
|
||||
private static String lit(Instant instant) {
|
||||
Instant truncated = instant.truncatedTo(ChronoUnit.SECONDS);
|
||||
String ts = new Timestamp(truncated.toEpochMilli()).toString();
|
||||
if (ts.endsWith(".0")) ts = ts.substring(0, ts.length() - 2);
|
||||
return "'" + ts + "'";
|
||||
}
|
||||
|
||||
private String computeWorstHealth(List<AgentInfo> agents) {
|
||||
boolean hasDead = false;
|
||||
boolean hasStale = false;
|
||||
|
||||
@@ -52,20 +52,18 @@ public class RouteMetricsController {
|
||||
Instant fromInstant = from != null ? Instant.parse(from) : toInstant.minus(24, ChronoUnit.HOURS);
|
||||
long windowSeconds = Duration.between(fromInstant, toInstant).toSeconds();
|
||||
|
||||
// Literal SQL — ClickHouse JDBC driver wraps prepared statements in sub-queries
|
||||
// that strip AggregateFunction column types, breaking -Merge combinators
|
||||
var sql = new StringBuilder(
|
||||
"SELECT application_name, route_id, " +
|
||||
"SUM(total_count) AS total, " +
|
||||
"SUM(failed_count) AS failed, " +
|
||||
"CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum) / SUM(total_count) ELSE 0 END AS avg_dur, " +
|
||||
"COALESCE(MAX(p99_duration), 0) AS p99_dur " +
|
||||
"FROM stats_1m_route WHERE bucket >= ? AND bucket < ?");
|
||||
var params = new ArrayList<Object>();
|
||||
params.add(Timestamp.from(fromInstant));
|
||||
params.add(Timestamp.from(toInstant));
|
||||
"countMerge(total_count) AS total, " +
|
||||
"countIfMerge(failed_count) AS failed, " +
|
||||
"CASE WHEN countMerge(total_count) > 0 THEN toFloat64(sumMerge(duration_sum)) / countMerge(total_count) ELSE 0 END AS avg_dur, " +
|
||||
"COALESCE(quantileMerge(0.99)(p99_duration), 0) AS p99_dur " +
|
||||
"FROM stats_1m_route WHERE bucket >= " + lit(fromInstant) + " AND bucket < " + lit(toInstant));
|
||||
|
||||
if (appId != null) {
|
||||
sql.append(" AND application_name = ?");
|
||||
params.add(appId);
|
||||
sql.append(" AND application_name = " + lit(appId));
|
||||
}
|
||||
sql.append(" GROUP BY application_name, route_id ORDER BY application_name, route_id");
|
||||
|
||||
@@ -88,7 +86,7 @@ public class RouteMetricsController {
|
||||
routeKeys.add(new RouteKey(applicationName, routeId));
|
||||
return new RouteMetrics(routeId, applicationName, total, successRate,
|
||||
avgDur, p99Dur, errorRate, tps, List.of(), -1.0);
|
||||
}, params.toArray());
|
||||
});
|
||||
|
||||
// Fetch sparklines (12 buckets over the time window)
|
||||
if (!metrics.isEmpty()) {
|
||||
@@ -98,15 +96,13 @@ public class RouteMetricsController {
|
||||
for (int i = 0; i < metrics.size(); i++) {
|
||||
RouteMetrics m = metrics.get(i);
|
||||
try {
|
||||
List<Double> sparkline = jdbc.query(
|
||||
"SELECT time_bucket(? * INTERVAL '1 second', bucket) AS period, " +
|
||||
"COALESCE(SUM(total_count), 0) AS cnt " +
|
||||
"FROM stats_1m_route WHERE bucket >= ? AND bucket < ? " +
|
||||
"AND application_name = ? AND route_id = ? " +
|
||||
"GROUP BY period ORDER BY period",
|
||||
(rs, rowNum) -> rs.getDouble("cnt"),
|
||||
bucketSeconds, Timestamp.from(fromInstant), Timestamp.from(toInstant),
|
||||
m.appId(), m.routeId());
|
||||
String sparkSql = "SELECT toStartOfInterval(bucket, toIntervalSecond(" + bucketSeconds + ")) AS period, " +
|
||||
"COALESCE(countMerge(total_count), 0) AS cnt " +
|
||||
"FROM stats_1m_route WHERE bucket >= " + lit(fromInstant) + " AND bucket < " + lit(toInstant) +
|
||||
" AND application_name = " + lit(m.appId()) + " AND route_id = " + lit(m.routeId()) +
|
||||
" GROUP BY period ORDER BY period";
|
||||
List<Double> sparkline = jdbc.query(sparkSql,
|
||||
(rs, rowNum) -> rs.getDouble("cnt"));
|
||||
metrics.set(i, new RouteMetrics(m.routeId(), m.appId(), m.exchangeCount(),
|
||||
m.successRate(), m.avgDurationMs(), m.p99DurationMs(),
|
||||
m.errorRate(), m.throughputPerSec(), sparkline, m.slaCompliance()));
|
||||
@@ -153,25 +149,22 @@ public class RouteMetricsController {
|
||||
Instant toInstant = to != null ? to : Instant.now();
|
||||
Instant fromInstant = from != null ? from : toInstant.minus(24, ChronoUnit.HOURS);
|
||||
|
||||
// Literal SQL for AggregatingMergeTree -Merge combinators
|
||||
var sql = new StringBuilder(
|
||||
"SELECT processor_id, processor_type, route_id, application_name, " +
|
||||
"SUM(total_count) AS total_count, " +
|
||||
"SUM(failed_count) AS failed_count, " +
|
||||
"CASE WHEN SUM(total_count) > 0 THEN SUM(duration_sum)::double precision / SUM(total_count) ELSE 0 END AS avg_duration_ms, " +
|
||||
"MAX(p99_duration) AS p99_duration_ms " +
|
||||
"countMerge(total_count) AS total_count, " +
|
||||
"countIfMerge(failed_count) AS failed_count, " +
|
||||
"CASE WHEN countMerge(total_count) > 0 THEN toFloat64(sumMerge(duration_sum)) / countMerge(total_count) ELSE 0 END AS avg_duration_ms, " +
|
||||
"quantileMerge(0.99)(p99_duration) AS p99_duration_ms " +
|
||||
"FROM stats_1m_processor_detail " +
|
||||
"WHERE bucket >= ? AND bucket < ? AND route_id = ?");
|
||||
var params = new ArrayList<Object>();
|
||||
params.add(Timestamp.from(fromInstant));
|
||||
params.add(Timestamp.from(toInstant));
|
||||
params.add(routeId);
|
||||
"WHERE bucket >= " + lit(fromInstant) + " AND bucket < " + lit(toInstant) +
|
||||
" AND route_id = " + lit(routeId));
|
||||
|
||||
if (appId != null) {
|
||||
sql.append(" AND application_name = ?");
|
||||
params.add(appId);
|
||||
sql.append(" AND application_name = " + lit(appId));
|
||||
}
|
||||
sql.append(" GROUP BY processor_id, processor_type, route_id, application_name");
|
||||
sql.append(" ORDER BY SUM(total_count) DESC");
|
||||
sql.append(" ORDER BY countMerge(total_count) DESC");
|
||||
|
||||
List<ProcessorMetrics> metrics = jdbc.query(sql.toString(), (rs, rowNum) -> {
|
||||
long totalCount = rs.getLong("total_count");
|
||||
@@ -187,8 +180,21 @@ public class RouteMetricsController {
|
||||
rs.getDouble("avg_duration_ms"),
|
||||
rs.getDouble("p99_duration_ms"),
|
||||
errorRate);
|
||||
}, params.toArray());
|
||||
});
|
||||
|
||||
return ResponseEntity.ok(metrics);
|
||||
}
|
||||
|
||||
/** Format an Instant as a ClickHouse DateTime literal. */
|
||||
private static String lit(Instant instant) {
|
||||
Instant truncated = instant.truncatedTo(ChronoUnit.SECONDS);
|
||||
String ts = new Timestamp(truncated.toEpochMilli()).toString();
|
||||
if (ts.endsWith(".0")) ts = ts.substring(0, ts.length() - 2);
|
||||
return "'" + ts + "'";
|
||||
}
|
||||
|
||||
/** Format a string as a SQL literal with single-quote escaping. */
|
||||
private static String lit(String value) {
|
||||
return "'" + value.replace("'", "\\'") + "'";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
package com.cameleer3.server.app.ingestion;
|
||||
|
||||
import com.cameleer3.server.app.config.IngestionConfig;
|
||||
import com.cameleer3.server.app.storage.ClickHouseExecutionStore;
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.ingestion.WriteBuffer;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.context.SmartLifecycle;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Scheduled flush task for ClickHouse execution and processor write buffers.
|
||||
* <p>
|
||||
* Drains both buffers on a fixed interval and delegates batch inserts to
|
||||
* {@link ClickHouseExecutionStore}. Also periodically sweeps stale exchanges
|
||||
* from the {@link ChunkAccumulator}.
|
||||
* <p>
|
||||
* Not a {@code @Component} — instantiated as a {@code @Bean} in StorageBeanConfig.
|
||||
*/
|
||||
public class ExecutionFlushScheduler implements SmartLifecycle {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ExecutionFlushScheduler.class);
|
||||
|
||||
private final WriteBuffer<MergedExecution> executionBuffer;
|
||||
private final WriteBuffer<ChunkAccumulator.ProcessorBatch> processorBuffer;
|
||||
private final ClickHouseExecutionStore executionStore;
|
||||
private final ChunkAccumulator accumulator;
|
||||
private final int batchSize;
|
||||
private volatile boolean running = false;
|
||||
|
||||
public ExecutionFlushScheduler(WriteBuffer<MergedExecution> executionBuffer,
|
||||
WriteBuffer<ChunkAccumulator.ProcessorBatch> processorBuffer,
|
||||
ClickHouseExecutionStore executionStore,
|
||||
ChunkAccumulator accumulator,
|
||||
IngestionConfig config) {
|
||||
this.executionBuffer = executionBuffer;
|
||||
this.processorBuffer = processorBuffer;
|
||||
this.executionStore = executionStore;
|
||||
this.accumulator = accumulator;
|
||||
this.batchSize = config.getBatchSize();
|
||||
}
|
||||
|
||||
@Scheduled(fixedDelayString = "${ingestion.flush-interval-ms:1000}")
|
||||
public void flush() {
|
||||
try {
|
||||
List<MergedExecution> executions = executionBuffer.drain(batchSize);
|
||||
if (!executions.isEmpty()) {
|
||||
executionStore.insertExecutionBatch(executions);
|
||||
log.debug("Flushed {} executions to ClickHouse", executions.size());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to flush executions", e);
|
||||
}
|
||||
|
||||
try {
|
||||
List<ChunkAccumulator.ProcessorBatch> batches = processorBuffer.drain(batchSize);
|
||||
for (ChunkAccumulator.ProcessorBatch batch : batches) {
|
||||
executionStore.insertProcessorBatch(
|
||||
batch.tenantId(),
|
||||
batch.executionId(),
|
||||
batch.routeId(),
|
||||
batch.applicationName(),
|
||||
batch.execStartTime(),
|
||||
batch.processors());
|
||||
}
|
||||
if (!batches.isEmpty()) {
|
||||
log.debug("Flushed {} processor batches to ClickHouse", batches.size());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to flush processor batches", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Scheduled(fixedDelay = 60_000)
|
||||
public void sweepStale() {
|
||||
try {
|
||||
accumulator.sweepStale();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to sweep stale exchanges", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() {
|
||||
running = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
// Drain remaining executions on shutdown
|
||||
while (executionBuffer.size() > 0) {
|
||||
List<MergedExecution> batch = executionBuffer.drain(batchSize);
|
||||
if (batch.isEmpty()) break;
|
||||
try {
|
||||
executionStore.insertExecutionBatch(batch);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to flush executions during shutdown", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Drain remaining processor batches on shutdown
|
||||
while (processorBuffer.size() > 0) {
|
||||
List<ChunkAccumulator.ProcessorBatch> batches = processorBuffer.drain(batchSize);
|
||||
if (batches.isEmpty()) break;
|
||||
try {
|
||||
for (ChunkAccumulator.ProcessorBatch batch : batches) {
|
||||
executionStore.insertProcessorBatch(
|
||||
batch.tenantId(),
|
||||
batch.executionId(),
|
||||
batch.routeId(),
|
||||
batch.applicationName(),
|
||||
batch.execStartTime(),
|
||||
batch.processors());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to flush processor batches during shutdown", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
running = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isRunning() {
|
||||
return running;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPhase() {
|
||||
return Integer.MAX_VALUE - 1;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
package com.cameleer3.server.app.search;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
import com.cameleer3.server.core.storage.LogEntryResult;
|
||||
import com.cameleer3.server.core.storage.LogIndex;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* ClickHouse-backed implementation of {@link LogIndex}.
|
||||
* Stores application logs in the {@code logs} MergeTree table with
|
||||
* ngram bloom-filter indexes for efficient substring search.
|
||||
*/
|
||||
public class ClickHouseLogStore implements LogIndex {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ClickHouseLogStore.class);
|
||||
private static final String TENANT = "default";
|
||||
private static final DateTimeFormatter ISO_FMT = DateTimeFormatter.ISO_INSTANT;
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseLogStore(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexBatch(String agentId, String application, List<LogEntry> entries) {
|
||||
if (entries == null || entries.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String sql = "INSERT INTO logs (tenant_id, timestamp, application, agent_id, level, " +
|
||||
"logger_name, message, thread_name, stack_trace, exchange_id, mdc) " +
|
||||
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
|
||||
|
||||
jdbc.batchUpdate(sql, entries, entries.size(), (ps, entry) -> {
|
||||
Instant ts = entry.getTimestamp() != null ? entry.getTimestamp() : Instant.now();
|
||||
ps.setTimestamp(1, Timestamp.from(ts));
|
||||
ps.setString(2, application);
|
||||
ps.setString(3, agentId);
|
||||
ps.setString(4, entry.getLevel() != null ? entry.getLevel() : "");
|
||||
ps.setString(5, entry.getLoggerName() != null ? entry.getLoggerName() : "");
|
||||
ps.setString(6, entry.getMessage() != null ? entry.getMessage() : "");
|
||||
ps.setString(7, entry.getThreadName() != null ? entry.getThreadName() : "");
|
||||
ps.setString(8, entry.getStackTrace() != null ? entry.getStackTrace() : "");
|
||||
|
||||
// Extract camel.exchangeId from MDC into top-level column
|
||||
Map<String, String> mdc = entry.getMdc() != null ? entry.getMdc() : Collections.emptyMap();
|
||||
String exchangeId = mdc.getOrDefault("camel.exchangeId", "");
|
||||
ps.setString(9, exchangeId);
|
||||
|
||||
// ClickHouse JDBC handles java.util.Map natively for Map columns
|
||||
ps.setObject(10, mdc);
|
||||
});
|
||||
|
||||
log.debug("Indexed {} log entries for agent={}, app={}", entries.size(), agentId, application);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<LogEntryResult> search(String application, String agentId, String level,
|
||||
String query, String exchangeId,
|
||||
Instant from, Instant to, int limit) {
|
||||
StringBuilder sql = new StringBuilder(
|
||||
"SELECT timestamp, level, logger_name, message, thread_name, stack_trace " +
|
||||
"FROM logs WHERE tenant_id = 'default' AND application = ?");
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(application);
|
||||
|
||||
if (agentId != null && !agentId.isEmpty()) {
|
||||
sql.append(" AND agent_id = ?");
|
||||
params.add(agentId);
|
||||
}
|
||||
|
||||
if (level != null && !level.isEmpty()) {
|
||||
sql.append(" AND level = ?");
|
||||
params.add(level.toUpperCase());
|
||||
}
|
||||
|
||||
if (exchangeId != null && !exchangeId.isEmpty()) {
|
||||
sql.append(" AND (exchange_id = ? OR (mapContains(mdc, 'camel.exchangeId') AND mdc['camel.exchangeId'] = ?))");
|
||||
params.add(exchangeId);
|
||||
params.add(exchangeId);
|
||||
}
|
||||
|
||||
if (query != null && !query.isEmpty()) {
|
||||
sql.append(" AND message LIKE ?");
|
||||
params.add("%" + query + "%");
|
||||
}
|
||||
|
||||
if (from != null) {
|
||||
sql.append(" AND timestamp >= ?");
|
||||
params.add(Timestamp.from(from));
|
||||
}
|
||||
|
||||
if (to != null) {
|
||||
sql.append(" AND timestamp <= ?");
|
||||
params.add(Timestamp.from(to));
|
||||
}
|
||||
|
||||
sql.append(" ORDER BY timestamp DESC LIMIT ?");
|
||||
params.add(limit);
|
||||
|
||||
return jdbc.query(sql.toString(), params.toArray(), (rs, rowNum) -> {
|
||||
Timestamp ts = rs.getTimestamp("timestamp");
|
||||
String timestampStr = ts != null
|
||||
? ts.toInstant().atOffset(ZoneOffset.UTC).format(DateTimeFormatter.ISO_INSTANT)
|
||||
: null;
|
||||
return new LogEntryResult(
|
||||
timestampStr,
|
||||
rs.getString("level"),
|
||||
rs.getString("logger_name"),
|
||||
rs.getString("message"),
|
||||
rs.getString("thread_name"),
|
||||
rs.getString("stack_trace")
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,304 @@
|
||||
package com.cameleer3.server.app.search;
|
||||
|
||||
import com.cameleer3.server.core.search.ExecutionSummary;
|
||||
import com.cameleer3.server.core.search.SearchRequest;
|
||||
import com.cameleer3.server.core.search.SearchResult;
|
||||
import com.cameleer3.server.core.storage.SearchIndex;
|
||||
import com.cameleer3.server.core.storage.model.ExecutionDocument;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* ClickHouse-backed implementation of {@link SearchIndex}.
|
||||
* <p>
|
||||
* Queries the {@code executions} and {@code processor_executions} tables directly
|
||||
* using SQL with ngram bloom-filter indexes for full-text search acceleration.
|
||||
* <p>
|
||||
* The {@link #index} and {@link #delete} methods are no-ops because data is
|
||||
* written by the accumulator/store pipeline, not the search index.
|
||||
*/
|
||||
public class ClickHouseSearchIndex implements SearchIndex {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ClickHouseSearchIndex.class);
|
||||
private static final ObjectMapper JSON = new ObjectMapper();
|
||||
private static final TypeReference<Map<String, String>> STR_MAP = new TypeReference<>() {};
|
||||
private static final int HIGHLIGHT_CONTEXT_CHARS = 120;
|
||||
|
||||
private static final Map<String, String> SORT_FIELD_MAP = Map.of(
|
||||
"startTime", "start_time",
|
||||
"durationMs", "duration_ms",
|
||||
"status", "status",
|
||||
"agentId", "agent_id",
|
||||
"routeId", "route_id",
|
||||
"correlationId", "correlation_id",
|
||||
"executionId", "execution_id",
|
||||
"applicationName", "application_name"
|
||||
);
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseSearchIndex(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void index(ExecutionDocument document) {
|
||||
// No-op: data is written by ClickHouseExecutionStore
|
||||
}
|
||||
|
||||
@Override
|
||||
public void delete(String executionId) {
|
||||
// No-op: ClickHouse ReplacingMergeTree handles versioning
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResult<ExecutionSummary> search(SearchRequest request) {
|
||||
try {
|
||||
List<Object> params = new ArrayList<>();
|
||||
String whereClause = buildWhereClause(request, params);
|
||||
String searchTerm = request.text();
|
||||
|
||||
// Count query
|
||||
String countSql = "SELECT count() FROM executions FINAL WHERE " + whereClause;
|
||||
Long total = jdbc.queryForObject(countSql, Long.class, params.toArray());
|
||||
if (total == null || total == 0) {
|
||||
return SearchResult.empty(request.offset(), request.limit());
|
||||
}
|
||||
|
||||
// Data query
|
||||
String sortColumn = SORT_FIELD_MAP.getOrDefault(request.sortField(), "start_time");
|
||||
String sortDir = "asc".equalsIgnoreCase(request.sortDir()) ? "ASC" : "DESC";
|
||||
|
||||
String dataSql = "SELECT execution_id, route_id, agent_id, application_name, "
|
||||
+ "status, start_time, end_time, duration_ms, correlation_id, "
|
||||
+ "error_message, error_stacktrace, diagram_content_hash, attributes, "
|
||||
+ "has_trace_data, is_replay, "
|
||||
+ "input_body, output_body, input_headers, output_headers, root_cause_message "
|
||||
+ "FROM executions FINAL WHERE " + whereClause
|
||||
+ " ORDER BY " + sortColumn + " " + sortDir
|
||||
+ " LIMIT ? OFFSET ?";
|
||||
|
||||
List<Object> dataParams = new ArrayList<>(params);
|
||||
dataParams.add(request.limit());
|
||||
dataParams.add(request.offset());
|
||||
|
||||
List<ExecutionSummary> data = jdbc.query(
|
||||
dataSql, dataParams.toArray(),
|
||||
(rs, rowNum) -> mapRow(rs, searchTerm));
|
||||
|
||||
return new SearchResult<>(data, total, request.offset(), request.limit());
|
||||
} catch (Exception e) {
|
||||
log.error("ClickHouse search failed", e);
|
||||
return SearchResult.empty(request.offset(), request.limit());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long count(SearchRequest request) {
|
||||
try {
|
||||
List<Object> params = new ArrayList<>();
|
||||
String whereClause = buildWhereClause(request, params);
|
||||
String sql = "SELECT count() FROM executions FINAL WHERE " + whereClause;
|
||||
Long result = jdbc.queryForObject(sql, Long.class, params.toArray());
|
||||
return result != null ? result : 0L;
|
||||
} catch (Exception e) {
|
||||
log.error("ClickHouse count failed", e);
|
||||
return 0L;
|
||||
}
|
||||
}
|
||||
|
||||
private String buildWhereClause(SearchRequest request, List<Object> params) {
|
||||
List<String> conditions = new ArrayList<>();
|
||||
conditions.add("tenant_id = 'default'");
|
||||
|
||||
if (request.timeFrom() != null) {
|
||||
conditions.add("start_time >= ?");
|
||||
params.add(Timestamp.from(request.timeFrom()));
|
||||
}
|
||||
if (request.timeTo() != null) {
|
||||
conditions.add("start_time <= ?");
|
||||
params.add(Timestamp.from(request.timeTo()));
|
||||
}
|
||||
|
||||
if (request.status() != null && !request.status().isBlank()) {
|
||||
String[] statuses = request.status().split(",");
|
||||
if (statuses.length == 1) {
|
||||
conditions.add("status = ?");
|
||||
params.add(statuses[0].trim());
|
||||
} else {
|
||||
String placeholders = String.join(", ", Collections.nCopies(statuses.length, "?"));
|
||||
conditions.add("status IN (" + placeholders + ")");
|
||||
for (String s : statuses) {
|
||||
params.add(s.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (request.routeId() != null) {
|
||||
conditions.add("route_id = ?");
|
||||
params.add(request.routeId());
|
||||
}
|
||||
|
||||
if (request.agentId() != null) {
|
||||
conditions.add("agent_id = ?");
|
||||
params.add(request.agentId());
|
||||
}
|
||||
|
||||
if (request.correlationId() != null) {
|
||||
conditions.add("correlation_id = ?");
|
||||
params.add(request.correlationId());
|
||||
}
|
||||
|
||||
if (request.application() != null && !request.application().isBlank()) {
|
||||
conditions.add("application_name = ?");
|
||||
params.add(request.application());
|
||||
}
|
||||
|
||||
if (request.agentIds() != null && !request.agentIds().isEmpty()) {
|
||||
String placeholders = String.join(", ", Collections.nCopies(request.agentIds().size(), "?"));
|
||||
conditions.add("agent_id IN (" + placeholders + ")");
|
||||
params.addAll(request.agentIds());
|
||||
}
|
||||
|
||||
if (request.durationMin() != null) {
|
||||
conditions.add("duration_ms >= ?");
|
||||
params.add(request.durationMin());
|
||||
}
|
||||
|
||||
if (request.durationMax() != null) {
|
||||
conditions.add("duration_ms <= ?");
|
||||
params.add(request.durationMax());
|
||||
}
|
||||
|
||||
// Global full-text search: execution-level _search_text OR processor-level _search_text
|
||||
if (request.text() != null && !request.text().isBlank()) {
|
||||
String likeTerm = "%" + escapeLike(request.text()) + "%";
|
||||
conditions.add("(_search_text LIKE ? OR execution_id IN ("
|
||||
+ "SELECT DISTINCT execution_id FROM processor_executions "
|
||||
+ "WHERE tenant_id = 'default' AND _search_text LIKE ?))");
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
}
|
||||
|
||||
// Scoped body search in processor_executions
|
||||
if (request.textInBody() != null && !request.textInBody().isBlank()) {
|
||||
String likeTerm = "%" + escapeLike(request.textInBody()) + "%";
|
||||
conditions.add("execution_id IN ("
|
||||
+ "SELECT DISTINCT execution_id FROM processor_executions "
|
||||
+ "WHERE tenant_id = 'default' AND (input_body LIKE ? OR output_body LIKE ?))");
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
}
|
||||
|
||||
// Scoped headers search in processor_executions
|
||||
if (request.textInHeaders() != null && !request.textInHeaders().isBlank()) {
|
||||
String likeTerm = "%" + escapeLike(request.textInHeaders()) + "%";
|
||||
conditions.add("execution_id IN ("
|
||||
+ "SELECT DISTINCT execution_id FROM processor_executions "
|
||||
+ "WHERE tenant_id = 'default' AND (input_headers LIKE ? OR output_headers LIKE ?))");
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
}
|
||||
|
||||
// Scoped error search: execution-level + processor-level
|
||||
if (request.textInErrors() != null && !request.textInErrors().isBlank()) {
|
||||
String likeTerm = "%" + escapeLike(request.textInErrors()) + "%";
|
||||
conditions.add("(error_message LIKE ? OR error_stacktrace LIKE ? OR execution_id IN ("
|
||||
+ "SELECT DISTINCT execution_id FROM processor_executions "
|
||||
+ "WHERE tenant_id = 'default' AND (error_message LIKE ? OR error_stacktrace LIKE ?)))");
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
params.add(likeTerm);
|
||||
}
|
||||
|
||||
return String.join(" AND ", conditions);
|
||||
}
|
||||
|
||||
private ExecutionSummary mapRow(ResultSet rs, String searchTerm) throws SQLException {
|
||||
String executionId = rs.getString("execution_id");
|
||||
String routeId = rs.getString("route_id");
|
||||
String agentId = rs.getString("agent_id");
|
||||
String applicationName = rs.getString("application_name");
|
||||
String status = rs.getString("status");
|
||||
|
||||
Timestamp startTs = rs.getTimestamp("start_time");
|
||||
Instant startTime = startTs != null ? startTs.toInstant() : null;
|
||||
|
||||
Timestamp endTs = rs.getTimestamp("end_time");
|
||||
Instant endTime = endTs != null ? endTs.toInstant() : null;
|
||||
|
||||
long durationMs = rs.getLong("duration_ms");
|
||||
String correlationId = rs.getString("correlation_id");
|
||||
String errorMessage = rs.getString("error_message");
|
||||
String errorStacktrace = rs.getString("error_stacktrace");
|
||||
String diagramContentHash = rs.getString("diagram_content_hash");
|
||||
String attributesJson = rs.getString("attributes");
|
||||
boolean hasTraceData = rs.getBoolean("has_trace_data");
|
||||
boolean isReplay = rs.getBoolean("is_replay");
|
||||
String inputBody = rs.getString("input_body");
|
||||
String outputBody = rs.getString("output_body");
|
||||
String inputHeaders = rs.getString("input_headers");
|
||||
String outputHeaders = rs.getString("output_headers");
|
||||
String rootCauseMessage = rs.getString("root_cause_message");
|
||||
|
||||
Map<String, String> attributes = parseAttributesJson(attributesJson);
|
||||
|
||||
// Application-side highlighting
|
||||
String highlight = null;
|
||||
if (searchTerm != null && !searchTerm.isBlank()) {
|
||||
highlight = findHighlight(searchTerm, errorMessage, errorStacktrace,
|
||||
inputBody, outputBody, inputHeaders, outputHeaders, attributesJson, rootCauseMessage);
|
||||
}
|
||||
|
||||
return new ExecutionSummary(
|
||||
executionId, routeId, agentId, applicationName, status,
|
||||
startTime, endTime, durationMs,
|
||||
correlationId, errorMessage, diagramContentHash,
|
||||
highlight, attributes, hasTraceData, isReplay
|
||||
);
|
||||
}
|
||||
|
||||
private String findHighlight(String searchTerm, String... fields) {
|
||||
for (String field : fields) {
|
||||
String snippet = extractSnippet(field, searchTerm, HIGHLIGHT_CONTEXT_CHARS);
|
||||
if (snippet != null) {
|
||||
return snippet;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static String extractSnippet(String text, String searchTerm, int contextChars) {
|
||||
if (text == null || text.isEmpty() || searchTerm == null) return null;
|
||||
int idx = text.toLowerCase().indexOf(searchTerm.toLowerCase());
|
||||
if (idx < 0) return null;
|
||||
int start = Math.max(0, idx - contextChars / 2);
|
||||
int end = Math.min(text.length(), idx + searchTerm.length() + contextChars / 2);
|
||||
return (start > 0 ? "..." : "") + text.substring(start, end) + (end < text.length() ? "..." : "");
|
||||
}
|
||||
|
||||
private static String escapeLike(String term) {
|
||||
return term.replace("\\", "\\\\")
|
||||
.replace("%", "\\%")
|
||||
.replace("_", "\\_");
|
||||
}
|
||||
|
||||
private static Map<String, String> parseAttributesJson(String json) {
|
||||
if (json == null || json.isBlank()) return null;
|
||||
try {
|
||||
return JSON.readValue(json, STR_MAP);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,7 @@ import org.opensearch.client.opensearch.indices.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -30,6 +31,7 @@ import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Repository
|
||||
@ConditionalOnProperty(name = "cameleer.storage.search", havingValue = "opensearch", matchIfMissing = true)
|
||||
public class OpenSearchIndex implements SearchIndex {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(OpenSearchIndex.class);
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
package com.cameleer3.server.app.search;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
import com.cameleer3.server.app.dto.LogEntryResponse;
|
||||
import com.cameleer3.server.core.storage.LogEntryResult;
|
||||
import com.cameleer3.server.core.storage.LogIndex;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import org.opensearch.client.json.JsonData;
|
||||
import org.opensearch.client.opensearch.OpenSearchClient;
|
||||
@@ -18,6 +19,7 @@ import org.opensearch.client.opensearch.indices.PutIndexTemplateRequest;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -30,7 +32,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Repository
|
||||
public class OpenSearchLogIndex {
|
||||
@ConditionalOnProperty(name = "cameleer.storage.logs", havingValue = "opensearch")
|
||||
public class OpenSearchLogIndex implements LogIndex {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(OpenSearchLogIndex.class);
|
||||
private static final DateTimeFormatter DAY_FMT = DateTimeFormatter.ofPattern("yyyy-MM-dd")
|
||||
@@ -100,9 +103,10 @@ public class OpenSearchLogIndex {
|
||||
}
|
||||
}
|
||||
|
||||
public List<LogEntryResponse> search(String application, String agentId, String level,
|
||||
String query, String exchangeId,
|
||||
Instant from, Instant to, int limit) {
|
||||
@Override
|
||||
public List<LogEntryResult> search(String application, String agentId, String level,
|
||||
String query, String exchangeId,
|
||||
Instant from, Instant to, int limit) {
|
||||
try {
|
||||
BoolQuery.Builder bool = new BoolQuery.Builder();
|
||||
bool.must(Query.of(q -> q.term(t -> t.field("application").value(FieldValue.of(application)))));
|
||||
@@ -137,12 +141,12 @@ public class OpenSearchLogIndex {
|
||||
.sort(so -> so.field(f -> f.field("@timestamp").order(SortOrder.Desc)))
|
||||
.size(limit), Map.class);
|
||||
|
||||
List<LogEntryResponse> results = new ArrayList<>();
|
||||
List<LogEntryResult> results = new ArrayList<>();
|
||||
for (var hit : response.hits().hits()) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> src = (Map<String, Object>) hit.source();
|
||||
if (src == null) continue;
|
||||
results.add(new LogEntryResponse(
|
||||
results.add(new LogEntryResult(
|
||||
str(src, "@timestamp"),
|
||||
str(src, "level"),
|
||||
str(src, "loggerName"),
|
||||
@@ -162,6 +166,7 @@ public class OpenSearchLogIndex {
|
||||
return v != null ? v.toString() : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void indexBatch(String agentId, String application, List<LogEntry> entries) {
|
||||
if (entries == null || entries.isEmpty()) {
|
||||
return;
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.agent.AgentEventRecord;
|
||||
import com.cameleer3.server.core.agent.AgentEventRepository;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* ClickHouse implementation of {@link AgentEventRepository}.
|
||||
* <p>
|
||||
* The ClickHouse table has no {@code id} column (no BIGSERIAL equivalent),
|
||||
* so all returned {@link AgentEventRecord} instances have {@code id = 0}.
|
||||
*/
|
||||
public class ClickHouseAgentEventRepository implements AgentEventRepository {
|
||||
|
||||
private static final String TENANT = "default";
|
||||
|
||||
private static final String INSERT_SQL =
|
||||
"INSERT INTO agent_events (tenant_id, agent_id, app_id, event_type, detail) VALUES (?, ?, ?, ?, ?)";
|
||||
|
||||
private static final String SELECT_BASE =
|
||||
"SELECT 0 AS id, agent_id, app_id, event_type, detail, timestamp FROM agent_events WHERE tenant_id = ?";
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseAgentEventRepository(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void insert(String agentId, String appId, String eventType, String detail) {
|
||||
jdbc.update(INSERT_SQL, TENANT, agentId, appId, eventType, detail);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AgentEventRecord> query(String appId, String agentId, Instant from, Instant to, int limit) {
|
||||
var sql = new StringBuilder(SELECT_BASE);
|
||||
var params = new ArrayList<Object>();
|
||||
params.add(TENANT);
|
||||
|
||||
if (appId != null) {
|
||||
sql.append(" AND app_id = ?");
|
||||
params.add(appId);
|
||||
}
|
||||
if (agentId != null) {
|
||||
sql.append(" AND agent_id = ?");
|
||||
params.add(agentId);
|
||||
}
|
||||
if (from != null) {
|
||||
sql.append(" AND timestamp >= ?");
|
||||
params.add(Timestamp.from(from));
|
||||
}
|
||||
if (to != null) {
|
||||
sql.append(" AND timestamp < ?");
|
||||
params.add(Timestamp.from(to));
|
||||
}
|
||||
sql.append(" ORDER BY timestamp DESC LIMIT ?");
|
||||
params.add(limit);
|
||||
|
||||
return jdbc.query(sql.toString(), (rs, rowNum) -> new AgentEventRecord(
|
||||
rs.getLong("id"),
|
||||
rs.getString("agent_id"),
|
||||
rs.getString("app_id"),
|
||||
rs.getString("event_type"),
|
||||
rs.getString("detail"),
|
||||
rs.getTimestamp("timestamp").toInstant()
|
||||
), params.toArray());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.common.graph.RouteGraph;
|
||||
import com.cameleer3.common.graph.RouteNode;
|
||||
import com.cameleer3.server.core.ingestion.TaggedDiagram;
|
||||
import com.cameleer3.server.core.storage.DiagramStore;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HexFormat;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* ClickHouse implementation of {@link DiagramStore}.
|
||||
* <p>
|
||||
* Stores route graphs as JSON with SHA-256 content-hash deduplication.
|
||||
* Uses ReplacingMergeTree — duplicate inserts are deduplicated on merge.
|
||||
* <p>
|
||||
* {@code findProcessorRouteMapping} fetches all definitions for the application
|
||||
* and deserializes them in Java because ClickHouse has no equivalent of
|
||||
* PostgreSQL's {@code jsonb_array_elements()}.
|
||||
*/
|
||||
public class ClickHouseDiagramStore implements DiagramStore {

    private static final Logger log = LoggerFactory.getLogger(ClickHouseDiagramStore.class);

    // Single-tenant deployment: all rows are written/read under this id.
    private static final String TENANT = "default";

    private static final String INSERT_SQL = """
            INSERT INTO route_diagrams
            (tenant_id, content_hash, route_id, agent_id, application_name, definition, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """;

    private static final String SELECT_BY_HASH = """
            SELECT definition FROM route_diagrams
            WHERE tenant_id = ? AND content_hash = ?
            LIMIT 1
            """;

    private static final String SELECT_HASH_FOR_ROUTE = """
            SELECT content_hash FROM route_diagrams
            WHERE tenant_id = ? AND route_id = ? AND agent_id = ?
            ORDER BY created_at DESC LIMIT 1
            """;

    private static final String SELECT_DEFINITIONS_FOR_APP = """
            SELECT DISTINCT route_id, definition FROM route_diagrams
            WHERE tenant_id = ? AND application_name = ?
            """;

    private final JdbcTemplate jdbc;
    // Configured with JavaTimeModule so java.time fields inside RouteGraph serialize.
    private final ObjectMapper objectMapper;

    public ClickHouseDiagramStore(JdbcTemplate jdbc) {
        this.jdbc = jdbc;
        this.objectMapper = new ObjectMapper();
        this.objectMapper.registerModule(new JavaTimeModule());
    }

    /**
     * Serializes the diagram's RouteGraph to JSON, keys it by the SHA-256 of
     * that JSON, and inserts it. Null agent/application/route ids are stored
     * as "" (columns are presumably non-nullable — see table DDL).
     * Duplicate inserts for the same content are deduplicated on merge
     * (ReplacingMergeTree, per the class javadoc).
     *
     * @throws RuntimeException when the graph cannot be serialized to JSON
     */
    @Override
    public void store(TaggedDiagram diagram) {
        try {
            RouteGraph graph = diagram.graph();
            String agentId = diagram.agentId() != null ? diagram.agentId() : "";
            String applicationName = diagram.applicationName() != null ? diagram.applicationName() : "";
            String json = objectMapper.writeValueAsString(graph);
            String contentHash = sha256Hex(json);
            String routeId = graph.getRouteId() != null ? graph.getRouteId() : "";

            jdbc.update(INSERT_SQL,
                    TENANT,
                    contentHash,
                    routeId,
                    agentId,
                    applicationName,
                    json,
                    Timestamp.from(Instant.now()));
            log.debug("Stored diagram for route={} agent={} with hash={}", routeId, agentId, contentHash);
        } catch (JsonProcessingException e) {
            throw new RuntimeException("Failed to serialize RouteGraph to JSON", e);
        }
    }

    /**
     * Loads and deserializes the diagram stored under the given content hash.
     * Returns empty when no row exists or the stored JSON no longer
     * deserializes (e.g. after an incompatible RouteGraph schema change);
     * the latter is logged rather than propagated.
     */
    @Override
    public Optional<RouteGraph> findByContentHash(String contentHash) {
        List<Map<String, Object>> rows = jdbc.queryForList(SELECT_BY_HASH, TENANT, contentHash);
        if (rows.isEmpty()) {
            return Optional.empty();
        }
        String json = (String) rows.get(0).get("definition");
        try {
            return Optional.of(objectMapper.readValue(json, RouteGraph.class));
        } catch (JsonProcessingException e) {
            log.error("Failed to deserialize RouteGraph from ClickHouse", e);
            return Optional.empty();
        }
    }

    /** Most recent content hash for one route on one specific agent. */
    @Override
    public Optional<String> findContentHashForRoute(String routeId, String agentId) {
        List<Map<String, Object>> rows = jdbc.queryForList(
                SELECT_HASH_FOR_ROUTE, TENANT, routeId, agentId);
        if (rows.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of((String) rows.get(0).get("content_hash"));
    }

    /**
     * Most recent content hash for one route across any of the given agents.
     * The IN clause is built from "?" placeholders, so agent ids are still
     * bound as parameters (no injection risk). Empty/null agent lists yield
     * empty rather than an invalid "IN ()" query.
     */
    @Override
    public Optional<String> findContentHashForRouteByAgents(String routeId, List<String> agentIds) {
        if (agentIds == null || agentIds.isEmpty()) {
            return Optional.empty();
        }
        String placeholders = String.join(", ", Collections.nCopies(agentIds.size(), "?"));
        String sql = "SELECT content_hash FROM route_diagrams " +
                "WHERE tenant_id = ? AND route_id = ? AND agent_id IN (" + placeholders + ") " +
                "ORDER BY created_at DESC LIMIT 1";
        var params = new ArrayList<Object>();
        params.add(TENANT);
        params.add(routeId);
        params.addAll(agentIds);
        List<Map<String, Object>> rows = jdbc.queryForList(sql, params.toArray());
        if (rows.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of((String) rows.get(0).get("content_hash"));
    }

    /**
     * Builds a node-id → route-id map for every diagram of an application.
     * Deserialization happens in Java (not SQL) because ClickHouse lacks
     * jsonb_array_elements (see class javadoc). Rows that fail to
     * deserialize are logged and skipped. If the same node id appears in
     * multiple routes, the last row processed wins.
     */
    @Override
    public Map<String, String> findProcessorRouteMapping(String applicationName) {
        Map<String, String> mapping = new HashMap<>();
        List<Map<String, Object>> rows = jdbc.queryForList(
                SELECT_DEFINITIONS_FOR_APP, TENANT, applicationName);
        for (Map<String, Object> row : rows) {
            String routeId = (String) row.get("route_id");
            String json = (String) row.get("definition");
            if (json == null || routeId == null) {
                continue;
            }
            try {
                RouteGraph graph = objectMapper.readValue(json, RouteGraph.class);
                collectNodeIds(graph.getRoot(), routeId, mapping);
            } catch (JsonProcessingException e) {
                log.warn("Failed to deserialize RouteGraph for route={} app={}", routeId, applicationName, e);
            }
        }
        return mapping;
    }

    /**
     * Recursively walks the RouteNode tree and maps each node ID to the given routeId.
     * Null nodes, null/empty ids, and null child lists are tolerated.
     */
    private void collectNodeIds(RouteNode node, String routeId, Map<String, String> mapping) {
        if (node == null) {
            return;
        }
        String id = node.getId();
        if (id != null && !id.isEmpty()) {
            mapping.put(id, routeId);
        }
        List<RouteNode> children = node.getChildren();
        if (children != null) {
            for (RouteNode child : children) {
                collectNodeIds(child, routeId, mapping);
            }
        }
    }

    /**
     * Lowercase hex SHA-256 of the UTF-8 bytes of {@code input}; used as the
     * diagram dedup key. SHA-256 is mandated by the JDK spec, so the catch
     * branch is effectively unreachable.
     */
    static String sha256Hex(String input) {
        try {
            MessageDigest digest = MessageDigest.getInstance("SHA-256");
            byte[] hash = digest.digest(input.getBytes(StandardCharsets.UTF_8));
            return HexFormat.of().formatHex(hash);
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException("SHA-256 not available", e);
        }
    }
}
|
||||
@@ -0,0 +1,339 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.storage.ExecutionStore;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * ClickHouse implementation of {@link ExecutionStore}.
 *
 * <p>Writes go through the batch methods ({@code insertExecutionBatch},
 * {@code insertProcessorBatch}); the per-row {@code upsert*} interface methods
 * are intentionally unsupported. Reads on {@code executions} use
 * {@code FINAL}, so replaced duplicate rows are collapsed at query time
 * (presumably a ReplacingMergeTree keyed on {@code _version} — confirm
 * against the table DDL).
 *
 * <p>String columns are non-nullable in ClickHouse, so nulls are written as
 * {@code ""} via {@link #nullToEmpty} and mapped back with
 * {@link #emptyToNull} when reading.
 */
public class ClickHouseExecutionStore implements ExecutionStore {

    private final JdbcTemplate jdbc;
    // Serializes processor header/attribute maps to JSON strings for storage.
    private final ObjectMapper objectMapper;

    public ClickHouseExecutionStore(JdbcTemplate jdbc) {
        this(jdbc, new ObjectMapper());
    }

    public ClickHouseExecutionStore(JdbcTemplate jdbc, ObjectMapper objectMapper) {
        this.jdbc = jdbc;
        this.objectMapper = objectMapper;
    }

    /**
     * Batch-inserts merged execution rows. Column order in the INSERT must
     * match the Object[] order below exactly (31 values).
     */
    public void insertExecutionBatch(List<MergedExecution> executions) {
        if (executions.isEmpty()) return;

        jdbc.batchUpdate("""
                INSERT INTO executions (
                    tenant_id, _version, execution_id, route_id, agent_id, application_name,
                    status, correlation_id, exchange_id, start_time, end_time, duration_ms,
                    error_message, error_stacktrace, error_type, error_category,
                    root_cause_type, root_cause_message, diagram_content_hash, engine_level,
                    input_body, output_body, input_headers, output_headers, attributes,
                    trace_id, span_id, has_trace_data, is_replay,
                    original_exchange_id, replay_exchange_id
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                executions.stream().map(e -> new Object[]{
                        nullToEmpty(e.tenantId()),
                        e.version(),
                        nullToEmpty(e.executionId()),
                        nullToEmpty(e.routeId()),
                        nullToEmpty(e.agentId()),
                        nullToEmpty(e.applicationName()),
                        nullToEmpty(e.status()),
                        nullToEmpty(e.correlationId()),
                        nullToEmpty(e.exchangeId()),
                        Timestamp.from(e.startTime()),
                        // end_time may legitimately be absent for in-flight executions
                        e.endTime() != null ? Timestamp.from(e.endTime()) : null,
                        e.durationMs(),
                        nullToEmpty(e.errorMessage()),
                        nullToEmpty(e.errorStacktrace()),
                        nullToEmpty(e.errorType()),
                        nullToEmpty(e.errorCategory()),
                        nullToEmpty(e.rootCauseType()),
                        nullToEmpty(e.rootCauseMessage()),
                        nullToEmpty(e.diagramContentHash()),
                        nullToEmpty(e.engineLevel()),
                        nullToEmpty(e.inputBody()),
                        nullToEmpty(e.outputBody()),
                        nullToEmpty(e.inputHeaders()),
                        nullToEmpty(e.outputHeaders()),
                        nullToEmpty(e.attributes()),
                        nullToEmpty(e.traceId()),
                        nullToEmpty(e.spanId()),
                        e.hasTraceData(),
                        e.isReplay(),
                        nullToEmpty(e.originalExchangeId()),
                        nullToEmpty(e.replayExchangeId())
                }).toList());
    }

    /**
     * Batch-inserts the flattened processor rows of one execution.
     * A processor with no start time of its own falls back to the
     * execution start time; end_time is derived from start + duration.
     */
    public void insertProcessorBatch(String tenantId, String executionId, String routeId,
                                     String applicationName, Instant execStartTime,
                                     List<FlatProcessorRecord> processors) {
        if (processors.isEmpty()) return;

        jdbc.batchUpdate("""
                INSERT INTO processor_executions (
                    tenant_id, execution_id, seq, parent_seq, parent_processor_id,
                    processor_id, processor_type, start_time, route_id, application_name,
                    iteration, iteration_size, status, end_time, duration_ms,
                    error_message, error_stacktrace, error_type, error_category,
                    root_cause_type, root_cause_message,
                    input_body, output_body, input_headers, output_headers, attributes,
                    resolved_endpoint_uri, circuit_breaker_state,
                    fallback_triggered, filter_matched, duplicate_message
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                processors.stream().map(p -> new Object[]{
                        nullToEmpty(tenantId),
                        nullToEmpty(executionId),
                        p.getSeq(),
                        p.getParentSeq(),
                        nullToEmpty(p.getParentProcessorId()),
                        nullToEmpty(p.getProcessorId()),
                        nullToEmpty(p.getProcessorType()),
                        Timestamp.from(p.getStartTime() != null ? p.getStartTime() : execStartTime),
                        nullToEmpty(routeId),
                        nullToEmpty(applicationName),
                        p.getIteration(),
                        p.getIterationSize(),
                        p.getStatus() != null ? p.getStatus().name() : "",
                        // NOTE(review): unboxes getDurationMs() into a long parameter;
                        // assumes it never returns null — confirm against FlatProcessorRecord.
                        computeEndTime(p.getStartTime(), p.getDurationMs()),
                        p.getDurationMs(),
                        nullToEmpty(p.getErrorMessage()),
                        nullToEmpty(p.getErrorStackTrace()),
                        nullToEmpty(p.getErrorType()),
                        nullToEmpty(p.getErrorCategory()),
                        nullToEmpty(p.getRootCauseType()),
                        nullToEmpty(p.getRootCauseMessage()),
                        nullToEmpty(p.getInputBody()),
                        nullToEmpty(p.getOutputBody()),
                        mapToJson(p.getInputHeaders()),
                        mapToJson(p.getOutputHeaders()),
                        mapToJson(p.getAttributes()),
                        nullToEmpty(p.getResolvedEndpointUri()),
                        nullToEmpty(p.getCircuitBreakerState()),
                        boolOrFalse(p.getFallbackTriggered()),
                        boolOrFalse(p.getFilterMatched()),
                        boolOrFalse(p.getDuplicateMessage())
                }).toList());
    }

    // --- ExecutionStore interface: read methods ---

    /**
     * Loads one execution by id. FINAL collapses replaced duplicate rows so
     * at most one (the latest) version is returned.
     */
    @Override
    public Optional<ExecutionRecord> findById(String executionId) {
        List<ExecutionRecord> results = jdbc.query("""
                SELECT execution_id, route_id, agent_id, application_name, status,
                       correlation_id, exchange_id, start_time, end_time, duration_ms,
                       error_message, error_stacktrace, diagram_content_hash, engine_level,
                       input_body, output_body, input_headers, output_headers, attributes,
                       error_type, error_category, root_cause_type, root_cause_message,
                       trace_id, span_id, has_trace_data, is_replay
                FROM executions FINAL
                WHERE tenant_id = 'default' AND execution_id = ?
                LIMIT 1
                """,
                (rs, rowNum) -> mapExecutionRecord(rs),
                executionId);
        return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
    }

    /** All processor rows of one execution, in flattening order (seq). */
    @Override
    public List<ProcessorRecord> findProcessors(String executionId) {
        return jdbc.query("""
                SELECT execution_id, seq, parent_seq, parent_processor_id,
                       processor_id, processor_type, start_time, route_id, application_name,
                       iteration, iteration_size, status, end_time, duration_ms,
                       error_message, error_stacktrace, error_type, error_category,
                       root_cause_type, root_cause_message,
                       input_body, output_body, input_headers, output_headers, attributes,
                       resolved_endpoint_uri, circuit_breaker_state,
                       fallback_triggered, filter_matched, duplicate_message
                FROM processor_executions
                WHERE tenant_id = 'default' AND execution_id = ?
                ORDER BY seq
                """,
                (rs, rowNum) -> mapProcessorRecord(rs),
                executionId);
    }

    /**
     * First processor row matching the given processor id within one
     * execution (a processor may occur multiple times, e.g. in loops;
     * only the first is returned).
     */
    @Override
    public Optional<ProcessorRecord> findProcessorById(String executionId, String processorId) {
        List<ProcessorRecord> results = jdbc.query("""
                SELECT execution_id, seq, parent_seq, parent_processor_id,
                       processor_id, processor_type, start_time, route_id, application_name,
                       iteration, iteration_size, status, end_time, duration_ms,
                       error_message, error_stacktrace, error_type, error_category,
                       root_cause_type, root_cause_message,
                       input_body, output_body, input_headers, output_headers, attributes,
                       resolved_endpoint_uri, circuit_breaker_state,
                       fallback_triggered, filter_matched, duplicate_message
                FROM processor_executions
                WHERE tenant_id = 'default' AND execution_id = ? AND processor_id = ?
                LIMIT 1
                """,
                (rs, rowNum) -> mapProcessorRecord(rs),
                executionId, processorId);
        return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
    }

    /** Single processor row addressed by its flattening sequence number. */
    @Override
    public Optional<ProcessorRecord> findProcessorBySeq(String executionId, int seq) {
        List<ProcessorRecord> results = jdbc.query("""
                SELECT execution_id, seq, parent_seq, parent_processor_id,
                       processor_id, processor_type, start_time, route_id, application_name,
                       iteration, iteration_size, status, end_time, duration_ms,
                       error_message, error_stacktrace, error_type, error_category,
                       root_cause_type, root_cause_message,
                       input_body, output_body, input_headers, output_headers, attributes,
                       resolved_endpoint_uri, circuit_breaker_state,
                       fallback_triggered, filter_matched, duplicate_message
                FROM processor_executions
                WHERE tenant_id = 'default' AND execution_id = ? AND seq = ?
                LIMIT 1
                """,
                (rs, rowNum) -> mapProcessorRecord(rs),
                executionId, seq);
        return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
    }

    // --- ExecutionStore interface: write methods (unsupported, use chunked pipeline) ---

    @Override
    public void upsert(ExecutionRecord execution) {
        throw new UnsupportedOperationException("ClickHouse writes use the chunked pipeline");
    }

    @Override
    public void upsertProcessors(String executionId, Instant startTime,
                                 String applicationName, String routeId,
                                 List<ProcessorRecord> processors) {
        throw new UnsupportedOperationException("ClickHouse writes use the chunked pipeline");
    }

    // --- Row mappers ---

    // Inverts the write-side convention: "" columns come back as null.
    private static ExecutionRecord mapExecutionRecord(ResultSet rs) throws SQLException {
        return new ExecutionRecord(
                emptyToNull(rs.getString("execution_id")),
                emptyToNull(rs.getString("route_id")),
                emptyToNull(rs.getString("agent_id")),
                emptyToNull(rs.getString("application_name")),
                emptyToNull(rs.getString("status")),
                emptyToNull(rs.getString("correlation_id")),
                emptyToNull(rs.getString("exchange_id")),
                toInstant(rs, "start_time"),
                toInstant(rs, "end_time"),
                rs.getObject("duration_ms") != null ? rs.getLong("duration_ms") : null,
                emptyToNull(rs.getString("error_message")),
                emptyToNull(rs.getString("error_stacktrace")),
                emptyToNull(rs.getString("diagram_content_hash")),
                emptyToNull(rs.getString("engine_level")),
                emptyToNull(rs.getString("input_body")),
                emptyToNull(rs.getString("output_body")),
                emptyToNull(rs.getString("input_headers")),
                emptyToNull(rs.getString("output_headers")),
                emptyToNull(rs.getString("attributes")),
                emptyToNull(rs.getString("error_type")),
                emptyToNull(rs.getString("error_category")),
                emptyToNull(rs.getString("root_cause_type")),
                emptyToNull(rs.getString("root_cause_message")),
                emptyToNull(rs.getString("trace_id")),
                emptyToNull(rs.getString("span_id")),
                null, // processorsJson not stored in ClickHouse
                rs.getBoolean("has_trace_data"),
                rs.getBoolean("is_replay")
        );
    }

    // Columns not present in the ClickHouse schema (depth, loop/split indices,
    // errorHandlerType) are filled with fixed defaults.
    private static ProcessorRecord mapProcessorRecord(ResultSet rs) throws SQLException {
        return new ProcessorRecord(
                emptyToNull(rs.getString("execution_id")),
                emptyToNull(rs.getString("processor_id")),
                emptyToNull(rs.getString("processor_type")),
                emptyToNull(rs.getString("application_name")),
                emptyToNull(rs.getString("route_id")),
                0, // depth not stored in ClickHouse
                emptyToNull(rs.getString("parent_processor_id")),
                emptyToNull(rs.getString("status")),
                toInstant(rs, "start_time"),
                toInstant(rs, "end_time"),
                rs.getObject("duration_ms") != null ? rs.getLong("duration_ms") : null,
                emptyToNull(rs.getString("error_message")),
                emptyToNull(rs.getString("error_stacktrace")),
                emptyToNull(rs.getString("input_body")),
                emptyToNull(rs.getString("output_body")),
                emptyToNull(rs.getString("input_headers")),
                emptyToNull(rs.getString("output_headers")),
                emptyToNull(rs.getString("attributes")),
                null, // loopIndex
                null, // loopSize
                null, // splitIndex
                null, // splitSize
                null, // multicastIndex
                emptyToNull(rs.getString("resolved_endpoint_uri")),
                emptyToNull(rs.getString("error_type")),
                emptyToNull(rs.getString("error_category")),
                emptyToNull(rs.getString("root_cause_type")),
                emptyToNull(rs.getString("root_cause_message")),
                null, // errorHandlerType
                emptyToNull(rs.getString("circuit_breaker_state")),
                rs.getObject("fallback_triggered") != null ? rs.getBoolean("fallback_triggered") : null,
                rs.getObject("seq") != null ? rs.getInt("seq") : null,
                rs.getObject("parent_seq") != null ? rs.getInt("parent_seq") : null,
                rs.getObject("iteration") != null ? rs.getInt("iteration") : null,
                rs.getObject("iteration_size") != null ? rs.getInt("iteration_size") : null,
                rs.getObject("filter_matched") != null ? rs.getBoolean("filter_matched") : null,
                rs.getObject("duplicate_message") != null ? rs.getBoolean("duplicate_message") : null
        );
    }

    // --- Helpers ---

    // Read-side: "" means "was null when written".
    private static String emptyToNull(String value) {
        return (value == null || value.isEmpty()) ? null : value;
    }

    private static Instant toInstant(ResultSet rs, String column) throws SQLException {
        Timestamp ts = rs.getTimestamp(column);
        return ts != null ? ts.toInstant() : null;
    }

    // Write-side: ClickHouse string columns here are non-nullable.
    private static String nullToEmpty(String value) {
        return value != null ? value : "";
    }

    private static boolean boolOrFalse(Boolean value) {
        return value != null && value;
    }

    // Derives end_time = start + duration; null when either piece is missing
    // (durationMs <= 0 is treated as "unknown").
    private static Timestamp computeEndTime(Instant startTime, long durationMs) {
        if (startTime != null && durationMs > 0) {
            return Timestamp.from(startTime.plusMillis(durationMs));
        }
        return null;
    }

    // Maps are stored as JSON text; empty/unserializable maps become "".
    private String mapToJson(Map<String, String> map) {
        if (map == null || map.isEmpty()) return "";
        try {
            return objectMapper.writeValueAsString(map);
        } catch (JsonProcessingException e) {
            return "";
        }
    }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.storage.MetricsQueryStore;
|
||||
import com.cameleer3.server.core.storage.model.MetricTimeSeries;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
|
||||
public class ClickHouseMetricsQueryStore implements MetricsQueryStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseMetricsQueryStore(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, List<MetricTimeSeries.Bucket>> queryTimeSeries(
|
||||
String agentId, List<String> metricNames,
|
||||
Instant from, Instant to, int buckets) {
|
||||
|
||||
long intervalSeconds = Math.max(60,
|
||||
(to.getEpochSecond() - from.getEpochSecond()) / Math.max(buckets, 1));
|
||||
|
||||
Map<String, List<MetricTimeSeries.Bucket>> result = new LinkedHashMap<>();
|
||||
for (String name : metricNames) {
|
||||
result.put(name.trim(), new ArrayList<>());
|
||||
}
|
||||
|
||||
String[] namesArray = metricNames.stream().map(String::trim).toArray(String[]::new);
|
||||
|
||||
// ClickHouse JDBC doesn't support array params with IN (?).
|
||||
// Build the IN clause with properly escaped values.
|
||||
StringBuilder inClause = new StringBuilder();
|
||||
for (int i = 0; i < namesArray.length; i++) {
|
||||
if (i > 0) inClause.append(", ");
|
||||
inClause.append("'").append(namesArray[i].replace("'", "\\'")).append("'");
|
||||
}
|
||||
|
||||
String finalSql = """
|
||||
SELECT toStartOfInterval(collected_at, INTERVAL %d SECOND) AS bucket,
|
||||
metric_name,
|
||||
avg(metric_value) AS avg_value
|
||||
FROM agent_metrics
|
||||
WHERE agent_id = ?
|
||||
AND collected_at >= ?
|
||||
AND collected_at < ?
|
||||
AND metric_name IN (%s)
|
||||
GROUP BY bucket, metric_name
|
||||
ORDER BY bucket
|
||||
""".formatted(intervalSeconds, inClause);
|
||||
|
||||
jdbc.query(finalSql, rs -> {
|
||||
String metricName = rs.getString("metric_name");
|
||||
Instant bucket = rs.getTimestamp("bucket").toInstant();
|
||||
double value = rs.getDouble("avg_value");
|
||||
result.computeIfAbsent(metricName, k -> new ArrayList<>())
|
||||
.add(new MetricTimeSeries.Bucket(bucket, value));
|
||||
}, agentId,
|
||||
java.sql.Timestamp.from(from),
|
||||
java.sql.Timestamp.from(to));
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.storage.MetricsStore;
|
||||
import com.cameleer3.server.core.storage.model.MetricsSnapshot;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ClickHouseMetricsStore implements MetricsStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public ClickHouseMetricsStore(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void insertBatch(List<MetricsSnapshot> snapshots) {
|
||||
if (snapshots.isEmpty()) return;
|
||||
|
||||
jdbc.batchUpdate("""
|
||||
INSERT INTO agent_metrics (agent_id, metric_name, metric_value, tags, collected_at)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""",
|
||||
snapshots.stream().map(s -> new Object[]{
|
||||
s.agentId(),
|
||||
s.metricName(),
|
||||
s.metricValue(),
|
||||
tagsToClickHouseMap(s.tags()),
|
||||
Timestamp.from(s.collectedAt())
|
||||
}).toList());
|
||||
}
|
||||
|
||||
private Map<String, String> tagsToClickHouseMap(Map<String, String> tags) {
|
||||
if (tags == null || tags.isEmpty()) return new HashMap<>();
|
||||
return new HashMap<>(tags);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,565 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.search.ExecutionStats;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket;
|
||||
import com.cameleer3.server.core.search.TopError;
|
||||
import com.cameleer3.server.core.storage.StatsStore;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* ClickHouse implementation of {@link StatsStore}.
|
||||
* Reads from AggregatingMergeTree tables populated by materialized views,
|
||||
* using {@code -Merge} aggregate combinators to finalize partial states.
|
||||
*
|
||||
* <p>Queries against AggregatingMergeTree tables use literal SQL values instead
|
||||
* of JDBC prepared-statement parameters because the ClickHouse JDBC v2 driver
|
||||
* (0.9.x) wraps prepared statements in a sub-query that strips the
|
||||
* {@code AggregateFunction} column type, breaking {@code -Merge} combinators.
|
||||
* Queries against raw tables ({@code executions FINAL},
|
||||
* {@code processor_executions}) use normal prepared-statement parameters
|
||||
* since they have no AggregateFunction columns.</p>
|
||||
*/
|
||||
public class ClickHouseStatsStore implements StatsStore {
|
||||
|
||||
private static final String TENANT = "default";
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
    /** @param jdbc template bound to the ClickHouse datasource */
    public ClickHouseStatsStore(JdbcTemplate jdbc) {
        this.jdbc = jdbc;
    }
|
||||
|
||||
// ── Stats (aggregate) ────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public ExecutionStats stats(Instant from, Instant to) {
|
||||
return queryStats("stats_1m_all", from, to, List.of(), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExecutionStats statsForApp(Instant from, Instant to, String applicationName) {
|
||||
return queryStats("stats_1m_app", from, to, List.of(
|
||||
new Filter("application_name", applicationName)), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExecutionStats statsForRoute(Instant from, Instant to, String routeId, List<String> agentIds) {
|
||||
return queryStats("stats_1m_route", from, to, List.of(
|
||||
new Filter("route_id", routeId)), true);
|
||||
}
|
||||
|
||||
    /**
     * Processor-level stats, computed directly from the raw
     * {@code processor_executions} table (no processor-granularity rollup exists).
     */
    @Override
    public ExecutionStats statsForProcessor(Instant from, Instant to, String routeId, String processorType) {
        return queryProcessorStatsRaw(from, to, routeId, processorType);
    }
|
||||
|
||||
// ── Timeseries ───────────────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount) {
|
||||
return queryTimeseries("stats_1m_all", from, to, bucketCount, List.of(), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationName) {
|
||||
return queryTimeseries("stats_1m_app", from, to, bucketCount, List.of(
|
||||
new Filter("application_name", applicationName)), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
|
||||
String routeId, List<String> agentIds) {
|
||||
return queryTimeseries("stats_1m_route", from, to, bucketCount, List.of(
|
||||
new Filter("route_id", routeId)), true);
|
||||
}
|
||||
|
||||
    /**
     * Processor-level timeseries, computed directly from the raw
     * {@code processor_executions} table.
     */
    @Override
    public StatsTimeseries timeseriesForProcessor(Instant from, Instant to, int bucketCount,
                                                  String routeId, String processorType) {
        return queryProcessorTimeseriesRaw(from, to, bucketCount, routeId, processorType);
    }
|
||||
|
||||
// ── Grouped timeseries ───────────────────────────────────────────────
|
||||
|
||||
@Override
|
||||
public Map<String, StatsTimeseries> timeseriesGroupedByApp(Instant from, Instant to, int bucketCount) {
|
||||
return queryGroupedTimeseries("stats_1m_app", "application_name", from, to,
|
||||
bucketCount, List.of());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, StatsTimeseries> timeseriesGroupedByRoute(Instant from, Instant to,
|
||||
int bucketCount, String applicationName) {
|
||||
return queryGroupedTimeseries("stats_1m_route", "route_id", from, to,
|
||||
bucketCount, List.of(new Filter("application_name", applicationName)));
|
||||
}
|
||||
|
||||
// ── SLA compliance (raw table — prepared statements OK) ──────────────
|
||||
|
||||
    /**
     * Percentage (0–100) of finished executions whose duration is within
     * {@code thresholdMs} over [from, to). Runs against the raw
     * {@code executions} table with FINAL, so normal prepared-statement
     * parameters are safe (no AggregateFunction columns involved).
     *
     * <p>NOTE(review): when there are no finished executions the method returns
     * 1.0, but the computed value is a percentage in [0, 100] — an empty window
     * thus reads as 1% rather than 100%. Confirm whether 100.0 was intended.</p>
     *
     * @param applicationName optional filter; ignored when null
     * @param routeId         optional filter; ignored when null
     */
    @Override
    public double slaCompliance(Instant from, Instant to, int thresholdMs,
                                String applicationName, String routeId) {
        String sql = "SELECT " +
                "countIf(duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
                "countIf(status != 'RUNNING') AS total " +
                "FROM executions FINAL " +
                "WHERE tenant_id = ? AND start_time >= ? AND start_time < ?";

        // Parameter order must follow the '?' order in the SQL above.
        List<Object> params = new ArrayList<>();
        params.add(thresholdMs);
        params.add(TENANT);
        params.add(Timestamp.from(from));
        params.add(Timestamp.from(to));
        if (applicationName != null) {
            sql += " AND application_name = ?";
            params.add(applicationName);
        }
        if (routeId != null) {
            sql += " AND route_id = ?";
            params.add(routeId);
        }

        return jdbc.query(sql, (rs, rowNum) -> {
            long total = rs.getLong("total");
            if (total == 0) return 1.0; // see NOTE(review) in the javadoc
            return rs.getLong("compliant") * 100.0 / total;
        }, params.toArray()).stream().findFirst().orElse(1.0);
    }
|
||||
|
||||
@Override
|
||||
public Map<String, long[]> slaCountsByApp(Instant from, Instant to, int defaultThresholdMs) {
|
||||
String sql = "SELECT application_name, " +
|
||||
"countIf(duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
|
||||
"countIf(status != 'RUNNING') AS total " +
|
||||
"FROM executions FINAL " +
|
||||
"WHERE tenant_id = ? AND start_time >= ? AND start_time < ? " +
|
||||
"GROUP BY application_name";
|
||||
|
||||
Map<String, long[]> result = new LinkedHashMap<>();
|
||||
jdbc.query(sql, (rs) -> {
|
||||
result.put(rs.getString("application_name"),
|
||||
new long[]{rs.getLong("compliant"), rs.getLong("total")});
|
||||
}, defaultThresholdMs, TENANT, Timestamp.from(from), Timestamp.from(to));
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, long[]> slaCountsByRoute(Instant from, Instant to,
|
||||
String applicationName, int thresholdMs) {
|
||||
String sql = "SELECT route_id, " +
|
||||
"countIf(duration_ms <= ? AND status != 'RUNNING') AS compliant, " +
|
||||
"countIf(status != 'RUNNING') AS total " +
|
||||
"FROM executions FINAL " +
|
||||
"WHERE tenant_id = ? AND start_time >= ? AND start_time < ? " +
|
||||
"AND application_name = ? GROUP BY route_id";
|
||||
|
||||
Map<String, long[]> result = new LinkedHashMap<>();
|
||||
jdbc.query(sql, (rs) -> {
|
||||
result.put(rs.getString("route_id"),
|
||||
new long[]{rs.getLong("compliant"), rs.getLong("total")});
|
||||
}, thresholdMs, TENANT, Timestamp.from(from), Timestamp.from(to), applicationName);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ── Top errors (raw table — prepared statements OK) ──────────────────
|
||||
|
||||
    /**
     * Top error signatures in [from, to), ranked by occurrence count, with a
     * per-minute velocity over the last 5 minutes and an
     * accelerating/stable/decelerating trend (recent 5m vs previous 5m ±20%).
     *
     * <p>With a {@code routeId} the query reads processor-level failures and
     * groups by {@code processor_id}; otherwise it reads route-level failures
     * from {@code executions FINAL} and groups by {@code route_id}.</p>
     *
     * <p>The shared WHERE fragment and its params are used twice (in the
     * {@code counted} and {@code velocity} CTEs), so {@code fullParams} must
     * mirror the textual order of the '?' placeholders exactly: tenant, shared
     * filters, limit, the three velocity timestamps, then tenant and the shared
     * filters again.</p>
     */
    @Override
    public List<TopError> topErrors(Instant from, Instant to, String applicationName,
                                    String routeId, int limit) {
        // Shared filter fragment, appended to both CTEs below.
        StringBuilder where = new StringBuilder(
                "status = 'FAILED' AND start_time >= ? AND start_time < ?");
        List<Object> params = new ArrayList<>();
        params.add(Timestamp.from(from));
        params.add(Timestamp.from(to));
        if (applicationName != null) {
            where.append(" AND application_name = ?");
            params.add(applicationName);
        }

        // Route drill-down reads the processor table; otherwise the route level.
        String table;
        String groupId;
        if (routeId != null) {
            table = "processor_executions";
            groupId = "processor_id";
            where.append(" AND route_id = ?");
            params.add(routeId);
        } else {
            table = "executions FINAL";
            groupId = "route_id";
        }

        // Velocity windows are anchored to "now", not to the queried range.
        Instant fiveMinAgo = Instant.now().minus(5, ChronoUnit.MINUTES);
        Instant tenMinAgo = Instant.now().minus(10, ChronoUnit.MINUTES);

        // error_key falls back to the first 200 chars of the message when no
        // explicit error_type was recorded.
        String sql = "WITH counted AS (" +
                "  SELECT COALESCE(error_type, substring(error_message, 1, 200)) AS error_key, " +
                "         " + groupId + " AS group_id, " +
                "         count() AS cnt, max(start_time) AS last_seen " +
                "  FROM " + table + " WHERE tenant_id = ? AND " + where +
                "  GROUP BY error_key, group_id ORDER BY cnt DESC LIMIT ?" +
                "), velocity AS (" +
                "  SELECT COALESCE(error_type, substring(error_message, 1, 200)) AS error_key, " +
                "         countIf(start_time >= ?) AS recent_5m, " +
                "         countIf(start_time >= ? AND start_time < ?) AS prev_5m " +
                "  FROM " + table + " WHERE tenant_id = ? AND " + where +
                "  GROUP BY error_key" +
                ") SELECT c.error_key, c.group_id, c.cnt, c.last_seen, " +
                "  COALESCE(v.recent_5m, 0) / 5.0 AS velocity, " +
                "  CASE " +
                "    WHEN COALESCE(v.recent_5m, 0) > COALESCE(v.prev_5m, 0) * 1.2 THEN 'accelerating' " +
                "    WHEN COALESCE(v.recent_5m, 0) < COALESCE(v.prev_5m, 0) * 0.8 THEN 'decelerating' " +
                "    ELSE 'stable' END AS trend " +
                "FROM counted c LEFT JOIN velocity v ON c.error_key = v.error_key " +
                "ORDER BY c.cnt DESC";

        // Positional binding: must match the '?' order in the SQL text above.
        List<Object> fullParams = new ArrayList<>();
        fullParams.add(TENANT);
        fullParams.addAll(params);
        fullParams.add(limit);
        fullParams.add(Timestamp.from(fiveMinAgo));
        fullParams.add(Timestamp.from(tenMinAgo));
        fullParams.add(Timestamp.from(fiveMinAgo));
        fullParams.add(TENANT);
        fullParams.addAll(params);

        return jdbc.query(sql, (rs, rowNum) -> {
            String errorKey = rs.getString("error_key");
            String gid = rs.getString("group_id");
            // group_id is the processor id in route drill-down mode, the route
            // id otherwise — map it into the right TopError slot.
            return new TopError(
                    errorKey,
                    routeId != null ? routeId : gid,
                    routeId != null ? gid : null,
                    rs.getLong("cnt"),
                    rs.getDouble("velocity"),
                    rs.getString("trend"),
                    rs.getTimestamp("last_seen").toInstant());
        }, fullParams.toArray());
    }
|
||||
|
||||
@Override
|
||||
public int activeErrorTypes(Instant from, Instant to, String applicationName) {
|
||||
String sql = "SELECT COUNT(DISTINCT COALESCE(error_type, substring(error_message, 1, 200))) " +
|
||||
"FROM executions FINAL " +
|
||||
"WHERE tenant_id = ? AND status = 'FAILED' AND start_time >= ? AND start_time < ?";
|
||||
|
||||
List<Object> params = new ArrayList<>();
|
||||
params.add(TENANT);
|
||||
params.add(Timestamp.from(from));
|
||||
params.add(Timestamp.from(to));
|
||||
if (applicationName != null) {
|
||||
sql += " AND application_name = ?";
|
||||
params.add(applicationName);
|
||||
}
|
||||
|
||||
Integer count = jdbc.queryForObject(sql, Integer.class, params.toArray());
|
||||
return count != null ? count : 0;
|
||||
}
|
||||
|
||||
// ── Punchcard (AggregatingMergeTree — literal SQL) ───────────────────
|
||||
|
||||
    /**
     * Weekday × hour execution counts for heatmap rendering, from the per-app
     * rollup when an application filter is given, the global rollup otherwise.
     * Uses literal SQL because the views carry AggregateFunction columns
     * (see class javadoc).
     *
     * <p>NOTE(review): with mode 1, {@code toDayOfWeek} already yields 0–6
     * (Monday-based), making the {@code % 7} a no-op; if a Sunday-based 0–6
     * convention was intended, mode 0 with {@code % 7} would be needed —
     * confirm the expected weekday numbering against the UI.</p>
     */
    @Override
    public List<PunchcardCell> punchcard(Instant from, Instant to, String applicationName) {
        String view = applicationName != null ? "stats_1m_app" : "stats_1m_all";
        String sql = "SELECT toDayOfWeek(bucket, 1) % 7 AS weekday, " +
                "toHour(bucket) AS hour, " +
                "countMerge(total_count) AS total_count, " +
                "countIfMerge(failed_count) AS failed_count " +
                "FROM " + view +
                " WHERE tenant_id = " + lit(TENANT) +
                " AND bucket >= " + lit(from) +
                " AND bucket < " + lit(to);
        if (applicationName != null) {
            sql += " AND application_name = " + lit(applicationName);
        }
        sql += " GROUP BY weekday, hour ORDER BY weekday, hour";

        return jdbc.query(sql, (rs, rowNum) -> new PunchcardCell(
                rs.getInt("weekday"), rs.getInt("hour"),
                rs.getLong("total_count"), rs.getLong("failed_count")));
    }
|
||||
|
||||
// ── Private helpers ──────────────────────────────────────────────────
|
||||
|
||||
    /** Column/value equality filter, rendered into the WHERE clause as a SQL literal. */
    private record Filter(String column, String value) {}
|
||||
|
||||
    /**
     * Format an Instant as a ClickHouse DateTime literal.
     * Uses java.sql.Timestamp to match the JVM→ClickHouse timezone convention
     * used by the JDBC driver, then truncates to second precision for DateTime
     * column compatibility.
     *
     * <p>NOTE(review): {@code Timestamp.toString()} renders in the JVM default
     * timezone; this assumes the server JVM and the ClickHouse session agree on
     * that zone — confirm against the deployment configuration.</p>
     */
    private static String lit(Instant instant) {
        // Truncate to seconds — ClickHouse DateTime has second precision
        Instant truncated = instant.truncatedTo(ChronoUnit.SECONDS);
        String ts = new Timestamp(truncated.toEpochMilli()).toString();
        // Remove trailing ".0" that Timestamp.toString() always appends
        if (ts.endsWith(".0")) ts = ts.substring(0, ts.length() - 2);
        return "'" + ts + "'";
    }
|
||||
|
||||
/** Format a string as a SQL literal with single-quote escaping. */
|
||||
private static String lit(String value) {
|
||||
return "'" + value.replace("'", "\\'") + "'";
|
||||
}
|
||||
|
||||
    /**
     * Convert Instant to java.sql.Timestamp for JDBC binding.
     * Used only for the raw-table queries; the AggregatingMergeTree paths embed
     * literals via {@code lit(...)} instead.
     */
    private static Timestamp ts(Instant instant) {
        return Timestamp.from(instant);
    }
|
||||
|
||||
/**
|
||||
* Build -Merge combinator SQL for the given view and time range.
|
||||
*/
|
||||
private String buildStatsSql(String view, Instant rangeFrom, Instant rangeTo,
|
||||
List<Filter> filters, boolean hasRunning) {
|
||||
String runningCol = hasRunning ? "countIfMerge(running_count)" : "0";
|
||||
String sql = "SELECT " +
|
||||
"countMerge(total_count) AS total_count, " +
|
||||
"countIfMerge(failed_count) AS failed_count, " +
|
||||
"sumMerge(duration_sum) AS duration_sum, " +
|
||||
"quantileMerge(0.99)(p99_duration) AS p99_duration, " +
|
||||
runningCol + " AS active_count " +
|
||||
"FROM " + view +
|
||||
" WHERE tenant_id = " + lit(TENANT) +
|
||||
" AND bucket >= " + lit(rangeFrom) +
|
||||
" AND bucket < " + lit(rangeTo);
|
||||
for (Filter f : filters) {
|
||||
sql += " AND " + f.column() + " = " + lit(f.value());
|
||||
}
|
||||
return sql;
|
||||
}
|
||||
|
||||
    /**
     * Query an AggregatingMergeTree stats table using -Merge combinators.
     * Uses literal SQL to avoid ClickHouse JDBC driver PreparedStatement issues
     * (see class javadoc).
     *
     * <p>Issues three queries: the requested window, the same window shifted
     * back 24h (for trend comparison), and a "today so far" window.</p>
     *
     * @return aggregate stats; all-zero when the view has no matching rows
     */
    private ExecutionStats queryStats(String view, Instant from, Instant to,
                                      List<Filter> filters, boolean hasRunning) {

        String sql = buildStatsSql(view, from, to, filters, hasRunning);

        long totalCount = 0, failedCount = 0, avgDuration = 0, p99Duration = 0, activeCount = 0;
        var currentResult = jdbc.query(sql, (rs, rowNum) -> {
            long tc = rs.getLong("total_count");
            long fc = rs.getLong("failed_count");
            long ds = rs.getLong("duration_sum"); // Nullable → 0 if null
            long p99 = (long) rs.getDouble("p99_duration"); // quantileMerge returns Float64
            long ac = rs.getLong("active_count");
            return new long[]{tc, fc, ds, p99, ac};
        });
        if (!currentResult.isEmpty()) {
            long[] r = currentResult.get(0);
            totalCount = r[0]; failedCount = r[1];
            // Average derived from the sum rather than stored, so it merges correctly.
            avgDuration = totalCount > 0 ? r[2] / totalCount : 0;
            p99Duration = r[3]; activeCount = r[4];
        }

        // Previous period (shifted back 24h)
        Instant prevFrom = from.minus(Duration.ofHours(24));
        Instant prevTo = to.minus(Duration.ofHours(24));
        String prevSql = buildStatsSql(view, prevFrom, prevTo, filters, hasRunning);

        long prevTotal = 0, prevFailed = 0, prevAvg = 0, prevP99 = 0;
        var prevResult = jdbc.query(prevSql, (rs, rowNum) -> {
            long tc = rs.getLong("total_count");
            long fc = rs.getLong("failed_count");
            long ds = rs.getLong("duration_sum");
            long p99 = (long) rs.getDouble("p99_duration");
            return new long[]{tc, fc, ds, p99};
        });
        if (!prevResult.isEmpty()) {
            long[] r = prevResult.get(0);
            prevTotal = r[0]; prevFailed = r[1];
            prevAvg = prevTotal > 0 ? r[2] / prevTotal : 0;
            prevP99 = r[3];
        }

        // Today total
        // NOTE(review): truncatedTo(DAYS) on an Instant is midnight UTC, not the
        // user's local midnight — confirm that is the intended "today" boundary.
        Instant todayStart = Instant.now().truncatedTo(ChronoUnit.DAYS);
        String todaySql = buildStatsSql(view, todayStart, Instant.now(), filters, hasRunning);

        long totalToday = 0;
        var todayResult = jdbc.query(todaySql, (rs, rowNum) -> rs.getLong("total_count"));
        if (!todayResult.isEmpty()) totalToday = todayResult.get(0);

        return new ExecutionStats(
                totalCount, failedCount, avgDuration, p99Duration, activeCount,
                totalToday, prevTotal, prevFailed, prevAvg, prevP99);
    }
|
||||
|
||||
    /**
     * Timeseries from AggregatingMergeTree using -Merge combinators.
     * Buckets the 1-minute rollup into roughly {@code bucketCount} intervals;
     * the interval is floored at 60s since the source rollup is per-minute.
     * Literal SQL for the same driver reason as {@link #queryStats}.
     */
    private StatsTimeseries queryTimeseries(String view, Instant from, Instant to,
                                            int bucketCount, List<Filter> filters,
                                            boolean hasRunningCount) {
        long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
        if (intervalSeconds < 60) intervalSeconds = 60;

        String runningCol = hasRunningCount ? "countIfMerge(running_count)" : "0";

        String sql = "SELECT " +
                "toStartOfInterval(bucket, INTERVAL " + intervalSeconds + " SECOND) AS period, " +
                "countMerge(total_count) AS total_count, " +
                "countIfMerge(failed_count) AS failed_count, " +
                "sumMerge(duration_sum) AS duration_sum, " +
                "quantileMerge(0.99)(p99_duration) AS p99_duration, " +
                runningCol + " AS active_count " +
                "FROM " + view +
                " WHERE tenant_id = " + lit(TENANT) +
                " AND bucket >= " + lit(from) +
                " AND bucket < " + lit(to);
        for (Filter f : filters) {
            sql += " AND " + f.column() + " = " + lit(f.value());
        }
        sql += " GROUP BY period ORDER BY period";

        List<TimeseriesBucket> buckets = jdbc.query(sql, (rs, rowNum) -> {
            long tc = rs.getLong("total_count");
            long ds = rs.getLong("duration_sum");
            return new TimeseriesBucket(
                    rs.getTimestamp("period").toInstant(),
                    tc, rs.getLong("failed_count"),
                    // Average per bucket from the merged sum; 0 for empty buckets.
                    tc > 0 ? ds / tc : 0, (long) rs.getDouble("p99_duration"),
                    rs.getLong("active_count"));
        });

        // Empty periods are absent (no gap-filling); callers render sparse series.
        return new StatsTimeseries(buckets);
    }
|
||||
|
||||
    /**
     * Grouped timeseries from AggregatingMergeTree: one series per distinct
     * value of {@code groupCol}. Always reads {@code running_count}, so it must
     * only be pointed at views that carry that column.
     */
    private Map<String, StatsTimeseries> queryGroupedTimeseries(
            String view, String groupCol, Instant from, Instant to,
            int bucketCount, List<Filter> filters) {

        // Interval floored at 60s — the source rollup is per-minute.
        long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
        if (intervalSeconds < 60) intervalSeconds = 60;

        String sql = "SELECT " +
                "toStartOfInterval(bucket, INTERVAL " + intervalSeconds + " SECOND) AS period, " +
                groupCol + " AS group_key, " +
                "countMerge(total_count) AS total_count, " +
                "countIfMerge(failed_count) AS failed_count, " +
                "sumMerge(duration_sum) AS duration_sum, " +
                "quantileMerge(0.99)(p99_duration) AS p99_duration, " +
                "countIfMerge(running_count) AS active_count " +
                "FROM " + view +
                " WHERE tenant_id = " + lit(TENANT) +
                " AND bucket >= " + lit(from) +
                " AND bucket < " + lit(to);
        for (Filter f : filters) {
            sql += " AND " + f.column() + " = " + lit(f.value());
        }
        sql += " GROUP BY period, group_key ORDER BY period, group_key";

        // LinkedHashMap preserves the ORDER BY ordering of first appearance.
        Map<String, List<TimeseriesBucket>> grouped = new LinkedHashMap<>();
        jdbc.query(sql, (rs) -> {
            String key = rs.getString("group_key");
            long tc = rs.getLong("total_count");
            long ds = rs.getLong("duration_sum");
            TimeseriesBucket bucket = new TimeseriesBucket(
                    rs.getTimestamp("period").toInstant(),
                    tc, rs.getLong("failed_count"),
                    tc > 0 ? ds / tc : 0, (long) rs.getDouble("p99_duration"),
                    rs.getLong("active_count"));
            grouped.computeIfAbsent(key, k -> new ArrayList<>()).add(bucket);
        });

        Map<String, StatsTimeseries> result = new LinkedHashMap<>();
        grouped.forEach((key, buckets) -> result.put(key, new StatsTimeseries(buckets)));
        return result;
    }
|
||||
|
||||
    /**
     * Direct aggregation on processor_executions for processor-level stats.
     * The raw table has no AggregateFunction columns, so ordinary '?' binding
     * is safe here. The same SQL serves the current window, the 24h-shifted
     * comparison window, and "today so far".
     */
    private ExecutionStats queryProcessorStatsRaw(Instant from, Instant to,
                                                  String routeId, String processorType) {
        String sql = "SELECT " +
                "count() AS total_count, " +
                "countIf(status = 'FAILED') AS failed_count, " +
                "CASE WHEN count() > 0 THEN sum(duration_ms) / count() ELSE 0 END AS avg_duration, " +
                "quantile(0.99)(duration_ms) AS p99_duration, " +
                // Processor rows carry no RUNNING state — always report 0 active.
                "0 AS active_count " +
                "FROM processor_executions " +
                "WHERE tenant_id = ? AND start_time >= ? AND start_time < ? " +
                "AND route_id = ? AND processor_type = ?";

        long totalCount = 0, failedCount = 0, avgDuration = 0, p99Duration = 0;
        var currentResult = jdbc.query(sql, (rs, rowNum) -> new long[]{
                rs.getLong("total_count"), rs.getLong("failed_count"),
                (long) rs.getDouble("avg_duration"), (long) rs.getDouble("p99_duration"),
                rs.getLong("active_count")
        }, TENANT, ts(from), ts(to), routeId, processorType);
        if (!currentResult.isEmpty()) {
            long[] r = currentResult.get(0);
            totalCount = r[0]; failedCount = r[1]; avgDuration = r[2]; p99Duration = r[3];
        }

        // Comparison window: same range shifted back 24h.
        Instant prevFrom = from.minus(Duration.ofHours(24));
        Instant prevTo = to.minus(Duration.ofHours(24));
        long prevTotal = 0, prevFailed = 0, prevAvg = 0, prevP99 = 0;
        var prevResult = jdbc.query(sql, (rs, rowNum) -> new long[]{
                rs.getLong("total_count"), rs.getLong("failed_count"),
                (long) rs.getDouble("avg_duration"), (long) rs.getDouble("p99_duration")
        }, TENANT, ts(prevFrom), ts(prevTo), routeId, processorType);
        if (!prevResult.isEmpty()) {
            long[] r = prevResult.get(0);
            prevTotal = r[0]; prevFailed = r[1]; prevAvg = r[2]; prevP99 = r[3];
        }

        // NOTE(review): truncatedTo(DAYS) is midnight UTC — confirm the intended
        // "today" boundary matches the rest of the dashboard.
        Instant todayStart = Instant.now().truncatedTo(ChronoUnit.DAYS);
        long totalToday = 0;
        var todayResult = jdbc.query(sql, (rs, rowNum) -> rs.getLong("total_count"),
                TENANT, ts(todayStart), ts(Instant.now()), routeId, processorType);
        if (!todayResult.isEmpty()) totalToday = todayResult.get(0);

        return new ExecutionStats(
                totalCount, failedCount, avgDuration, p99Duration, 0,
                totalToday, prevTotal, prevFailed, prevAvg, prevP99);
    }
|
||||
|
||||
    /**
     * Direct aggregation on processor_executions for processor-level timeseries.
     * Raw table, so ordinary '?' binding is used; the interval is floored at
     * 60s to stay consistent with the rollup-backed timeseries queries.
     */
    private StatsTimeseries queryProcessorTimeseriesRaw(Instant from, Instant to,
                                                        int bucketCount,
                                                        String routeId, String processorType) {
        long intervalSeconds = Duration.between(from, to).toSeconds() / Math.max(bucketCount, 1);
        if (intervalSeconds < 60) intervalSeconds = 60;

        String sql = "SELECT " +
                "toStartOfInterval(start_time, INTERVAL " + intervalSeconds + " SECOND) AS period, " +
                "count() AS total_count, " +
                "countIf(status = 'FAILED') AS failed_count, " +
                "CASE WHEN count() > 0 THEN sum(duration_ms) / count() ELSE 0 END AS avg_duration, " +
                "quantile(0.99)(duration_ms) AS p99_duration, " +
                // No RUNNING state at processor granularity.
                "0 AS active_count " +
                "FROM processor_executions " +
                "WHERE tenant_id = ? AND start_time >= ? AND start_time < ? " +
                "AND route_id = ? AND processor_type = ? " +
                "GROUP BY period ORDER BY period";

        List<TimeseriesBucket> buckets = jdbc.query(sql, (rs, rowNum) ->
                new TimeseriesBucket(
                        rs.getTimestamp("period").toInstant(),
                        rs.getLong("total_count"), rs.getLong("failed_count"),
                        (long) rs.getDouble("avg_duration"), (long) rs.getDouble("p99_duration"),
                        rs.getLong("active_count")
                ), TENANT, ts(from), ts(to), routeId, processorType);

        return new StatsTimeseries(buckets);
    }
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.agent.AgentEventRecord;
|
||||
import com.cameleer3.server.core.agent.AgentEventRepository;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@@ -11,6 +12,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Repository
|
||||
@ConditionalOnProperty(name = "cameleer.storage.events", havingValue = "postgres")
|
||||
public class PostgresAgentEventRepository implements AgentEventRepository {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
@@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@@ -29,6 +30,7 @@ import java.util.Optional;
|
||||
* Uses {@code ON CONFLICT (content_hash) DO NOTHING} for idempotent inserts.
|
||||
*/
|
||||
@Repository
|
||||
@ConditionalOnProperty(name = "cameleer.storage.diagrams", havingValue = "postgres")
|
||||
public class PostgresDiagramStore implements DiagramStore {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PostgresDiagramStore.class);
|
||||
|
||||
@@ -3,7 +3,6 @@ package com.cameleer3.server.app.storage;
|
||||
import com.cameleer3.server.core.storage.ExecutionStore;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.jdbc.core.RowMapper;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
@@ -12,7 +11,6 @@ import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
@Repository
|
||||
public class PostgresExecutionStore implements ExecutionStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
@@ -206,7 +204,8 @@ public class PostgresExecutionStore implements ExecutionStore {
|
||||
rs.getString("error_type"), rs.getString("error_category"),
|
||||
rs.getString("root_cause_type"), rs.getString("root_cause_message"),
|
||||
rs.getString("error_handler_type"), rs.getString("circuit_breaker_state"),
|
||||
rs.getObject("fallback_triggered") != null ? rs.getBoolean("fallback_triggered") : null);
|
||||
rs.getObject("fallback_triggered") != null ? rs.getBoolean("fallback_triggered") : null,
|
||||
null, null, null, null, null, null);
|
||||
|
||||
private static Instant toInstant(ResultSet rs, String column) throws SQLException {
|
||||
Timestamp ts = rs.getTimestamp(column);
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.storage.MetricsQueryStore;
|
||||
import com.cameleer3.server.core.storage.model.MetricTimeSeries;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
|
||||
public class PostgresMetricsQueryStore implements MetricsQueryStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
public PostgresMetricsQueryStore(JdbcTemplate jdbc) {
|
||||
this.jdbc = jdbc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, List<MetricTimeSeries.Bucket>> queryTimeSeries(
|
||||
String agentId, List<String> metricNames,
|
||||
Instant from, Instant to, int buckets) {
|
||||
|
||||
long intervalMs = (to.toEpochMilli() - from.toEpochMilli()) / Math.max(buckets, 1);
|
||||
String intervalStr = intervalMs + " milliseconds";
|
||||
|
||||
Map<String, List<MetricTimeSeries.Bucket>> result = new LinkedHashMap<>();
|
||||
for (String name : metricNames) {
|
||||
result.put(name.trim(), new ArrayList<>());
|
||||
}
|
||||
|
||||
String sql = """
|
||||
SELECT time_bucket(CAST(? AS interval), collected_at) AS bucket,
|
||||
metric_name,
|
||||
AVG(metric_value) AS avg_value
|
||||
FROM agent_metrics
|
||||
WHERE agent_id = ?
|
||||
AND collected_at >= ? AND collected_at < ?
|
||||
AND metric_name = ANY(?)
|
||||
GROUP BY bucket, metric_name
|
||||
ORDER BY bucket
|
||||
""";
|
||||
|
||||
String[] namesArray = metricNames.stream().map(String::trim).toArray(String[]::new);
|
||||
jdbc.query(sql, rs -> {
|
||||
String metricName = rs.getString("metric_name");
|
||||
Instant bucket = rs.getTimestamp("bucket").toInstant();
|
||||
double value = rs.getDouble("avg_value");
|
||||
result.computeIfAbsent(metricName, k -> new ArrayList<>())
|
||||
.add(new MetricTimeSeries.Bucket(bucket, value));
|
||||
}, intervalStr, agentId, Timestamp.from(from), Timestamp.from(to), namesArray);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -5,12 +5,10 @@ import com.cameleer3.server.core.storage.model.MetricsSnapshot;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.util.List;
|
||||
|
||||
@Repository
|
||||
public class PostgresMetricsStore implements MetricsStore {
|
||||
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
|
||||
@@ -5,6 +5,7 @@ import com.cameleer3.server.core.search.StatsTimeseries;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries.TimeseriesBucket;
|
||||
import com.cameleer3.server.core.search.TopError;
|
||||
import com.cameleer3.server.core.storage.StatsStore;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@@ -18,6 +19,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Repository
|
||||
@ConditionalOnProperty(name = "cameleer.storage.stats", havingValue = "postgres")
|
||||
public class PostgresStatsStore implements StatsStore {
|
||||
|
||||
private final JdbcTemplate jdbc;
|
||||
|
||||
@@ -48,6 +48,14 @@ opensearch:
|
||||
cameleer:
|
||||
body-size-limit: ${CAMELEER_BODY_SIZE_LIMIT:16384}
|
||||
retention-days: ${CAMELEER_RETENTION_DAYS:30}
|
||||
storage:
|
||||
metrics: ${CAMELEER_STORAGE_METRICS:postgres}
|
||||
search: ${CAMELEER_STORAGE_SEARCH:opensearch}
|
||||
stats: ${CAMELEER_STORAGE_STATS:clickhouse}
|
||||
diagrams: ${CAMELEER_STORAGE_DIAGRAMS:clickhouse}
|
||||
events: ${CAMELEER_STORAGE_EVENTS:clickhouse}
|
||||
logs: ${CAMELEER_STORAGE_LOGS:clickhouse}
|
||||
executions: ${CAMELEER_STORAGE_EXECUTIONS:clickhouse}
|
||||
|
||||
security:
|
||||
access-token-expiry-ms: 3600000
|
||||
@@ -66,6 +74,12 @@ springdoc:
|
||||
swagger-ui:
|
||||
path: /api/v1/swagger-ui
|
||||
|
||||
clickhouse:
|
||||
enabled: ${CLICKHOUSE_ENABLED:false}
|
||||
url: ${CLICKHOUSE_URL:jdbc:clickhouse://localhost:8123/cameleer}
|
||||
username: ${CLICKHOUSE_USERNAME:default}
|
||||
password: ${CLICKHOUSE_PASSWORD:}
|
||||
|
||||
management:
|
||||
endpoints:
|
||||
web:
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
CREATE TABLE IF NOT EXISTS agent_metrics (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
collected_at DateTime64(3),
|
||||
agent_id LowCardinality(String),
|
||||
metric_name LowCardinality(String),
|
||||
metric_value Float64,
|
||||
tags Map(String, String) DEFAULT map(),
|
||||
server_received_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(collected_at))
|
||||
ORDER BY (tenant_id, agent_id, metric_name, collected_at)
|
||||
TTL toDateTime(collected_at) + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
@@ -0,0 +1,48 @@
|
||||
CREATE TABLE IF NOT EXISTS executions (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
execution_id String,
|
||||
start_time DateTime64(3),
|
||||
_version UInt64 DEFAULT 1,
|
||||
route_id LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
status LowCardinality(String),
|
||||
correlation_id String DEFAULT '',
|
||||
exchange_id String DEFAULT '',
|
||||
end_time Nullable(DateTime64(3)),
|
||||
duration_ms Nullable(Int64),
|
||||
error_message String DEFAULT '',
|
||||
error_stacktrace String DEFAULT '',
|
||||
error_type LowCardinality(String) DEFAULT '',
|
||||
error_category LowCardinality(String) DEFAULT '',
|
||||
root_cause_type String DEFAULT '',
|
||||
root_cause_message String DEFAULT '',
|
||||
diagram_content_hash String DEFAULT '',
|
||||
engine_level LowCardinality(String) DEFAULT '',
|
||||
input_body String DEFAULT '',
|
||||
output_body String DEFAULT '',
|
||||
input_headers String DEFAULT '',
|
||||
output_headers String DEFAULT '',
|
||||
attributes String DEFAULT '',
|
||||
trace_id String DEFAULT '',
|
||||
span_id String DEFAULT '',
|
||||
has_trace_data Bool DEFAULT false,
|
||||
is_replay Bool DEFAULT false,
|
||||
|
||||
_search_text String MATERIALIZED
|
||||
concat(error_message, ' ', error_stacktrace, ' ', attributes,
|
||||
' ', input_body, ' ', output_body, ' ', input_headers,
|
||||
' ', output_headers, ' ', root_cause_message),
|
||||
|
||||
INDEX idx_search _search_text TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_error error_message TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_bodies concat(input_body, ' ', output_body) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_headers concat(input_headers, ' ', output_headers) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_status status TYPE set(10) GRANULARITY 1,
|
||||
INDEX idx_corr correlation_id TYPE bloom_filter(0.01) GRANULARITY 4
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(_version)
|
||||
PARTITION BY (tenant_id, toYYYYMM(start_time))
|
||||
ORDER BY (tenant_id, start_time, application_name, route_id, execution_id)
|
||||
TTL toDateTime(start_time) + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
@@ -0,0 +1,45 @@
|
||||
CREATE TABLE IF NOT EXISTS processor_executions (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
execution_id String,
|
||||
seq UInt32,
|
||||
parent_seq Nullable(UInt32),
|
||||
parent_processor_id String DEFAULT '',
|
||||
processor_id String,
|
||||
processor_type LowCardinality(String),
|
||||
start_time DateTime64(3),
|
||||
route_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
iteration Nullable(Int32),
|
||||
iteration_size Nullable(Int32),
|
||||
status LowCardinality(String),
|
||||
end_time Nullable(DateTime64(3)),
|
||||
duration_ms Nullable(Int64),
|
||||
error_message String DEFAULT '',
|
||||
error_stacktrace String DEFAULT '',
|
||||
error_type LowCardinality(String) DEFAULT '',
|
||||
error_category LowCardinality(String) DEFAULT '',
|
||||
root_cause_type String DEFAULT '',
|
||||
root_cause_message String DEFAULT '',
|
||||
input_body String DEFAULT '',
|
||||
output_body String DEFAULT '',
|
||||
input_headers String DEFAULT '',
|
||||
output_headers String DEFAULT '',
|
||||
attributes String DEFAULT '',
|
||||
resolved_endpoint_uri String DEFAULT '',
|
||||
circuit_breaker_state LowCardinality(String) DEFAULT '',
|
||||
fallback_triggered Bool DEFAULT false,
|
||||
filter_matched Bool DEFAULT false,
|
||||
duplicate_message Bool DEFAULT false,
|
||||
|
||||
_search_text String MATERIALIZED
|
||||
concat(error_message, ' ', error_stacktrace, ' ', attributes,
|
||||
' ', input_body, ' ', output_body, ' ', input_headers, ' ', output_headers),
|
||||
|
||||
INDEX idx_search _search_text TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_exec_id execution_id TYPE bloom_filter(0.01) GRANULARITY 4
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(start_time))
|
||||
ORDER BY (tenant_id, start_time, application_name, route_id, execution_id, seq)
|
||||
TTL toDateTime(start_time) + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
@@ -0,0 +1,165 @@
|
||||
-- V4__stats_tables_and_mvs.sql
|
||||
-- Materialized views replacing TimescaleDB continuous aggregates.
|
||||
-- Tables use AggregatingMergeTree, MVs use -State combinators.
|
||||
|
||||
-- stats_1m_all (global)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_all (
|
||||
tenant_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
|
||||
failed_count AggregateFunction(countIf, UInt8),
|
||||
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_all_mv TO stats_1m_all AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, bucket;
|
||||
|
||||
-- stats_1m_app (per-application)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_app (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
|
||||
failed_count AggregateFunction(countIf, UInt8),
|
||||
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_app_mv TO stats_1m_app AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, application_name, bucket;
|
||||
|
||||
-- stats_1m_route (per-route)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_route (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
route_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
|
||||
failed_count AggregateFunction(countIf, UInt8),
|
||||
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, route_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_route_mv TO stats_1m_route AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
route_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, application_name, route_id, bucket;
|
||||
|
||||
-- stats_1m_processor (per-processor-type)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_processor (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
processor_type LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
|
||||
failed_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, processor_type, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_processor_mv TO stats_1m_processor AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
processor_type,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM processor_executions
|
||||
GROUP BY tenant_id, application_name, processor_type, bucket;
|
||||
|
||||
-- stats_1m_processor_detail (per-processor-id)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS stats_1m_processor_detail (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
route_id LowCardinality(String),
|
||||
processor_id String,
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
|
||||
failed_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, route_id, processor_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_processor_detail_mv TO stats_1m_processor_detail AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
route_id,
|
||||
processor_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM processor_executions
|
||||
GROUP BY tenant_id, application_name, route_id, processor_id, bucket;
|
||||
@@ -0,0 +1,2 @@
|
||||
ALTER TABLE executions ADD COLUMN IF NOT EXISTS original_exchange_id String DEFAULT '';
|
||||
ALTER TABLE executions ADD COLUMN IF NOT EXISTS replay_exchange_id String DEFAULT '';
|
||||
@@ -0,0 +1,12 @@
|
||||
CREATE TABLE IF NOT EXISTS route_diagrams (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
content_hash String,
|
||||
route_id LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
definition String,
|
||||
created_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(created_at)
|
||||
ORDER BY (tenant_id, content_hash)
|
||||
SETTINGS index_granularity = 8192;
|
||||
@@ -0,0 +1,12 @@
|
||||
CREATE TABLE IF NOT EXISTS agent_events (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
timestamp DateTime64(3) DEFAULT now64(3),
|
||||
agent_id LowCardinality(String),
|
||||
app_id LowCardinality(String),
|
||||
event_type LowCardinality(String),
|
||||
detail String DEFAULT ''
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(timestamp))
|
||||
ORDER BY (tenant_id, app_id, agent_id, timestamp)
|
||||
TTL toDateTime(timestamp) + INTERVAL 365 DAY DELETE;
|
||||
@@ -0,0 +1,22 @@
|
||||
CREATE TABLE IF NOT EXISTS logs (
|
||||
tenant_id LowCardinality(String) DEFAULT 'default',
|
||||
timestamp DateTime64(3),
|
||||
application LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
level LowCardinality(String),
|
||||
logger_name LowCardinality(String) DEFAULT '',
|
||||
message String,
|
||||
thread_name LowCardinality(String) DEFAULT '',
|
||||
stack_trace String DEFAULT '',
|
||||
exchange_id String DEFAULT '',
|
||||
mdc Map(String, String) DEFAULT map(),
|
||||
|
||||
INDEX idx_msg message TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_stack stack_trace TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_level level TYPE set(10) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(timestamp))
|
||||
ORDER BY (tenant_id, application, timestamp)
|
||||
TTL toDateTime(timestamp) + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
@@ -7,6 +7,7 @@ import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.DynamicPropertyRegistry;
|
||||
import org.springframework.test.context.DynamicPropertySource;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import org.testcontainers.utility.DockerImageName;
|
||||
|
||||
@@ -20,6 +21,7 @@ public abstract class AbstractPostgresIT {
|
||||
|
||||
static final PostgreSQLContainer<?> postgres;
|
||||
static final OpensearchContainer<?> opensearch;
|
||||
static final ClickHouseContainer clickhouse;
|
||||
|
||||
static {
|
||||
postgres = new PostgreSQLContainer<>(TIMESCALEDB_IMAGE)
|
||||
@@ -30,6 +32,9 @@ public abstract class AbstractPostgresIT {
|
||||
|
||||
opensearch = new OpensearchContainer<>("opensearchproject/opensearch:2.19.0");
|
||||
opensearch.start();
|
||||
|
||||
clickhouse = new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
clickhouse.start();
|
||||
}
|
||||
|
||||
@Autowired
|
||||
@@ -46,5 +51,9 @@ public abstract class AbstractPostgresIT {
|
||||
registry.add("spring.flyway.user", postgres::getUsername);
|
||||
registry.add("spring.flyway.password", postgres::getPassword);
|
||||
registry.add("opensearch.url", opensearch::getHttpHostAddress);
|
||||
registry.add("clickhouse.enabled", () -> "true");
|
||||
registry.add("clickhouse.url", clickhouse::getJdbcUrl);
|
||||
registry.add("clickhouse.username", clickhouse::getUsername);
|
||||
registry.add("clickhouse.password", clickhouse::getPassword);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
package com.cameleer3.server.app.search;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
import com.cameleer3.server.core.storage.LogEntryResult;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseLogStoreIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseLogStore store;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
String ddl = new ClassPathResource("clickhouse/V8__logs.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
jdbc.execute(ddl);
|
||||
jdbc.execute("TRUNCATE TABLE logs");
|
||||
|
||||
store = new ClickHouseLogStore(jdbc);
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
private LogEntry entry(Instant ts, String level, String logger, String message,
|
||||
String thread, String stackTrace, Map<String, String> mdc) {
|
||||
return new LogEntry(ts, level, logger, message, thread, stackTrace, mdc);
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void indexBatch_writesLogs() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
List<LogEntry> entries = List.of(
|
||||
entry(now, "INFO", "com.example.Foo", "Hello world", "main", null, null),
|
||||
entry(now.plusSeconds(1), "ERROR", "com.example.Bar", "Something failed", "worker-1", "stack...", null)
|
||||
);
|
||||
|
||||
store.indexBatch("agent-1", "my-app", entries);
|
||||
|
||||
Long count = jdbc.queryForObject("SELECT count() FROM logs WHERE application = 'my-app'", Long.class);
|
||||
assertThat(count).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byApplication_returnsLogs() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
store.indexBatch("agent-1", "app-a", List.of(
|
||||
entry(now, "INFO", "logger", "msg-a", "t1", null, null)
|
||||
));
|
||||
store.indexBatch("agent-2", "app-b", List.of(
|
||||
entry(now, "INFO", "logger", "msg-b", "t1", null, null)
|
||||
));
|
||||
|
||||
List<LogEntryResult> results = store.search("app-a", null, null, null, null, null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).message()).isEqualTo("msg-a");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byLevel_filtersCorrectly() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
store.indexBatch("agent-1", "my-app", List.of(
|
||||
entry(now, "INFO", "logger", "info message", "t1", null, null),
|
||||
entry(now.plusSeconds(1), "ERROR", "logger", "error message", "t1", null, null)
|
||||
));
|
||||
|
||||
List<LogEntryResult> results = store.search("my-app", null, "ERROR", null, null, null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).level()).isEqualTo("ERROR");
|
||||
assertThat(results.get(0).message()).isEqualTo("error message");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byQuery_usesLikeSearch() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
store.indexBatch("agent-1", "my-app", List.of(
|
||||
entry(now, "INFO", "logger", "Processing order #12345", "t1", null, null),
|
||||
entry(now.plusSeconds(1), "INFO", "logger", "Health check OK", "t1", null, null)
|
||||
));
|
||||
|
||||
List<LogEntryResult> results = store.search("my-app", null, null, "order #12345", null, null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).message()).contains("order #12345");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byExchangeId_matchesTopLevelAndMdc() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
Map<String, String> mdc = Map.of("camel.exchangeId", "exchange-abc");
|
||||
|
||||
store.indexBatch("agent-1", "my-app", List.of(
|
||||
entry(now, "INFO", "logger", "msg with exchange", "t1", null, mdc),
|
||||
entry(now.plusSeconds(1), "INFO", "logger", "msg without exchange", "t1", null, null)
|
||||
));
|
||||
|
||||
List<LogEntryResult> results = store.search("my-app", null, null, null, "exchange-abc", null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).message()).isEqualTo("msg with exchange");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byTimeRange_filtersCorrectly() {
|
||||
Instant t1 = Instant.parse("2026-03-31T10:00:00Z");
|
||||
Instant t2 = Instant.parse("2026-03-31T12:00:00Z");
|
||||
Instant t3 = Instant.parse("2026-03-31T14:00:00Z");
|
||||
|
||||
store.indexBatch("agent-1", "my-app", List.of(
|
||||
entry(t1, "INFO", "logger", "morning", "t1", null, null),
|
||||
entry(t2, "INFO", "logger", "noon", "t1", null, null),
|
||||
entry(t3, "INFO", "logger", "afternoon", "t1", null, null)
|
||||
));
|
||||
|
||||
// Query only the noon window
|
||||
Instant from = Instant.parse("2026-03-31T11:00:00Z");
|
||||
Instant to = Instant.parse("2026-03-31T13:00:00Z");
|
||||
|
||||
List<LogEntryResult> results = store.search("my-app", null, null, null, null, from, to, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).message()).isEqualTo("noon");
|
||||
}
|
||||
|
||||
@Test
|
||||
void indexBatch_storesMdc() {
|
||||
Instant now = Instant.parse("2026-03-31T12:00:00Z");
|
||||
Map<String, String> mdc = Map.of(
|
||||
"camel.exchangeId", "ex-123",
|
||||
"custom.key", "custom-value"
|
||||
);
|
||||
|
||||
store.indexBatch("agent-1", "my-app", List.of(
|
||||
entry(now, "INFO", "logger", "msg", "t1", null, mdc)
|
||||
));
|
||||
|
||||
// Verify MDC is stored by querying raw data
|
||||
String exchangeId = jdbc.queryForObject(
|
||||
"SELECT exchange_id FROM logs WHERE application = 'my-app' LIMIT 1",
|
||||
String.class);
|
||||
assertThat(exchangeId).isEqualTo("ex-123");
|
||||
|
||||
// Verify MDC map contains custom key
|
||||
String customVal = jdbc.queryForObject(
|
||||
"SELECT mdc['custom.key'] FROM logs WHERE application = 'my-app' LIMIT 1",
|
||||
String.class);
|
||||
assertThat(customVal).isEqualTo("custom-value");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,319 @@
|
||||
package com.cameleer3.server.app.search;
|
||||
|
||||
import com.cameleer3.server.app.storage.ClickHouseExecutionStore;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.search.ExecutionSummary;
|
||||
import com.cameleer3.server.core.search.SearchRequest;
|
||||
import com.cameleer3.server.core.search.SearchResult;
|
||||
import com.cameleer3.common.model.ExecutionStatus;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseSearchIndexIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseSearchIndex searchIndex;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
// Load DDL from classpath resources
|
||||
String executionsDdl = new ClassPathResource("clickhouse/V2__executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String processorsDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
|
||||
jdbc.execute(executionsDdl);
|
||||
jdbc.execute(processorsDdl);
|
||||
|
||||
jdbc.execute("TRUNCATE TABLE executions");
|
||||
jdbc.execute("TRUNCATE TABLE processor_executions");
|
||||
|
||||
ClickHouseExecutionStore store = new ClickHouseExecutionStore(jdbc);
|
||||
searchIndex = new ClickHouseSearchIndex(jdbc);
|
||||
|
||||
// Seed test data
|
||||
Instant baseTime = Instant.parse("2026-03-31T10:00:00Z");
|
||||
|
||||
// exec-1: COMPLETED, route-timer, agent-a, my-app, corr-1, 500ms, input_body with order number, attributes
|
||||
MergedExecution exec1 = new MergedExecution(
|
||||
"default", 1L, "exec-1", "route-timer", "agent-a", "my-app",
|
||||
"COMPLETED", "corr-1", "exchange-1",
|
||||
baseTime,
|
||||
baseTime.plusMillis(500),
|
||||
500L,
|
||||
"", "", "", "", "", "",
|
||||
"hash-abc", "FULL",
|
||||
"{\"order\":\"12345\"}", "", "", "", "{\"env\":\"prod\"}",
|
||||
"", "",
|
||||
false, false,
|
||||
null, null
|
||||
);
|
||||
|
||||
// exec-2: FAILED, route-timer, agent-a, my-app, corr-2, 200ms, with error
|
||||
MergedExecution exec2 = new MergedExecution(
|
||||
"default", 1L, "exec-2", "route-timer", "agent-a", "my-app",
|
||||
"FAILED", "corr-2", "exchange-2",
|
||||
baseTime.plusSeconds(1),
|
||||
baseTime.plusSeconds(1).plusMillis(200),
|
||||
200L,
|
||||
"NullPointerException at line 42",
|
||||
"java.lang.NPE\n at Foo.bar(Foo.java:42)",
|
||||
"NullPointerException", "RUNTIME", "", "",
|
||||
"", "FULL",
|
||||
"", "", "", "", "",
|
||||
"", "",
|
||||
false, false,
|
||||
null, null
|
||||
);
|
||||
|
||||
// exec-3: COMPLETED, route-rest, agent-b, other-app, 100ms, no error
|
||||
MergedExecution exec3 = new MergedExecution(
|
||||
"default", 1L, "exec-3", "route-rest", "agent-b", "other-app",
|
||||
"COMPLETED", "", "exchange-3",
|
||||
baseTime.plusSeconds(2),
|
||||
baseTime.plusSeconds(2).plusMillis(100),
|
||||
100L,
|
||||
"", "", "", "", "", "",
|
||||
"", "FULL",
|
||||
"", "", "", "", "",
|
||||
"", "",
|
||||
false, false,
|
||||
null, null
|
||||
);
|
||||
|
||||
store.insertExecutionBatch(List.of(exec1, exec2, exec3));
|
||||
|
||||
// Processor for exec-1: seq=1, to, inputBody with "Hello World", inputHeaders with secret-token
|
||||
FlatProcessorRecord proc1 = new FlatProcessorRecord(1, "proc-1", "to");
|
||||
proc1.setStatus(ExecutionStatus.COMPLETED);
|
||||
proc1.setStartTime(baseTime);
|
||||
proc1.setDurationMs(50L);
|
||||
proc1.setInputBody("Hello World request body");
|
||||
proc1.setOutputBody("");
|
||||
proc1.setInputHeaders(Map.of("Authorization", "Bearer secret-token"));
|
||||
|
||||
store.insertProcessorBatch("default", "exec-1", "route-timer", "my-app", baseTime, List.of(proc1));
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_withNoFilters_returnsAllExecutions() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(3);
|
||||
assertThat(result.data()).hasSize(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byStatus_filtersCorrectly() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
"FAILED", null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data()).hasSize(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byTimeRange_filtersCorrectly() {
|
||||
Instant baseTime = Instant.parse("2026-03-31T10:00:00Z");
|
||||
// Time window covering exec-1 and exec-2 but not exec-3
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, baseTime, baseTime.plusMillis(1500), null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(2);
|
||||
assertThat(result.data()).extracting(ExecutionSummary::executionId)
|
||||
.containsExactlyInAnyOrder("exec-1", "exec-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_fullTextSearch_findsInErrorMessage() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, "NullPointerException", null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_fullTextSearch_findsInInputBody() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, "12345", null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_textInBody_searchesProcessorBodies() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, "Hello World", null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_textInHeaders_searchesProcessorHeaders() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, "secret-token", null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_textInErrors_searchesErrorFields() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, "Foo.bar",
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_withHighlight_returnsSnippet() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, "NullPointerException", null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).highlight()).contains("NullPointerException");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_pagination_works() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 2, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(3);
|
||||
assertThat(result.data()).hasSize(2);
|
||||
assertThat(result.offset()).isEqualTo(0);
|
||||
assertThat(result.limit()).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byApplication_filtersCorrectly() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, "other-app", null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-3");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byAgentIds_filtersCorrectly() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, List.of("agent-b"), 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-3");
|
||||
}
|
||||
|
||||
@Test
|
||||
void count_returnsMatchingCount() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
"COMPLETED", null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
long count = searchIndex.count(request);
|
||||
|
||||
assertThat(count).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_multipleStatusFilter_works() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
"COMPLETED,FAILED", null, null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byCorrelationId_filtersCorrectly() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, null, null, "corr-1", null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void search_byDurationRange_filtersCorrectly() {
|
||||
SearchRequest request = new SearchRequest(
|
||||
null, null, null, 300L, 600L, null, null, null, null, null,
|
||||
null, null, null, null, null, 0, 50, null, null);
|
||||
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(request);
|
||||
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("exec-1");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,158 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.agent.AgentEventRecord;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseAgentEventRepositoryIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseAgentEventRepository repo;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
String ddl = new ClassPathResource("clickhouse/V7__agent_events.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
jdbc.execute(ddl);
|
||||
jdbc.execute("TRUNCATE TABLE agent_events");
|
||||
|
||||
repo = new ClickHouseAgentEventRepository(jdbc);
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Insert a row with an explicit timestamp so tests can control ordering and ranges.
|
||||
*/
|
||||
private void insertAt(String agentId, String appId, String eventType, String detail, Instant ts) {
|
||||
jdbc.update(
|
||||
"INSERT INTO agent_events (tenant_id, agent_id, app_id, event_type, detail, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
|
||||
"default", agentId, appId, eventType, detail, Timestamp.from(ts));
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void insert_writesEvent() {
|
||||
repo.insert("agent-1", "app-a", "CONNECTED", "agent came online");
|
||||
|
||||
Long count = jdbc.queryForObject(
|
||||
"SELECT count() FROM agent_events WHERE agent_id = 'agent-1'",
|
||||
Long.class);
|
||||
assertThat(count).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_byAppId_filtersCorrectly() {
|
||||
repo.insert("agent-1", "app-x", "CONNECTED", "");
|
||||
repo.insert("agent-2", "app-y", "DISCONNECTED", "");
|
||||
|
||||
List<AgentEventRecord> results = repo.query("app-x", null, null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).appId()).isEqualTo("app-x");
|
||||
assertThat(results.get(0).agentId()).isEqualTo("agent-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_byAgentId_filtersCorrectly() {
|
||||
repo.insert("agent-alpha", "app-shared", "CONNECTED", "");
|
||||
repo.insert("agent-beta", "app-shared", "CONNECTED", "");
|
||||
|
||||
List<AgentEventRecord> results = repo.query(null, "agent-alpha", null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).agentId()).isEqualTo("agent-alpha");
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_byTimeRange_filtersCorrectly() {
|
||||
Instant t1 = Instant.parse("2026-01-01T10:00:00Z");
|
||||
Instant t2 = Instant.parse("2026-01-01T11:00:00Z");
|
||||
Instant t3 = Instant.parse("2026-01-01T12:00:00Z");
|
||||
|
||||
insertAt("agent-1", "app-a", "CONNECTED", "early", t1);
|
||||
insertAt("agent-1", "app-a", "HEARTBEAT", "mid", t2);
|
||||
insertAt("agent-1", "app-a", "DISCONNECTED", "late", t3);
|
||||
|
||||
// Query [t2, t3) — should return only the middle event
|
||||
List<AgentEventRecord> results = repo.query(null, null, t2, t3, 100);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).eventType()).isEqualTo("HEARTBEAT");
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_respectsLimit() {
|
||||
Instant base = Instant.parse("2026-02-01T00:00:00Z");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
insertAt("agent-1", "app-a", "HEARTBEAT", "beat-" + i, base.plusSeconds(i));
|
||||
}
|
||||
|
||||
List<AgentEventRecord> results = repo.query(null, null, null, null, 3);
|
||||
|
||||
assertThat(results).hasSize(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_returnsZeroId() {
|
||||
repo.insert("agent-1", "app-a", "CONNECTED", "");
|
||||
|
||||
List<AgentEventRecord> results = repo.query(null, null, null, null, 10);
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).id()).isEqualTo(0L);
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_noFilters_returnsAllEvents() {
|
||||
repo.insert("agent-1", "app-a", "CONNECTED", "");
|
||||
repo.insert("agent-2", "app-b", "DISCONNECTED", "");
|
||||
|
||||
List<AgentEventRecord> results = repo.query(null, null, null, null, 100);
|
||||
|
||||
assertThat(results).hasSize(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void query_resultsOrderedByTimestampDesc() {
|
||||
Instant t1 = Instant.parse("2026-03-01T08:00:00Z");
|
||||
Instant t2 = Instant.parse("2026-03-01T09:00:00Z");
|
||||
Instant t3 = Instant.parse("2026-03-01T10:00:00Z");
|
||||
|
||||
insertAt("agent-1", "app-a", "FIRST", "", t1);
|
||||
insertAt("agent-1", "app-a", "SECOND", "", t2);
|
||||
insertAt("agent-1", "app-a", "THIRD", "", t3);
|
||||
|
||||
List<AgentEventRecord> results = repo.query(null, null, null, null, 100);
|
||||
|
||||
assertThat(results.get(0).eventType()).isEqualTo("THIRD");
|
||||
assertThat(results.get(1).eventType()).isEqualTo("SECOND");
|
||||
assertThat(results.get(2).eventType()).isEqualTo("FIRST");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.app.search.ClickHouseSearchIndex;
|
||||
import com.cameleer3.server.core.ingestion.ChunkAccumulator;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.search.ExecutionSummary;
|
||||
import com.cameleer3.server.core.search.SearchRequest;
|
||||
import com.cameleer3.server.core.search.SearchResult;
|
||||
import com.cameleer3.common.model.ExecutionChunk;
|
||||
import com.cameleer3.common.model.ExecutionStatus;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseChunkPipelineIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseExecutionStore executionStore;
|
||||
private ClickHouseSearchIndex searchIndex;
|
||||
private ChunkAccumulator accumulator;
|
||||
private List<MergedExecution> executionBuffer;
|
||||
private List<ChunkAccumulator.ProcessorBatch> processorBuffer;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws IOException {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
String execDdl = new String(getClass().getResourceAsStream(
|
||||
"/clickhouse/V2__executions.sql").readAllBytes(), StandardCharsets.UTF_8);
|
||||
String procDdl = new String(getClass().getResourceAsStream(
|
||||
"/clickhouse/V3__processor_executions.sql").readAllBytes(), StandardCharsets.UTF_8);
|
||||
jdbc.execute(execDdl);
|
||||
jdbc.execute(procDdl);
|
||||
jdbc.execute("TRUNCATE TABLE executions");
|
||||
jdbc.execute("TRUNCATE TABLE processor_executions");
|
||||
|
||||
executionStore = new ClickHouseExecutionStore(jdbc);
|
||||
searchIndex = new ClickHouseSearchIndex(jdbc);
|
||||
|
||||
executionBuffer = new ArrayList<>();
|
||||
processorBuffer = new ArrayList<>();
|
||||
accumulator = new ChunkAccumulator(executionBuffer::add, processorBuffer::add, Duration.ofMinutes(5));
|
||||
}
|
||||
|
||||
@Test
|
||||
void fullPipeline_chunkedIngestion_thenSearch() {
|
||||
Instant start = Instant.parse("2026-03-31T12:00:00Z");
|
||||
|
||||
// Chunk 0: RUNNING with initial processors
|
||||
ExecutionChunk chunk0 = new ExecutionChunk();
|
||||
chunk0.setExchangeId("pipeline-1");
|
||||
chunk0.setApplicationName("order-service");
|
||||
chunk0.setAgentId("pod-1");
|
||||
chunk0.setRouteId("order-route");
|
||||
chunk0.setCorrelationId("corr-1");
|
||||
chunk0.setStatus(ExecutionStatus.RUNNING);
|
||||
chunk0.setStartTime(start);
|
||||
chunk0.setEngineLevel("DEEP");
|
||||
chunk0.setAttributes(Map.of("orderId", "ORD-123"));
|
||||
chunk0.setChunkSeq(0);
|
||||
chunk0.setFinal(false);
|
||||
|
||||
FlatProcessorRecord p1 = new FlatProcessorRecord(1, "log1", "log");
|
||||
p1.setStatus(ExecutionStatus.COMPLETED);
|
||||
p1.setStartTime(start);
|
||||
p1.setDurationMs(2L);
|
||||
|
||||
FlatProcessorRecord p2 = new FlatProcessorRecord(2, "split1", "split");
|
||||
p2.setIterationSize(3);
|
||||
p2.setStatus(ExecutionStatus.COMPLETED);
|
||||
p2.setStartTime(start.plusMillis(2));
|
||||
p2.setDurationMs(100L);
|
||||
|
||||
FlatProcessorRecord p3 = new FlatProcessorRecord(3, "to1", "to");
|
||||
p3.setParentSeq(2);
|
||||
p3.setParentProcessorId("split1");
|
||||
p3.setIteration(0);
|
||||
p3.setStatus(ExecutionStatus.COMPLETED);
|
||||
p3.setStartTime(start.plusMillis(5));
|
||||
p3.setDurationMs(30L);
|
||||
p3.setResolvedEndpointUri("http://inventory/api");
|
||||
p3.setInputBody("order ABC-123 check stock");
|
||||
p3.setOutputBody("stock available");
|
||||
|
||||
chunk0.setProcessors(List.of(p1, p2, p3));
|
||||
accumulator.onChunk(chunk0);
|
||||
|
||||
// Processors should be buffered immediately
|
||||
assertThat(processorBuffer).hasSize(1);
|
||||
assertThat(executionBuffer).isEmpty();
|
||||
|
||||
// Chunk 1: COMPLETED (final)
|
||||
ExecutionChunk chunk1 = new ExecutionChunk();
|
||||
chunk1.setExchangeId("pipeline-1");
|
||||
chunk1.setApplicationName("order-service");
|
||||
chunk1.setAgentId("pod-1");
|
||||
chunk1.setRouteId("order-route");
|
||||
chunk1.setCorrelationId("corr-1");
|
||||
chunk1.setStatus(ExecutionStatus.COMPLETED);
|
||||
chunk1.setStartTime(start);
|
||||
chunk1.setEndTime(start.plusMillis(750));
|
||||
chunk1.setDurationMs(750L);
|
||||
chunk1.setEngineLevel("DEEP");
|
||||
chunk1.setChunkSeq(1);
|
||||
chunk1.setFinal(true);
|
||||
|
||||
FlatProcessorRecord p4 = new FlatProcessorRecord(4, "to1", "to");
|
||||
p4.setParentSeq(2);
|
||||
p4.setParentProcessorId("split1");
|
||||
p4.setIteration(1);
|
||||
p4.setStatus(ExecutionStatus.COMPLETED);
|
||||
p4.setStartTime(start.plusMillis(40));
|
||||
p4.setDurationMs(25L);
|
||||
p4.setResolvedEndpointUri("http://inventory/api");
|
||||
p4.setInputBody("order DEF-456 check stock");
|
||||
p4.setOutputBody("stock available");
|
||||
|
||||
chunk1.setProcessors(List.of(p4));
|
||||
accumulator.onChunk(chunk1);
|
||||
|
||||
assertThat(executionBuffer).hasSize(1);
|
||||
assertThat(processorBuffer).hasSize(2);
|
||||
|
||||
// Flush to ClickHouse (simulating ExecutionFlushScheduler)
|
||||
executionStore.insertExecutionBatch(executionBuffer);
|
||||
for (ChunkAccumulator.ProcessorBatch batch : processorBuffer) {
|
||||
executionStore.insertProcessorBatch(
|
||||
batch.tenantId(), batch.executionId(),
|
||||
batch.routeId(), batch.applicationName(),
|
||||
batch.execStartTime(), batch.processors());
|
||||
}
|
||||
|
||||
// Search by order ID in attributes (via _search_text on executions)
|
||||
SearchResult<ExecutionSummary> result = searchIndex.search(new SearchRequest(
|
||||
null, null, null, null, null, null,
|
||||
"ORD-123", null, null, null,
|
||||
null, null, null, null, null,
|
||||
0, 50, null, null));
|
||||
assertThat(result.total()).isEqualTo(1);
|
||||
assertThat(result.data().get(0).executionId()).isEqualTo("pipeline-1");
|
||||
assertThat(result.data().get(0).status()).isEqualTo("COMPLETED");
|
||||
assertThat(result.data().get(0).durationMs()).isEqualTo(750L);
|
||||
|
||||
// Search in processor body
|
||||
SearchResult<ExecutionSummary> bodyResult = searchIndex.search(new SearchRequest(
|
||||
null, null, null, null, null, null,
|
||||
null, "ABC-123", null, null,
|
||||
null, null, null, null, null,
|
||||
0, 50, null, null));
|
||||
assertThat(bodyResult.total()).isEqualTo(1);
|
||||
|
||||
// Verify iteration data in processor_executions
|
||||
Integer iterSize = jdbc.queryForObject(
|
||||
"SELECT iteration_size FROM processor_executions WHERE execution_id = 'pipeline-1' AND seq = 2",
|
||||
Integer.class);
|
||||
assertThat(iterSize).isEqualTo(3);
|
||||
|
||||
Integer iter0 = jdbc.queryForObject(
|
||||
"SELECT iteration FROM processor_executions WHERE execution_id = 'pipeline-1' AND seq = 3",
|
||||
Integer.class);
|
||||
assertThat(iter0).isEqualTo(0);
|
||||
|
||||
// Verify total processor count
|
||||
Integer procCount = jdbc.queryForObject(
|
||||
"SELECT count() FROM processor_executions WHERE execution_id = 'pipeline-1'",
|
||||
Integer.class);
|
||||
assertThat(procCount).isEqualTo(4);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.common.graph.NodeType;
|
||||
import com.cameleer3.common.graph.RouteGraph;
|
||||
import com.cameleer3.common.graph.RouteNode;
|
||||
import com.cameleer3.server.core.ingestion.TaggedDiagram;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
class ClickHouseDiagramStoreIT {

    @Container
    static final ClickHouseContainer clickhouse =
            new ClickHouseContainer("clickhouse/clickhouse-server:24.12");

    private JdbcTemplate jdbc;
    private ClickHouseDiagramStore store;

    @BeforeEach
    void setUp() throws Exception {
        // Pooled datasource against the Testcontainers ClickHouse instance.
        HikariDataSource ds = new HikariDataSource();
        ds.setJdbcUrl(clickhouse.getJdbcUrl());
        ds.setUsername(clickhouse.getUsername());
        ds.setPassword(clickhouse.getPassword());

        jdbc = new JdbcTemplate(ds);

        // Apply the route_diagrams DDL and clear rows left by previous tests.
        String ddl = new ClassPathResource("clickhouse/V6__route_diagrams.sql")
                .getContentAsString(StandardCharsets.UTF_8);
        jdbc.execute(ddl);
        jdbc.execute("TRUNCATE TABLE route_diagrams");

        store = new ClickHouseDiagramStore(jdbc);
    }

    // ── Helpers ──────────────────────────────────────────────────────────

    /**
     * Builds a graph with the first id as the ENDPOINT root and the remaining
     * ids as direct PROCESSOR children of that root.
     */
    private RouteGraph buildGraph(String routeId, String... nodeIds) {
        RouteGraph graph = new RouteGraph(routeId);
        if (nodeIds.length > 0) {
            RouteNode root = new RouteNode(nodeIds[0], NodeType.ENDPOINT, "from:" + nodeIds[0]);
            for (int i = 1; i < nodeIds.length; i++) {
                root.addChild(new RouteNode(nodeIds[i], NodeType.PROCESSOR, "proc:" + nodeIds[i]));
            }
            graph.setRoot(root);
        }
        return graph;
    }

    /** Wraps a graph with its originating agent/application, as the ingestion path would. */
    private TaggedDiagram tagged(String agentId, String appName, RouteGraph graph) {
        return new TaggedDiagram(agentId, appName, graph);
    }

    // ── Tests ─────────────────────────────────────────────────────────────

    @Test
    void store_insertsNewDiagram() {
        RouteGraph graph = buildGraph("route-1", "node-a", "node-b");
        store.store(tagged("agent-1", "my-app", graph));

        // Allow ReplacingMergeTree to settle
        jdbc.execute("OPTIMIZE TABLE route_diagrams FINAL");

        long count = jdbc.queryForObject(
                "SELECT count() FROM route_diagrams WHERE route_id = 'route-1'",
                Long.class);
        assertThat(count).isEqualTo(1);
    }

    @Test
    void store_duplicateHashIgnored() {
        RouteGraph graph = buildGraph("route-1", "node-a");
        TaggedDiagram diagram = tagged("agent-1", "my-app", graph);

        store.store(diagram);
        store.store(diagram); // same graph → same hash

        jdbc.execute("OPTIMIZE TABLE route_diagrams FINAL");

        // FINAL forces deduplicated read; the two identical stores collapse to one row.
        long count = jdbc.queryForObject(
                "SELECT count() FROM route_diagrams FINAL WHERE route_id = 'route-1'",
                Long.class);
        assertThat(count).isEqualTo(1);
    }

    @Test
    void findByContentHash_returnsGraph() {
        RouteGraph graph = buildGraph("route-2", "node-x");
        graph.setDescription("Test route");
        TaggedDiagram diagram = tagged("agent-2", "app-a", graph);
        store.store(diagram);

        // Compute the expected hash
        String hash = store.findContentHashForRoute("route-2", "agent-2")
                .orElseThrow(() -> new AssertionError("No hash found for route-2/agent-2"));

        Optional<RouteGraph> result = store.findByContentHash(hash);

        assertThat(result).isPresent();
        assertThat(result.get().getRouteId()).isEqualTo("route-2");
        assertThat(result.get().getDescription()).isEqualTo("Test route");
    }

    @Test
    void findByContentHash_returnsEmptyForUnknownHash() {
        Optional<RouteGraph> result = store.findByContentHash("nonexistent-hash-000");
        assertThat(result).isEmpty();
    }

    /**
     * Stores two versions of the same route and asserts the newest hash wins.
     * The expected hash is recomputed from the retrieved graph via Jackson — this
     * presumes the store hashes the JavaTimeModule-serialized JSON of the graph
     * exactly as reproduced here; NOTE(review): fragile if the store's mapper
     * configuration changes.
     */
    @Test
    void findContentHashForRoute_returnsMostRecent() throws InterruptedException {
        RouteGraph graphV1 = buildGraph("route-3", "node-1");
        graphV1.setDescription("v1");
        RouteGraph graphV2 = buildGraph("route-3", "node-1", "node-2");
        graphV2.setDescription("v2");

        store.store(tagged("agent-1", "my-app", graphV1));
        // Small delay to ensure different created_at timestamps
        Thread.sleep(10);
        store.store(tagged("agent-1", "my-app", graphV2));

        Optional<String> hashOpt = store.findContentHashForRoute("route-3", "agent-1");
        assertThat(hashOpt).isPresent();

        // The hash should correspond to graphV2 (the most recent)
        String expectedHash = ClickHouseDiagramStore.sha256Hex(
                store.findByContentHash(hashOpt.get())
                        .map(g -> {
                            try {
                                return new com.fasterxml.jackson.databind.ObjectMapper()
                                        .registerModule(new com.fasterxml.jackson.datatype.jsr310.JavaTimeModule())
                                        .writeValueAsString(g);
                            } catch (Exception e) {
                                throw new RuntimeException(e);
                            }
                        })
                        .orElseThrow());

        assertThat(hashOpt.get()).isEqualTo(expectedHash);

        // Verify retrieved graph has v2's content
        RouteGraph retrieved = store.findByContentHash(hashOpt.get()).orElseThrow();
        assertThat(retrieved.getDescription()).isEqualTo("v2");
    }

    @Test
    void findContentHashForRouteByAgents_returnsHash() {
        RouteGraph graph = buildGraph("route-4", "node-z");
        store.store(tagged("agent-10", "app-b", graph));
        store.store(tagged("agent-20", "app-b", graph));

        Optional<String> result = store.findContentHashForRouteByAgents(
                "route-4", java.util.List.of("agent-10", "agent-20"));

        assertThat(result).isPresent();
    }

    @Test
    void findContentHashForRouteByAgents_emptyListReturnsEmpty() {
        // Empty agent list short-circuits to empty rather than matching everything.
        Optional<String> result = store.findContentHashForRouteByAgents("route-x", java.util.List.of());
        assertThat(result).isEmpty();
    }

    @Test
    void findProcessorRouteMapping_extractsMapping() {
        // Build a graph with 3 nodes: root + 2 children
        RouteGraph graph = buildGraph("route-5", "proc-from-1", "proc-to-2", "proc-log-3");
        store.store(tagged("agent-1", "app-mapping", graph));

        jdbc.execute("OPTIMIZE TABLE route_diagrams FINAL");

        Map<String, String> mapping = store.findProcessorRouteMapping("app-mapping");

        // Every node in the graph (root included) maps back to its route id.
        assertThat(mapping).containsEntry("proc-from-1", "route-5");
        assertThat(mapping).containsEntry("proc-to-2", "route-5");
        assertThat(mapping).containsEntry("proc-log-3", "route-5");
    }

    @Test
    void findProcessorRouteMapping_multipleRoutes() {
        RouteGraph graphA = buildGraph("route-a", "proc-a1", "proc-a2");
        RouteGraph graphB = buildGraph("route-b", "proc-b1");
        store.store(tagged("agent-1", "multi-app", graphA));
        store.store(tagged("agent-1", "multi-app", graphB));

        jdbc.execute("OPTIMIZE TABLE route_diagrams FINAL");

        Map<String, String> mapping = store.findProcessorRouteMapping("multi-app");

        assertThat(mapping).containsEntry("proc-a1", "route-a");
        assertThat(mapping).containsEntry("proc-a2", "route-a");
        assertThat(mapping).containsEntry("proc-b1", "route-b");
    }

    @Test
    void findProcessorRouteMapping_unknownAppReturnsEmpty() {
        Map<String, String> mapping = store.findProcessorRouteMapping("nonexistent-app");
        assertThat(mapping).isEmpty();
    }
}
|
||||
@@ -0,0 +1,262 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.common.model.ExecutionStatus;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import com.cameleer3.server.core.detail.DetailService;
|
||||
import com.cameleer3.server.core.detail.ExecutionDetail;
|
||||
import com.cameleer3.server.core.detail.ProcessorNode;
|
||||
import com.cameleer3.server.core.ingestion.MergedExecution;
|
||||
import com.cameleer3.server.core.storage.ExecutionStore.ProcessorRecord;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseExecutionReadIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseExecutionStore store;
|
||||
private DetailService detailService;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
// Load DDL for both tables
|
||||
String execDdl = new ClassPathResource("clickhouse/V2__executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String procDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
// Also load V5 for replay fields
|
||||
String replayDdl = new ClassPathResource("clickhouse/V5__replay_fields.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
|
||||
jdbc.execute(execDdl);
|
||||
jdbc.execute(procDdl);
|
||||
// V5 has ALTER TABLE statements — execute each separately
|
||||
for (String stmt : replayDdl.split(";")) {
|
||||
String trimmed = stmt.trim();
|
||||
if (!trimmed.isEmpty()) jdbc.execute(trimmed);
|
||||
}
|
||||
|
||||
jdbc.execute("TRUNCATE TABLE executions");
|
||||
jdbc.execute("TRUNCATE TABLE processor_executions");
|
||||
|
||||
store = new ClickHouseExecutionStore(jdbc);
|
||||
detailService = new DetailService(store);
|
||||
}
|
||||
|
||||
// --- Helper factory methods ---
|
||||
|
||||
/**
 * Builds a minimal COMPLETED execution for the given id, with fixed timestamps
 * (2026-04-01T10:00:00Z → +1s, 1000 ms) and empty/placeholder values for every
 * other field.
 *
 * <p>NOTE(review): the arguments are positional against MergedExecution's long
 * constructor — the inline comments below label the groups as inferred from the
 * values; confirm against MergedExecution's declaration before reordering.
 */
private MergedExecution minimalExecution(String executionId) {
    return new MergedExecution(
            // tenant, version, ids and routing
            "default", 1L, executionId, "route-a", "agent-1", "my-app",
            "COMPLETED", "corr-1", "exchange-1",
            // start / end / duration
            Instant.parse("2026-04-01T10:00:00Z"),
            Instant.parse("2026-04-01T10:00:01Z"),
            1000L,
            // unused string payload fields left blank
            "", "", "", "", "", "",
            "", "REGULAR",
            "", "", "", "", "{}",
            "", "",
            // boolean flags and trailing optional fields
            false, false,
            null, null
    );
}
|
||||
|
||||
private FlatProcessorRecord processor(int seq, String processorId, String processorType) {
|
||||
FlatProcessorRecord p = new FlatProcessorRecord(seq, processorId, processorType);
|
||||
p.setStatus(ExecutionStatus.COMPLETED);
|
||||
p.setStartTime(Instant.parse("2026-04-01T10:00:00Z"));
|
||||
p.setDurationMs(10L);
|
||||
return p;
|
||||
}
|
||||
|
||||
// --- Tests ---
|
||||
|
||||
@Test
|
||||
void findById_returnsInsertedExecution() {
|
||||
store.insertExecutionBatch(List.of(minimalExecution("exec-1")));
|
||||
|
||||
Optional<com.cameleer3.server.core.storage.ExecutionStore.ExecutionRecord> result =
|
||||
store.findById("exec-1");
|
||||
|
||||
assertThat(result).isPresent();
|
||||
assertThat(result.get().executionId()).isEqualTo("exec-1");
|
||||
assertThat(result.get().routeId()).isEqualTo("route-a");
|
||||
assertThat(result.get().status()).isEqualTo("COMPLETED");
|
||||
assertThat(result.get().agentId()).isEqualTo("agent-1");
|
||||
assertThat(result.get().applicationName()).isEqualTo("my-app");
|
||||
assertThat(result.get().processorsJson()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findById_notFound_returnsEmpty() {
|
||||
Optional<com.cameleer3.server.core.storage.ExecutionStore.ExecutionRecord> result =
|
||||
store.findById("nonexistent");
|
||||
|
||||
assertThat(result).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void findProcessors_returnsOrderedBySeq() {
|
||||
store.insertExecutionBatch(List.of(minimalExecution("exec-1")));
|
||||
|
||||
FlatProcessorRecord p1 = processor(1, "log-1", "log");
|
||||
FlatProcessorRecord p2 = processor(2, "transform-1", "setBody");
|
||||
FlatProcessorRecord p3 = processor(3, "to-1", "to");
|
||||
p2.setParentSeq(1);
|
||||
p2.setParentProcessorId("log-1");
|
||||
p3.setParentSeq(1);
|
||||
p3.setParentProcessorId("log-1");
|
||||
|
||||
store.insertProcessorBatch(
|
||||
"default", "exec-1", "route-a", "my-app",
|
||||
Instant.parse("2026-04-01T10:00:00Z"),
|
||||
List.of(p1, p2, p3));
|
||||
|
||||
List<ProcessorRecord> records = store.findProcessors("exec-1");
|
||||
|
||||
assertThat(records).hasSize(3);
|
||||
assertThat(records.get(0).seq()).isEqualTo(1);
|
||||
assertThat(records.get(0).processorId()).isEqualTo("log-1");
|
||||
assertThat(records.get(1).seq()).isEqualTo(2);
|
||||
assertThat(records.get(1).processorId()).isEqualTo("transform-1");
|
||||
assertThat(records.get(1).parentSeq()).isEqualTo(1);
|
||||
assertThat(records.get(2).seq()).isEqualTo(3);
|
||||
assertThat(records.get(2).processorId()).isEqualTo("to-1");
|
||||
assertThat(records.get(2).parentSeq()).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void findProcessorBySeq_returnsCorrectRecord() {
|
||||
store.insertExecutionBatch(List.of(minimalExecution("exec-1")));
|
||||
|
||||
FlatProcessorRecord p1 = processor(1, "log-1", "log");
|
||||
FlatProcessorRecord p2 = processor(2, "to-1", "to");
|
||||
FlatProcessorRecord p3 = processor(3, "log-2", "log");
|
||||
|
||||
store.insertProcessorBatch(
|
||||
"default", "exec-1", "route-a", "my-app",
|
||||
Instant.parse("2026-04-01T10:00:00Z"),
|
||||
List.of(p1, p2, p3));
|
||||
|
||||
Optional<ProcessorRecord> result = store.findProcessorBySeq("exec-1", 2);
|
||||
|
||||
assertThat(result).isPresent();
|
||||
assertThat(result.get().seq()).isEqualTo(2);
|
||||
assertThat(result.get().processorId()).isEqualTo("to-1");
|
||||
assertThat(result.get().processorType()).isEqualTo("to");
|
||||
}
|
||||
|
||||
@Test
|
||||
void findProcessorById_returnsFirstOccurrence() {
|
||||
store.insertExecutionBatch(List.of(minimalExecution("exec-1")));
|
||||
|
||||
// Three processors with the same processorId (iteration scenario)
|
||||
FlatProcessorRecord iter0 = processor(1, "to-1", "to");
|
||||
iter0.setIteration(0);
|
||||
|
||||
FlatProcessorRecord iter1 = processor(2, "to-1", "to");
|
||||
iter1.setIteration(1);
|
||||
|
||||
FlatProcessorRecord iter2 = processor(3, "to-1", "to");
|
||||
iter2.setIteration(2);
|
||||
|
||||
store.insertProcessorBatch(
|
||||
"default", "exec-1", "route-a", "my-app",
|
||||
Instant.parse("2026-04-01T10:00:00Z"),
|
||||
List.of(iter0, iter1, iter2));
|
||||
|
||||
Optional<ProcessorRecord> result = store.findProcessorById("exec-1", "to-1");
|
||||
|
||||
assertThat(result).isPresent();
|
||||
// ClickHouse LIMIT 1 returns one record — verify it has the correct processorId
|
||||
assertThat(result.get().processorId()).isEqualTo("to-1");
|
||||
// The returned record should have the lowest seq (first occurrence)
|
||||
assertThat(result.get().seq()).isEqualTo(1);
|
||||
}
|
||||
|
||||
@Test
|
||||
void detailService_buildTree_withIterations() {
|
||||
// Insert an execution
|
||||
store.insertExecutionBatch(List.of(minimalExecution("exec-1")));
|
||||
|
||||
// Build a split scenario:
|
||||
// seq=1: log-1 (root)
|
||||
// seq=2: split-1 (root)
|
||||
// seq=3: to-1 (child of split-1, iteration=0)
|
||||
// seq=4: to-1 (child of split-1, iteration=1)
|
||||
// seq=5: to-1 (child of split-1, iteration=2)
|
||||
// seq=6: log-2 (root)
|
||||
|
||||
FlatProcessorRecord log1 = processor(1, "log-1", "log");
|
||||
|
||||
FlatProcessorRecord split1 = processor(2, "split-1", "split");
|
||||
split1.setIterationSize(3);
|
||||
|
||||
FlatProcessorRecord to1iter0 = processor(3, "to-1", "to");
|
||||
to1iter0.setParentSeq(2);
|
||||
to1iter0.setParentProcessorId("split-1");
|
||||
to1iter0.setIteration(0);
|
||||
|
||||
FlatProcessorRecord to1iter1 = processor(4, "to-1", "to");
|
||||
to1iter1.setParentSeq(2);
|
||||
to1iter1.setParentProcessorId("split-1");
|
||||
to1iter1.setIteration(1);
|
||||
|
||||
FlatProcessorRecord to1iter2 = processor(5, "to-1", "to");
|
||||
to1iter2.setParentSeq(2);
|
||||
to1iter2.setParentProcessorId("split-1");
|
||||
to1iter2.setIteration(2);
|
||||
|
||||
FlatProcessorRecord log2 = processor(6, "log-2", "log");
|
||||
|
||||
store.insertProcessorBatch(
|
||||
"default", "exec-1", "route-a", "my-app",
|
||||
Instant.parse("2026-04-01T10:00:00Z"),
|
||||
List.of(log1, split1, to1iter0, to1iter1, to1iter2, log2));
|
||||
|
||||
// Invoke DetailService
|
||||
Optional<ExecutionDetail> detail = detailService.getDetail("exec-1");
|
||||
|
||||
assertThat(detail).isPresent();
|
||||
|
||||
List<ProcessorNode> roots = detail.get().processors();
|
||||
assertThat(roots).hasSize(3);
|
||||
assertThat(roots.get(0).getProcessorId()).isEqualTo("log-1");
|
||||
assertThat(roots.get(1).getProcessorId()).isEqualTo("split-1");
|
||||
assertThat(roots.get(2).getProcessorId()).isEqualTo("log-2");
|
||||
|
||||
// Verify split-1 has 3 children (all with processorId "to-1")
|
||||
ProcessorNode splitNode = roots.get(1);
|
||||
List<ProcessorNode> children = splitNode.getChildren();
|
||||
assertThat(children).hasSize(3);
|
||||
assertThat(children).allMatch(c -> "to-1".equals(c.getProcessorId()));
|
||||
|
||||
// Verify iteration values via getLoopIndex() (iteration maps to loopIndex in the seq-based path)
|
||||
assertThat(children.get(0).getLoopIndex()).isEqualTo(0);
|
||||
assertThat(children.get(1).getLoopIndex()).isEqualTo(1);
|
||||
assertThat(children.get(2).getLoopIndex()).isEqualTo(2);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,229 @@
|
||||
package com.cameleer3.server.app.storage;

import com.cameleer3.server.core.ingestion.MergedExecution;
import com.cameleer3.common.model.ExecutionStatus;
import com.cameleer3.common.model.FlatProcessorRecord;
import com.zaxxer.hikari.HikariDataSource;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.testcontainers.clickhouse.ClickHouseContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;

import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration tests for {@link ClickHouseExecutionStore} against a real
 * ClickHouse instance started via Testcontainers.
 *
 * <p>The schema is applied from the same DDL resources the server ships
 * (V2/V3 migration scripts), and both tables are truncated before each test
 * because the container is shared across the class.
 */
@Testcontainers
class ClickHouseExecutionStoreIT {

    @Container
    static final ClickHouseContainer clickhouse =
            new ClickHouseContainer("clickhouse/clickhouse-server:24.12");

    // Kept as a field so the pool created in setUp() can be closed in
    // tearDown(); previously a new HikariDataSource was created per test
    // and never closed, leaking connection pools against the container.
    private HikariDataSource dataSource;
    private JdbcTemplate jdbc;
    private ClickHouseExecutionStore store;

    @BeforeEach
    void setUp() throws Exception {
        dataSource = new HikariDataSource();
        dataSource.setJdbcUrl(clickhouse.getJdbcUrl());
        dataSource.setUsername(clickhouse.getUsername());
        dataSource.setPassword(clickhouse.getPassword());

        jdbc = new JdbcTemplate(dataSource);

        // Load DDL from classpath resources (same scripts used in production).
        String executionsDdl = new ClassPathResource("clickhouse/V2__executions.sql")
                .getContentAsString(StandardCharsets.UTF_8);
        String processorsDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
                .getContentAsString(StandardCharsets.UTF_8);

        jdbc.execute(executionsDdl);
        jdbc.execute(processorsDdl);

        // Container is shared across tests; start each test from a clean slate.
        jdbc.execute("TRUNCATE TABLE executions");
        jdbc.execute("TRUNCATE TABLE processor_executions");

        store = new ClickHouseExecutionStore(jdbc);
    }

    @AfterEach
    void tearDown() {
        // Release the HikariCP pool so connections don't accumulate across tests.
        if (dataSource != null) {
            dataSource.close();
        }
    }

    @Test
    void insertExecutionBatch_writesToClickHouse() {
        // Fully-populated execution to exercise every column mapping.
        MergedExecution exec = new MergedExecution(
                "default", 1L, "exec-1", "route-a", "agent-1", "my-app",
                "COMPLETED", "corr-1", "exchange-1",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:01Z"),
                1000L,
                "some error", "stack trace", "IOException", "IO",
                "FileNotFoundException", "file not found",
                "hash-abc", "FULL",
                "{\"key\":\"val\"}", "{\"out\":\"val\"}",
                "{\"h1\":\"v1\"}", "{\"h2\":\"v2\"}",
                "{\"attr\":\"val\"}",
                "trace-123", "span-456",
                true, false,
                null, null
        );

        store.insertExecutionBatch(List.of(exec));

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM executions WHERE execution_id = 'exec-1'",
                Integer.class);
        assertThat(count).isEqualTo(1);
    }

    @Test
    void insertProcessorBatch_writesToClickHouse() {
        FlatProcessorRecord proc = new FlatProcessorRecord(1, "proc-1", "to");
        proc.setStatus(ExecutionStatus.COMPLETED);
        proc.setStartTime(Instant.parse("2026-03-31T10:00:00Z"));
        proc.setDurationMs(50L);
        proc.setResolvedEndpointUri("http://example.com");
        proc.setInputBody("input body");
        proc.setOutputBody("output body");
        proc.setInputHeaders(Map.of("h1", "v1"));
        proc.setOutputHeaders(Map.of("h2", "v2"));
        proc.setAttributes(Map.of("a1", "v1"));

        store.insertProcessorBatch(
                "default", "exec-1", "route-a", "my-app",
                Instant.parse("2026-03-31T10:00:00Z"),
                List.of(proc));

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM processor_executions WHERE execution_id = 'exec-1'",
                Integer.class);
        assertThat(count).isEqualTo(1);

        // Verify seq is stored.
        Integer seq = jdbc.queryForObject(
                "SELECT seq FROM processor_executions WHERE execution_id = 'exec-1'",
                Integer.class);
        assertThat(seq).isEqualTo(1);
    }

    @Test
    void insertProcessorBatch_withIterations() {
        // A split container (iteration_size=3) plus its three iteration children.
        FlatProcessorRecord splitContainer = new FlatProcessorRecord(1, "split-1", "split");
        splitContainer.setIterationSize(3);
        splitContainer.setStatus(ExecutionStatus.COMPLETED);
        splitContainer.setStartTime(Instant.parse("2026-03-31T10:00:00Z"));
        splitContainer.setDurationMs(300L);

        FlatProcessorRecord child0 = new FlatProcessorRecord(2, "child-proc", "to");
        child0.setParentSeq(1);
        child0.setParentProcessorId("split-1");
        child0.setIteration(0);
        child0.setStatus(ExecutionStatus.COMPLETED);
        child0.setStartTime(Instant.parse("2026-03-31T10:00:00.100Z"));
        child0.setDurationMs(80L);
        child0.setResolvedEndpointUri("http://svc-a");
        child0.setInputBody("body0");
        child0.setOutputBody("out0");

        FlatProcessorRecord child1 = new FlatProcessorRecord(3, "child-proc", "to");
        child1.setParentSeq(1);
        child1.setParentProcessorId("split-1");
        child1.setIteration(1);
        child1.setStatus(ExecutionStatus.COMPLETED);
        child1.setStartTime(Instant.parse("2026-03-31T10:00:00.200Z"));
        child1.setDurationMs(90L);
        child1.setResolvedEndpointUri("http://svc-a");
        child1.setInputBody("body1");
        child1.setOutputBody("out1");

        FlatProcessorRecord child2 = new FlatProcessorRecord(4, "child-proc", "to");
        child2.setParentSeq(1);
        child2.setParentProcessorId("split-1");
        child2.setIteration(2);
        child2.setStatus(ExecutionStatus.COMPLETED);
        child2.setStartTime(Instant.parse("2026-03-31T10:00:00.300Z"));
        child2.setDurationMs(100L);
        child2.setResolvedEndpointUri("http://svc-a");
        child2.setInputBody("body2");
        child2.setOutputBody("out2");

        store.insertProcessorBatch(
                "default", "exec-2", "route-b", "my-app",
                Instant.parse("2026-03-31T10:00:00Z"),
                List.of(splitContainer, child0, child1, child2));

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM processor_executions WHERE execution_id = 'exec-2'",
                Integer.class);
        assertThat(count).isEqualTo(4);

        // Verify iteration data on the split container.
        Integer iterationSize = jdbc.queryForObject(
                "SELECT iteration_size FROM processor_executions " +
                        "WHERE execution_id = 'exec-2' AND seq = 1",
                Integer.class);
        assertThat(iterationSize).isEqualTo(3);

        // Verify iteration index on a child.
        Integer iteration = jdbc.queryForObject(
                "SELECT iteration FROM processor_executions " +
                        "WHERE execution_id = 'exec-2' AND seq = 3",
                Integer.class);
        assertThat(iteration).isEqualTo(1);
    }

    @Test
    void insertExecutionBatch_emptyList_doesNothing() {
        store.insertExecutionBatch(List.of());

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM executions", Integer.class);
        assertThat(count).isEqualTo(0);
    }

    @Test
    void insertExecutionBatch_replacingMergeTree_keepsLatestVersion() {
        // Two versions of the same execution_id; the higher version (2) wins
        // after ReplacingMergeTree deduplication.
        MergedExecution v1 = new MergedExecution(
                "default", 1L, "exec-r", "route-a", "agent-1", "my-app",
                "RUNNING", "corr-1", "exchange-1",
                Instant.parse("2026-03-31T10:00:00Z"),
                null, null,
                "", "", "", "", "", "",
                "", "FULL",
                "", "", "", "", "",
                "", "",
                false, false,
                null, null
        );

        MergedExecution v2 = new MergedExecution(
                "default", 2L, "exec-r", "route-a", "agent-1", "my-app",
                "COMPLETED", "corr-1", "exchange-1",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:05Z"),
                5000L,
                "", "", "", "", "", "",
                "", "FULL",
                "", "", "", "", "",
                "", "",
                false, false,
                null, null
        );

        store.insertExecutionBatch(List.of(v1));
        store.insertExecutionBatch(List.of(v2));

        // Force merge to apply ReplacingMergeTree deduplication.
        jdbc.execute("OPTIMIZE TABLE executions FINAL");

        String status = jdbc.queryForObject(
                "SELECT status FROM executions " +
                        "WHERE execution_id = 'exec-r'",
                String.class);
        assertThat(status).isEqualTo("COMPLETED");
    }
}
|
||||
@@ -0,0 +1,114 @@
|
||||
package com.cameleer3.server.app.storage;

import com.cameleer3.server.core.storage.model.MetricTimeSeries;
import com.zaxxer.hikari.HikariDataSource;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.jdbc.core.JdbcTemplate;
import org.testcontainers.clickhouse.ClickHouseContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;

import java.sql.Timestamp;
import java.time.Instant;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration tests for {@link ClickHouseMetricsQueryStore} against a real
 * ClickHouse instance (Testcontainers).
 *
 * <p>Seeds six data points per metric, one every 10 minutes over one hour,
 * for agent-1's {@code cpu.usage} and {@code memory.free} series.
 */
@Testcontainers
class ClickHouseMetricsQueryStoreIT {

    @Container
    static final ClickHouseContainer clickhouse =
            new ClickHouseContainer("clickhouse/clickhouse-server:24.12");

    // Field so the pool can be closed after each test; previously the
    // HikariDataSource was created per test and never closed (pool leak).
    private HikariDataSource dataSource;
    private JdbcTemplate jdbc;
    private ClickHouseMetricsQueryStore queryStore;

    @BeforeEach
    void setUp() {
        dataSource = new HikariDataSource();
        dataSource.setJdbcUrl(clickhouse.getJdbcUrl());
        dataSource.setUsername(clickhouse.getUsername());
        dataSource.setPassword(clickhouse.getPassword());

        jdbc = new JdbcTemplate(dataSource);

        jdbc.execute("""
                CREATE TABLE IF NOT EXISTS agent_metrics (
                    tenant_id LowCardinality(String) DEFAULT 'default',
                    collected_at DateTime64(3),
                    agent_id LowCardinality(String),
                    metric_name LowCardinality(String),
                    metric_value Float64,
                    tags Map(String, String) DEFAULT map(),
                    server_received_at DateTime64(3) DEFAULT now64(3)
                )
                ENGINE = MergeTree()
                ORDER BY (tenant_id, agent_id, metric_name, collected_at)
                """);

        // Container is shared across tests; start from a clean table.
        jdbc.execute("TRUNCATE TABLE agent_metrics");

        // Seed test data: 6 data points across 1 hour for two metrics.
        Instant base = Instant.parse("2026-03-31T10:00:00Z");
        for (int i = 0; i < 6; i++) {
            Instant ts = base.plusSeconds(i * 600); // every 10 minutes
            jdbc.update("INSERT INTO agent_metrics (agent_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
                    "agent-1", "cpu.usage", 50.0 + i * 5, Timestamp.from(ts));
            jdbc.update("INSERT INTO agent_metrics (agent_id, metric_name, metric_value, collected_at) VALUES (?, ?, ?, ?)",
                    "agent-1", "memory.free", 1000.0 - i * 100, Timestamp.from(ts));
        }

        queryStore = new ClickHouseMetricsQueryStore(jdbc);
    }

    @AfterEach
    void tearDown() {
        // Release the HikariCP pool so connections don't accumulate across tests.
        if (dataSource != null) {
            dataSource.close();
        }
    }

    @Test
    void queryTimeSeries_returnsDataGroupedByMetric() {
        Instant from = Instant.parse("2026-03-31T10:00:00Z");
        Instant to = Instant.parse("2026-03-31T11:00:00Z");

        Map<String, List<MetricTimeSeries.Bucket>> result =
                queryStore.queryTimeSeries("agent-1", List.of("cpu.usage", "memory.free"), from, to, 6);

        assertThat(result).containsKeys("cpu.usage", "memory.free");
        assertThat(result.get("cpu.usage")).isNotEmpty();
        assertThat(result.get("memory.free")).isNotEmpty();
    }

    @Test
    void queryTimeSeries_bucketsAverageCorrectly() {
        Instant from = Instant.parse("2026-03-31T10:00:00Z");
        Instant to = Instant.parse("2026-03-31T11:00:00Z");

        // 1 bucket for the entire hour = average of all 6 values.
        Map<String, List<MetricTimeSeries.Bucket>> result =
                queryStore.queryTimeSeries("agent-1", List.of("cpu.usage"), from, to, 1);

        assertThat(result.get("cpu.usage")).hasSize(1);
        // Values: 50, 55, 60, 65, 70, 75 → avg = 62.5
        assertThat(result.get("cpu.usage").get(0).value()).isCloseTo(62.5, Offset.offset(0.1));
    }

    @Test
    void queryTimeSeries_noData_returnsEmptyLists() {
        // Window entirely before the seeded data.
        Instant from = Instant.parse("2025-01-01T00:00:00Z");
        Instant to = Instant.parse("2025-01-01T01:00:00Z");

        Map<String, List<MetricTimeSeries.Bucket>> result =
                queryStore.queryTimeSeries("agent-1", List.of("cpu.usage"), from, to, 6);

        assertThat(result.get("cpu.usage")).isEmpty();
    }

    @Test
    void queryTimeSeries_unknownAgent_returnsEmpty() {
        Instant from = Instant.parse("2026-03-31T10:00:00Z");
        Instant to = Instant.parse("2026-03-31T11:00:00Z");

        Map<String, List<MetricTimeSeries.Bucket>> result =
                queryStore.queryTimeSeries("nonexistent", List.of("cpu.usage"), from, to, 6);

        assertThat(result.get("cpu.usage")).isEmpty();
    }
}
|
||||
@@ -0,0 +1,108 @@
|
||||
package com.cameleer3.server.app.storage;

import com.cameleer3.server.core.storage.model.MetricsSnapshot;
import com.zaxxer.hikari.HikariDataSource;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.jdbc.core.JdbcTemplate;
import org.testcontainers.clickhouse.ClickHouseContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;

import java.time.Instant;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration tests for {@link ClickHouseMetricsStore} write paths against a
 * real ClickHouse instance (Testcontainers). The {@code agent_metrics} table
 * is created inline and truncated before each test.
 */
@Testcontainers
class ClickHouseMetricsStoreIT {

    @Container
    static final ClickHouseContainer clickhouse =
            new ClickHouseContainer("clickhouse/clickhouse-server:24.12");

    // Field so the pool can be closed after each test; previously the
    // HikariDataSource was created per test and never closed (pool leak).
    private HikariDataSource dataSource;
    private JdbcTemplate jdbc;
    private ClickHouseMetricsStore store;

    @BeforeEach
    void setUp() {
        dataSource = new HikariDataSource();
        dataSource.setJdbcUrl(clickhouse.getJdbcUrl());
        dataSource.setUsername(clickhouse.getUsername());
        dataSource.setPassword(clickhouse.getPassword());

        jdbc = new JdbcTemplate(dataSource);

        jdbc.execute("""
                CREATE TABLE IF NOT EXISTS agent_metrics (
                    tenant_id LowCardinality(String) DEFAULT 'default',
                    collected_at DateTime64(3),
                    agent_id LowCardinality(String),
                    metric_name LowCardinality(String),
                    metric_value Float64,
                    tags Map(String, String) DEFAULT map(),
                    server_received_at DateTime64(3) DEFAULT now64(3)
                )
                ENGINE = MergeTree()
                ORDER BY (tenant_id, agent_id, metric_name, collected_at)
                """);

        // Container is shared across tests; start from a clean table.
        jdbc.execute("TRUNCATE TABLE agent_metrics");

        store = new ClickHouseMetricsStore(jdbc);
    }

    @AfterEach
    void tearDown() {
        // Release the HikariCP pool so connections don't accumulate across tests.
        if (dataSource != null) {
            dataSource.close();
        }
    }

    @Test
    void insertBatch_writesMetricsToClickHouse() {
        List<MetricsSnapshot> batch = List.of(
                new MetricsSnapshot("agent-1", Instant.parse("2026-03-31T10:00:00Z"),
                        "cpu.usage", 75.5, Map.of("host", "server-1")),
                new MetricsSnapshot("agent-1", Instant.parse("2026-03-31T10:00:01Z"),
                        "memory.free", 1024.0, null)
        );

        store.insertBatch(batch);

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM agent_metrics WHERE agent_id = 'agent-1'",
                Integer.class);
        assertThat(count).isEqualTo(2);
    }

    @Test
    void insertBatch_storesTags() {
        store.insertBatch(List.of(
                new MetricsSnapshot("agent-2", Instant.parse("2026-03-31T10:00:00Z"),
                        "disk.used", 500.0, Map.of("mount", "/data", "fs", "ext4"))
        ));

        // Just verify we can read back the row with tags.
        Integer count = jdbc.queryForObject(
                "SELECT count() FROM agent_metrics WHERE agent_id = 'agent-2'",
                Integer.class);
        assertThat(count).isEqualTo(1);
    }

    @Test
    void insertBatch_emptyList_doesNothing() {
        store.insertBatch(List.of());

        Integer count = jdbc.queryForObject("SELECT count() FROM agent_metrics", Integer.class);
        assertThat(count).isEqualTo(0);
    }

    @Test
    void insertBatch_nullTags_defaultsToEmptyMap() {
        // A null tags map must not break the insert; the column defaults to map().
        store.insertBatch(List.of(
                new MetricsSnapshot("agent-3", Instant.parse("2026-03-31T10:00:00Z"),
                        "cpu.usage", 50.0, null)
        ));

        Integer count = jdbc.queryForObject(
                "SELECT count() FROM agent_metrics WHERE agent_id = 'agent-3'",
                Integer.class);
        assertThat(count).isEqualTo(1);
    }
}
|
||||
@@ -0,0 +1,375 @@
|
||||
package com.cameleer3.server.app.storage;
|
||||
|
||||
import com.cameleer3.server.core.search.ExecutionStats;
|
||||
import com.cameleer3.server.core.search.StatsTimeseries;
|
||||
import com.cameleer3.server.core.search.TopError;
|
||||
import com.cameleer3.server.core.storage.StatsStore.PunchcardCell;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.testcontainers.clickhouse.ClickHouseContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Testcontainers
|
||||
class ClickHouseStatsStoreIT {
|
||||
|
||||
@Container
|
||||
static final ClickHouseContainer clickhouse =
|
||||
new ClickHouseContainer("clickhouse/clickhouse-server:24.12");
|
||||
|
||||
private JdbcTemplate jdbc;
|
||||
private ClickHouseStatsStore store;
|
||||
|
||||
// base time: 2026-03-31T10:00:00Z (a Tuesday)
|
||||
private static final Instant BASE = Instant.parse("2026-03-31T10:00:00Z");
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(clickhouse.getJdbcUrl());
|
||||
ds.setUsername(clickhouse.getUsername());
|
||||
ds.setPassword(clickhouse.getPassword());
|
||||
|
||||
jdbc = new JdbcTemplate(ds);
|
||||
|
||||
// Load DDL from classpath resources
|
||||
String executionsDdl = new ClassPathResource("clickhouse/V2__executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String processorsDdl = new ClassPathResource("clickhouse/V3__processor_executions.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
String statsDdl = new ClassPathResource("clickhouse/V4__stats_tables_and_mvs.sql")
|
||||
.getContentAsString(StandardCharsets.UTF_8);
|
||||
|
||||
jdbc.execute(executionsDdl);
|
||||
jdbc.execute(processorsDdl);
|
||||
|
||||
// Drop MVs first (they reference the stats tables), then recreate everything
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_all_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_app_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_route_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_detail_mv");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_all");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_app");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_route");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor");
|
||||
jdbc.execute("DROP TABLE IF EXISTS stats_1m_processor_detail");
|
||||
|
||||
// Strip SQL line comments first (they may contain semicolons),
|
||||
// then split by ';' and execute non-empty statements.
|
||||
String cleanedDdl = statsDdl.replaceAll("--[^\n]*", "");
|
||||
for (String stmt : cleanedDdl.split(";")) {
|
||||
String trimmed = stmt.trim();
|
||||
if (!trimmed.isEmpty()) {
|
||||
jdbc.execute(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate base tables
|
||||
jdbc.execute("TRUNCATE TABLE executions");
|
||||
jdbc.execute("TRUNCATE TABLE processor_executions");
|
||||
|
||||
seedTestData();
|
||||
|
||||
// Try the failing query to capture it in query_log, then check
|
||||
try {
|
||||
jdbc.queryForMap(
|
||||
"SELECT countMerge(total_count) AS tc, countIfMerge(failed_count) AS fc, " +
|
||||
"sumMerge(duration_sum) / greatest(countMerge(total_count), 1) AS avg, " +
|
||||
"quantileMerge(0.99)(p99_duration) AS p99, " +
|
||||
"countIfMerge(running_count) AS rc " +
|
||||
"FROM stats_1m_all WHERE tenant_id = 'default' " +
|
||||
"AND bucket >= '2026-03-31 09:59:00' AND bucket < '2026-03-31 10:05:00'");
|
||||
} catch (Exception e) {
|
||||
System.out.println("Expected error: " + e.getMessage().substring(0, 80));
|
||||
}
|
||||
|
||||
jdbc.execute("SYSTEM FLUSH LOGS");
|
||||
// Get ALL recent queries to see what the driver sends
|
||||
var queryLog = jdbc.queryForList(
|
||||
"SELECT type, substring(query, 1, 200) AS q " +
|
||||
"FROM system.query_log WHERE event_time > now() - 30 " +
|
||||
"AND query NOT LIKE '%system.query_log%' AND query NOT LIKE '%FLUSH%' " +
|
||||
"ORDER BY event_time DESC LIMIT 20");
|
||||
for (var entry : queryLog) {
|
||||
System.out.println("LOG: " + entry.get("type") + " | " + entry.get("q"));
|
||||
}
|
||||
|
||||
store = new ClickHouseStatsStore(jdbc);
|
||||
}
|
||||
|
||||
private void seedTestData() {
|
||||
// 10 executions across 2 apps, 2 routes, spanning 5 minutes
|
||||
// app-1, route-a: 4 COMPLETED (200ms, 300ms, 400ms, 500ms)
|
||||
insertExecution("exec-01", BASE.plusSeconds(0), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 200L, "", "");
|
||||
insertExecution("exec-02", BASE.plusSeconds(60), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 300L, "", "");
|
||||
insertExecution("exec-03", BASE.plusSeconds(120), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 400L, "", "");
|
||||
insertExecution("exec-04", BASE.plusSeconds(180), "app-1", "route-a", "agent-1",
|
||||
"COMPLETED", 500L, "", "");
|
||||
|
||||
// app-1, route-a: 2 FAILED (100ms, 150ms) with error_type="NPE"
|
||||
insertExecution("exec-05", BASE.plusSeconds(60), "app-1", "route-a", "agent-1",
|
||||
"FAILED", 100L, "NPE", "null ref");
|
||||
insertExecution("exec-06", BASE.plusSeconds(120), "app-1", "route-a", "agent-1",
|
||||
"FAILED", 150L, "NPE", "null ref");
|
||||
|
||||
// app-1, route-b: 2 COMPLETED (50ms, 60ms)
|
||||
insertExecution("exec-07", BASE.plusSeconds(60), "app-1", "route-b", "agent-1",
|
||||
"COMPLETED", 50L, "", "");
|
||||
insertExecution("exec-08", BASE.plusSeconds(120), "app-1", "route-b", "agent-1",
|
||||
"COMPLETED", 60L, "", "");
|
||||
|
||||
// app-2, route-c: 1 COMPLETED (1000ms)
|
||||
insertExecution("exec-09", BASE.plusSeconds(60), "app-2", "route-c", "agent-2",
|
||||
"COMPLETED", 1000L, "", "");
|
||||
|
||||
// app-2, route-c: 1 RUNNING (null duration)
|
||||
insertExecution("exec-10", BASE.plusSeconds(180), "app-2", "route-c", "agent-2",
|
||||
"RUNNING", null, "", "");
|
||||
|
||||
// 5 processor records for processor stats testing
|
||||
// app-1, route-a, processor_type="to": 3 COMPLETED
|
||||
insertProcessor("exec-01", 1, "proc-to-1", "to", BASE.plusSeconds(0),
|
||||
"app-1", "route-a", "COMPLETED", 50L);
|
||||
insertProcessor("exec-02", 1, "proc-to-2", "to", BASE.plusSeconds(60),
|
||||
"app-1", "route-a", "COMPLETED", 80L);
|
||||
insertProcessor("exec-03", 1, "proc-to-3", "to", BASE.plusSeconds(120),
|
||||
"app-1", "route-a", "COMPLETED", 90L);
|
||||
|
||||
// app-1, route-a, processor_type="log": 2 COMPLETED
|
||||
insertProcessor("exec-01", 2, "proc-log-1", "log", BASE.plusSeconds(1),
|
||||
"app-1", "route-a", "COMPLETED", 10L);
|
||||
insertProcessor("exec-02", 2, "proc-log-2", "log", BASE.plusSeconds(61),
|
||||
"app-1", "route-a", "COMPLETED", 15L);
|
||||
}
|
||||
|
||||
private void insertExecution(String executionId, Instant startTime, String appName,
|
||||
String routeId, String agentId, String status,
|
||||
Long durationMs, String errorType, String errorMessage) {
|
||||
jdbc.update(
|
||||
"INSERT INTO executions (tenant_id, execution_id, start_time, route_id, " +
|
||||
"agent_id, application_name, status, duration_ms, error_type, error_message) " +
|
||||
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
executionId, Timestamp.from(startTime), routeId, agentId, appName,
|
||||
status, durationMs, errorType, errorMessage);
|
||||
}
|
||||
|
||||
private void insertProcessor(String executionId, int seq, String processorId,
|
||||
String processorType, Instant startTime,
|
||||
String appName, String routeId, String status,
|
||||
Long durationMs) {
|
||||
jdbc.update(
|
||||
"INSERT INTO processor_executions (tenant_id, execution_id, seq, processor_id, " +
|
||||
"processor_type, start_time, route_id, application_name, status, duration_ms) " +
|
||||
"VALUES ('default', ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
executionId, seq, processorId, processorType, Timestamp.from(startTime),
|
||||
routeId, appName, status, durationMs);
|
||||
}
|
||||
|
||||
// ── Stats Tests ──────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void stats_returnsCorrectGlobalTotals() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats stats = store.stats(from, to);
|
||||
|
||||
assertThat(stats.totalCount()).isEqualTo(10);
|
||||
assertThat(stats.failedCount()).isEqualTo(2);
|
||||
assertThat(stats.activeCount()).isEqualTo(1);
|
||||
assertThat(stats.avgDurationMs()).isGreaterThan(0);
|
||||
assertThat(stats.p99LatencyMs()).isGreaterThan(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void statsForApp_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats app1 = store.statsForApp(from, to, "app-1");
|
||||
assertThat(app1.totalCount()).isEqualTo(8);
|
||||
|
||||
ExecutionStats app2 = store.statsForApp(from, to, "app-2");
|
||||
assertThat(app2.totalCount()).isEqualTo(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
void statsForRoute_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats routeA = store.statsForRoute(from, to, "route-a", List.of());
|
||||
assertThat(routeA.totalCount()).isEqualTo(6);
|
||||
}
|
||||
|
||||
// ── Timeseries Tests ─────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void timeseries_returnsBuckets() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
StatsTimeseries ts = store.timeseries(from, to, 5);
|
||||
|
||||
assertThat(ts.buckets()).isNotEmpty();
|
||||
long totalAcrossBuckets = ts.buckets().stream()
|
||||
.mapToLong(StatsTimeseries.TimeseriesBucket::totalCount).sum();
|
||||
assertThat(totalAcrossBuckets).isEqualTo(10);
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesForApp_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
StatsTimeseries ts = store.timeseriesForApp(from, to, 5, "app-1");
|
||||
|
||||
long totalAcrossBuckets = ts.buckets().stream()
|
||||
.mapToLong(StatsTimeseries.TimeseriesBucket::totalCount).sum();
|
||||
assertThat(totalAcrossBuckets).isEqualTo(8);
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesGroupedByApp_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, StatsTimeseries> grouped = store.timeseriesGroupedByApp(from, to, 5);
|
||||
|
||||
assertThat(grouped).containsKeys("app-1", "app-2");
|
||||
}
|
||||
|
||||
@Test
|
||||
void timeseriesGroupedByRoute_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, StatsTimeseries> grouped = store.timeseriesGroupedByRoute(from, to, 5, "app-1");
|
||||
|
||||
assertThat(grouped).containsKeys("route-a", "route-b");
|
||||
}
|
||||
|
||||
// ── SLA Tests ────────────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void slaCompliance_calculatesCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
// threshold=250ms: among 9 non-RUNNING executions:
|
||||
// compliant (<=250ms): exec-01(200), exec-05(100), exec-06(150), exec-07(50), exec-08(60) = 5
|
||||
// total non-running: 9
|
||||
// compliance = 5/9 * 100 ~ 55.56%
|
||||
double sla = store.slaCompliance(from, to, 250, null, null);
|
||||
assertThat(sla).isBetween(55.0, 56.0);
|
||||
}
|
||||
|
||||
// ── Top Errors Tests ─────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void topErrors_returnsRankedErrors() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
List<TopError> errors = store.topErrors(from, to, null, null, 10);
|
||||
|
||||
assertThat(errors).isNotEmpty();
|
||||
assertThat(errors.get(0).errorType()).isEqualTo("NPE");
|
||||
assertThat(errors.get(0).count()).isEqualTo(2);
|
||||
}
|
||||
|
||||
// ── Active Error Types Test ──────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void activeErrorTypes_countsDistinct() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
int count = store.activeErrorTypes(from, to, "app-1");
|
||||
|
||||
assertThat(count).isEqualTo(1); // only "NPE"
|
||||
}
|
||||
|
||||
// ── Punchcard Test ───────────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void punchcard_returnsWeekdayHourCells() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
List<PunchcardCell> cells = store.punchcard(from, to, null);
|
||||
|
||||
assertThat(cells).isNotEmpty();
|
||||
long totalCount = cells.stream().mapToLong(PunchcardCell::totalCount).sum();
|
||||
assertThat(totalCount).isEqualTo(10);
|
||||
}
|
||||
|
||||
@Test
|
||||
void slaCountsByApp_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
// threshold=250ms
|
||||
Map<String, long[]> counts = store.slaCountsByApp(from, to, 250);
|
||||
|
||||
assertThat(counts).containsKeys("app-1", "app-2");
|
||||
// app-1: 8 total executions, all non-RUNNING
|
||||
// compliant (<=250ms): exec-01(200), exec-05(100), exec-06(150), exec-07(50), exec-08(60) = 5
|
||||
long[] app1 = counts.get("app-1");
|
||||
assertThat(app1[0]).isEqualTo(5); // compliant
|
||||
assertThat(app1[1]).isEqualTo(8); // total non-running
|
||||
// app-2: 1 COMPLETED(1000ms) + 1 RUNNING → 1 non-RUNNING, 0 compliant
|
||||
long[] app2 = counts.get("app-2");
|
||||
assertThat(app2[0]).isEqualTo(0); // compliant
|
||||
assertThat(app2[1]).isEqualTo(1); // total non-running
|
||||
}
|
||||
|
||||
@Test
|
||||
void slaCountsByRoute_returnsMap() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
Map<String, long[]> counts = store.slaCountsByRoute(from, to, "app-1", 250);
|
||||
|
||||
assertThat(counts).containsKeys("route-a", "route-b");
|
||||
// route-a: exec-01(200)OK, exec-02(300)NO, exec-03(400)NO, exec-04(500)NO,
|
||||
// exec-05(100)OK, exec-06(150)OK → 3 compliant, 6 total
|
||||
long[] routeA = counts.get("route-a");
|
||||
assertThat(routeA[0]).isEqualTo(3); // compliant
|
||||
assertThat(routeA[1]).isEqualTo(6); // total
|
||||
// route-b: exec-07(50)OK, exec-08(60)OK → 2 compliant, 2 total
|
||||
long[] routeB = counts.get("route-b");
|
||||
assertThat(routeB[0]).isEqualTo(2);
|
||||
assertThat(routeB[1]).isEqualTo(2);
|
||||
}
|
||||
|
||||
// ── Processor Stats Test ─────────────────────────────────────────────
|
||||
|
||||
@Test
|
||||
void statsForProcessor_filtersCorrectly() {
|
||||
Instant from = BASE.minusSeconds(60);
|
||||
Instant to = BASE.plusSeconds(300);
|
||||
|
||||
ExecutionStats toStats = store.statsForProcessor(from, to, "route-a", "to");
|
||||
assertThat(toStats.totalCount()).isEqualTo(3);
|
||||
assertThat(toStats.activeCount()).isEqualTo(0); // processor stats have no running_count
|
||||
|
||||
ExecutionStats logStats = store.statsForProcessor(from, to, "route-a", "log");
|
||||
assertThat(logStats.totalCount()).isEqualTo(2);
|
||||
}
|
||||
}
|
||||
@@ -77,13 +77,15 @@ class PostgresExecutionStoreIT extends AbstractPostgresIT {
|
||||
now, now.plusMillis(10), 10L, null, null,
|
||||
"input body", "output body", null, null, null,
|
||||
null, null, null, null, null,
|
||||
null, null, null, null, null, null, null, null),
|
||||
null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, null),
|
||||
new ProcessorRecord("exec-proc", "proc-2", "to",
|
||||
"app-1", "route-a", 1, "proc-1", "COMPLETED",
|
||||
now.plusMillis(10), now.plusMillis(30), 20L, null, null,
|
||||
null, null, null, null, null,
|
||||
null, null, null, null, null,
|
||||
null, null, null, null, null, null, null, null)
|
||||
null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, null)
|
||||
);
|
||||
executionStore.upsertProcessors("exec-proc", now, "app-1", "route-a", processors);
|
||||
|
||||
|
||||
@@ -37,6 +37,11 @@
|
||||
<artifactId>spring-security-core</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-jsr310</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
|
||||
@@ -64,6 +64,18 @@ public class DetailService {
|
||||
});
|
||||
}
|
||||
|
||||
public Optional<Map<String, String>> getProcessorSnapshotBySeq(String executionId, int seq) {
|
||||
return executionStore.findProcessorBySeq(executionId, seq)
|
||||
.map(p -> {
|
||||
Map<String, String> snapshot = new LinkedHashMap<>();
|
||||
if (p.inputBody() != null) snapshot.put("inputBody", p.inputBody());
|
||||
if (p.outputBody() != null) snapshot.put("outputBody", p.outputBody());
|
||||
if (p.inputHeaders() != null) snapshot.put("inputHeaders", p.inputHeaders());
|
||||
if (p.outputHeaders() != null) snapshot.put("outputHeaders", p.outputHeaders());
|
||||
return snapshot;
|
||||
});
|
||||
}
|
||||
|
||||
/** Parse the raw processor tree JSON stored alongside the execution. */
|
||||
private List<ProcessorNode> parseProcessorsJson(String json) {
|
||||
if (json == null || json.isBlank()) return null;
|
||||
@@ -75,7 +87,7 @@ public class DetailService {
|
||||
}
|
||||
}
|
||||
|
||||
/** Convert agent ProcessorExecution tree to detail ProcessorNode tree. */
|
||||
/** Convert agent ProcessorExecution list to detail ProcessorNode list. */
|
||||
private List<ProcessorNode> convertProcessors(List<ProcessorExecution> executions) {
|
||||
if (executions == null) return List.of();
|
||||
List<ProcessorNode> result = new ArrayList<>();
|
||||
@@ -89,9 +101,7 @@ public class DetailService {
|
||||
p.getDurationMs(),
|
||||
p.getErrorMessage(), p.getErrorStackTrace(),
|
||||
p.getAttributes() != null ? new LinkedHashMap<>(p.getAttributes()) : null,
|
||||
p.getLoopIndex(), p.getLoopSize(),
|
||||
p.getSplitIndex(), p.getSplitSize(),
|
||||
p.getMulticastIndex(),
|
||||
null, null, null, null, null,
|
||||
p.getResolvedEndpointUri(),
|
||||
p.getErrorType(), p.getErrorCategory(),
|
||||
p.getRootCauseType(), p.getRootCauseMessage(),
|
||||
@@ -100,21 +110,74 @@ public class DetailService {
|
||||
p.getFilterMatched(), p.getDuplicateMessage(),
|
||||
hasTrace
|
||||
);
|
||||
for (ProcessorNode child : convertProcessors(p.getChildren())) {
|
||||
node.addChild(child);
|
||||
}
|
||||
result.add(node);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fallback: reconstruct processor tree from flat records.
|
||||
* Note: this loses iteration context for processors with the same ID across iterations.
|
||||
* Reconstruct processor tree from flat records.
|
||||
* Detects whether records use the seq-based model (ClickHouse) or
|
||||
* processorId-based model (PostgreSQL) and delegates accordingly.
|
||||
*/
|
||||
List<ProcessorNode> buildTree(List<ProcessorRecord> processors) {
|
||||
if (processors.isEmpty()) return List.of();
|
||||
boolean hasSeq = processors.stream().anyMatch(p -> p.seq() != null);
|
||||
return hasSeq ? buildTreeBySeq(processors) : buildTreeByProcessorId(processors);
|
||||
}
|
||||
|
||||
/**
|
||||
* Seq-based tree reconstruction for ClickHouse flat processor model.
|
||||
* Uses seq/parentSeq linkage, correctly handling duplicate processorIds
|
||||
* across iterations (e.g., the same processor inside a split running N times).
|
||||
*/
|
||||
private List<ProcessorNode> buildTreeBySeq(List<ProcessorRecord> processors) {
|
||||
Map<Integer, ProcessorNode> nodeBySeq = new LinkedHashMap<>();
|
||||
|
||||
for (ProcessorRecord p : processors) {
|
||||
boolean hasTrace = p.inputBody() != null || p.outputBody() != null
|
||||
|| p.inputHeaders() != null || p.outputHeaders() != null;
|
||||
ProcessorNode node = new ProcessorNode(
|
||||
p.processorId(), p.processorType(), p.status(),
|
||||
p.startTime(), p.endTime(),
|
||||
p.durationMs() != null ? p.durationMs() : 0L,
|
||||
p.errorMessage(), p.errorStacktrace(),
|
||||
parseAttributes(p.attributes()),
|
||||
p.iteration(), p.iterationSize(),
|
||||
null, null, null,
|
||||
p.resolvedEndpointUri(),
|
||||
p.errorType(), p.errorCategory(),
|
||||
p.rootCauseType(), p.rootCauseMessage(),
|
||||
null, p.circuitBreakerState(),
|
||||
p.fallbackTriggered(),
|
||||
p.filterMatched(), p.duplicateMessage(),
|
||||
hasTrace
|
||||
);
|
||||
nodeBySeq.put(p.seq(), node);
|
||||
}
|
||||
|
||||
List<ProcessorNode> roots = new ArrayList<>();
|
||||
for (ProcessorRecord p : processors) {
|
||||
ProcessorNode node = nodeBySeq.get(p.seq());
|
||||
if (p.parentSeq() == null) {
|
||||
roots.add(node);
|
||||
} else {
|
||||
ProcessorNode parent = nodeBySeq.get(p.parentSeq());
|
||||
if (parent != null) {
|
||||
parent.addChild(node);
|
||||
} else {
|
||||
roots.add(node); // orphan safety
|
||||
}
|
||||
}
|
||||
}
|
||||
return roots;
|
||||
}
|
||||
|
||||
/**
|
||||
* ProcessorId-based tree reconstruction for PostgreSQL flat records.
|
||||
* Note: this loses iteration context for processors with the same ID across iterations.
|
||||
*/
|
||||
private List<ProcessorNode> buildTreeByProcessorId(List<ProcessorRecord> processors) {
|
||||
Map<String, ProcessorNode> nodeMap = new LinkedHashMap<>();
|
||||
for (ProcessorRecord p : processors) {
|
||||
boolean hasTrace = p.inputBody() != null || p.outputBody() != null
|
||||
|
||||
@@ -0,0 +1,219 @@
|
||||
package com.cameleer3.server.core.ingestion;
|
||||
|
||||
import com.cameleer3.common.model.ExecutionChunk;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import com.cameleer3.server.core.storage.DiagramStore;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
/**
|
||||
* Accumulates {@link ExecutionChunk} documents and produces:
|
||||
* <ul>
|
||||
* <li>{@link ProcessorBatch} — pushed immediately for each chunk (append-only)</li>
|
||||
* <li>{@link MergedExecution} — pushed when the final chunk arrives or on stale sweep</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class ChunkAccumulator {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ChunkAccumulator.class);
|
||||
private static final String DEFAULT_TENANT = "default";
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
|
||||
private final Consumer<MergedExecution> executionSink;
|
||||
private final Consumer<ProcessorBatch> processorSink;
|
||||
private final DiagramStore diagramStore;
|
||||
private final Duration staleThreshold;
|
||||
private final ConcurrentHashMap<String, PendingExchange> pending = new ConcurrentHashMap<>();
|
||||
|
||||
public ChunkAccumulator(Consumer<MergedExecution> executionSink,
|
||||
Consumer<ProcessorBatch> processorSink,
|
||||
DiagramStore diagramStore,
|
||||
Duration staleThreshold) {
|
||||
this.executionSink = executionSink;
|
||||
this.processorSink = processorSink;
|
||||
this.diagramStore = diagramStore;
|
||||
this.staleThreshold = staleThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process an incoming chunk: push processors immediately,
|
||||
* buffer/merge the envelope, and emit when final.
|
||||
*/
|
||||
public void onChunk(ExecutionChunk chunk) {
|
||||
// 1. Push processor records immediately (append-only)
|
||||
if (chunk.getProcessors() != null && !chunk.getProcessors().isEmpty()) {
|
||||
processorSink.accept(new ProcessorBatch(
|
||||
DEFAULT_TENANT,
|
||||
chunk.getExchangeId(),
|
||||
chunk.getRouteId(),
|
||||
chunk.getApplicationName(),
|
||||
chunk.getStartTime(),
|
||||
chunk.getProcessors()));
|
||||
}
|
||||
|
||||
// 2. Buffer/merge the exchange envelope
|
||||
if (chunk.isFinal()) {
|
||||
// Merge with any pending envelope, then emit
|
||||
PendingExchange existing = pending.remove(chunk.getExchangeId());
|
||||
ExecutionChunk merged = existing != null
|
||||
? mergeEnvelopes(existing.envelope(), chunk)
|
||||
: chunk;
|
||||
executionSink.accept(toMergedExecution(merged));
|
||||
} else {
|
||||
// Buffer the envelope for later merging
|
||||
pending.merge(chunk.getExchangeId(),
|
||||
new PendingExchange(chunk, Instant.now()),
|
||||
(old, incoming) -> new PendingExchange(
|
||||
mergeEnvelopes(old.envelope(), incoming.envelope()),
|
||||
old.receivedAt()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush exchanges that have been pending longer than the stale threshold.
|
||||
* Called periodically by a scheduled task.
|
||||
*/
|
||||
public void sweepStale() {
|
||||
Instant cutoff = Instant.now().minus(staleThreshold);
|
||||
pending.forEach((exchangeId, pe) -> {
|
||||
if (pe.receivedAt().isBefore(cutoff)) {
|
||||
PendingExchange removed = pending.remove(exchangeId);
|
||||
if (removed != null) {
|
||||
log.info("Flushing stale exchange {} (pending since {})",
|
||||
exchangeId, removed.receivedAt());
|
||||
executionSink.accept(toMergedExecution(removed.envelope()));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/** Number of exchanges awaiting a final chunk. */
|
||||
public int getPendingCount() {
|
||||
return pending.size();
|
||||
}
|
||||
|
||||
// ---- Merge logic ----
|
||||
|
||||
/**
|
||||
* COALESCE merge: for each field, prefer the newer value if non-null, else keep older.
|
||||
* The newer chunk (higher chunkSeq) takes precedence for status, endTime, durationMs.
|
||||
*/
|
||||
private static ExecutionChunk mergeEnvelopes(ExecutionChunk older, ExecutionChunk newer) {
|
||||
ExecutionChunk merged = new ExecutionChunk();
|
||||
merged.setExchangeId(coalesce(newer.getExchangeId(), older.getExchangeId()));
|
||||
merged.setApplicationName(coalesce(newer.getApplicationName(), older.getApplicationName()));
|
||||
merged.setAgentId(coalesce(newer.getAgentId(), older.getAgentId()));
|
||||
merged.setRouteId(coalesce(newer.getRouteId(), older.getRouteId()));
|
||||
merged.setCorrelationId(coalesce(newer.getCorrelationId(), older.getCorrelationId()));
|
||||
merged.setStatus(coalesce(newer.getStatus(), older.getStatus()));
|
||||
merged.setStartTime(coalesce(older.getStartTime(), newer.getStartTime())); // prefer earliest startTime
|
||||
merged.setEndTime(coalesce(newer.getEndTime(), older.getEndTime()));
|
||||
merged.setDurationMs(coalesce(newer.getDurationMs(), older.getDurationMs()));
|
||||
merged.setEngineLevel(coalesce(newer.getEngineLevel(), older.getEngineLevel()));
|
||||
merged.setErrorMessage(coalesce(newer.getErrorMessage(), older.getErrorMessage()));
|
||||
merged.setErrorStackTrace(coalesce(newer.getErrorStackTrace(), older.getErrorStackTrace()));
|
||||
merged.setErrorType(coalesce(newer.getErrorType(), older.getErrorType()));
|
||||
merged.setErrorCategory(coalesce(newer.getErrorCategory(), older.getErrorCategory()));
|
||||
merged.setRootCauseType(coalesce(newer.getRootCauseType(), older.getRootCauseType()));
|
||||
merged.setRootCauseMessage(coalesce(newer.getRootCauseMessage(), older.getRootCauseMessage()));
|
||||
merged.setAttributes(coalesce(newer.getAttributes(), older.getAttributes()));
|
||||
merged.setTraceId(coalesce(newer.getTraceId(), older.getTraceId()));
|
||||
merged.setSpanId(coalesce(newer.getSpanId(), older.getSpanId()));
|
||||
merged.setOriginalExchangeId(coalesce(newer.getOriginalExchangeId(), older.getOriginalExchangeId()));
|
||||
merged.setReplayExchangeId(coalesce(newer.getReplayExchangeId(), older.getReplayExchangeId()));
|
||||
merged.setChunkSeq(Math.max(newer.getChunkSeq(), older.getChunkSeq()));
|
||||
merged.setFinal(newer.isFinal() || older.isFinal());
|
||||
merged.setProcessors(List.of()); // processors are handled separately
|
||||
return merged;
|
||||
}
|
||||
|
||||
private static <T> T coalesce(T a, T b) {
|
||||
return a != null ? a : b;
|
||||
}
|
||||
|
||||
// ---- Conversion to MergedExecution ----
|
||||
|
||||
private MergedExecution toMergedExecution(ExecutionChunk envelope) {
|
||||
String diagramHash = "";
|
||||
try {
|
||||
diagramHash = diagramStore
|
||||
.findContentHashForRoute(envelope.getRouteId(), envelope.getAgentId())
|
||||
.orElse("");
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not resolve diagram hash for route={}", envelope.getRouteId());
|
||||
}
|
||||
return new MergedExecution(
|
||||
DEFAULT_TENANT,
|
||||
1L,
|
||||
envelope.getExchangeId(),
|
||||
envelope.getRouteId(),
|
||||
envelope.getAgentId(),
|
||||
envelope.getApplicationName(),
|
||||
envelope.getStatus() != null ? envelope.getStatus().name() : "RUNNING",
|
||||
envelope.getCorrelationId(),
|
||||
envelope.getExchangeId(),
|
||||
envelope.getStartTime(),
|
||||
envelope.getEndTime(),
|
||||
envelope.getDurationMs(),
|
||||
envelope.getErrorMessage(),
|
||||
envelope.getErrorStackTrace(),
|
||||
envelope.getErrorType(),
|
||||
envelope.getErrorCategory(),
|
||||
envelope.getRootCauseType(),
|
||||
envelope.getRootCauseMessage(),
|
||||
diagramHash,
|
||||
envelope.getEngineLevel(),
|
||||
"", // inputBody — on processor records now
|
||||
"", // outputBody
|
||||
"", // inputHeaders
|
||||
"", // outputHeaders
|
||||
serializeAttributes(envelope.getAttributes()),
|
||||
envelope.getTraceId(),
|
||||
envelope.getSpanId(),
|
||||
false, // hasTraceData — not tracked at envelope level
|
||||
envelope.getReplayExchangeId() != null, // isReplay
|
||||
envelope.getOriginalExchangeId(),
|
||||
envelope.getReplayExchangeId()
|
||||
);
|
||||
}
|
||||
|
||||
private static String serializeAttributes(Map<String, String> attributes) {
|
||||
if (attributes == null || attributes.isEmpty()) {
|
||||
return "{}";
|
||||
}
|
||||
try {
|
||||
return MAPPER.writeValueAsString(attributes);
|
||||
} catch (JsonProcessingException e) {
|
||||
log.warn("Failed to serialize attributes, falling back to empty object", e);
|
||||
return "{}";
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Inner types ----
|
||||
|
||||
/**
|
||||
* A batch of processor records from a single chunk, ready for ClickHouse insertion.
|
||||
*/
|
||||
public record ProcessorBatch(
|
||||
String tenantId,
|
||||
String executionId,
|
||||
String routeId,
|
||||
String applicationName,
|
||||
Instant execStartTime,
|
||||
List<FlatProcessorRecord> processors
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Envelope buffered while waiting for the final chunk.
|
||||
*/
|
||||
private record PendingExchange(ExecutionChunk envelope, Instant receivedAt) {}
|
||||
}
|
||||
@@ -132,7 +132,6 @@ public class IngestionService {
|
||||
if (processors == null) return false;
|
||||
for (ProcessorExecution p : processors) {
|
||||
if (p.getInputBody() != null || p.getOutputBody() != null) return true;
|
||||
if (hasAnyTraceData(p.getChildren())) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -155,20 +154,14 @@ public class IngestionService {
|
||||
truncateBody(p.getInputBody()), truncateBody(p.getOutputBody()),
|
||||
toJson(p.getInputHeaders()), toJson(p.getOutputHeaders()),
|
||||
toJson(p.getAttributes()),
|
||||
p.getLoopIndex(), p.getLoopSize(),
|
||||
p.getSplitIndex(), p.getSplitSize(),
|
||||
p.getMulticastIndex(),
|
||||
null, null, null, null, null,
|
||||
p.getResolvedEndpointUri(),
|
||||
p.getErrorType(), p.getErrorCategory(),
|
||||
p.getRootCauseType(), p.getRootCauseMessage(),
|
||||
p.getErrorHandlerType(), p.getCircuitBreakerState(),
|
||||
p.getFallbackTriggered()
|
||||
p.getFallbackTriggered(),
|
||||
null, null, null, null, null, null
|
||||
));
|
||||
if (p.getChildren() != null) {
|
||||
flat.addAll(flattenProcessors(
|
||||
p.getChildren(), executionId, execStartTime,
|
||||
applicationName, routeId, p.getProcessorId(), depth + 1));
|
||||
}
|
||||
}
|
||||
return flat;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.cameleer3.server.core.ingestion;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
/**
|
||||
* A merged execution envelope ready for ClickHouse insertion.
|
||||
* Produced by ChunkAccumulator after receiving the final chunk.
|
||||
*/
|
||||
public record MergedExecution(
|
||||
String tenantId,
|
||||
long version,
|
||||
String executionId,
|
||||
String routeId,
|
||||
String agentId,
|
||||
String applicationName,
|
||||
String status,
|
||||
String correlationId,
|
||||
String exchangeId,
|
||||
Instant startTime,
|
||||
Instant endTime,
|
||||
Long durationMs,
|
||||
String errorMessage,
|
||||
String errorStacktrace,
|
||||
String errorType,
|
||||
String errorCategory,
|
||||
String rootCauseType,
|
||||
String rootCauseMessage,
|
||||
String diagramContentHash,
|
||||
String engineLevel,
|
||||
String inputBody,
|
||||
String outputBody,
|
||||
String inputHeaders,
|
||||
String outputHeaders,
|
||||
String attributes,
|
||||
String traceId,
|
||||
String spanId,
|
||||
boolean hasTraceData,
|
||||
boolean isReplay,
|
||||
String originalExchangeId,
|
||||
String replayExchangeId
|
||||
) {}
|
||||
@@ -49,6 +49,17 @@ public interface ExecutionStore {
|
||||
String errorType, String errorCategory,
|
||||
String rootCauseType, String rootCauseMessage,
|
||||
String errorHandlerType, String circuitBreakerState,
|
||||
Boolean fallbackTriggered
|
||||
Boolean fallbackTriggered,
|
||||
// New fields for ClickHouse seq-based model
|
||||
Integer seq,
|
||||
Integer parentSeq,
|
||||
Integer iteration,
|
||||
Integer iterationSize,
|
||||
Boolean filterMatched,
|
||||
Boolean duplicateMessage
|
||||
) {}
|
||||
|
||||
default Optional<ProcessorRecord> findProcessorBySeq(String executionId, int seq) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
package com.cameleer3.server.core.storage;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
public record LogEntryResult(String timestamp, String level, String loggerName,
|
||||
String message, String threadName, String stackTrace) {}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.cameleer3.server.core.storage;
|
||||
|
||||
import com.cameleer3.common.model.LogEntry;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
|
||||
public interface LogIndex {
|
||||
|
||||
List<LogEntryResult> search(String application, String agentId, String level,
|
||||
String query, String exchangeId,
|
||||
Instant from, Instant to, int limit);
|
||||
|
||||
void indexBatch(String agentId, String application, List<LogEntry> entries);
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package com.cameleer3.server.core.storage;
|
||||
|
||||
import com.cameleer3.server.core.storage.model.MetricTimeSeries;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public interface MetricsQueryStore {
|
||||
|
||||
Map<String, List<MetricTimeSeries.Bucket>> queryTimeSeries(
|
||||
String agentId, List<String> metricNames,
|
||||
Instant from, Instant to, int buckets);
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.cameleer3.server.core.storage.model;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
|
||||
public record MetricTimeSeries(String metricName, List<Bucket> buckets) {
|
||||
|
||||
public record Bucket(Instant time, double value) {}
|
||||
}
|
||||
@@ -29,7 +29,8 @@ class TreeReconstructionTest {
|
||||
status, NOW, NOW, 10L,
|
||||
null, null, null, null, null, null, null,
|
||||
null, null, null, null, null,
|
||||
null, null, null, null, null, null, null, null
|
||||
null, null, null, null, null, null, null, null,
|
||||
null, null, null, null, null, null
|
||||
);
|
||||
}
|
||||
|
||||
@@ -108,4 +109,100 @@ class TreeReconstructionTest {
|
||||
List<ProcessorNode> roots = detailService.buildTree(List.of());
|
||||
assertThat(roots).isEmpty();
|
||||
}
|
||||
|
||||
// --- seq-based model tests (ClickHouse) ---
|
||||
|
||||
private ProcessorRecord procWithSeq(String id, String type, String status,
|
||||
int seq, Integer parentSeq,
|
||||
Integer iteration, Integer iterationSize) {
|
||||
return new ProcessorRecord(
|
||||
"exec-1", id, type, "app", "route1",
|
||||
0, null, status, NOW, NOW, 10L,
|
||||
null, null, null, null, null, null, null,
|
||||
null, null, null, null, null,
|
||||
null, null, null, null, null, null, null, null,
|
||||
seq, parentSeq, iteration, iterationSize, null, null
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTree_seqBasedModel_linearChain() {
|
||||
List<ProcessorRecord> processors = List.of(
|
||||
procWithSeq("from", "from", "COMPLETED", 1, null, null, null),
|
||||
procWithSeq("log1", "log", "COMPLETED", 2, 1, null, null),
|
||||
procWithSeq("to1", "to", "COMPLETED", 3, 2, null, null)
|
||||
);
|
||||
|
||||
List<ProcessorNode> roots = detailService.buildTree(processors);
|
||||
|
||||
assertThat(roots).hasSize(1);
|
||||
ProcessorNode root = roots.get(0);
|
||||
assertThat(root.getProcessorId()).isEqualTo("from");
|
||||
assertThat(root.getChildren()).hasSize(1);
|
||||
|
||||
ProcessorNode child = root.getChildren().get(0);
|
||||
assertThat(child.getProcessorId()).isEqualTo("log1");
|
||||
assertThat(child.getChildren()).hasSize(1);
|
||||
|
||||
ProcessorNode grandchild = child.getChildren().get(0);
|
||||
assertThat(grandchild.getProcessorId()).isEqualTo("to1");
|
||||
assertThat(grandchild.getChildren()).isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTree_seqBasedModel_sameProcessorIdMultipleIterations() {
|
||||
// A split processor (seq 1) with 3 child processors all having the SAME
|
||||
// processorId but different seq values — this is the key scenario that
|
||||
// breaks the old processorId-based approach.
|
||||
List<ProcessorRecord> processors = List.of(
|
||||
procWithSeq("split1", "split", "COMPLETED", 1, null, null, null),
|
||||
procWithSeq("log-inside", "log", "COMPLETED", 2, 1, 0, 3),
|
||||
procWithSeq("log-inside", "log", "COMPLETED", 3, 1, 1, 3),
|
||||
procWithSeq("log-inside", "log", "COMPLETED", 4, 1, 2, 3)
|
||||
);
|
||||
|
||||
List<ProcessorNode> roots = detailService.buildTree(processors);
|
||||
|
||||
assertThat(roots).hasSize(1);
|
||||
ProcessorNode split = roots.get(0);
|
||||
assertThat(split.getProcessorId()).isEqualTo("split1");
|
||||
assertThat(split.getChildren()).hasSize(3);
|
||||
|
||||
// All three children should have the same processorId
|
||||
for (ProcessorNode child : split.getChildren()) {
|
||||
assertThat(child.getProcessorId()).isEqualTo("log-inside");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTree_seqBasedModel_orphanSafety() {
|
||||
// A processor whose parentSeq points to a non-existent seq
|
||||
List<ProcessorRecord> processors = List.of(
|
||||
procWithSeq("root", "from", "COMPLETED", 1, null, null, null),
|
||||
procWithSeq("orphan", "log", "COMPLETED", 2, 999, null, null)
|
||||
);
|
||||
|
||||
List<ProcessorNode> roots = detailService.buildTree(processors);
|
||||
|
||||
// Both should be roots — the orphan falls through to root list
|
||||
assertThat(roots).hasSize(2);
|
||||
assertThat(roots.get(0).getProcessorId()).isEqualTo("root");
|
||||
assertThat(roots.get(1).getProcessorId()).isEqualTo("orphan");
|
||||
}
|
||||
|
||||
@Test
|
||||
void buildTree_seqBasedModel_iterationFields() {
|
||||
// Verify iteration/iterationSize are populated as loopIndex/loopSize
|
||||
List<ProcessorRecord> processors = List.of(
|
||||
procWithSeq("loop1", "loop", "COMPLETED", 1, null, null, null),
|
||||
procWithSeq("body", "log", "COMPLETED", 2, 1, 5, 10)
|
||||
);
|
||||
|
||||
List<ProcessorNode> roots = detailService.buildTree(processors);
|
||||
|
||||
assertThat(roots).hasSize(1);
|
||||
ProcessorNode child = roots.get(0).getChildren().get(0);
|
||||
assertThat(child.getLoopIndex()).isEqualTo(5);
|
||||
assertThat(child.getLoopSize()).isEqualTo(10);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,225 @@
|
||||
package com.cameleer3.server.core.ingestion;
|
||||
|
||||
import com.cameleer3.common.model.ExecutionChunk;
|
||||
import com.cameleer3.common.model.ExecutionStatus;
|
||||
import com.cameleer3.common.model.FlatProcessorRecord;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class ChunkAccumulatorTest {
|
||||
|
||||
private CopyOnWriteArrayList<MergedExecution> executionSink;
|
||||
private CopyOnWriteArrayList<ChunkAccumulator.ProcessorBatch> processorSink;
|
||||
private ChunkAccumulator accumulator;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
executionSink = new CopyOnWriteArrayList<>();
|
||||
processorSink = new CopyOnWriteArrayList<>();
|
||||
accumulator = new ChunkAccumulator(
|
||||
executionSink::add, processorSink::add, Duration.ofMinutes(5));
|
||||
}
|
||||
|
||||
@Test
|
||||
void singleFinalChunk_producesExecutionAndProcessors() {
|
||||
ExecutionChunk chunk = chunk("ex-1", "COMPLETED",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
Instant.parse("2026-03-31T10:00:01Z"), 1000L,
|
||||
0, true,
|
||||
List.of(proc(1, null, "log1", "log", "COMPLETED", 5L)));
|
||||
chunk.setCorrelationId("corr-1");
|
||||
chunk.setAttributes(Map.of("orderId", "ORD-1"));
|
||||
chunk.setTraceId("trace-1");
|
||||
chunk.setSpanId("span-1");
|
||||
|
||||
accumulator.onChunk(chunk);
|
||||
|
||||
// Processor sink should receive 1 batch with 1 record
|
||||
assertThat(processorSink).hasSize(1);
|
||||
ChunkAccumulator.ProcessorBatch batch = processorSink.get(0);
|
||||
assertThat(batch.tenantId()).isEqualTo("default");
|
||||
assertThat(batch.executionId()).isEqualTo("ex-1");
|
||||
assertThat(batch.routeId()).isEqualTo("route-1");
|
||||
assertThat(batch.applicationName()).isEqualTo("order-service");
|
||||
assertThat(batch.execStartTime()).isEqualTo(Instant.parse("2026-03-31T10:00:00Z"));
|
||||
assertThat(batch.processors()).hasSize(1);
|
||||
|
||||
// Execution sink should receive 1 merged execution
|
||||
assertThat(executionSink).hasSize(1);
|
||||
MergedExecution exec = executionSink.get(0);
|
||||
assertThat(exec.tenantId()).isEqualTo("default");
|
||||
assertThat(exec.version()).isEqualTo(1L);
|
||||
assertThat(exec.executionId()).isEqualTo("ex-1");
|
||||
assertThat(exec.routeId()).isEqualTo("route-1");
|
||||
assertThat(exec.status()).isEqualTo("COMPLETED");
|
||||
assertThat(exec.durationMs()).isEqualTo(1000L);
|
||||
assertThat(exec.traceId()).isEqualTo("trace-1");
|
||||
assertThat(exec.spanId()).isEqualTo("span-1");
|
||||
assertThat(exec.attributes()).contains("orderId");
|
||||
}
|
||||
|
||||
@Test
|
||||
void multipleChunks_mergesEnvelope_insertsProcessorsImmediately() {
|
||||
ExecutionChunk chunk0 = chunk("ex-2", "RUNNING",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
null, null,
|
||||
0, false,
|
||||
List.of(
|
||||
proc(1, null, "log1", "log", "COMPLETED", 5L),
|
||||
proc(2, null, "log2", "log", "COMPLETED", 3L)));
|
||||
chunk0.setCorrelationId("ex-2");
|
||||
|
||||
accumulator.onChunk(chunk0);
|
||||
|
||||
// Processors pushed immediately on chunk 0
|
||||
assertThat(processorSink).hasSize(1);
|
||||
assertThat(processorSink.get(0).processors()).hasSize(2);
|
||||
|
||||
// No execution yet (not final)
|
||||
assertThat(executionSink).isEmpty();
|
||||
|
||||
ExecutionChunk chunk1 = chunk("ex-2", "COMPLETED",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
Instant.parse("2026-03-31T10:00:02Z"), 2000L,
|
||||
1, true,
|
||||
List.of(proc(3, null, "log3", "log", "COMPLETED", 7L)));
|
||||
chunk1.setCorrelationId("ex-2");
|
||||
|
||||
accumulator.onChunk(chunk1);
|
||||
|
||||
// Processors from chunk 1 also pushed
|
||||
assertThat(processorSink).hasSize(2);
|
||||
assertThat(processorSink.get(1).processors()).hasSize(1);
|
||||
|
||||
// Now execution is emitted
|
||||
assertThat(executionSink).hasSize(1);
|
||||
MergedExecution exec = executionSink.get(0);
|
||||
assertThat(exec.status()).isEqualTo("COMPLETED");
|
||||
assertThat(exec.durationMs()).isEqualTo(2000L);
|
||||
}
|
||||
|
||||
@Test
|
||||
void staleExchange_flushedBySweep() throws Exception {
|
||||
ChunkAccumulator staleAccumulator = new ChunkAccumulator(
|
||||
executionSink::add, processorSink::add, Duration.ofMillis(1));
|
||||
|
||||
ExecutionChunk c = chunk("ex-3", "RUNNING",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
null, null,
|
||||
0, false,
|
||||
List.of());
|
||||
c.setCorrelationId("ex-3");
|
||||
|
||||
staleAccumulator.onChunk(c);
|
||||
assertThat(executionSink).isEmpty();
|
||||
|
||||
Thread.sleep(5);
|
||||
staleAccumulator.sweepStale();
|
||||
|
||||
assertThat(executionSink).hasSize(1);
|
||||
MergedExecution exec = executionSink.get(0);
|
||||
assertThat(exec.status()).isEqualTo("RUNNING");
|
||||
assertThat(exec.executionId()).isEqualTo("ex-3");
|
||||
}
|
||||
|
||||
@Test
|
||||
void finalChunkWithErrors_populatesErrorFields() {
|
||||
ExecutionChunk c = chunk("ex-4", "FAILED",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
Instant.parse("2026-03-31T10:00:01Z"), 1000L,
|
||||
0, true,
|
||||
List.of());
|
||||
c.setCorrelationId("ex-4");
|
||||
c.setErrorMessage("NullPointerException");
|
||||
c.setErrorStackTrace("at com.foo.Bar.baz(Bar.java:42)");
|
||||
c.setErrorType("NullPointerException");
|
||||
c.setErrorCategory("RUNTIME");
|
||||
c.setRootCauseType("NullPointerException");
|
||||
c.setRootCauseMessage("null value at index 0");
|
||||
|
||||
accumulator.onChunk(c);
|
||||
|
||||
assertThat(executionSink).hasSize(1);
|
||||
MergedExecution exec = executionSink.get(0);
|
||||
assertThat(exec.status()).isEqualTo("FAILED");
|
||||
assertThat(exec.errorMessage()).isEqualTo("NullPointerException");
|
||||
assertThat(exec.errorStacktrace()).isEqualTo("at com.foo.Bar.baz(Bar.java:42)");
|
||||
assertThat(exec.errorType()).isEqualTo("NullPointerException");
|
||||
assertThat(exec.errorCategory()).isEqualTo("RUNTIME");
|
||||
assertThat(exec.rootCauseType()).isEqualTo("NullPointerException");
|
||||
assertThat(exec.rootCauseMessage()).isEqualTo("null value at index 0");
|
||||
}
|
||||
|
||||
@Test
|
||||
void getPendingCount_tracksBufferedExchanges() {
|
||||
ExecutionChunk running1 = chunk("ex-5", "RUNNING",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
null, null,
|
||||
0, false,
|
||||
List.of());
|
||||
running1.setCorrelationId("ex-5");
|
||||
|
||||
ExecutionChunk running2 = chunk("ex-6", "RUNNING",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
null, null,
|
||||
0, false,
|
||||
List.of());
|
||||
running2.setCorrelationId("ex-6");
|
||||
running2.setRouteId("route-2");
|
||||
|
||||
accumulator.onChunk(running1);
|
||||
accumulator.onChunk(running2);
|
||||
assertThat(accumulator.getPendingCount()).isEqualTo(2);
|
||||
|
||||
// Send final for ex-5
|
||||
ExecutionChunk final5 = chunk("ex-5", "COMPLETED",
|
||||
Instant.parse("2026-03-31T10:00:00Z"),
|
||||
Instant.parse("2026-03-31T10:00:01Z"), 1000L,
|
||||
1, true,
|
||||
List.of());
|
||||
final5.setCorrelationId("ex-5");
|
||||
|
||||
accumulator.onChunk(final5);
|
||||
assertThat(accumulator.getPendingCount()).isEqualTo(1);
|
||||
}
|
||||
|
||||
/** Helper to create an ExecutionChunk with common fields. */
|
||||
private static ExecutionChunk chunk(String exchangeId, String status, Instant start, Instant end, Long duration,
|
||||
int chunkSeq, boolean isFinal, List<FlatProcessorRecord> processors) {
|
||||
ExecutionChunk c = new ExecutionChunk();
|
||||
c.setExchangeId(exchangeId);
|
||||
c.setApplicationName(exchangeId.equals("ex-1") ? "order-service" : "app");
|
||||
c.setAgentId("agent-1");
|
||||
c.setRouteId("route-1");
|
||||
c.setCorrelationId(null);
|
||||
c.setStatus(ExecutionStatus.valueOf(status));
|
||||
c.setStartTime(start);
|
||||
c.setEndTime(end);
|
||||
c.setDurationMs(duration);
|
||||
c.setEngineLevel("REGULAR");
|
||||
c.setChunkSeq(chunkSeq);
|
||||
c.setFinal(isFinal);
|
||||
c.setProcessors(processors);
|
||||
return c;
|
||||
}
|
||||
|
||||
/** Helper to create a FlatProcessorRecord with minimal fields. */
|
||||
private static FlatProcessorRecord proc(int seq, Integer parentSeq,
|
||||
String processorId, String processorType,
|
||||
String status, long durationMs) {
|
||||
FlatProcessorRecord p = new FlatProcessorRecord(seq, processorId, processorType);
|
||||
p.setParentSeq(parentSeq);
|
||||
p.setStatus(ExecutionStatus.valueOf(status));
|
||||
p.setStartTime(Instant.parse("2026-03-31T10:00:00.100Z"));
|
||||
p.setDurationMs(durationMs);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
@@ -75,6 +75,34 @@ spec:
|
||||
name: cameleer-auth
|
||||
key: CAMELEER_JWT_SECRET
|
||||
optional: true
|
||||
- name: CLICKHOUSE_ENABLED
|
||||
value: "true"
|
||||
- name: CLICKHOUSE_URL
|
||||
value: "jdbc:clickhouse://clickhouse.cameleer.svc.cluster.local:8123/cameleer"
|
||||
- name: CLICKHOUSE_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clickhouse-credentials
|
||||
key: CLICKHOUSE_USER
|
||||
- name: CLICKHOUSE_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: clickhouse-credentials
|
||||
key: CLICKHOUSE_PASSWORD
|
||||
- name: CAMELEER_STORAGE_METRICS
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_SEARCH
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_STATS
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_DIAGRAMS
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_EVENTS
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_LOGS
|
||||
value: "clickhouse"
|
||||
- name: CAMELEER_STORAGE_EXECUTIONS
|
||||
value: "clickhouse"
|
||||
|
||||
resources:
|
||||
requests:
|
||||
|
||||
197
deploy/clickhouse.yaml
Normal file
197
deploy/clickhouse.yaml
Normal file
@@ -0,0 +1,197 @@
|
||||
# Single-node ClickHouse StatefulSet for the cameleer namespace.
# Credentials come from the `clickhouse-credentials` secret (created by CI);
# low-memory tuning is mounted from the `clickhouse-config` ConfigMap,
# and first-boot SQL from the `clickhouse-initdb` ConfigMap.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: clickhouse
  namespace: cameleer
spec:
  serviceName: clickhouse
  replicas: 1
  selector:
    matchLabels:
      app: clickhouse
  template:
    metadata:
      labels:
        app: clickhouse
    spec:
      containers:
        - name: clickhouse
          image: clickhouse/clickhouse-server:24.12
          env:
            # Bootstrap user/password consumed by the official entrypoint.
            - name: CLICKHOUSE_USER
              valueFrom:
                secretKeyRef:
                  name: clickhouse-credentials
                  key: CLICKHOUSE_USER
            - name: CLICKHOUSE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: clickhouse-credentials
                  key: CLICKHOUSE_PASSWORD
            # Lets the bootstrap user manage users/roles/grants via SQL.
            - name: CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT
              value: "1"
          ports:
            - containerPort: 8123
              name: http
            - containerPort: 9000
              name: native
          volumeMounts:
            - name: data
              mountPath: /var/lib/clickhouse
            # Scripts in this directory run once, on an empty data dir.
            - name: initdb
              mountPath: /docker-entrypoint-initdb.d
            # Server- and user-level low-memory overrides, mounted read-only
            # into ClickHouse's standard drop-in config directories.
            - name: config
              mountPath: /etc/clickhouse-server/config.d/low-memory.xml
              subPath: low-memory.xml
              readOnly: true
            - name: config
              mountPath: /etc/clickhouse-server/users.d/low-memory-users.xml
              subPath: low-memory-users.xml
              readOnly: true
          resources:
            requests:
              memory: "2Gi"
              cpu: "500m"
            limits:
              # The low-memory.xml tuning assumes this 4Gi ceiling.
              memory: "4Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /ping
              port: 8123
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ping
              port: 8123
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
      volumes:
        - name: initdb
          configMap:
            name: clickhouse-initdb
        - name: config
          configMap:
            name: clickhouse-config
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 50Gi
---
|
||||
# Headless Service (clusterIP: None) so the StatefulSet pod gets a stable DNS
# name (clickhouse.cameleer.svc.cluster.local) for JDBC (8123) and native (9000)
# clients.
apiVersion: v1
kind: Service
metadata:
  name: clickhouse
  namespace: cameleer
spec:
  clusterIP: None
  selector:
    app: clickhouse
  ports:
    - port: 8123
      targetPort: 8123
      name: http
    - port: 9000
      targetPort: 9000
      name: native
---
|
||||
# Low-memory tuning for a single ClickHouse node capped at 4Gi.
# Mounted as drop-in XML files into config.d/ (server settings) and
# users.d/ (per-query profile settings) by the StatefulSet.
apiVersion: v1
kind: ConfigMap
metadata:
  name: clickhouse-config
  namespace: cameleer
data:
  # Server-level tuning for 4Gi container.
  # Based on https://kb.altinity.com/altinity-kb-setup-and-maintenance/configure_clickhouse_for_low_mem_envs/
  low-memory.xml: |
    <clickhouse>
      <!-- Let OS page out binary segments instead of pinning them in RAM -->
      <mlock_executable>false</mlock_executable>

      <!-- Disable unused protocol ports -->
      <mysql_port remove="1" />
      <postgresql_port remove="1" />

      <!-- Disable noisy system log tables -->
      <query_thread_log remove="1" />
      <opentelemetry_span_log remove="1" />
      <processors_profile_log remove="1" />

      <!-- Reserve 25% of cgroup memory for OS/kernel (default 0.9 is too aggressive) -->
      <max_server_memory_usage_to_ram_ratio>0.75</max_server_memory_usage_to_ram_ratio>
      <max_server_memory_usage>0</max_server_memory_usage>

      <!-- Shrink caches — defaults are sized for 64+ GiB servers -->
      <mark_cache_size>268435456</mark_cache_size> <!-- 256 MiB -->
      <index_mark_cache_size>67108864</index_mark_cache_size> <!-- 64 MiB -->
      <uncompressed_cache_size>16777216</uncompressed_cache_size> <!-- 16 MiB -->
      <compiled_expression_cache_size>67108864</compiled_expression_cache_size> <!-- 64 MiB -->

      <!-- Connection & concurrency limits -->
      <max_thread_pool_size>2000</max_thread_pool_size>
      <max_connections>64</max_connections>
      <max_concurrent_queries>8</max_concurrent_queries>

      <!-- Background merge pools — keep low to avoid memory spikes -->
      <background_pool_size>2</background_pool_size>
      <background_merges_mutations_concurrency_ratio>2</background_merges_mutations_concurrency_ratio>
      <background_buffer_flush_schedule_pool_size>1</background_buffer_flush_schedule_pool_size>
      <background_move_pool_size>1</background_move_pool_size>
      <background_fetches_pool_size>1</background_fetches_pool_size>
      <background_common_pool_size>2</background_common_pool_size>
      <background_schedule_pool_size>8</background_schedule_pool_size>
      <background_message_broker_schedule_pool_size>1</background_message_broker_schedule_pool_size>
      <background_distributed_schedule_pool_size>1</background_distributed_schedule_pool_size>
      <tables_loader_foreground_pool_size>0</tables_loader_foreground_pool_size>
      <tables_loader_background_pool_size>0</tables_loader_background_pool_size>

      <!-- MergeTree: limit merge memory and allow more concurrent smaller merges -->
      <merge_tree>
        <merge_max_block_size>1024</merge_max_block_size>
        <max_bytes_to_merge_at_max_space_in_pool>1073741824</max_bytes_to_merge_at_max_space_in_pool> <!-- 1 GiB -->
        <min_bytes_for_wide_part>134217728</min_bytes_for_wide_part>
        <vertical_merge_algorithm_min_bytes_to_activate>134217728</vertical_merge_algorithm_min_bytes_to_activate>
        <number_of_free_entries_in_pool_to_lower_max_size_of_merge>2</number_of_free_entries_in_pool_to_lower_max_size_of_merge>
        <number_of_free_entries_in_pool_to_execute_mutation>2</number_of_free_entries_in_pool_to_execute_mutation>
        <number_of_free_entries_in_pool_to_execute_optimize_entire_partition>2</number_of_free_entries_in_pool_to_execute_optimize_entire_partition>
      </merge_tree>
    </clickhouse>
  # User profile settings — per-query limits and parallelism.
  low-memory-users.xml: |
    <clickhouse>
      <profiles>
        <default>
          <!-- Limit per-query thread count (default = nproc) -->
          <max_threads>2</max_threads>
          <!-- Cap per-query memory at 1.5 GiB -->
          <max_memory_usage>1610612736</max_memory_usage>
          <max_block_size>8192</max_block_size>
          <queue_max_wait_ms>1000</queue_max_wait_ms>
          <max_execution_time>600</max_execution_time>
          <!-- Disable parallel parse/format to reduce per-query memory -->
          <input_format_parallel_parsing>0</input_format_parallel_parsing>
          <output_format_parallel_formatting>0</output_format_parallel_formatting>
          <!-- Spill to disk instead of OOM for large aggregations/sorts -->
          <max_bytes_before_external_group_by>1073741824</max_bytes_before_external_group_by>
          <max_bytes_before_external_sort>1073741824</max_bytes_before_external_sort>
        </default>
      </profiles>
    </clickhouse>
---
|
||||
# First-boot SQL, mounted into /docker-entrypoint-initdb.d by the StatefulSet.
# The official entrypoint runs these scripts only when the data dir is empty.
apiVersion: v1
kind: ConfigMap
metadata:
  name: clickhouse-initdb
  namespace: cameleer
data:
  01-create-database.sql: |
    CREATE DATABASE IF NOT EXISTS cameleer;
|
||||
1193
docs/superpowers/plans/2026-03-31-clickhouse-phase1-foundation.md
Normal file
1193
docs/superpowers/plans/2026-03-31-clickhouse-phase1-foundation.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,146 @@
|
||||
# Append-Only Execution Data Protocol
|
||||
|
||||
A reference document for redesigning the Cameleer agent's data reporting to be append-only,
|
||||
eliminating the need for upserts in the storage layer.
|
||||
|
||||
## Problem
|
||||
|
||||
The current protocol sends execution data in two phases:
|
||||
|
||||
1. **RUNNING phase**: Agent sends a partial record when a route starts executing (execution_id, route_id, start_time, status=RUNNING). No bodies, no duration, no error info.
|
||||
2. **COMPLETED/FAILED phase**: Agent sends an enriched record when execution finishes (duration, output body, headers, errors, processor tree).
|
||||
|
||||
The server uses `INSERT ... ON CONFLICT DO UPDATE SET COALESCE(...)` to merge these into a single row. This works in PostgreSQL but creates problems for append-only stores like ClickHouse, Kafka topics, or any event-sourced architecture.
|
||||
|
||||
### Why This Matters
|
||||
|
||||
- **ClickHouse**: No native upsert. Must use ReplacingMergeTree (eventual consistency, FINAL overhead) or application-side buffering.
|
||||
- **Event streaming**: Kafka/Pulsar topics are append-only. Two-phase lifecycle requires a stateful stream processor to merge.
|
||||
- **Data lakes**: Parquet files are immutable. Updates require read-modify-write of entire files.
|
||||
- **Materialized views**: Insert-triggered aggregations (ClickHouse MVs, Kafka Streams, Flink) double-count if they see both RUNNING and COMPLETED inserts for the same execution.
|
||||
|
||||
## Proposed Protocol Change
|
||||
|
||||
### Option A: Single-Phase Reporting (Recommended)
|
||||
|
||||
The agent buffers the execution locally and sends a **single, complete record** only when the execution reaches a terminal state (COMPLETED or FAILED).
|
||||
|
||||
```
|
||||
Current: Agent -> [RUNNING] -> Server -> [COMPLETED] -> Server (upsert)
|
||||
Proposed: Agent -> [buffer locally] -> [COMPLETED with all fields] -> Server (append)
|
||||
```
|
||||
|
||||
**What changes in the agent:**
|
||||
- `RouteExecutionTracker` holds in-flight executions in a local `ConcurrentHashMap`
|
||||
- On route start: create tracker entry with start_time, route_id, etc.
|
||||
- On route complete: enrich tracker entry with duration, bodies, errors, processor tree
|
||||
- On report: send the complete record in one HTTP POST
|
||||
- On timeout (configurable, e.g., 5 minutes): flush as RUNNING (for visibility of stuck routes)
|
||||
|
||||
**What changes in the server:**
|
||||
- Storage becomes pure append: `INSERT INTO executions VALUES (...)` — no upsert, no COALESCE
|
||||
- No `SearchIndexer` / `ExecutionAccumulator` needed — the server just writes what it receives
|
||||
- Materialized views count correctly (one insert = one execution)
|
||||
- Works with any append-only store (ClickHouse, Kafka, S3/Parquet)
|
||||
|
||||
**Trade-offs:**
|
||||
- RUNNING executions are not visible on the server until they complete (or timeout-flush)
|
||||
- "Active execution count" must come from agent heartbeat/registry data, not from stored RUNNING rows
|
||||
- If the agent crashes, in-flight executions are lost (same as current behavior — RUNNING rows become orphans anyway)
|
||||
|
||||
### Option B: Event Log with Reconstruction
|
||||
|
||||
Send both phases as separate **events** (not records), and let the server reconstruct the current state.
|
||||
|
||||
```
|
||||
Event 1: {type: "EXECUTION_STARTED", executionId: "abc", startTime: ..., routeId: ...}
|
||||
Event 2: {type: "EXECUTION_COMPLETED", executionId: "abc", duration: 250, outputBody: ..., processors: [...]}
|
||||
```
|
||||
|
||||
**Server-side:**
|
||||
- Store raw events in an append-only log table
|
||||
- Reconstruct current state via `SELECT argMax(field, event_time) FROM events WHERE execution_id = ? GROUP BY execution_id`
|
||||
- Or: use a materialized view with `AggregatingMergeTree` + `argMaxState` to maintain a "latest state" table
|
||||
|
||||
**Trade-offs:**
|
||||
- More complex server-side reconstruction
|
||||
- Higher storage (two rows per execution instead of one)
|
||||
- More flexible: supports any number of state transitions (RUNNING -> PAUSED -> RUNNING -> COMPLETED)
|
||||
- Natural fit for event sourcing architectures
|
||||
|
||||
### Option C: Hybrid (Current Cameleer3-Server Approach)
|
||||
|
||||
Keep the two-phase protocol but handle merging at the server application layer. This is what cameleer3-server implements today with the `ExecutionAccumulator`:
|
||||
|
||||
- RUNNING POST -> hold in `ConcurrentHashMap` (no DB write)
|
||||
- COMPLETED POST -> merge with RUNNING in-memory -> single INSERT to DB
|
||||
- Timeout sweep -> flush stale RUNNING entries for visibility
|
||||
|
||||
**Trade-offs:**
|
||||
- No agent changes required
|
||||
- Server must be stateful (in-memory accumulator)
|
||||
- Crash window: active executions lost if server restarts
|
||||
- Adds complexity to the server that wouldn't exist with Option A
|
||||
|
||||
## Recommendation
|
||||
|
||||
**Option A (single-phase reporting)** is the strongest choice for a new protocol version:
|
||||
|
||||
1. **Simplest server implementation**: Pure append, no state, no merging
|
||||
2. **Works everywhere**: ClickHouse, Kafka, S3, any append-only store
|
||||
3. **Correct by construction**: MVs, aggregations, and stream processing all see one event per execution
|
||||
4. **Agent is the natural place to buffer**: The agent already tracks in-flight executions for instrumentation — it just needs to hold the report until completion
|
||||
5. **Minimal data loss risk**: Agent crash loses in-flight data regardless of protocol — this doesn't make it worse
|
||||
|
||||
### Migration Strategy
|
||||
|
||||
1. Add `protocol_version` field to agent registration
|
||||
2. v1 agents: server uses `ExecutionAccumulator` (current behavior)
|
||||
3. v2 agents: server does pure append (no accumulator needed for v2 data)
|
||||
4. Both can coexist — the server checks protocol version per agent
|
||||
|
||||
### Fields for Single-Phase Record
|
||||
|
||||
The complete record sent by a v2 agent:
|
||||
|
||||
```json
|
||||
{
|
||||
"executionId": "uuid",
|
||||
"routeId": "myRoute",
|
||||
"agentId": "agent-1",
|
||||
"applicationName": "my-app",
|
||||
"correlationId": "corr-123",
|
||||
"exchangeId": "exchange-456",
|
||||
"status": "COMPLETED",
|
||||
"startTime": "2026-03-31T10:00:00.000Z",
|
||||
"endTime": "2026-03-31T10:00:00.250Z",
|
||||
"durationMs": 250,
|
||||
"errorMessage": null,
|
||||
"errorStackTrace": null,
|
||||
"errorType": null,
|
||||
"errorCategory": null,
|
||||
"rootCauseType": null,
|
||||
"rootCauseMessage": null,
|
||||
"inputSnapshot": {"body": "...", "headers": {"Content-Type": "application/json"}},
|
||||
"outputSnapshot": {"body": "...", "headers": {"Content-Type": "application/xml"}},
|
||||
"attributes": {"key": "value"},
|
||||
"traceId": "otel-trace-id",
|
||||
"spanId": "otel-span-id",
|
||||
"replayExchangeId": null,
|
||||
"processors": [
|
||||
{
|
||||
"processorId": "proc-1",
|
||||
"processorType": "to",
|
||||
"status": "COMPLETED",
|
||||
"startTime": "...",
|
||||
"endTime": "...",
|
||||
"durationMs": 120,
|
||||
"inputBody": "...",
|
||||
"outputBody": "...",
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
All fields populated. No second POST needed. Server does a single INSERT.
|
||||
916
docs/superpowers/specs/2026-03-31-clickhouse-migration-design.md
Normal file
916
docs/superpowers/specs/2026-03-31-clickhouse-migration-design.md
Normal file
@@ -0,0 +1,916 @@
|
||||
# ClickHouse Migration Design
|
||||
|
||||
Replace PostgreSQL/TimescaleDB + OpenSearch with ClickHouse OSS for all observability data.
|
||||
PostgreSQL retained only for RBAC, config, and audit log.
|
||||
|
||||
## Context
|
||||
|
||||
Cameleer3-server currently uses three storage systems:
|
||||
|
||||
- **PostgreSQL/TimescaleDB**: executions, processor_executions, agent_metrics (hypertables), agent_events, route_diagrams, plus RBAC/config/audit tables. Continuous aggregates for dashboard statistics.
|
||||
- **OpenSearch**: executions-YYYY-MM-DD indices (full-text search on bodies/headers/errors), logs-YYYY-MM-DD indices (application log storage with 7-day retention).
|
||||
- **Dual-write pattern**: PG is source of truth, OpenSearch is async-indexed via debounced `SearchIndexer`.
|
||||
|
||||
This architecture has scaling limits: three systems to operate, data duplication between PG and OpenSearch, TimescaleDB continuous aggregates with limited flexibility, and no multitenancy support.
|
||||
|
||||
**Goal**: Consolidate to ClickHouse OSS (self-hosted) for all observability data. Add multitenancy with custom per-tenant, per-document-type retention. Support billions of documents, terabytes of data, sub-second wildcard search.
|
||||
|
||||
## Decisions
|
||||
|
||||
| Decision | Choice | Rationale |
|
||||
|----------|--------|-----------|
|
||||
| Deployment | Self-hosted ClickHouse OSS on k3s | All needed features available in OSS. Fits existing infra. |
|
||||
| Execution lifecycle | Approach B: Application-side accumulator | Merges RUNNING+COMPLETED in-memory, writes one row. Avoids upsert problem. |
|
||||
| Table engine (executions) | ReplacingMergeTree | Handles rare late corrections via version column. Normal flow writes once. |
|
||||
| Table engine (all others) | MergeTree | Append-only data, no dedup needed. |
|
||||
| Client | JDBC + JdbcTemplate | Familiar pattern, matches current PG code. Async inserts via JDBC URL settings. |
|
||||
| Multitenancy | Shared tables + tenant_id column | Row policies for defense-in-depth. Application-layer WHERE for primary enforcement. |
|
||||
| Retention | Application-driven scheduler | Per-tenant, per-document-type. Config in PG, execution via ALTER TABLE DELETE. |
|
||||
| Search | Ngram bloom filter indexes | Sub-second wildcard search. Materialized `_search_text` column for cross-field search. |
|
||||
| Highlighting | Application-side in Java | Extract 120-char fragment around match from returned fields. |
|
||||
| Storage tiering | Local SSD only (initially) | S3/MinIO tiering can be added later via TTL MOVE rules. |
|
||||
|
||||
## ClickHouse OSS Constraints
|
||||
|
||||
These are features NOT available in the open-source version:
|
||||
|
||||
| Constraint | Impact on Cameleer3 |
|
||||
|------------|---------------------|
|
||||
| No SharedMergeTree | No elastic compute scaling; must size nodes up-front. Acceptable for self-hosted. |
|
||||
| No BM25 relevance scoring | Search returns matches without ranking. Acceptable for observability (want all matches, not ranked). |
|
||||
| No search highlighting | Replaced by application-side highlighting in Java. |
|
||||
| No fuzzy/typo-tolerant search | Must match exact tokens or use ngram index for substring match. Acceptable. |
|
||||
| No ClickPipes | Must build own ingestion pipeline. Already exists (agents push via HTTP POST). |
|
||||
| No managed backups | Must configure `clickhouse-backup` (Altinity, open-source) or built-in BACKUP SQL. |
|
||||
| No auto-scaling | Manual capacity planning. Single node handles 14+ TiB, sufficient for initial scale. |
|
||||
|
||||
General ClickHouse constraints (apply to both OSS and Cloud):
|
||||
|
||||
| Constraint | Mitigation |
|
||||
|------------|------------|
|
||||
| ORDER BY is immutable | Careful upfront schema design. Documented below. |
|
||||
| No transactions | Single-table INSERT atomic per block. No cross-table atomicity needed. |
|
||||
| Mutations are expensive | Avoid ALTER UPDATE/DELETE. Use ReplacingMergeTree for corrections, append-only for everything else. |
|
||||
| Row policies skip mutations | Application-layer WHERE on mutations. Mutations are rare (retention scheduler only). |
|
||||
| No JPA/Hibernate | Use JdbcTemplate (already the pattern for PG). |
|
||||
| JSON max_dynamic_paths | Store attributes as flattened String, not JSON type. Use ngram index for search. |
|
||||
| Text indexes can't index JSON subcolumns | Extract searchable text into materialized String columns. |
|
||||
| MVs only process new inserts | Historical data backfill writes through MV pipeline. |
|
||||
| MV errors block source inserts | Careful MV design. Test thoroughly before production. |
|
||||
| ReplacingMergeTree eventual consistency | Use FINAL on queries that need latest version. |
|
||||
|
||||
## What Stays in PostgreSQL
|
||||
|
||||
| Table | Reason |
|
||||
|-------|--------|
|
||||
| `users`, `roles`, `groups`, `user_groups`, `user_roles`, `group_roles` | RBAC with relational joins, foreign keys, transactions |
|
||||
| `server_config` | Global config, low volume, needs transactions |
|
||||
| `application_config` | Per-app observability settings |
|
||||
| `app_settings` | Per-app SLA thresholds |
|
||||
| `audit_log` | Security compliance, needs transactions, joins with RBAC tables |
|
||||
| OIDC config | Auth provider config |
|
||||
| `tenant_retention_config` (new) | Per-tenant retention settings, referenced by scheduler |
|
||||
|
||||
## What Moves to ClickHouse
|
||||
|
||||
| Data | Current Location | ClickHouse Table |
|
||||
|------|-----------------|------------------|
|
||||
| Route executions | PG `executions` hypertable + OpenSearch `executions-*` | `executions` |
|
||||
| Processor executions | PG `processor_executions` hypertable | `processor_executions` |
|
||||
| Agent metrics | PG `agent_metrics` hypertable | `agent_metrics` |
|
||||
| Agent events | PG `agent_events` | `agent_events` |
|
||||
| Route diagrams | PG `route_diagrams` | `route_diagrams` |
|
||||
| Application logs | OpenSearch `logs-*` | `logs` |
|
||||
| Dashboard statistics | PG continuous aggregates (`stats_1m_*`) | ClickHouse materialized views (`stats_1m_*`) |
|
||||
|
||||
## Table Schemas
|
||||
|
||||
### executions
|
||||
|
||||
```sql
|
||||
CREATE TABLE executions (
|
||||
tenant_id LowCardinality(String),
|
||||
execution_id String,
|
||||
start_time DateTime64(3),
|
||||
_version UInt64 DEFAULT 1,
|
||||
route_id LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
status LowCardinality(String),
|
||||
correlation_id String DEFAULT '',
|
||||
exchange_id String DEFAULT '',
|
||||
end_time Nullable(DateTime64(3)),
|
||||
duration_ms Nullable(Int64),
|
||||
error_message String DEFAULT '',
|
||||
error_stacktrace String DEFAULT '',
|
||||
error_type LowCardinality(String) DEFAULT '',
|
||||
error_category LowCardinality(String) DEFAULT '',
|
||||
root_cause_type String DEFAULT '',
|
||||
root_cause_message String DEFAULT '',
|
||||
diagram_content_hash String DEFAULT '',
|
||||
engine_level LowCardinality(String) DEFAULT '',
|
||||
input_body String DEFAULT '',
|
||||
output_body String DEFAULT '',
|
||||
input_headers String DEFAULT '',
|
||||
output_headers String DEFAULT '',
|
||||
attributes String DEFAULT '',
|
||||
trace_id String DEFAULT '',
|
||||
span_id String DEFAULT '',
|
||||
processors_json String DEFAULT '',
|
||||
has_trace_data Bool DEFAULT false,
|
||||
is_replay Bool DEFAULT false,
|
||||
|
||||
_search_text String MATERIALIZED
|
||||
concat(error_message, ' ', error_stacktrace, ' ', attributes,
|
||||
' ', input_body, ' ', output_body, ' ', input_headers,
|
||||
' ', output_headers, ' ', root_cause_message),
|
||||
|
||||
INDEX idx_search _search_text TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_error error_message TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_bodies concat(input_body, ' ', output_body) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_headers concat(input_headers, ' ', output_headers) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_status status TYPE set(10) GRANULARITY 1,
|
||||
INDEX idx_corr correlation_id TYPE bloom_filter(0.01) GRANULARITY 4
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(_version)
|
||||
PARTITION BY (tenant_id, toYYYYMM(start_time))
|
||||
ORDER BY (tenant_id, start_time, application_name, route_id, execution_id)
|
||||
TTL start_time + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
Design rationale:
|
||||
- **ORDER BY** `(tenant_id, start_time, application_name, route_id, execution_id)`: Matches UI query pattern (tenant -> time range -> app -> route). Time before application because observability queries almost always include a time range.
|
||||
- **PARTITION BY** `(tenant_id, toYYYYMM(start_time))`: Enables per-tenant partition drops for retention. Monthly granularity balances partition count vs drop efficiency.
|
||||
- **ReplacingMergeTree(_version)**: Normal flow writes once (version 1). Late corrections write version 2+. Background merges keep latest version.
|
||||
- **`_search_text` materialized column**: Computed at insert time. Concatenates all searchable fields for cross-field wildcard search.
|
||||
- **`ngrambf_v1(3, 256, 2, 0)`**: 3-char ngrams in a 256-byte bloom filter with 2 hash functions. Prunes most granules for `LIKE '%term%'` queries. The bloom filter size (256 bytes) is a starting point — increase to 4096-8192 if false positive rates are too high for long text fields. Tune after benchmarking with real data.
|
||||
- **`LowCardinality(String)`**: Dictionary encoding for columns with few distinct values. Major compression improvement.
|
||||
- **TTL 365 days**: Safety net. Application-driven scheduler handles per-tenant retention at finer granularity.
|
||||
|
||||
### processor_executions
|
||||
|
||||
```sql
|
||||
CREATE TABLE processor_executions (
|
||||
tenant_id LowCardinality(String),
|
||||
execution_id String,
|
||||
processor_id String,
|
||||
start_time DateTime64(3),
|
||||
route_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
processor_type LowCardinality(String),
|
||||
parent_processor_id String DEFAULT '',
|
||||
depth UInt16 DEFAULT 0,
|
||||
status LowCardinality(String),
|
||||
end_time Nullable(DateTime64(3)),
|
||||
duration_ms Nullable(Int64),
|
||||
error_message String DEFAULT '',
|
||||
error_stacktrace String DEFAULT '',
|
||||
error_type LowCardinality(String) DEFAULT '',
|
||||
error_category LowCardinality(String) DEFAULT '',
|
||||
root_cause_type String DEFAULT '',
|
||||
root_cause_message String DEFAULT '',
|
||||
input_body String DEFAULT '',
|
||||
output_body String DEFAULT '',
|
||||
input_headers String DEFAULT '',
|
||||
output_headers String DEFAULT '',
|
||||
attributes String DEFAULT '',
|
||||
loop_index Nullable(Int32),
|
||||
loop_size Nullable(Int32),
|
||||
split_index Nullable(Int32),
|
||||
split_size Nullable(Int32),
|
||||
multicast_index Nullable(Int32),
|
||||
resolved_endpoint_uri String DEFAULT '',
|
||||
error_handler_type LowCardinality(String) DEFAULT '',
|
||||
circuit_breaker_state LowCardinality(String) DEFAULT '',
|
||||
fallback_triggered Bool DEFAULT false,
|
||||
|
||||
_search_text String MATERIALIZED
|
||||
concat(error_message, ' ', error_stacktrace, ' ', attributes,
|
||||
' ', input_body, ' ', output_body, ' ', input_headers, ' ', output_headers),
|
||||
|
||||
INDEX idx_search _search_text TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_exec_id execution_id TYPE bloom_filter(0.01) GRANULARITY 4
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(start_time))
|
||||
ORDER BY (tenant_id, start_time, application_name, route_id, execution_id, processor_id)
|
||||
TTL start_time + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
### logs
|
||||
|
||||
```sql
|
||||
CREATE TABLE logs (
|
||||
tenant_id LowCardinality(String),
|
||||
timestamp DateTime64(3),
|
||||
application LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
level LowCardinality(String),
|
||||
logger_name LowCardinality(String) DEFAULT '',
|
||||
message String,
|
||||
thread_name LowCardinality(String) DEFAULT '',
|
||||
stack_trace String DEFAULT '',
|
||||
exchange_id String DEFAULT '',
|
||||
mdc Map(String, String) DEFAULT map(),
|
||||
|
||||
INDEX idx_msg message TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_stack stack_trace TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
|
||||
INDEX idx_level level TYPE set(10) GRANULARITY 1
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(timestamp))
|
||||
ORDER BY (tenant_id, application, timestamp)
|
||||
TTL timestamp + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
### agent_metrics
|
||||
|
||||
```sql
|
||||
CREATE TABLE agent_metrics (
|
||||
tenant_id LowCardinality(String),
|
||||
collected_at DateTime64(3),
|
||||
agent_id LowCardinality(String),
|
||||
metric_name LowCardinality(String),
|
||||
metric_value Float64,
|
||||
tags Map(String, String) DEFAULT map(),
|
||||
server_received_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(collected_at))
|
||||
ORDER BY (tenant_id, agent_id, metric_name, collected_at)
|
||||
TTL collected_at + INTERVAL 365 DAY DELETE
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
### agent_events
|
||||
|
||||
```sql
|
||||
CREATE TABLE agent_events (
|
||||
tenant_id LowCardinality(String),
|
||||
timestamp DateTime64(3) DEFAULT now64(3),
|
||||
agent_id LowCardinality(String),
|
||||
app_id LowCardinality(String),
|
||||
event_type LowCardinality(String),
|
||||
detail String DEFAULT ''
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(timestamp))
|
||||
ORDER BY (tenant_id, app_id, agent_id, timestamp)
|
||||
TTL timestamp + INTERVAL 365 DAY DELETE;
|
||||
```
|
||||
|
||||
### route_diagrams
|
||||
|
||||
```sql
|
||||
CREATE TABLE route_diagrams (
|
||||
tenant_id LowCardinality(String),
|
||||
content_hash String,
|
||||
route_id LowCardinality(String),
|
||||
agent_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
definition String,
|
||||
created_at DateTime64(3) DEFAULT now64(3)
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(created_at)
|
||||
ORDER BY (tenant_id, content_hash)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
## Materialized Views (Stats)
|
||||
|
||||
Replace TimescaleDB continuous aggregates. ClickHouse MVs trigger on every INSERT and store aggregate states in target tables. Caveat: because MVs fire per insert, a late-correction row written with `_version >= 2` into `executions` is aggregated a second time — ReplacingMergeTree deduplication does not apply to the MV pipeline. Corrections are rare, so either accept the small overcount or exclude correction inserts from the stats MVs.
|
||||
|
||||
### stats_1m_all (global)
|
||||
|
||||
```sql
|
||||
CREATE TABLE stats_1m_all (
|
||||
tenant_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW stats_1m_all_mv TO stats_1m_all AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, bucket;
|
||||
```
|
||||
|
||||
### stats_1m_app (per-application)
|
||||
|
||||
```sql
|
||||
CREATE TABLE stats_1m_app (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW stats_1m_app_mv TO stats_1m_app AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, application_name, bucket;
|
||||
```
|
||||
|
||||
### stats_1m_route (per-route)
|
||||
|
||||
```sql
|
||||
CREATE TABLE stats_1m_route (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
route_id LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, route_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW stats_1m_route_mv TO stats_1m_route AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
route_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
countIfState(status = 'RUNNING') AS running_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM executions
|
||||
GROUP BY tenant_id, application_name, route_id, bucket;
|
||||
```
|
||||
|
||||
### stats_1m_processor (per-processor-type)
|
||||
|
||||
```sql
|
||||
CREATE TABLE stats_1m_processor (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
processor_type LowCardinality(String),
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, processor_type, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW stats_1m_processor_mv TO stats_1m_processor AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
processor_type,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM processor_executions
|
||||
GROUP BY tenant_id, application_name, processor_type, bucket;
|
||||
```
|
||||
|
||||
### stats_1m_processor_detail (per-processor-id)
|
||||
|
||||
```sql
|
||||
CREATE TABLE stats_1m_processor_detail (
|
||||
tenant_id LowCardinality(String),
|
||||
application_name LowCardinality(String),
|
||||
route_id LowCardinality(String),
|
||||
processor_id String,
|
||||
bucket DateTime,
|
||||
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
|
||||
duration_sum AggregateFunction(sum, Nullable(Int64)),
|
||||
duration_max AggregateFunction(max, Nullable(Int64)),
|
||||
p99_duration AggregateFunction(quantile(0.99), Nullable(Int64))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree()
|
||||
PARTITION BY (tenant_id, toYYYYMM(bucket))
|
||||
ORDER BY (tenant_id, application_name, route_id, processor_id, bucket)
|
||||
TTL bucket + INTERVAL 365 DAY DELETE;
|
||||
|
||||
CREATE MATERIALIZED VIEW stats_1m_processor_detail_mv TO stats_1m_processor_detail AS
|
||||
SELECT
|
||||
tenant_id,
|
||||
application_name,
|
||||
route_id,
|
||||
processor_id,
|
||||
toStartOfMinute(start_time) AS bucket,
|
||||
countState() AS total_count,
|
||||
countIfState(status = 'FAILED') AS failed_count,
|
||||
sumState(duration_ms) AS duration_sum,
|
||||
maxState(duration_ms) AS duration_max,
|
||||
quantileState(0.99)(duration_ms) AS p99_duration
|
||||
FROM processor_executions
|
||||
GROUP BY tenant_id, application_name, route_id, processor_id, bucket;
|
||||
```
|
||||
|
||||
## Ingestion Pipeline
|
||||
|
||||
### Current Flow (replaced)
|
||||
|
||||
```
|
||||
Agent POST -> IngestionService -> PostgresExecutionStore.upsert() -> PG
|
||||
-> SearchIndexer (debounced 2s) -> reads from PG -> OpenSearch
|
||||
```
|
||||
|
||||
### New Flow
|
||||
|
||||
```
|
||||
Agent POST -> IngestionService -> ExecutionAccumulator
|
||||
|-- RUNNING: ConcurrentHashMap (no DB write)
|
||||
|-- COMPLETED/FAILED: merge with pending -> WriteBuffer
|
||||
'-- Timeout sweep (60s): flush stale -> WriteBuffer
|
||||
|
|
||||
ClickHouseExecutionStore.insertBatch()
|
||||
ClickHouseProcessorStore.insertBatch()
|
||||
```
|
||||
|
||||
### ExecutionAccumulator
|
||||
|
||||
New component replacing `SearchIndexer`. Core responsibilities:
|
||||
|
||||
1. **On RUNNING POST**: Store `PendingExecution` in `ConcurrentHashMap<String, PendingExecution>` keyed by `execution_id`. Return 200 OK immediately. No database write.
|
||||
|
||||
2. **On COMPLETED/FAILED POST**: Look up pending RUNNING by `execution_id`. If found, merge fields using the same COALESCE logic currently in `PostgresExecutionStore.upsert()`. Produce a complete `MergedExecution` and push to `WriteBuffer`. If not found (race condition or RUNNING already flushed by timeout), write COMPLETED directly with `_version=2`.
|
||||
|
||||
3. **Timeout sweep** (scheduled every 60s): Scan for RUNNING entries older than 5 minutes. Flush them to ClickHouse as-is with status=RUNNING, making them visible in the UI. When COMPLETED eventually arrives, it writes with `_version=2` (ReplacingMergeTree deduplicates).
|
||||
|
||||
4. **Late corrections**: If a correction arrives for an already-written execution, insert with `_version` incremented. ReplacingMergeTree handles deduplication.
|
||||
|
||||
### WriteBuffer
|
||||
|
||||
Reuse the existing `WriteBuffer` pattern (bounded queue, configurable batch size, scheduled drain):
|
||||
|
||||
- Buffer capacity: 50,000 items
|
||||
- Batch size: 5,000 per flush
|
||||
- Flush interval: 1 second
|
||||
- Separate buffers for executions and processor_executions (independent batch inserts)
|
||||
- Drain calls `ClickHouseExecutionStore.insertBatch()` using JDBC batch update
|
||||
|
||||
### Logs Ingestion
|
||||
|
||||
Direct batch INSERT, bypasses accumulator (logs are single-phase):
|
||||
|
||||
```
|
||||
Agent POST /api/v1/data/logs -> LogIngestionController -> ClickHouseLogStore.insertBatch()
|
||||
```
|
||||
|
||||
### Metrics Ingestion
|
||||
|
||||
Existing `MetricsWriteBuffer` targets ClickHouse instead of PG:
|
||||
|
||||
```
|
||||
Agent POST /api/v1/data/metrics -> MetricsController -> WriteBuffer -> ClickHouseMetricsStore.insertBatch()
|
||||
```
|
||||
|
||||
### JDBC Batch Insert Pattern
|
||||
|
||||
```java
|
||||
jdbcTemplate.batchUpdate(
|
||||
"INSERT INTO executions (tenant_id, execution_id, start_time, ...) VALUES (?, ?, ?, ...)",
|
||||
batchArgs
|
||||
);
|
||||
```
|
||||
|
||||
JDBC URL includes `async_insert=1&wait_for_async_insert=0` for server-side buffering, preventing "too many parts" errors under high load.
|
||||
|
||||
## Search Implementation
|
||||
|
||||
### Query Translation
|
||||
|
||||
Current OpenSearch bool queries map to ClickHouse SQL:
|
||||
|
||||
```sql
|
||||
-- Full-text wildcard search with time range, status filter, and pagination
|
||||
SELECT *
|
||||
FROM executions FINAL
|
||||
WHERE tenant_id = {tenant_id:String}
|
||||
AND start_time >= {time_from:DateTime64(3)}
|
||||
AND start_time < {time_to:DateTime64(3)}
|
||||
AND status IN ({statuses:Array(String)})
|
||||
AND (
  -- Bind the pattern as a parameter ('%' || term || '%', with LIKE metacharacters
  -- escaped) rather than splicing the raw term into the string literal.
  _search_text LIKE {search_pattern:String}
  OR execution_id IN (
    SELECT DISTINCT execution_id
    FROM processor_executions
    WHERE tenant_id = {tenant_id:String}
      AND start_time >= {time_from:DateTime64(3)}
      AND start_time < {time_to:DateTime64(3)}
      AND _search_text LIKE {search_pattern:String}
  )
)
|
||||
ORDER BY start_time DESC
|
||||
LIMIT {limit:UInt32} OFFSET {offset:UInt32}
|
||||
```
|
||||
|
||||
### Scoped Searches
|
||||
|
||||
| Scope | ClickHouse WHERE clause |
|
||||
|-------|------------------------|
|
||||
| textInBody | `input_body LIKE '%term%' OR output_body LIKE '%term%'` |
|
||||
| textInHeaders | `input_headers LIKE '%term%' OR output_headers LIKE '%term%'` |
|
||||
| textInErrors | `error_message LIKE '%term%' OR error_stacktrace LIKE '%term%'` |
|
||||
| global text | `_search_text LIKE '%term%'` (covers all fields) |
|
||||
|
||||
All accelerated by `ngrambf_v1` indexes which prune 95%+ of data granules before scanning.
|
||||
|
||||
### Application-Side Highlighting
|
||||
|
||||
```java
|
||||
public String extractHighlight(String text, String searchTerm, int contextChars) {
|
||||
int idx = text.toLowerCase().indexOf(searchTerm.toLowerCase());
|
||||
if (idx < 0) return null;
|
||||
int start = Math.max(0, idx - contextChars / 2);
|
||||
int end = Math.min(text.length(), idx + searchTerm.length() + contextChars / 2);
|
||||
return (start > 0 ? "..." : "")
|
||||
+ text.substring(start, end)
|
||||
+ (end < text.length() ? "..." : "");
|
||||
}
|
||||
```
|
||||
|
||||
Returns the same `highlight` map structure the UI currently expects.
|
||||
|
||||
### Nested Processor Search
|
||||
|
||||
OpenSearch nested queries become a subquery on the `processor_executions` table:
|
||||
|
||||
```sql
|
||||
execution_id IN (
|
||||
SELECT DISTINCT execution_id
|
||||
FROM processor_executions
|
||||
WHERE tenant_id = ? AND start_time >= ? AND start_time < ?
|
||||
AND _search_text LIKE '%term%'
|
||||
)
|
||||
```
|
||||
|
||||
This is evaluated once with ngram index acceleration, then joined via IN.
|
||||
|
||||
## Stats Query Translation
|
||||
|
||||
### TimescaleDB -> ClickHouse Query Patterns
|
||||
|
||||
| TimescaleDB | ClickHouse |
|
||||
|-------------|------------|
|
||||
| `time_bucket('1 minute', bucket)` | `toStartOfInterval(bucket, INTERVAL 1 MINUTE)` |
|
||||
| `SUM(total_count)` | `countMerge(total_count)` |
|
||||
| `SUM(failed_count)` | `countIfMerge(failed_count)` |
|
||||
| `approx_percentile(0.99, rollup(p99_duration))` | `quantileMerge(0.99)(p99_duration)` |
|
||||
| `SUM(duration_sum) / SUM(total_count)` | `sumMerge(duration_sum) / countMerge(total_count)` |
|
||||
| `MAX(duration_max)` | `maxMerge(duration_max)` |
|
||||
|
||||
### Example: Timeseries Query
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfInterval(bucket, INTERVAL {interval:UInt32} SECOND) AS period,
|
||||
countMerge(total_count) AS total_count,
|
||||
countIfMerge(failed_count) AS failed_count,
|
||||
sumMerge(duration_sum) / countMerge(total_count) AS avg_duration,
|
||||
quantileMerge(0.99)(p99_duration) AS p99_duration
|
||||
FROM stats_1m_app
|
||||
WHERE tenant_id = {tenant_id:String}
|
||||
AND application_name = {app:String}
|
||||
AND bucket >= {from:DateTime}
|
||||
AND bucket < {to:DateTime}
|
||||
GROUP BY period
|
||||
ORDER BY period
|
||||
```
|
||||
|
||||
### SLA and Top Errors
|
||||
|
||||
SLA queries hit the raw `executions` table (need per-row duration filtering):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
-- Exclude RUNNING from both numerator and denominator so in-flight executions don't skew the SLA
countIf(duration_ms <= {threshold:Int64} AND status != 'RUNNING') * 100.0 / countIf(status != 'RUNNING') AS sla_pct
|
||||
FROM executions FINAL
|
||||
WHERE tenant_id = ? AND application_name = ? AND start_time >= ? AND start_time < ?
|
||||
```
|
||||
|
||||
Top errors query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
error_message,
|
||||
count() AS error_count,
|
||||
max(start_time) AS last_seen
|
||||
FROM executions FINAL
|
||||
WHERE tenant_id = ? AND status = 'FAILED'
|
||||
AND start_time >= now() - INTERVAL 1 HOUR
|
||||
GROUP BY error_message
|
||||
ORDER BY error_count DESC
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
## Multitenancy
|
||||
|
||||
### Data Isolation
|
||||
|
||||
**Primary**: Application-layer WHERE clause injection. Every ClickHouse query gets `WHERE tenant_id = ?` from the authenticated user's JWT claims.
|
||||
|
||||
**Defense-in-depth**: ClickHouse row policies:
|
||||
|
||||
```sql
|
||||
-- Create a ClickHouse user per tenant
|
||||
CREATE USER tenant_acme IDENTIFIED BY '...';
|
||||
|
||||
-- Row policy ensures tenant can only see their data
|
||||
CREATE ROW POLICY tenant_acme_executions ON executions
|
||||
FOR SELECT USING tenant_id = 'acme';
|
||||
|
||||
-- Repeat for all tables
|
||||
```
|
||||
|
||||
### Tenant ID in Schema
|
||||
|
||||
`tenant_id` is the first column in every table's ORDER BY and PARTITION BY. This ensures:
|
||||
- Data for the same tenant is physically co-located on disk
|
||||
- Queries filtering by tenant_id use the sparse index efficiently
|
||||
- Partition drops for retention are scoped to individual tenants
|
||||
|
||||
### Resource Quotas
|
||||
|
||||
```sql
|
||||
CREATE SETTINGS PROFILE tenant_limits
|
||||
SETTINGS max_execution_time = 30,
|
||||
max_rows_to_read = 100000000,
|
||||
max_memory_usage = '4G';
|
||||
|
||||
ALTER USER tenant_acme SETTINGS PROFILE tenant_limits;
|
||||
```
|
||||
|
||||
Prevents noisy neighbor problems where one tenant's expensive query affects others.
|
||||
|
||||
## Retention
|
||||
|
||||
### Strategy: Application-Driven Scheduler
|
||||
|
||||
Per-tenant, per-document-type retention is too dynamic for static ClickHouse TTL rules. Instead:
|
||||
|
||||
1. **Config table** in PostgreSQL:
|
||||
|
||||
```sql
|
||||
CREATE TABLE tenant_retention_config (
|
||||
tenant_id VARCHAR(255) NOT NULL,
|
||||
document_type VARCHAR(50) NOT NULL, -- executions, logs, metrics, etc.
|
||||
retention_days INT NOT NULL,
|
||||
PRIMARY KEY (tenant_id, document_type)
|
||||
);
|
||||
```
|
||||
|
||||
2. **RetentionScheduler** (Spring `@Scheduled`, runs daily at 03:00 UTC):
|
||||
|
||||
```java
|
||||
@Scheduled(cron = "0 0 3 * * *")
|
||||
public void enforceRetention() {
|
||||
List<TenantRetention> configs = retentionConfigRepo.findAll();
|
||||
for (TenantRetention config : configs) {
|
||||
String table = config.documentType(); // executions, logs, metrics, etc.
|
||||
clickHouseJdbc.execute(
|
||||
"ALTER TABLE " + table + " DELETE WHERE tenant_id = ? AND start_time < now() - INTERVAL ? DAY",
|
||||
config.tenantId(), config.retentionDays()
|
||||
);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. **Safety-net TTL**: Each table has a generous default TTL (365 days) as a backstop in case the scheduler fails. The scheduler handles the per-tenant granularity.
|
||||
|
||||
4. **Partition-aligned drops**: Since `PARTITION BY (tenant_id, toYYYYMM(start_time))`, the scheduler can issue `ALTER TABLE ... DROP PARTITION` for (tenant, month) partitions that are entirely past retention — a cheap metadata-only operation with no part rewrite — and fall back to `ALTER TABLE ... DELETE` (a mutation that rewrites parts) only for the partially-expired boundary month. Set `ttl_only_drop_parts=1` on tables so the safety-net TTL likewise drops whole parts instead of rewriting them.
|
||||
|
||||
## Java/Spring Integration
|
||||
|
||||
### Dependencies
|
||||
|
||||
```xml
|
||||
<dependency>
|
||||
<groupId>com.clickhouse</groupId>
|
||||
<artifactId>clickhouse-jdbc</artifactId>
|
||||
    <version>0.7.2</version> <!-- pin the latest stable 0.7.x release; "0.7.x" is not a resolvable Maven version -->
|
||||
<classifier>all</classifier>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```yaml
|
||||
clickhouse:
|
||||
url: jdbc:clickhouse://clickhouse:8123/cameleer?async_insert=1&wait_for_async_insert=0
|
||||
username: cameleer_app
|
||||
password: ${CLICKHOUSE_PASSWORD}
|
||||
```
|
||||
|
||||
### DataSource Bean
|
||||
|
||||
```java
|
||||
@Configuration
|
||||
public class ClickHouseConfig {
|
||||
@Bean
|
||||
public DataSource clickHouseDataSource(ClickHouseProperties props) {
|
||||
HikariDataSource ds = new HikariDataSource();
|
||||
ds.setJdbcUrl(props.getUrl());
|
||||
ds.setUsername(props.getUsername());
|
||||
ds.setPassword(props.getPassword());
|
||||
ds.setMaximumPoolSize(10);
|
||||
return ds;
|
||||
}
|
||||
|
||||
@Bean
|
||||
public JdbcTemplate clickHouseJdbcTemplate(
|
||||
@Qualifier("clickHouseDataSource") DataSource ds) {
|
||||
return new JdbcTemplate(ds);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Interface Implementations
|
||||
|
||||
Existing interfaces remain unchanged. New implementations:
|
||||
|
||||
| Interface | Current Impl | New Impl |
|
||||
|-----------|-------------|----------|
|
||||
| `ExecutionStore` | `PostgresExecutionStore` | `ClickHouseExecutionStore` |
|
||||
| `SearchIndex` | `OpenSearchIndex` | `ClickHouseSearchIndex` |
|
||||
| `StatsStore` | `PostgresStatsStore` | `ClickHouseStatsStore` |
|
||||
| `DiagramStore` | `PostgresDiagramStore` | `ClickHouseDiagramStore` |
|
||||
| `MetricsStore` | `PostgresMetricsStore` | `ClickHouseMetricsStore` |
|
||||
| (log search) | `OpenSearchLogIndex` | `ClickHouseLogStore` |
|
||||
| (new) | `SearchIndexer` | `ExecutionAccumulator` |
|
||||
|
||||
## Kubernetes Deployment
|
||||
|
||||
### ClickHouse StatefulSet
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: clickhouse
|
||||
spec:
|
||||
serviceName: clickhouse
|
||||
replicas: 1 # single node initially
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: clickhouse
|
||||
image: clickhouse/clickhouse-server:26.2
|
||||
ports:
|
||||
- containerPort: 8123 # HTTP
|
||||
- containerPort: 9000 # Native
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /var/lib/clickhouse
|
||||
- name: config
|
||||
mountPath: /etc/clickhouse-server/config.d
|
||||
resources:
|
||||
requests:
|
||||
memory: "4Gi"
|
||||
cpu: "2"
|
||||
limits:
|
||||
memory: "8Gi"
|
||||
cpu: "4"
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
spec:
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi # NVMe/SSD
|
||||
```
|
||||
|
||||
### Health Check
|
||||
|
||||
```yaml
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /ping
|
||||
port: 8123
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ping
|
||||
port: 8123
|
||||
```
|
||||
|
||||
## Migration Path
|
||||
|
||||
### Phase 1: Foundation
|
||||
|
||||
- Add `clickhouse-jdbc` dependency
|
||||
- Create `ClickHouseConfig` (DataSource, JdbcTemplate)
|
||||
- Schema initialization (idempotent DDL scripts, not Flyway -- ClickHouse DDL is different enough)
|
||||
- Implement `ClickHouseMetricsStore` (simplest table, validates pipeline)
|
||||
- Deploy ClickHouse to k8s alongside existing PG+OpenSearch
|
||||
|
||||
### Phase 2: Executions + Search
|
||||
|
||||
- Build `ExecutionAccumulator` (replaces SearchIndexer)
|
||||
- Implement `ClickHouseExecutionStore` and `ClickHouseProcessorStore`
|
||||
- Implement `ClickHouseSearchIndex` (ngram-based SQL queries)
|
||||
- Feature flag: dual-write to both PG and CH, read from PG
|
||||
|
||||
### Phase 3: Stats & Analytics
|
||||
|
||||
- Create MV definitions (all 5 stats views)
|
||||
- Implement `ClickHouseStatsStore`
|
||||
- Validate stats accuracy: compare CH vs PG continuous aggregates
|
||||
|
||||
### Phase 4: Remaining Tables
|
||||
|
||||
- `ClickHouseDiagramStore` (ReplacingMergeTree)
|
||||
- `ClickHouseAgentEventStore`
|
||||
- `ClickHouseLogStore` (replaces OpenSearchLogIndex)
|
||||
- Application-side highlighting
|
||||
|
||||
### Phase 5: Multitenancy
|
||||
|
||||
- Tables already include `tenant_id` from Phase 1 (schema is forward-looking). This phase activates multitenancy.
|
||||
- Wire `tenant_id` from JWT claims into all ClickHouse queries (application-layer WHERE injection)
|
||||
- Add `tenant_id` to PostgreSQL RBAC/config tables
|
||||
- Create ClickHouse row policies per tenant (defense-in-depth)
|
||||
- Create `tenant_retention_config` table in PG and `RetentionScheduler` component
|
||||
- Tenant user management and resource quotas in ClickHouse
|
||||
|
||||
### Phase 6: Cutover
|
||||
|
||||
- Backfill historical data from PG/OpenSearch to ClickHouse
|
||||
- Switch read path to ClickHouse (feature flag)
|
||||
- Validate end-to-end
|
||||
- Remove OpenSearch dependency (POM, config, k8s manifests)
|
||||
- Remove TimescaleDB extensions and hypertable-specific code
|
||||
- Keep PostgreSQL for RBAC/config/audit only
|
||||
|
||||
## Verification
|
||||
|
||||
### Functional Verification
|
||||
|
||||
1. **Ingestion**: Send executions via agent, verify they appear in ClickHouse with correct fields
|
||||
2. **Two-phase lifecycle**: Send RUNNING, then COMPLETED. Verify single merged row in CH
|
||||
3. **Search**: Wildcard search across bodies, headers, errors. Verify sub-second response
|
||||
4. **Stats**: Dashboard statistics match expected values. Compare with PG aggregates during dual-write
|
||||
5. **Logs**: Ingest log batches, query by app/level/time/text. Verify correctness
|
||||
6. **Retention**: Configure per-tenant retention, run scheduler, verify expired data is deleted
|
||||
7. **Multitenancy**: Two tenants, verify data isolation (one tenant cannot see another's data)
|
||||
|
||||
### Performance Verification
|
||||
|
||||
1. **Insert throughput**: 5K executions/batch at 1 flush/sec sustained
|
||||
2. **Search latency**: Sub-second for `LIKE '%term%'` across 1M+ rows
|
||||
3. **Stats query latency**: Dashboard stats in <100ms from materialized views
|
||||
4. **Log search**: <1s for text search across 7 days of logs
|
||||
|
||||
### Data Integrity
|
||||
|
||||
1. During dual-write phase: compare row counts between PG and CH
|
||||
2. After cutover: spot-check execution details, processor trees, search results
|
||||
Reference in New Issue
Block a user