feat(clickhouse): add ChunkAccumulator for chunked execution ingestion
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,205 @@
|
|||||||
|
package com.cameleer3.server.core.ingestion;
|
||||||
|
|
||||||
|
import com.cameleer3.server.core.storage.model.ExecutionChunk;
|
||||||
|
import com.cameleer3.server.core.storage.model.FlatProcessorRecord;
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Accumulates {@link ExecutionChunk} documents and produces:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link ProcessorBatch} — pushed immediately for each chunk (append-only)</li>
|
||||||
|
* <li>{@link MergedExecution} — pushed when the final chunk arrives or on stale sweep</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
public class ChunkAccumulator {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ChunkAccumulator.class);
|
||||||
|
private static final String DEFAULT_TENANT = "default";
|
||||||
|
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
private final Consumer<MergedExecution> executionSink;
|
||||||
|
private final Consumer<ProcessorBatch> processorSink;
|
||||||
|
private final Duration staleThreshold;
|
||||||
|
private final ConcurrentHashMap<String, PendingExchange> pending = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
public ChunkAccumulator(Consumer<MergedExecution> executionSink,
|
||||||
|
Consumer<ProcessorBatch> processorSink,
|
||||||
|
Duration staleThreshold) {
|
||||||
|
this.executionSink = executionSink;
|
||||||
|
this.processorSink = processorSink;
|
||||||
|
this.staleThreshold = staleThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process an incoming chunk: push processors immediately,
|
||||||
|
* buffer/merge the envelope, and emit when final.
|
||||||
|
*/
|
||||||
|
public void onChunk(ExecutionChunk chunk) {
|
||||||
|
// 1. Push processor records immediately (append-only)
|
||||||
|
if (chunk.processors() != null && !chunk.processors().isEmpty()) {
|
||||||
|
processorSink.accept(new ProcessorBatch(
|
||||||
|
DEFAULT_TENANT,
|
||||||
|
chunk.exchangeId(),
|
||||||
|
chunk.routeId(),
|
||||||
|
chunk.applicationName(),
|
||||||
|
chunk.startTime(),
|
||||||
|
chunk.processors()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Buffer/merge the exchange envelope
|
||||||
|
if (chunk.isFinal()) {
|
||||||
|
// Merge with any pending envelope, then emit
|
||||||
|
PendingExchange existing = pending.remove(chunk.exchangeId());
|
||||||
|
ExecutionChunk merged = existing != null
|
||||||
|
? mergeEnvelopes(existing.envelope(), chunk)
|
||||||
|
: chunk;
|
||||||
|
executionSink.accept(toMergedExecution(merged));
|
||||||
|
} else {
|
||||||
|
// Buffer the envelope for later merging
|
||||||
|
pending.merge(chunk.exchangeId(),
|
||||||
|
new PendingExchange(chunk, Instant.now()),
|
||||||
|
(old, incoming) -> new PendingExchange(
|
||||||
|
mergeEnvelopes(old.envelope(), incoming.envelope()),
|
||||||
|
old.receivedAt()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush exchanges that have been pending longer than the stale threshold.
|
||||||
|
* Called periodically by a scheduled task.
|
||||||
|
*/
|
||||||
|
public void sweepStale() {
|
||||||
|
Instant cutoff = Instant.now().minus(staleThreshold);
|
||||||
|
pending.forEach((exchangeId, pe) -> {
|
||||||
|
if (pe.receivedAt().isBefore(cutoff)) {
|
||||||
|
PendingExchange removed = pending.remove(exchangeId);
|
||||||
|
if (removed != null) {
|
||||||
|
log.info("Flushing stale exchange {} (pending since {})",
|
||||||
|
exchangeId, removed.receivedAt());
|
||||||
|
executionSink.accept(toMergedExecution(removed.envelope()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Number of exchanges awaiting a final chunk. */
|
||||||
|
public int getPendingCount() {
|
||||||
|
return pending.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Merge logic ----
|
||||||
|
|
||||||
|
/**
|
||||||
|
* COALESCE merge: for each field, prefer the newer value if non-null, else keep older.
|
||||||
|
* The newer chunk (higher chunkSeq) takes precedence for status, endTime, durationMs.
|
||||||
|
*/
|
||||||
|
private static ExecutionChunk mergeEnvelopes(ExecutionChunk older, ExecutionChunk newer) {
|
||||||
|
return new ExecutionChunk(
|
||||||
|
coalesce(newer.exchangeId(), older.exchangeId()),
|
||||||
|
coalesce(newer.applicationName(), older.applicationName()),
|
||||||
|
coalesce(newer.agentId(), older.agentId()),
|
||||||
|
coalesce(newer.routeId(), older.routeId()),
|
||||||
|
coalesce(newer.correlationId(), older.correlationId()),
|
||||||
|
coalesce(newer.status(), older.status()),
|
||||||
|
coalesce(older.startTime(), newer.startTime()), // prefer earliest startTime
|
||||||
|
coalesce(newer.endTime(), older.endTime()),
|
||||||
|
coalesce(newer.durationMs(), older.durationMs()),
|
||||||
|
coalesce(newer.engineLevel(), older.engineLevel()),
|
||||||
|
coalesce(newer.errorMessage(), older.errorMessage()),
|
||||||
|
coalesce(newer.errorStackTrace(), older.errorStackTrace()),
|
||||||
|
coalesce(newer.errorType(), older.errorType()),
|
||||||
|
coalesce(newer.errorCategory(), older.errorCategory()),
|
||||||
|
coalesce(newer.rootCauseType(), older.rootCauseType()),
|
||||||
|
coalesce(newer.rootCauseMessage(), older.rootCauseMessage()),
|
||||||
|
coalesce(newer.attributes(), older.attributes()),
|
||||||
|
coalesce(newer.traceId(), older.traceId()),
|
||||||
|
coalesce(newer.spanId(), older.spanId()),
|
||||||
|
coalesce(newer.originalExchangeId(), older.originalExchangeId()),
|
||||||
|
coalesce(newer.replayExchangeId(), older.replayExchangeId()),
|
||||||
|
Math.max(newer.chunkSeq(), older.chunkSeq()),
|
||||||
|
newer.isFinal() || older.isFinal(),
|
||||||
|
List.of() // processors are handled separately
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> T coalesce(T a, T b) {
|
||||||
|
return a != null ? a : b;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Conversion to MergedExecution ----
|
||||||
|
|
||||||
|
private static MergedExecution toMergedExecution(ExecutionChunk envelope) {
|
||||||
|
return new MergedExecution(
|
||||||
|
DEFAULT_TENANT,
|
||||||
|
1L,
|
||||||
|
envelope.exchangeId(),
|
||||||
|
envelope.routeId(),
|
||||||
|
envelope.agentId(),
|
||||||
|
envelope.applicationName(),
|
||||||
|
envelope.status(),
|
||||||
|
envelope.correlationId(),
|
||||||
|
envelope.exchangeId(),
|
||||||
|
envelope.startTime(),
|
||||||
|
envelope.endTime(),
|
||||||
|
envelope.durationMs(),
|
||||||
|
envelope.errorMessage(),
|
||||||
|
envelope.errorStackTrace(),
|
||||||
|
envelope.errorType(),
|
||||||
|
envelope.errorCategory(),
|
||||||
|
envelope.rootCauseType(),
|
||||||
|
envelope.rootCauseMessage(),
|
||||||
|
"", // diagramContentHash — server-side lookup, not in chunk
|
||||||
|
envelope.engineLevel(),
|
||||||
|
"", // inputBody — on processor records now
|
||||||
|
"", // outputBody
|
||||||
|
"", // inputHeaders
|
||||||
|
"", // outputHeaders
|
||||||
|
serializeAttributes(envelope.attributes()),
|
||||||
|
envelope.traceId(),
|
||||||
|
envelope.spanId(),
|
||||||
|
false, // hasTraceData — not tracked at envelope level
|
||||||
|
envelope.replayExchangeId() != null // isReplay
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String serializeAttributes(Map<String, String> attributes) {
|
||||||
|
if (attributes == null || attributes.isEmpty()) {
|
||||||
|
return "{}";
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return MAPPER.writeValueAsString(attributes);
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
log.warn("Failed to serialize attributes, falling back to empty object", e);
|
||||||
|
return "{}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Inner types ----
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A batch of processor records from a single chunk, ready for ClickHouse insertion.
|
||||||
|
*/
|
||||||
|
public record ProcessorBatch(
|
||||||
|
String tenantId,
|
||||||
|
String executionId,
|
||||||
|
String routeId,
|
||||||
|
String applicationName,
|
||||||
|
Instant execStartTime,
|
||||||
|
List<FlatProcessorRecord> processors
|
||||||
|
) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Envelope buffered while waiting for the final chunk.
|
||||||
|
*/
|
||||||
|
private record PendingExchange(ExecutionChunk envelope, Instant receivedAt) {}
|
||||||
|
}
|
||||||
@@ -0,0 +1,226 @@
|
|||||||
|
package com.cameleer3.server.core.ingestion;
|
||||||
|
|
||||||
|
import com.cameleer3.server.core.storage.model.ExecutionChunk;
|
||||||
|
import com.cameleer3.server.core.storage.model.FlatProcessorRecord;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.CopyOnWriteArrayList;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
/**
 * Unit tests for {@link ChunkAccumulator}: immediate processor-batch emission,
 * envelope merging across chunks, stale-exchange sweeping, error-field mapping,
 * and pending-count bookkeeping.
 */
class ChunkAccumulatorTest {

    // Thread-safe list sinks capture everything the accumulator emits.
    private CopyOnWriteArrayList<MergedExecution> executionSink;
    private CopyOnWriteArrayList<ChunkAccumulator.ProcessorBatch> processorSink;
    private ChunkAccumulator accumulator;

    @BeforeEach
    void setUp() {
        executionSink = new CopyOnWriteArrayList<>();
        processorSink = new CopyOnWriteArrayList<>();
        // 5-minute stale threshold: effectively "never stale" within a unit test.
        accumulator = new ChunkAccumulator(
                executionSink::add, processorSink::add, Duration.ofMinutes(5));
    }

    // A single chunk marked final should yield both a processor batch and a merged execution.
    @Test
    void singleFinalChunk_producesExecutionAndProcessors() {
        ExecutionChunk chunk = new ExecutionChunk(
                "ex-1", "order-service", "agent-1", "route-1",
                "corr-1", "COMPLETED",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:01Z"), 1000L,
                "REGULAR",
                null, null, null, null, null, null,
                Map.of("orderId", "ORD-1"),
                "trace-1", "span-1", null, null,
                0, true,
                List.of(proc(1, null, "log1", "log", "COMPLETED", 5L)));

        accumulator.onChunk(chunk);

        // Processor sink should receive 1 batch with 1 record
        assertThat(processorSink).hasSize(1);
        ChunkAccumulator.ProcessorBatch batch = processorSink.get(0);
        assertThat(batch.tenantId()).isEqualTo("default");
        assertThat(batch.executionId()).isEqualTo("ex-1");
        assertThat(batch.routeId()).isEqualTo("route-1");
        assertThat(batch.applicationName()).isEqualTo("order-service");
        assertThat(batch.execStartTime()).isEqualTo(Instant.parse("2026-03-31T10:00:00Z"));
        assertThat(batch.processors()).hasSize(1);

        // Execution sink should receive 1 merged execution
        assertThat(executionSink).hasSize(1);
        MergedExecution exec = executionSink.get(0);
        assertThat(exec.tenantId()).isEqualTo("default");
        assertThat(exec.version()).isEqualTo(1L);
        assertThat(exec.executionId()).isEqualTo("ex-1");
        assertThat(exec.routeId()).isEqualTo("route-1");
        assertThat(exec.status()).isEqualTo("COMPLETED");
        assertThat(exec.durationMs()).isEqualTo(1000L);
        assertThat(exec.traceId()).isEqualTo("trace-1");
        assertThat(exec.spanId()).isEqualTo("span-1");
        assertThat(exec.attributes()).contains("orderId");
    }

    // Non-final chunk buffers the envelope (no execution yet) but still pushes
    // its processors; the final chunk then merges and emits the execution.
    @Test
    void multipleChunks_mergesEnvelope_insertsProcessorsImmediately() {
        // Chunk 0: RUNNING, no endTime/duration yet, two processor records.
        ExecutionChunk chunk0 = new ExecutionChunk(
                "ex-2", "app", "agent-1", "route-1",
                "ex-2", "RUNNING",
                Instant.parse("2026-03-31T10:00:00Z"),
                null, null, "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                0, false,
                List.of(
                        proc(1, null, "log1", "log", "COMPLETED", 5L),
                        proc(2, null, "log2", "log", "COMPLETED", 3L)));

        accumulator.onChunk(chunk0);

        // Processors pushed immediately on chunk 0
        assertThat(processorSink).hasSize(1);
        assertThat(processorSink.get(0).processors()).hasSize(2);

        // No execution yet (not final)
        assertThat(executionSink).isEmpty();

        // Chunk 1: final, carries terminal status/endTime/duration.
        ExecutionChunk chunk1 = new ExecutionChunk(
                "ex-2", "app", "agent-1", "route-1",
                "ex-2", "COMPLETED",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:02Z"), 2000L,
                "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                1, true,
                List.of(proc(3, null, "log3", "log", "COMPLETED", 7L)));

        accumulator.onChunk(chunk1);

        // Processors from chunk 1 also pushed
        assertThat(processorSink).hasSize(2);
        assertThat(processorSink.get(1).processors()).hasSize(1);

        // Now execution is emitted
        assertThat(executionSink).hasSize(1);
        MergedExecution exec = executionSink.get(0);
        assertThat(exec.status()).isEqualTo("COMPLETED");
        assertThat(exec.durationMs()).isEqualTo(2000L);
    }

    // An exchange whose final chunk never arrives is flushed by sweepStale()
    // once it has been pending longer than the threshold.
    @Test
    void staleExchange_flushedBySweep() throws Exception {
        // 1 ms threshold so the sleep below reliably crosses the cutoff.
        ChunkAccumulator staleAccumulator = new ChunkAccumulator(
                executionSink::add, processorSink::add, Duration.ofMillis(1));

        ExecutionChunk chunk = new ExecutionChunk(
                "ex-3", "app", "agent-1", "route-1",
                "ex-3", "RUNNING",
                Instant.parse("2026-03-31T10:00:00Z"),
                null, null, "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                0, false,
                List.of());

        staleAccumulator.onChunk(chunk);
        assertThat(executionSink).isEmpty();

        Thread.sleep(5);
        staleAccumulator.sweepStale();

        // The flushed execution keeps whatever status it last had (still RUNNING).
        assertThat(executionSink).hasSize(1);
        MergedExecution exec = executionSink.get(0);
        assertThat(exec.status()).isEqualTo("RUNNING");
        assertThat(exec.executionId()).isEqualTo("ex-3");
    }

    // All six error-related envelope fields must survive the envelope → execution mapping.
    @Test
    void finalChunkWithErrors_populatesErrorFields() {
        ExecutionChunk chunk = new ExecutionChunk(
                "ex-4", "app", "agent-1", "route-1",
                "ex-4", "FAILED",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:01Z"), 1000L,
                "REGULAR",
                "NullPointerException", "at com.foo.Bar.baz(Bar.java:42)",
                "NullPointerException", "RUNTIME",
                "NullPointerException", "null value at index 0",
                null, null, null, null, null,
                0, true,
                List.of());

        accumulator.onChunk(chunk);

        assertThat(executionSink).hasSize(1);
        MergedExecution exec = executionSink.get(0);
        assertThat(exec.status()).isEqualTo("FAILED");
        assertThat(exec.errorMessage()).isEqualTo("NullPointerException");
        // NOTE(review): MergedExecution exposes errorStacktrace() while ExecutionChunk
        // uses errorStackTrace() — confirm the casing difference is intentional.
        assertThat(exec.errorStacktrace()).isEqualTo("at com.foo.Bar.baz(Bar.java:42)");
        assertThat(exec.errorType()).isEqualTo("NullPointerException");
        assertThat(exec.errorCategory()).isEqualTo("RUNTIME");
        assertThat(exec.rootCauseType()).isEqualTo("NullPointerException");
        assertThat(exec.rootCauseMessage()).isEqualTo("null value at index 0");
    }

    // getPendingCount() reflects buffered exchanges and drops when one finalizes.
    @Test
    void getPendingCount_tracksBufferedExchanges() {
        ExecutionChunk running1 = new ExecutionChunk(
                "ex-5", "app", "agent-1", "route-1",
                "ex-5", "RUNNING",
                Instant.parse("2026-03-31T10:00:00Z"),
                null, null, "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                0, false,
                List.of());

        ExecutionChunk running2 = new ExecutionChunk(
                "ex-6", "app", "agent-1", "route-2",
                "ex-6", "RUNNING",
                Instant.parse("2026-03-31T10:00:00Z"),
                null, null, "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                0, false,
                List.of());

        accumulator.onChunk(running1);
        accumulator.onChunk(running2);
        assertThat(accumulator.getPendingCount()).isEqualTo(2);

        // Send final for ex-5
        ExecutionChunk final5 = new ExecutionChunk(
                "ex-5", "app", "agent-1", "route-1",
                "ex-5", "COMPLETED",
                Instant.parse("2026-03-31T10:00:00Z"),
                Instant.parse("2026-03-31T10:00:01Z"), 1000L,
                "REGULAR",
                null, null, null, null, null, null,
                null, null, null, null, null,
                1, true,
                List.of());

        accumulator.onChunk(final5);
        assertThat(accumulator.getPendingCount()).isEqualTo(1);
    }

    /** Helper to create a FlatProcessorRecord with minimal fields. */
    private static FlatProcessorRecord proc(int seq, Integer parentSeq,
                                            String processorId, String processorType,
                                            String status, long durationMs) {
        // Positional nulls fill the optional body/header/error/trace fields —
        // only sequence, ids, status, start time, and duration matter here.
        return new FlatProcessorRecord(
                seq, parentSeq, null, processorId, processorType,
                null, null, status,
                Instant.parse("2026-03-31T10:00:00.100Z"), durationMs,
                null, null, null, null, null,
                null, null, null, null, null, null,
                null, null, null, null, null);
    }
}
|
||||||
Reference in New Issue
Block a user