feat: implement multitenancy with tenant isolation + environment support
All checks were successful
CI / cleanup-branch (push) Has been skipped
CI / build (push) Successful in 1m8s
CI / docker (push) Successful in 42s
CI / deploy-feature (push) Has been skipped
CI / deploy (push) Successful in 1m25s

Adds a configurable tenant ID (CAMELEER_TENANT_ID env var, default:
"default") and introduces environment as a first-class concept. Each
server instance serves a single tenant, which may contain multiple
environments.

Changes across 36 files:
- TenantProperties config bean for tenant ID injection
- AgentInfo: added environmentId field
- AgentRegistrationRequest: added environmentId field
- All 9 ClickHouse stores: inject tenant ID, replace hardcoded
  "default" constant, add environment to writes/reads
- ChunkAccumulator: configurable tenant ID + environment resolver
- MergedExecution/ProcessorBatch/BufferedLogEntry: added environment
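- ClickHouse init.sql: added environment column to all tables,
  updated ORDER BY to lead with tenant→time→env (→app where the
  table has one); route_diagrams instead uses
  tenant→env→route→instance→content_hash, added tenant_id to
  usage_events, updated all MV GROUP BY clauses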
- Controllers: pass environmentId through registration/auto-heal
- K8s deploy: added CAMELEER_TENANT_ID env var
- All tests updated for new signatures

Closes #123

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-04 15:00:18 +02:00
parent ee7226cf1c
commit a188308ec5
36 changed files with 310 additions and 188 deletions

View File

@@ -6,6 +6,7 @@
CREATE TABLE IF NOT EXISTS agent_metrics (
tenant_id LowCardinality(String) DEFAULT 'default',
collected_at DateTime64(3),
environment LowCardinality(String) DEFAULT 'default',
instance_id LowCardinality(String),
metric_name LowCardinality(String),
metric_value Float64,
@@ -14,7 +15,7 @@ CREATE TABLE IF NOT EXISTS agent_metrics (
)
ENGINE = MergeTree()
PARTITION BY (tenant_id, toYYYYMM(collected_at))
ORDER BY (tenant_id, instance_id, metric_name, collected_at)
ORDER BY (tenant_id, collected_at, environment, instance_id, metric_name)
TTL toDateTime(collected_at) + INTERVAL 365 DAY DELETE
SETTINGS index_granularity = 8192;
@@ -28,6 +29,7 @@ CREATE TABLE IF NOT EXISTS executions (
route_id LowCardinality(String),
instance_id LowCardinality(String),
application_id LowCardinality(String),
environment LowCardinality(String) DEFAULT 'default',
status LowCardinality(String),
correlation_id String DEFAULT '',
exchange_id String DEFAULT '',
@@ -68,7 +70,7 @@ CREATE TABLE IF NOT EXISTS executions (
)
ENGINE = ReplacingMergeTree(_version)
PARTITION BY (tenant_id, toYYYYMM(start_time))
ORDER BY (tenant_id, start_time, application_id, route_id, execution_id)
ORDER BY (tenant_id, start_time, environment, application_id, route_id, execution_id)
TTL toDateTime(start_time) + INTERVAL 365 DAY DELETE
SETTINGS index_granularity = 8192;
@@ -85,6 +87,7 @@ CREATE TABLE IF NOT EXISTS processor_executions (
start_time DateTime64(3),
route_id LowCardinality(String),
application_id LowCardinality(String),
environment LowCardinality(String) DEFAULT 'default',
iteration Nullable(Int32),
iteration_size Nullable(Int32),
status LowCardinality(String),
@@ -116,7 +119,7 @@ CREATE TABLE IF NOT EXISTS processor_executions (
)
ENGINE = MergeTree()
PARTITION BY (tenant_id, toYYYYMM(start_time))
ORDER BY (tenant_id, start_time, application_id, route_id, execution_id, seq)
ORDER BY (tenant_id, start_time, environment, application_id, route_id, execution_id, seq)
TTL toDateTime(start_time) + INTERVAL 365 DAY DELETE
SETTINGS index_granularity = 8192;
@@ -127,6 +130,7 @@ SETTINGS index_granularity = 8192;
CREATE TABLE IF NOT EXISTS stats_1m_all (
tenant_id LowCardinality(String),
bucket DateTime,
environment LowCardinality(String) DEFAULT 'default',
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
@@ -136,13 +140,14 @@ CREATE TABLE IF NOT EXISTS stats_1m_all (
)
ENGINE = AggregatingMergeTree()
PARTITION BY (tenant_id, toYYYYMM(bucket))
ORDER BY (tenant_id, bucket)
ORDER BY (tenant_id, bucket, environment)
TTL bucket + INTERVAL 365 DAY DELETE;
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_all_mv TO stats_1m_all AS
SELECT
tenant_id,
toStartOfMinute(start_time) AS bucket,
environment,
countState() AS total_count,
countIfState(status = 'FAILED') AS failed_count,
countIfState(status = 'RUNNING') AS running_count,
@@ -150,7 +155,7 @@ SELECT
maxState(duration_ms) AS duration_max,
quantileState(0.99)(duration_ms) AS p99_duration
FROM executions
GROUP BY tenant_id, bucket;
GROUP BY tenant_id, bucket, environment;
-- stats_1m_app (per-application)
@@ -158,6 +163,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_app (
tenant_id LowCardinality(String),
application_id LowCardinality(String),
bucket DateTime,
environment LowCardinality(String) DEFAULT 'default',
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
@@ -167,7 +173,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_app (
)
ENGINE = AggregatingMergeTree()
PARTITION BY (tenant_id, toYYYYMM(bucket))
ORDER BY (tenant_id, application_id, bucket)
ORDER BY (tenant_id, bucket, environment, application_id)
TTL bucket + INTERVAL 365 DAY DELETE;
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_app_mv TO stats_1m_app AS
@@ -175,6 +181,7 @@ SELECT
tenant_id,
application_id,
toStartOfMinute(start_time) AS bucket,
environment,
countState() AS total_count,
countIfState(status = 'FAILED') AS failed_count,
countIfState(status = 'RUNNING') AS running_count,
@@ -182,7 +189,7 @@ SELECT
maxState(duration_ms) AS duration_max,
quantileState(0.99)(duration_ms) AS p99_duration
FROM executions
GROUP BY tenant_id, application_id, bucket;
GROUP BY tenant_id, application_id, bucket, environment;
-- stats_1m_route (per-route)
@@ -191,6 +198,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_route (
application_id LowCardinality(String),
route_id LowCardinality(String),
bucket DateTime,
environment LowCardinality(String) DEFAULT 'default',
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
running_count AggregateFunction(countIf, UInt8),
@@ -200,7 +208,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_route (
)
ENGINE = AggregatingMergeTree()
PARTITION BY (tenant_id, toYYYYMM(bucket))
ORDER BY (tenant_id, application_id, route_id, bucket)
ORDER BY (tenant_id, bucket, environment, application_id, route_id)
TTL bucket + INTERVAL 365 DAY DELETE;
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_route_mv TO stats_1m_route AS
@@ -209,6 +217,7 @@ SELECT
application_id,
route_id,
toStartOfMinute(start_time) AS bucket,
environment,
countState() AS total_count,
countIfState(status = 'FAILED') AS failed_count,
countIfState(status = 'RUNNING') AS running_count,
@@ -216,7 +225,7 @@ SELECT
maxState(duration_ms) AS duration_max,
quantileState(0.99)(duration_ms) AS p99_duration
FROM executions
GROUP BY tenant_id, application_id, route_id, bucket;
GROUP BY tenant_id, application_id, route_id, bucket, environment;
-- stats_1m_processor (per-processor-type)
@@ -225,6 +234,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor (
application_id LowCardinality(String),
processor_type LowCardinality(String),
bucket DateTime,
environment LowCardinality(String) DEFAULT 'default',
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
duration_sum AggregateFunction(sum, Nullable(Int64)),
@@ -233,7 +243,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor (
)
ENGINE = AggregatingMergeTree()
PARTITION BY (tenant_id, toYYYYMM(bucket))
ORDER BY (tenant_id, application_id, processor_type, bucket)
ORDER BY (tenant_id, bucket, environment, application_id, processor_type)
TTL bucket + INTERVAL 365 DAY DELETE;
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_processor_mv TO stats_1m_processor AS
@@ -242,13 +252,14 @@ SELECT
application_id,
processor_type,
toStartOfMinute(start_time) AS bucket,
environment,
countState() AS total_count,
countIfState(status = 'FAILED') AS failed_count,
sumState(duration_ms) AS duration_sum,
maxState(duration_ms) AS duration_max,
quantileState(0.99)(duration_ms) AS p99_duration
FROM processor_executions
GROUP BY tenant_id, application_id, processor_type, bucket;
GROUP BY tenant_id, application_id, processor_type, bucket, environment;
-- stats_1m_processor_detail (per-processor-id)
@@ -259,6 +270,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor_detail (
processor_id String,
processor_type LowCardinality(String),
bucket DateTime,
environment LowCardinality(String) DEFAULT 'default',
total_count AggregateFunction(count),
failed_count AggregateFunction(countIf, UInt8),
duration_sum AggregateFunction(sum, Nullable(Int64)),
@@ -267,7 +279,7 @@ CREATE TABLE IF NOT EXISTS stats_1m_processor_detail (
)
ENGINE = AggregatingMergeTree()
PARTITION BY (tenant_id, toYYYYMM(bucket))
ORDER BY (tenant_id, application_id, route_id, processor_id, processor_type, bucket)
ORDER BY (tenant_id, bucket, environment, application_id, route_id, processor_id, processor_type)
TTL bucket + INTERVAL 365 DAY DELETE;
CREATE MATERIALIZED VIEW IF NOT EXISTS stats_1m_processor_detail_mv TO stats_1m_processor_detail AS
@@ -278,13 +290,14 @@ SELECT
processor_id,
processor_type,
toStartOfMinute(start_time) AS bucket,
environment,
countState() AS total_count,
countIfState(status = 'FAILED') AS failed_count,
sumState(duration_ms) AS duration_sum,
maxState(duration_ms) AS duration_max,
quantileState(0.99)(duration_ms) AS p99_duration
FROM processor_executions
GROUP BY tenant_id, application_id, route_id, processor_id, processor_type, bucket;
GROUP BY tenant_id, application_id, route_id, processor_id, processor_type, bucket, environment;
-- ── Route Diagrams ──────────────────────────────────────────────────────
@@ -294,11 +307,12 @@ CREATE TABLE IF NOT EXISTS route_diagrams (
route_id LowCardinality(String),
instance_id LowCardinality(String),
application_id LowCardinality(String),
environment LowCardinality(String) DEFAULT 'default',
definition String,
created_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = ReplacingMergeTree(created_at)
ORDER BY (tenant_id, content_hash)
ORDER BY (tenant_id, environment, route_id, instance_id, content_hash)
SETTINGS index_granularity = 8192;
-- ── Agent Events ────────────────────────────────────────────────────────
@@ -306,6 +320,7 @@ SETTINGS index_granularity = 8192;
CREATE TABLE IF NOT EXISTS agent_events (
tenant_id LowCardinality(String) DEFAULT 'default',
timestamp DateTime64(3) DEFAULT now64(3),
environment LowCardinality(String) DEFAULT 'default',
instance_id LowCardinality(String),
application_id LowCardinality(String),
event_type LowCardinality(String),
@@ -313,7 +328,7 @@ CREATE TABLE IF NOT EXISTS agent_events (
)
ENGINE = MergeTree()
PARTITION BY (tenant_id, toYYYYMM(timestamp))
ORDER BY (tenant_id, application_id, instance_id, timestamp)
ORDER BY (tenant_id, timestamp, environment, instance_id)
TTL toDateTime(timestamp) + INTERVAL 365 DAY DELETE;
-- ── Logs ────────────────────────────────────────────────────────────────
@@ -321,6 +336,7 @@ TTL toDateTime(timestamp) + INTERVAL 365 DAY DELETE;
CREATE TABLE IF NOT EXISTS logs (
tenant_id LowCardinality(String) DEFAULT 'default',
timestamp DateTime64(3),
environment LowCardinality(String) DEFAULT 'default',
application LowCardinality(String),
instance_id LowCardinality(String),
level LowCardinality(String),
@@ -337,14 +353,16 @@ CREATE TABLE IF NOT EXISTS logs (
)
ENGINE = MergeTree()
PARTITION BY (tenant_id, toYYYYMM(timestamp))
ORDER BY (tenant_id, application, timestamp)
ORDER BY (tenant_id, timestamp, environment, application, instance_id)
TTL toDateTime(timestamp) + INTERVAL 365 DAY DELETE
SETTINGS index_granularity = 8192;
-- ── Usage Events ────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS usage_events (
tenant_id LowCardinality(String) DEFAULT 'default',
timestamp DateTime64(3) DEFAULT now64(3),
environment LowCardinality(String) DEFAULT 'default',
username LowCardinality(String),
method LowCardinality(String),
path String,
@@ -354,5 +372,5 @@ CREATE TABLE IF NOT EXISTS usage_events (
query_params String DEFAULT ''
)
ENGINE = MergeTree()
ORDER BY (username, timestamp)
ORDER BY (tenant_id, timestamp, environment, username, normalized)
TTL toDateTime(timestamp) + INTERVAL 90 DAY;