fix: drop stale instance_id filter from search and scope route stats by app
All checks were successful
CI / cleanup-branch (push) Has been skipped
CI / build (push) Successful in 1m28s
CI / docker (push) Successful in 1m11s
CI / deploy-feature (push) Has been skipped
CI / deploy (push) Successful in 42s

The exchange search silently filtered by the in-memory agent registry's
current instance IDs on top of application_id. Historical exchanges written
by previous agent instances (or any instance not currently registered, e.g.
after a server restart before agents heartbeat back) were hidden from
results even though they matched the application filter.

Fix: drop the applicationId -> instanceIds resolution in SearchController.
Rely on application_id = ? in ClickHouseSearchIndex; keep explicit
instanceIds filtering only when a client passes them.

Related cleanup: the agentIds parameter on StatsStore.statsForRoute /
timeseriesForRoute was silently discarded inside ClickHouseStatsStore, so
per-route stats aggregated across any apps sharing a routeId. Replace with
String applicationId and add application_id to the stats_1m_route filters
so per-route stats are correctly scoped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-16 19:49:55 +02:00
parent 1a68e1c46c
commit e2d9428dff
7 changed files with 37 additions and 59 deletions

View File

@@ -43,7 +43,7 @@ paths:
## search/ — Execution search and stats ## search/ — Execution search and stats
- `SearchService` — search, count, stats, statsForApp, timeseries, timeseriesForApp, timeseriesForRoute, timeseriesGroupedByApp, timeseriesGroupedByRoute, slaCompliance, slaCountsByApp, slaCountsByRoute, topErrors, activeErrorTypes, punchcard, distinctAttributeKeys - `SearchService` — search, count, stats, statsForApp, statsForRoute, timeseries, timeseriesForApp, timeseriesForRoute, timeseriesGroupedByApp, timeseriesGroupedByRoute, slaCompliance, slaCountsByApp, slaCountsByRoute, topErrors, activeErrorTypes, punchcard, distinctAttributeKeys. `statsForRoute`/`timeseriesForRoute` take `(routeId, applicationId)` — app filter is applied to `stats_1m_route`.
- `SearchRequest` / `SearchResult` — search DTOs - `SearchRequest` / `SearchResult` — search DTOs
- `ExecutionStats`, `ExecutionSummary` — stats aggregation records - `ExecutionStats`, `ExecutionSummary` — stats aggregation records
- `StatsTimeseries`, `TopError` — timeseries and error DTOs - `StatsTimeseries`, `TopError` — timeseries and error DTOs

View File

@@ -2,8 +2,6 @@ package com.cameleer.server.app.controller;
import com.cameleer.server.core.admin.AppSettings; import com.cameleer.server.core.admin.AppSettings;
import com.cameleer.server.core.admin.AppSettingsRepository; import com.cameleer.server.core.admin.AppSettingsRepository;
import com.cameleer.server.core.agent.AgentInfo;
import com.cameleer.server.core.agent.AgentRegistryService;
import com.cameleer.server.core.search.ExecutionStats; import com.cameleer.server.core.search.ExecutionStats;
import com.cameleer.server.core.search.ExecutionSummary; import com.cameleer.server.core.search.ExecutionSummary;
import com.cameleer.server.core.search.SearchRequest; import com.cameleer.server.core.search.SearchRequest;
@@ -38,13 +36,11 @@ import java.util.Map;
public class SearchController { public class SearchController {
private final SearchService searchService; private final SearchService searchService;
private final AgentRegistryService registryService;
private final AppSettingsRepository appSettingsRepository; private final AppSettingsRepository appSettingsRepository;
public SearchController(SearchService searchService, AgentRegistryService registryService, public SearchController(SearchService searchService,
AppSettingsRepository appSettingsRepository) { AppSettingsRepository appSettingsRepository) {
this.searchService = searchService; this.searchService = searchService;
this.registryService = registryService;
this.appSettingsRepository = appSettingsRepository; this.appSettingsRepository = appSettingsRepository;
} }
@@ -66,15 +62,13 @@ public class SearchController {
@RequestParam(required = false) String sortField, @RequestParam(required = false) String sortField,
@RequestParam(required = false) String sortDir) { @RequestParam(required = false) String sortDir) {
List<String> agentIds = resolveApplicationToAgentIds(application);
SearchRequest request = new SearchRequest( SearchRequest request = new SearchRequest(
status, timeFrom, timeTo, status, timeFrom, timeTo,
null, null, null, null,
correlationId, correlationId,
text, null, null, null, text, null, null, null,
routeId, instanceId, processorType, routeId, instanceId, processorType,
application, agentIds, application, null,
offset, limit, offset, limit,
sortField, sortDir, sortField, sortDir,
environment environment
@@ -87,13 +81,7 @@ public class SearchController {
@Operation(summary = "Advanced search with all filters") @Operation(summary = "Advanced search with all filters")
public ResponseEntity<SearchResult<ExecutionSummary>> searchPost( public ResponseEntity<SearchResult<ExecutionSummary>> searchPost(
@RequestBody SearchRequest request) { @RequestBody SearchRequest request) {
// Resolve application to agentIds if application is specified but agentIds is not return ResponseEntity.ok(searchService.search(request));
SearchRequest resolved = request;
if (request.applicationId() != null && !request.applicationId().isBlank()
&& (request.instanceIds() == null || request.instanceIds().isEmpty())) {
resolved = request.withInstanceIds(resolveApplicationToAgentIds(request.applicationId()));
}
return ResponseEntity.ok(searchService.search(resolved));
} }
@GetMapping("/stats") @GetMapping("/stats")
@@ -111,8 +99,7 @@ public class SearchController {
} else if (routeId == null) { } else if (routeId == null) {
stats = searchService.statsForApp(from, end, application, environment); stats = searchService.statsForApp(from, end, application, environment);
} else { } else {
List<String> agentIds = resolveApplicationToAgentIds(application); stats = searchService.statsForRoute(from, end, routeId, application, environment);
stats = searchService.stats(from, end, routeId, agentIds, environment);
} }
// Enrich with SLA compliance // Enrich with SLA compliance
@@ -139,11 +126,7 @@ public class SearchController {
if (routeId == null) { if (routeId == null) {
return ResponseEntity.ok(searchService.timeseriesForApp(from, end, buckets, application, environment)); return ResponseEntity.ok(searchService.timeseriesForApp(from, end, buckets, application, environment));
} }
List<String> agentIds = resolveApplicationToAgentIds(application); return ResponseEntity.ok(searchService.timeseriesForRoute(from, end, buckets, routeId, application, environment));
if (routeId == null && agentIds.isEmpty()) {
return ResponseEntity.ok(searchService.timeseries(from, end, buckets, environment));
}
return ResponseEntity.ok(searchService.timeseries(from, end, buckets, routeId, agentIds, environment));
} }
@GetMapping("/stats/timeseries/by-app") @GetMapping("/stats/timeseries/by-app")
@@ -197,17 +180,4 @@ public class SearchController {
Instant end = to != null ? to : Instant.now(); Instant end = to != null ? to : Instant.now();
return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit, environment)); return ResponseEntity.ok(searchService.topErrors(from, end, application, routeId, limit, environment));
} }
/**
* Resolve an application name to agent IDs.
* Returns empty list if application is null/blank (no filtering).
*/
private List<String> resolveApplicationToAgentIds(String application) {
if (application == null || application.isBlank()) {
return List.of();
}
return registryService.findByApplication(application).stream()
.map(AgentInfo::instanceId)
.toList();
}
} }

View File

@@ -53,9 +53,13 @@ public class ClickHouseStatsStore implements StatsStore {
} }
@Override @Override
public ExecutionStats statsForRoute(Instant from, Instant to, String routeId, List<String> agentIds, String environment) { public ExecutionStats statsForRoute(Instant from, Instant to, String routeId, String applicationId, String environment) {
return queryStats("stats_1m_route", from, to, List.of( List<Filter> filters = new ArrayList<>();
new Filter("route_id", routeId)), true, environment); filters.add(new Filter("route_id", routeId));
if (applicationId != null && !applicationId.isBlank()) {
filters.add(new Filter("application_id", applicationId));
}
return queryStats("stats_1m_route", from, to, filters, true, environment);
} }
@Override @Override
@@ -78,9 +82,13 @@ public class ClickHouseStatsStore implements StatsStore {
@Override @Override
public StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount, public StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
String routeId, List<String> agentIds, String environment) { String routeId, String applicationId, String environment) {
return queryTimeseries("stats_1m_route", from, to, bucketCount, List.of( List<Filter> filters = new ArrayList<>();
new Filter("route_id", routeId)), true, environment); filters.add(new Filter("route_id", routeId));
if (applicationId != null && !applicationId.isBlank()) {
filters.add(new Filter("application_id", applicationId));
}
return queryTimeseries("stats_1m_route", from, to, bucketCount, filters, true, environment);
} }
@Override @Override

View File

@@ -182,7 +182,7 @@ class ClickHouseStatsStoreIT {
Instant from = BASE.minusSeconds(60); Instant from = BASE.minusSeconds(60);
Instant to = BASE.plusSeconds(300); Instant to = BASE.plusSeconds(300);
ExecutionStats routeA = store.statsForRoute(from, to, "route-a", List.of(), null); ExecutionStats routeA = store.statsForRoute(from, to, "route-a", null, null);
assertThat(routeA.totalCount()).isEqualTo(6); assertThat(routeA.totalCount()).isEqualTo(6);
} }

View File

@@ -22,8 +22,8 @@ import java.util.List;
* @param routeId exact match on route_id * @param routeId exact match on route_id
* @param instanceId exact match on instance_id * @param instanceId exact match on instance_id
* @param processorType matches processor_types array via has() * @param processorType matches processor_types array via has()
* @param applicationId application ID filter (resolved to instanceIds server-side) * @param applicationId exact match on application_id
* @param instanceIds list of instance IDs (resolved from application, used for IN clause) * @param instanceIds list of instance IDs for an IN clause (only set when drilling down to specific agents)
* @param offset pagination offset (0-based) * @param offset pagination offset (0-based)
* @param limit page size (default 50, max 500) * @param limit page size (default 50, max 500)
* @param sortField column to sort by (default: startTime) * @param sortField column to sort by (default: startTime)

View File

@@ -45,12 +45,12 @@ public class SearchService {
return statsStore.statsForApp(from, to, applicationId, environment); return statsStore.statsForApp(from, to, applicationId, environment);
} }
public ExecutionStats stats(Instant from, Instant to, String routeId, List<String> agentIds) { public ExecutionStats statsForRoute(Instant from, Instant to, String routeId, String applicationId) {
return statsStore.statsForRoute(from, to, routeId, agentIds, null); return statsStore.statsForRoute(from, to, routeId, applicationId, null);
} }
public ExecutionStats stats(Instant from, Instant to, String routeId, List<String> agentIds, String environment) { public ExecutionStats statsForRoute(Instant from, Instant to, String routeId, String applicationId, String environment) {
return statsStore.statsForRoute(from, to, routeId, agentIds, environment); return statsStore.statsForRoute(from, to, routeId, applicationId, environment);
} }
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount) { public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount) {
@@ -69,14 +69,14 @@ public class SearchService {
return statsStore.timeseriesForApp(from, to, bucketCount, applicationId, environment); return statsStore.timeseriesForApp(from, to, bucketCount, applicationId, environment);
} }
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount, public StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
String routeId, List<String> agentIds) { String routeId, String applicationId) {
return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, agentIds, null); return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, applicationId, null);
} }
public StatsTimeseries timeseries(Instant from, Instant to, int bucketCount, public StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
String routeId, List<String> agentIds, String environment) { String routeId, String applicationId, String environment) {
return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, agentIds, environment); return statsStore.timeseriesForRoute(from, to, bucketCount, routeId, applicationId, environment);
} }
// ── Dashboard-specific queries ──────────────────────────────────────── // ── Dashboard-specific queries ────────────────────────────────────────

View File

@@ -16,8 +16,8 @@ public interface StatsStore {
// Per-app stats (stats_1m_app) // Per-app stats (stats_1m_app)
ExecutionStats statsForApp(Instant from, Instant to, String applicationId, String environment); ExecutionStats statsForApp(Instant from, Instant to, String applicationId, String environment);
// Per-route stats (stats_1m_route), optionally scoped to specific agents // Per-route stats (stats_1m_route), optionally scoped to an application
ExecutionStats statsForRoute(Instant from, Instant to, String routeId, List<String> agentIds, String environment); ExecutionStats statsForRoute(Instant from, Instant to, String routeId, String applicationId, String environment);
// Per-processor stats (stats_1m_processor) // Per-processor stats (stats_1m_processor)
ExecutionStats statsForProcessor(Instant from, Instant to, String routeId, String processorType); ExecutionStats statsForProcessor(Instant from, Instant to, String routeId, String processorType);
@@ -28,9 +28,9 @@ public interface StatsStore {
// Per-app timeseries // Per-app timeseries
StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId, String environment); StatsTimeseries timeseriesForApp(Instant from, Instant to, int bucketCount, String applicationId, String environment);
// Per-route timeseries, optionally scoped to specific agents // Per-route timeseries, optionally scoped to an application
StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount, StatsTimeseries timeseriesForRoute(Instant from, Instant to, int bucketCount,
String routeId, List<String> agentIds, String environment); String routeId, String applicationId, String environment);
// Per-processor timeseries // Per-processor timeseries
StatsTimeseries timeseriesForProcessor(Instant from, Instant to, int bucketCount, StatsTimeseries timeseriesForProcessor(Instant from, Instant to, int bucketCount,