fix(deploy): stop previous active deployment before START_REPLICAS (fixes 409)

Container names are deterministic: {tenant}-{envSlug}-{appSlug}-{replica}.
The prior code did the stop-existing step at SWAP_TRAFFIC, *after*
START_REPLICAS had already tried to create containers with the same
names — so a redeploy against a RUNNING app consistently failed with
Docker 409 "container name already in use".

Move the stop-existing block to run right after CREATE_NETWORK and
before START_REPLICAS. SWAP_TRAFFIC now only records the stage and does
no work of its own: traffic is swapped implicitly by the Traefik labels
once the new replicas are healthy.

Also: add `findActiveByAppIdAndEnvironmentIdExcluding` so the SQL
excludes the current deployment by id. Previously the Java-side
`!id.equals(me)` guard was ineffective: the newly-inserted row already
has status=STARTING (DB default), so ORDER BY created_at DESC LIMIT 1
returned the new row itself. The guard then discarded it, and the
actual previous deployment was never found.

Trade-off: this is destroy-then-start rather than true blue/green —
brief downtime during the swap. Matches the pre-unified-page behavior
and is what users reasonably expect. True blue/green would require
per-deployment container names.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-23 01:01:00 +02:00
parent 9ecc9ee72a
commit f8dccaae2b
3 changed files with 30 additions and 8 deletions

View File

@@ -167,6 +167,21 @@ public class DeploymentExecutor {
} }
} }
// === STOP PREVIOUS ACTIVE DEPLOYMENT ===
// Container names are deterministic ({tenant}-{env}-{app}-{replica}), so a
// previous active deployment holds the Docker names we need. Stop + remove
// it before starting new replicas to avoid a 409 name conflict. Excluding
// the current deployment id by SQL (not Java) because the newly created
// row already has status=STARTING and would otherwise be picked by
// findActiveByAppIdAndEnvironmentId ORDER BY created_at DESC LIMIT 1.
Optional<Deployment> previous = deploymentRepository.findActiveByAppIdAndEnvironmentIdExcluding(
deployment.appId(), deployment.environmentId(), deployment.id());
if (previous.isPresent()) {
log.info("Stopping previous deployment {} before starting new replicas", previous.get().id());
stopDeploymentContainers(previous.get());
deploymentService.markStopped(previous.get().id());
}
// === START REPLICAS === // === START REPLICAS ===
updateStage(deployment.id(), DeployStage.START_REPLICAS); updateStage(deployment.id(), DeployStage.START_REPLICAS);
@@ -244,16 +259,12 @@ public class DeploymentExecutor {
pgDeployRepo.updateReplicaStates(deployment.id(), replicaStates); pgDeployRepo.updateReplicaStates(deployment.id(), replicaStates);
// === SWAP TRAFFIC === // === SWAP TRAFFIC ===
// Traffic is routed via Traefik Docker labels, so the "swap" happens
// implicitly once the new replicas are healthy and the old containers
// are gone. The old deployment was already stopped before START_REPLICAS
// to free the deterministic container names.
updateStage(deployment.id(), DeployStage.SWAP_TRAFFIC); updateStage(deployment.id(), DeployStage.SWAP_TRAFFIC);
Optional<Deployment> existing = deploymentRepository.findActiveByAppIdAndEnvironmentId(
deployment.appId(), deployment.environmentId());
if (existing.isPresent() && !existing.get().id().equals(deployment.id())) {
stopDeploymentContainers(existing.get());
deploymentService.markStopped(existing.get().id());
log.info("Stopped previous deployment {} for replacement", existing.get().id());
}
// === COMPLETE === // === COMPLETE ===
updateStage(deployment.id(), DeployStage.COMPLETE); updateStage(deployment.id(), DeployStage.COMPLETE);

View File

@@ -63,6 +63,16 @@ public class PostgresDeploymentRepository implements DeploymentRepository {
return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0)); return results.isEmpty() ? Optional.empty() : Optional.of(results.get(0));
} }
/**
 * Looks up the most recently created deployment of an app in an environment
 * whose status is STARTING, RUNNING or DEGRADED, skipping one deployment id.
 *
 * @param appId               app whose deployments are searched
 * @param environmentId       environment the deployment must belong to
 * @param excludeDeploymentId deployment id that must not be returned
 * @return the newest matching deployment, or an empty Optional when no row matches
 */
@Override
public Optional<Deployment> findActiveByAppIdAndEnvironmentIdExcluding(UUID appId, UUID environmentId, UUID excludeDeploymentId) {
    // The exclusion is part of the WHERE clause, so LIMIT 1 is applied only
    // after the excluded row has already been filtered out.
    String sql = "SELECT " + SELECT_COLS + " FROM deployments "
            + "WHERE app_id = ? AND environment_id = ? "
            + "AND status IN ('STARTING', 'RUNNING', 'DEGRADED') "
            + "AND id <> ? ORDER BY created_at DESC LIMIT 1";

    List<Deployment> matches = jdbc.query(
            sql,
            (rs, rowNum) -> mapRow(rs),
            appId, environmentId, excludeDeploymentId);

    if (matches.isEmpty()) {
        return Optional.empty();
    }
    return Optional.of(matches.get(0));
}
public List<Deployment> findByStatus(List<DeploymentStatus> statuses) { public List<Deployment> findByStatus(List<DeploymentStatus> statuses) {
String placeholders = String.join(",", statuses.stream().map(s -> "'" + s.name() + "'").toList()); String placeholders = String.join(",", statuses.stream().map(s -> "'" + s.name() + "'").toList());
return jdbc.query( return jdbc.query(

View File

@@ -9,6 +9,7 @@ public interface DeploymentRepository {
List<Deployment> findByEnvironmentId(UUID environmentId); List<Deployment> findByEnvironmentId(UUID environmentId);
Optional<Deployment> findById(UUID id); Optional<Deployment> findById(UUID id);
Optional<Deployment> findActiveByAppIdAndEnvironmentId(UUID appId, UUID environmentId); Optional<Deployment> findActiveByAppIdAndEnvironmentId(UUID appId, UUID environmentId);
Optional<Deployment> findActiveByAppIdAndEnvironmentIdExcluding(UUID appId, UUID environmentId, UUID excludeDeploymentId);
UUID create(UUID appId, UUID appVersionId, UUID environmentId, String containerName); UUID create(UUID appId, UUID appVersionId, UUID environmentId, String containerName);
void updateStatus(UUID id, DeploymentStatus status, String containerId, String errorMessage); void updateStatus(UUID id, DeploymentStatus status, String containerId, String errorMessage);
void markDeployed(UUID id); void markDeployed(UUID id);