From 7e13b4ee5d95c4eeac318a1c12d552b583a6f042 Mon Sep 17 00:00:00 2001 From: hsiegeln <37154749+hsiegeln@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:28:04 +0200 Subject: [PATCH] fix(installer): use Docker health status instead of exec for verification Replace docker compose exec commands with Docker's built-in health status checks. The exec-based ClickHouse check hung because clickhouse-client waits for the server during initialization. Docker's healthcheck status is already configured in compose and is more reliable. Logto + Bootstrap merged into one check since the healthcheck includes the bootstrap.json file test. Co-Authored-By: Claude Opus 4.6 (1M context) --- installer/install.sh | 84 +++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/installer/install.sh b/installer/install.sh index 79be829..07113ce 100644 --- a/installer/install.sh +++ b/installer/install.sh @@ -929,24 +929,54 @@ docker_compose_down() { # --- Health verification --- -check_service_health() { - local name="$1" check_cmd="$2" timeout_secs="${3:-120}" - local elapsed=0 +wait_for_docker_healthy() { + local name="$1" service="$2" timeout_secs="${3:-300}" local start_time=$(date +%s) - while [ $elapsed -lt $timeout_secs ]; do - if eval "$check_cmd" >/dev/null 2>&1; then - local duration=$(($(date +%s) - start_time)) + while true; do + local elapsed=$(( $(date +%s) - start_time )) + if [ $elapsed -ge $timeout_secs ]; then + printf " ${RED}[FAIL]${NC} %-20s not healthy after %ds\n" "$name" "$timeout_secs" + echo " Check: docker compose -p $COMPOSE_PROJECT logs $service" + return 1 + fi + local health + health=$(cd "$INSTALL_DIR" && docker compose -p "$COMPOSE_PROJECT" ps "$service" --format '{{.Health}}' 2>/dev/null || echo "unknown") + case "$health" in + healthy) + local duration=$(( $(date +%s) - start_time )) + printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration" + return 0 + ;; + unhealthy) + printf " ${RED}[FAIL]${NC} %-20s unhealthy\n" "$name" + echo " Check: docker compose -p $COMPOSE_PROJECT logs $service" + return 1 + ;; + *) + sleep 3 + ;; + esac + done +} + +check_endpoint() { + local name="$1" url="$2" timeout_secs="${3:-120}" + local start_time=$(date +%s) + + while true; do + local elapsed=$(( $(date +%s) - start_time )) + if [ $elapsed -ge $timeout_secs ]; then + printf " ${RED}[FAIL]${NC} %-20s not reachable after %ds\n" "$name" "$timeout_secs" + return 1 + fi + if curl -sfk -o /dev/null "$url" 2>/dev/null; then + local duration=$(( $(date +%s) - start_time )) printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration" return 0 fi - sleep 5 - elapsed=$(( $(date +%s) - start_time )) + sleep 3 done - - printf " ${RED}[FAIL]${NC} %-20s not ready after %ds\n" "$name" "$timeout_secs" - echo " Check: docker compose -p $COMPOSE_PROJECT logs ${name,,}" - return 1 } verify_health() { @@ -954,33 +984,23 @@ verify_health() { log_info "Verifying installation..." local failed=0 - check_service_health "PostgreSQL" \ - "cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T postgres pg_isready -U cameleer" \ - 120 || failed=1 + wait_for_docker_healthy "PostgreSQL" "postgres" 120 || failed=1 - [ $failed -eq 0 ] && check_service_health "ClickHouse" \ - "cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T clickhouse clickhouse-client --password '$CLICKHOUSE_PASSWORD' --query 'SELECT 1'" \ - 120 || failed=1 + [ $failed -eq 0 ] && \ + wait_for_docker_healthy "ClickHouse" "clickhouse" 120 || failed=1 - [ $failed -eq 0 ] && check_service_health "Logto" \ - "cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto node -e \"require('http').get('http://localhost:3001/oidc/.well-known/openid-configuration', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))\"" \ - 120 || failed=1 + [ $failed -eq 0 ] && \ + wait_for_docker_healthy "Logto + Bootstrap" "logto" 300 || failed=1 - [ $failed -eq 0 ] && check_service_health "Bootstrap" \ - "cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto test -f /data/logto-bootstrap.json" \ - 120 || failed=1 + [ $failed -eq 0 ] && \ + check_endpoint "Cameleer SaaS" "https://localhost:${HTTPS_PORT}/platform/api/config" 120 || failed=1 - [ $failed -eq 0 ] && check_service_health "Cameleer SaaS" \ - "curl -sfk https://localhost:${HTTPS_PORT}/platform/api/config" \ - 120 || failed=1 - - [ $failed -eq 0 ] && check_service_health "Traefik routing" \ - "curl -sfk -o /dev/null https://localhost:${HTTPS_PORT}/" \ - 120 || failed=1 + [ $failed -eq 0 ] && \ + check_endpoint "Traefik routing" "https://localhost:${HTTPS_PORT}/" 30 || failed=1 echo "" if [ $failed -ne 0 ]; then - log_error "Installation verification failed. Stack is running � check logs." + log_error "Installation verification failed. Stack is running — check logs." exit 1 fi log_success "All services healthy."