fix(installer): use Docker health status instead of exec for verification
All checks were successful
CI / build (push) Successful in 1m10s
CI / docker (push) Successful in 16s

Replace docker compose exec commands with Docker's built-in health
status checks. The exec-based ClickHouse check hung because
clickhouse-client waits for the server during initialization.
Docker's healthcheck status is already configured in compose and
is more reliable. The Logto and Bootstrap checks are merged into one,
since the Logto healthcheck already includes the bootstrap.json file test.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
hsiegeln
2026-04-13 18:28:04 +02:00
parent 85eabd86ef
commit 7e13b4ee5d

View File

@@ -929,24 +929,54 @@ docker_compose_down() {
# --- Health verification ---
check_service_health() {
local name="$1" check_cmd="$2" timeout_secs="${3:-120}"
local elapsed=0
wait_for_docker_healthy() {
local name="$1" service="$2" timeout_secs="${3:-300}"
local start_time=$(date +%s)
while [ $elapsed -lt $timeout_secs ]; do
if eval "$check_cmd" >/dev/null 2>&1; then
local duration=$(($(date +%s) - start_time))
while true; do
local elapsed=$(( $(date +%s) - start_time ))
if [ $elapsed -ge $timeout_secs ]; then
printf " ${RED}[FAIL]${NC} %-20s not healthy after %ds\n" "$name" "$timeout_secs"
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
return 1
fi
local health
health=$(cd "$INSTALL_DIR" && docker compose -p "$COMPOSE_PROJECT" ps "$service" --format '{{.Health}}' 2>/dev/null || echo "unknown")
case "$health" in
healthy)
local duration=$(( $(date +%s) - start_time ))
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
return 0
;;
unhealthy)
printf " ${RED}[FAIL]${NC} %-20s unhealthy\n" "$name"
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
return 1
;;
*)
sleep 3
;;
esac
done
}
check_endpoint() {
local name="$1" url="$2" timeout_secs="${3:-120}"
local start_time=$(date +%s)
while true; do
local elapsed=$(( $(date +%s) - start_time ))
if [ $elapsed -ge $timeout_secs ]; then
printf " ${RED}[FAIL]${NC} %-20s not reachable after %ds\n" "$name" "$timeout_secs"
return 1
fi
if curl -sfk -o /dev/null "$url" 2>/dev/null; then
local duration=$(( $(date +%s) - start_time ))
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
return 0
fi
sleep 5
elapsed=$(( $(date +%s) - start_time ))
sleep 3
done
printf " ${RED}[FAIL]${NC} %-20s not ready after %ds\n" "$name" "$timeout_secs"
echo " Check: docker compose -p $COMPOSE_PROJECT logs ${name,,}"
return 1
}
verify_health() {
@@ -954,33 +984,23 @@ verify_health() {
log_info "Verifying installation..."
local failed=0
check_service_health "PostgreSQL" \
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T postgres pg_isready -U cameleer" \
120 || failed=1
wait_for_docker_healthy "PostgreSQL" "postgres" 120 || failed=1
[ $failed -eq 0 ] && check_service_health "ClickHouse" \
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T clickhouse clickhouse-client --password '$CLICKHOUSE_PASSWORD' --query 'SELECT 1'" \
120 || failed=1
[ $failed -eq 0 ] && \
wait_for_docker_healthy "ClickHouse" "clickhouse" 120 || failed=1
[ $failed -eq 0 ] && check_service_health "Logto" \
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto node -e \"require('http').get('http://localhost:3001/oidc/.well-known/openid-configuration', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))\"" \
120 || failed=1
[ $failed -eq 0 ] && \
wait_for_docker_healthy "Logto + Bootstrap" "logto" 300 || failed=1
[ $failed -eq 0 ] && check_service_health "Bootstrap" \
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto test -f /data/logto-bootstrap.json" \
120 || failed=1
[ $failed -eq 0 ] && \
check_endpoint "Cameleer SaaS" "https://localhost:${HTTPS_PORT}/platform/api/config" 120 || failed=1
[ $failed -eq 0 ] && check_service_health "Cameleer SaaS" \
"curl -sfk https://localhost:${HTTPS_PORT}/platform/api/config" \
120 || failed=1
[ $failed -eq 0 ] && check_service_health "Traefik routing" \
"curl -sfk -o /dev/null https://localhost:${HTTPS_PORT}/" \
120 || failed=1
[ $failed -eq 0 ] && \
check_endpoint "Traefik routing" "https://localhost:${HTTPS_PORT}/" 30 || failed=1
echo ""
if [ $failed -ne 0 ]; then
log_error "Installation verification failed. Stack is running — check logs."
log_error "Installation verification failed. Stack is running check logs."
exit 1
fi
log_success "All services healthy."