fix(installer): use Docker health status instead of exec for verification
Replace the `docker compose exec` commands with checks of Docker's built-in health status. The exec-based ClickHouse check hung because clickhouse-client waits for the server while it is still initializing. The Docker healthchecks are already configured in the compose file and are more reliable. The Logto and Bootstrap checks are merged into a single check, since the Logto healthcheck already includes the bootstrap.json file test.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -929,24 +929,54 @@ docker_compose_down() {
|
||||
|
||||
# --- Health verification ---
|
||||
|
||||
check_service_health() {
|
||||
local name="$1" check_cmd="$2" timeout_secs="${3:-120}"
|
||||
local elapsed=0
|
||||
wait_for_docker_healthy() {
|
||||
local name="$1" service="$2" timeout_secs="${3:-300}"
|
||||
local start_time=$(date +%s)
|
||||
|
||||
while [ $elapsed -lt $timeout_secs ]; do
|
||||
if eval "$check_cmd" >/dev/null 2>&1; then
|
||||
local duration=$(($(date +%s) - start_time))
|
||||
while true; do
|
||||
local elapsed=$(( $(date +%s) - start_time ))
|
||||
if [ $elapsed -ge $timeout_secs ]; then
|
||||
printf " ${RED}[FAIL]${NC} %-20s not healthy after %ds\n" "$name" "$timeout_secs"
|
||||
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
|
||||
return 1
|
||||
fi
|
||||
local health
|
||||
health=$(cd "$INSTALL_DIR" && docker compose -p "$COMPOSE_PROJECT" ps "$service" --format '{{.Health}}' 2>/dev/null || echo "unknown")
|
||||
case "$health" in
|
||||
healthy)
|
||||
local duration=$(( $(date +%s) - start_time ))
|
||||
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
|
||||
return 0
|
||||
;;
|
||||
unhealthy)
|
||||
printf " ${RED}[FAIL]${NC} %-20s unhealthy\n" "$name"
|
||||
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
|
||||
return 1
|
||||
;;
|
||||
*)
|
||||
sleep 3
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
check_endpoint() {
|
||||
local name="$1" url="$2" timeout_secs="${3:-120}"
|
||||
local start_time=$(date +%s)
|
||||
|
||||
while true; do
|
||||
local elapsed=$(( $(date +%s) - start_time ))
|
||||
if [ $elapsed -ge $timeout_secs ]; then
|
||||
printf " ${RED}[FAIL]${NC} %-20s not reachable after %ds\n" "$name" "$timeout_secs"
|
||||
return 1
|
||||
fi
|
||||
if curl -sfk -o /dev/null "$url" 2>/dev/null; then
|
||||
local duration=$(( $(date +%s) - start_time ))
|
||||
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
|
||||
return 0
|
||||
fi
|
||||
sleep 5
|
||||
elapsed=$(( $(date +%s) - start_time ))
|
||||
sleep 3
|
||||
done
|
||||
|
||||
printf " ${RED}[FAIL]${NC} %-20s not ready after %ds\n" "$name" "$timeout_secs"
|
||||
echo " Check: docker compose -p $COMPOSE_PROJECT logs ${name,,}"
|
||||
return 1
|
||||
}
|
||||
|
||||
verify_health() {
|
||||
@@ -954,33 +984,23 @@ verify_health() {
|
||||
log_info "Verifying installation..."
|
||||
local failed=0
|
||||
|
||||
check_service_health "PostgreSQL" \
|
||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T postgres pg_isready -U cameleer" \
|
||||
120 || failed=1
|
||||
wait_for_docker_healthy "PostgreSQL" "postgres" 120 || failed=1
|
||||
|
||||
[ $failed -eq 0 ] && check_service_health "ClickHouse" \
|
||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T clickhouse clickhouse-client --password '$CLICKHOUSE_PASSWORD' --query 'SELECT 1'" \
|
||||
120 || failed=1
|
||||
[ $failed -eq 0 ] && \
|
||||
wait_for_docker_healthy "ClickHouse" "clickhouse" 120 || failed=1
|
||||
|
||||
[ $failed -eq 0 ] && check_service_health "Logto" \
|
||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto node -e \"require('http').get('http://localhost:3001/oidc/.well-known/openid-configuration', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))\"" \
|
||||
120 || failed=1
|
||||
[ $failed -eq 0 ] && \
|
||||
wait_for_docker_healthy "Logto + Bootstrap" "logto" 300 || failed=1
|
||||
|
||||
[ $failed -eq 0 ] && check_service_health "Bootstrap" \
|
||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto test -f /data/logto-bootstrap.json" \
|
||||
120 || failed=1
|
||||
[ $failed -eq 0 ] && \
|
||||
check_endpoint "Cameleer SaaS" "https://localhost:${HTTPS_PORT}/platform/api/config" 120 || failed=1
|
||||
|
||||
[ $failed -eq 0 ] && check_service_health "Cameleer SaaS" \
|
||||
"curl -sfk https://localhost:${HTTPS_PORT}/platform/api/config" \
|
||||
120 || failed=1
|
||||
|
||||
[ $failed -eq 0 ] && check_service_health "Traefik routing" \
|
||||
"curl -sfk -o /dev/null https://localhost:${HTTPS_PORT}/" \
|
||||
120 || failed=1
|
||||
[ $failed -eq 0 ] && \
|
||||
check_endpoint "Traefik routing" "https://localhost:${HTTPS_PORT}/" 30 || failed=1
|
||||
|
||||
echo ""
|
||||
if [ $failed -ne 0 ]; then
|
||||
log_error "Installation verification failed. Stack is running <EFBFBD> check logs."
|
||||
log_error "Installation verification failed. Stack is running — check logs."
|
||||
exit 1
|
||||
fi
|
||||
log_success "All services healthy."
|
||||
|
||||
Reference in New Issue
Block a user