fix(installer): use Docker health status instead of exec for verification
Replace docker compose exec commands with Docker's built-in health status checks. The exec-based ClickHouse check hung because clickhouse-client waits for the server during initialization. Docker's healthcheck status is already configured in the compose file and is more reliable. The Logto and Bootstrap checks are merged into one, since the Logto healthcheck already includes the bootstrap.json file test. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -929,24 +929,54 @@ docker_compose_down() {
|
|||||||
|
|
||||||
# --- Health verification ---
|
# --- Health verification ---
|
||||||
|
|
||||||
check_service_health() {
|
wait_for_docker_healthy() {
|
||||||
local name="$1" check_cmd="$2" timeout_secs="${3:-120}"
|
local name="$1" service="$2" timeout_secs="${3:-300}"
|
||||||
local elapsed=0
|
|
||||||
local start_time=$(date +%s)
|
local start_time=$(date +%s)
|
||||||
|
|
||||||
while [ $elapsed -lt $timeout_secs ]; do
|
while true; do
|
||||||
if eval "$check_cmd" >/dev/null 2>&1; then
|
local elapsed=$(( $(date +%s) - start_time ))
|
||||||
local duration=$(($(date +%s) - start_time))
|
if [ $elapsed -ge $timeout_secs ]; then
|
||||||
|
printf " ${RED}[FAIL]${NC} %-20s not healthy after %ds\n" "$name" "$timeout_secs"
|
||||||
|
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
local health
|
||||||
|
health=$(cd "$INSTALL_DIR" && docker compose -p "$COMPOSE_PROJECT" ps "$service" --format '{{.Health}}' 2>/dev/null || echo "unknown")
|
||||||
|
case "$health" in
|
||||||
|
healthy)
|
||||||
|
local duration=$(( $(date +%s) - start_time ))
|
||||||
|
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
|
||||||
|
return 0
|
||||||
|
;;
|
||||||
|
unhealthy)
|
||||||
|
printf " ${RED}[FAIL]${NC} %-20s unhealthy\n" "$name"
|
||||||
|
echo " Check: docker compose -p $COMPOSE_PROJECT logs $service"
|
||||||
|
return 1
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
sleep 3
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
check_endpoint() {
|
||||||
|
local name="$1" url="$2" timeout_secs="${3:-120}"
|
||||||
|
local start_time=$(date +%s)
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
local elapsed=$(( $(date +%s) - start_time ))
|
||||||
|
if [ $elapsed -ge $timeout_secs ]; then
|
||||||
|
printf " ${RED}[FAIL]${NC} %-20s not reachable after %ds\n" "$name" "$timeout_secs"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
if curl -sfk -o /dev/null "$url" 2>/dev/null; then
|
||||||
|
local duration=$(( $(date +%s) - start_time ))
|
||||||
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
|
printf " ${GREEN}[ok]${NC} %-20s ready (%ds)\n" "$name" "$duration"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
sleep 5
|
sleep 3
|
||||||
elapsed=$(( $(date +%s) - start_time ))
|
|
||||||
done
|
done
|
||||||
|
|
||||||
printf " ${RED}[FAIL]${NC} %-20s not ready after %ds\n" "$name" "$timeout_secs"
|
|
||||||
echo " Check: docker compose -p $COMPOSE_PROJECT logs ${name,,}"
|
|
||||||
return 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
verify_health() {
|
verify_health() {
|
||||||
@@ -954,33 +984,23 @@ verify_health() {
|
|||||||
log_info "Verifying installation..."
|
log_info "Verifying installation..."
|
||||||
local failed=0
|
local failed=0
|
||||||
|
|
||||||
check_service_health "PostgreSQL" \
|
wait_for_docker_healthy "PostgreSQL" "postgres" 120 || failed=1
|
||||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T postgres pg_isready -U cameleer" \
|
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
[ $failed -eq 0 ] && check_service_health "ClickHouse" \
|
[ $failed -eq 0 ] && \
|
||||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T clickhouse clickhouse-client --password '$CLICKHOUSE_PASSWORD' --query 'SELECT 1'" \
|
wait_for_docker_healthy "ClickHouse" "clickhouse" 120 || failed=1
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
[ $failed -eq 0 ] && check_service_health "Logto" \
|
[ $failed -eq 0 ] && \
|
||||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto node -e \"require('http').get('http://localhost:3001/oidc/.well-known/openid-configuration', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))\"" \
|
wait_for_docker_healthy "Logto + Bootstrap" "logto" 300 || failed=1
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
[ $failed -eq 0 ] && check_service_health "Bootstrap" \
|
[ $failed -eq 0 ] && \
|
||||||
"cd '$INSTALL_DIR' && docker compose -p '$COMPOSE_PROJECT' exec -T logto test -f /data/logto-bootstrap.json" \
|
check_endpoint "Cameleer SaaS" "https://localhost:${HTTPS_PORT}/platform/api/config" 120 || failed=1
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
[ $failed -eq 0 ] && check_service_health "Cameleer SaaS" \
|
[ $failed -eq 0 ] && \
|
||||||
"curl -sfk https://localhost:${HTTPS_PORT}/platform/api/config" \
|
check_endpoint "Traefik routing" "https://localhost:${HTTPS_PORT}/" 30 || failed=1
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
[ $failed -eq 0 ] && check_service_health "Traefik routing" \
|
|
||||||
"curl -sfk -o /dev/null https://localhost:${HTTPS_PORT}/" \
|
|
||||||
120 || failed=1
|
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
if [ $failed -ne 0 ]; then
|
if [ $failed -ne 0 ]; then
|
||||||
log_error "Installation verification failed. Stack is running — check logs."
|
log_error "Installation verification failed. Stack is running — check logs."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
log_success "All services healthy."
|
log_success "All services healthy."
|
||||||
|
|||||||
Reference in New Issue
Block a user