warpgate/tests/04-supervision/test-max-restarts.sh

#!/usr/bin/env bash
#
# Supervision test: smbd restart limit.
#
# Repeatedly kills smbd while warpgate is running and verifies that:
#   1. the supervisor logs that smbd exceeded the maximum number of restarts;
#   2. the warpgate process itself is still alive afterwards.
#
# Relies on helpers sourced from ../harness (require_root, setup_test_env,
# teardown_test_env, start_mock_nas, gen_config, start_warpgate,
# wait_for_log_line, assert_log_contains) and on WARPGATE_PID, which is
# presumably exported by start_warpgate -- confirm in helpers.sh.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/../harness/helpers.sh"
source "$SCRIPT_DIR/../harness/mock-nas.sh"

require_root
setup_test_env
# Register cleanup immediately so every later exit path tears the env down.
trap teardown_test_env EXIT

# Start the mock NAS so rclone can connect via SFTP
start_mock_nas

# Generate a default config pointing at the mock NAS
gen_config

# Start warpgate and wait for full startup
start_warpgate
wait_for_log_line "Supervision active" 60

# Restart limit the supervisor is expected to enforce (MAX_RESTARTS = 3).
# Kept in one place so the kill loop and the asserted log message agree.
expected_max_restarts=3

# Kill smbd (limit + 1) times in rapid succession: after the last allowed
# restart attempt plus one more kill, the supervisor should give up.
kill_rounds=$((expected_max_restarts + 1))

for ((round = 1; round <= kill_rounds; round++)); do
    # pgrep -f matches on the full command line, so it may report several
    # PIDs (e.g. forked smbd helpers sharing the parent's arguments).
    # Collect them into an array instead of a single string, which would
    # turn into one malformed argument for kill.
    mapfile -t smbd_pids < <(pgrep -f "smbd.*--foreground")
    if ((${#smbd_pids[@]} == 0)); then
        # smbd may already be gone after exceeding max restarts
        break
    fi

    # A matched process can exit between pgrep and kill; that race must not
    # abort the test under `set -e`. The log assertions below decide
    # whether the supervisor behaved correctly.
    kill "${smbd_pids[@]}" || true

    if ((round < kill_rounds)); then
        # Wait for restart between kills (backoff: 2s, 4s, 6s + margin)
        restart_wait=$((round * 2 + 2))
        sleep "$restart_wait"
    fi
done

# Wait for the supervisor to log the "exceeded max restarts" message
wait_for_log_line "exceeded max restarts" 30

# Verify the full give-up message
assert_log_contains "smbd exceeded max restarts (${expected_max_restarts}), giving up"

# Verify warpgate itself is still running (smbd failure does not cause
# full shutdown -- only rclone mount death does that).
# `kill -0` sends no signal; it only checks that the PID can be signalled.
if ! kill -0 "$WARPGATE_PID" 2>/dev/null; then
    echo "FAIL: warpgate exited unexpectedly after smbd max restarts" >&2
    exit 1
fi

echo "PASS: $(basename "$0" .sh)"