#!/usr/bin/env bash # Test: SIGKILL to rclone triggers full warpgate shutdown # # Simulates an OOM kill of the rclone mount process. The supervisor should # detect rclone's unexpected exit, log "rclone mount exited unexpectedly", # and perform a full shutdown of warpgate (non-zero exit code). # # This is distinct from 04-supervision/test-rclone-death-shutdown.sh which # uses a normal kill; here we use SIGKILL to simulate the kernel OOM killer. # # Sequence: # 1. Start warpgate, wait for supervision to become active. # 2. Find the rclone mount PID. # 3. Send SIGKILL to rclone (simulating OOM kill). # 4. Verify warpgate exits within 30s. # 5. Verify the log contains "rclone mount exited unexpectedly". # 6. Verify warpgate exited with non-zero code. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" source "$SCRIPT_DIR/../harness/helpers.sh" source "$SCRIPT_DIR/../harness/mock-nas.sh" require_root setup_test_env trap teardown_test_env EXIT # Start the mock NAS start_mock_nas # Generate a default config gen_config # Start warpgate and wait for the supervisor to be active start_warpgate wait_for_log_line "Supervision active" 60 # Find the rclone mount process rclone_pid=$(pgrep -f "rclone mount.*$TEST_MOUNT") if [[ -z "$rclone_pid" ]]; then echo "FAIL: rclone mount process not found" >&2 exit 1 fi echo "INFO: rclone mount PID is $rclone_pid" # Save warpgate PID before it exits local_pid="$WARPGATE_PID" # SIGKILL rclone — simulates OOM killer kill -9 "$rclone_pid" # Wait for warpgate to exit (supervisor should detect rclone death) wait_for_exit "$local_pid" 30 # Verify the log contains the expected critical error message assert_log_contains "rclone mount exited unexpectedly" echo "INFO: log confirms rclone death was detected" # Verify warpgate exited with non-zero code exit_code=0 wait "$local_pid" 2>/dev/null || exit_code=$? if [[ "$exit_code" -eq 0 ]]; then echo "FAIL: expected non-zero exit code after rclone SIGKILL, got 0" >&2 exit 1 fi echo "INFO: warpgate exited with code $exit_code (non-zero, as expected)" # Verify warpgate is no longer running if kill -0 "$local_pid" 2>/dev/null; then echo "FAIL: warpgate is still running after rclone SIGKILL" >&2 exit 1 fi # Clear WARPGATE_PID so teardown does not try to stop a dead process WARPGATE_PID="" echo "PASS: $(basename "$0" .sh)"