warpgate/tests/08-crash-recovery/test-oom-kill-rclone.sh
grabbit a2d49137f9 Add comprehensive test suite: 63 integration tests + 110 Rust unit tests
Integration tests (tests/):
- 9 categories covering config, lifecycle, signals, supervision,
  cache, writeback, network faults, crash recovery, and CLI
- Shell-based harness with mock NAS (network namespace + SFTP),
  fault injection (tc netem), and power loss simulation
- TAP format runner (run-all.sh) with proper SKIP detection

Rust unit tests (warpgate/src/):
- 110 tests across 14 modules, all passing in 0.01s
- Config parsing, defaults validation, RestartTracker logic,
  RC API response parsing, rclone arg generation, service
  config generation, CLI output formatting, warmup path logic

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 11:21:35 +08:00

77 lines
2.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# Test: SIGKILL to rclone triggers full warpgate shutdown
#
# Simulates an OOM kill of the rclone mount process. The supervisor should
# detect rclone's unexpected exit, log "rclone mount exited unexpectedly",
# and perform a full shutdown of warpgate (non-zero exit code).
#
# This is distinct from 04-supervision/test-rclone-death-shutdown.sh which
# uses a normal kill; here we use SIGKILL to simulate the kernel OOM killer.
#
# Sequence:
# 1. Start warpgate, wait for supervision to become active.
# 2. Find the rclone mount PID.
# 3. Send SIGKILL to rclone (simulating OOM kill).
# 4. Verify warpgate exits within 30s.
# 5. Verify the log contains "rclone mount exited unexpectedly".
# 6. Verify warpgate exited with non-zero code.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/../harness/helpers.sh"
source "$SCRIPT_DIR/../harness/mock-nas.sh"
require_root
setup_test_env
trap teardown_test_env EXIT
# Start the mock NAS
start_mock_nas
# Generate a default config
gen_config
# Start warpgate and wait for the supervisor to be active
start_warpgate
wait_for_log_line "Supervision active" 60
# Find the rclone mount process
rclone_pid=$(pgrep -f "rclone mount.*$TEST_MOUNT")
if [[ -z "$rclone_pid" ]]; then
echo "FAIL: rclone mount process not found" >&2
exit 1
fi
echo "INFO: rclone mount PID is $rclone_pid"
# Save warpgate PID before it exits
local_pid="$WARPGATE_PID"
# SIGKILL rclone — simulates OOM killer
kill -9 "$rclone_pid"
# Wait for warpgate to exit (supervisor should detect rclone death)
wait_for_exit "$local_pid" 30
# Verify the log contains the expected critical error message
assert_log_contains "rclone mount exited unexpectedly"
echo "INFO: log confirms rclone death was detected"
# Verify warpgate exited with non-zero code
exit_code=0
wait "$local_pid" 2>/dev/null || exit_code=$?
if [[ "$exit_code" -eq 0 ]]; then
echo "FAIL: expected non-zero exit code after rclone SIGKILL, got 0" >&2
exit 1
fi
echo "INFO: warpgate exited with code $exit_code (non-zero, as expected)"
# Verify warpgate is no longer running
if kill -0 "$local_pid" 2>/dev/null; then
echo "FAIL: warpgate is still running after rclone SIGKILL" >&2
exit 1
fi
# Clear WARPGATE_PID so teardown does not try to stop a dead process
WARPGATE_PID=""
echo "PASS: $(basename "$0" .sh)"