warpgate/tests/08-crash-recovery/test-sigkill-dirty-recovery.sh
grabbit a2d49137f9 Add comprehensive test suite: 63 integration tests + 110 Rust unit tests
Integration tests (tests/):
- 9 categories covering config, lifecycle, signals, supervision,
  cache, writeback, network faults, crash recovery, and CLI
- Shell-based harness with mock NAS (network namespace + SFTP),
  fault injection (tc netem), and power loss simulation
- TAP format runner (run-all.sh) with proper SKIP detection

Rust unit tests (warpgate/src/):
- 110 tests across 14 modules, all passing in 0.01s
- Config parsing, defaults validation, RestartTracker logic,
  RC API response parsing, rclone arg generation, service
  config generation, CLI output formatting, warmup path logic

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 11:21:35 +08:00

101 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Test: dirty files survive SIGKILL and are re-uploaded on restart
#
# Verifies that after a simulated power loss (SIGKILL all warpgate processes),
# dirty files that were pending write-back persist in the rclone VFS cache on
# disk. When warpgate is restarted, rclone re-reads the VFS cache directory,
# discovers the pending uploads, and flushes them to the remote NAS.
#
# Sequence:
# 1. Start warpgate with a long write-back delay (60s) so writes stay dirty.
# 2. Block the network so write-back cannot happen even accidentally.
# 3. Write a file through the FUSE mount.
# 4. Verify the file is counted as dirty.
# 5. simulate_power_loss (kill -9 everything + sync).
# 6. Restore the network.
# 7. Start a fresh warpgate instance.
# 8. Wait for dirty count to reach zero (file re-uploaded).
# 9. Verify the file exists on the NAS with correct content.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=../harness/helpers.sh
source "$SCRIPT_DIR/../harness/helpers.sh"
# shellcheck source=../harness/mock-nas.sh
source "$SCRIPT_DIR/../harness/mock-nas.sh"
require_root
setup_test_env
trap teardown_test_env EXIT

# Print a FAIL diagnostic to stderr and abort; the EXIT trap performs
# environment (and, while armed below, network) cleanup.
fail() {
  echo "FAIL: $*" >&2
  exit 1
}

# Start the mock NAS
start_mock_nas
# Generate config with a very long write-back delay so files stay dirty
gen_config write_back=60s
# Start warpgate and wait for full readiness
start_warpgate
wait_for_mount
wait_for_rc_api

# Block the network to prevent any write-back from occurring.
# While the network is down, ANY failure — not just the explicit checks
# below — must restore it before teardown, otherwise an aborted run (via
# set -e) would leave the namespace blocked for subsequent tests. Widen
# the EXIT trap for this window; `|| true` keeps a redundant restore from
# masking the real exit status.
inject_network_down
trap 'inject_network_up || true; teardown_test_env' EXIT

# Write a file through the FUSE mount — it will be cached locally
echo "crash-recovery-data" > "$TEST_MOUNT/crash.txt"
# Allow time for VFS to register the dirty file
sleep 2
# Verify the file is counted as dirty (pending upload)
dirty=$(get_dirty_count)
if [[ "$dirty" -lt 1 ]]; then
  fail "expected dirty count > 0 before power loss, got $dirty"
fi
echo "INFO: dirty count before power loss: $dirty"
# Simulate power loss — SIGKILL all warpgate processes + sync
simulate_power_loss
# Verify the cache file persists on disk after the crash
if [[ ! -f "$CACHE_DIR/vfs/nas/crash.txt" ]]; then
  fail "cache file missing after power loss: $CACHE_DIR/vfs/nas/crash.txt"
fi
echo "INFO: cache file persists on disk after SIGKILL"

# Restore the network so write-back can proceed on restart, then narrow
# the EXIT trap back to plain teardown (network no longer needs cleanup).
inject_network_up
trap teardown_test_env EXIT
sleep 2

# Clean up any stale FUSE mount left behind by the SIGKILLed instance.
# Try fusermount3 first (libfuse3), fall back to fusermount (libfuse2);
# -z is a lazy unmount since the dead daemon can't answer the unmount.
if mountpoint -q "$TEST_MOUNT" 2>/dev/null; then
  fusermount3 -uz "$TEST_MOUNT" 2>/dev/null || fusermount -uz "$TEST_MOUNT" 2>/dev/null || true
fi

# Start a fresh warpgate instance
start_warpgate
wait_for_mount 60
wait_for_rc_api 30
# Wait for the dirty file to be re-uploaded (rclone finds it in cache)
wait_for_dirty_zero 120
# Verify the file now exists on the NAS
if ! nas_file_exists "crash.txt"; then
  fail "crash.txt not found on NAS after recovery"
fi
# Verify the content matches what we originally wrote
actual=$(nas_read_file "crash.txt")
if [[ "$actual" != "crash-recovery-data" ]]; then
  echo "FAIL: NAS file content mismatch after recovery" >&2
  echo " expected: crash-recovery-data" >&2
  echo " actual: $actual" >&2
  exit 1
fi
echo "PASS: $(basename "$0" .sh)"