warpgate/tests/08-crash-recovery/test-sigkill-dirty-recovery.sh
grabbit a2d49137f9 Add comprehensive test suite: 63 integration tests + 110 Rust unit tests
Integration tests (tests/):
- 9 categories covering config, lifecycle, signals, supervision,
  cache, writeback, network faults, crash recovery, and CLI
- Shell-based harness with mock NAS (network namespace + SFTP),
  fault injection (tc netem), and power loss simulation
- TAP format runner (run-all.sh) with proper SKIP detection

Rust unit tests (warpgate/src/):
- 110 tests across 14 modules, all passing in 0.01s
- Config parsing, defaults validation, RestartTracker logic,
  RC API response parsing, rclone arg generation, service
  config generation, CLI output formatting, warmup path logic

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 11:21:35 +08:00

101 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Test: dirty files survive SIGKILL and are re-uploaded on restart
#
# Verifies that after a simulated power loss (SIGKILL all warpgate processes),
# dirty files that were pending write-back persist in the rclone VFS cache on
# disk. When warpgate is restarted, rclone re-reads the VFS cache directory,
# discovers the pending uploads, and flushes them to the remote NAS.
#
# Sequence:
# 1. Start warpgate with a long write-back delay (60s) so writes stay dirty.
# 2. Block the network so write-back cannot happen even accidentally.
# 3. Write a file through the FUSE mount.
# 4. Verify the file is counted as dirty.
# 5. simulate_power_loss (kill -9 everything + sync).
# 6. Restore the network.
# 7. Start a fresh warpgate instance.
# 8. Wait for dirty count to reach zero (file re-uploaded).
# 9. Verify the file exists on the NAS with correct content.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=../harness/helpers.sh
source "$SCRIPT_DIR/../harness/helpers.sh"
# shellcheck source=../harness/mock-nas.sh
source "$SCRIPT_DIR/../harness/mock-nas.sh"
require_root
setup_test_env
trap teardown_test_env EXIT

# Print a FAIL diagnostic to stderr and abort; the EXIT trap performs
# environment (and, while armed below, network) cleanup.
fail() {
  echo "FAIL: $*" >&2
  exit 1
}

# Start the mock NAS
start_mock_nas
# Generate config with a very long write-back delay so files stay dirty
gen_config write_back=60s
# Start warpgate and wait for full readiness
start_warpgate
wait_for_mount
wait_for_rc_api

# Block the network to prevent any write-back from occurring.
# While the network is down, ANY failure — not just the explicit checks
# below — must restore it before teardown, otherwise an aborted run (via
# set -e) would leave the namespace blocked for subsequent tests. Widen
# the EXIT trap for this window; `|| true` keeps a redundant restore from
# masking the real exit status.
inject_network_down
trap 'inject_network_up || true; teardown_test_env' EXIT

# Write a file through the FUSE mount — it will be cached locally
echo "crash-recovery-data" > "$TEST_MOUNT/crash.txt"
# Allow time for VFS to register the dirty file
sleep 2
# Verify the file is counted as dirty (pending upload)
dirty=$(get_dirty_count)
if [[ "$dirty" -lt 1 ]]; then
  fail "expected dirty count > 0 before power loss, got $dirty"
fi
echo "INFO: dirty count before power loss: $dirty"
# Simulate power loss — SIGKILL all warpgate processes + sync
simulate_power_loss
# Verify the cache file persists on disk after the crash
if [[ ! -f "$CACHE_DIR/vfs/nas/crash.txt" ]]; then
  fail "cache file missing after power loss: $CACHE_DIR/vfs/nas/crash.txt"
fi
echo "INFO: cache file persists on disk after SIGKILL"

# Restore the network so write-back can proceed on restart, then narrow
# the EXIT trap back to plain teardown (network no longer needs cleanup).
inject_network_up
trap teardown_test_env EXIT
sleep 2

# Clean up any stale FUSE mount left behind by the SIGKILLed instance.
# Try fusermount3 first (libfuse3), fall back to fusermount (libfuse2);
# -z is a lazy unmount since the dead daemon can't answer the unmount.
if mountpoint -q "$TEST_MOUNT" 2>/dev/null; then
  fusermount3 -uz "$TEST_MOUNT" 2>/dev/null || fusermount -uz "$TEST_MOUNT" 2>/dev/null || true
fi

# Start a fresh warpgate instance
start_warpgate
wait_for_mount 60
wait_for_rc_api 30
# Wait for the dirty file to be re-uploaded (rclone finds it in cache)
wait_for_dirty_zero 120
# Verify the file now exists on the NAS
if ! nas_file_exists "crash.txt"; then
  fail "crash.txt not found on NAS after recovery"
fi
# Verify the content matches what we originally wrote
actual=$(nas_read_file "crash.txt")
if [[ "$actual" != "crash-recovery-data" ]]; then
  echo "FAIL: NAS file content mismatch after recovery" >&2
  echo " expected: crash-recovery-data" >&2
  echo " actual: $actual" >&2
  exit 1
fi
echo "PASS: $(basename "$0" .sh)"