#!/usr/bin/env bash
# Test: large write interrupted by power loss — recovery behavior
#
# Verifies what happens when a large file write (5 MB) is interrupted
# mid-way by a simulated power loss (SIGKILL). After restarting warpgate,
# documents whether the file is partially present, fully recovered, or
# missing on the NAS.
#
# With rclone VFS write-back=2s, the VFS may have begun uploading or may
# have the file cached locally waiting for write-back. After power loss,
# the partial/complete file should persist in the VFS cache and be
# re-uploaded on restart.
#
# Sequence:
#   1. Start warpgate with write_back=2s.
#   2. Begin writing a 5 MB file in the background.
#   3. Sleep 1s (let the write start but possibly not complete).
#   4. simulate_power_loss.
#   5. Start a fresh warpgate instance.
#   6. Wait for dirty count to reach zero.
#   7. Document what happened to the file on the NAS.
#
# Optional environment:
#   WARPGATE_TEST_BTRFS  Device to repeat the scenario on with a btrfs cache
#                        filesystem. WARNING: it is reformatted with
#                        `mkfs.btrfs -f`, destroying its contents.
#
# This test is observational: it prints INFO lines describing the outcome
# and only fails if one of the harness steps itself fails.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/../harness/helpers.sh"
source "$SCRIPT_DIR/../harness/mock-nas.sh"

# Size the background dd is asked to write (5 x 1 MiB blocks).
expected_size=$((5 * 1024 * 1024))

# State for the optional btrfs path, consulted by _cleanup on exit.
btrfs_cache=""
btrfs_mounted=0

#######################################
# Print the size of a file in bytes.
# Tries GNU stat syntax first, then BSD syntax; prints 0 if both fail
# (e.g. the file vanished between the existence check and the stat).
# Arguments: $1 - path to the file
# Outputs:   size in bytes on stdout
#######################################
_file_size_bytes() {
  stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || echo 0
}

#######################################
# Best-effort lazy unmount of a FUSE mount left behind at $TEST_MOUNT.
# Deliberately NOT gated on `mountpoint -q`: after the FUSE daemon has been
# killed, stat() on the mount point can fail with ENOTCONN, which makes
# mountpoint(1) report failure and would skip the cleanup exactly when it
# is needed. Errors are ignored because "not mounted" is the normal case.
# Globals: TEST_MOUNT (read)
#######################################
_clear_stale_fuse_mount() {
  fusermount3 -uz "$TEST_MOUNT" 2>/dev/null \
    || fusermount -uz "$TEST_MOUNT" 2>/dev/null \
    || true
}

#######################################
# Bring up a fresh warpgate instance after a simulated power loss and wait
# until the harness reports that no dirty files remain.
#######################################
_restart_and_wait_for_flush() {
  _clear_stale_fuse_mount
  start_warpgate
  wait_for_mount 60
  wait_for_rc_api 30
  wait_for_dirty_zero 120
}

#######################################
# EXIT handler. Detaches the btrfs cache filesystem if the script aborted
# while it was still mounted, then runs the harness teardown.
# Globals: btrfs_cache (read), btrfs_mounted (read/written)
#######################################
_cleanup() {
  if (( btrfs_mounted )); then
    # A plain umount fails while warpgate still holds the cache open, so
    # fall back to a lazy detach; the mount goes away once it is unused.
    umount "$btrfs_cache" 2>/dev/null \
      || umount -l "$btrfs_cache" 2>/dev/null \
      || true
    btrfs_mounted=0
  fi
  teardown_test_env
}

require_root
setup_test_env
trap _cleanup EXIT

# Start the mock NAS
start_mock_nas

# Generate config with a short write-back delay
gen_config write_back=2s

# Start warpgate and wait for readiness
start_warpgate
wait_for_mount
wait_for_rc_api

# Start writing a 5 MB file in the background
dd if=/dev/urandom of="$TEST_MOUNT/bigwrite.dat" bs=1M count=5 2>/dev/null &
dd_pid=$!
# _BG_PIDS comes from the harness; presumably it tracks background
# processes for cleanup — confirm in helpers.sh.
_BG_PIDS+=("$dd_pid")
echo "INFO: started 5 MB write in background (PID $dd_pid)"

# Let the write proceed for a moment
sleep 1

# Check if there's anything in the cache yet
cache_file="$CACHE_DIR/vfs/nas/bigwrite.dat"
if [[ -f "$cache_file" ]]; then
  cache_size=$(_file_size_bytes "$cache_file")
  echo "INFO: cache file exists before power loss, size: $cache_size bytes"
else
  echo "INFO: cache file not yet created at time of power loss"
fi

# Simulate power loss — kills everything including the dd
simulate_power_loss

# The dd process is now dead too
# Check what survived in the cache
if [[ -f "$cache_file" ]]; then
  cache_size_after=$(_file_size_bytes "$cache_file")
  echo "INFO: cache file persists after power loss, size: $cache_size_after bytes"
else
  echo "INFO: no cache file found after power loss"
fi

# Clean up any stale FUSE mount, start a fresh warpgate instance and wait
# for any dirty files to be flushed
_restart_and_wait_for_flush

# Document what happened to the file
echo "INFO: --- Recovery results ---"

if nas_file_exists "bigwrite.dat"; then
  nas_size=$(_file_size_bytes "$NAS_ROOT/bigwrite.dat")
  echo "INFO: bigwrite.dat exists on NAS, size: $nas_size bytes"

  if [[ "$nas_size" -eq "$expected_size" ]]; then
    echo "INFO: file is complete (5 MB) — write finished before power loss"
  elif [[ "$nas_size" -gt 0 ]]; then
    echo "INFO: file is partial ($nas_size / $expected_size bytes)"
    echo "INFO: this is expected — write was interrupted mid-stream"
  else
    # The file exists but carries no data; report it rather than staying
    # silent so every outcome is classified.
    echo "INFO: file is empty (0 / $expected_size bytes)"
  fi
else
  echo "INFO: bigwrite.dat NOT found on NAS"
  echo "INFO: the write may not have committed to cache before power loss"
fi

# Also check if the file is visible through the mount
if [[ -f "$TEST_MOUNT/bigwrite.dat" ]]; then
  mount_size=$(_file_size_bytes "$TEST_MOUNT/bigwrite.dat")
  echo "INFO: bigwrite.dat visible through mount, size: $mount_size bytes"
else
  echo "INFO: bigwrite.dat not visible through mount"
fi

# Stop the current warpgate instance before the optional btrfs test
stop_warpgate

# --- Optional btrfs test path ---
# If WARPGATE_TEST_BTRFS is set to a block device, run the same test on a
# btrfs-formatted cache filesystem and compare results vs ext4 above.
if [[ -n "${WARPGATE_TEST_BTRFS:-}" ]]; then
  require_command mkfs.btrfs

  echo "INFO: --- btrfs test path (device: $WARPGATE_TEST_BTRFS) ---"

  # Format the device as btrfs. Its output is noisy, so it is discarded,
  # but a failure is reported explicitly instead of aborting silently.
  if ! mkfs.btrfs -f "$WARPGATE_TEST_BTRFS" >/dev/null 2>&1; then
    echo "ERROR: mkfs.btrfs failed on $WARPGATE_TEST_BTRFS" >&2
    exit 1
  fi

  # Create a btrfs mount point and mount
  btrfs_cache="$TEST_DIR/btrfs-cache"
  mkdir -p "$btrfs_cache"
  mount "$WARPGATE_TEST_BTRFS" "$btrfs_cache"
  btrfs_mounted=1

  # Re-generate config with the btrfs cache dir
  gen_config write_back=2s cache_dir="$btrfs_cache"

  # Start warpgate on btrfs cache
  start_warpgate
  wait_for_mount 60
  wait_for_rc_api 30

  # Write a 5 MB file in the background
  dd if=/dev/urandom of="$TEST_MOUNT/bigwrite-btrfs.dat" bs=1M count=5 2>/dev/null &
  btrfs_dd_pid=$!
  _BG_PIDS+=("$btrfs_dd_pid")

  sleep 1

  # Simulate power loss
  simulate_power_loss

  # Check what survived in the btrfs cache
  btrfs_cache_file="$btrfs_cache/vfs/nas/bigwrite-btrfs.dat"
  if [[ -f "$btrfs_cache_file" ]]; then
    btrfs_size=$(_file_size_bytes "$btrfs_cache_file")
    echo "INFO: btrfs cache file persists after power loss, size: $btrfs_size bytes"
  else
    echo "INFO: no btrfs cache file found after power loss"
  fi

  # Clean up stale FUSE mount, restart warpgate on the btrfs cache and
  # wait for recovery
  _restart_and_wait_for_flush

  # Document btrfs recovery result
  if nas_file_exists "bigwrite-btrfs.dat"; then
    btrfs_nas_size=$(_file_size_bytes "$NAS_ROOT/bigwrite-btrfs.dat")
    echo "INFO: btrfs recovery: bigwrite-btrfs.dat on NAS, size: $btrfs_nas_size bytes"
  else
    echo "INFO: btrfs recovery: bigwrite-btrfs.dat NOT found on NAS"
  fi

  stop_warpgate

  # Unmount btrfs. If this fails the flag stays set, so the EXIT handler
  # retries with a lazy unmount.
  if umount "$btrfs_cache" 2>/dev/null; then
    btrfs_mounted=0
  fi
else
  echo "INFO: skipping btrfs test (set WARPGATE_TEST_BTRFS=/dev/sdX to enable)"
fi

echo "PASS: $(basename "$0" .sh)"