#!/usr/bin/env bash
# Test: stale FUSE mount left after rclone SIGKILL
#
# Verifies behavior when rclone is killed with SIGKILL, leaving a stale
# FUSE mount point registered in /proc/mounts. The supervisor should
# detect rclone death and shut down. On a subsequent start, warpgate
# (or rclone) should either clean up the stale mount via fusermount -uz
# or fail with a clear error message.
#
# Sequence:
# 1. Start warpgate, wait for mount + RC API.
# 2. Kill rclone with SIGKILL (leaves stale FUSE mount).
# 3. Wait for supervisor to detect rclone death and shut down.
# 4. Check if mount point is still registered in /proc/mounts.
# 5. Attempt to start warpgate again.
# 6. Document whether it recovers or fails with a clear error.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/../harness/helpers.sh"
source "$SCRIPT_DIR/../harness/mock-nas.sh"

require_root
setup_test_env
trap teardown_test_env EXIT

# Start the mock NAS
start_mock_nas

# Generate a default config
gen_config

# Start warpgate and wait for full readiness
start_warpgate
wait_for_mount
wait_for_rc_api

# Find the rclone mount process.
# The `|| true` guard is required: under `set -e`, a no-match pgrep
# (exit 1) would otherwise abort the script before the FAIL branch
# below could ever run.
# NOTE(review): assumes exactly one matching rclone process; if several
# matched, $rclone_pid would be multi-line and kill would reject it.
rclone_pid=$(pgrep -f "rclone mount.*$TEST_MOUNT" || true)
if [[ -z "$rclone_pid" ]]; then
  echo "FAIL: rclone mount process not found" >&2
  exit 1
fi
echo "INFO: rclone mount PID is $rclone_pid"

# Kill rclone with SIGKILL — this leaves the FUSE mount stale
kill -9 "$rclone_pid"

# Wait for the supervisor to detect rclone death and shut down warpgate
wait_for_exit "$WARPGATE_PID" 30
echo "INFO: warpgate exited after rclone SIGKILL"

# Clear WARPGATE_PID since the process is dead
WARPGATE_PID=""

# Check if mount point is still registered in /proc/mounts.
# -F --: match the mount path as a literal string, not a regex.
if grep -qF -- "$TEST_MOUNT" /proc/mounts 2>/dev/null; then
  echo "INFO: stale FUSE mount detected in /proc/mounts"
else
  echo "INFO: mount point already cleaned up (not in /proc/mounts)"
fi

# Do NOT manually run fusermount — we want to see if warpgate handles this

# Attempt to start a new warpgate instance
echo "INFO: attempting to start warpgate with potential stale mount..."
start_warpgate

# Give it time to either succeed or fail
sleep 5

if kill -0 "$WARPGATE_PID" 2>/dev/null; then
  # Process is still running — check if it mounted successfully
  if wait_for_mount 30 2>/dev/null; then
    echo "INFO: warpgate recovered — stale mount was cleaned up automatically"
    echo "INFO: mount is active and working"
    # Verify the mount is functional by testing file access
    if ls "$TEST_MOUNT" > /dev/null 2>&1; then
      echo "INFO: mount is functional (ls succeeds)"
    fi
  else
    echo "INFO: warpgate is running but mount did not become ready"
    echo "INFO: checking logs for details..."
    # Check logs for error messages about the stale mount
    if grep -q "fusermount" "$TEST_DIR/warpgate.log" 2>/dev/null; then
      echo "INFO: log mentions fusermount cleanup attempt"
    fi
    if grep -qE "mount.*busy|already mounted|Transport endpoint" "$TEST_DIR/warpgate.log" 2>/dev/null; then
      echo "INFO: log shows stale mount interference"
    fi
  fi
else
  # Process exited — check why.
  # Guard the wait: under `set -e`, a non-zero status from `wait` would
  # abort the script before we could capture and report the exit code.
  exit_code=0
  wait "$WARPGATE_PID" 2>/dev/null || exit_code=$?
  WARPGATE_PID=""
  echo "INFO: warpgate exited with code $exit_code"
  if grep -q "fusermount" "$TEST_DIR/warpgate.log" 2>/dev/null; then
    echo "INFO: log mentions fusermount (attempted cleanup)"
  fi
  if grep -qE "already mounted|Transport endpoint|mount point.*busy" "$TEST_DIR/warpgate.log" 2>/dev/null; then
    echo "INFO: warpgate detected stale mount and reported clear error"
  fi
fi

# Final cleanup: ensure we remove any lingering stale mount
if grep -qF -- "$TEST_MOUNT" /proc/mounts 2>/dev/null; then
  echo "INFO: cleaning up residual stale mount via fusermount -uz"
  fusermount3 -uz "$TEST_MOUNT" 2>/dev/null || fusermount -uz "$TEST_MOUNT" 2>/dev/null || true
fi

echo "PASS: $(basename "$0" .sh)"