Harden supervisor shutdown: process group isolation, write-back drain
- Spawn all children (rclone, smbd, webdav) in isolated process groups so Ctrl+C doesn't reach them directly — the supervisor controls shutdown order
- Wait for the rclone VFS write-back queue to drain before unmounting (5 min cap)
- Prefer fusermount3 over fusermount; skip the unmount if already unmounted

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
960ddd20ce
commit
e6c48c9bd9
@ -4,6 +4,7 @@
|
|||||||
//! coordinated startup and shutdown. Designed to run as a systemd unit
|
//! coordinated startup and shutdown. Designed to run as a systemd unit
|
||||||
//! or standalone (Docker-friendly).
|
//! or standalone (Docker-friendly).
|
||||||
|
|
||||||
|
use std::os::unix::process::CommandExt;
|
||||||
use std::process::{Child, Command};
|
use std::process::{Child, Command};
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@ -26,6 +27,10 @@ const SIGTERM_GRACE: Duration = Duration::from_secs(3);
|
|||||||
const MAX_RESTARTS: u32 = 3;
|
const MAX_RESTARTS: u32 = 3;
|
||||||
/// Reset restart counter after this period of stable running.
|
/// Reset restart counter after this period of stable running.
|
||||||
const RESTART_STABLE_PERIOD: Duration = Duration::from_secs(300);
|
const RESTART_STABLE_PERIOD: Duration = Duration::from_secs(300);
|
||||||
|
/// Max time to wait for write-back queue to drain on shutdown.
|
||||||
|
const WRITEBACK_DRAIN_TIMEOUT: Duration = Duration::from_secs(300);
|
||||||
|
/// Poll interval when waiting for write-back drain.
|
||||||
|
const WRITEBACK_POLL_INTERVAL: Duration = Duration::from_secs(2);
|
||||||
|
|
||||||
/// Tracks restart attempts for a supervised child process.
|
/// Tracks restart attempts for a supervised child process.
|
||||||
struct RestartTracker {
|
struct RestartTracker {
|
||||||
@ -170,6 +175,7 @@ fn start_and_wait_mount(config: &Config, shutdown: &AtomicBool) -> Result<Child>
|
|||||||
|
|
||||||
let mut child = Command::new("rclone")
|
let mut child = Command::new("rclone")
|
||||||
.args(&args)
|
.args(&args)
|
||||||
|
.process_group(0) // isolate from terminal SIGINT
|
||||||
.spawn()
|
.spawn()
|
||||||
.context("Failed to spawn rclone mount")?;
|
.context("Failed to spawn rclone mount")?;
|
||||||
|
|
||||||
@ -221,6 +227,7 @@ fn spawn_smbd() -> Result<Child> {
|
|||||||
Command::new("smbd")
|
Command::new("smbd")
|
||||||
.args(["--foreground", "--debug-stdout", "--no-process-group",
|
.args(["--foreground", "--debug-stdout", "--no-process-group",
|
||||||
"--configfile", samba::SMB_CONF_PATH])
|
"--configfile", samba::SMB_CONF_PATH])
|
||||||
|
.process_group(0)
|
||||||
.spawn()
|
.spawn()
|
||||||
.context("Failed to spawn smbd")
|
.context("Failed to spawn smbd")
|
||||||
}
|
}
|
||||||
@ -266,6 +273,7 @@ fn spawn_webdav(config: &Config) -> Result<Child> {
|
|||||||
let args = webdav::build_serve_args(config);
|
let args = webdav::build_serve_args(config);
|
||||||
Command::new("rclone")
|
Command::new("rclone")
|
||||||
.args(&args)
|
.args(&args)
|
||||||
|
.process_group(0)
|
||||||
.spawn()
|
.spawn()
|
||||||
.context("Failed to spawn rclone serve webdav")
|
.context("Failed to spawn rclone serve webdav")
|
||||||
}
|
}
|
||||||
@ -401,6 +409,55 @@ fn graceful_kill(child: &mut Child) {
|
|||||||
let _ = child.wait();
|
let _ = child.wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wait for rclone VFS write-back queue to drain.
|
||||||
|
///
|
||||||
|
/// Polls `vfs/stats` every 2s. Exits when uploads_in_progress + uploads_queued
|
||||||
|
/// reaches 0, or after 5 minutes (safety cap to avoid hanging forever).
|
||||||
|
fn wait_writeback_drain() {
|
||||||
|
use crate::rclone::rc;
|
||||||
|
|
||||||
|
let deadline = Instant::now() + WRITEBACK_DRAIN_TIMEOUT;
|
||||||
|
let mut first = true;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match rc::vfs_stats() {
|
||||||
|
Ok(vfs) => {
|
||||||
|
if let Some(dc) = &vfs.disk_cache {
|
||||||
|
let pending = dc.uploads_in_progress + dc.uploads_queued;
|
||||||
|
if pending == 0 {
|
||||||
|
if !first {
|
||||||
|
println!(" Write-back queue drained.");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if first {
|
||||||
|
println!(
|
||||||
|
" Waiting for write-back queue ({pending} files pending)..."
|
||||||
|
);
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
eprint!("\r Write-back: {pending} files remaining... ");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return; // no cache info → nothing to wait for
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => return, // RC API unavailable → rclone already gone
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() > deadline {
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(
|
||||||
|
" Warning: write-back drain timed out after {}s, proceeding with shutdown.",
|
||||||
|
WRITEBACK_DRAIN_TIMEOUT.as_secs()
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
thread::sleep(WRITEBACK_POLL_INTERVAL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Reverse-order teardown of all services.
|
/// Reverse-order teardown of all services.
|
||||||
///
|
///
|
||||||
/// Order: stop smbd → unexport NFS → kill WebDAV → unmount FUSE → kill rclone.
|
/// Order: stop smbd → unexport NFS → kill WebDAV → unmount FUSE → kill rclone.
|
||||||
@ -423,11 +480,23 @@ fn shutdown_services(config: &Config, mount: &mut Child, protocols: &mut Protoco
|
|||||||
println!(" WebDAV: stopped");
|
println!(" WebDAV: stopped");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lazy unmount FUSE
|
// Wait for write-back queue to drain before unmounting
|
||||||
let mount_point = config.mount.point.display().to_string();
|
wait_writeback_drain();
|
||||||
let _ = Command::new("fusermount")
|
|
||||||
.args(["-uz", &mount_point])
|
// Lazy unmount FUSE (skip if rclone already unmounted on signal)
|
||||||
.status();
|
if is_mounted(config).unwrap_or(false) {
|
||||||
|
let mount_point = config.mount.point.display().to_string();
|
||||||
|
let unmounted = Command::new("fusermount3")
|
||||||
|
.args(["-uz", &mount_point])
|
||||||
|
.status()
|
||||||
|
.map(|s| s.success())
|
||||||
|
.unwrap_or(false);
|
||||||
|
if !unmounted {
|
||||||
|
let _ = Command::new("fusermount")
|
||||||
|
.args(["-uz", &mount_point])
|
||||||
|
.status();
|
||||||
|
}
|
||||||
|
}
|
||||||
println!(" FUSE: unmounted");
|
println!(" FUSE: unmounted");
|
||||||
|
|
||||||
// Gracefully stop rclone
|
// Gracefully stop rclone
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user