//! A library for awaiting and killing child processes from multiple threads.
//!
//! - [Docs](https://docs.rs/shared_child)
//! - [Crate](https://crates.io/crates/shared_child)
//! - [Repo](https://github.com/oconnor663/shared_child.rs)
//!
//! The
//! [`std::process::Child`](https://doc.rust-lang.org/std/process/struct.Child.html)
//! type in the standard library provides
//! [`wait`](https://doc.rust-lang.org/std/process/struct.Child.html#method.wait)
//! and
//! [`kill`](https://doc.rust-lang.org/std/process/struct.Child.html#method.kill)
//! methods that take `&mut self`, making it impossible to kill a child process
//! while another thread is waiting on it. That design works around a race
//! condition in Unix's `waitpid` function, where a PID might get reused as soon
//! as the wait returns, so a signal sent around the same time could
//! accidentally get delivered to the wrong process.
//!
//! However, with the newer POSIX `waitid` function, we can wait on a child
//! without freeing its PID for reuse. That makes it safe to send signals
//! concurrently. Windows has actually always supported this, by preventing PID
//! reuse while there are still open handles to a child process. This library
//! wraps `std::process::Child` for concurrent use, backed by these APIs.
//!
//! Compatibility note: The `libc` crate doesn't currently support `waitid` on
//! NetBSD or OpenBSD, or on older versions of OSX. There [might also
//! be](https://bugs.python.org/msg167016) some version of OSX where the
//! `waitid` function exists but is broken. We can add a "best effort"
//! workaround using `waitpid` for these platforms as we run into them. Please
//! [file an issue](https://github.com/oconnor663/shared_child.rs/issues/new) if
//! you hit this.
//!
//! # Example
//!
//! ```rust
//! use shared_child::SharedChild;
//! use std::process::Command;
//! use std::sync::Arc;
//!
//! // Spawn a child that will just sleep for a long time,
//! // and put it in an Arc to share between threads.
//! let mut command = Command::new("python");
//! command.arg("-c").arg("import time; time.sleep(1000000000)");
//! let shared_child = SharedChild::spawn(&mut command).unwrap();
//! let child_arc = Arc::new(shared_child);
//!
//! // On another thread, wait on the child process.
//! let child_arc_clone = child_arc.clone();
//! let thread = std::thread::spawn(move || {
//! child_arc_clone.wait().unwrap()
//! });
//!
//! // While the other thread is waiting, kill the child process.
//! // This wouldn't be possible with e.g. Arc<Mutex<Child>> from
//! // the standard library, because the waiting thread would be
//! // holding the mutex.
//! child_arc.kill().unwrap();
//!
//! // Join the waiting thread and get the exit status.
//! let exit_status = thread.join().unwrap();
//! assert!(!exit_status.success());
//! ```
use std::io;
use std::process::{Child, Command, ExitStatus};
use std::sync::{Condvar, Mutex};
mod sys;
// Publish the Unix-only SharedChildExt trait.
#[cfg(unix)]
pub mod unix;
/// A wrapper around `std::process::Child` whose `wait`, `try_wait`, and `kill`
/// methods take `&self`, so a single child process can be shared between
/// threads (for example inside an `Arc`) and killed while another thread is
/// blocked waiting on it.
#[derive(Debug)]
pub struct SharedChild {
// This lock provides shared access to kill() and wait(). We never hold it
// during a blocking wait, though, so that non-blocking waits and kills can
// go through. (Blocking waits use libc::waitid with the WNOWAIT flag.)
child: Mutex<Child>,
// When there are multiple waiting threads, one of them will actually wait
// on the child, and the rest will block on this condvar. The state lock
// records which of those situations we are in, and caches the exit status
// once the child has been reaped.
state_lock: Mutex<ChildState>,
// Notified (notify_all) whenever a waiting thread leaves the Waiting state.
state_condvar: Condvar,
}
impl SharedChild {
/// Launch `command` and wrap the resulting process in a new `SharedChild`.
///
/// # Errors
///
/// Returns the error from `Command::spawn` if the process could not be
/// started.
pub fn spawn(command: &mut Command) -> io::Result<SharedChild> {
    command.spawn().map(|spawned| SharedChild {
        child: Mutex::new(spawned),
        // Nobody is waiting on a freshly spawned child.
        state_lock: Mutex::new(NotWaiting),
        state_condvar: Condvar::new(),
    })
}
/// Return the process ID of the child, as reported by `Child::id`.
///
/// # Panics
///
/// Panics if the internal child lock is poisoned.
pub fn id(&self) -> u32 {
    let inner = self.child.lock().unwrap();
    inner.id()
}
// Fetch the platform-specific handle for the child. The child lock is held
// only long enough to read the handle, never across a blocking wait.
fn get_handle(&self) -> sys::Handle {
    let inner = self.child.lock().unwrap();
    sys::get_handle(&inner)
}
/// Block the current thread until the child exits, and return its exit
/// status.
///
/// Any number of threads may call this at once. One of them performs the
/// actual blocking wait; the others sleep on a condvar until that thread
/// publishes the result. Once an exit status has been recorded, later calls
/// return it immediately.
///
/// # Errors
///
/// Returns any error reported by the underlying wait. After an error the
/// state goes back to "not waiting", so another caller can retry.
pub fn wait(&self) -> io::Result<ExitStatus> {
    let mut guard = self.state_lock.lock().unwrap();

    // While another thread is doing the blocking wait, sleep until it
    // signals us on the condvar. Spurious wakeups can bring us back here
    // with the state unchanged (see the Condvar docs), hence the loop.
    while let Waiting = *guard {
        guard = self.state_condvar.wait(guard).unwrap();
    }

    // Someone already reaped the child: hand back the cached status.
    if let Exited(cached_status) = *guard {
        return Ok(cached_status);
    }

    // Otherwise the state is NotWaiting: either nobody has waited yet, or a
    // previous waiter failed. We become the waiting thread. Publish that and
    // release the state lock so other threads can observe it while we block.
    // From here on we must leave the Waiting state before returning, or
    // other waiters will deadlock.
    *guard = Waiting;
    drop(guard);

    // Block until the child exits, without reaping it. (On Unix that means
    // libc::waitid with the WNOWAIT flag; on Windows waiting never reaps.)
    // Because the PID is not released, another thread can safely kill the
    // child while we are here. Only one thread may be in this section,
    // because POSIX doesn't guarantee much about what happens when multiple
    // threads wait on a child at the same time:
    // http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_13
    let handle = self.get_handle();
    let noreap_result = sys::wait_without_reaping(handle);

    // Either we hit an error, or the child has exited and needs reaping.
    // Retake the state lock before touching anything else.
    let mut guard = self.state_lock.lock().unwrap();

    // The child has already exited, so this wait should clean up without
    // blocking.
    let outcome = match noreap_result {
        Ok(_) => self.child.lock().unwrap().wait(),
        Err(wait_err) => Err(wait_err),
    };

    // Whatever happened, leave the Waiting state and wake the other waiters.
    *guard = match outcome {
        Ok(exit_status) => Exited(exit_status),
        Err(_) => NotWaiting,
    };
    self.state_condvar.notify_all();

    outcome
}
/// Check, without blocking, whether the child has exited.
///
/// Returns `Ok(Some(status))` if the child has exited and `Ok(None)` if it
/// is still running. `Ok(None)` is also returned while another thread is
/// in the middle of a blocking `wait`.
///
/// # Errors
///
/// Returns any error from the non-blocking check or from reaping the child.
pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> {
    let mut state = self.state_lock.lock().unwrap();

    // Unlike wait(), we never sleep on the condvar. Waiting and Exited both
    // have an immediate answer; only NotWaiting needs the check below.
    match *state {
        Exited(cached_status) => return Ok(Some(cached_status)),
        Waiting => return Ok(None),
        NotWaiting => {}
    }

    // No one is waiting on the child, so see whether it has already exited.
    let has_exited = sys::try_wait_without_reaping(self.get_handle())?;
    if !has_exited {
        return Ok(None);
    }

    // The child has exited. Reap it; this should not block. There can't be
    // any other waiters to signal, because the state was NotWaiting when we
    // started and we have held the state lock ever since.
    let reaped_status = self.child.lock().unwrap().wait()?;
    *state = Exited(reaped_status);
    Ok(Some(reaped_status))
}
/// Send a kill signal to the child. On Unix this sends SIGKILL, and you
/// should call `wait` afterwards to avoid leaving a zombie. If the process
/// has already been waited on, this returns `Ok(())` and does nothing.
pub fn kill(&self) -> io::Result<()> {
let status = self.state_lock.lock().unwrap();
if let Exited(_) = *status {
return Ok(());
}
// The child is still running. Kill it. This assumes that the wait
// functions above will never hold the child lock during a blocking
// wait.
self.child.lock().unwrap().kill()
}
/// Consume the `SharedChild` and return the `std::process::Child` it
/// contains.
///
/// We never reap the child process except through `Child::wait`, so the
/// child object's inner state is correct, even if it was waited on while it
/// was shared.
pub fn into_inner(self) -> Child {
self.child.into_inner().unwrap()
}
}
// Coordination state shared by all threads calling wait(), try_wait() and
// kill() on a SharedChild. It is stored behind SharedChild::state_lock.
#[derive(Debug)]
enum ChildState {
// No thread is currently blocked waiting on the child. Either nobody has
// waited yet, or a previous wait failed with an error.
NotWaiting,
// Some thread is currently blocked in the non-reaping wait. Other threads
// calling wait() sleep on the condvar until it finishes.
Waiting,
// The child has been reaped through Child::wait, and this is its cached
// exit status.
Exited(ExitStatus),
}
use crate::ChildState::*;
#[cfg(test)]
mod tests {
use super::{sys, SharedChild};
use std;
use std::process::Command;
use std::sync::Arc;
// Build a command that exits successfully right away. Python isn't available
// on some Unix platforms (e.g. Android), so Unix uses `true` instead.
#[cfg(unix)]
pub fn true_cmd() -> Command {
    let program = "true";
    Command::new(program)
}
// Non-Unix counterpart: run Python with an empty program, which exits
// successfully right away.
#[cfg(not(unix))]
pub fn true_cmd() -> Command {
    let mut python = Command::new("python");
    python.args(&["-c", ""]);
    python
}
// Build a command that sleeps long enough to outlast any test, so the tests
// can rely on the child still running until they kill it.
#[cfg(unix)]
pub fn sleep_forever_cmd() -> Command {
    let mut sleeper = Command::new("sleep");
    sleeper.arg("1000000");
    sleeper
}
// Non-Unix counterpart: have Python sleep long enough to outlast any test.
#[cfg(not(unix))]
pub fn sleep_forever_cmd() -> Command {
    let mut python = Command::new("python");
    python.args(&["-c", "import time; time.sleep(1000000)"]);
    python
}
// A child that exits on its own can be waited on and reports exit code 0.
#[test]
fn test_wait() {
    let mut command = true_cmd();
    let child = SharedChild::spawn(&mut command).unwrap();
    // Exercise id() while we're here: a spawned child has a nonzero PID.
    let id = child.id();
    assert!(id > 0);
    let exit_code = child.wait().unwrap().code().unwrap();
    assert_eq!(exit_code, 0);
}
// Killing a long-running child makes a later wait() report failure.
#[test]
fn test_kill() {
    let mut command = sleep_forever_cmd();
    let child = SharedChild::spawn(&mut command).unwrap();
    child.kill().unwrap();
    let status = child.wait().unwrap();
    assert!(!status.success());
}
// try_wait() returns None while the child runs and Some(status) after a kill.
#[test]
fn test_try_wait() {
    let child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
    // Nothing has stopped the child yet, so there is no status to report.
    assert_eq!(child.try_wait().unwrap(), None);
    child.kill().unwrap();
    // The child handles the kill signal asynchronously, so poll in a busy
    // loop until an exit status shows up.
    let status = loop {
        if let Some(status) = child.try_wait().unwrap() {
            break status;
        }
    };
    assert!(!status.success());
}
// Ten threads wait on the same child concurrently; after a kill, every one
// of them must return successfully.
#[test]
fn test_many_waiters() {
    let child = Arc::new(SharedChild::spawn(&mut sleep_forever_cmd()).unwrap());
    // collect() is eager, so all ten waiter threads exist before the kill.
    let waiters: Vec<_> = (0..10)
        .map(|_| {
            let shared = Arc::clone(&child);
            std::thread::spawn(move || shared.wait())
        })
        .collect();
    child.kill().unwrap();
    for waiter in waiters {
        // The first unwrap is for the join, the second for the wait result.
        waiter.join().unwrap().unwrap();
    }
}
// There are ominous reports (https://bugs.python.org/issue10812) of a broken
// waitid implementation on OSX, which might hang forever if it tries to wait
// on a child that's already exited.
#[test]
fn test_waitid_after_exit_doesnt_hang() {
    let child = true_cmd().spawn().unwrap();
    // Once the first wait returns, the child has definitely exited. The
    // second wait checks that waiting on an exited child doesn't block.
    for _ in 0..2 {
        let handle = sys::get_handle(&child);
        sys::wait_without_reaping(handle).unwrap();
    }
}
// A Child taken back before any wait is still killable and waitable.
#[test]
fn test_into_inner_before_wait() {
    let mut command = sleep_forever_cmd();
    let mut inner = SharedChild::spawn(&mut command).unwrap().into_inner();
    inner.kill().unwrap();
    inner.wait().unwrap();
}
// This makes sure the child's inner state is valid after a shared wait. If
// we used waitpid on the side, the inner child would try to wait again and
// cause an error.
#[test]
fn test_into_inner_after_wait() {
    let shared_child = SharedChild::spawn(&mut sleep_forever_cmd()).unwrap();
    shared_child.kill().unwrap();
    shared_child.wait().unwrap();
    let mut inner = shared_child.into_inner();
    // The child has already been waited on, so kill should be an error. The
    // expected error kind depends on the platform.
    let expected_kind = if cfg!(windows) {
        std::io::ErrorKind::PermissionDenied
    } else {
        std::io::ErrorKind::InvalidInput
    };
    let kill_err = inner.kill().unwrap_err();
    assert_eq!(expected_kind, kill_err.kind());
    // But wait should succeed.
    inner.wait().unwrap();
}
}