OpenSSH server fails to terminate executed program under certain conditions

Damien Miller djm at mindrot.org
Wed Jun 4 09:37:05 AEST 2025


On Tue, 3 Jun 2025, Lucas Mulling via openssh-unix-dev wrote:

> Hello,
> 
> We started experiencing failures in all architectures in a libssh test after
> updating to OpenSSH 10.0p2. The test in question is
> torture_channel_exit_signal, relevant code below:
> 
>     rc = snprintf(request, sizeof(request), "cat");
>     [...]
>     /* Make the request, read parts with close */
>     rc = ssh_channel_request_exec(channel, request);
>     assert_ssh_return_code(session, rc);
>     rc = ssh_channel_request_send_signal(channel, "TERM");
>     assert_ssh_return_code(session, rc);
> 
>     exit_status = ssh_channel_get_exit_state(channel,
>                                              &exit_status,
>                                              &exit_signal,
>                                              &core_dumped);
> 
> In debugging I've found that the test requests SIGTERM so fast that the server
> -- OpenSSH -- has not had yet time to properly setup the child, resulting in
> killpg from session_signal_req failing (see log bellow).
> 
> OpenSSH was build with:
> ./configure                            \
>     --with-pam                         \
>     --with-privsep-path=/var/lib/empty \
>     --with-systemd --libexecdir=$(pwd)
> 
> With an added log to:
> @@ -1508,6 +1513,8 @@ do_child(struct ssh *ssh, Session *s, const char *command)
>         struct passwd *pw = s->pw;
>         int r = 0;
> 
> +       debug2("do child");
> +
>         sshpkt_fmt_connection_id(ssh, remote_id, sizeof(remote_id));
> 
>         /* remove keys from memory */
> 
> Relevant log snippet:
> debug3: send packet: type 99
> debug3: receive packet: type 98
> debug1: server_input_channel_req: channel 0 request signal reply 0
> debug1: session_by_channel: session 0 channel 0
> debug1: session_input_channel_req: session 0 req signal
> debug1: session_signal_req: signal TERM, killpg(458610, 15)
> debug1: temporarily_use_uid: 5001/9000 (e=5001/9000)
> debug1: restore_uid: (unprivileged)
> session_signal_req: status(0)
> session_signal_req: killpg(458610, 15): No such process
> debug2: do child
> 
> Note that do_child is called after killpg is called, this means (in my
> understating) that the correct gid has not been setup for killpg to
> work. This would, probably, not happen in a real world use case due to
> network delays.
> 
> Could you clarify if this behaviour is intended?

I'd describe the behaviour as "incidental" :)

The session child process is started via fork() and there is a small
window before it becomes a process group leader via setsid(). Your test
is hitting that window.

I think we could eliminate this window by adding an interlock between
the parent and child. Does this work for you?

diff --git a/session.c b/session.c
index acd44c6..f1ca285 100644
--- a/session.c
+++ b/session.c
@@ -339,6 +339,23 @@ xauth_valid_string(const char *s)
 	return 1;
 }
 
+/* Wait for child to signal it is now a process group leader by closing fd */
+static void
+wait_child_setsid(Session *s, int fd)
+{
+	char c;
+
+	debug3("session %d: waiting for child to start", s->self);
+	while (read(fd, &c, 1) == -1) {
+		if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+			error("session %d: read ready status: %s", s->self,
+			    strerror(errno));
+			break;
+		}
+	}
+	debug3("session %d: child ready", s->self);
+}
+
 #define USE_PIPES 1
 /*
  * This is called to fork and execute a command when we have no tty.  This
@@ -350,7 +367,7 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 {
 	pid_t pid;
 #ifdef USE_PIPES
-	int pin[2], pout[2], perr[2];
+	int pin[2], pout[2], perr[2], ready[2];
 
 	if (s == NULL)
 		fatal("do_exec_no_pty: no session");
@@ -362,20 +379,24 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 	}
 	if (pipe(pout) == -1) {
 		error_f("pipe out: %.100s", strerror(errno));
+ close1:
 		close(pin[0]);
 		close(pin[1]);
 		return -1;
 	}
 	if (pipe(perr) == -1) {
 		error_f("pipe err: %.100s", strerror(errno));
-		close(pin[0]);
-		close(pin[1]);
+ close2:
 		close(pout[0]);
 		close(pout[1]);
-		return -1;
+		goto close1;
+	}
+	if (pipe(ready) == -1) {
+		error_f("pipe ready: %.100s", strerror(errno));
+		goto close2;
 	}
 #else
-	int inout[2], err[2];
+	int inout[2], err[2], ready[2];
 
 	if (s == NULL)
 		fatal("do_exec_no_pty: no session");
@@ -387,10 +408,15 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 	}
 	if (socketpair(AF_UNIX, SOCK_STREAM, 0, err) == -1) {
 		error_f("socketpair #2: %.100s", strerror(errno));
+ close1:
 		close(inout[0]);
 		close(inout[1]);
 		return -1;
 	}
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, ready) == -1) {
+		error_f("socketpair rdy: %.100s", strerror(errno));
+		goto close1;
+	}
 #endif
 
 	session_proctitle(s);
@@ -406,22 +432,28 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 		close(pout[1]);
 		close(perr[0]);
 		close(perr[1]);
+		close(ready[0]);
+		close(ready[1]);
 #else
 		close(inout[0]);
 		close(inout[1]);
 		close(err[0]);
 		close(err[1]);
+		close(ready[0]);
+		close(ready[1]);
 #endif
 		return -1;
 	case 0:
 		is_child = 1;
 
+		close(ready[0]);
 		/*
 		 * Create a new session and process group since the 4.4BSD
 		 * setlogin() affects the entire process group.
 		 */
 		if (setsid() == -1)
 			error("setsid failed: %.100s", strerror(errno));
+		close(ready[1]);
 
 #ifdef USE_PIPES
 		/*
@@ -461,7 +493,6 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 			perror("dup2 stderr");
 		close(err[0]);
 #endif
-
 		/* Do processing for the child (exec command etc). */
 		do_child(ssh, s, command);
 		/* NOTREACHED */
@@ -469,6 +500,10 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
 		break;
 	}
 
+	close(ready[1]);
+	wait_child_setsid(s, ready[0]);
+	close(ready[0]);
+
 	s->pid = pid;
 	/* Set interactive/non-interactive mode. */
 	ssh_packet_set_interactive(ssh, s->display != NULL,
@@ -508,12 +543,25 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
 {
 	int fdout, ptyfd, ttyfd, ptymaster;
 	pid_t pid;
+	int ready[2];
 
 	if (s == NULL)
 		fatal("do_exec_pty: no session");
 	ptyfd = s->ptyfd;
 	ttyfd = s->ttyfd;
 
+#ifdef USE_PIPES
+	if (pipe(ready) == -1) {
+		error_f("pipe ready: %.100s", strerror(errno));
+		return -1;
+	}
+#else
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, ready) == -1) {
+		error_f("socketpair rdy: %.100s", strerror(errno));
+		return -1;
+	}
+#endif
+
 	/*
 	 * Create another descriptor of the pty master side for use as the
 	 * standard input.  We could use the original descriptor, but this
@@ -544,18 +592,22 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
 		close(ptymaster);
 		close(ttyfd);
 		close(ptyfd);
+		close(ready[0]);
+		close(ready[1]);
 		return -1;
 	case 0:
 		is_child = 1;
 
 		close(fdout);
 		close(ptymaster);
+		close(ready[0]);
 
 		/* Close the master side of the pseudo tty. */
 		close(ptyfd);
 
 		/* Make the pseudo tty our controlling tty. */
 		pty_make_controlling_tty(&ttyfd, s->tty);
+		close(ready[1]);
 
 		/* Redirect stdin/stdout/stderr from the pseudo tty. */
 		if (dup2(ttyfd, 0) == -1)
@@ -582,6 +634,10 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
 	}
 	s->pid = pid;
 
+	close(ready[1]);
+	wait_child_setsid(s, ready[0]);
+	close(ready[0]);
+
 	/* Parent.  Close the slave side of the pseudo tty. */
 	close(ttyfd);
 


More information about the openssh-unix-dev mailing list