OpenSSH server fails to terminate executed program under certain conditions
Damien Miller
djm at mindrot.org
Wed Jun 4 09:37:05 AEST 2025
On Tue, 3 Jun 2025, Lucas Mulling via openssh-unix-dev wrote:
> Hello,
>
> We started experiencing failures in all architectures in a libssh test after
> updating to OpenSSH 10.0p2. The test in question is
> torture_channel_exit_signal, relevant code below:
>
> rc = snprintf(request, sizeof(request), "cat");
> [...]
> /* Make the request, read parts with close */
> rc = ssh_channel_request_exec(channel, request);
> assert_ssh_return_code(session, rc);
> rc = ssh_channel_request_send_signal(channel, "TERM");
> assert_ssh_return_code(session, rc);
>
> exit_status = ssh_channel_get_exit_state(channel,
> &exit_status,
> &exit_signal,
> &core_dumped);
>
> In debugging I've found that the test requests SIGTERM so fast that the server
> -- OpenSSH -- has not had yet time to properly setup the child, resulting in
> killpg from session_signal_req failing (see log bellow).
>
> OpenSSH was build with:
> ./configure \
> --with-pam \
> --with-privsep-path=/var/lib/empty \
> --with-systemd --libexecdir=$(pwd)
>
> With an added log to:
> @@ -1508,6 +1513,8 @@ do_child(struct ssh *ssh, Session *s, const char *command)
> struct passwd *pw = s->pw;
> int r = 0;
>
> + debug2("do child");
> +
> sshpkt_fmt_connection_id(ssh, remote_id, sizeof(remote_id));
>
> /* remove keys from memory */
>
> Relevant log snippet:
> debug3: send packet: type 99
> debug3: receive packet: type 98
> debug1: server_input_channel_req: channel 0 request signal reply 0
> debug1: session_by_channel: session 0 channel 0
> debug1: session_input_channel_req: session 0 req signal
> debug1: session_signal_req: signal TERM, killpg(458610, 15)
> debug1: temporarily_use_uid: 5001/9000 (e=5001/9000)
> debug1: restore_uid: (unprivileged)
> session_signal_req: status(0)
> session_signal_req: killpg(458610, 15): No such process
> debug2: do child
>
> Note that do_child is called after killpg is called, this means (in my
> understating) that the correct gid has not been setup for killpg to
> work. This would, probably, not happen in a real world use case due to
> network delays.
>
> Could you clarify if this behaviour is intended?
I'd describe the behaviour as "incidental" :)
The session child process is started via fork() and there is a small
window before it becomes a process group leader via setsid(). Your test
is hitting that window.
I think we could eliminate this window by adding an interlock between
the parent and child. Does this work for you?
diff --git a/session.c b/session.c
index acd44c6..f1ca285 100644
--- a/session.c
+++ b/session.c
@@ -339,6 +339,23 @@ xauth_valid_string(const char *s)
return 1;
}
+/* Wait for child to signal it is now a process group leader by closing fd */
+static void
+wait_child_setsid(Session *s, int fd)
+{
+ char c;
+
+ debug3("session %d: waiting for child to start", s->self);
+ while (read(fd, &c, 1) == -1) {
+ if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+ error("session %d: read ready status: %s", s->self,
+ strerror(errno));
+ break;
+ }
+ }
+ debug3("session %d: child ready", s->self);
+}
+
#define USE_PIPES 1
/*
* This is called to fork and execute a command when we have no tty. This
@@ -350,7 +367,7 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
{
pid_t pid;
#ifdef USE_PIPES
- int pin[2], pout[2], perr[2];
+ int pin[2], pout[2], perr[2], ready[2];
if (s == NULL)
fatal("do_exec_no_pty: no session");
@@ -362,20 +379,24 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
}
if (pipe(pout) == -1) {
error_f("pipe out: %.100s", strerror(errno));
+ close1:
close(pin[0]);
close(pin[1]);
return -1;
}
if (pipe(perr) == -1) {
error_f("pipe err: %.100s", strerror(errno));
- close(pin[0]);
- close(pin[1]);
+ close2:
close(pout[0]);
close(pout[1]);
- return -1;
+ goto close1;
+ }
+ if (pipe(ready) == -1) {
+ error_f("pipe ready: %.100s", strerror(errno));
+ goto close2;
}
#else
- int inout[2], err[2];
+ int inout[2], err[2], ready[2];
if (s == NULL)
fatal("do_exec_no_pty: no session");
@@ -387,10 +408,15 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
}
if (socketpair(AF_UNIX, SOCK_STREAM, 0, err) == -1) {
error_f("socketpair #2: %.100s", strerror(errno));
+ close1:
close(inout[0]);
close(inout[1]);
return -1;
}
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, ready) == -1) {
+ error_f("socketpair rdy: %.100s", strerror(errno));
+ goto close1;
+ }
#endif
session_proctitle(s);
@@ -406,22 +432,28 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
close(pout[1]);
close(perr[0]);
close(perr[1]);
+ close(ready[0]);
+ close(ready[1]);
#else
close(inout[0]);
close(inout[1]);
close(err[0]);
close(err[1]);
+ close(ready[0]);
+ close(ready[1]);
#endif
return -1;
case 0:
is_child = 1;
+ close(ready[0]);
/*
* Create a new session and process group since the 4.4BSD
* setlogin() affects the entire process group.
*/
if (setsid() == -1)
error("setsid failed: %.100s", strerror(errno));
+ close(ready[1]);
#ifdef USE_PIPES
/*
@@ -461,7 +493,6 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
perror("dup2 stderr");
close(err[0]);
#endif
-
/* Do processing for the child (exec command etc). */
do_child(ssh, s, command);
/* NOTREACHED */
@@ -469,6 +500,10 @@ do_exec_no_pty(struct ssh *ssh, Session *s, const char *command)
break;
}
+ close(ready[1]);
+ wait_child_setsid(s, ready[0]);
+ close(ready[0]);
+
s->pid = pid;
/* Set interactive/non-interactive mode. */
ssh_packet_set_interactive(ssh, s->display != NULL,
@@ -508,12 +543,25 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
{
int fdout, ptyfd, ttyfd, ptymaster;
pid_t pid;
+ int ready[2];
if (s == NULL)
fatal("do_exec_pty: no session");
ptyfd = s->ptyfd;
ttyfd = s->ttyfd;
+#ifdef USE_PIPES
+ if (pipe(ready) == -1) {
+ error_f("pipe ready: %.100s", strerror(errno));
+ return -1;
+ }
+#else
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, ready) == -1) {
+ error_f("socketpair rdy: %.100s", strerror(errno));
+ return -1;
+ }
+#endif
+
/*
* Create another descriptor of the pty master side for use as the
* standard input. We could use the original descriptor, but this
@@ -544,18 +592,22 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
close(ptymaster);
close(ttyfd);
close(ptyfd);
+ close(ready[0]);
+ close(ready[1]);
return -1;
case 0:
is_child = 1;
close(fdout);
close(ptymaster);
+ close(ready[0]);
/* Close the master side of the pseudo tty. */
close(ptyfd);
/* Make the pseudo tty our controlling tty. */
pty_make_controlling_tty(&ttyfd, s->tty);
+ close(ready[1]);
/* Redirect stdin/stdout/stderr from the pseudo tty. */
if (dup2(ttyfd, 0) == -1)
@@ -582,6 +634,10 @@ do_exec_pty(struct ssh *ssh, Session *s, const char *command)
}
s->pid = pid;
+ close(ready[1]);
+ wait_child_setsid(s, ready[0]);
+ close(ready[0]);
+
/* Parent. Close the slave side of the pseudo tty. */
close(ttyfd);
More information about the openssh-unix-dev
mailing list