so-called-hang-on-exit

Frank Cusack fcusack at fcusack.com
Tue Aug 6 19:42:05 EST 2002


On Mon, Aug 05, 2002 at 09:41:36PM -0700, Frank Cusack wrote:
> I'll put up my patch for comparison later tonight.

Here it is ... against current CVS.  It's the combined work of Markus,
Jani, myself and perhaps others.  It's relatively simple (I think) and
also looks "correct" so IMHO it should make it into 3.5p1.

Now, all you people griping ... TEST IT!!  Thanks Markus, for keeping
this issue alive.

I called the option close_on_sigchld instead of the earlier
allow_data_loss_on_pty but that's just cuz that's what my local patch
has historically been named.

====================
diff -ur openssh.orig/channels.c openssh/channels.c
--- openssh.orig/channels.c	Tue Aug  6 02:05:47 2002
+++ openssh/channels.c	Tue Aug  6 02:09:36 2002
@@ -248,6 +248,7 @@
 	c->istate = CHAN_INPUT_OPEN;
 	c->flags = 0;
 	channel_register_fds(c, rfd, wfd, efd, extusage, nonblock);
+	c->read_last_input = 0;
 	c->self = found;
 	c->type = type;
 	c->ctype = ctype;
@@ -1240,6 +1241,29 @@
 {
 	char buf[16*1024];
 	int len;
+
+	/*
+	 * After SIGCHLD && options.close_on_sigchld (c->read_last_input == 1),
+	 * set eof on the channel's rfd once there is no more data available.
+	 * This "fixes" the so-called hang-on-exit problem.
+	 * rfd was only in the input readset under certain conditions
+	 * determined by channel_pre_open_20(); this is to avoid growing
+	 * the output buffer unnecessarily.
+	 */
+	if (c->rfd != -1 && compat20 &&
+	    c->read_last_input && !FD_ISSET(c->rfd, readset) &&
+	    /* The rest of this is what channel_pre_open_20() does */
+	    c->istate == CHAN_INPUT_OPEN &&
+	    c->remote_window > 0 &&
+	    buffer_len(&c->input) < c->remote_window) {
+	    debug("channel %d: rfd %d not in readset, closing",
+		  c->self, c->rfd);
+	    if (c->type != SSH_CHANNEL_OPEN)
+		    chan_mark_dead(c);
+	    else
+		    chan_read_failed(c);
+	    return -1;
+	}
 
 	if (c->rfd != -1 &&
 	    FD_ISSET(c->rfd, readset)) {
diff -ur openssh.orig/channels.h openssh/channels.h
--- openssh.orig/channels.h	Tue Aug  6 02:05:47 2002
+++ openssh/channels.h	Tue Aug  6 02:09:36 2002
@@ -72,6 +72,7 @@
 	u_int   istate;		/* input from channel (state of receive half) */
 	u_int   ostate;		/* output to channel  (state of transmit half) */
 	int     flags;		/* close sent/rcvd */
+	int	read_last_input;	/* read available data then close */
 	int     rfd;		/* read fd */
 	int     wfd;		/* write fd */
 	int     efd;		/* extended fd */
diff -ur openssh.orig/servconf.c openssh/servconf.c
--- openssh.orig/servconf.c	Tue Aug  6 02:05:48 2002
+++ openssh/servconf.c	Tue Aug  6 02:16:25 2002
@@ -105,6 +105,7 @@
 	options->use_login = -1;
 	options->compression = -1;
 	options->allow_tcp_forwarding = -1;
+	options->close_on_sigchld = -1;
 	options->num_allow_users = 0;
 	options->num_deny_users = 0;
 	options->num_allow_groups = 0;
@@ -232,6 +233,8 @@
 		options->compression = 1;
 	if (options->allow_tcp_forwarding == -1)
 		options->allow_tcp_forwarding = 1;
+	if (options->close_on_sigchld == -1)
+		options->close_on_sigchld = 0;
 	if (options->gateway_ports == -1)
 		options->gateway_ports = 0;
 	if (options->max_startups == -1)
@@ -293,7 +296,7 @@
 	sPasswordAuthentication, sKbdInteractiveAuthentication, sListenAddress,
 	sPrintMotd, sPrintLastLog, sIgnoreRhosts,
 	sX11Forwarding, sX11DisplayOffset, sX11UseLocalhost,
-	sStrictModes, sEmptyPasswd, sKeepAlives,
+	sStrictModes, sEmptyPasswd, sKeepAlives, sCloseOnSIGCHLD,
 	sPermitUserEnvironment, sUseLogin, sAllowTcpForwarding, sCompression,
 	sAllowUsers, sDenyUsers, sAllowGroups, sDenyGroups,
 	sIgnoreUserKnownHosts, sCiphers, sMacs, sProtocol, sPidFile,
@@ -362,6 +365,7 @@
 	{ "compression", sCompression },
 	{ "keepalive", sKeepAlives },
 	{ "allowtcpforwarding", sAllowTcpForwarding },
+	{ "closeonsigchld", sCloseOnSIGCHLD },
 	{ "allowusers", sAllowUsers },
 	{ "denyusers", sDenyUsers },
 	{ "allowgroups", sAllowGroups },
@@ -761,6 +765,10 @@
 
 	case sAllowTcpForwarding:
 		intptr = &options->allow_tcp_forwarding;
+		goto parse_flag;
+
+	case sCloseOnSIGCHLD:
+		intptr = &options->close_on_sigchld;
 		goto parse_flag;
 
 	case sUsePrivilegeSeparation:
diff -ur openssh.orig/servconf.h openssh/servconf.h
--- openssh.orig/servconf.h	Tue Aug  6 02:05:48 2002
+++ openssh/servconf.h	Tue Aug  6 02:14:01 2002
@@ -101,6 +101,7 @@
 	int     use_login;	/* If true, login(1) is used */
 	int     compression;	/* If true, compression is allowed */
 	int	allow_tcp_forwarding;
+	int	close_on_sigchld;
 	u_int num_allow_users;
 	char   *allow_users[MAX_ALLOW_USERS];
 	u_int num_deny_users;
diff -ur openssh.orig/serverloop.c openssh/serverloop.c
--- openssh.orig/serverloop.c	Tue Aug  6 02:05:48 2002
+++ openssh/serverloop.c	Tue Aug  6 02:26:30 2002
@@ -88,6 +88,7 @@
  */
 
 static volatile sig_atomic_t child_terminated = 0;	/* The child has terminated. */
+static volatile sig_atomic_t child_just_terminated = 0;	/* just now. (for compat20) */
 
 /* prototypes */
 static void server_init_dispatch(void);
@@ -143,6 +144,7 @@
 	int save_errno = errno;
 	debug("Received SIGCHLD.");
 	child_terminated = 1;
+	child_just_terminated = 1;
 	mysignal(SIGCHLD, sigchld_handler);
 	notify_parent();
 	errno = save_errno;
@@ -306,6 +308,13 @@
 	/*
 	 * If child has terminated and there is enough buffer space to read
 	 * from it, then read as much as is available and exit.
+	 *
+	 * packet_not_very_much_data_to_write() is always true in the
+	 * compat20 case, which is fine: we want to force a non-NULL tvp
+	 * once child_terminated so we can do options.close_on_sigchld.
+	 * If that option isn't set, we incur some extra overhead of having
+	 * to keep running through server_loop2() but the session is likely
+	 * to be ending soon anyway, so this should be OK.
 	 */
 	if (child_terminated && packet_not_very_much_data_to_write())
 		if (max_time_milliseconds == 0 || client_alive_scheduled)
@@ -365,6 +374,22 @@
 	if (compat20)
 		return;
 
+	/*
+	 * After SIGCHLD, set eof on program output fd's once there is no
+	 * more data available.  This "fixes" the so-called hang-on-exit
+	 * problem.  The call to packet_not_very_much_data_to_write() is
+	 * because fdout and fderr were only in the input readset if 
+	 * we didn't already have a lot of data pending transmission
+	 * to the client; this is to avoid growing the output buffer
+	 * unnecessarily.
+	 */
+	if (child_terminated && options.close_on_sigchld &&
+	    fderr == -1 && /* only do this for pty sessions */
+	    packet_not_very_much_data_to_write()) {
+		if (!FD_ISSET(fdout, readset))
+			fdout_eof = 1;
+	}
+
 	/* Read and buffer any available stdout data from the program. */
 	if (!fdout_eof && FD_ISSET(fdout, readset)) {
 		len = read(fdout, buf, sizeof(buf));
@@ -722,12 +747,12 @@
 	sigemptyset(&nset);
 	sigaddset(&nset, SIGCHLD);
 	sigprocmask(SIG_BLOCK, &nset, &oset);
-	if (child_terminated) {
+	if (child_just_terminated) {
 		while ((pid = waitpid(-1, &status, WNOHANG)) > 0 ||
 		    (pid < 0 && errno == EINTR))
 			if (pid > 0)
 				session_close_by_pid(pid, status);
-		child_terminated = 0;
+		child_just_terminated = 0;
 	}
 	sigprocmask(SIG_SETMASK, &oset, NULL);
 }
@@ -742,6 +767,7 @@
 
 	mysignal(SIGCHLD, sigchld_handler);
 	child_terminated = 0;
+	child_just_terminated = 0;
 	connection_in = packet_get_connection_in();
 	connection_out = packet_get_connection_out();
 
diff -ur openssh.orig/session.c openssh/session.c
--- openssh.orig/session.c	Tue Aug  6 02:05:48 2002
+++ openssh/session.c	Tue Aug  6 02:30:45 2002
@@ -1851,6 +1851,15 @@
 	 */
 	if (c->ostate != CHAN_OUTPUT_CLOSED)
 		chan_write_failed(c);
+	if (s->ttyfd != -1 && /* only do this for pty sessions */
+	    c->istate == CHAN_INPUT_OPEN &&
+	    options.close_on_sigchld) {
+		/*
+		 * Set the channel read_last_input state so that we can close
+		 * the read side as soon as there is no more data waiting.
+		 */
+		c->read_last_input = 1;
+	}
 	s->chanid = -1;
 }
 
======================

And for you heathens, you can change the above to do the same
for both the pty and non-pty case.  (I wouldn't expect this to make it
into any openssh release, although it is what the old ssh-1.2.x does.)
Just comment out the two bits marked "only do this for pty sessions" and
add

                if (fderr != -1 && !FD_ISSET(fderr, readset))
                        fderr_eof = 1;

to the bit in serverloop.c:process_input().

/fc




More information about the openssh-unix-dev mailing list