intermittent stderr

John Dunlap dunlap at apl.washington.edu
Wed Mar 14 02:31:58 EST 2001


Rachit and Damien,
Yes,  Markus fixed it for my tests:  he gave me two patches to
be applied as noted below to version OpenSSH-2.5.1.p1 or 2.5.1.p2.
Regards, John

$ patch    < patch3
$ patch -R < patch4

patch3:
---------------------- cut here -------------------
Index: nchan.c
===================================================================
RCS file: /home/markus/cvs/ssh/nchan.c,v
retrieving revision 1.22
diff -u -r1.22 nchan.c
--- nchan.c	2001/01/21 19:05:52	1.22
+++ nchan.c	2001/02/27 13:08:31
@@ -54,9 +54,6 @@
 static void	chan_send_close2(Channel *c);
 static void	chan_send_eof2(Channel *c);
 
-/* channel cleanup */
-chan_event_fn *chan_delete_if_full_closed	= NULL;
-
 /* helper */
 static void	chan_shutdown_write(Channel *c);
 static void	chan_shutdown_read(Channel *c);
@@ -249,16 +246,6 @@
 		break;
 	}
 }
-static void
-chan_delete_if_full_closed1(Channel *c)
-{
-	debug3("channel %d: chan_delete_if_full_closed1: istate %d ostate %d",
-	    c->self, c->istate, c->ostate);
-	if (c->istate == CHAN_INPUT_CLOSED && c->ostate == CHAN_OUTPUT_CLOSED) {
-		debug("channel %d: full closed", c->self);
-		channel_free(c->self);
-	}
-}
 
 /*
  * the same for SSH2
@@ -401,24 +388,49 @@
 		c->flags |= CHAN_CLOSE_SENT;
 	}
 }
-static void
-chan_delete_if_full_closed2(Channel *c)
+
+/* shared */
+
+int
+chan_is_dead(Channel *c)
 {
-	debug3("channel %d: chan_delete_if_full_closed2: istate %d ostate %d",
+	debug3("channel %d: chan_is_dead: istate %d ostate %d",
 	    c->self, c->istate, c->ostate);
-	if (c->istate == CHAN_INPUT_CLOSED && c->ostate == CHAN_OUTPUT_CLOSED) {
+
+	if (c->istate != CHAN_INPUT_CLOSED || c->ostate != CHAN_OUTPUT_CLOSED)
+		return 0;
+	if (!compat20) {
+		debug("channel %d: is dead", c->self);
+		return 1;
+	}
+	/*
+	 * we have to delay the close message if the efd (for stderr) is
+	 * still active
+	 */
+	if (((c->extended_usage != CHAN_EXTENDED_IGNORE) &&
+	    buffer_len(&c->extended) > 0)
+#if 0
+	    || ((c->extended_usage == CHAN_EXTENDED_READ) &&
+	    c->efd != -1)
+#endif
+	    ) {
+		debug2("channel %d: active efd: %d len %d type %s",
+		    c->self, c->efd, buffer_len(&c->extended),
+		    c->extended_usage==CHAN_EXTENDED_READ ?
+		       "read": "write");
+	} else {
 		if (!(c->flags & CHAN_CLOSE_SENT)) {
 			chan_send_close2(c);
 		}
 		if ((c->flags & CHAN_CLOSE_SENT) &&
 		    (c->flags & CHAN_CLOSE_RCVD)) {
-			debug("channel %d: full closed2", c->self);
-			channel_free(c->self);
+			debug("channel %d: is dead", c->self);
+			return 1;
 		}
 	}
+	return 0;
 }
 
-/* shared */
 void
 chan_init_iostates(Channel *c)
 {
@@ -439,8 +451,6 @@
 		chan_rcvd_ieof			= chan_rcvd_ieof2;
 		chan_write_failed		= chan_write_failed2;
 		chan_obuf_empty			= chan_obuf_empty2;
-
-		chan_delete_if_full_closed	= chan_delete_if_full_closed2;
 	} else {
 		chan_rcvd_oclose		= chan_rcvd_oclose1;
 		chan_read_failed		= chan_read_failed_12;
@@ -449,8 +459,6 @@
 		chan_rcvd_ieof			= chan_rcvd_ieof1;
 		chan_write_failed		= chan_write_failed1;
 		chan_obuf_empty			= chan_obuf_empty1;
-
-		chan_delete_if_full_closed	= chan_delete_if_full_closed1;
 	}
 }
 
Index: nchan.h
===================================================================
RCS file: /home/markus/cvs/ssh/nchan.h,v
retrieving revision 1.9
diff -u -r1.9 nchan.h
--- nchan.h	2000/09/07 20:27:52	1.9
+++ nchan.h	2001/02/27 12:34:41
@@ -84,7 +84,7 @@
 extern chan_event_fn	*chan_write_failed;
 extern chan_event_fn	*chan_obuf_empty;
 
-extern chan_event_fn	*chan_delete_if_full_closed;
+int chan_is_dead(Channel * c);
 
 void    chan_init_iostates(Channel * c);
 void	chan_init(void);
Index: channels.c
===================================================================
RCS file: /home/markus/cvs/ssh/channels.c,v
retrieving revision 1.92
diff -u -r1.92 channels.c
--- channels.c	2001/02/16 13:38:18	1.92
+++ channels.c	2001/02/27 12:51:57
@@ -297,6 +297,7 @@
 channel_close_fds(Channel *c)
 {
 	if (c->sock != -1) {
+		shutdown(c->sock, SHUT_RDWR);
 		close(c->sock);
 		c->sock = -1;
 	}
@@ -331,8 +332,6 @@
 		debug("channel_free: channel %d: dettaching channel user", id);
 		c->dettach_user(c->self, NULL);
 	}
-	if (c->sock != -1)
-		shutdown(c->sock, SHUT_RDWR);
 	channel_close_fds(c);
 	buffer_free(&c->input);
 	buffer_free(&c->output);
@@ -824,7 +823,14 @@
 			    buffer_len(&c->extended));
 			debug2("channel %d: written %d to efd %d",
 			    c->self, len, c->efd);
-			if (len > 0) {
+			if (len < 0 && (errno == EINTR || errno == EAGAIN))
+				return 1;
+			if (len <= 0) {
+				debug2("channel %d: closing write-efd %d",
+				    c->self, c->efd);
+				close(c->efd);
+				c->efd = -1;
+			} else {
 				buffer_consume(&c->extended, len);
 				c->local_consumed += len;
 			}
@@ -833,19 +839,22 @@
 			len = read(c->efd, buf, sizeof(buf));
 			debug2("channel %d: read %d from efd %d",
 			     c->self, len, c->efd);
-			if (len == 0) {
-				debug("channel %d: closing efd %d",
+			if (len < 0 && (errno == EINTR || errno == EAGAIN))
+				return 1;
+			if (len <= 0) {
+				debug2("channel %d: closing read-efd %d",
 				    c->self, c->efd);
 				close(c->efd);
 				c->efd = -1;
-			} else if (len > 0)
+			} else {
 				buffer_append(&c->extended, buf, len);
+			}
 		}
 	}
 	return 1;
 }
 int
-channel_check_window(Channel *c, fd_set * readset, fd_set * writeset)
+channel_check_window(Channel *c)
 {
 	if (!(c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD)) &&
 	    c->local_window < c->local_window_max/2 &&
@@ -876,7 +885,8 @@
 	channel_handle_rfd(c, readset, writeset);
 	channel_handle_wfd(c, readset, writeset);
 	channel_handle_efd(c, readset, writeset);
-	channel_check_window(c, readset, writeset);
+
+	channel_check_window(c);
 }
 
 void
@@ -984,7 +994,24 @@
 		if (ftab[c->type] == NULL)
 			continue;
 		(*ftab[c->type])(c, readset, writeset);
-		chan_delete_if_full_closed(c);
+		if (chan_is_dead(c)) {
+			/*
+			 * we have to remove the fd's from the select mask
+			 * before the channels are free'd and the fd's are
+			 * closed
+			 */
+			if (c->wfd != -1)
+				FD_CLR(c->wfd, writeset);
+			if (c->rfd != -1)
+				FD_CLR(c->rfd, readset);
+			if (c->efd != -1) {
+				if (c->extended_usage == CHAN_EXTENDED_READ)
+					FD_CLR(c->efd, readset);
+				if (c->extended_usage == CHAN_EXTENDED_WRITE)
+					FD_CLR(c->efd, writeset);
+			}
+			channel_free(c->self);
+		}
 	}
 }
 
@@ -1037,19 +1064,18 @@
 		} else {
 			if (c->type != SSH_CHANNEL_OPEN)
 				continue;
-			if (c->istate != CHAN_INPUT_OPEN &&
-			    c->istate != CHAN_INPUT_WAIT_DRAIN)
-				continue;
 		}
 		if (compat20 &&
 		    (c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD))) {
+			/* XXX is this true? */
 			debug("channel: %d: no data after CLOSE", c->self);
 			continue;
 		}
 
 		/* Get the amount of buffered data for this channel. */
-		len = buffer_len(&c->input);
-		if (len > 0) {
+		if ((c->istate == CHAN_INPUT_OPEN ||
+		    c->istate == CHAN_INPUT_WAIT_DRAIN) &&
+		    (len = buffer_len(&c->input)) > 0) {
 			/* Send some data for the other side over the secure connection. */
 			if (compat20) {
 				if (len > c->remote_window)
@@ -1089,6 +1115,9 @@
 		    c->remote_window > 0 &&
 		    (len = buffer_len(&c->extended)) > 0 &&
 		    c->extended_usage == CHAN_EXTENDED_READ) {
+			debug2("channel %d: rwin %d elen %d euse %d",
+			    c->self, c->remote_window, buffer_len(&c->extended),
+			    c->extended_usage);
 			if (len > c->remote_window)
 				len = c->remote_window;
 			if (len > c->remote_maxpacket)
@@ -1100,6 +1129,7 @@
 			packet_send();
 			buffer_consume(&c->extended, len);
 			c->remote_window -= len;
+			debug2("channel %d: sent ext data %d", c->self, len);
 		}
 	}
 }
---------------------- cut here -------------------

patch4:
---------------------- cut here -------------------
Index: channels.c
===================================================================
RCS file: /home/markus/cvs/ssh/channels.c,v
retrieving revision 1.92
diff -u -r1.92 channels.c
--- channels.c	2001/02/16 13:38:18	1.92
+++ channels.c	2001/02/27 18:32:09
@@ -297,6 +297,7 @@
 channel_close_fds(Channel *c)
 {
 	if (c->sock != -1) {
+		shutdown(c->sock, SHUT_RDWR);
 		close(c->sock);
 		c->sock = -1;
 	}
@@ -331,8 +332,6 @@
 		debug("channel_free: channel %d: dettaching channel user", id);
 		c->dettach_user(c->self, NULL);
 	}
-	if (c->sock != -1)
-		shutdown(c->sock, SHUT_RDWR);
 	channel_close_fds(c);
 	buffer_free(&c->input);
 	buffer_free(&c->output);
---------------------- cut here -------------------

> 
> 
> Has this problem been fixed? I just started having problems like this as
> well (i'm using a early Jan snapshot version of openssh 2.3.0p1). Thnx.
> 
> -rchit
> 
> > I've just noticed that using ssh -1 always works.  The problem
> > is with ssh -2 from slow to fast machines. -- John
> > 
> > >
> > > The command "ssh ls -l /doesnotexist" gives various responses:
> > >
> > > Running from a 200 MHz PentiumPro with dsa key added to ssh-agent:
> > >
> > > Mistakes worst to fast machine:
> > > To a faster 600 MHz dual processor i686 600 MHz machine:
> > > ls: /doesnotexist: No such file or directory  -- correct
> > > nothing at all                                -- wrong
> > > ls: select: Bad file descriptor               -- wrong
> > >
> > > No mistakes to slower machine.
> > > To a slower 166 MHz i586 600 MHz machine:
> > > ls: /doesnotexist: No such file or directory  -- correct all the time
> > >
> > > All machines run compiled OpenSSH-2.5.1p1 on RHL 6.2 with all
> > > patches, kernel 2.2.16-3.  Set up on all machines with
> > > ssh and sshd defaulting to ssh2 protocol.
> > >
> > > My test script:
> > >
> > > #!/bin/sh
> > > while [ 1 ]
> > > do
> > >     date
> > >     ssh tesla ls -l /doesnotexist
> > > done
> > >
> > > Responses from fast machine are not consistent:
> > >
> > > Thu Feb 22 14:15:35 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:37 PST 2001
> > > Thu Feb 22 14:15:39 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:41 PST 2001
> > > Thu Feb 22 14:15:43 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:45 PST 2001
> > > ls: select: Bad file descriptor
> > > Thu Feb 22 14:15:47 PST 2001
> > > Thu Feb 22 14:15:49 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:51 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:54 PST 2001
> > > ls: /doesnotexist: No such file or directory
> > > Thu Feb 22 14:15:56 PST 2001
> > > select: Bad file descriptor
> > > Thu Feb 22 14:15:58 PST 2001
> > >
> > > --
> > > John Dunlap                           University of Washington
> > > Senior Electrical Engineer            Applied Physics Laboratory
> > > dunlap at apl.washington.edu             1013 NE 40th Street
> > > 206-543-7207, 543-1300, FAX 543-6785  Seattle, WA   98105-6698
> 





More information about the openssh-unix-dev mailing list