ssh client does not timeout if the network fails after ssh_connect but before ssh_exchange_identification, even with Alive options set

Jiaying Zhang jiayingz at google.com
Fri Jul 27 04:56:34 EST 2007


Hi Dan,

Thanks for looking at the patch. I changed the code as you suggested. The
new patch is attached below. I have a little concern about restarting
select() in case of EINTR, because it can postpone timeout. It may cause
some problem if people schedule their jobs based on the expected ssh
timeout. I also noticed that in server_alive_check() function, we check
if (++server_alive_timeouts > options.server_alive_count_max). Should it be
>=? According to the comment in ssh_config, if ServerAliveInterval equals to
30 and ServerAliveCountMax equals to 3, ssh disconnects after 90 seconds.
But with the current server_alive_check, we actually disconnects after 120
seconds.

diff -puN sshconnect.c~old sshconnect.c
--- sshconnect.c~old    2007-07-25 16:59:36.000000000 -0700
+++ sshconnect.c        2007-07-26 11:46:48.000000000 -0700
@@ -404,6 +404,28 @@ ssh_exchange_identification(void)
        int minor1 = PROTOCOL_MINOR_1;
        u_int i, n;

+       if (options.server_alive_interval) {
+               fd_set rfds;
+               struct timeval timeo;
+               int read_timeouts, ret;
+
+               FD_SET(connection_in, &rfds);
+               for (read_timeouts = 0;;) {
+                       timeo.tv_sec = options.server_alive_interval;
+                       timeo.tv_usec = 0;
+                       ret = select(connection_in+1, &rfds, NULL, NULL,
&timeo);
+                       if (ret < 0) {
+                               if (errno == EINTR)
+                                       continue;
+                               fatal("ssh_exchange_identification: select
read error: %.100s", strerror(errno));
+                       } else if (ret == 0) {
+                               if (++read_timeouts >=
options.server_alive_count_max)
+                                       fatal("ssh_exchange_identification:
Timeout, server not responding");
+                       } else
+                               break;
+               }
+
+       }
        /* Read other side's version identification. */
        for (n = 0;;) {
                for (i = 0; i < sizeof(buf) - 1; i++) {
@@ -488,6 +510,27 @@ ssh_exchange_identification(void)
            compat20 ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
            compat20 ? PROTOCOL_MINOR_2 : minor1,
            SSH_VERSION);
+       if (options.server_alive_interval) {
+               fd_set wfds;
+               struct timeval timeo = { .tv_usec=0 };
+               int write_timeouts, ret;
+
+               FD_SET(connection_out, &wfds);
+               for (write_timeouts = 0;;) {
+                       timeo.tv_sec = options.server_alive_interval;
+                       ret = select(connection_out+1, NULL, &wfds, NULL,
&timeo);
+                       if (ret < 0) {
+                               if (errno == EINTR)
+                                       continue;
+                               fatal("ssh_exchange_identification: select
write error: %.100s", strerror(errno));
+                       } else if (ret == 0) {
+                               if (++write_timeouts >=
options.server_alive_count_max)
+                                       fatal("ssh_exchange_identification:
Timeout, server not responding");
+                       } else
+                               break;
+               }
+
+       }
        if (atomicio(vwrite, connection_out, buf, strlen(buf)) !=
strlen(buf))
                fatal("write: %.100s", strerror(errno));
        client_version_string = xstrdup(buf);

Jiaying

On 7/26/07, Dan Yefimov <dan at d00m.lightwave.net.ru> wrote:
>
> On Wed, 25 Jul 2007, Jiaying Zhang wrote:
>
> > Hello again,
> >
> > Here is the patch I came up with to prevent the hanging in
> > ssh_exchange_identification. I tested it a little bit and it seems to
> have
> > solved the problem. Could anyone help to have a look at the patch?
> Thanks a
> > lot!
> >
> > --- sshconnect.c~old    2007-07-25 10:44:26.000000000 -0700
> > +++ sshconnect.c        2007-07-25 14:45:57.000000000 -0700
> > @@ -404,9 +404,26 @@ ssh_exchange_identification(void)
> >         int minor1 = PROTOCOL_MINOR_1;
> >         u_int i, n;
> >
> > +       if (options.server_alive_interval) {
> > +               fd_set rfds;
> > +               struct timeval timeo = { .tv_usec=0 };
> > +               int read_timeouts, ret;
> > +
> > +               FD_SET(connection_in, &rfds);
> > +               for (read_timeouts = 0;;) {
> > +                       timeo.tv_sec = options.server_alive_interval;
> > +                       ret = select(connection_in+1, &rfds, NULL, NULL,
> > &timeo);
> > +                       if (ret < 0) {
>
> I think you should handle errno == EINTR case here restarting select(),
> otherwise any arriving signal would make ssh fail here.
>
> > +                               fatal("ssh_exchange_identification:
> select
> > read error: %.100s", strerror(errno));
> > +                       } else if (ret == 0) {
> > +                               if (++read_timeouts >=
> > options.server_alive_count_max)
> > +
> fatal("ssh_exchange_identification:
> > Timeout, server not responding");
> > +                       } else
> > +                               break;
> > +               }
> > +
> > +       }
> >         /* Read other side's version identification. */
> > -       struct timeval timeo = { .tv_sec=10, .tv_usec=0 };
> > -       setsockopt(connection_in, SOL_SOCKET, SO_SNDTIMEO, &timeo,
> > sizeof(timeo));
> >         for (n = 0;;) {
> >                 for (i = 0; i < sizeof(buf) - 1; i++) {
> >                         size_t len = atomicio(read, connection_in,
> &buf[i],
> > 1);
> > @@ -490,6 +507,25 @@ ssh_exchange_identification(void)
> >             compat20 ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
> >             compat20 ? PROTOCOL_MINOR_2 : minor1,
> >             SSH_VERSION);
> > +       if (options.server_alive_interval) {
> > +               fd_set wfds;
> > +               struct timeval timeo = { .tv_usec=0 };
> > +               int write_timeouts, ret;
> > +
> > +               FD_SET(connection_out, &wfds);
> > +               for (write_timeouts = 0;;) {
> > +                       timeo.tv_sec = options.server_alive_interval;
> > +                       ret = select(connection_out+1, NULL, &wfds,
> NULL,
> > &timeo);
> > +                       if (ret < 0) {
>
> The same notice as above here.
>
> > +                               fatal("ssh_exchange_identification:
> select
> > write error: %.100s", strerror(errno));
> > +                       } else if (ret == 0) {
> > +                               if (++write_timeouts >=
> > options.server_alive_count_max)
> > +
> fatal("ssh_exchange_identification:
> > Timeout, server not responding");
> > +                       } else
> > +                               break;
> > +               }
> > +
> > +       }
> >         if (atomicio(vwrite, connection_out, buf, strlen(buf)) !=
> > strlen(buf))
> >                 fatal("write: %.100s", strerror(errno));
> >         client_version_string = xstrdup(buf);
> >
> --
>
>     Sincerely Your, Dan.
>
>


More information about the openssh-unix-dev mailing list