ssh client does not timeout if the network fails after ssh_connect but before ssh_exchange_identification, even with Alive options set
Jiaying Zhang
jiayingz at google.com
Fri Jul 27 04:56:34 EST 2007
Hi Dan,
Thanks for looking at the patch. I changed the code as you suggested. The
new patch is attached below. I have a little concern about restarting
select() in case of EINTR, because it can postpone timeout. It may cause
some problem if people schedule their jobs based on the expected ssh
timeout. I also noticed that in server_alive_check() function, we check
if (++server_alive_timeouts > options.server_alive_count_max). Should it be
>=? According to the comment in ssh_config, if ServerAliveInterval equals to
30 and ServerAliveCountMax equals to 3, ssh disconnects after 90 seconds.
But with the current server_alive_check, we actually disconnects after 120
seconds.
diff -puN sshconnect.c~old sshconnect.c
--- sshconnect.c~old 2007-07-25 16:59:36.000000000 -0700
+++ sshconnect.c 2007-07-26 11:46:48.000000000 -0700
@@ -404,6 +404,28 @@ ssh_exchange_identification(void)
int minor1 = PROTOCOL_MINOR_1;
u_int i, n;
+ if (options.server_alive_interval) {
+ fd_set rfds;
+ struct timeval timeo;
+ int read_timeouts, ret;
+
+ FD_SET(connection_in, &rfds);
+ for (read_timeouts = 0;;) {
+ timeo.tv_sec = options.server_alive_interval;
+ timeo.tv_usec = 0;
+ ret = select(connection_in+1, &rfds, NULL, NULL,
&timeo);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ fatal("ssh_exchange_identification: select
read error: %.100s", strerror(errno));
+ } else if (ret == 0) {
+ if (++read_timeouts >=
options.server_alive_count_max)
+ fatal("ssh_exchange_identification:
Timeout, server not responding");
+ } else
+ break;
+ }
+
+ }
/* Read other side's version identification. */
for (n = 0;;) {
for (i = 0; i < sizeof(buf) - 1; i++) {
@@ -488,6 +510,27 @@ ssh_exchange_identification(void)
compat20 ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
compat20 ? PROTOCOL_MINOR_2 : minor1,
SSH_VERSION);
+ if (options.server_alive_interval) {
+ fd_set wfds;
+ struct timeval timeo = { .tv_usec=0 };
+ int write_timeouts, ret;
+
+ FD_SET(connection_out, &wfds);
+ for (write_timeouts = 0;;) {
+ timeo.tv_sec = options.server_alive_interval;
+ ret = select(connection_out+1, NULL, &wfds, NULL,
&timeo);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ fatal("ssh_exchange_identification: select
write error: %.100s", strerror(errno));
+ } else if (ret == 0) {
+ if (++write_timeouts >=
options.server_alive_count_max)
+ fatal("ssh_exchange_identification:
Timeout, server not responding");
+ } else
+ break;
+ }
+
+ }
if (atomicio(vwrite, connection_out, buf, strlen(buf)) !=
strlen(buf))
fatal("write: %.100s", strerror(errno));
client_version_string = xstrdup(buf);
Jiaying
On 7/26/07, Dan Yefimov <dan at d00m.lightwave.net.ru> wrote:
>
> On Wed, 25 Jul 2007, Jiaying Zhang wrote:
>
> > Hello again,
> >
> > Here is the patch I came up with to prevent the hanging in
> > ssh_exchange_identification. I tested it a little bit and it seems to
> have
> > solved the problem. Could anyone help to have a look at the patch?
> Thanks a
> > lot!
> >
> > --- sshconnect.c~old 2007-07-25 10:44:26.000000000 -0700
> > +++ sshconnect.c 2007-07-25 14:45:57.000000000 -0700
> > @@ -404,9 +404,26 @@ ssh_exchange_identification(void)
> > int minor1 = PROTOCOL_MINOR_1;
> > u_int i, n;
> >
> > + if (options.server_alive_interval) {
> > + fd_set rfds;
> > + struct timeval timeo = { .tv_usec=0 };
> > + int read_timeouts, ret;
> > +
> > + FD_SET(connection_in, &rfds);
> > + for (read_timeouts = 0;;) {
> > + timeo.tv_sec = options.server_alive_interval;
> > + ret = select(connection_in+1, &rfds, NULL, NULL,
> > &timeo);
> > + if (ret < 0) {
>
> I think you should handle errno == EINTR case here restarting select(),
> otherwise any arriving signal would make ssh fail here.
>
> > + fatal("ssh_exchange_identification:
> select
> > read error: %.100s", strerror(errno));
> > + } else if (ret == 0) {
> > + if (++read_timeouts >=
> > options.server_alive_count_max)
> > +
> fatal("ssh_exchange_identification:
> > Timeout, server not responding");
> > + } else
> > + break;
> > + }
> > +
> > + }
> > /* Read other side's version identification. */
> > - struct timeval timeo = { .tv_sec=10, .tv_usec=0 };
> > - setsockopt(connection_in, SOL_SOCKET, SO_SNDTIMEO, &timeo,
> > sizeof(timeo));
> > for (n = 0;;) {
> > for (i = 0; i < sizeof(buf) - 1; i++) {
> > size_t len = atomicio(read, connection_in,
> &buf[i],
> > 1);
> > @@ -490,6 +507,25 @@ ssh_exchange_identification(void)
> > compat20 ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
> > compat20 ? PROTOCOL_MINOR_2 : minor1,
> > SSH_VERSION);
> > + if (options.server_alive_interval) {
> > + fd_set wfds;
> > + struct timeval timeo = { .tv_usec=0 };
> > + int write_timeouts, ret;
> > +
> > + FD_SET(connection_out, &wfds);
> > + for (write_timeouts = 0;;) {
> > + timeo.tv_sec = options.server_alive_interval;
> > + ret = select(connection_out+1, NULL, &wfds,
> NULL,
> > &timeo);
> > + if (ret < 0) {
>
> The same notice as above here.
>
> > + fatal("ssh_exchange_identification:
> select
> > write error: %.100s", strerror(errno));
> > + } else if (ret == 0) {
> > + if (++write_timeouts >=
> > options.server_alive_count_max)
> > +
> fatal("ssh_exchange_identification:
> > Timeout, server not responding");
> > + } else
> > + break;
> > + }
> > +
> > + }
> > if (atomicio(vwrite, connection_out, buf, strlen(buf)) !=
> > strlen(buf))
> > fatal("write: %.100s", strerror(errno));
> > client_version_string = xstrdup(buf);
> >
> --
>
> Sincerely Your, Dan.
>
>
More information about the openssh-unix-dev
mailing list