FWD: [patch] scp + UTF-8
Damien Miller
djm at mindrot.org
Wed Jan 27 19:05:41 AEDT 2016
POSIX is fine, but why not prepare a filtered version of the filename
once instead of doing it on every output?
On Tue, 19 Jan 2016, Ingo Schwarze wrote:
> Hi,
>
> Martijn sent the following patch to me in private and agreed that i post
> it here.
>
> In any other program in OpenBSD base, i'd probably agree with the
> basic approach. Regarding OpenSSH, however, i worry whether wcwidth(3)
> can be used. While wcwidth(3) is POSIX, it is not ISO C. Does
> OpenSSH target platforms that don't provide wcwidth(3)? If so,
> do you think the problem can be solved by simply providing US-ASCII
> support only on such platforms, but no UTF-8 support at all?
>
> If you think we can require wcwidth(3), or we can ditch UTF-8 support
> where wcwidth(3) it isn't available, i will work with Martijn to
> iron out a few style issues such that we can submit a patch that
> is ready for commit.
>
> If you think we cannot require wcwidth(3) but need UTF-8 support
> everywhere, i suggest to postpone this until we get djm@'s ssh(1)
> banner patch in. I sent some feedback on that earlier, proposing
> to use a whitelist rather than the blacklist proposed by djm@ which
> seems dangerous to me. Should i integrate that suggestion into Damien's
> patch, repost the modified patch, and then continue review? I suspect
> there might be one or two other things that could be improved, but i'm
> not quite sure yet.
>
> Once that is in, we can do something similar for wcwidth(3).
>
> Yours,
> Ingo
>
> P.S.
> This patch also uses mbtowc(3), but i assume that's no problem
> because that's ANSI C.
>
> ----- Forwarded message from Martijn van Duren -----
>
> From: Martijn van Duren
> Date: Sun, 17 Jan 2016 11:13:01 +0100
> To: Ingo Schwarze <schwarze at usta.de>
> Subject: [patch] scp + UTF-8
>
> [...]
>
> I've tested this under the following conditions:
> - It lines out the same way the current scp does for ascii.
> - when shrinking the terminal it prints just as much characters
> (width) of the filename as ascii would.
> - To support terminals larger then MAX_WINSIZE and still be properly
> indented I increased the buf size to 4x the size of MAX_WINSIZE,
> since the maximum size of an UTF-8 char <should> be 4 bytes.
> It's quite a lot more memory, but I reckon it's better then the
> horrible indentation we have now.
>
> I primarily developed this with scp and only minimally tested it with
> sftp, but it should work with both. sftp already called setlocale,
> so no patch is needed for sftp.c.
>
> [...]
>
> Index: progressmeter.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ssh/progressmeter.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 progressmeter.c
> --- progressmeter.c 14 Jan 2015 13:54:13 -0000 1.41
> +++ progressmeter.c 17 Jan 2016 09:07:51 -0000
> @@ -30,9 +30,11 @@
> #include <errno.h>
> #include <signal.h>
> #include <stdio.h>
> +#include <stdlib.h>
> #include <string.h>
> #include <time.h>
> #include <unistd.h>
> +#include <wchar.h>
>
> #include "progressmeter.h"
> #include "atomicio.h"
> @@ -117,7 +119,7 @@ format_size(char *buf, int size, off_t b
> void
> refresh_progress_meter(void)
> {
> - char buf[MAX_WINSIZE + 1];
> + char buf[(MAX_WINSIZE * 4) + 1];
> time_t now;
> off_t transferred;
> double elapsed;
> @@ -125,8 +127,10 @@ refresh_progress_meter(void)
> off_t bytes_left;
> int cur_speed;
> int hours, minutes, seconds;
> - int i, len;
> - int file_len;
> + int width, size, buf_width, buf_size;
> + int i;
> + int file_width;
> + wchar_t wc;
>
> transferred = *counter - (cur_pos ? cur_pos : start_pos);
> cur_pos = *counter;
> @@ -157,16 +161,33 @@ refresh_progress_meter(void)
>
> /* filename */
> buf[0] = '\0';
> - file_len = win_size - 35;
> - if (file_len > 0) {
> - len = snprintf(buf, file_len + 1, "\r%s", file);
> - if (len < 0)
> - len = 0;
> - if (len >= file_len + 1)
> - len = file_len;
> - for (i = len; i < file_len; i++)
> - buf[i] = ' ';
> - buf[file_len] = '\0';
> + file_width = win_size - 36;
> + if (file_width > 0) {
> + buf[0] = '\r';
> + for (i = 0, buf_width = 0, buf_size = 1;
> + file[i] != '\0';) {
> + if ((size = mbtowc(&wc, &(file[i]), MB_CUR_MAX)) == -1) {
> + (void)mbtowc(NULL, NULL, MB_CUR_MAX);
> + buf[buf_size++] = '?';
> + buf_width++;
> + i++;
> + } else if ((width = wcwidth(wc)) == -1) {
> + buf[buf_size++] = '?';
> + buf_width++;
> + i++;
> + } else if (buf_width + width <= file_width &&
> + buf_size + size <= (int) sizeof(buf) - 35) {
> + memcpy(&(buf[buf_size]), &(file[i]), size);
> + i += size;
> + buf_size += size;
> + buf_width += width;
> + } else
> + break;
> + }
> + for (; buf_width < file_width &&
> + buf_size < (int) sizeof(buf) - 35; buf_width++)
> + buf[buf_size++] = ' ';
> + buf[buf_size] = '\0';
> }
>
> /* percent of transfer done */
> @@ -174,18 +195,18 @@ refresh_progress_meter(void)
> percent = ((float)cur_pos / end_pos) * 100;
> else
> percent = 100;
> - snprintf(buf + strlen(buf), win_size - strlen(buf),
> + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
> " %3d%% ", percent);
>
> /* amount transferred */
> - format_size(buf + strlen(buf), win_size - strlen(buf),
> + format_size(buf + strlen(buf), sizeof(buf) - strlen(buf),
> cur_pos);
> - strlcat(buf, " ", win_size);
> + strlcat(buf, " ", sizeof(buf));
>
> /* bandwidth usage */
> - format_rate(buf + strlen(buf), win_size - strlen(buf),
> + format_rate(buf + strlen(buf), sizeof(buf) - strlen(buf),
> (off_t)bytes_per_second);
> - strlcat(buf, "/s ", win_size);
> + strlcat(buf, "/s ", sizeof(buf));
>
> /* ETA */
> if (!transferred)
> @@ -194,9 +215,9 @@ refresh_progress_meter(void)
> stalled = 0;
>
> if (stalled >= STALL_TIME)
> - strlcat(buf, "- stalled -", win_size);
> + strlcat(buf, "- stalled -", sizeof(buf));
> else if (bytes_per_second == 0 && bytes_left)
> - strlcat(buf, " --:-- ETA", win_size);
> + strlcat(buf, " --:-- ETA", sizeof(buf));
> else {
> if (bytes_left > 0)
> seconds = bytes_left / bytes_per_second;
> @@ -209,19 +230,21 @@ refresh_progress_meter(void)
> seconds -= minutes * 60;
>
> if (hours != 0)
> - snprintf(buf + strlen(buf), win_size - strlen(buf),
> + snprintf(buf + strlen(buf),
> + sizeof(buf) - strlen(buf),
> "%d:%02d:%02d", hours, minutes, seconds);
> else
> - snprintf(buf + strlen(buf), win_size - strlen(buf),
> + snprintf(buf + strlen(buf),
> + sizeof(buf) - strlen(buf),
> " %02d:%02d", minutes, seconds);
>
> if (bytes_left > 0)
> - strlcat(buf, " ETA", win_size);
> + strlcat(buf, " ETA", sizeof(buf));
> else
> - strlcat(buf, " ", win_size);
> + strlcat(buf, " ", sizeof(buf));
> }
>
> - atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
> + atomicio(vwrite, STDOUT_FILENO, buf, strlen(buf));
> last_update = now;
> }
>
> Index: scp.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ssh/scp.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 scp.c
> --- scp.c 27 Nov 2015 00:49:31 -0000 1.184
> +++ scp.c 17 Jan 2016 09:07:52 -0000
> @@ -83,6 +83,7 @@
> #include <dirent.h>
> #include <errno.h>
> #include <fcntl.h>
> +#include <locale.h>
> #include <pwd.h>
> #include <signal.h>
> #include <stdarg.h>
> @@ -501,6 +502,8 @@ main(int argc, char **argv)
> targetshouldbedirectory ? " -d" : "");
>
> (void) signal(SIGPIPE, lostconn);
> +
> + (void) setlocale(LC_CTYPE, "");
>
> if ((targ = colon(argv[argc - 1]))) /* Dest is remote host. */
> toremote(targ, argc, argv);
>
> ----- End forwarded message -----
> _______________________________________________
> openssh-unix-dev mailing list
> openssh-unix-dev at mindrot.org
> https://lists.mindrot.org/mailman/listinfo/openssh-unix-dev
>
More information about the openssh-unix-dev
mailing list