FWD: [patch] scp + UTF-8

Ingo Schwarze schwarze at usta.de
Wed Jan 20 08:48:53 AEDT 2016


Hi,

Martijn sent the following patch to me in private and agreed that i post
it here.

In any other program in OpenBSD base, i'd probably agree with the
basic approach.  Regarding OpenSSH, however, i worry whether wcwidth(3)
can be used.  While wcwidth(3) is POSIX, it is not ISO C.  Does
OpenSSH target platforms that don't provide wcwidth(3)?  If so,
do you think the problem can be solved by simply providing US-ASCII
support only on such platforms, but no UTF-8 support at all?

If you think we can require wcwidth(3), or we can ditch UTF-8 support
where wcwidth(3) it isn't available, i will work with Martijn to
iron out a few style issues such that we can submit a patch that
is ready for commit.

If you think we cannot require wcwidth(3) but need UTF-8 support
everywhere, i suggest to postpone this until we get djm@'s ssh(1)
banner patch in.  I sent some feedback on that earlier, proposing
to use a whitelist rather than the blacklist proposed by djm@ which
seems dangerous to me.  Should i integrate that suggestion into Damien's
patch, repost the modified patch, and then continue review?  I suspect
there might be one or two other things that could be improved, but i'm
not quite sure yet.

Once that is in, we can do something similar for wcwidth(3).

Yours,
  Ingo

P.S.
This patch also uses mbtowc(3), but i assume that's no problem
because that's ANSI C.

----- Forwarded message from Martijn van Duren -----

From: Martijn van Duren
Date: Sun, 17 Jan 2016 11:13:01 +0100
To: Ingo Schwarze <schwarze at usta.de>
Subject: [patch] scp + UTF-8

[...]

I've tested this under the following conditions:
- It lines out the same way the current scp does for ascii.
- when shrinking the terminal it prints just as much characters
  (width) of the filename as ascii would.
- To support terminals larger then MAX_WINSIZE and still be properly
  indented I increased the buf size to 4x the size of MAX_WINSIZE,
  since the maximum size of an UTF-8 char <should> be 4 bytes.
  It's quite a lot more memory, but I reckon it's better then the
  horrible indentation we have now.

I primarily developed this with scp and only minimally tested it with
sftp, but it should work with both.  sftp already called setlocale,
so no patch is needed for sftp.c.

[...]

Index: progressmeter.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/progressmeter.c,v
retrieving revision 1.41
diff -u -p -r1.41 progressmeter.c
--- progressmeter.c	14 Jan 2015 13:54:13 -0000	1.41
+++ progressmeter.c	17 Jan 2016 09:07:51 -0000
@@ -30,9 +30,11 @@
 #include <errno.h>
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <wchar.h>
 
 #include "progressmeter.h"
 #include "atomicio.h"
@@ -117,7 +119,7 @@ format_size(char *buf, int size, off_t b
 void
 refresh_progress_meter(void)
 {
-	char buf[MAX_WINSIZE + 1];
+	char buf[(MAX_WINSIZE * 4) + 1];
 	time_t now;
 	off_t transferred;
 	double elapsed;
@@ -125,8 +127,10 @@ refresh_progress_meter(void)
 	off_t bytes_left;
 	int cur_speed;
 	int hours, minutes, seconds;
-	int i, len;
-	int file_len;
+	int width, size, buf_width, buf_size;
+	int i;
+	int file_width;
+	wchar_t wc;
 
 	transferred = *counter - (cur_pos ? cur_pos : start_pos);
 	cur_pos = *counter;
@@ -157,16 +161,33 @@ refresh_progress_meter(void)
 
 	/* filename */
 	buf[0] = '\0';
-	file_len = win_size - 35;
-	if (file_len > 0) {
-		len = snprintf(buf, file_len + 1, "\r%s", file);
-		if (len < 0)
-			len = 0;
-		if (len >= file_len + 1)
-			len = file_len;
-		for (i = len; i < file_len; i++)
-			buf[i] = ' ';
-		buf[file_len] = '\0';
+	file_width = win_size - 36;
+	if (file_width > 0) {
+		buf[0] = '\r';
+		for (i = 0, buf_width = 0, buf_size = 1;
+		    file[i] != '\0';) {
+			if ((size = mbtowc(&wc, &(file[i]), MB_CUR_MAX)) == -1) {
+				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
+				buf[buf_size++] = '?';
+				buf_width++;
+				i++;
+			} else if ((width = wcwidth(wc)) == -1) {
+				buf[buf_size++] = '?';
+				buf_width++;
+				i++;
+			} else if (buf_width + width <= file_width &&
+			    buf_size + size <= (int) sizeof(buf) - 35) {
+				memcpy(&(buf[buf_size]), &(file[i]), size);
+				i += size;
+				buf_size += size;
+				buf_width += width;
+			} else
+				break;
+		}
+		for (; buf_width < file_width &&
+		    buf_size < (int) sizeof(buf) - 35; buf_width++)
+			buf[buf_size++] = ' ';
+		buf[buf_size] = '\0';
 	}
 
 	/* percent of transfer done */
@@ -174,18 +195,18 @@ refresh_progress_meter(void)
 		percent = ((float)cur_pos / end_pos) * 100;
 	else
 		percent = 100;
-	snprintf(buf + strlen(buf), win_size - strlen(buf),
+	snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
 	    " %3d%% ", percent);
 
 	/* amount transferred */
-	format_size(buf + strlen(buf), win_size - strlen(buf),
+	format_size(buf + strlen(buf), sizeof(buf) - strlen(buf),
 	    cur_pos);
-	strlcat(buf, " ", win_size);
+	strlcat(buf, " ", sizeof(buf));
 
 	/* bandwidth usage */
-	format_rate(buf + strlen(buf), win_size - strlen(buf),
+	format_rate(buf + strlen(buf), sizeof(buf) - strlen(buf),
 	    (off_t)bytes_per_second);
-	strlcat(buf, "/s ", win_size);
+	strlcat(buf, "/s ", sizeof(buf));
 
 	/* ETA */
 	if (!transferred)
@@ -194,9 +215,9 @@ refresh_progress_meter(void)
 		stalled = 0;
 
 	if (stalled >= STALL_TIME)
-		strlcat(buf, "- stalled -", win_size);
+		strlcat(buf, "- stalled -", sizeof(buf));
 	else if (bytes_per_second == 0 && bytes_left)
-		strlcat(buf, "  --:-- ETA", win_size);
+		strlcat(buf, "  --:-- ETA", sizeof(buf));
 	else {
 		if (bytes_left > 0)
 			seconds = bytes_left / bytes_per_second;
@@ -209,19 +230,21 @@ refresh_progress_meter(void)
 		seconds -= minutes * 60;
 
 		if (hours != 0)
-			snprintf(buf + strlen(buf), win_size - strlen(buf),
+			snprintf(buf + strlen(buf),
+			    sizeof(buf) - strlen(buf),
 			    "%d:%02d:%02d", hours, minutes, seconds);
 		else
-			snprintf(buf + strlen(buf), win_size - strlen(buf),
+			snprintf(buf + strlen(buf),
+			    sizeof(buf) - strlen(buf),
 			    "  %02d:%02d", minutes, seconds);
 
 		if (bytes_left > 0)
-			strlcat(buf, " ETA", win_size);
+			strlcat(buf, " ETA", sizeof(buf));
 		else
-			strlcat(buf, "    ", win_size);
+			strlcat(buf, "    ", sizeof(buf));
 	}
 
-	atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
+	atomicio(vwrite, STDOUT_FILENO, buf, strlen(buf));
 	last_update = now;
 }
 
Index: scp.c
===================================================================
RCS file: /cvs/src/usr.bin/ssh/scp.c,v
retrieving revision 1.184
diff -u -p -r1.184 scp.c
--- scp.c	27 Nov 2015 00:49:31 -0000	1.184
+++ scp.c	17 Jan 2016 09:07:52 -0000
@@ -83,6 +83,7 @@
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <locale.h>
 #include <pwd.h>
 #include <signal.h>
 #include <stdarg.h>
@@ -501,6 +502,8 @@ main(int argc, char **argv)
 	    targetshouldbedirectory ? " -d" : "");
 
 	(void) signal(SIGPIPE, lostconn);
+
+	(void) setlocale(LC_CTYPE, "");
 
 	if ((targ = colon(argv[argc - 1])))	/* Dest is remote host. */
 		toremote(targ, argc, argv);

----- End forwarded message -----


More information about the openssh-unix-dev mailing list