FWD: [patch] scp + UTF-8

Damien Miller djm at mindrot.org
Wed Jan 27 19:05:41 AEDT 2016


POSIX is fine, but why not prepare a filtered version of the filename
once instead of doing it on every output?

On Tue, 19 Jan 2016, Ingo Schwarze wrote:

> Hi,
> 
> Martijn sent the following patch to me in private and agreed that i post
> it here.
> 
> In any other program in OpenBSD base, i'd probably agree with the
> basic approach.  Regarding OpenSSH, however, i worry whether wcwidth(3)
> can be used.  While wcwidth(3) is POSIX, it is not ISO C.  Does
> OpenSSH target platforms that don't provide wcwidth(3)?  If so,
> do you think the problem can be solved by simply providing US-ASCII
> support only on such platforms, but no UTF-8 support at all?
> 
> If you think we can require wcwidth(3), or we can ditch UTF-8 support
> where wcwidth(3) it isn't available, i will work with Martijn to
> iron out a few style issues such that we can submit a patch that
> is ready for commit.
> 
> If you think we cannot require wcwidth(3) but need UTF-8 support
> everywhere, i suggest to postpone this until we get djm@'s ssh(1)
> banner patch in.  I sent some feedback on that earlier, proposing
> to use a whitelist rather than the blacklist proposed by djm@ which
> seems dangerous to me.  Should i integrate that suggestion into Damien's
> patch, repost the modified patch, and then continue review?  I suspect
> there might be one or two other things that could be improved, but i'm
> not quite sure yet.
> 
> Once that is in, we can do something similar for wcwidth(3).
> 
> Yours,
>   Ingo
> 
> P.S.
> This patch also uses mbtowc(3), but i assume that's no problem
> because that's ANSI C.
> 
> ----- Forwarded message from Martijn van Duren -----
> 
> From: Martijn van Duren
> Date: Sun, 17 Jan 2016 11:13:01 +0100
> To: Ingo Schwarze <schwarze at usta.de>
> Subject: [patch] scp + UTF-8
> 
> [...]
> 
> I've tested this under the following conditions:
> - It lines out the same way the current scp does for ascii.
> - when shrinking the terminal it prints just as much characters
>   (width) of the filename as ascii would.
> - To support terminals larger then MAX_WINSIZE and still be properly
>   indented I increased the buf size to 4x the size of MAX_WINSIZE,
>   since the maximum size of an UTF-8 char <should> be 4 bytes.
>   It's quite a lot more memory, but I reckon it's better then the
>   horrible indentation we have now.
> 
> I primarily developed this with scp and only minimally tested it with
> sftp, but it should work with both.  sftp already called setlocale,
> so no patch is needed for sftp.c.
> 
> [...]
> 
> Index: progressmeter.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ssh/progressmeter.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 progressmeter.c
> --- progressmeter.c	14 Jan 2015 13:54:13 -0000	1.41
> +++ progressmeter.c	17 Jan 2016 09:07:51 -0000
> @@ -30,9 +30,11 @@
>  #include <errno.h>
>  #include <signal.h>
>  #include <stdio.h>
> +#include <stdlib.h>
>  #include <string.h>
>  #include <time.h>
>  #include <unistd.h>
> +#include <wchar.h>
>  
>  #include "progressmeter.h"
>  #include "atomicio.h"
> @@ -117,7 +119,7 @@ format_size(char *buf, int size, off_t b
>  void
>  refresh_progress_meter(void)
>  {
> -	char buf[MAX_WINSIZE + 1];
> +	char buf[(MAX_WINSIZE * 4) + 1];
>  	time_t now;
>  	off_t transferred;
>  	double elapsed;
> @@ -125,8 +127,10 @@ refresh_progress_meter(void)
>  	off_t bytes_left;
>  	int cur_speed;
>  	int hours, minutes, seconds;
> -	int i, len;
> -	int file_len;
> +	int width, size, buf_width, buf_size;
> +	int i;
> +	int file_width;
> +	wchar_t wc;
>  
>  	transferred = *counter - (cur_pos ? cur_pos : start_pos);
>  	cur_pos = *counter;
> @@ -157,16 +161,33 @@ refresh_progress_meter(void)
>  
>  	/* filename */
>  	buf[0] = '\0';
> -	file_len = win_size - 35;
> -	if (file_len > 0) {
> -		len = snprintf(buf, file_len + 1, "\r%s", file);
> -		if (len < 0)
> -			len = 0;
> -		if (len >= file_len + 1)
> -			len = file_len;
> -		for (i = len; i < file_len; i++)
> -			buf[i] = ' ';
> -		buf[file_len] = '\0';
> +	file_width = win_size - 36;
> +	if (file_width > 0) {
> +		buf[0] = '\r';
> +		for (i = 0, buf_width = 0, buf_size = 1;
> +		    file[i] != '\0';) {
> +			if ((size = mbtowc(&wc, &(file[i]), MB_CUR_MAX)) == -1) {
> +				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
> +				buf[buf_size++] = '?';
> +				buf_width++;
> +				i++;
> +			} else if ((width = wcwidth(wc)) == -1) {
> +				buf[buf_size++] = '?';
> +				buf_width++;
> +				i++;
> +			} else if (buf_width + width <= file_width &&
> +			    buf_size + size <= (int) sizeof(buf) - 35) {
> +				memcpy(&(buf[buf_size]), &(file[i]), size);
> +				i += size;
> +				buf_size += size;
> +				buf_width += width;
> +			} else
> +				break;
> +		}
> +		for (; buf_width < file_width &&
> +		    buf_size < (int) sizeof(buf) - 35; buf_width++)
> +			buf[buf_size++] = ' ';
> +		buf[buf_size] = '\0';
>  	}
>  
>  	/* percent of transfer done */
> @@ -174,18 +195,18 @@ refresh_progress_meter(void)
>  		percent = ((float)cur_pos / end_pos) * 100;
>  	else
>  		percent = 100;
> -	snprintf(buf + strlen(buf), win_size - strlen(buf),
> +	snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
>  	    " %3d%% ", percent);
>  
>  	/* amount transferred */
> -	format_size(buf + strlen(buf), win_size - strlen(buf),
> +	format_size(buf + strlen(buf), sizeof(buf) - strlen(buf),
>  	    cur_pos);
> -	strlcat(buf, " ", win_size);
> +	strlcat(buf, " ", sizeof(buf));
>  
>  	/* bandwidth usage */
> -	format_rate(buf + strlen(buf), win_size - strlen(buf),
> +	format_rate(buf + strlen(buf), sizeof(buf) - strlen(buf),
>  	    (off_t)bytes_per_second);
> -	strlcat(buf, "/s ", win_size);
> +	strlcat(buf, "/s ", sizeof(buf));
>  
>  	/* ETA */
>  	if (!transferred)
> @@ -194,9 +215,9 @@ refresh_progress_meter(void)
>  		stalled = 0;
>  
>  	if (stalled >= STALL_TIME)
> -		strlcat(buf, "- stalled -", win_size);
> +		strlcat(buf, "- stalled -", sizeof(buf));
>  	else if (bytes_per_second == 0 && bytes_left)
> -		strlcat(buf, "  --:-- ETA", win_size);
> +		strlcat(buf, "  --:-- ETA", sizeof(buf));
>  	else {
>  		if (bytes_left > 0)
>  			seconds = bytes_left / bytes_per_second;
> @@ -209,19 +230,21 @@ refresh_progress_meter(void)
>  		seconds -= minutes * 60;
>  
>  		if (hours != 0)
> -			snprintf(buf + strlen(buf), win_size - strlen(buf),
> +			snprintf(buf + strlen(buf),
> +			    sizeof(buf) - strlen(buf),
>  			    "%d:%02d:%02d", hours, minutes, seconds);
>  		else
> -			snprintf(buf + strlen(buf), win_size - strlen(buf),
> +			snprintf(buf + strlen(buf),
> +			    sizeof(buf) - strlen(buf),
>  			    "  %02d:%02d", minutes, seconds);
>  
>  		if (bytes_left > 0)
> -			strlcat(buf, " ETA", win_size);
> +			strlcat(buf, " ETA", sizeof(buf));
>  		else
> -			strlcat(buf, "    ", win_size);
> +			strlcat(buf, "    ", sizeof(buf));
>  	}
>  
> -	atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
> +	atomicio(vwrite, STDOUT_FILENO, buf, strlen(buf));
>  	last_update = now;
>  }
>  
> Index: scp.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ssh/scp.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 scp.c
> --- scp.c	27 Nov 2015 00:49:31 -0000	1.184
> +++ scp.c	17 Jan 2016 09:07:52 -0000
> @@ -83,6 +83,7 @@
>  #include <dirent.h>
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <locale.h>
>  #include <pwd.h>
>  #include <signal.h>
>  #include <stdarg.h>
> @@ -501,6 +502,8 @@ main(int argc, char **argv)
>  	    targetshouldbedirectory ? " -d" : "");
>  
>  	(void) signal(SIGPIPE, lostconn);
> +
> +	(void) setlocale(LC_CTYPE, "");
>  
>  	if ((targ = colon(argv[argc - 1])))	/* Dest is remote host. */
>  		toremote(targ, argc, argv);
> 
> ----- End forwarded message -----
> _______________________________________________
> openssh-unix-dev mailing list
> openssh-unix-dev at mindrot.org
> https://lists.mindrot.org/mailman/listinfo/openssh-unix-dev
> 


More information about the openssh-unix-dev mailing list