From amdmi3@amdmi3.ru  Thu Jan 19 20:14:53 2012
Return-Path: <amdmi3@amdmi3.ru>
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 66B5C1065677
	for <freebsd-gnats-submit@freebsd.org>; Thu, 19 Jan 2012 20:14:53 +0000 (UTC)
	(envelope-from amdmi3@amdmi3.ru)
Received: from smtp.timeweb.ru (smtp.timeweb.ru [92.53.116.15])
	by mx1.freebsd.org (Postfix) with ESMTP id 1C8B68FC0C
	for <freebsd-gnats-submit@freebsd.org>; Thu, 19 Jan 2012 20:14:52 +0000 (UTC)
Received: from [213.148.20.85] (helo=hive.panopticon)
	by smtp.timeweb.ru with esmtpsa (TLSv1:CAMELLIA256-SHA:256)
	(Exim 4.76)
	(envelope-from <amdmi3@amdmi3.ru>)
	id 1Rny7u-0004Eb-0i
	for FreeBSD-gnats-submit@freebsd.org; Thu, 19 Jan 2012 23:58:26 +0400
Received: from hades.panopticon (hades.panopticon [192.168.0.32])
	by hive.panopticon (Postfix) with ESMTP id 3ED29B84D
	for <FreeBSD-gnats-submit@freebsd.org>; Thu, 19 Jan 2012 23:58:25 +0400 (MSK)
Received: by hades.panopticon (Postfix, from userid 1000)
	id 3233CA2D; Thu, 19 Jan 2012 23:58:25 +0400 (MSK)
Message-Id: <20120119195825.3233CA2D@hades.panopticon>
Date: Thu, 19 Jan 2012 23:58:25 +0400 (MSK)
From: Dmitry Marakasov <amdmi3@FreeBSD.org>
Reply-To: Dmitry Marakasov <amdmi3@FreeBSD.org>
To: FreeBSD-gnats-submit@freebsd.org
Cc:
Subject: [patch] sbin/write: add multibyte character support
X-Send-Pr-Version: 3.113
X-GNATS-Notify:

>Number:         164317
>Category:       bin
>Synopsis:       [patch] write(1): add multibyte character support
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    glebius
>State:          closed
>Quarter:        
>Keywords:       
>Date-Required:  
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Thu Jan 19 20:20:08 UTC 2012
>Closed-Date:    Sat Apr 14 09:44:08 UTC 2012
>Last-Modified:  Sat Apr 14 09:50:11 UTC 2012
>Originator:     Dmitry Marakasov
>Release:        FreeBSD 9.0-RC2 amd64
>Organization:
>Environment:
System: FreeBSD hades.panopticon 9.0-RC2 FreeBSD 9.0-RC2 #0: Tue Nov 29 07:18:03 MSK 2011 root@hades.panopticon:/usr/work/usr/src/sys/HADES amd64


>Description:
Currently write(1) doesn't handle UTF-8 locales at all:

    (this is Russian)
# echo "Проверка" | write amdmi3 pts/29

Message from amdmi3@hades.panopticon on pts/29 at 23:31 ...
M-PM-^_M-QM-^@M-PM->M-PM-2M-PM-5M-QM-^@M-PM-:M-PM-0
EOF

The checks used in the character printing routine (((*s & 0x80) && *s < 0xA0)) seem to assume a specific encoding (for example, CP866 has letters in 0x80-0xA0), so this will not work correctly even for 8-bit locales.

However, the utility is easily convertible to wchar_t, which should handle them all, and the patch for it is attached.

% (echo "Проверка"; echo "Some control characters: \b\t^[[D^[[C^[[A^[[B^[") | ./write amdmi3 pts/29

Message from amdmi3@hades.panopticon on pts/29 at 23:43 ...
Проверка
Some control characters: <0x8>  <0x1B>[D<0x1B>[C<0x1B>[A<0x1B>[B<0x1B>
EOF

The way of displaying non-printable characters is debatable, but since one can assume neither that the locale is UTF-based nor that a wchar_t value is somehow linked to a codepoint, it would be inappropriate to use notations like U+%X or \u%X or &#%d; and/or to modify wchar_t values with bitwise operations. A notation like <0x%X>, however, is charset-agnostic and pretty readable, so I think it's quite suitable here.

>How-To-Repeat:
>Fix:
Index: write.1
===================================================================
--- write.1	(revision 230334)
+++ write.1	(working copy)
@@ -107,7 +107,3 @@
 terminal, not the receiver's (which
 .Nm
 has no way of knowing).
-.Pp
-The
-.Nm
-utility does not recognize multibyte characters.
Index: write.c
===================================================================
--- write.c	(revision 230334)
+++ write.c	(working copy)
@@ -60,12 +60,14 @@
 #include <string.h>
 #include <unistd.h>
 #include <utmpx.h>
+#include <wchar.h>
+#include <wctype.h>
 
 void done(int);
 void do_write(char *, char *, uid_t);
 static void usage(void);
 int term_chk(char *, int *, time_t *, int);
-void wr_fputs(unsigned char *s);
+void wr_fputs(wchar_t *s);
 void search_utmp(char *, char *, char *, uid_t);
 int utmp_chk(char *, char *);
 
@@ -243,7 +245,8 @@
 	char *nows;
 	struct passwd *pwd;
 	time_t now;
-	char path[MAXPATHLEN], host[MAXHOSTNAMELEN], line[512];
+	char path[MAXPATHLEN], host[MAXHOSTNAMELEN];
+	wchar_t line[512];
 
 	/* Determine our login name before we reopen() stdout */
 	if ((login = getlogin()) == NULL) {
@@ -269,7 +272,7 @@
 	(void)printf("\r\n\007\007\007Message from %s@%s on %s at %s ...\r\n",
 	    login, host, mytty, nows + 11);
 
-	while (fgets(line, sizeof(line), stdin) != NULL)
+	while (fgetws(line, sizeof(line)/sizeof(wchar_t), stdin) != NULL)
 		wr_fputs(line);
 }
 
@@ -288,30 +291,20 @@
  *     turns \n into \r\n
  */
 void
-wr_fputs(unsigned char *s)
+wr_fputs(wchar_t *s)
 {
 
-#define	PUTC(c)	if (putchar(c) == EOF) err(1, NULL);
+#define	PUTC(c)	if (putwchar(c) == WEOF) err(1, NULL);
 
-	for (; *s != '\0'; ++s) {
-		if (*s == '\n') {
-			PUTC('\r');
-		} else if (((*s & 0x80) && *s < 0xA0) ||
-			   /* disable upper controls */
-			   (!isprint(*s) && !isspace(*s) &&
-			    *s != '\a' && *s != '\b')
-			  ) {
-			if (*s & 0x80) {
-				*s &= ~0x80;
-				PUTC('M');
-				PUTC('-');
-			}
-			if (iscntrl(*s)) {
-				*s ^= 0x40;
-				PUTC('^');
-			}
+	for (; *s != L'\0'; ++s) {
+		if (*s == L'\n') {
+			PUTC(L'\r');
+			PUTC(L'\n');
+		} else if (iswprint(*s) || iswspace(*s)) {
+			PUTC(*s);
+		} else {
+			wprintf(L"<0x%X>", *s);
 		}
-		PUTC(*s);
 	}
 	return;
 #undef PUTC
>Release-Note:
>Audit-Trail:

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: bin/164317: commit references a PR
Date: Mon, 13 Feb 2012 14:40:25 +0000 (UTC)

 Author: glebius
 Date: Mon Feb 13 14:40:15 2012
 New Revision: 231586
 URL: http://svn.freebsd.org/changeset/base/231586
 
 Log:
   Fix write(1) to support wide characters.
   
   Submitted by:	amdmi3
   PR:		bin/164317
 
 Modified:
   head/usr.bin/write/write.1
   head/usr.bin/write/write.c
 
 Modified: head/usr.bin/write/write.1
 ==============================================================================
 --- head/usr.bin/write/write.1	Mon Feb 13 13:07:56 2012	(r231585)
 +++ head/usr.bin/write/write.1	Mon Feb 13 14:40:15 2012	(r231586)
 @@ -31,7 +31,7 @@
  .\"     @(#)write.1	8.1 (Berkeley) 6/6/93
  .\" $FreeBSD$
  .\"
 -.Dd July 17, 2004
 +.Dd February 13, 2012
  .Dt WRITE 1
  .Os
  .Sh NAME
 @@ -107,7 +107,3 @@ setting is used to determine which chara
  terminal, not the receiver's (which
  .Nm
  has no way of knowing).
 -.Pp
 -The
 -.Nm
 -utility does not recognize multibyte characters.
 
 Modified: head/usr.bin/write/write.c
 ==============================================================================
 --- head/usr.bin/write/write.c	Mon Feb 13 13:07:56 2012	(r231585)
 +++ head/usr.bin/write/write.c	Mon Feb 13 14:40:15 2012	(r231586)
 @@ -60,12 +60,14 @@ __FBSDID("$FreeBSD$");
  #include <string.h>
  #include <unistd.h>
  #include <utmpx.h>
 +#include <wchar.h>
 +#include <wctype.h>
  
  void done(int);
  void do_write(char *, char *, uid_t);
  static void usage(void);
  int term_chk(char *, int *, time_t *, int);
 -void wr_fputs(unsigned char *s);
 +void wr_fputs(wchar_t *s);
  void search_utmp(char *, char *, char *, uid_t);
  int utmp_chk(char *, char *);
  
 @@ -243,7 +245,8 @@ do_write(char *tty, char *mytty, uid_t m
  	char *nows;
  	struct passwd *pwd;
  	time_t now;
 -	char path[MAXPATHLEN], host[MAXHOSTNAMELEN], line[512];
 +	char path[MAXPATHLEN], host[MAXHOSTNAMELEN];
 +	wchar_t line[512];
  
  	/* Determine our login name before we reopen() stdout */
  	if ((login = getlogin()) == NULL) {
 @@ -269,7 +272,7 @@ do_write(char *tty, char *mytty, uid_t m
  	(void)printf("\r\n\007\007\007Message from %s@%s on %s at %s ...\r\n",
  	    login, host, mytty, nows + 11);
  
 -	while (fgets(line, sizeof(line), stdin) != NULL)
 +	while (fgetws(line, sizeof(line)/sizeof(wchar_t), stdin) != NULL)
  		wr_fputs(line);
  }
  
 @@ -288,30 +291,20 @@ done(int n __unused)
   *     turns \n into \r\n
   */
  void
 -wr_fputs(unsigned char *s)
 +wr_fputs(wchar_t *s)
  {
  
 -#define	PUTC(c)	if (putchar(c) == EOF) err(1, NULL);
 +#define	PUTC(c)	if (putwchar(c) == WEOF) err(1, NULL);
  
 -	for (; *s != '\0'; ++s) {
 -		if (*s == '\n') {
 -			PUTC('\r');
 -		} else if (((*s & 0x80) && *s < 0xA0) ||
 -			   /* disable upper controls */
 -			   (!isprint(*s) && !isspace(*s) &&
 -			    *s != '\a' && *s != '\b')
 -			  ) {
 -			if (*s & 0x80) {
 -				*s &= ~0x80;
 -				PUTC('M');
 -				PUTC('-');
 -			}
 -			if (iscntrl(*s)) {
 -				*s ^= 0x40;
 -				PUTC('^');
 -			}
 +	for (; *s != L'\0'; ++s) {
 +		if (*s == L'\n') {
 +			PUTC(L'\r');
 +			PUTC(L'\n');
 +		} else if (iswprint(*s) || iswspace(*s)) {
 +			PUTC(*s);
 +		} else {
 +			wprintf(L"<0x%X>", *s);
  		}
 -		PUTC(*s);
  	}
  	return;
  #undef PUTC
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
State-Changed-From-To: open->patched 
State-Changed-By: glebius 
State-Changed-When: Mon Feb 13 14:57:51 UTC 2012 
State-Changed-Why:  
Committed, thanks! 


Responsible-Changed-From-To: freebsd-bugs->glebius 
Responsible-Changed-By: glebius 
Responsible-Changed-When: Mon Feb 13 14:57:51 UTC 2012 
Responsible-Changed-Why:  
Grab. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=164317 
State-Changed-From-To: patched->closed 
State-Changed-By: glebius 
State-Changed-When: Sat Apr 14 09:43:56 UTC 2012 
State-Changed-Why:  
Merged to stable/9. 

http://www.freebsd.org/cgi/query-pr.cgi?pr=164317 

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: bin/164317: commit references a PR
Date: Sat, 14 Apr 2012 09:44:08 +0000 (UTC)

 Author: glebius
 Date: Sat Apr 14 09:43:58 2012
 New Revision: 234274
 URL: http://svn.freebsd.org/changeset/base/234274
 
 Log:
   Merge 231586:
     Fix write(1) to support wide characters.
   
     Submitted by: amdmi3
     PR:           bin/164317
 
 Modified:
   stable/9/usr.bin/write/write.1
   stable/9/usr.bin/write/write.c
 Directory Properties:
   stable/9/usr.bin/write/   (props changed)
 
 Modified: stable/9/usr.bin/write/write.1
 ==============================================================================
 --- stable/9/usr.bin/write/write.1	Sat Apr 14 09:21:06 2012	(r234273)
 +++ stable/9/usr.bin/write/write.1	Sat Apr 14 09:43:58 2012	(r234274)
 @@ -31,7 +31,7 @@
  .\"     @(#)write.1	8.1 (Berkeley) 6/6/93
  .\" $FreeBSD$
  .\"
 -.Dd July 17, 2004
 +.Dd February 13, 2012
  .Dt WRITE 1
  .Os
  .Sh NAME
 @@ -107,7 +107,3 @@ setting is used to determine which chara
  terminal, not the receiver's (which
  .Nm
  has no way of knowing).
 -.Pp
 -The
 -.Nm
 -utility does not recognize multibyte characters.
 
 Modified: stable/9/usr.bin/write/write.c
 ==============================================================================
 --- stable/9/usr.bin/write/write.c	Sat Apr 14 09:21:06 2012	(r234273)
 +++ stable/9/usr.bin/write/write.c	Sat Apr 14 09:43:58 2012	(r234274)
 @@ -60,12 +60,14 @@ __FBSDID("$FreeBSD$");
  #include <string.h>
  #include <unistd.h>
  #include <utmpx.h>
 +#include <wchar.h>
 +#include <wctype.h>
  
  void done(int);
  void do_write(char *, char *, uid_t);
  static void usage(void);
  int term_chk(char *, int *, time_t *, int);
 -void wr_fputs(unsigned char *s);
 +void wr_fputs(wchar_t *s);
  void search_utmp(char *, char *, char *, uid_t);
  int utmp_chk(char *, char *);
  
 @@ -243,7 +245,8 @@ do_write(char *tty, char *mytty, uid_t m
  	char *nows;
  	struct passwd *pwd;
  	time_t now;
 -	char path[MAXPATHLEN], host[MAXHOSTNAMELEN], line[512];
 +	char path[MAXPATHLEN], host[MAXHOSTNAMELEN];
 +	wchar_t line[512];
  
  	/* Determine our login name before we reopen() stdout */
  	if ((login = getlogin()) == NULL) {
 @@ -269,7 +272,7 @@ do_write(char *tty, char *mytty, uid_t m
  	(void)printf("\r\n\007\007\007Message from %s@%s on %s at %s ...\r\n",
  	    login, host, mytty, nows + 11);
  
 -	while (fgets(line, sizeof(line), stdin) != NULL)
 +	while (fgetws(line, sizeof(line)/sizeof(wchar_t), stdin) != NULL)
  		wr_fputs(line);
  }
  
 @@ -288,30 +291,20 @@ done(int n __unused)
   *     turns \n into \r\n
   */
  void
 -wr_fputs(unsigned char *s)
 +wr_fputs(wchar_t *s)
  {
  
 -#define	PUTC(c)	if (putchar(c) == EOF) err(1, NULL);
 +#define	PUTC(c)	if (putwchar(c) == WEOF) err(1, NULL);
  
 -	for (; *s != '\0'; ++s) {
 -		if (*s == '\n') {
 -			PUTC('\r');
 -		} else if (((*s & 0x80) && *s < 0xA0) ||
 -			   /* disable upper controls */
 -			   (!isprint(*s) && !isspace(*s) &&
 -			    *s != '\a' && *s != '\b')
 -			  ) {
 -			if (*s & 0x80) {
 -				*s &= ~0x80;
 -				PUTC('M');
 -				PUTC('-');
 -			}
 -			if (iscntrl(*s)) {
 -				*s ^= 0x40;
 -				PUTC('^');
 -			}
 +	for (; *s != L'\0'; ++s) {
 +		if (*s == L'\n') {
 +			PUTC(L'\r');
 +			PUTC(L'\n');
 +		} else if (iswprint(*s) || iswspace(*s)) {
 +			PUTC(*s);
 +		} else {
 +			wprintf(L"<0x%X>", *s);
  		}
 -		PUTC(*s);
  	}
  	return;
  #undef PUTC
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 
>Unformatted:
