From: наб <nabijaczleweli@nabijaczleweli.xyz>
To: unlisted-recipients:; (no To-header on input)
Cc: util-linux@vger.kernel.org
Subject: [PATCH 1/3] write: correctly handle wide characters
Date: Tue, 14 Mar 2023 23:02:15 +0100 [thread overview]
Message-ID: <5d68dce11f61b85743c36d57e2bd0d90e978a197.1678831302.git.nabijaczleweli@nabijaczleweli.xyz> (raw)
[-- Attachment #1: Type: text/plain, Size: 5773 bytes --]
Do this by replacing fputc_careful() (note that its description said
it's locale-aware ‒ it very much is /not/) with fputs_careful(), which
does the same thing, except that before it would output a byte in the \123
format, it first checks whether that byte starts a valid multibyte character.
If it does, and that character is printable, it is written verbatim.
This means that
echo 'foo åäö ąęćźżń bar' | write nabijaczleweli pts/4
instead of
foo \303\245\303\244\303\266
\304\205\304\231\304\207\305\272\305\274\305\204 bar
yields
foo åäö ąęćźżń bar
or, more realistically, from a message I got earlier today,
Filip powiedzia\305\202 \305\274e zap\305\202aci jutro
becomes
Filip powiedział że zapłaci jutro
Invalid/non-printable sequences get processed as before.
Line reading in write must switch to getline() to avoid dealing with
partial characters: for example, on input consisting solely of repeated
ąęćźżń, where {N} stands for N consecutive instances of it, the output would be
{42}ąęć\305\272żń{84}ąęćź\305\274ń{84}ąęćźż\305\204{39}
with just a fixed 512-byte fgets() buffer, which splits characters at buffer boundaries.
Bug-Debian: https://bugs.debian.org/826596
---
Please keep me in CC, as I'm not subscribed.
include/carefulputc.h | 54 ++++++++++++++++++++++++++++++-------------
login-utils/last.c | 4 +---
term-utils/write.c | 25 ++++++--------------
3 files changed, 46 insertions(+), 37 deletions(-)
diff --git a/include/carefulputc.h b/include/carefulputc.h
index 8860b1234..416a347bf 100644
--- a/include/carefulputc.h
+++ b/include/carefulputc.h
@@ -1,31 +1,53 @@
#ifndef UTIL_LINUX_CAREFULPUTC_H
#define UTIL_LINUX_CAREFULPUTC_H
-/*
- * A putc() for use in write and wall (that sometimes are sgid tty).
- * It avoids control characters in our locale, and also ASCII control
- * characters. Note that the locale of the recipient is unknown.
-*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
+#include <wctype.h>
+#include <stdbool.h>
#include "cctype.h"
-static inline int fputc_careful(int c, FILE *fp, const char fail)
+/*
+ * A puts() for use in write and wall (that sometimes are sgid tty).
+ * It avoids control and invalid characters.
+ * The locale of the recipient is nominally unknown,
+ * but it's a solid bet that it's compatible with the author's.
+ */
+static inline int fputs_careful(const char * s, FILE *fp, const char ctrl, bool cr_lf)
{
- int ret;
+ int ret = 0;
- if (isprint(c) || c == '\a' || c == '\t' || c == '\r' || c == '\n')
- ret = putc(c, fp);
- else if (!c_isascii(c))
- ret = fprintf(fp, "\\%3o", (unsigned char)c);
- else {
- ret = putc(fail, fp);
- if (ret != EOF)
- ret = putc(c ^ 0x40, fp);
+ for (size_t slen = strlen(s); *s; ++s, --slen) {
+ if (*s == '\n')
+ ret = fputs(&"\r\n"[!cr_lf], fp);
+ else if (isprint(*s) || *s == '\a' || *s == '\t' || *s == '\r')
+ ret = putc(*s, fp);
+ else if (!c_isascii(*s)) {
+ wchar_t w;
+ size_t clen = mbtowc(&w, s, slen);
+ switch(clen) {
+ case (size_t)-2: // incomplete
+ case (size_t)-1: // EILSEQ
+ nonprint:
+ ret = fprintf(fp, "\\%3hho", *s);
+ mbtowc(NULL, NULL, 0);
+ break;
+ default:
+ if(!iswprint(w))
+ goto nonprint;
+ ret = fwrite(s, 1, clen, fp);
+ s += clen - 1;
+ slen -= clen - 1;
+ break;
+ }
+ } else
+ ret = fputs((char[]){ ctrl, *s ^ 0x40, '\0' }, fp);
+ if (ret < 0)
+ return EOF;
}
- return (ret < 0) ? EOF : 0;
+ return 0;
}
static inline void fputs_quoted_case(const char *data, FILE *out, int dir)
diff --git a/login-utils/last.c b/login-utils/last.c
index d3eeed4b6..1b45dbf24 100644
--- a/login-utils/last.c
+++ b/login-utils/last.c
@@ -392,7 +392,6 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t
char final[512];
char utline[sizeof(p->ut_line) + 1];
char domain[256];
- char *s;
int mins, hours, days;
int r, len;
struct last_timefmt *fmt;
@@ -548,8 +547,7 @@ static int list(const struct last_control *ctl, struct utmpx *p, time_t logout_t
/*
* Print out "final" string safely.
*/
- for (s = final; *s; s++)
- fputc_careful(*s, stdout, '*');
+ fputs_careful(final, stdout, '*', false);
if (len < 0 || (size_t)len >= sizeof(final))
putchar('\n');
diff --git a/term-utils/write.c b/term-utils/write.c
index 8b86e9a9d..b485e28fd 100644
--- a/term-utils/write.c
+++ b/term-utils/write.c
@@ -223,21 +223,6 @@ static void signal_handler(int signo)
signal_received = signo;
}
-/*
- * write_line - like fputs(), but makes control characters visible and
- * turns \n into \r\n.
- */
-static void write_line(char *s)
-{
- while (*s) {
- const int c = *s++;
-
- if ((c == '\n' && fputc_careful('\r', stdout, '^') == EOF)
- || fputc_careful(c, stdout, '^') == EOF)
- err(EXIT_FAILURE, _("carefulputc failed"));
- }
-}
-
/*
* do_write - actually make the connection
*/
@@ -247,7 +232,8 @@ static void do_write(const struct write_control *ctl)
struct passwd *pwd;
time_t now;
struct tm *tm;
- char *host, line[512];
+ char *host, *line = NULL;
+ size_t linelen = 0;
struct sigaction sigact;
/* Determine our login name(s) before the we reopen() stdout */
@@ -286,11 +272,14 @@ static void do_write(const struct write_control *ctl)
free(host);
printf("\r\n");
- while (fgets(line, sizeof(line), stdin) != NULL) {
+ while (getline(&line, &linelen, stdin) >= 0) {
if (signal_received)
break;
- write_line(line);
+
+ if (fputs_careful(line, stdout, '^', true) == EOF)
+ err(EXIT_FAILURE, _("carefulputc failed"));
}
+ free(line);
printf("EOF\r\n");
}
--
2.30.2
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next reply other threads:[~2023-03-14 22:02 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-14 22:02 наб [this message]
2023-03-14 22:02 ` [PATCH 2/3] wall: convert homebrew buffering to open_memstream() наб
2023-03-14 22:02 ` [PATCH 3/3] wall: use fputs_careful() наб
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5d68dce11f61b85743c36d57e2bd0d90e978a197.1678831302.git.nabijaczleweli@nabijaczleweli.xyz \
--to=nabijaczleweli@nabijaczleweli.xyz \
--cc=util-linux@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).