Dash Archive mirror
 help / color / mirror / Atom feed
From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v3 PATCH 03/13] expand: Count multi-byte characters for VSLENGTH
Date: Sun, 05 May 2024 17:14:30 +0800	[thread overview]
Message-ID: <bc7a793eae26aa07fd0697af90728e078f92c4b6.1714900377.git.herbert@gondor.apana.org.au> (raw)
In-Reply-To: <cover.1714900377.git.herbert@gondor.apana.org.au>

Count multi-byte characters in variables rather than bytes
and return that count as the length expansion.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c | 62 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/expand.c b/src/expand.c
index 9ac981e..ad186b0 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -53,6 +53,7 @@
 #endif
 #include <ctype.h>
 #include <stdbool.h>
+#include <wchar.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -796,6 +797,18 @@ really_record:
 	return p;
 }
 
+static char *chtodest(int c, int flags, char *out)
+{
+	const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+
+	if ((flags & QUOTES_ESC) &&
+	    ((syntax[c] == CCTL) ||
+	     (flags & EXP_QUOTED && syntax[c] == CBACK)))
+		USTPUTC(CTLESC, out);
+	USTPUTC(c, out);
+
+	return out;
+}
 
 /*
  * Put a string on the stack.
@@ -803,38 +816,48 @@ really_record:
 
 static size_t memtodest(const char *p, size_t len, int flags)
 {
-	const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+	size_t count = 0;
 	char *q;
-	char *s;
+	int c;
 
 	if (unlikely(!len))
 		return 0;
 
 	q = makestrspace(len * 2, expdest);
-	s = q;
 
 	do {
-		int c = (signed char)*p++;
-		if (c) {
-			if ((flags & QUOTES_ESC) &&
-			    ((syntax[c] == CCTL) ||
-			     (flags & EXP_QUOTED && syntax[c] == CBACK)))
-				USTPUTC(CTLESC, q);
-		} else if (!(flags & EXP_KEEPNUL))
+		c = (signed char)*p++;
+
+		if (c)
+			count++;
+		else if (!(flags & EXP_KEEPNUL))
 			continue;
-		USTPUTC(c, q);
+
+		if (c < 0) {
+			mbstate_t mbs = {};
+
+			p--;
+			do {
+				q = chtodest(c, flags, q);
+			} while (mbrlen(p++, 1, &mbs) == -2 &&
+				 (c = *p, --len));
+			if (!len)
+				break;
+			continue;
+		}
+
+		q = chtodest(c, flags, q);
 	} while (--len);
 
 	expdest = q;
-	return q - s;
+	return count;
 }
 
 
 static size_t strtodest(const char *p, int flags)
 {
 	size_t len = strlen(p);
-	memtodest(p, len, flags);
-	return len;
+	return memtodest(p, len, flags);
 }
 
 
@@ -856,6 +879,7 @@ varvalue(char *name, int varflags, int flags, int quoted)
 	int discard = (subtype == VSPLUS || subtype == VSLENGTH) |
 		      (flags & EXP_DISCARD);
 	ssize_t len = 0;
+	size_t start;
 	char c;
 
 	if (!subtype) {
@@ -865,9 +889,9 @@ varvalue(char *name, int varflags, int flags, int quoted)
 		sh_error("Bad substitution");
 	}
 
-	flags |= EXP_KEEPNUL;
 	flags &= discard ? ~QUOTES_ESC : ~0;
 	sep = (flags & EXP_FULL) << CHAR_BIT;
+	start = expdest - (char *)stackblock();
 
 	switch (*name) {
 	case '$':
@@ -927,7 +951,7 @@ param:
 
 			if (*ap && sep) {
 				len++;
-				memtodest(&sepc, 1, flags);
+				memtodest(&sepc, 1, flags | EXP_KEEPNUL);
 			}
 		}
 		break;
@@ -957,7 +981,7 @@ value:
 	}
 
 	if (discard)
-		STADJUST(-len, expdest);
+		expdest = (char *)stackblock() + start;
 
 	return len;
 }
@@ -1758,11 +1782,13 @@ casematch(union node *pattern, char *val)
 
 static size_t cvtnum(intmax_t num, int flags)
 {
+	size_t start = expdest - (char *)stackblock();
 	int len = max_int_length(sizeof(num));
 	char buf[len];
 
 	len = fmtstr(buf, len, "%" PRIdMAX, num);
-	return memtodest(buf, len, flags);
+	memtodest(buf, len, flags);
+	return (expdest - (char *)stackblock()) - start;
 }
 
 STATIC void
-- 
2.39.2


  parent reply	other threads:[~2024-05-05  9:14 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-05  9:14 [v3 PATCH 00/13] Add multi-byte support Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 01/13] shell: Call setlocale Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 02/13] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-05-05  9:14 ` Herbert Xu [this message]
2024-05-05  9:14 ` [v3 PATCH 04/13] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 05/13] expand: Process multi-byte characters in expmeta Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 06/13] expand: Support multi-byte characters during field splitting Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 07/13] input: Allow MB_LEN_MAX calls to pungetc Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 08/13] input: Add pgetc_eoa Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 09/13] parser: Add support for multi-byte characters Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 10/13] input: Always push in setinputfile Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 11/13] memalloc: Use void * instead of pointer Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 12/13] builtin: Use pgetc in read(1) Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 13/13] builtin: Process multi-byte characters " Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bc7a793eae26aa07fd0697af90728e078f92c4b6.1714900377.git.herbert@gondor.apana.org.au \
    --to=herbert@gondor.apana.org.au \
    --cc=dash@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).