From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v3 PATCH 03/13] expand: Count multi-byte characters for VSLENGTH
Date: Sun, 05 May 2024 17:14:30 +0800 [thread overview]
Message-ID: <bc7a793eae26aa07fd0697af90728e078f92c4b6.1714900377.git.herbert@gondor.apana.org.au> (raw)
In-Reply-To: <cover.1714900377.git.herbert@gondor.apana.org.au>
Count multi-byte characters in variables rather than bytes
and return that as the length expansion.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/expand.c | 62 +++++++++++++++++++++++++++++++++++++---------------
1 file changed, 44 insertions(+), 18 deletions(-)
diff --git a/src/expand.c b/src/expand.c
index 9ac981e..ad186b0 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -53,6 +53,7 @@
#endif
#include <ctype.h>
#include <stdbool.h>
+#include <wchar.h>
/*
* Routines to expand arguments to commands. We have to deal with
@@ -796,6 +797,18 @@ really_record:
return p;
}
+static char *chtodest(int c, int flags, char *out)
+{
+ const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+
+ if ((flags & QUOTES_ESC) &&
+ ((syntax[c] == CCTL) ||
+ (flags & EXP_QUOTED && syntax[c] == CBACK)))
+ USTPUTC(CTLESC, out);
+ USTPUTC(c, out);
+
+ return out;
+}
/*
* Put a string on the stack.
@@ -803,38 +816,48 @@ really_record:
static size_t memtodest(const char *p, size_t len, int flags)
{
- const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX;
+ size_t count = 0;
char *q;
- char *s;
+ int c;
if (unlikely(!len))
return 0;
q = makestrspace(len * 2, expdest);
- s = q;
do {
- int c = (signed char)*p++;
- if (c) {
- if ((flags & QUOTES_ESC) &&
- ((syntax[c] == CCTL) ||
- (flags & EXP_QUOTED && syntax[c] == CBACK)))
- USTPUTC(CTLESC, q);
- } else if (!(flags & EXP_KEEPNUL))
+ c = (signed char)*p++;
+
+ if (c)
+ count++;
+ else if (!(flags & EXP_KEEPNUL))
continue;
- USTPUTC(c, q);
+
+ if (c < 0) {
+ mbstate_t mbs = {};
+
+ p--;
+ do {
+ q = chtodest(c, flags, q);
+ } while (mbrlen(p++, 1, &mbs) == -2 &&
+ (c = *p, --len));
+ if (!len)
+ break;
+ continue;
+ }
+
+ q = chtodest(c, flags, q);
} while (--len);
expdest = q;
- return q - s;
+ return count;
}
static size_t strtodest(const char *p, int flags)
{
size_t len = strlen(p);
- memtodest(p, len, flags);
- return len;
+ return memtodest(p, len, flags);
}
@@ -856,6 +879,7 @@ varvalue(char *name, int varflags, int flags, int quoted)
int discard = (subtype == VSPLUS || subtype == VSLENGTH) |
(flags & EXP_DISCARD);
ssize_t len = 0;
+ size_t start;
char c;
if (!subtype) {
@@ -865,9 +889,9 @@ varvalue(char *name, int varflags, int flags, int quoted)
sh_error("Bad substitution");
}
- flags |= EXP_KEEPNUL;
flags &= discard ? ~QUOTES_ESC : ~0;
sep = (flags & EXP_FULL) << CHAR_BIT;
+ start = expdest - (char *)stackblock();
switch (*name) {
case '$':
@@ -927,7 +951,7 @@ param:
if (*ap && sep) {
len++;
- memtodest(&sepc, 1, flags);
+ memtodest(&sepc, 1, flags | EXP_KEEPNUL);
}
}
break;
@@ -957,7 +981,7 @@ value:
}
if (discard)
- STADJUST(-len, expdest);
+ expdest = (char *)stackblock() + start;
return len;
}
@@ -1758,11 +1782,13 @@ casematch(union node *pattern, char *val)
static size_t cvtnum(intmax_t num, int flags)
{
+ size_t start = expdest - (char *)stackblock();
int len = max_int_length(sizeof(num));
char buf[len];
len = fmtstr(buf, len, "%" PRIdMAX, num);
- return memtodest(buf, len, flags);
+ memtodest(buf, len, flags);
+ return (expdest - (char *)stackblock()) - start;
}
STATIC void
--
2.39.2
next prev parent reply other threads:[~2024-05-05 9:14 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-05 9:14 [v3 PATCH 00/13] Add multi-byte support Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 01/13] shell: Call setlocale Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 02/13] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-05-05 9:14 ` Herbert Xu [this message]
2024-05-05 9:14 ` [v3 PATCH 04/13] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 05/13] expand: Process multi-byte characters in expmeta Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 06/13] expand: Support multi-byte characters during field splitting Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 07/13] input: Allow MB_LEN_MAX calls to pungetc Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 08/13] input: Add pgetc_eoa Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 09/13] parser: Add support for multi-byte characters Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 10/13] input: Always push in setinputfile Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 11/13] memalloc: Use void * instead of pointer Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 12/13] builtin: Use pgetc in read(1) Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 13/13] builtin: Process multi-byte characters " Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bc7a793eae26aa07fd0697af90728e078f92c4b6.1714900377.git.herbert@gondor.apana.org.au \
--to=herbert@gondor.apana.org.au \
--cc=dash@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).