* [PATCH 0/3] Add dollar single quote
@ 2024-06-10 6:45 Herbert Xu
2024-06-10 6:45 ` [PATCH 1/3] parser: Move non-variable case in parsesub to end Herbert Xu
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Herbert Xu @ 2024-06-10 6:45 UTC (permalink / raw)
To: DASH Mailing List
This patch series adds support for $' quoting, including \u and \U.
Herbert Xu (3):
parser: Move non-variable case in parsesub to end
parser: Merge first and last chkeofmark branches in parsesub
parser: Add dollar single quote
src/bltin/printf.c | 156 +++++++++++++++++++++++++++++++++++----------
src/parser.c | 108 ++++++++++++++++++++++---------
src/system.h | 3 +
3 files changed, 206 insertions(+), 61 deletions(-)
--
2.39.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] parser: Move non-variable case in parsesub to end
2024-06-10 6:45 [PATCH 0/3] Add dollar single quote Herbert Xu
@ 2024-06-10 6:45 ` Herbert Xu
2024-06-10 6:45 ` [PATCH 2/3] parser: Merge first and last chkeofmark branches in parsesub Herbert Xu
2024-06-10 6:45 ` [PATCH 3/3] parser: Add dollar single quote Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-06-10 6:45 UTC (permalink / raw)
To: DASH Mailing List
Move the rare case of a literal dollar sign to the end of the
parsesub block. This eliminates a duplicate USTPUTC call.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/parser.c | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/src/parser.c b/src/parser.c
index 3d21894..b711d6c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1298,15 +1298,9 @@ parsesub: {
char *p;
static const char types[] = "}-+?=";
- c = pgetc_eatbnl();
- if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
- USTPUTC('$', out);
- pungetc();
- goto parsesub_return;
- }
-
USTPUTC('$', out);
+ c = pgetc_eatbnl();
if (c == '(') { /* $(command) or $((arith)) */
USTPUTC(c, out);
if (pgetc_eatbnl() == '(') {
@@ -1315,7 +1309,7 @@ parsesub: {
pungetc();
PARSEBACKQNEW();
}
- } else {
+ } else if (c == '{' || is_name(c) || is_special(c)) {
const char *newsyn = synstack->syntax;
typeloc = out - (char *)stackblock();
@@ -1441,7 +1435,9 @@ badsub:
*((char *)stackblock() + typeloc) = subtype | VSBIT;
STPUTC('=', out);
}
- }
+ } else
+ pungetc();
+
goto parsesub_return;
}
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] parser: Merge first and last chkeofmark branches in parsesub
2024-06-10 6:45 [PATCH 0/3] Add dollar single quote Herbert Xu
2024-06-10 6:45 ` [PATCH 1/3] parser: Move non-variable case in parsesub to end Herbert Xu
@ 2024-06-10 6:45 ` Herbert Xu
2024-06-10 6:45 ` [PATCH 3/3] parser: Add dollar single quote Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-06-10 6:45 UTC (permalink / raw)
To: DASH Mailing List
Eliminate the first chkeofmark branch by moving the CTLVAR to the
end of the parsesub block and always doing STADJUST.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/parser.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/src/parser.c b/src/parser.c
index b711d6c..2517721 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1293,10 +1293,9 @@ parseredir: {
*/
parsesub: {
- int subtype;
- int typeloc;
- char *p;
static const char types[] = "}-+?=";
+ int subtype;
+ char *p;
USTPUTC('$', out);
@@ -1310,13 +1309,10 @@ parsesub: {
PARSEBACKQNEW();
}
} else if (c == '{' || is_name(c) || is_special(c)) {
+ int typeloc = out - (char *)stackblock();
const char *newsyn = synstack->syntax;
- typeloc = out - (char *)stackblock();
- if (!chkeofmark) {
- out[-1] = CTLVAR;
- STADJUST(1, out);
- }
+ STADJUST(!chkeofmark, out);
subtype = VSNORMAL;
if (likely(c == '{')) {
if (chkeofmark)
@@ -1432,7 +1428,10 @@ badsub:
synstack->dqvarnest++;
}
if (!chkeofmark) {
- *((char *)stackblock() + typeloc) = subtype | VSBIT;
+ char *p = stackblock();
+
+ p[typeloc - 1] = CTLVAR;
+ p[typeloc] = subtype | VSBIT;
STPUTC('=', out);
}
} else
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] parser: Add dollar single quote
2024-06-10 6:45 [PATCH 0/3] Add dollar single quote Herbert Xu
2024-06-10 6:45 ` [PATCH 1/3] parser: Move non-variable case in parsesub to end Herbert Xu
2024-06-10 6:45 ` [PATCH 2/3] parser: Merge first and last chkeofmark branches in parsesub Herbert Xu
@ 2024-06-10 6:45 ` Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Herbert Xu @ 2024-06-10 6:45 UTC (permalink / raw)
To: DASH Mailing List
Add support for $' quoting, including \u and \U. The code is shared
with printf, so printf (both format and %b) will recognise the new
escape codes (except \c) too.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/bltin/printf.c | 156 +++++++++++++++++++++++++++++++++++----------
src/parser.c | 77 ++++++++++++++++++----
src/system.h | 3 +
3 files changed, 193 insertions(+), 43 deletions(-)
diff --git a/src/bltin/printf.c b/src/bltin/printf.c
index 7785735..2c18e93 100644
--- a/src/bltin/printf.c
+++ b/src/bltin/printf.c
@@ -29,8 +29,7 @@
* SUCH DAMAGE.
*/
-#include <sys/types.h>
-
+#include <arpa/inet.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
@@ -38,10 +37,10 @@
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
#include <unistd.h>
static int conv_escape_str(char *, char **);
-static char *conv_escape(char *, int *);
static int getchr(void);
static double getdouble(void);
static uintmax_t getuintmax(int);
@@ -56,6 +55,7 @@ static char **gargv;
#define octtobin(c) ((c) - '0')
#include "bltin.h"
+#include "parser.h"
#include "system.h"
#define PF(f, func) { \
@@ -164,13 +164,17 @@ int printfcmd(int argc, char *argv[])
int *param;
if (ch == '\\') {
- int c_ch;
- fmt = conv_escape(fmt, &c_ch);
- ch = c_ch;
- goto pc;
+ unsigned ret;
+ char *cp;
+
+ STARTSTACKSTR(cp);
+ CHECKSTRSPACE(4, cp);
+ ret = conv_escape(fmt, cp, false);
+ fmt += ret >> 4;
+ out1mem(cp, ret & 15);
+ continue;
}
if (ch != '%' || (*fmt == '%' && (++fmt || 1))) {
-pc:
putchar(ch);
continue;
}
@@ -275,58 +279,69 @@ out:
static int
conv_escape_str(char *str, char **sp)
{
- int c;
- int ch;
char *cp;
+ int c;
/* convert string into a temporary buffer... */
STARTSTACKSTR(cp);
do {
- c = ch = *str++;
- if (ch != '\\')
- continue;
+ unsigned ret;
+ int ch;
+
+ CHECKSTRSPACE(4, cp);
c = *str++;
- if (c == 'c') {
- /* \c as in SYSV echo - abort all processing.... */
- c = ch = 0x100;
+ if (c != '\\') {
+putchar:
+ USTPUTC(c, cp);
continue;
}
+ ch = *str;
+ if (ch == 'c') {
+ /* \c as in SYSV echo - abort all processing.... */
+ c = 0x100;
+ goto putchar;
+ }
+
/*
* %b string octal constants are not like those in C.
* They start with a \0, and are followed by 0, 1, 2,
* or 3 octal digits.
*/
- if (c == '0' && isodigit(*str))
+ if (ch == '0' && isodigit(str[1]))
str++;
/* Finally test for sequences valid in the format string */
- str = conv_escape(str - 1, &c);
- } while (STPUTC(c, cp), (char)ch);
+ ret = conv_escape(str, cp, false);
+ str += ret >> 4;
+ cp += ret & 15;
+ } while (c & 0xff);
*sp = cp;
- return ch;
+ return c;
}
/*
* Print "standard" escape characters
*/
-static char *
-conv_escape(char *str, int *conv_ch)
+unsigned conv_escape(char *str0, char *out0, bool mbchar)
{
- int value;
+ char *out = out0;
+ char *str = str0;
+ unsigned value;
int ch;
ch = *str;
switch (ch) {
default:
- if (!isodigit(*str)) {
- value = '\\';
- goto out;
+ if (!isodigit(ch)) {
+ value = ch ?: '\\';
+ str -= !ch;
+ break;
}
ch = 3;
@@ -334,12 +349,88 @@ conv_escape(char *str, int *conv_ch)
do {
value <<= 3;
value += octtobin(*str++);
- } while (isodigit(*str) && --ch);
- goto out;
+ } while (--ch && isodigit(*str));
+ str--;
+ break;
+
+ case 'x':
+ ch = 2;
+
+hex:
+ value = 0;
+ do {
+ int c = *++str;
+ int d;
+
+ if (c >= '0' && c <= '9')
+ d = c - '0';
+ else {
+ int cl;
+
+ cl = c & ~0x20;
+ if (cl >= 'A' && cl <= 'F')
+ d = cl - 'A' + 10;
+ else {
+ str--;
+ break;
+ }
+ }
+
+ value <<= 4;
+ value += d;
+ } while (--ch);
+
+ if (value < 0x80)
+ break;
+
+ if (value < 0x110000) {
+ int mboff = (mbchar - 1) * 2;
+ unsigned uni = value;
+ int len;
+
+ value = 0x80 << 8 | (value & 0xfc0) << 2 |
+ 0x80 | (value & 0x3f);
+
+ if (uni < 0x800) {
+ value |= 0x40 << 8;
+ len = 2;
+ } else {
+ value |= 0x80 << 16 | (uni & 0x3f000) << 4;
+ if (uni < 0x10000) {
+ value |= 0x60 << 16;
+ len = 3;
+ } else {
+ value |= 0xf0 << 24 |
+ (uni & ~0x3ffff) << 6;
+ len = 4;
+ }
+ }
+
+ value = htonl(value << (4 - len) * 8);
+
+ USTPUTC(CTLMBCHAR, out);
+ USTPUTC(len, out);
+ STADJUST(mboff, out);
+ *(uint32_t *)out = value;
+ STADJUST(len, out);
+ USTPUTC(len, out);
+ USTPUTC(CTLMBCHAR, out);
+ STADJUST(mboff, out);
+ }
+
+ goto out_noput;
+
+ case 'u':
+ ch = 4;
+ goto hex;
+
+ case 'U':
+ ch = 8;
+ goto hex;
- case '\\': value = '\\'; break; /* backslash */
case 'a': value = '\a'; break; /* alert */
case 'b': value = '\b'; break; /* backspace */
+ case 'e': value = '\033'; break; /* <ESC> */
case 'f': value = '\f'; break; /* form-feed */
case 'n': value = '\n'; break; /* newline */
case 'r': value = '\r'; break; /* carriage-return */
@@ -347,10 +438,11 @@ conv_escape(char *str, int *conv_ch)
case 'v': value = '\v'; break; /* vertical-tab */
}
+ USTPUTC(value, out);
+
+out_noput:
str++;
-out:
- *conv_ch = value;
- return str;
+ return (out - out0) | (str - str0) << 4;
}
static char *
diff --git a/src/parser.c b/src/parser.c
index 2517721..d1bec58 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -931,6 +931,46 @@ unsigned getmbc(int c, char *out, int mode)
return 0;
}
+static char *dollarsq_escape(char *out)
+{
+ /* 10 = length of UXXXXXXXX + NUL */
+ char str[10];
+ unsigned len;
+ char *p;
+
+ for (len = 0; len < sizeof(str) - 1; len++) {
+ int c = pgetc();
+
+ if (c <= PEOF)
+ break;
+
+ str[len] = c;
+ }
+ str[len] = 0;
+
+ p = str;
+ if (*p != 'c') {
+ unsigned ret;
+
+ ret = conv_escape(p, out, true);
+ p += ret >> 4;
+ out += ret & 15;
+ } else if (*++p) {
+ int conv_ch;
+ int c;
+
+ c = (unsigned char)*p++;
+
+ p += !((c ^ *p) | (c ^ '\\'));
+
+ conv_ch = (c & ~((c & 0x40) >> 1)) ^ 0x40;
+ USTPUTC(conv_ch, out);
+ }
+
+ pungetn(len - (p - str));
+ return out;
+}
+
/*
* If eofmark is NULL, read a word or a redirection symbol. If eofmark
* is not NULL, read a here document. In the latter case, eofmark is the
@@ -953,21 +993,19 @@ unsigned getmbc(int c, char *out, int mode)
STATIC int
readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
{
- int c = firstc;
- char *out;
- size_t len;
- struct nodelist *bqlist;
- int quotef;
- int oldstyle;
- /* syntax stack */
struct synstack synbase = { .syntax = syntax };
- struct synstack *synstack = &synbase;
int chkeofmark = checkkwd & CHKEOFMARK;
+ struct synstack *synstack = &synbase;
+ struct nodelist *bqlist = NULL;
+ int dollarsq = 0;
+ int c = firstc;
+ int quotef = 0;
+ int oldstyle;
+ size_t len;
+ char *out;
if (syntax == DQSYNTAX)
synstack->dblquote = 1;
- quotef = 0;
- bqlist = NULL;
STARTSTACKSTR(out);
loop: { /* for each line, until end of word */
@@ -1014,6 +1052,10 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
USTPUTC(c, out);
break;
case CCTL:
+ if (c == dollarsq) {
+ out = dollarsq_escape(out);
+ break;
+ }
if ((!eofmark) | synstack->dblquote |
synstack->varnest)
USTPUTC(CTLESC, out);
@@ -1055,6 +1097,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
USTPUTC(c, out);
break;
case CSQUOTE:
+csquote:
synstack->syntax = SQSYNTAX;
quotemark:
if (eofmark == NULL) {
@@ -1075,6 +1118,14 @@ toggledq:
}
if (synstack->dqvarnest == 0) {
+ if (likely(dollarsq)) {
+ char *p = stackblock();
+
+ *out = 0;
+ out = p + strlen(p);
+ dollarsq = 0;
+ }
+
synstack->syntax = BASESYNTAX;
synstack->dblquote = 0;
}
@@ -1293,6 +1344,7 @@ parseredir: {
*/
parsesub: {
+ const char *newsyn = synstack->syntax;
static const char types[] = "}-+?=";
int subtype;
char *p;
@@ -1308,9 +1360,12 @@ parsesub: {
pungetc();
PARSEBACKQNEW();
}
+ } else if (c == '\'' && newsyn['&']) {
+ STADJUST(-1, out);
+ dollarsq = '\\';
+ goto csquote;
} else if (c == '{' || is_name(c) || is_special(c)) {
int typeloc = out - (char *)stackblock();
- const char *newsyn = synstack->syntax;
STADJUST(!chkeofmark, out);
subtype = VSNORMAL;
diff --git a/src/system.h b/src/system.h
index e7f968b..8cb4726 100644
--- a/src/system.h
+++ b/src/system.h
@@ -28,6 +28,7 @@
#include <limits.h>
#include <signal.h>
+#include <stdbool.h>
#include <sys/types.h>
#ifndef SSIZE_MAX
@@ -188,3 +189,5 @@ static inline void globfree64(glob64_t *pglob)
* code
*/
#define uninitialized_var(x) x = x
+
+unsigned conv_escape(char *str, char *out, bool mbchar);
--
2.39.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-06-10 6:45 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-10 6:45 [PATCH 0/3] Add dollar single quote Herbert Xu
2024-06-10 6:45 ` [PATCH 1/3] parser: Move non-variable case in parsesub to end Herbert Xu
2024-06-10 6:45 ` [PATCH 2/3] parser: Merge first and last chkeofmark branches in parsesub Herbert Xu
2024-06-10 6:45 ` [PATCH 3/3] parser: Add dollar single quote Herbert Xu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).