summaryrefslogtreecommitdiff
diff options
authorArnold D. Robbins <[email protected]>2025-10-29 10:24:33 +0200
committerArnold D. Robbins <[email protected]>2025-10-29 10:24:33 +0200
commita0879473da5f65b1687ed5a4af565bf400f51f88 (patch)
treeb89e2ab5e12c82f3b043d00060105379d18a7e08
parent3992eafcead82df31a8daea5d25803c3e9f5e726 (diff)
downloadgawk-master.tar.gz
In re.c, change a bad assertion into code.HEADmaster
-rw-r--r--ChangeLog9
-rw-r--r--awk.h2
-rw-r--r--pc/ChangeLog4
-rw-r--r--pc/Makefile.tst7
-rw-r--r--re.c22
-rw-r--r--test/ChangeLog5
-rw-r--r--test/Makefile.am5
-rw-r--r--test/Makefile.in10
-rw-r--r--test/Maketests5
-rw-r--r--test/match5.awk6
-rw-r--r--test/match5.in199
-rw-r--r--test/match5.ok4
12 files changed, 271 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index c2064ccb..c032e46c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2025-10-29 Arnold D. Robbins <[email protected]>
+
+ * re.c (make_regexp): Remove assertion that string is zero
+ terminated. It isn't always. Instead, save and restore the
+ character past the value of `len'. This required making the
+ first parameter not be const anymore. Thanks to Ben Hoyt
+ <[email protected]> for the report.
+ * awk.h (make_regexp): Adjust declaration.
+
2025-10-28 Arnold D. Robbins <[email protected]>
* awkgram.y: Remove never-documented support for tawk extension
diff --git a/awk.h b/awk.h
index b7f6758f..846177d1 100644
--- a/awk.h
+++ b/awk.h
@@ -1789,7 +1789,7 @@ extern bool out_of_range(NODE *n);
extern char *format_nan_inf(NODE *n, char format);
extern bool is_ieee_magic_val(const char *val);
/* re.c */
-extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal);
+extern Regexp *make_regexp(char *s, size_t len, bool ignorecase, bool dfa, bool canfatal);
extern int research(Regexp *rp, char *str, int start, size_t len, int flags);
extern void refree(Regexp *rp);
extern void reg_error(const char *s);
diff --git a/pc/ChangeLog b/pc/ChangeLog
index faa2d1a1..918ac82a 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2025-10-29 Arnold D. Robbins <[email protected]>
+
+ * Makefile.tst: Regenerated.
+
2025-10-27 Arnold D. Robbins <[email protected]>
* Makefile.tst: Regenerated.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index cdce2270..01d8ea40 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -165,7 +165,7 @@ BASIC_TESTS = \
inpref inputred intest intprec iobug1 \
leaddig leadnl litoct longsub longwrds \
manglprm match4 matchuninitialized math membug1 memleak messages \
- matchbadarg1 matchbadarg2 \
+ matchbadarg1 matchbadarg2 match5 \
minusstr mmap8k \
nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \
nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \
@@ -1969,6 +1969,11 @@ matchbadarg2:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+match5:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
minusstr:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/re.c b/re.c
index 21b80346..f54dc244 100644
--- a/re.c
+++ b/re.c
@@ -37,7 +37,7 @@ static struct localeinfo localeinfo;
/* make_regexp --- generate compiled regular expressions */
Regexp *
-make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
+make_regexp(char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
{
static char metas[] = ".*+(){}[]|?^$\\";
Regexp *rp;
@@ -53,8 +53,22 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
int i;
static struct dfa* dfaregs[2] = { NULL, NULL };
static bool nul_warned = false;
+ char save;
+ size_t savelen;
- assert(s[len] == '\0');
+ /*
+ * 10/2025: We used to have:
+ *
+ * assert(s[len] == '\0');
+ *
+ * here, but data can come in, by way of re_update(), that is from $0 or
+ * elsewhere where there is no final '\0'. So we save and restore
+ * the character at s[len] and force a '\0' into position there.
+ * It needs to be a C string for use in error messages.
+ */
+ savelen = len;
+ save = s[len];
+ s[len] = '\0';
if (do_lint && ! nul_warned && memchr(s, '\0', len) != NULL) {
nul_warned = true;
@@ -275,7 +289,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
* character sets only.
*
* On the other hand, if we do have a single-byte character set,
- * using the casetable should give a performance improvement, since
+ * using the casetable should give a performance improvement, since
* it's computed only once, not each time a regex is compiled. We
* also think it's probably better for portability. See the
* discussion by the definition of casetable[] in eval.c.
@@ -311,6 +325,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
if (! canfatal) {
/* rerr already gettextized inside regex routines */
error("%s: /%s/", rerr, s);
+ s[savelen] = save;
return NULL;
}
fatal("invalid regexp: %s: /%s/", rerr, s);
@@ -340,6 +355,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
}
}
+ s[savelen] = save;
return rp;
}
diff --git a/test/ChangeLog b/test/ChangeLog
index b341b3ba..e7a8d773 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2025-10-29 Arnold D. Robbins <[email protected]>
+
+ * Makefile.am (EXTRA_DIST): New test, match5.
+ * match5.awk, match5.in, match5.ok: New files.
+
2025-10-28 Arnold D. Robbins <[email protected]>
* lintwarn.awk, lintwarn.ok: Adjust after code changes.
diff --git a/test/Makefile.am b/test/Makefile.am
index 3f72d117..d6b8a8c3 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -792,6 +792,9 @@ EXTRA_DIST = \
match3.ok \
match4.awk \
match4.ok \
+ match5.awk \
+ match5.in \
+ match5.ok \
matchbadarg1.awk \
matchbadarg1.in \
matchbadarg1.ok \
@@ -1581,7 +1584,7 @@ BASIC_TESTS = \
inpref inputred intest intprec iobug1 \
leaddig leadnl litoct longsub longwrds \
manglprm match4 matchuninitialized math membug1 memleak messages \
- matchbadarg1 matchbadarg2 \
+ matchbadarg1 matchbadarg2 match5 \
minusstr mmap8k \
nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \
nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \
diff --git a/test/Makefile.in b/test/Makefile.in
index a5793aae..a6bc128e 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1056,6 +1056,9 @@ EXTRA_DIST = \
match3.ok \
match4.awk \
match4.ok \
+ match5.awk \
+ match5.in \
+ match5.ok \
matchbadarg1.awk \
matchbadarg1.in \
matchbadarg1.ok \
@@ -1845,7 +1848,7 @@ BASIC_TESTS = \
inpref inputred intest intprec iobug1 \
leaddig leadnl litoct longsub longwrds \
manglprm match4 matchuninitialized math membug1 memleak messages \
- matchbadarg1 matchbadarg2 \
+ matchbadarg1 matchbadarg2 match5 \
minusstr mmap8k \
nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \
nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \
@@ -3836,6 +3839,11 @@ matchbadarg2:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+match5:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
minusstr:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index a782659e..276eccdb 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -629,6 +629,11 @@ matchbadarg2:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+match5:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
minusstr:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/match5.awk b/test/match5.awk
new file mode 100644
index 00000000..0b238a29
--- /dev/null
+++ b/test/match5.awk
@@ -0,0 +1,6 @@
+NF > 0 && match($NF, $1) {
+ print $0, RSTART, RLENGTH
+ if (RLENGTH != length($1))
+ printf "match error at %d: %d %d\n",
+ NR, RLENGTH, RSTART >"/dev/tty"
+}
diff --git a/test/match5.in b/test/match5.in
new file mode 100644
index 00000000..be3dbf61
--- /dev/null
+++ b/test/match5.in
@@ -0,0 +1,199 @@
+/dev/rrp3:
+
+17379 mel
+16693 bwk me
+16116 ken him someone else
+15713 srb
+11895 lem
+10409 scj
+10252 rhm
+ 9853 shen
+ 9748 a68
+ 9492 sif
+ 9190 pjw
+ 8912 nls
+ 8895 dmr
+ 8491 cda
+ 8372 bs
+ 8252 llc
+ 7450 mb
+ 7360 ava
+ 7273 jrv
+ 7080 bin
+ 7063 greg
+ 6567 dict
+ 6462 lck
+ 6291 rje
+ 6211 lwf
+ 5671 dave
+ 5373 jhc
+ 5220 agf
+ 5167 doug
+ 5007 valerie
+ 3963 jca
+ 3895 bbs
+ 3796 moh
+ 3481 xchar
+ 3200 tbl
+ 2845 s
+ 2774 tgs
+ 2641 met
+ 2566 jck
+ 2511 port
+ 2479 sue
+ 2127 root
+ 1989 bsb
+ 1989 jeg
+ 1933 eag
+ 1801 pdj
+ 1590 tpc
+ 1385 cvw
+ 1370 rwm
+ 1316 avg
+ 1205 eg
+ 1194 jam
+ 1153 dl
+ 1150 lgm
+ 1031 cmb
+ 1018 jwr
+ 950 gdb
+ 931 marc
+ 898 usg
+ 865 ggr
+ 822 daemon
+ 803 mihalis
+ 700 honey
+ 624 tad
+ 559 acs
+ 541 uucp
+ 523 raf
+ 495 adh
+ 456 kec
+ 414 craig
+ 386 donmac
+ 375 jj
+ 348 ravi
+ 344 drw
+ 327 stars
+ 288 mrg
+ 272 jcb
+ 263 ralph
+ 253 tom
+ 251 sjb
+ 248 haight
+ 224 sharon
+ 222 chuck
+ 213 dsj
+ 201 bill
+ 184 god
+ 176 sys
+ 166 meh
+ 163 jon
+ 144 dan
+ 143 fox
+ 123 dale
+ 116 kab
+ 95 buz
+ 80 asc
+ 79 jas
+ 79 trt
+ 64 wsb
+ 62 dwh
+ 56 ktf
+ 54 lr
+ 47 dlc
+ 45 dls
+ 45 jwf
+ 44 mash
+ 43 ars
+ 43 vgl
+ 37 jfo
+ 32 rab
+ 31 pd
+ 29 jns
+ 25 spm
+ 22 rob
+ 15 egb
+ 10 hm
+ 10 mhb
+ 6 aed
+ 6 cpb
+ 5 evp
+ 4 ber
+ 4 men
+ 4 mitch
+ 3 ast
+ 3 jfr
+ 3 lax
+ 3 nel
+ 2 blue
+ 2 jfk
+ 2 njas
+ 1 122sec
+ 1 ddwar
+ 1 gopi
+ 1 jk
+ 1 learn
+ 1 low
+ 1 nac
+ 1 sidor
+1root:EMpNB8Zp56:0:0:Super-User,,,,,,,:/:/bin/sh
+2roottcsh:*:0:0:Super-User running tcsh [cbm]:/:/bin/tcsh
+3sysadm:*:0:0:System V Administration:/usr/admin:/bin/sh
+4diag:*:0:996:Hardware Diagnostics:/usr/diags:/bin/csh
+5daemon:*:1:1:daemons:/:/bin/sh
+6bin:*:2:2:System Tools Owner:/bin:/dev/null
+7nuucp:BJnuQbAo:6:10:UUCP.Admin:/usr/spool/uucppublic:/usr/lib/uucp/uucico
+8uucp:*:3:5:UUCP.Admin:/usr/lib/uucp:
+9sys:*:4:0:System Activity Owner:/usr/adm:/bin/sh
+10adm:*:5:3:Accounting Files Owner:/usr/adm:/bin/sh
+11lp:*:9:9:Print Spooler Owner:/var/spool/lp:/bin/sh
+12auditor:*:11:0:Audit Activity Owner:/auditor:/bin/sh
+13dbadmin:*:12:0:Security Database Owner:/dbadmin:/bin/sh
+14bootes:dcon:50:1:Tom Killian (DO NOT REMOVE):/tmp:
+15cdjuke:dcon:51:1:Tom Killian (DO NOT REMOVE):/tmp:
+16rfindd:*:66:1:Rfind Daemon and Fsdump:/var/rfindd:/bin/sh
+17EZsetup:*:992:998:System Setup:/var/sysadmdesktop/EZsetup:/bin/csh
+18demos:*:993:997:Demonstration User:/usr/demos:/bin/csh
+19tutor:*:994:997:Tutorial User:/usr/tutor:/bin/csh
+20tour:*:995:997:IRIS Space Tour:/usr/people/tour:/bin/csh
+21guest:nfP4/Wpvio/Rw:998:998:Guest Account:/usr/people/guest:/bin/csh
+224Dgifts:0nWRTZsOMt.:999:998:4Dgifts Account:/usr/people/4Dgifts:/bin/csh
+23nobody:*:60001:60001:SVR4 nobody uid:/dev/null:/dev/null
+24noaccess:*:60002:60002:uid no access:/dev/null:/dev/null
+25nobody:*:-2:-2:original nobody uid:/dev/null:/dev/null
+26rje:*:8:8:RJE Owner:/usr/spool/rje:
+27changes:*:11:11:system change log:/:
+28dist:sorry:9999:4:file distributions:/v/adm/dist:/v/bin/sh
+29man:*:99:995:On-line Manual Owner:/:
+30phoneca:*:991:991:phone call log [tom]:/v/adm/log:/v/bin/sh
+1r oot EMpNB8Zp56 0 0 Super-User,,,,,,, / /bin/sh
+2r oottcsh * 0 0 Super-User running tcsh [cbm] / /bin/tcsh
+3s ysadm * 0 0 System V Administration /usr/admin /bin/sh
+4d iag * 0 996 Hardware Diagnostics /usr/diags /bin/csh
+5d aemon * 1 1 daemons / /bin/sh
+6b in * 2 2 System Tools Owner /bin /dev/null
+7n uucp BJnuQbAo 6 10 UUCP.Admin /usr/spool/uucppublic /usr/lib/uucp/uucico
+8u ucp * 3 5 UUCP.Admin /usr/lib/uucp
+9s ys * 4 0 System Activity Owner /usr/adm /bin/sh
+10 adm * 5 3 Accounting Files Owner /usr/adm /bin/sh
+11 lp * 9 9 Print Spooler Owner /var/spool/lp /bin/sh
+12 auditor * 11 0 Audit Activity Owner /auditor /bin/sh
+13 dbadmin * 12 0 Security Database Owner /dbadmin /bin/sh
+14 bootes dcon 50 1 Tom Killian (DO NOT REMOVE) /tmp
+15 cdjuke dcon 51 1 Tom Killian (DO NOT REMOVE) /tmp
+16 rfindd * 66 1 Rfind Daemon and Fsdump /var/rfindd /bin/sh
+17 EZsetup * 992 998 System Setup /var/sysadmdesktop/EZsetup /bin/csh
+18 demos * 993 997 Demonstration User /usr/demos /bin/csh
+19 tutor * 994 997 Tutorial User /usr/tutor /bin/csh
+20 tour * 995 997 IRIS Space Tour /usr/people/tour /bin/csh
+21 guest nfP4/Wpvio/Rw 998 998 Guest Account /usr/people/guest /bin/csh
+22 4Dgifts 0nWRTZsOMt. 999 998 4Dgifts Account /usr/people/4Dgifts /bin/csh
+23 nobody * 60001 60001 SVR4 nobody uid /dev/null /dev/null
+24 noaccess * 60002 60002 uid no access /dev/null /dev/null
+25 nobody * -2 -2 original nobody uid /dev/null /dev/null
+26 rje * 8 8 RJE Owner /usr/spool/rje
+27 changes * 11 11 system change log /
+28 dist sorry 9999 4 file distributions /v/adm/dist /v/bin/sh
+29 man * 99 995 On-line Manual Owner /
+30 phoneca * 991 991 phone call log [tom] /v/adm/log /v/bin/sh
diff --git a/test/match5.ok b/test/match5.ok
new file mode 100644
index 00000000..8b0731d9
--- /dev/null
+++ b/test/match5.ok
@@ -0,0 +1,4 @@
+/dev/rrp3: 1 10
+ 1 122sec 1 1
+1root:EMpNB8Zp56:0:0:Super-User,,,,,,,:/:/bin/sh 1 48
+7nuucp:BJnuQbAo:6:10:UUCP.Admin:/usr/spool/uucppublic:/usr/lib/uucp/uucico 1 74