diff options
| author | Arnold D. Robbins <[email protected]> | 2025-10-29 10:24:33 +0200 | 
|---|---|---|
| committer | Arnold D. Robbins <[email protected]> | 2025-10-29 10:24:33 +0200 | 
| commit | a0879473da5f65b1687ed5a4af565bf400f51f88 (patch) | |
| tree | b89e2ab5e12c82f3b043d00060105379d18a7e08 | |
| parent | 3992eafcead82df31a8daea5d25803c3e9f5e726 (diff) | |
| download | gawk-master.tar.gz | |
| -rw-r--r-- | ChangeLog | 9 | ||||
| -rw-r--r-- | awk.h | 2 | ||||
| -rw-r--r-- | pc/ChangeLog | 4 | ||||
| -rw-r--r-- | pc/Makefile.tst | 7 | ||||
| -rw-r--r-- | re.c | 22 | ||||
| -rw-r--r-- | test/ChangeLog | 5 | ||||
| -rw-r--r-- | test/Makefile.am | 5 | ||||
| -rw-r--r-- | test/Makefile.in | 10 | ||||
| -rw-r--r-- | test/Maketests | 5 | ||||
| -rw-r--r-- | test/match5.awk | 6 | ||||
| -rw-r--r-- | test/match5.in | 199 | ||||
| -rw-r--r-- | test/match5.ok | 4 | 
12 files changed, 271 insertions, 7 deletions
| @@ -1,3 +1,12 @@ +2025-10-29         Arnold D. Robbins     <[email protected]> + +	* re.c (make_regexp): Remove assertion that string is zero +	terminated. It isn't always. Instead, save and restore the +	character past the value of `len'. This required making the +	first parameter not be const anymore. Thanks to Ben Hoyt +	<[email protected]> for the report. +	* awk.h (make_regexp): Adjust declaration. +  2025-10-28         Arnold D. Robbins     <[email protected]>  	* awkgram.y: Remove never-documented support for tawk extension @@ -1789,7 +1789,7 @@ extern bool out_of_range(NODE *n);  extern char *format_nan_inf(NODE *n, char format);  extern bool is_ieee_magic_val(const char *val);  /* re.c */ -extern Regexp *make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal); +extern Regexp *make_regexp(char *s, size_t len, bool ignorecase, bool dfa, bool canfatal);  extern int research(Regexp *rp, char *str, int start, size_t len, int flags);  extern void refree(Regexp *rp);  extern void reg_error(const char *s); diff --git a/pc/ChangeLog b/pc/ChangeLog index faa2d1a1..918ac82a 100644 --- a/pc/ChangeLog +++ b/pc/ChangeLog @@ -1,3 +1,7 @@ +2025-10-29         Arnold D. Robbins     <[email protected]> + +	* Makefile.tst: Regenerated. +  2025-10-27         Arnold D. Robbins     <[email protected]>  	* Makefile.tst: Regenerated. 
diff --git a/pc/Makefile.tst b/pc/Makefile.tst index cdce2270..01d8ea40 100644 --- a/pc/Makefile.tst +++ b/pc/Makefile.tst @@ -165,7 +165,7 @@ BASIC_TESTS = \  	inpref inputred intest intprec iobug1 \  	leaddig leadnl litoct longsub longwrds \  	manglprm match4 matchuninitialized math membug1 memleak messages \ -	matchbadarg1 matchbadarg2 \ +	matchbadarg1 matchbadarg2 match5 \  	minusstr mmap8k \  	nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \  	nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \ @@ -1969,6 +1969,11 @@ matchbadarg2:  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@  	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +match5: +	@echo $@ +	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ +	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +  minusstr:  	@echo $@  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @@ -37,7 +37,7 @@ static struct localeinfo localeinfo;  /* make_regexp --- generate compiled regular expressions */  Regexp * -make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) +make_regexp(char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)  {  	static char metas[] = ".*+(){}[]|?^$\\";  	Regexp *rp; @@ -53,8 +53,22 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)  	int i;  	static struct dfa* dfaregs[2] = { NULL, NULL };  	static bool nul_warned = false; +	char save; +	size_t savelen; -	assert(s[len] == '\0'); +	/* +	 * 10/2025: We used to have: +	 * +	 *	assert(s[len] == '\0'); +	 * +	 * here, but data can come in, by way of re_update(), that is from $0 or +	 * elsewhere where there is no final '\0'. So we save and restore +	 * the character at s[len] and force a '\0' into position there. 
+	 * It needs to be a C string for use in error messages. +	 */ +	savelen = len; +	save = s[len]; +	s[len] = '\0';  	if (do_lint && ! nul_warned && memchr(s, '\0', len) != NULL) {  		nul_warned = true; @@ -275,7 +289,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)  	 * character sets only.  	 *  	 * On the other hand, if we do have a single-byte character set, -	 * using the casetable should give  a performance improvement, since +	 * using the casetable should give a performance improvement, since  	 * it's computed only once, not each time a regex is compiled.  We  	 * also think it's probably better for portability.  See the  	 * discussion by the definition of casetable[] in eval.c. @@ -311,6 +325,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)  		if (! canfatal) {  			/* rerr already gettextized inside regex routines */  			error("%s: /%s/", rerr, s); +			s[savelen] = save;   			return NULL;  		}  		fatal("invalid regexp: %s: /%s/", rerr, s); @@ -340,6 +355,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)  		}  	} +	s[savelen] = save;  	return rp;  } diff --git a/test/ChangeLog b/test/ChangeLog index b341b3ba..e7a8d773 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2025-10-29         Arnold D. Robbins     <[email protected]> + +	* Makefile.am (EXTRA_DIST): New test, match5. +	* match5.awk, match5.in, match5.ok: New files. +  2025-10-28         Arnold D. Robbins     <[email protected]>  	* lintwarn.awk, lintwarn.ok: Adjust after code changes. 
diff --git a/test/Makefile.am b/test/Makefile.am index 3f72d117..d6b8a8c3 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -792,6 +792,9 @@ EXTRA_DIST = \  	match3.ok \  	match4.awk \  	match4.ok \ +	match5.awk \ +	match5.in \ +	match5.ok \  	matchbadarg1.awk \  	matchbadarg1.in \  	matchbadarg1.ok \ @@ -1581,7 +1584,7 @@ BASIC_TESTS = \  	inpref inputred intest intprec iobug1 \  	leaddig leadnl litoct longsub longwrds \  	manglprm match4 matchuninitialized math membug1 memleak messages \ -	matchbadarg1 matchbadarg2 \ +	matchbadarg1 matchbadarg2 match5 \  	minusstr mmap8k \  	nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \  	nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \ diff --git a/test/Makefile.in b/test/Makefile.in index a5793aae..a6bc128e 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -1056,6 +1056,9 @@ EXTRA_DIST = \  	match3.ok \  	match4.awk \  	match4.ok \ +	match5.awk \ +	match5.in \ +	match5.ok \  	matchbadarg1.awk \  	matchbadarg1.in \  	matchbadarg1.ok \ @@ -1845,7 +1848,7 @@ BASIC_TESTS = \  	inpref inputred intest intprec iobug1 \  	leaddig leadnl litoct longsub longwrds \  	manglprm match4 matchuninitialized math membug1 memleak messages \ -	matchbadarg1 matchbadarg2 \ +	matchbadarg1 matchbadarg2 match5 \  	minusstr mmap8k \  	nasty nasty2 negexp negrange nested nfldstr nfloop nfneg nfset \  	nlfldsep nlinstr nlstrina noeffect nofile nofmtch noloop1 \ @@ -3836,6 +3839,11 @@ matchbadarg2:  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@  	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +match5: +	@echo $@ +	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ +	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +  minusstr:  	@echo $@  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ diff --git a/test/Maketests b/test/Maketests index a782659e..276eccdb 100644 --- a/test/Maketests +++ b/test/Maketests @@ -629,6 +629,11 @@ matchbadarg2:  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@  	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +match5: +	@echo $@ +	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ +	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +  minusstr:  	@echo $@  	@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/match5.awk b/test/match5.awk new file mode 100644 index 00000000..0b238a29 --- /dev/null +++ b/test/match5.awk @@ -0,0 +1,6 @@ +NF > 0 && match($NF, $1) { +	print $0, RSTART, RLENGTH +	if (RLENGTH != length($1)) +		printf "match error at %d: %d %d\n", +			NR, RLENGTH, RSTART >"/dev/tty" +} diff --git a/test/match5.in b/test/match5.in new file mode 100644 index 00000000..be3dbf61 --- /dev/null +++ b/test/match5.in @@ -0,0 +1,199 @@ +/dev/rrp3: + +17379	mel +16693	bwk	me +16116	ken	him	someone else +15713	srb +11895	lem +10409	scj +10252	rhm + 9853	shen + 9748	a68 + 9492	sif + 9190	pjw + 8912	nls + 8895	dmr + 8491	cda + 8372	bs + 8252	llc + 7450	mb + 7360	ava + 7273	jrv + 7080	bin + 7063	greg + 6567	dict + 6462	lck + 6291	rje + 6211	lwf + 5671	dave + 5373	jhc + 5220	agf + 5167	doug + 5007	valerie + 3963	jca + 3895	bbs + 3796	moh + 3481	xchar + 3200	tbl + 2845	s + 2774	tgs + 2641	met + 2566	jck + 2511	port + 2479	sue + 2127	root + 1989	bsb + 1989	jeg + 1933	eag + 1801	pdj + 1590	tpc + 1385	cvw + 1370	rwm + 1316	avg + 1205	eg + 1194	jam + 1153	dl + 1150	lgm + 1031	cmb + 1018	jwr +  950	gdb +  931	marc +  898	usg +  865	ggr +  822	daemon +  803	mihalis +  700	honey +  624	tad +  559	acs +  541	uucp +  523	raf +  495	adh +  456	kec +  414	craig +  386	donmac +  375	jj +  348	ravi +  344	drw +  327	stars +  
288	mrg +  272	jcb +  263	ralph +  253	tom +  251	sjb +  248	haight +  224	sharon +  222	chuck +  213	dsj +  201	bill +  184	god +  176	sys +  166	meh +  163	jon +  144	dan +  143	fox +  123	dale +  116	kab +   95	buz +   80	asc +   79	jas +   79	trt +   64	wsb +   62	dwh +   56	ktf +   54	lr +   47	dlc +   45	dls +   45	jwf +   44	mash +   43	ars +   43	vgl +   37	jfo +   32	rab +   31	pd +   29	jns +   25	spm +   22	rob +   15	egb +   10	hm +   10	mhb +    6	aed +    6	cpb +    5	evp +    4	ber +    4	men +    4	mitch +    3	ast +    3	jfr +    3	lax +    3	nel +    2	blue +    2	jfk +    2	njas +    1	122sec +    1	ddwar +    1	gopi +    1	jk +    1	learn +    1	low +    1	nac +    1	sidor +1root:EMpNB8Zp56:0:0:Super-User,,,,,,,:/:/bin/sh +2roottcsh:*:0:0:Super-User running tcsh [cbm]:/:/bin/tcsh +3sysadm:*:0:0:System V Administration:/usr/admin:/bin/sh +4diag:*:0:996:Hardware Diagnostics:/usr/diags:/bin/csh +5daemon:*:1:1:daemons:/:/bin/sh +6bin:*:2:2:System Tools Owner:/bin:/dev/null +7nuucp:BJnuQbAo:6:10:UUCP.Admin:/usr/spool/uucppublic:/usr/lib/uucp/uucico +8uucp:*:3:5:UUCP.Admin:/usr/lib/uucp: +9sys:*:4:0:System Activity Owner:/usr/adm:/bin/sh +10adm:*:5:3:Accounting Files Owner:/usr/adm:/bin/sh +11lp:*:9:9:Print Spooler Owner:/var/spool/lp:/bin/sh +12auditor:*:11:0:Audit Activity Owner:/auditor:/bin/sh +13dbadmin:*:12:0:Security Database Owner:/dbadmin:/bin/sh +14bootes:dcon:50:1:Tom Killian (DO NOT REMOVE):/tmp: +15cdjuke:dcon:51:1:Tom Killian (DO NOT REMOVE):/tmp: +16rfindd:*:66:1:Rfind Daemon and Fsdump:/var/rfindd:/bin/sh +17EZsetup:*:992:998:System Setup:/var/sysadmdesktop/EZsetup:/bin/csh +18demos:*:993:997:Demonstration User:/usr/demos:/bin/csh +19tutor:*:994:997:Tutorial User:/usr/tutor:/bin/csh +20tour:*:995:997:IRIS Space Tour:/usr/people/tour:/bin/csh +21guest:nfP4/Wpvio/Rw:998:998:Guest Account:/usr/people/guest:/bin/csh +224Dgifts:0nWRTZsOMt.:999:998:4Dgifts Account:/usr/people/4Dgifts:/bin/csh +23nobody:*:60001:60001:SVR4 nobody 
uid:/dev/null:/dev/null +24noaccess:*:60002:60002:uid no access:/dev/null:/dev/null +25nobody:*:-2:-2:original nobody uid:/dev/null:/dev/null +26rje:*:8:8:RJE Owner:/usr/spool/rje: +27changes:*:11:11:system change log:/: +28dist:sorry:9999:4:file distributions:/v/adm/dist:/v/bin/sh +29man:*:99:995:On-line Manual Owner:/: +30phoneca:*:991:991:phone call log [tom]:/v/adm/log:/v/bin/sh +1r oot EMpNB8Zp56 0 0 Super-User,,,,,,, / /bin/sh +2r oottcsh * 0 0 Super-User running tcsh [cbm] / /bin/tcsh +3s ysadm * 0 0 System V Administration /usr/admin /bin/sh +4d iag * 0 996 Hardware Diagnostics /usr/diags /bin/csh +5d aemon * 1 1 daemons / /bin/sh +6b in * 2 2 System Tools Owner /bin /dev/null +7n uucp BJnuQbAo 6 10 UUCP.Admin /usr/spool/uucppublic /usr/lib/uucp/uucico +8u ucp * 3 5 UUCP.Admin /usr/lib/uucp  +9s ys * 4 0 System Activity Owner /usr/adm /bin/sh +10 adm * 5 3 Accounting Files Owner /usr/adm /bin/sh +11 lp * 9 9 Print Spooler Owner /var/spool/lp /bin/sh +12 auditor * 11 0 Audit Activity Owner /auditor /bin/sh +13 dbadmin * 12 0 Security Database Owner /dbadmin /bin/sh +14 bootes dcon 50 1 Tom Killian (DO NOT REMOVE) /tmp  +15 cdjuke dcon 51 1 Tom Killian (DO NOT REMOVE) /tmp  +16 rfindd * 66 1 Rfind Daemon and Fsdump /var/rfindd /bin/sh +17 EZsetup * 992 998 System Setup /var/sysadmdesktop/EZsetup /bin/csh +18 demos * 993 997 Demonstration User /usr/demos /bin/csh +19 tutor * 994 997 Tutorial User /usr/tutor /bin/csh +20 tour * 995 997 IRIS Space Tour /usr/people/tour /bin/csh +21 guest nfP4/Wpvio/Rw 998 998 Guest Account /usr/people/guest /bin/csh +22 4Dgifts 0nWRTZsOMt. 
999 998 4Dgifts Account /usr/people/4Dgifts /bin/csh +23 nobody * 60001 60001 SVR4 nobody uid /dev/null /dev/null +24 noaccess * 60002 60002 uid no access /dev/null /dev/null +25 nobody * -2 -2 original nobody uid /dev/null /dev/null +26 rje * 8 8 RJE Owner /usr/spool/rje  +27 changes * 11 11 system change log /  +28 dist sorry 9999 4 file distributions /v/adm/dist /v/bin/sh +29 man * 99 995 On-line Manual Owner /  +30 phoneca * 991 991 phone call log [tom] /v/adm/log /v/bin/sh diff --git a/test/match5.ok b/test/match5.ok new file mode 100644 index 00000000..8b0731d9 --- /dev/null +++ b/test/match5.ok @@ -0,0 +1,4 @@ +/dev/rrp3: 1 10 +    1	122sec 1 1 +1root:EMpNB8Zp56:0:0:Super-User,,,,,,,:/:/bin/sh 1 48 +7nuucp:BJnuQbAo:6:10:UUCP.Admin:/usr/spool/uucppublic:/usr/lib/uucp/uucico 1 74 | 
