Browse code

Add regcomp() and regexec() implementations from OpenBSD's libc. This code is licensed under the 3-clause BSD license. It will be used instead of the system-provided regexec()/regcomp() to get consistent behaviour across platforms.

git-svn: trunk@3225

Török Edvin authored on 2007/09/18 03:54:56
Showing 20 changed files
... ...
@@ -1,3 +1,10 @@
1
+Mon Sep 17 21:06:59 EEST 2007(edwin)
2
+------------------------------------
3
+ * libclamav/regex/: add regcomp(), regexec() impl. from OpenBSD's libc.
4
+ 	This code is licensed under the 3-clause BSD.
5
+	This will be used instead of system provided regexec()/regcomp() to
6
+	have consistent behaviour across platforms.
7
+
1 8
 Mon Sep 17 17:12:27 BST 2007 (njh)
2 9
 ----------------------------------
3 10
   * libclamav/mbox.c:	Bugs 665/667
... ...
@@ -207,9 +207,6 @@
207 207
 /* Define to 1 if you have the `recvmsg' function. */
208 208
 #undef HAVE_RECVMSG
209 209
 
210
-/* Define to 1 if you have the <regex.h> header file. */
211
-#undef HAVE_REGEX_H
212
-
213 210
 /* have resolv.h */
214 211
 #undef HAVE_RESOLV_H
215 212
 
... ...
@@ -19993,8 +19993,7 @@ fi
19993 19993
 
19994 19994
 
19995 19995
 
19996
-
19997
-for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h
19996
+for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h
19998 19997
 do
19999 19998
 as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
20000 19999
 if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
... ...
@@ -40,7 +40,7 @@ AC_DEFINE(SCANBUFF, 131072, [scan buffer size])
40 40
 AC_DEFINE(FILEBUFF, 8192,   [file i/o buffer size])
41 41
 
42 42
 AC_HEADER_STDC
43
-AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h)
43
+AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h)
44 44
 AC_CHECK_HEADER(syslog.h,AC_DEFINE(USE_SYSLOG,1,[use syslog]),)
45 45
 
46 46
 AC_TYPE_OFF_T
... ...
@@ -110,6 +110,11 @@ libclamav_la_SOURCES = \
110 110
 	is_tar.h \
111 111
 	tnef.c \
112 112
 	tnef.h \
113
+	regex/strlcpy.c \
114
+	regex/regcomp.c \
115
+	regex/regerror.c \
116
+	regex/regexec.c \
117
+	regex/regfree.c \
113 118
 	unrar/unrar15.c \
114 119
 	unrar/unrar20.h \
115 120
 	unrar/unrarcmd.h \
... ...
@@ -58,7 +58,7 @@ host_triplet = @host@
58 58
 target_triplet = @target@
59 59
 subdir = libclamav
60 60
 DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \
61
-	$(srcdir)/Makefile.in
61
+	$(srcdir)/Makefile.in COPYING
62 62
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
63 63
 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
64 64
 	$(top_srcdir)/configure.in
... ...
@@ -84,10 +84,11 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
84 84
 	pe.lo upx.lo htmlnorm.lo chmunpack.lo rebuildpe.lo petite.lo \
85 85
 	wwunpack.lo unsp.lo aspack.lo packlibs.lo fsg.lo mew.lo \
86 86
 	upack.lo line.lo untar.lo unzip.lo special.lo binhex.lo \
87
-	is_tar.lo tnef.lo unrar15.lo unrarvm.lo unrar.lo \
88
-	unrarfilter.lo unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo \
89
-	LZMADecode.lo bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo \
90
-	yc.lo elf.lo sis.lo uuencode.lo pst.lo phishcheck.lo \
87
+	is_tar.lo tnef.lo strlcpy.lo regcomp.lo regerror.lo regexec.lo \
88
+	regfree.lo unrar15.lo unrarvm.lo unrar.lo unrarfilter.lo \
89
+	unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo LZMADecode.lo \
90
+	bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo yc.lo elf.lo \
91
+	sis.lo uuencode.lo pst.lo phishcheck.lo \
91 92
 	phish_domaincheck_db.lo phish_whitelist.lo regex_list.lo \
92 93
 	sha256.lo mspack.lo cab.lo entconv.lo hashtab.lo dconf.lo \
93 94
 	lockdb.lo
... ...
@@ -325,6 +326,11 @@ libclamav_la_SOURCES = \
325 325
 	is_tar.h \
326 326
 	tnef.c \
327 327
 	tnef.h \
328
+	regex/strlcpy.c \
329
+	regex/regcomp.c \
330
+	regex/regerror.c \
331
+	regex/regexec.c \
332
+	regex/regfree.c \
328 333
 	unrar/unrar15.c \
329 334
 	unrar/unrar20.h \
330 335
 	unrar/unrarcmd.h \
... ...
@@ -505,7 +511,11 @@ distclean-compile:
505 505
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pst.Plo@am__quote@
506 506
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdb.Plo@am__quote@
507 507
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rebuildpe.Plo@am__quote@
508
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regcomp.Plo@am__quote@
509
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regerror.Plo@am__quote@
508 510
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_list.Plo@am__quote@
511
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexec.Plo@am__quote@
512
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regfree.Plo@am__quote@
509 513
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rtf.Plo@am__quote@
510 514
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanners.Plo@am__quote@
511 515
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@
... ...
@@ -514,6 +524,7 @@ distclean-compile:
514 514
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/special.Plo@am__quote@
515 515
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spin.Plo@am__quote@
516 516
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str.Plo@am__quote@
517
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strlcpy.Plo@am__quote@
517 518
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/table.Plo@am__quote@
518 519
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/text.Plo@am__quote@
519 520
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tnef.Plo@am__quote@
... ...
@@ -556,6 +567,41 @@ distclean-compile:
556 556
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
557 557
 @am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
558 558
 
559
+strlcpy.lo: regex/strlcpy.c
560
+@am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT strlcpy.lo -MD -MP -MF "$(DEPDIR)/strlcpy.Tpo" -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c; \
561
+@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/strlcpy.Tpo" "$(DEPDIR)/strlcpy.Plo"; else rm -f "$(DEPDIR)/strlcpy.Tpo"; exit 1; fi
562
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='regex/strlcpy.c' object='strlcpy.lo' libtool=yes @AMDEPBACKSLASH@
563
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
564
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c
565
+
566
+regcomp.lo: regex/regcomp.c
567
+@am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regcomp.lo -MD -MP -MF "$(DEPDIR)/regcomp.Tpo" -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c; \
568
+@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/regcomp.Tpo" "$(DEPDIR)/regcomp.Plo"; else rm -f "$(DEPDIR)/regcomp.Tpo"; exit 1; fi
569
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='regex/regcomp.c' object='regcomp.lo' libtool=yes @AMDEPBACKSLASH@
570
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
571
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c
572
+
573
+regerror.lo: regex/regerror.c
574
+@am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regerror.lo -MD -MP -MF "$(DEPDIR)/regerror.Tpo" -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c; \
575
+@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/regerror.Tpo" "$(DEPDIR)/regerror.Plo"; else rm -f "$(DEPDIR)/regerror.Tpo"; exit 1; fi
576
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='regex/regerror.c' object='regerror.lo' libtool=yes @AMDEPBACKSLASH@
577
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
578
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c
579
+
580
+regexec.lo: regex/regexec.c
581
+@am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regexec.lo -MD -MP -MF "$(DEPDIR)/regexec.Tpo" -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c; \
582
+@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/regexec.Tpo" "$(DEPDIR)/regexec.Plo"; else rm -f "$(DEPDIR)/regexec.Tpo"; exit 1; fi
583
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='regex/regexec.c' object='regexec.lo' libtool=yes @AMDEPBACKSLASH@
584
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
585
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c
586
+
587
+regfree.lo: regex/regfree.c
588
+@am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regfree.lo -MD -MP -MF "$(DEPDIR)/regfree.Tpo" -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c; \
589
+@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/regfree.Tpo" "$(DEPDIR)/regfree.Plo"; else rm -f "$(DEPDIR)/regfree.Tpo"; exit 1; fi
590
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='regex/regfree.c' object='regfree.lo' libtool=yes @AMDEPBACKSLASH@
591
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
592
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c
593
+
559 594
 unrar15.lo: unrar/unrar15.c
560 595
 @am__fastdepCC_TRUE@	if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unrar15.lo -MD -MP -MF "$(DEPDIR)/unrar15.Tpo" -c -o unrar15.lo `test -f 'unrar/unrar15.c' || echo '$(srcdir)/'`unrar/unrar15.c; \
561 596
 @am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/unrar15.Tpo" "$(DEPDIR)/unrar15.Plo"; else rm -f "$(DEPDIR)/unrar15.Tpo"; exit 1; fi
... ...
@@ -353,17 +353,17 @@ static int build_regex(regex_t* preg,const char* regex,int nosub)
353 353
 {
354 354
 	int rc;
355 355
 	cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex);
356
-	rc = regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0));
356
+	rc = cli_regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0));
357 357
 	if(rc) {
358 358
 	
359 359
 #ifdef	C_WINDOWS
360 360
 		cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n");
361 361
 #else
362
-		size_t buflen =	regerror(rc,preg,NULL,0);
362
+		size_t buflen =	cli_regerror(rc,preg,NULL,0);
363 363
 		char *errbuf = cli_malloc(buflen);
364 364
 		
365 365
 		if(errbuf) {
366
-			regerror(rc,preg,errbuf,buflen);
366
+			cli_regerror(rc,preg,errbuf,buflen);
367 367
 			cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf);
368 368
 			free(errbuf);
369 369
 		} else
... ...
@@ -446,7 +446,7 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U
446 446
 
447 447
 static int isCountryCode(const struct phishcheck* s,const char* str)
448 448
 {
449
-	return str ? !regexec(&s->preg_cctld,str,0,NULL,0) : 0;
449
+	return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0;
450 450
 }
451 451
 
452 452
 static int isTLD(const struct phishcheck* pchk,const char* str,int len)
... ...
@@ -461,7 +461,7 @@ static int isTLD(const struct phishcheck* pchk,const char* str,int len)
461 461
 			return CL_EMEM;
462 462
 		strncpy(s,str,len);
463 463
 		s[len]='\0';
464
-		rc = !regexec(&pchk->preg_tld,s,0,NULL,0);
464
+		rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0);
465 465
 		free(s);
466 466
 		return rc ? 1 : 0;
467 467
 	}
... ...
@@ -880,7 +880,7 @@ static char hex2int(const unsigned char* src)
880 880
 static void free_regex(regex_t* p)
881 881
 {
882 882
 	if(p) {
883
-		regfree(p);
883
+		cli_regfree(p);
884 884
 	}
885 885
 }
886 886
 
... ...
@@ -977,12 +977,12 @@ void phishing_done(struct cl_engine* engine)
977 977
  */
978 978
 static int isURL(const struct phishcheck* pchk,const char* URL)
979 979
 {
980
-	return URL ? !regexec(&pchk->preg,URL,0,NULL,0) : 0;
980
+	return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0;
981 981
 }
982 982
 
983 983
 static int isNumericURL(const struct phishcheck* pchk,const char* URL)
984 984
 {
985
-	return URL ? !regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0;
985
+	return URL ? !cli_regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0;
986 986
 }
987 987
 
988 988
 /* Cleans up @urls
... ...
@@ -1013,7 +1013,7 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str
1013 1013
 		string_free(host);
1014 1014
 		return CL_PHISH_TEXTURL;
1015 1015
 	}
1016
-	if(url->flags&CHECK_CLOAKING && !regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
1016
+	if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
1017 1017
 		/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
1018 1018
 		string_free(host);
1019 1019
 		return CL_PHISH_HEX_URL;
... ...
@@ -20,9 +20,7 @@
20 20
 #ifndef _PHISH_CHECK_H
21 21
 #define _PHISH_CHECK_H
22 22
 
23
-#ifdef	HAVE_REGEX_H
24
-#include <regex.h>
25
-#endif
23
+#include "regex/regex.h"
26 24
 
27 25
 #define CL_PHISH_BASE 100
28 26
 enum phish_status {CL_PHISH_NODECISION=0,CL_PHISH_CLEAN=CL_PHISH_BASE, CL_PHISH_CLEANUP_OK,CL_PHISH_HOST_OK, CL_PHISH_DOMAIN_OK,
29 27
new file mode 100644
... ...
@@ -0,0 +1,68 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
35
+ */
36
+
37
+/* character-class table */
38
+static struct cclass {
39
+	const char *name;
40
+	const char *chars;
41
+	const char *multis;
42
+} cclasses[] = {
43
+	{ "alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
44
+0123456789",				""} ,
45
+	{ "alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
46
+					""} ,
47
+	{ "blank",	" \t",		""} ,
48
+	{ "cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
49
+\25\26\27\30\31\32\33\34\35\36\37\177",	""} ,
50
+	{ "digit",	"0123456789",	""} ,
51
+	{ "graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
52
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
53
+					""} ,
54
+	{ "lower",	"abcdefghijklmnopqrstuvwxyz",
55
+					""} ,
56
+	{ "print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
57
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
58
+					""} ,
59
+	{ "punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
60
+					""} ,
61
+	{ "space",	"\t\n\v\f\r ",	""} ,
62
+	{ "upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
63
+					""} ,
64
+	{ "xdigit",	"0123456789ABCDEFabcdef",
65
+					""} ,
66
+	{ NULL,		0,		"" }
67
+};
0 68
new file mode 100644
... ...
@@ -0,0 +1,139 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)cname.h	8.3 (Berkeley) 3/20/94
35
+ */
36
+
37
+/* character-name table */
38
+static struct cname {
39
+	const char *name;
40
+	char code;
41
+} cnames[] = {
42
+	{ "NUL",			'\0' },
43
+	{ "SOH",			'\001' },
44
+	{ "STX",			'\002' },
45
+	{ "ETX",			'\003' },
46
+	{ "EOT",			'\004' },
47
+	{ "ENQ",			'\005' },
48
+	{ "ACK",			'\006' },
49
+	{ "BEL",			'\007' },
50
+	{ "alert",			'\007' },
51
+	{ "BS",				'\010' },
52
+	{ "backspace",			'\b' },
53
+	{ "HT",				'\011' },
54
+	{ "tab",			'\t' },
55
+	{ "LF",				'\012' },
56
+	{ "newline",			'\n' },
57
+	{ "VT",				'\013' },
58
+	{ "vertical-tab",		'\v' },
59
+	{ "FF",				'\014' },
60
+	{ "form-feed",			'\f' },
61
+	{ "CR",				'\015' },
62
+	{ "carriage-return",		'\r' },
63
+	{ "SO",				'\016' },
64
+	{ "SI",				'\017' },
65
+	{ "DLE",			'\020' },
66
+	{ "DC1",			'\021' },
67
+	{ "DC2",			'\022' },
68
+	{ "DC3",			'\023' },
69
+	{ "DC4",			'\024' },
70
+	{ "NAK",			'\025' },
71
+	{ "SYN",			'\026' },
72
+	{ "ETB",			'\027' },
73
+	{ "CAN",			'\030' },
74
+	{ "EM",				'\031' },
75
+	{ "SUB",			'\032' },
76
+	{ "ESC",			'\033' },
77
+	{ "IS4",			'\034' },
78
+	{ "FS",				'\034' },
79
+	{ "IS3",			'\035' },
80
+	{ "GS",				'\035' },
81
+	{ "IS2",			'\036' },
82
+	{ "RS",				'\036' },
83
+	{ "IS1",			'\037' },
84
+	{ "US",				'\037' },
85
+	{ "space",			' ' },
86
+	{ "exclamation-mark",		'!' },
87
+	{ "quotation-mark",		'"' },
88
+	{ "number-sign",		'#' },
89
+	{ "dollar-sign",		'$' },
90
+	{ "percent-sign",		'%' },
91
+	{ "ampersand",			'&' },
92
+	{ "apostrophe",			'\'' },
93
+	{ "left-parenthesis",		'(' },
94
+	{ "right-parenthesis",		')' },
95
+	{ "asterisk",			'*' },
96
+	{ "plus-sign",			'+' },
97
+	{ "comma",			',' },
98
+	{ "hyphen",			'-' },
99
+	{ "hyphen-minus",		'-' },
100
+	{ "period",			'.' },
101
+	{ "full-stop",			'.' },
102
+	{ "slash",			'/' },
103
+	{ "solidus",			'/' },
104
+	{ "zero",			'0' },
105
+	{ "one",			'1' },
106
+	{ "two",			'2' },
107
+	{ "three",			'3' },
108
+	{ "four",			'4' },
109
+	{ "five",			'5' },
110
+	{ "six",			'6' },
111
+	{ "seven",			'7' },
112
+	{ "eight",			'8' },
113
+	{ "nine",			'9' },
114
+	{ "colon",			':' },
115
+	{ "semicolon",			';' },
116
+	{ "less-than-sign",		'<' },
117
+	{ "equals-sign",		'=' },
118
+	{ "greater-than-sign",		'>' },
119
+	{ "question-mark",		'?' },
120
+	{ "commercial-at",		'@' },
121
+	{ "left-square-bracket",	'[' },
122
+	{ "backslash",			'\\' },
123
+	{ "reverse-solidus",		'\\' },
124
+	{ "right-square-bracket",	']' },
125
+	{ "circumflex",			'^' },
126
+	{ "circumflex-accent",		'^' },
127
+	{ "underscore",			'_' },
128
+	{ "low-line",			'_' },
129
+	{ "grave-accent",		'`' },
130
+	{ "left-brace",			'{' },
131
+	{ "left-curly-bracket",		'{' },
132
+	{ "vertical-line",		'|' },
133
+	{ "right-brace",		'}' },
134
+	{ "right-curly-bracket",	'}' },
135
+	{ "tilde",			'~' },
136
+	{ "DEL",			'\177' },
137
+	{ NULL,				0 }
138
+};
0 139
new file mode 100644
... ...
@@ -0,0 +1,1020 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)engine.c	8.5 (Berkeley) 3/20/94
35
+ */
36
+
37
+/*
38
+ * The matching engine and friends.  This file is #included by regexec.c
39
+ * after suitable #defines of a variety of macros used herein, so that
40
+ * different state representations can be used without duplicating masses
41
+ * of code.
42
+ */
43
+
44
+#ifdef SNAMES
45
+#define	matcher	smatcher
46
+#define	fast	sfast
47
+#define	slow	sslow
48
+#define	dissect	sdissect
49
+#define	backref	sbackref
50
+#define	step	sstep
51
+#define	print	sprint
52
+#define	at	sat
53
+#define	match	smat
54
+#define	nope	snope
55
+#endif
56
+#ifdef LNAMES
57
+#define	matcher	lmatcher
58
+#define	fast	lfast
59
+#define	slow	lslow
60
+#define	dissect	ldissect
61
+#define	backref	lbackref
62
+#define	step	lstep
63
+#define	print	lprint
64
+#define	at	lat
65
+#define	match	lmat
66
+#define	nope	lnope
67
+#endif
68
+
69
+/* another structure passed up and down to avoid zillions of parameters */
70
+struct match {
71
+	struct re_guts *g;
72
+	int eflags;
73
+	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
74
+	char *offp;		/* offsets work from here */
75
+	char *beginp;		/* start of string -- virtual NUL precedes */
76
+	char *endp;		/* end of string -- virtual NUL here */
77
+	char *coldp;		/* can be no match starting before here */
78
+	char **lastpos;		/* [nplus+1] */
79
+	STATEVARS;
80
+	states st;		/* current states */
81
+	states fresh;		/* states for a fresh start */
82
+	states tmp;		/* temporary */
83
+	states empty;		/* empty set of states */
84
+};
85
+
86
+static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
87
+static char *dissect(struct match *, char *, char *, sopno, sopno);
88
+static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
89
+static char *fast(struct match *, char *, char *, sopno, sopno);
90
+static char *slow(struct match *, char *, char *, sopno, sopno);
91
+static states step(struct re_guts *, sopno, sopno, states, int, states);
92
+#define MAX_RECURSION	100
93
+#define	BOL	(OUT+1)
94
+#define	EOL	(BOL+1)
95
+#define	BOLEOL	(BOL+2)
96
+#define	NOTHING	(BOL+3)
97
+#define	BOW	(BOL+4)
98
+#define	EOW	(BOL+5)
99
+#define	CODEMAX	(BOL+5)		/* highest code used */
100
+#define	NONCHAR(c)	((c) > CHAR_MAX)
101
+#define	NNONCHAR	(CODEMAX-CHAR_MAX)
102
+#ifdef REDEBUG
103
+static void print(struct match *, char *, states, int, FILE *);
104
+#endif
105
+#ifdef REDEBUG
106
+static void at(struct match *, char *, char *, char *, sopno, sopno);
107
+#endif
108
+#ifdef REDEBUG
109
+static char *pchar(int);
110
+#endif
111
+
112
+#ifdef REDEBUG
113
+#define	SP(t, s, c)	print(m, t, s, c, stdout)
114
+#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
115
+#define	NOTE(str)	{ if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
116
+static int nope = 0;
117
+#else
118
+#define	SP(t, s, c)	/* nothing */
119
+#define	AT(t, p1, p2, s1, s2)	/* nothing */
120
+#define	NOTE(s)	/* nothing */
121
+#endif
122
+
123
+/*
124
+ - matcher - the actual matching engine
125
+ */
126
+static int			/* 0 success, REG_NOMATCH failure */
127
+matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
128
+    int eflags)
129
+{
130
+	char *endp;
131
+	size_t i;
132
+	struct match mv;
133
+	struct match *m = &mv;
134
+	char *dp;
135
+	const sopno gf = g->firststate+1;	/* +1 for OEND */
136
+	const sopno gl = g->laststate;
137
+	char *start;
138
+	char *stop;
139
+
140
+	/* simplify the situation where possible */
141
+	if (g->cflags&REG_NOSUB)
142
+		nmatch = 0;
143
+	if (eflags&REG_STARTEND) {
144
+		start = string + pmatch[0].rm_so;
145
+		stop = string + pmatch[0].rm_eo;
146
+	} else {
147
+		start = string;
148
+		stop = start + strlen(start);
149
+	}
150
+	if (stop < start)
151
+		return(REG_INVARG);
152
+
153
+	/* prescreening; this does wonders for this rather slow code */
154
+	if (g->must != NULL) {
155
+		for (dp = start; dp < stop; dp++)
156
+			if (*dp == g->must[0] && stop - dp >= g->mlen &&
157
+				memcmp(dp, g->must, (size_t)g->mlen) == 0)
158
+				break;
159
+		if (dp == stop)		/* we didn't find g->must */
160
+			return(REG_NOMATCH);
161
+	}
162
+
163
+	/* match struct setup */
164
+	m->g = g;
165
+	m->eflags = eflags;
166
+	m->pmatch = NULL;
167
+	m->lastpos = NULL;
168
+	m->offp = string;
169
+	m->beginp = start;
170
+	m->endp = stop;
171
+	STATESETUP(m, 4);
172
+	SETUP(m->st);
173
+	SETUP(m->fresh);
174
+	SETUP(m->tmp);
175
+	SETUP(m->empty);
176
+	CLEAR(m->empty);
177
+
178
+	/* this loop does only one repetition except for backrefs */
179
+	for (;;) {
180
+		endp = fast(m, start, stop, gf, gl);
181
+		if (endp == NULL) {		/* a miss */
182
+			free(m->pmatch);
183
+			free(m->lastpos);
184
+			STATETEARDOWN(m);
185
+			return(REG_NOMATCH);
186
+		}
187
+		if (nmatch == 0 && !g->backrefs)
188
+			break;		/* no further info needed */
189
+
190
+		/* where? */
191
+		assert(m->coldp != NULL);
192
+		for (;;) {
193
+			NOTE("finding start");
194
+			endp = slow(m, m->coldp, stop, gf, gl);
195
+			if (endp != NULL)
196
+				break;
197
+			assert(m->coldp < m->endp);
198
+			m->coldp++;
199
+		}
200
+		if (nmatch == 1 && !g->backrefs)
201
+			break;		/* no further info needed */
202
+
203
+		/* oh my, he wants the subexpressions... */
204
+		if (m->pmatch == NULL)
205
+			m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) *
206
+							sizeof(regmatch_t));
207
+		if (m->pmatch == NULL) {
208
+			STATETEARDOWN(m);
209
+			return(REG_ESPACE);
210
+		}
211
+		for (i = 1; i <= m->g->nsub; i++)
212
+			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
213
+		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
214
+			NOTE("dissecting");
215
+			dp = dissect(m, m->coldp, endp, gf, gl);
216
+		} else {
217
+			if (g->nplus > 0 && m->lastpos == NULL)
218
+				m->lastpos = (char **)cli_malloc((g->nplus+1) *
219
+							sizeof(char *));
220
+			if (g->nplus > 0 && m->lastpos == NULL) {
221
+				free(m->pmatch);
222
+				STATETEARDOWN(m);
223
+				return(REG_ESPACE);
224
+			}
225
+			NOTE("backref dissect");
226
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
227
+		}
228
+		if (dp != NULL)
229
+			break;
230
+
231
+		/* uh-oh... we couldn't find a subexpression-level match */
232
+		assert(g->backrefs);	/* must be back references doing it */
233
+		assert(g->nplus == 0 || m->lastpos != NULL);
234
+		for (;;) {
235
+			if (dp != NULL || endp <= m->coldp)
236
+				break;		/* defeat */
237
+			NOTE("backoff");
238
+			endp = slow(m, m->coldp, endp-1, gf, gl);
239
+			if (endp == NULL)
240
+				break;		/* defeat */
241
+			/* try it on a shorter possibility */
242
+#ifndef NDEBUG
243
+			for (i = 1; i <= m->g->nsub; i++) {
244
+				assert(m->pmatch[i].rm_so == -1);
245
+				assert(m->pmatch[i].rm_eo == -1);
246
+			}
247
+#endif
248
+			NOTE("backoff dissect");
249
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
250
+		}
251
+		assert(dp == NULL || dp == endp);
252
+		if (dp != NULL)		/* found a shorter one */
253
+			break;
254
+
255
+		/* despite initial appearances, there is no match here */
256
+		NOTE("false alarm");
257
+		if (m->coldp == stop)
258
+			break;
259
+		start = m->coldp + 1;	/* recycle starting later */
260
+	}
261
+
262
+	/* fill in the details if requested */
263
+	if (nmatch > 0) {
264
+		pmatch[0].rm_so = m->coldp - m->offp;
265
+		pmatch[0].rm_eo = endp - m->offp;
266
+	}
267
+	if (nmatch > 1) {
268
+		assert(m->pmatch != NULL);
269
+		for (i = 1; i < nmatch; i++)
270
+			if (i <= m->g->nsub)
271
+				pmatch[i] = m->pmatch[i];
272
+			else {
273
+				pmatch[i].rm_so = -1;
274
+				pmatch[i].rm_eo = -1;
275
+			}
276
+	}
277
+
278
+	if (m->pmatch != NULL)
279
+		free((char *)m->pmatch);
280
+	if (m->lastpos != NULL)
281
+		free((char *)m->lastpos);
282
+	STATETEARDOWN(m);
283
+	return(0);
284
+}
285
+
286
+/*
287
+ - dissect - figure out what matched what, no back references
288
+ */
289
+static char *			/* == stop (success) always */
290
+dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
291
+{
292
+	int i;
293
+	sopno ss;	/* start sop of current subRE */
294
+	sopno es;	/* end sop of current subRE */
295
+	char *sp;	/* start of string matched by it */
296
+	char *stp;	/* string matched by it cannot pass here */
297
+	char *rest;	/* start of rest of string */
298
+	char *tail;	/* string unmatched by rest of RE */
299
+	sopno ssub;	/* start sop of subsubRE */
300
+	sopno esub;	/* end sop of subsubRE */
301
+	char *ssp;	/* start of string matched by subsubRE */
302
+	char *sep;	/* end of string matched by subsubRE */
303
+	char *oldssp;	/* previous ssp */
304
+	char *dp;
305
+
306
+	AT("diss", start, stop, startst, stopst);
307
+	sp = start;
308
+	for (ss = startst; ss < stopst; ss = es) {
309
+		/* identify end of subRE */
310
+		es = ss;
311
+		switch (OP(m->g->strip[es])) {
312
+		case OPLUS_:
313
+		case OQUEST_:
314
+			es += OPND(m->g->strip[es]);
315
+			break;
316
+		case OCH_:
317
+			while (OP(m->g->strip[es]) != O_CH)
318
+				es += OPND(m->g->strip[es]);
319
+			break;
320
+		}
321
+		es++;
322
+
323
+		/* figure out what it matched */
324
+		switch (OP(m->g->strip[ss])) {
325
+		case OEND:
326
+			assert(nope);
327
+			break;
328
+		case OCHAR:
329
+			sp++;
330
+			break;
331
+		case OBOL:
332
+		case OEOL:
333
+		case OBOW:
334
+		case OEOW:
335
+			break;
336
+		case OANY:
337
+		case OANYOF:
338
+			sp++;
339
+			break;
340
+		case OBACK_:
341
+		case O_BACK:
342
+			assert(nope);
343
+			break;
344
+		/* cases where length of match is hard to find */
345
+		case OQUEST_:
346
+			stp = stop;
347
+			for (;;) {
348
+				/* how long could this one be? */
349
+				rest = slow(m, sp, stp, ss, es);
350
+				assert(rest != NULL);	/* it did match */
351
+				/* could the rest match the rest? */
352
+				tail = slow(m, rest, stop, es, stopst);
353
+				if (tail == stop)
354
+					break;		/* yes! */
355
+				/* no -- try a shorter match for this one */
356
+				stp = rest - 1;
357
+				assert(stp >= sp);	/* it did work */
358
+			}
359
+			ssub = ss + 1;
360
+			esub = es - 1;
361
+			/* did innards match? */
362
+			if (slow(m, sp, rest, ssub, esub) != NULL) {
363
+				dp = dissect(m, sp, rest, ssub, esub);
364
+				assert(dp == rest);
365
+			} else		/* no */
366
+				assert(sp == rest);
367
+			sp = rest;
368
+			break;
369
+		case OPLUS_:
370
+			stp = stop;
371
+			for (;;) {
372
+				/* how long could this one be? */
373
+				rest = slow(m, sp, stp, ss, es);
374
+				assert(rest != NULL);	/* it did match */
375
+				/* could the rest match the rest? */
376
+				tail = slow(m, rest, stop, es, stopst);
377
+				if (tail == stop)
378
+					break;		/* yes! */
379
+				/* no -- try a shorter match for this one */
380
+				stp = rest - 1;
381
+				assert(stp >= sp);	/* it did work */
382
+			}
383
+			ssub = ss + 1;
384
+			esub = es - 1;
385
+			ssp = sp;
386
+			oldssp = ssp;
387
+			for (;;) {	/* find last match of innards */
388
+				sep = slow(m, ssp, rest, ssub, esub);
389
+				if (sep == NULL || sep == ssp)
390
+					break;	/* failed or matched null */
391
+				oldssp = ssp;	/* on to next try */
392
+				ssp = sep;
393
+			}
394
+			if (sep == NULL) {
395
+				/* last successful match */
396
+				sep = ssp;
397
+				ssp = oldssp;
398
+			}
399
+			assert(sep == rest);	/* must exhaust substring */
400
+			assert(slow(m, ssp, sep, ssub, esub) == rest);
401
+			dp = dissect(m, ssp, sep, ssub, esub);
402
+			assert(dp == sep);
403
+			sp = rest;
404
+			break;
405
+		case OCH_:
406
+			stp = stop;
407
+			for (;;) {
408
+				/* how long could this one be? */
409
+				rest = slow(m, sp, stp, ss, es);
410
+				assert(rest != NULL);	/* it did match */
411
+				/* could the rest match the rest? */
412
+				tail = slow(m, rest, stop, es, stopst);
413
+				if (tail == stop)
414
+					break;		/* yes! */
415
+				/* no -- try a shorter match for this one */
416
+				stp = rest - 1;
417
+				assert(stp >= sp);	/* it did work */
418
+			}
419
+			ssub = ss + 1;
420
+			esub = ss + OPND(m->g->strip[ss]) - 1;
421
+			assert(OP(m->g->strip[esub]) == OOR1);
422
+			for (;;) {	/* find first matching branch */
423
+				if (slow(m, sp, rest, ssub, esub) == rest)
424
+					break;	/* it matched all of it */
425
+				/* that one missed, try next one */
426
+				assert(OP(m->g->strip[esub]) == OOR1);
427
+				esub++;
428
+				assert(OP(m->g->strip[esub]) == OOR2);
429
+				ssub = esub + 1;
430
+				esub += OPND(m->g->strip[esub]);
431
+				if (OP(m->g->strip[esub]) == OOR2)
432
+					esub--;
433
+				else
434
+					assert(OP(m->g->strip[esub]) == O_CH);
435
+			}
436
+			dp = dissect(m, sp, rest, ssub, esub);
437
+			assert(dp == rest);
438
+			sp = rest;
439
+			break;
440
+		case O_PLUS:
441
+		case O_QUEST:
442
+		case OOR1:
443
+		case OOR2:
444
+		case O_CH:
445
+			assert(nope);
446
+			break;
447
+		case OLPAREN:
448
+			i = OPND(m->g->strip[ss]);
449
+			assert(0 < i && i <= m->g->nsub);
450
+			m->pmatch[i].rm_so = sp - m->offp;
451
+			break;
452
+		case ORPAREN:
453
+			i = OPND(m->g->strip[ss]);
454
+			assert(0 < i && i <= m->g->nsub);
455
+			m->pmatch[i].rm_eo = sp - m->offp;
456
+			break;
457
+		default:		/* uh oh */
458
+			assert(nope);
459
+			break;
460
+		}
461
+	}
462
+
463
+	assert(sp == stop);
464
+	return(sp);
465
+}
466
+
467
+/*
468
+ - backref - figure out what matched what, figuring in back references
469
+ */
470
+static char *			/* == stop (success) or NULL (failure) */
471
+backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
472
+    sopno lev, int rec)			/* PLUS nesting level */
473
+{
474
+	int i;
475
+	sopno ss;	/* start sop of current subRE */
476
+	char *sp;	/* start of string matched by it */
477
+	sopno ssub;	/* start sop of subsubRE */
478
+	sopno esub;	/* end sop of subsubRE */
479
+	char *ssp;	/* start of string matched by subsubRE */
480
+	char *dp;
481
+	size_t len;
482
+	int hard;
483
+	sop s;
484
+	regoff_t offsave;
485
+	cset *cs;
486
+
487
+	AT("back", start, stop, startst, stopst);
488
+	sp = start;
489
+
490
+	/* get as far as we can with easy stuff */
491
+	hard = 0;
492
+	for (ss = startst; !hard && ss < stopst; ss++)
493
+		switch (OP(s = m->g->strip[ss])) {
494
+		case OCHAR:
495
+			if (sp == stop || *sp++ != (char)OPND(s))
496
+				return(NULL);
497
+			break;
498
+		case OANY:
499
+			if (sp == stop)
500
+				return(NULL);
501
+			sp++;
502
+			break;
503
+		case OANYOF:
504
+			cs = &m->g->sets[OPND(s)];
505
+			if (sp == stop || !CHIN(cs, *sp++))
506
+				return(NULL);
507
+			break;
508
+		case OBOL:
509
+			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
510
+					(sp < m->endp && *(sp-1) == '\n' &&
511
+						(m->g->cflags&REG_NEWLINE)) )
512
+				{ /* yes */ }
513
+			else
514
+				return(NULL);
515
+			break;
516
+		case OEOL:
517
+			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
518
+					(sp < m->endp && *sp == '\n' &&
519
+						(m->g->cflags&REG_NEWLINE)) )
520
+				{ /* yes */ }
521
+			else
522
+				return(NULL);
523
+			break;
524
+		case OBOW:
525
+			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
526
+					(sp < m->endp && *(sp-1) == '\n' &&
527
+						(m->g->cflags&REG_NEWLINE)) ||
528
+					(sp > m->beginp &&
529
+							!ISWORD(*(sp-1))) ) &&
530
+					(sp < m->endp && ISWORD(*sp)) )
531
+				{ /* yes */ }
532
+			else
533
+				return(NULL);
534
+			break;
535
+		case OEOW:
536
+			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
537
+					(sp < m->endp && *sp == '\n' &&
538
+						(m->g->cflags&REG_NEWLINE)) ||
539
+					(sp < m->endp && !ISWORD(*sp)) ) &&
540
+					(sp > m->beginp && ISWORD(*(sp-1))) )
541
+				{ /* yes */ }
542
+			else
543
+				return(NULL);
544
+			break;
545
+		case O_QUEST:
546
+			break;
547
+		case OOR1:	/* matches null but needs to skip */
548
+			ss++;
549
+			s = m->g->strip[ss];
550
+			do {
551
+				assert(OP(s) == OOR2);
552
+				ss += OPND(s);
553
+			} while (OP(s = m->g->strip[ss]) != O_CH);
554
+			/* note that the ss++ gets us past the O_CH */
555
+			break;
556
+		default:	/* have to make a choice */
557
+			hard = 1;
558
+			break;
559
+		}
560
+	if (!hard) {		/* that was it! */
561
+		if (sp != stop)
562
+			return(NULL);
563
+		return(sp);
564
+	}
565
+	ss--;			/* adjust for the for's final increment */
566
+
567
+	/* the hard stuff */
568
+	AT("hard", sp, stop, ss, stopst);
569
+	s = m->g->strip[ss];
570
+	switch (OP(s)) {
571
+	case OBACK_:		/* the vilest depths */
572
+		i = OPND(s);
573
+		assert(0 < i && i <= m->g->nsub);
574
+		if (m->pmatch[i].rm_eo == -1)
575
+			return(NULL);
576
+		assert(m->pmatch[i].rm_so != -1);
577
+		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
578
+		if (len == 0 && rec++ > MAX_RECURSION)
579
+			return(NULL);
580
+		assert(stop - m->beginp >= len);
581
+		if (sp > stop - len)
582
+			return(NULL);	/* not enough left to match */
583
+		ssp = m->offp + m->pmatch[i].rm_so;
584
+		if (memcmp(sp, ssp, len) != 0)
585
+			return(NULL);
586
+		while (m->g->strip[ss] != SOP(O_BACK, i))
587
+			ss++;
588
+		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
589
+		break;
590
+	case OQUEST_:		/* to null or not */
591
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
592
+		if (dp != NULL)
593
+			return(dp);	/* not */
594
+		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
595
+		break;
596
+	case OPLUS_:
597
+		assert(m->lastpos != NULL);
598
+		assert(lev+1 <= m->g->nplus);
599
+		m->lastpos[lev+1] = sp;
600
+		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
601
+		break;
602
+	case O_PLUS:
603
+		if (sp == m->lastpos[lev])	/* last pass matched null */
604
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
605
+		/* try another pass */
606
+		m->lastpos[lev] = sp;
607
+		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
608
+		if (dp == NULL)
609
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
610
+		else
611
+			return(dp);
612
+		break;
613
+	case OCH_:		/* find the right one, if any */
614
+		ssub = ss + 1;
615
+		esub = ss + OPND(s) - 1;
616
+		assert(OP(m->g->strip[esub]) == OOR1);
617
+		for (;;) {	/* find first matching branch */
618
+			dp = backref(m, sp, stop, ssub, esub, lev, rec);
619
+			if (dp != NULL)
620
+				return(dp);
621
+			/* that one missed, try next one */
622
+			if (OP(m->g->strip[esub]) == O_CH)
623
+				return(NULL);	/* there is none */
624
+			esub++;
625
+			assert(OP(m->g->strip[esub]) == OOR2);
626
+			ssub = esub + 1;
627
+			esub += OPND(m->g->strip[esub]);
628
+			if (OP(m->g->strip[esub]) == OOR2)
629
+				esub--;
630
+			else
631
+				assert(OP(m->g->strip[esub]) == O_CH);
632
+		}
633
+		break;
634
+	case OLPAREN:		/* must undo assignment if rest fails */
635
+		i = OPND(s);
636
+		assert(0 < i && i <= m->g->nsub);
637
+		offsave = m->pmatch[i].rm_so;
638
+		m->pmatch[i].rm_so = sp - m->offp;
639
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
640
+		if (dp != NULL)
641
+			return(dp);
642
+		m->pmatch[i].rm_so = offsave;
643
+		return(NULL);
644
+		break;
645
+	case ORPAREN:		/* must undo assignment if rest fails */
646
+		i = OPND(s);
647
+		assert(0 < i && i <= m->g->nsub);
648
+		offsave = m->pmatch[i].rm_eo;
649
+		m->pmatch[i].rm_eo = sp - m->offp;
650
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
651
+		if (dp != NULL)
652
+			return(dp);
653
+		m->pmatch[i].rm_eo = offsave;
654
+		return(NULL);
655
+		break;
656
+	default:		/* uh oh */
657
+		assert(nope);
658
+		break;
659
+	}
660
+
661
+	/* "can't happen" */
662
+	assert(nope);
663
+	/* NOTREACHED */
664
+}
665
+
666
+/*
667
+ - fast - step through the string at top speed
668
+ */
669
+static char *			/* where tentative match ended, or NULL */
670
+fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
671
+{
672
+	states st = m->st;
673
+	states fresh = m->fresh;
674
+	states tmp = m->tmp;
675
+	char *p = start;
676
+	int c = (start == m->beginp) ? OUT : *(start-1);
677
+	int lastc;	/* previous c */
678
+	int flagch;
679
+	int i;
680
+	char *coldp;	/* last p after which no match was underway */
681
+
682
+	CLEAR(st);
683
+	SET1(st, startst);
684
+	st = step(m->g, startst, stopst, st, NOTHING, st);
685
+	ASSIGN(fresh, st);
686
+	SP("start", st, *p);
687
+	coldp = NULL;
688
+	for (;;) {
689
+		/* next character */
690
+		lastc = c;
691
+		c = (p == m->endp) ? OUT : *p;
692
+		if (EQ(st, fresh))
693
+			coldp = p;
694
+
695
+		/* is there an EOL and/or BOL between lastc and c? */
696
+		flagch = '\0';
697
+		i = 0;
698
+		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
699
+				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
700
+			flagch = BOL;
701
+			i = m->g->nbol;
702
+		}
703
+		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
704
+				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
705
+			flagch = (flagch == BOL) ? BOLEOL : EOL;
706
+			i += m->g->neol;
707
+		}
708
+		if (i != 0) {
709
+			for (; i > 0; i--)
710
+				st = step(m->g, startst, stopst, st, flagch, st);
711
+			SP("boleol", st, c);
712
+		}
713
+
714
+		/* how about a word boundary? */
715
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
716
+					(c != OUT && ISWORD(c)) ) {
717
+			flagch = BOW;
718
+		}
719
+		if ( (lastc != OUT && ISWORD(lastc)) &&
720
+				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
721
+			flagch = EOW;
722
+		}
723
+		if (flagch == BOW || flagch == EOW) {
724
+			st = step(m->g, startst, stopst, st, flagch, st);
725
+			SP("boweow", st, c);
726
+		}
727
+
728
+		/* are we done? */
729
+		if (ISSET(st, stopst) || p == stop)
730
+			break;		/* NOTE BREAK OUT */
731
+
732
+		/* no, we must deal with this character */
733
+		ASSIGN(tmp, st);
734
+		ASSIGN(st, fresh);
735
+		assert(c != OUT);
736
+		st = step(m->g, startst, stopst, tmp, c, st);
737
+		SP("aft", st, c);
738
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
739
+		p++;
740
+	}
741
+
742
+	assert(coldp != NULL);
743
+	m->coldp = coldp;
744
+	if (ISSET(st, stopst))
745
+		return(p+1);
746
+	else
747
+		return(NULL);
748
+}
749
+
750
+/*
751
+ - slow - step through the string more deliberately
752
+ */
753
+static char *			/* where it ended */
754
+slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
755
+{
756
+	states st = m->st;
757
+	states empty = m->empty;
758
+	states tmp = m->tmp;
759
+	char *p = start;
760
+	int c = (start == m->beginp) ? OUT : *(start-1);
761
+	int lastc;	/* previous c */
762
+	int flagch;
763
+	int i;
764
+	char *matchp;	/* last p at which a match ended */
765
+
766
+	AT("slow", start, stop, startst, stopst);
767
+	CLEAR(st);
768
+	SET1(st, startst);
769
+	SP("sstart", st, *p);
770
+	st = step(m->g, startst, stopst, st, NOTHING, st);
771
+	matchp = NULL;
772
+	for (;;) {
773
+		/* next character */
774
+		lastc = c;
775
+		c = (p == m->endp) ? OUT : *p;
776
+
777
+		/* is there an EOL and/or BOL between lastc and c? */
778
+		flagch = '\0';
779
+		i = 0;
780
+		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
781
+				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
782
+			flagch = BOL;
783
+			i = m->g->nbol;
784
+		}
785
+		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
786
+				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
787
+			flagch = (flagch == BOL) ? BOLEOL : EOL;
788
+			i += m->g->neol;
789
+		}
790
+		if (i != 0) {
791
+			for (; i > 0; i--)
792
+				st = step(m->g, startst, stopst, st, flagch, st);
793
+			SP("sboleol", st, c);
794
+		}
795
+
796
+		/* how about a word boundary? */
797
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
798
+					(c != OUT && ISWORD(c)) ) {
799
+			flagch = BOW;
800
+		}
801
+		if ( (lastc != OUT && ISWORD(lastc)) &&
802
+				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
803
+			flagch = EOW;
804
+		}
805
+		if (flagch == BOW || flagch == EOW) {
806
+			st = step(m->g, startst, stopst, st, flagch, st);
807
+			SP("sboweow", st, c);
808
+		}
809
+
810
+		/* are we done? */
811
+		if (ISSET(st, stopst))
812
+			matchp = p;
813
+		if (EQ(st, empty) || p == stop)
814
+			break;		/* NOTE BREAK OUT */
815
+
816
+		/* no, we must deal with this character */
817
+		ASSIGN(tmp, st);
818
+		ASSIGN(st, empty);
819
+		assert(c != OUT);
820
+		st = step(m->g, startst, stopst, tmp, c, st);
821
+		SP("saft", st, c);
822
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
823
+		p++;
824
+	}
825
+
826
+	return(matchp);
827
+}
828
+
829
+
830
+/*
831
+ - step - map set of states reachable before char to set reachable after
832
+ */
833
+static states
834
+step(struct re_guts *g,
835
+    sopno start,		/* start state within strip */
836
+    sopno stop,			/* state after stop state within strip */
837
+    states bef,			/* states reachable before */
838
+    int ch,			/* character or NONCHAR code */
839
+    states aft)			/* states already known reachable after */
840
+{
841
+	cset *cs;
842
+	sop s;
843
+	sopno pc;
844
+	onestate here;		/* note, macros know this name */
845
+	sopno look;
846
+	int i;
847
+
848
+	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
849
+		s = g->strip[pc];
850
+		switch (OP(s)) {
851
+		case OEND:
852
+			assert(pc == stop-1);
853
+			break;
854
+		case OCHAR:
855
+			/* only characters can match */
856
+			assert(!NONCHAR(ch) || ch != (char)OPND(s));
857
+			if (ch == (char)OPND(s))
858
+				FWD(aft, bef, 1);
859
+			break;
860
+		case OBOL:
861
+			if (ch == BOL || ch == BOLEOL)
862
+				FWD(aft, bef, 1);
863
+			break;
864
+		case OEOL:
865
+			if (ch == EOL || ch == BOLEOL)
866
+				FWD(aft, bef, 1);
867
+			break;
868
+		case OBOW:
869
+			if (ch == BOW)
870
+				FWD(aft, bef, 1);
871
+			break;
872
+		case OEOW:
873
+			if (ch == EOW)
874
+				FWD(aft, bef, 1);
875
+			break;
876
+		case OANY:
877
+			if (!NONCHAR(ch))
878
+				FWD(aft, bef, 1);
879
+			break;
880
+		case OANYOF:
881
+			cs = &g->sets[OPND(s)];
882
+			if (!NONCHAR(ch) && CHIN(cs, ch))
883
+				FWD(aft, bef, 1);
884
+			break;
885
+		case OBACK_:		/* ignored here */
886
+		case O_BACK:
887
+			FWD(aft, aft, 1);
888
+			break;
889
+		case OPLUS_:		/* forward, this is just an empty */
890
+			FWD(aft, aft, 1);
891
+			break;
892
+		case O_PLUS:		/* both forward and back */
893
+			FWD(aft, aft, 1);
894
+			i = ISSETBACK(aft, OPND(s));
895
+			BACK(aft, aft, OPND(s));
896
+			if (!i && ISSETBACK(aft, OPND(s))) {
897
+				/* oho, must reconsider loop body */
898
+				pc -= OPND(s) + 1;
899
+				INIT(here, pc);
900
+			}
901
+			break;
902
+		case OQUEST_:		/* two branches, both forward */
903
+			FWD(aft, aft, 1);
904
+			FWD(aft, aft, OPND(s));
905
+			break;
906
+		case O_QUEST:		/* just an empty */
907
+			FWD(aft, aft, 1);
908
+			break;
909
+		case OLPAREN:		/* not significant here */
910
+		case ORPAREN:
911
+			FWD(aft, aft, 1);
912
+			break;
913
+		case OCH_:		/* mark the first two branches */
914
+			FWD(aft, aft, 1);
915
+			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
916
+			FWD(aft, aft, OPND(s));
917
+			break;
918
+		case OOR1:		/* done a branch, find the O_CH */
919
+			if (ISSTATEIN(aft, here)) {
920
+				for (look = 1;
921
+						OP(s = g->strip[pc+look]) != O_CH;
922
+						look += OPND(s))
923
+					assert(OP(s) == OOR2);
924
+				FWD(aft, aft, look);
925
+			}
926
+			break;
927
+		case OOR2:		/* propagate OCH_'s marking */
928
+			FWD(aft, aft, 1);
929
+			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
930
+				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
931
+				FWD(aft, aft, OPND(s));
932
+			}
933
+			break;
934
+		case O_CH:		/* just empty */
935
+			FWD(aft, aft, 1);
936
+			break;
937
+		default:		/* ooooops... */
938
+			assert(nope);
939
+			break;
940
+		}
941
+	}
942
+
943
+	return(aft);
944
+}
945
+
946
+#ifdef REDEBUG
947
+/*
948
+ - print - print a set of states
949
+ */
950
+static void
951
+print(struct match *m, char *caption, states st, int ch, FILE *d)
952
+{
953
+	struct re_guts *g = m->g;
954
+	int i;
955
+	int first = 1;
956
+
957
+	if (!(m->eflags&REG_TRACE))
958
+		return;
959
+
960
+	(void)fprintf(d, "%s", caption);
961
+	if (ch != '\0')
962
+		(void)fprintf(d, " %s", pchar(ch));
963
+	for (i = 0; i < g->nstates; i++)
964
+		if (ISSET(st, i)) {
965
+			(void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
966
+			first = 0;
967
+		}
968
+	(void)fprintf(d, "\n");
969
+}
970
+
971
+/* 
972
+ - at - print current situation
973
+ */
974
+static void
975
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
976
+    sopno stopst)
977
+{
978
+	if (!(m->eflags&REG_TRACE))
979
+		return;
980
+
981
+	(void)printf("%s %s-", title, pchar(*start));
982
+	(void)printf("%s ", pchar(*stop));
983
+	(void)printf("%ld-%ld\n", (long)startst, (long)stopst);
984
+}
985
+
986
#ifndef PCHARDONE
#define	PCHARDONE	/* never again */
/*
 - pchar - make a character printable
 *
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
 * duplicate here avoids having a debugging-capable regexec.o tied to
 * a matching debug.o, and this is convenient.  It all disappears in
 * the non-debug compilation anyway, so it doesn't matter much.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call: the character itself when it is printable (or a space), otherwise
 * a backslash followed by its value in octal.
 */
static char *			/* -> representation */
pchar(int ch)
{
	static char pbuf[10];

	/*
	 * isprint() is only defined for values representable as unsigned
	 * char (or EOF), but callers also pass plain (possibly negative)
	 * chars and out-of-band codes.  Only consult it when ch is in range;
	 * everything else takes the octal-escape path.
	 */
	if ((ch == (unsigned char)ch && isprint(ch)) || ch == ' ')
		(void)snprintf(pbuf, sizeof pbuf, "%c", ch);
	else
		(void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
	return(pbuf);
}
#endif
1008
+#endif
1009
+
1010
+#undef	matcher
1011
+#undef	fast
1012
+#undef	slow
1013
+#undef	dissect
1014
+#undef	backref
1015
+#undef	step
1016
+#undef	print
1017
+#undef	at
1018
+#undef	match
1019
+#undef	nope
0 1020
new file mode 100644
... ...
@@ -0,0 +1,1519 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
35
+ */
36
+
37
+#include <sys/types.h>
38
+#include <stdio.h>
39
+#include <string.h>
40
+#include <ctype.h>
41
+#include <limits.h>
42
+#include <stdlib.h>
43
+#include "others.h"
44
+#include "regex.h"
45
+
46
+#include "utils.h"
47
+#include "regex2.h"
48
+
49
+#include "cclass.h"
50
+#include "cname.h"
51
+
52
+/*
53
+ * parse structure, passed up and down to avoid global variables and
54
+ * other clumsinesses
55
+ */
56
+struct parse {
56
+	char *next;		/* next unconsumed character in RE */
57
+	char *end;		/* end of RE string (-> NUL normally); scanning stops here */
58
+	int error;		/* has an error been seen? 0 = no; cli_regcomp returns this */
59
+	sop *strip;		/* malloced strip (array of sops under construction) */
60
+	sopno ssize;		/* malloced strip size (sops allocated) */
61
+	sopno slen;		/* malloced strip length (sops used); this is HERE() */
62
+	int ncsalloc;		/* number of csets allocated */
63
+	struct re_guts *g;	/* the re_guts being filled in for this RE */
64
+#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
65
+	sopno pbegin[NPAREN];	/* -> ( : strip position recorded at each '(' ([0] unused) */
66
+	sopno pend[NPAREN];	/* -> ) : strip position recorded at each ')' ([0] unused) */
67
+};
69
+
70
+static void p_ere(struct parse *, int);
71
+static void p_ere_exp(struct parse *);
72
+static void p_str(struct parse *);
73
+static void p_bre(struct parse *, int, int);
74
+static int p_simp_re(struct parse *, int);
75
+static int p_count(struct parse *);
76
+static void p_bracket(struct parse *);
77
+static void p_b_term(struct parse *, cset *);
78
+static void p_b_cclass(struct parse *, cset *);
79
+static void p_b_eclass(struct parse *, cset *);
80
+static char p_b_symbol(struct parse *);
81
+static char p_b_coll_elem(struct parse *, int);
82
+static char othercase(int);
83
+static void bothcases(struct parse *, int);
84
+static void ordinary(struct parse *, int);
85
+static void nonnewline(struct parse *);
86
+static void repeat(struct parse *, sopno, int, int);
87
+static int seterr(struct parse *, int);
88
+static cset *allocset(struct parse *);
89
+static void freeset(struct parse *, cset *);
90
+static int freezeset(struct parse *, cset *);
91
+static int firstch(struct parse *, cset *);
92
+static int nch(struct parse *, cset *);
93
+static void mcadd(struct parse *, cset *, const char *);
94
+static void mcinvert(struct parse *, cset *);
95
+static void mccase(struct parse *, cset *);
96
+static int isinsets(struct re_guts *, int);
97
+static int samesets(struct re_guts *, int, int);
98
+static void categorize(struct parse *, struct re_guts *);
99
+static sopno dupl(struct parse *, sopno, sopno);
100
+static void doemit(struct parse *, sop, size_t);
101
+static void doinsert(struct parse *, sop, size_t, sopno);
102
+static void dofwd(struct parse *, sopno, sop);
103
+static void enlarge(struct parse *, sopno);
104
+static void stripsnug(struct parse *, struct re_guts *);
105
+static void findmust(struct parse *, struct re_guts *);
106
+static sopno pluscount(struct parse *, struct re_guts *);
107
+
108
+static char nuls[10];		/* place to point scanner in event of error */
109
+
110
+/*
111
+ * macros for use with parse structure
112
+ * BEWARE:  these know that the parse structure is named `p' !!!
113
+ */
114
+#define	PEEK()	(*p->next)	/* next character, not consumed */
115
+#define	PEEK2()	(*(p->next+1))	/* character after the next one, not consumed */
116
+#define	MORE()	(p->next < p->end)	/* is at least one character left? */
117
+#define	MORE2()	(p->next+1 < p->end)	/* are at least two characters left? */
118
+#define	SEE(c)	(MORE() && PEEK() == (c))	/* is the next character c? */
119
+#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))	/* are the next two characters a, b? */
120
+#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)	/* consume c if it is next; 1 if consumed, else 0 */
121
+#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)	/* same, for the pair a, b */
122
+#define	NEXT()	(p->next++)	/* skip one character */
123
+#define	NEXT2()	(p->next += 2)	/* skip two characters */
124
+#define	NEXTn(n)	(p->next += (n))	/* skip n characters */
125
+#define	GETNEXT()	(*p->next++)	/* fetch the next character and consume it */
126
+#define	SETERROR(e)	seterr(p, (e))	/* report error e through seterr() */
127
+#define	REQUIRE(co, e)	(void)((co) || SETERROR(e))	/* report error e unless co holds */
128
+#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))	/* error e unless c is next (not consumed) */
129
+#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))	/* error e unless c is next; consumes a character if one is left */
130
+#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))	/* error e if c is next */
131
+#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))	/* pass op and operand to doemit() */
132
+#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)	/* doinsert() op at pos, operand HERE()-pos+1 */
133
+#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))	/* dofwd() at pos with the distance from pos to HERE() */
134
+#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)	/* emit op whose operand is the distance back to pos */
135
+#define	HERE()		(p->slen)	/* current used length of the strip */
136
+#define	THERE()		(p->slen - 1)	/* one before HERE() */
137
+#define	THERETHERE()	(p->slen - 2)	/* two before HERE() */
138
+#define	DROP(n)	(p->slen -= (n))	/* shrink the used strip length by n */
139
+
140
+#ifndef NDEBUG
141
+static int never = 0;		/* for use in asserts; shuts lint up */
142
+#else
143
+#define	never	0		/* some <assert.h>s have bugs too */
144
+#endif
145
+
146
+/*
147
+ - cli_regcomp - interface for parser and compilation
148
+ */
149
+int				/* 0 success, otherwise REG_something */
150
+cli_regcomp(regex_t *preg, const char *pattern, int cflags)
151
+{
152
+	struct parse pa;
153
+	struct re_guts *g;
154
+	struct parse *p = &pa;
155
+	int i;
156
+	size_t len;
157
+#ifdef REDEBUG
158
+#	define	GOODFLAGS(f)	(f)
159
+#else
160
+#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
161
+#endif
162
+
163
+	cflags = GOODFLAGS(cflags);
164
+	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
165
+		return(REG_INVARG);
166
+
167
+	if (cflags&REG_PEND) {
168
+		if (preg->re_endp < pattern)
169
+			return(REG_INVARG);
170
+		len = preg->re_endp - pattern;
171
+	} else
172
+		len = strlen((const char *)pattern);
173
+
174
+	/* do the mallocs early so failure handling is easy */
175
+	g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) +
176
+							(NC-1)*sizeof(cat_t));
177
+	if (g == NULL)
178
+		return(REG_ESPACE);
179
+	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
180
+	p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop));
181
+	p->slen = 0;
182
+	if (p->strip == NULL) {
183
+		free((char *)g);
184
+		return(REG_ESPACE);
185
+	}
186
+
187
+	/* set things up */
188
+	p->g = g;
189
+	p->next = (char *)pattern;	/* convenience; we do not modify it */
190
+	p->end = p->next + len;
191
+	p->error = 0;
192
+	p->ncsalloc = 0;
193
+	for (i = 0; i < NPAREN; i++) {
194
+		p->pbegin[i] = 0;
195
+		p->pend[i] = 0;
196
+	}
197
+	g->csetsize = NC;
198
+	g->sets = NULL;
199
+	g->setbits = NULL;
200
+	g->ncsets = 0;
201
+	g->cflags = cflags;
202
+	g->iflags = 0;
203
+	g->nbol = 0;
204
+	g->neol = 0;
205
+	g->must = NULL;
206
+	g->mlen = 0;
207
+	g->nsub = 0;
208
+	g->ncategories = 1;	/* category 0 is "everything else" */
209
+	g->categories = &g->catspace[-(CHAR_MIN)];
210
+	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
211
+	g->backrefs = 0;
212
+
213
+	/* do it */
214
+	EMIT(OEND, 0);
215
+	g->firststate = THERE();
216
+	if (cflags&REG_EXTENDED)
217
+		p_ere(p, OUT);
218
+	else if (cflags&REG_NOSPEC)
219
+		p_str(p);
220
+	else
221
+		p_bre(p, OUT, OUT);
222
+	EMIT(OEND, 0);
223
+	g->laststate = THERE();
224
+
225
+	/* tidy up loose ends and fill things in */
226
+	categorize(p, g);
227
+	stripsnug(p, g);
228
+	findmust(p, g);
229
+	g->nplus = pluscount(p, g);
230
+	g->magic = MAGIC2;
231
+	preg->re_nsub = g->nsub;
232
+	preg->re_g = g;
233
+	preg->re_magic = MAGIC1;
234
+#ifndef REDEBUG
235
+	/* not debugging, so can't rely on the assert() in cli_regexec() */
236
+	if (g->iflags&BAD)
237
+		SETERROR(REG_ASSERT);
238
+#endif
239
+
240
+	/* win or lose, we're done */
241
+	if (p->error != 0)	/* lose */
242
+		cli_regfree(preg);
243
+	return(p->error);
244
+}
245
+
246
/*
 - p_ere - ERE parser top level, concatenation and alternation
 *
 * Parses one or more '|'-separated branches.  Each branch is a run of
 * p_ere_exp() items that ends at end of input, at a '|', or at `stop'.
 * A branch that emits nothing is reported as REG_EMPTY.  Once a '|' has
 * been seen the branches are bracketed with OCH_ ... O_CH and separated
 * by OOR1/OOR2 pairs; the operands of those operators are only known
 * later, so they are emitted with placeholder values and patched with
 * AHEAD()/ASTERN() as parsing proceeds.
 */
static void
p_ere(struct parse *p, int stop)	/* character this ERE should end at */
{
	char c;
	sopno prevback;		/* where the next backward link (OOR1/O_CH) points */
	sopno prevfwd;		/* operator whose forward offset still needs fixing */
	sopno conc;		/* strip position where the current branch began */
	int first = 1;		/* is this the first alternative? */

	for (;;) {
		/* do a bunch of concatenated expressions */
		conc = HERE();
		while (MORE() && (c = PEEK()) != '|' && c != stop)
			p_ere_exp(p);
		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */

		if (!EAT('|'))
			break;		/* NOTE BREAK OUT */

		/* first '|' seen: retroactively open the alternation */
		if (first) {
			INSERT(OCH_, conc);	/* offset is wrong */
			prevfwd = conc;
			prevback = conc;
			first = 0;
		}
		/* close the branch just parsed and chain it to the previous one */
		ASTERN(OOR1, prevback);
		prevback = THERE();
		AHEAD(prevfwd);			/* fix previous offset */
		prevfwd = HERE();
		EMIT(OOR2, 0);			/* offset is very wrong */
	}

	/* prevfwd/prevback are only set (and only used) once a '|' was seen */
	if (!first) {		/* tail-end fixups */
		AHEAD(prevfwd);
		ASTERN(O_CH, prevback);
	}

	assert(!MORE() || SEE(stop));
}
288
+
289
+/*
290
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
291
+ */
292
+static void
293
+p_ere_exp(struct parse *p)
294
+{
295
+	char c;
296
+	sopno pos;
297
+	int count;
298
+	int count2;
299
+	sopno subno;
300
+	int wascaret = 0;
301
+
302
+	assert(MORE());		/* caller should have ensured this */
303
+	c = GETNEXT();
304
+
305
+	pos = HERE();
306
+	switch (c) {
307
+	case '(':
308
+		REQUIRE(MORE(), REG_EPAREN);
309
+		p->g->nsub++;
310
+		subno = p->g->nsub;
311
+		if (subno < NPAREN)
312
+			p->pbegin[subno] = HERE();
313
+		EMIT(OLPAREN, subno);
314
+		if (!SEE(')'))
315
+			p_ere(p, ')');
316
+		if (subno < NPAREN) {
317
+			p->pend[subno] = HERE();
318
+			assert(p->pend[subno] != 0);
319
+		}
320
+		EMIT(ORPAREN, subno);
321
+		MUSTEAT(')', REG_EPAREN);
322
+		break;
323
+#ifndef POSIX_MISTAKE
324
+	case ')':		/* happens only if no current unmatched ( */
325
+		/*
326
+		 * You may ask, why the ifndef?  Because I didn't notice
327
+		 * this until slightly too late for 1003.2, and none of the
328
+		 * other 1003.2 regular-expression reviewers noticed it at
329
+		 * all.  So an unmatched ) is legal POSIX, at least until
330
+		 * we can get it fixed.
331
+		 */
332
+		SETERROR(REG_EPAREN);
333
+		break;
334
+#endif
335
+	case '^':
336
+		EMIT(OBOL, 0);
337
+		p->g->iflags |= USEBOL;
338
+		p->g->nbol++;
339
+		wascaret = 1;
340
+		break;
341
+	case '$':
342
+		EMIT(OEOL, 0);
343
+		p->g->iflags |= USEEOL;
344
+		p->g->neol++;
345
+		break;
346
+	case '|':
347
+		SETERROR(REG_EMPTY);
348
+		break;
349
+	case '*':
350
+	case '+':
351
+	case '?':
352
+		SETERROR(REG_BADRPT);
353
+		break;
354
+	case '.':
355
+		if (p->g->cflags&REG_NEWLINE)
356
+			nonnewline(p);
357
+		else
358
+			EMIT(OANY, 0);
359
+		break;
360
+	case '[':
361
+		p_bracket(p);
362
+		break;
363
+	case '\\':
364
+		REQUIRE(MORE(), REG_EESCAPE);
365
+		c = GETNEXT();
366
+		ordinary(p, c);
367
+		break;
368
+	case '{':		/* okay as ordinary except if digit follows */
369
+		REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
370
+		/* FALLTHROUGH */
371
+	default:
372
+		ordinary(p, c);
373
+		break;
374
+	}
375
+
376
+	if (!MORE())
377
+		return;
378
+	c = PEEK();
379
+	/* we call { a repetition if followed by a digit */
380
+	if (!( c == '*' || c == '+' || c == '?' ||
381
+				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
382
+		return;		/* no repetition, we're done */
383
+	NEXT();
384
+
385
+	REQUIRE(!wascaret, REG_BADRPT);
386
+	switch (c) {
387
+	case '*':	/* implemented as +? */
388
+		/* this case does not require the (y|) trick, noKLUDGE */
389
+		INSERT(OPLUS_, pos);
390
+		ASTERN(O_PLUS, pos);
391
+		INSERT(OQUEST_, pos);
392
+		ASTERN(O_QUEST, pos);
393
+		break;
394
+	case '+':
395
+		INSERT(OPLUS_, pos);
396
+		ASTERN(O_PLUS, pos);
397
+		break;
398
+	case '?':
399
+		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
400
+		INSERT(OCH_, pos);		/* offset slightly wrong */
401
+		ASTERN(OOR1, pos);		/* this one's right */
402
+		AHEAD(pos);			/* fix the OCH_ */
403
+		EMIT(OOR2, 0);			/* offset very wrong... */
404
+		AHEAD(THERE());			/* ...so fix it */
405
+		ASTERN(O_CH, THERETHERE());
406
+		break;
407
+	case '{':
408
+		count = p_count(p);
409
+		if (EAT(',')) {
410
+			if (isdigit((uch)PEEK())) {
411
+				count2 = p_count(p);
412
+				REQUIRE(count <= count2, REG_BADBR);
413
+			} else		/* single number with comma */
414
+				count2 = INFINITY;
415
+		} else		/* just a single number */
416
+			count2 = count;
417
+		repeat(p, pos, count, count2);
418
+		if (!EAT('}')) {	/* error heuristics */
419
+			while (MORE() && PEEK() != '}')
420
+				NEXT();
421
+			REQUIRE(MORE(), REG_EBRACE);
422
+			SETERROR(REG_BADBR);
423
+		}
424
+		break;
425
+	}
426
+
427
+	if (!MORE())
428
+		return;
429
+	c = PEEK();
430
+	if (!( c == '*' || c == '+' || c == '?' ||
431
+				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
432
+		return;
433
+	SETERROR(REG_BADRPT);
434
+}
435
+
436
+/*
437
+ - p_str - string (no metacharacters) "parser"
438
+ */
439
+static void
440
+p_str(struct parse *p)
441
+{
442
+	REQUIRE(MORE(), REG_EMPTY);
443
+	while (MORE())
444
+		ordinary(p, GETNEXT());
445
+}
446
+
447
+/*
448
+ - p_bre - BRE parser top level, anchoring and concatenation
449
+ * Giving end1 as OUT essentially eliminates the end1/end2 check.
450
+ *
451
+ * This implementation is a bit of a kludge, in that a trailing $ is first
452
+ * taken as an ordinary character and then revised to be an anchor.  The
453
+ * only undesirable side effect is that '$' gets included as a character
454
+ * category in such cases.  This is fairly harmless; not worth fixing.
455
+ * The amount of lookahead needed to avoid this kludge is excessive.
456
+ */
457
+static void
458
+p_bre(struct parse *p,
459
+    int end1,		/* first terminating character */
460
+    int end2)		/* second terminating character */
461
+{
462
+	sopno start = HERE();
463
+	int first = 1;			/* first subexpression? */
464
+	int wasdollar = 0;
465
+
466
+	if (EAT('^')) {
467
+		EMIT(OBOL, 0);
468
+		p->g->iflags |= USEBOL;
469
+		p->g->nbol++;
470
+	}
471
+	while (MORE() && !SEETWO(end1, end2)) {
472
+		wasdollar = p_simp_re(p, first);
473
+		first = 0;
474
+	}
475
+	if (wasdollar) {	/* oops, that was a trailing anchor */
476
+		DROP(1);
477
+		EMIT(OEOL, 0);
478
+		p->g->iflags |= USEEOL;
479
+		p->g->neol++;
480
+	}
481
+
482
+	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
483
+}
484
+
485
/*
 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 *
 * BRE counterpart of p_ere_exp().  A backslash-escaped character is
 * represented internally as BACKSL|ch so that e.g. \( and ( can be told
 * apart in a single switch.  After the atom, a '*' or a \{m,n\} bound is
 * applied to everything emitted since `pos'.
 *
 * Returns 1 when the item was an unescaped '$' with no repetition after
 * it (the caller decides whether that was a trailing anchor), 0 otherwise.
 */
static int			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse *p,
    int starordinary)		/* is a leading * an ordinary character? */
{
	int c;
	int count;
	int count2;
	sopno pos;
	int i;
	sopno subno;
	/* flag bit, above the range of a character, marking "was escaped" */
#	define	BACKSL	(1<<CHAR_BIT)

	pos = HERE();		/* repetition op, if any, covers from here */

	assert(MORE());		/* caller should have ensured this */
	c = GETNEXT();
	if (c == '\\') {
		REQUIRE(MORE(), REG_EESCAPE);
		c = BACKSL | GETNEXT();
	}
	switch (c) {
	case '.':
		if (p->g->cflags&REG_NEWLINE)
			nonnewline(p);
		else
			EMIT(OANY, 0);
		break;
	case '[':
		p_bracket(p);
		break;
	case BACKSL|'{':
		/* a bound with nothing in front of it to repeat */
		SETERROR(REG_BADRPT);
		break;
	case BACKSL|'(':
		p->g->nsub++;
		subno = p->g->nsub;
		/* extents are recorded only for groups numbered below NPAREN */
		if (subno < NPAREN)
			p->pbegin[subno] = HERE();
		EMIT(OLPAREN, subno);
		/* the MORE here is an error heuristic */
		if (MORE() && !SEETWO('\\', ')'))
			p_bre(p, '\\', ')');
		if (subno < NPAREN) {
			p->pend[subno] = HERE();
			assert(p->pend[subno] != 0);
		}
		EMIT(ORPAREN, subno);
		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
		break;
	case BACKSL|')':	/* should not get here -- must be user */
	case BACKSL|'}':
		SETERROR(REG_EPAREN);
		break;
	case BACKSL|'1':
	case BACKSL|'2':
	case BACKSL|'3':
	case BACKSL|'4':
	case BACKSL|'5':
	case BACKSL|'6':
	case BACKSL|'7':
	case BACKSL|'8':
	case BACKSL|'9':
		/* back reference \1..\9 */
		i = (c&~BACKSL) - '0';
		assert(i < NPAREN);
		/* pend[i] is nonzero only once group i has been closed */
		if (p->pend[i] != 0) {
			assert(i <= p->g->nsub);
			EMIT(OBACK_, i);
			assert(p->pbegin[i] != 0);
			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
			/* copy the group's operators, without its parens */
			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
			EMIT(O_BACK, i);
		} else
			SETERROR(REG_ESUBREG);
		p->g->backrefs = 1;
		break;
	case '*':
		REQUIRE(starordinary, REG_BADRPT);
		/* FALLTHROUGH */
	default:
		/* the cast strips the BACKSL flag from escaped characters */
		ordinary(p, (char)c);
		break;
	}

	if (EAT('*')) {		/* implemented as +? */
		/* this case does not require the (y|) trick, noKLUDGE */
		INSERT(OPLUS_, pos);
		ASTERN(O_PLUS, pos);
		INSERT(OQUEST_, pos);
		ASTERN(O_QUEST, pos);
	} else if (EATTWO('\\', '{')) {
		count = p_count(p);
		if (EAT(',')) {
			if (MORE() && isdigit((uch)PEEK())) {
				count2 = p_count(p);
				REQUIRE(count <= count2, REG_BADBR);
			} else		/* single number with comma */
				count2 = INFINITY;
		} else		/* just a single number */
			count2 = count;
		repeat(p, pos, count, count2);
		if (!EATTWO('\\', '}')) {	/* error heuristics */
			while (MORE() && !SEETWO('\\', '}'))
				NEXT();
			REQUIRE(MORE(), REG_EBRACE);
			SETERROR(REG_BADBR);
		}
	} else if (c == '$')	/* $ (but not \$) ends it */
		return(1);

	return(0);
}
600
+
601
+/*
602
+ - p_count - parse a repetition count
603
+ */
604
+static int			/* the value */
605
+p_count(struct parse *p)
606
+{
607
+	int count = 0;
608
+	int ndigits = 0;
609
+
610
+	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
611
+		count = count*10 + (GETNEXT() - '0');
612
+		ndigits++;
613
+	}
614
+
615
+	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
616
+	return(count);
617
+}
618
+
619
/*
 - p_bracket - parse a bracketed character list
 *
 * Called with the opening '[' already consumed.  Builds a cset from the
 * terms up to the closing ']', applies REG_ICASE folding and '^'
 * inversion, and emits either a single OCHAR (for one-member sets, via
 * ordinary()) or an OANYOF referring to the frozen set.
 *
 * Note a significant property of this code:  if the allocset() did SETERROR,
 * no set operations are done.
 */
static void
p_bracket(struct parse *p)
{
	cset *cs;
	int invert = 0;

	/* Dept of Truly Sickening Special-Case Kludges */
	/* "[[:<:]]" and "[[:>:]]" are not sets at all; they emit OBOW / OEOW */
	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
		EMIT(OBOW, 0);
		NEXTn(6);
		return;
	}
	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
		EMIT(OEOW, 0);
		NEXTn(6);
		return;
	}

	if ((cs = allocset(p)) == NULL) {
		/* allocset did set error status in p */
		return;
	}

	if (EAT('^'))
		invert++;	/* make note to invert set at end */
	/* a ']' or '-' in first position is a literal member */
	if (EAT(']'))
		CHadd(cs, ']');
	else if (EAT('-'))
		CHadd(cs, '-');
	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
		p_b_term(p, cs);
	/* a '-' right before the closing ']' is a literal member too */
	if (EAT('-'))
		CHadd(cs, '-');
	MUSTEAT(']', REG_EBRACK);

	if (p->error != 0) {	/* don't mess things up further */
		freeset(p, cs);
		return;
	}

	if (p->g->cflags&REG_ICASE) {
		int i;
		int ci;

		/* add the other-case counterpart of every alphabetic member */
		for (i = p->g->csetsize - 1; i >= 0; i--)
			if (CHIN(cs, i) && isalpha(i)) {
				ci = othercase(i);
				if (ci != i)
					CHadd(cs, ci);
			}
		if (cs->multis != NULL)
			mccase(p, cs);
	}
	if (invert) {
		int i;

		/* flip the membership of every character */
		for (i = p->g->csetsize - 1; i >= 0; i--)
			if (CHIN(cs, i))
				CHsub(cs, i);
			else
				CHadd(cs, i);
		/* with REG_NEWLINE an inverted set never contains newline */
		if (p->g->cflags&REG_NEWLINE)
			CHsub(cs, '\n');
		if (cs->multis != NULL)
			mcinvert(p, cs);
	}

	assert(cs->multis == NULL);		/* xxx */

	if (nch(p, cs) == 1) {		/* optimize singleton sets */
		ordinary(p, firstch(p, cs));
		freeset(p, cs);
	} else
		EMIT(OANYOF, freezeset(p, cs));
}
700
+
701
+/*
702
+ - p_b_term - parse one term of a bracketed character list
703
+ */
704
+static void
705
+p_b_term(struct parse *p, cset *cs)
706
+{
707
+	char c;
708
+	char start, finish;
709
+	int i;
710
+
711
+	/* classify what we've got */
712
+	switch ((MORE()) ? PEEK() : '\0') {
713
+	case '[':
714
+		c = (MORE2()) ? PEEK2() : '\0';
715
+		break;
716
+	case '-':
717
+		SETERROR(REG_ERANGE);
718
+		return;			/* NOTE RETURN */
719
+		break;
720
+	default:
721
+		c = '\0';
722
+		break;
723
+	}
724
+
725
+	switch (c) {
726
+	case ':':		/* character class */
727
+		NEXT2();
728
+		REQUIRE(MORE(), REG_EBRACK);
729
+		c = PEEK();
730
+		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
731
+		p_b_cclass(p, cs);
732
+		REQUIRE(MORE(), REG_EBRACK);
733
+		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
734
+		break;
735
+	case '=':		/* equivalence class */
736
+		NEXT2();
737
+		REQUIRE(MORE(), REG_EBRACK);
738
+		c = PEEK();
739
+		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
740
+		p_b_eclass(p, cs);
741
+		REQUIRE(MORE(), REG_EBRACK);
742
+		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
743
+		break;
744
+	default:		/* symbol, ordinary character, or range */
745
+/* xxx revision needed for multichar stuff */
746
+		start = p_b_symbol(p);
747
+		if (SEE('-') && MORE2() && PEEK2() != ']') {
748
+			/* range */
749
+			NEXT();
750
+			if (EAT('-'))
751
+				finish = '-';
752
+			else
753
+				finish = p_b_symbol(p);
754
+		} else
755
+			finish = start;
756
+/* xxx what about signed chars here... */
757
+		REQUIRE(start <= finish, REG_ERANGE);
758
+		for (i = start; i <= finish; i++)
759
+			CHadd(cs, i);
760
+		break;
761
+	}
762
+}
763
+
764
+/*
765
+ - p_b_cclass - parse a character-class name and deal with it
766
+ */
767
+static void
768
+p_b_cclass(struct parse *p, cset *cs)
769
+{
770
+	char *sp = p->next;
771
+	struct cclass *cp;
772
+	size_t len;
773
+	const char *u;
774
+	char c;
775
+
776
+	while (MORE() && isalpha(PEEK()))
777
+		NEXT();
778
+	len = p->next - sp;
779
+	for (cp = cclasses; cp->name != NULL; cp++)
780
+		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
781
+			break;
782
+	if (cp->name == NULL) {
783
+		/* oops, didn't find it */
784
+		SETERROR(REG_ECTYPE);
785
+		return;
786
+	}
787
+
788
+	u = cp->chars;
789
+	while ((c = *u++) != '\0')
790
+		CHadd(cs, c);
791
+	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
792
+		MCadd(p, cs, u);
793
+}
794
+
795
+/*
796
+ - p_b_eclass - parse an equivalence-class name and deal with it
797
+ *
798
+ * This implementation is incomplete. xxx
799
+ */
800
+static void
801
+p_b_eclass(struct parse *p, cset *cs)
802
+{
803
+	char c;
804
+
805
+	c = p_b_coll_elem(p, '=');
806
+	CHadd(cs, c);
807
+}
808
+
809
+/*
810
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
811
+ */
812
+static char			/* value of symbol */
813
+p_b_symbol(struct parse *p)
814
+{
815
+	char value;
816
+
817
+	REQUIRE(MORE(), REG_EBRACK);
818
+	if (!EATTWO('[', '.'))
819
+		return(GETNEXT());
820
+
821
+	/* collating symbol */
822
+	value = p_b_coll_elem(p, '.');
823
+	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
824
+	return(value);
825
+}
826
+
827
+/*
828
+ - p_b_coll_elem - parse a collating-element name and look it up
829
+ */
830
+static char			/* value of collating element */
831
+p_b_coll_elem(struct parse *p,
832
+    int endc)			/* name ended by endc,']' */
833
+{
834
+	char *sp = p->next;
835
+	struct cname *cp;
836
+	int len;
837
+
838
+	while (MORE() && !SEETWO(endc, ']'))
839
+		NEXT();
840
+	if (!MORE()) {
841
+		SETERROR(REG_EBRACK);
842
+		return(0);
843
+	}
844
+	len = p->next - sp;
845
+	for (cp = cnames; cp->name != NULL; cp++)
846
+		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
847
+			return(cp->code);	/* known name */
848
+	if (len == 1)
849
+		return(*sp);	/* single character */
850
+	SETERROR(REG_ECOLLATE);			/* neither */
851
+	return(0);
852
+}
853
+
854
+/*
855
+ - othercase - return the case counterpart of an alphabetic
856
+ */
857
+static char			/* if no counterpart, return ch */
858
+othercase(int ch)
859
+{
860
+	ch = (uch)ch;
861
+	assert(isalpha(ch));
862
+	if (isupper(ch))
863
+		return ((uch)tolower(ch));
864
+	else if (islower(ch))
865
+		return ((uch)toupper(ch));
866
+	else			/* peculiar, but could happen */
867
+		return(ch);
868
+}
869
+
870
+/*
871
+ - bothcases - emit a dualcase version of a two-case character
872
+ *
873
+ * Boy, is this implementation ever a kludge...
874
+ */
875
+static void
876
+bothcases(struct parse *p, int ch)
877
+{
878
+	char *oldnext = p->next;
879
+	char *oldend = p->end;
880
+	char bracket[3];
881
+
882
+	ch = (uch)ch;
883
+	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
884
+	p->next = bracket;
885
+	p->end = bracket+2;
886
+	bracket[0] = ch;
887
+	bracket[1] = ']';
888
+	bracket[2] = '\0';
889
+	p_bracket(p);
890
+	assert(p->next == bracket+2);
891
+	p->next = oldnext;
892
+	p->end = oldend;
893
+}
894
+
895
+/*
896
+ - ordinary - emit an ordinary character
897
+ */
898
+static void
899
+ordinary(struct parse *p, int ch)
900
+{
901
+	cat_t *cap = p->g->categories;
902
+
903
+	if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
904
+		bothcases(p, ch);
905
+	else {
906
+		EMIT(OCHAR, (uch)ch);
907
+		if (cap[ch] == 0)
908
+			cap[ch] = p->g->ncategories++;
909
+	}
910
+}
911
+
912
+/*
913
+ - nonnewline - emit REG_NEWLINE version of OANY
914
+ *
915
+ * Boy, is this implementation ever a kludge...
916
+ */
917
+static void
918
+nonnewline(struct parse *p)
919
+{
920
+	char *oldnext = p->next;
921
+	char *oldend = p->end;
922
+	char bracket[4];
923
+
924
+	p->next = bracket;
925
+	p->end = bracket+3;
926
+	bracket[0] = '^';
927
+	bracket[1] = '\n';
928
+	bracket[2] = ']';
929
+	bracket[3] = '\0';
930
+	p_bracket(p);
931
+	assert(p->next == bracket+3);
932
+	p->next = oldnext;
933
+	p->end = oldend;
934
+}
935
+
936
/*
 - repeat - generate code for a bounded repetition, recursively if needed
 *
 * The operand is whatever currently occupies the strip from `start' to
 * the end.  Both bounds are first reduced by MAP() to one of four
 * classes -- 0, 1, N (any finite value above 1) and INF (INFINITY) --
 * and the pair of classes selects a rewrite.  Most rewrites peel one
 * copy of the operand off and recurse with smaller bounds.
 */
static void
repeat(struct parse *p,
    sopno start,		/* operand from here to end of strip */
    int from,			/* repeated from this number */
    int to)			/* to this number of times (maybe INFINITY) */
{
	sopno finish = HERE();
	/* bound classes, and REP() to combine two of them into a case label */
#	define	N	2
#	define	INF	3
#	define	REP(f, t)	((f)*8 + (t))
#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
	sopno copy;

	if (p->error != 0)	/* head off possible runaway recursion */
		return;

	assert(from <= to);

	switch (REP(MAP(from), MAP(to))) {
	case REP(0, 0):			/* must be user doing this */
		DROP(finish-start);	/* drop the operand */
		break;
	case REP(0, 1):			/* as x{1,1}? */
	case REP(0, N):			/* as x{1,n}? */
	case REP(0, INF):		/* as x{1,}? */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);		/* offset is wrong... */
		/* the operand moved up by one because of the INSERT */
		repeat(p, start+1, 1, to);
		ASTERN(OOR1, start);
		AHEAD(start);			/* ... fix it */
		EMIT(OOR2, 0);
		AHEAD(THERE());
		ASTERN(O_CH, THERETHERE());
		break;
	case REP(1, 1):			/* trivial case */
		/* done */
		break;
	case REP(1, N):			/* as x?x{1,n-1} */
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
		INSERT(OCH_, start);
		ASTERN(OOR1, start);
		AHEAD(start);
		EMIT(OOR2, 0);			/* offset very wrong... */
		AHEAD(THERE());			/* ...so fix it */
		ASTERN(O_CH, THERETHERE());
		/* duplicate the operand, now shifted by the inserted OCH_ */
		copy = dupl(p, start+1, finish+1);
		assert(copy == finish+4);
		repeat(p, copy, 1, to-1);
		break;
	case REP(1, INF):		/* as x+ */
		INSERT(OPLUS_, start);
		ASTERN(O_PLUS, start);
		break;
	case REP(N, N):			/* as xx{m-1,n-1} */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to-1);
		break;
	case REP(N, INF):		/* as xx{n-1,INF} */
		copy = dupl(p, start, finish);
		repeat(p, copy, from-1, to);
		break;
	default:			/* "can't happen" */
		SETERROR(REG_ASSERT);	/* just in case */
		break;
	}
}
1005
+
1006
+/*
1007
+ - seterr - set an error condition
1008
+ */
1009
+static int			/* useless but makes type checking happy */
1010
+seterr(struct parse *p, int e)
1011
+{
1012
+	if (p->error == 0)	/* keep earliest error condition */
1013
+		p->error = e;
1014
+	p->next = nuls;		/* try to bring things to a halt */
1015
+	p->end = nuls;
1016
+	return(0);		/* make the return value well-defined */
1017
+}
1018
+
1019
/*
 - allocset - allocate a set of characters for []
 *
 * Sets are stored bit-sliced: g->setbits is an array of columns, each
 * csetsize bytes long, and each column holds CHAR_BIT sets, one per bit
 * (cs->mask selects the bit, cs->ptr the column).  When all allocated
 * slots are in use, room for CHAR_BIT more sets is added.
 *
 * Returns the new, empty set, or NULL after recording REG_ESPACE when
 * memory could not be obtained.
 */
static cset *
allocset(struct parse *p)
{
	int no = p->g->ncsets++;
	size_t nc;
	size_t nbytes;
	cset *cs;
	size_t css = (size_t)p->g->csetsize;
	int i;

	if (no >= p->ncsalloc) {	/* need another column of space */
		void *ptr;

		p->ncsalloc += CHAR_BIT;
		nc = p->ncsalloc;
		assert(nc % CHAR_BIT == 0);
		/* one column of css bytes per CHAR_BIT sets */
		nbytes = nc / CHAR_BIT * css;

		ptr = (cset *)cli_realloc((char *)p->g->sets, nc * sizeof(cset));
		if (ptr == NULL)
			goto nomem;
		p->g->sets = ptr;

		ptr = (uch *)cli_realloc((char *)p->g->setbits, nbytes);
		if (ptr == NULL)
			goto nomem;
		p->g->setbits = ptr;

		/* setbits may have moved; re-point the existing sets into it */
		for (i = 0; i < no; i++)
			p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);

		/* clear the newly added column */
		(void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
	}

	cs = &p->g->sets[no];
	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
	cs->mask = 1 << ((no) % CHAR_BIT);
	cs->hash = 0;
	cs->smultis = 0;
	cs->multis = NULL;

	return(cs);
nomem:
	/* release both arrays so that nothing refers to stale storage */
	free(p->g->sets);
	p->g->sets = NULL;
	free(p->g->setbits);
	p->g->setbits = NULL;

	SETERROR(REG_ESPACE);
	/* caller's responsibility not to do set ops */
	return(NULL);
}
1074
+
1075
+/*
1076
+ - freeset - free a now-unused set
1077
+ */
1078
+static void
1079
+freeset(struct parse *p, cset *cs)
1080
+{
1081
+	size_t i;
1082
+	cset *top = &p->g->sets[p->g->ncsets];
1083
+	size_t css = (size_t)p->g->csetsize;
1084
+
1085
+	for (i = 0; i < css; i++)
1086
+		CHsub(cs, i);
1087
+	if (cs == top-1)	/* recover only the easy case */
1088
+		p->g->ncsets--;
1089
+}
1090
+
1091
+/*
1092
+ - freezeset - final processing on a set of characters
1093
+ *
1094
+ * The main task here is merging identical sets.  This is usually a waste
1095
+ * of time (although the hash code minimizes the overhead), but can win
1096
+ * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1097
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to
1098
+ * the same value!
1099
+ */
1100
+static int			/* set number */
1101
+freezeset(struct parse *p, cset *cs)
1102
+{
1103
+	uch h = cs->hash;
1104
+	size_t i;
1105
+	cset *top = &p->g->sets[p->g->ncsets];
1106
+	cset *cs2;
1107
+	size_t css = (size_t)p->g->csetsize;
1108
+
1109
+	/* look for an earlier one which is the same */
1110
+	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1111
+		if (cs2->hash == h && cs2 != cs) {
1112
+			/* maybe */
1113
+			for (i = 0; i < css; i++)
1114
+				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1115
+					break;		/* no */
1116
+			if (i == css)
1117
+				break;			/* yes */
1118
+		}
1119
+
1120
+	if (cs2 < top) {	/* found one */
1121
+		freeset(p, cs);
1122
+		cs = cs2;
1123
+	}
1124
+
1125
+	return((int)(cs - p->g->sets));
1126
+}
1127
+
1128
+/*
1129
+ - firstch - return first character in a set (which must have at least one)
1130
+ */
1131
+static int			/* character; there is no "none" value */
1132
+firstch(struct parse *p, cset *cs)
1133
+{
1134
+	size_t i;
1135
+	size_t css = (size_t)p->g->csetsize;
1136
+
1137
+	for (i = 0; i < css; i++)
1138
+		if (CHIN(cs, i))
1139
+			return((char)i);
1140
+	assert(never);
1141
+	return(0);		/* arbitrary */
1142
+}
1143
+
1144
+/*
1145
+ - nch - number of characters in a set
1146
+ */
1147
+static int
1148
+nch(struct parse *p, cset *cs)
1149
+{
1150
+	size_t i;
1151
+	size_t css = (size_t)p->g->csetsize;
1152
+	int n = 0;
1153
+
1154
+	for (i = 0; i < css; i++)
1155
+		if (CHIN(cs, i))
1156
+			n++;
1157
+	return(n);
1158
+}
1159
+
1160
+/*
1161
+ - mcadd - add a collating element to a cset
1162
+ */
1163
+static void
1164
+mcadd( struct parse *p, cset *cs, const char *cp)
1165
+{
1166
+	size_t oldend = cs->smultis;
1167
+	void *np;
1168
+
1169
+	cs->smultis += strlen(cp) + 1;
1170
+	if (cs->multis == NULL)
1171
+		np = cli_malloc(cs->smultis);
1172
+	else
1173
+		np = cli_realloc(cs->multis, cs->smultis);
1174
+	if (np == NULL) {
1175
+		if (cs->multis)
1176
+			free(cs->multis);
1177
+		cs->multis = NULL;
1178
+		SETERROR(REG_ESPACE);
1179
+		return;
1180
+	}
1181
+	cs->multis = np;
1182
+
1183
+	cli_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1184
+}
1185
+
1186
/*
 - mcinvert - invert the list of collating elements in a cset
 *
 * This would have to know the set of possibilities.  Implementation
 * is deferred.  For now the function only asserts that the set has no
 * multi-character elements; `p' is unused.
 */
/* ARGSUSED */
static void
mcinvert(struct parse *p, cset *cs)
{
	assert(cs->multis == NULL);	/* xxx */
}
1198
+
1199
/*
 - mccase - add case counterparts of the list of collating elements in a cset
 *
 * This would have to know the set of possibilities.  Implementation
 * is deferred.  For now the function only asserts that the set has no
 * multi-character elements; `p' is unused.
 */
/* ARGSUSED */
static void
mccase(struct parse *p, cset *cs)
{
	assert(cs->multis == NULL);	/* xxx */
}
1211
+
1212
+/*
1213
+ - isinsets - is this character in any sets?
1214
+ */
1215
+static int			/* predicate */
1216
+isinsets(struct re_guts *g, int c)
1217
+{
1218
+	uch *col;
1219
+	int i;
1220
+	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1221
+	unsigned uc = (uch)c;
1222
+
1223
+	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1224
+		if (col[uc] != 0)
1225
+			return(1);
1226
+	return(0);
1227
+}
1228
+
1229
+/*
1230
+ - samesets - are these two characters in exactly the same sets?
1231
+ */
1232
+static int			/* predicate */
1233
+samesets(struct re_guts *g, int c1, int c2)
1234
+{
1235
+	uch *col;
1236
+	int i;
1237
+	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1238
+	unsigned uc1 = (uch)c1;
1239
+	unsigned uc2 = (uch)c2;
1240
+
1241
+	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1242
+		if (col[uc1] != col[uc2])
1243
+			return(0);
1244
+	return(1);
1245
+}
1246
+
1247
+/*
1248
+ - categorize - sort out character categories
1249
+ */
1250
+static void
1251
+categorize(struct parse *p, struct re_guts *g)
1252
+{
1253
+	cat_t *cats = g->categories;
1254
+	int c;
1255
+	int c2;
1256
+	cat_t cat;
1257
+
1258
+	/* avoid making error situations worse */
1259
+	if (p->error != 0)
1260
+		return;
1261
+
1262
+	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1263
+		if (cats[c] == 0 && isinsets(g, c)) {
1264
+			cat = g->ncategories++;
1265
+			cats[c] = cat;
1266
+			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1267
+				if (cats[c2] == 0 && samesets(g, c, c2))
1268
+					cats[c2] = cat;
1269
+		}
1270
+}
1271
+
1272
+/*
1273
+ - dupl - emit a duplicate of a bunch of sops
1274
+ */
1275
+static sopno			/* start of duplicate */
1276
+dupl(struct parse *p,
1277
+    sopno start,		/* from here */
1278
+    sopno finish)		/* to this less one */
1279
+{
1280
+	sopno ret = HERE();
1281
+	sopno len = finish - start;
1282
+
1283
+	assert(finish >= start);
1284
+	if (len == 0)
1285
+		return(ret);
1286
+	enlarge(p, p->ssize + len);	/* this many unexpected additions */
1287
+	assert(p->ssize >= p->slen + len);
1288
+	(void) memmove((char *)(p->strip + p->slen),
1289
+		(char *)(p->strip + start), (size_t)len*sizeof(sop));
1290
+	p->slen += len;
1291
+	return(ret);
1292
+}
1293
+
1294
+/*
1295
+ - doemit - emit a strip operator
1296
+ *
1297
+ * It might seem better to implement this as a macro with a function as
1298
+ * hard-case backup, but it's just too big and messy unless there are
1299
+ * some changes to the data structures.  Maybe later.
1300
+ */
1301
+static void
1302
+doemit(struct parse *p, sop op, size_t opnd)
1303
+{
1304
+	/* avoid making error situations worse */
1305
+	if (p->error != 0)
1306
+		return;
1307
+
1308
+	/* deal with oversize operands ("can't happen", more or less) */
1309
+	assert(opnd < 1<<OPSHIFT);
1310
+
1311
+	/* deal with undersized strip */
1312
+	if (p->slen >= p->ssize)
1313
+		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
1314
+	assert(p->slen < p->ssize);
1315
+
1316
+	/* finally, it's all reduced to the easy case */
1317
+	p->strip[p->slen++] = SOP(op, opnd);
1318
+}
1319
+
1320
+/*
1321
+ - doinsert - insert a sop into the strip
1322
+ */
1323
+static void
1324
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1325
+{
1326
+	sopno sn;
1327
+	sop s;
1328
+	int i;
1329
+
1330
+	/* avoid making error situations worse */
1331
+	if (p->error != 0)
1332
+		return;
1333
+
1334
+	sn = HERE();
1335
+	EMIT(op, opnd);		/* do checks, ensure space */
1336
+	assert(HERE() == sn+1);
1337
+	s = p->strip[sn];
1338
+
1339
+	/* adjust paren pointers */
1340
+	assert(pos > 0);
1341
+	for (i = 1; i < NPAREN; i++) {
1342
+		if (p->pbegin[i] >= pos) {
1343
+			p->pbegin[i]++;
1344
+		}
1345
+		if (p->pend[i] >= pos) {
1346
+			p->pend[i]++;
1347
+		}
1348
+	}
1349
+
1350
+	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1351
+						(HERE()-pos-1)*sizeof(sop));
1352
+	p->strip[pos] = s;
1353
+}
1354
+
1355
+/*
1356
+ - dofwd - complete a forward reference
1357
+ */
1358
+static void
1359
+dofwd(struct parse *p, sopno pos, sop value)
1360
+{
1361
+	/* avoid making error situations worse */
1362
+	if (p->error != 0)
1363
+		return;
1364
+
1365
+	assert(value < 1<<OPSHIFT);
1366
+	p->strip[pos] = OP(p->strip[pos]) | value;
1367
+}
1368
+
1369
+/*
1370
+ - enlarge - enlarge the strip
1371
+ */
1372
+static void
1373
+enlarge(struct parse *p, sopno size)
1374
+{
1375
+	sop *sp;
1376
+
1377
+	if (p->ssize >= size)
1378
+		return;
1379
+
1380
+	sp = (sop *)cli_realloc(p->strip, size*sizeof(sop));
1381
+	if (sp == NULL) {
1382
+		SETERROR(REG_ESPACE);
1383
+		return;
1384
+	}
1385
+	p->strip = sp;
1386
+	p->ssize = size;
1387
+}
1388
+
1389
+/*
1390
+ - stripsnug - compact the strip
1391
+ */
1392
+static void
1393
+stripsnug(struct parse *p, struct re_guts *g)
1394
+{
1395
+	g->nstates = p->slen;
1396
+	g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop));
1397
+	if (g->strip == NULL) {
1398
+		SETERROR(REG_ESPACE);
1399
+		g->strip = p->strip;
1400
+	}
1401
+}
1402
+
1403
/*
 - findmust - fill in must and mlen with longest mandatory literal string
 *
 * Scans the compiled strip for the longest run of OCHAR operators that
 * is not interrupted by anything optional, and stores that run as a
 * NUL-terminated string in g->must with its length in g->mlen.  When no
 * such run exists, or memory for the string cannot be obtained, must and
 * mlen keep (or are reset to) their "none" values.
 *
 * This algorithm could do fancy things like analyzing the operands of |
 * for common subsequences.  Someday.  This code is simple and finds most
 * of the interesting cases.
 *
 * Note that must and mlen got initialized during setup.
 */
static void
findmust(struct parse *p, struct re_guts *g)
{
	sop *scan;
	sop *start;		/* start of the longest run found so far */
	sop *newstart;		/* start of the run currently being scanned */
	sopno newlen;		/* OCHARs in the current run */
	sop s;
	char *cp;
	sopno i;

	/* avoid making error situations worse */
	if (p->error != 0)
		return;

	/* find the longest OCHAR sequence in strip */
	newlen = 0;
	/* skip the operator at strip[0] */
	scan = g->strip + 1;
	do {
		s = *scan++;
		switch (OP(s)) {
		case OCHAR:		/* sequence member */
			if (newlen == 0)		/* new sequence */
				newstart = scan - 1;
			newlen++;
			break;
		case OPLUS_:		/* things that don't break one */
		case OLPAREN:
		case ORPAREN:
			break;
		case OQUEST_:		/* things that must be skipped */
		case OCH_:
			scan--;
			/* follow the operand offsets to the closing operator */
			do {
				scan += OPND(s);
				s = *scan;
				/* assert() interferes w debug printouts */
				if (OP(s) != O_QUEST && OP(s) != O_CH &&
							OP(s) != OOR2) {
					g->iflags |= BAD;
					return;
				}
			} while (OP(s) != O_QUEST && OP(s) != O_CH);
			/* fallthrough */
		default:		/* things that break a sequence */
			if (newlen > g->mlen) {		/* ends one */
				start = newstart;
				g->mlen = newlen;
			}
			newlen = 0;
			break;
		}
	} while (OP(s) != OEND);

	if (g->mlen == 0)		/* there isn't one */
		return;

	/* turn it into a character string */
	g->must = cli_malloc((size_t)g->mlen + 1);
	if (g->must == NULL) {		/* argh; just forget it */
		g->mlen = 0;
		return;
	}
	cp = g->must;
	scan = start;
	for (i = g->mlen; i > 0; i--) {
		/* skip the non-OCHAR operators that did not break the run */
		while (OP(s = *scan++) != OCHAR)
			continue;
		assert(cp < g->must + g->mlen);
		*cp++ = (char)OPND(s);
	}
	assert(cp == g->must + g->mlen);
	*cp++ = '\0';		/* just on general principles */
}
1486
+
1487
+/*
1488
+ - pluscount - count + nesting
1489
+ */
1490
+static sopno			/* nesting depth */
1491
+pluscount(struct parse *p, struct re_guts *g)
1492
+{
1493
+	sop *scan;
1494
+	sop s;
1495
+	sopno plusnest = 0;
1496
+	sopno maxnest = 0;
1497
+
1498
+	if (p->error != 0)
1499
+		return(0);	/* there may not be an OEND */
1500
+
1501
+	scan = g->strip + 1;
1502
+	do {
1503
+		s = *scan++;
1504
+		switch (OP(s)) {
1505
+		case OPLUS_:
1506
+			plusnest++;
1507
+			break;
1508
+		case O_PLUS:
1509
+			if (plusnest > maxnest)
1510
+				maxnest = plusnest;
1511
+			plusnest--;
1512
+			break;
1513
+		}
1514
+	} while (OP(s) != OEND);
1515
+	if (plusnest != 0)
1516
+		g->iflags |= BAD;
1517
+	return(maxnest);
1518
+}
0 1519
new file mode 100644
... ...
@@ -0,0 +1,132 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regerror.c	8.4 (Berkeley) 3/20/94
35
+ */
36
+
37
+#include <sys/types.h>
38
+#include <stdio.h>
39
+#include <string.h>
40
+#include <ctype.h>
41
+#include <limits.h>
42
+#include <stdlib.h>
43
+#include "others.h"
44
+#include "regex.h"
45
+
46
+#include "utils.h"
47
+
48
+static const char *regatoi(const regex_t *, char *, int);
49
+
50
+static struct rerr {	/* error table; lookups stop at the entry whose code is 0 */
51
+	int code;		/* REG_* error number */
52
+	const char *name;	/* the REG_* identifier spelled as text */
53
+	const char *explain;	/* message returned by cli_regerror() */
54
+} rerrs[] = {
55
+	{ REG_NOMATCH,	"REG_NOMATCH",	"cli_regexec() failed to match" },
56
+	{ REG_BADPAT,	"REG_BADPAT",	"invalid regular expression" },
57
+	{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
58
+	{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
59
+	{ REG_EESCAPE,	"REG_EESCAPE",	"trailing backslash (\\)" },
60
+	{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
61
+	{ REG_EBRACK,	"REG_EBRACK",	"brackets ([ ]) not balanced" },
62
+	{ REG_EPAREN,	"REG_EPAREN",	"parentheses not balanced" },
63
+	{ REG_EBRACE,	"REG_EBRACE",	"braces not balanced" },
64
+	{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
65
+	{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
66
+	{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
67
+	{ REG_BADRPT,	"REG_BADRPT",	"repetition-operator operand invalid" },
68
+	{ REG_EMPTY,	"REG_EMPTY",	"empty (sub)expression" },
69
+	{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
70
+	{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex routine" },
71
+	{ 0,		"",		"*** unknown regexp error code ***" }	/* sentinel; also the fallback message */
72
+};
73
+
74
+/* Copies the text for errcode into errbuf (truncated to errbuf_size) and returns the size the full text needs.
75
+ - cli_regerror - the interface to error numbers
76
+ = extern size_t cli_regerror(int, const regex_t *, char *, size_t);
77
+ */
78
+/* ARGSUSED */
79
+size_t
80
+cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
81
+{
82
+	struct rerr *r;
83
+	size_t len;
84
+	int target = errcode &~ REG_ITOA;	/* error number with the REG_ITOA request bit removed */
85
+	const char *s;				/* text that will be reported */
86
+	char convbuf[50];			/* scratch for generated names/numbers */
87
+
88
+	if (errcode == REG_ATOI)	/* name -> number: the name is read from preg->re_endp */
89
+		s = regatoi(preg, convbuf, sizeof convbuf);
90
+	else {
91
+		for (r = rerrs; r->code != 0; r++)	/* r ends on the match, or on the sentinel */
92
+			if (r->code == target)
93
+				break;
94
+	
95
+		if (errcode&REG_ITOA) {		/* number -> symbolic name */
96
+			if (r->code != 0) {
97
+				assert(strlen(r->name) < sizeof(convbuf));
98
+				(void) cli_strlcpy(convbuf, r->name, sizeof convbuf);
99
+			} else			/* unknown code: synthesize a name from its hex value */
100
+				(void)snprintf(convbuf, sizeof convbuf,
101
+				    "REG_0x%x", target);
102
+			s = convbuf;
103
+		} else				/* plain request: the explanation (sentinel's text if unknown) */
104
+			s = r->explain;
105
+	}
106
+
107
+	len = strlen(s) + 1;		/* includes the terminating NUL */
108
+	if (errbuf_size > 0) {		/* with a zero-sized buffer only the length is reported */
109
+		cli_strlcpy(errbuf, s, errbuf_size);
110
+	}
111
+
112
+	return(len);
113
+}
114
+
115
+/*
116
+ - regatoi - internal routine to implement REG_ATOI (error name in preg->re_endp -> decimal code as text)
117
+ */
118
+static const char *
119
+regatoi(const regex_t *preg, char *localbuf, int localbufsize)
120
+{
121
+	struct rerr *r;
122
+
123
+	for (r = rerrs; r->code != 0; r++)	/* exact, case-sensitive match on the symbolic name */
124
+		if (strcmp(r->name, preg->re_endp) == 0)
125
+			break;
126
+	if (r->code == 0)		/* reached the sentinel: name not in the table */
127
+		return("0");
128
+
129
+	(void)snprintf(localbuf, localbufsize, "%d", r->code);	/* result lives in the caller's buffer */
130
+	return(localbuf);
131
+}
0 132
new file mode 100644
... ...
@@ -0,0 +1,102 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992 Henry Spencer.
4
+ * Copyright (c) 1992, 1993
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer of the University of Toronto.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regex.h	8.1 (Berkeley) 6/2/93
35
+ */
36
+
37
+#ifndef _REGEX_H_
38
+#define	_REGEX_H_
39
+
40
+#include <sys/types.h>
41
+
42
+/* types */
43
+typedef off_t regoff_t;		/* type of the match offsets in regmatch_t */
44
+
45
+typedef struct {
46
+	int re_magic;		/* compared against MAGIC1 by cli_regexec()/cli_regfree() */
47
+	size_t re_nsub;		/* number of parenthesized subexpressions */
48
+	const char *re_endp;	/* end pointer for REG_PEND; also the name looked up for REG_ATOI */
49
+	struct re_guts *re_g;	/* none of your business :-) */
50
+} regex_t;
51
+
52
+typedef struct {
53
+	regoff_t rm_so;		/* start of match */
54
+	regoff_t rm_eo;		/* end of match */
55
+} regmatch_t;
56
+
57
+/* cli_regcomp() flags */
58
+#define	REG_BASIC	0000
59
+#define	REG_EXTENDED	0001
60
+#define	REG_ICASE	0002
61
+#define	REG_NOSUB	0004
62
+#define	REG_NEWLINE	0010
63
+#define	REG_NOSPEC	0020
64
+#define	REG_PEND	0040
65
+#define	REG_DUMP	0200
66
+
67
+/* cli_regerror() flags */
68
+#define	REG_NOMATCH	 1
69
+#define	REG_BADPAT	 2
70
+#define	REG_ECOLLATE	 3
71
+#define	REG_ECTYPE	 4
72
+#define	REG_EESCAPE	 5
73
+#define	REG_ESUBREG	 6
74
+#define	REG_EBRACK	 7
75
+#define	REG_EPAREN	 8
76
+#define	REG_EBRACE	 9
77
+#define	REG_BADBR	10
78
+#define	REG_ERANGE	11
79
+#define	REG_ESPACE	12
80
+#define	REG_BADRPT	13
81
+#define	REG_EMPTY	14
82
+#define	REG_ASSERT	15
83
+#define	REG_INVARG	16
84
+#define	REG_ATOI	255	/* convert name to number (!) */
85
+#define	REG_ITOA	0400	/* convert number to name (!) */
86
+
87
+/* cli_regexec() flags (without REDEBUG, cli_regexec() keeps only NOTBOL/NOTEOL/STARTEND) */
88
+#define	REG_NOTBOL	00001
89
+#define	REG_NOTEOL	00002
90
+#define	REG_STARTEND	00004
91
+#define	REG_TRACE	00400	/* tracing of execution */
92
+#define	REG_LARGE	01000	/* force large representation */
93
+#define	REG_BACKR	02000	/* force use of backref code */
94
+
95
+int	cli_regcomp(regex_t *, const char *, int);	/* args: compiled result, pattern, cflags */
96
+size_t	cli_regerror(int, const regex_t *, char *, size_t);	/* returns the size needed for the full message */
97
+int	cli_regexec(const regex_t *, const char *, size_t, regmatch_t [], int);	/* 0 on match, REG_NOMATCH otherwise */
98
+void	cli_regfree(regex_t *);	/* releases the compiled program, not the regex_t itself */
99
+size_t  cli_strlcpy(char *dst, const char *src, size_t siz);	/* bounded copy; returns strlen(src) */
100
+
101
+#endif /* !_REGEX_H_ */
0 102
new file mode 100644
... ...
@@ -0,0 +1,157 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regex2.h	8.4 (Berkeley) 3/20/94
35
+ */
36
+
37
+/*
38
+ * internals of regex_t
39
+ */
40
+#define	MAGIC1	((('r'^0200)<<8) | 'e')
41
+
42
+/*
43
+ * The internal representation is a *strip*, a sequence of
44
+ * operators ending with an endmarker.  (Some terminology etc. is a
45
+ * historical relic of earlier versions which used multiple strips.)
46
+ * Certain oddities in the representation are there to permit running
47
+ * the machinery backwards; in particular, any deviation from sequential
48
+ * flow must be marked at both its source and its destination.  Some
49
+ * fine points:
50
+ *
51
+ * - OPLUS_ and O_PLUS are *inside* the loop they create.
52
+ * - OQUEST_ and O_QUEST are *outside* the bypass they create.
53
+ * - OCH_ and O_CH are *outside* the multi-way branch they create, while
54
+ *   OOR1 and OOR2 are respectively the end and the beginning of one of
55
+ *   the branches.  Note that there is an implicit OOR2 following OCH_
56
+ *   and an implicit OOR1 preceding O_CH.
57
+ *
58
+ * In state representations, an operator's bit is on to signify a state
59
+ * immediately *preceding* "execution" of that operator.
60
+ */
61
+typedef unsigned long sop;	/* strip operator */
62
+typedef long sopno;		/* signed count of sops / offset within the strip */
63
+#define	OPRMASK	0xf8000000LU	/* operator field: bits 27..31 */
64
+#define	OPDMASK	0x07ffffffLU	/* operand field: bits 0..26 */
65
+#define	OPSHIFT	((unsigned)27)	/* bit position where the operator field begins */
66
+#define	OP(n)	((n)&OPRMASK)	/* operator part of a sop, still in shifted position */
67
+#define	OPND(n)	((n)&OPDMASK)	/* operand part of a sop */
68
+#define	SOP(op, opnd)	((op)|(opnd))	/* build a sop; op must already be one of the shifted O* values */
69
+/* operators			   meaning	operand			*/
70
+/*						(back, fwd are offsets)	*/
71
+#define	OEND	(1LU<<OPSHIFT)	/* endmarker	-			*/
72
+#define	OCHAR	(2LU<<OPSHIFT)	/* character	unsigned char		*/
73
+#define	OBOL	(3LU<<OPSHIFT)	/* left anchor	-			*/
74
+#define	OEOL	(4LU<<OPSHIFT)	/* right anchor	-			*/
75
+#define	OANY	(5LU<<OPSHIFT)	/* .		-			*/
76
+#define	OANYOF	(6LU<<OPSHIFT)	/* [...]	set number		*/
77
+#define	OBACK_	(7LU<<OPSHIFT)	/* begin \d	paren number		*/
78
+#define	O_BACK	(8LU<<OPSHIFT)	/* end \d	paren number		*/
79
+#define	OPLUS_	(9LU<<OPSHIFT)	/* + prefix	fwd to suffix		*/
80
+#define	O_PLUS	(10LU<<OPSHIFT)	/* + suffix	back to prefix		*/
81
+#define	OQUEST_	(11LU<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
82
+#define	O_QUEST	(12LU<<OPSHIFT)	/* ? suffix	back to prefix		*/
83
+#define	OLPAREN	(13LU<<OPSHIFT)	/* (		fwd to )		*/
84
+#define	ORPAREN	(14LU<<OPSHIFT)	/* )		back to (		*/
85
+#define	OCH_	(15LU<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
86
+#define	OOR1	(16LU<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
87
+#define	OOR2	(17LU<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
88
+#define	O_CH	(18LU<<OPSHIFT)	/* end choice	back to OOR1		*/
89
+#define	OBOW	(19LU<<OPSHIFT)	/* begin word	-			*/
90
+#define	OEOW	(20LU<<OPSHIFT)	/* end word	-			*/
91
+
92
+/*
93
+ * Structure for [] character-set representation.  Character sets are
94
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
95
+ * The individual set therefore has both a pointer to the byte vector
96
+ * and a mask to pick out the relevant bit of each byte.  A hash code
97
+ * simplifies testing whether two sets could be identical.
98
+ *
99
+ * This will get trickier for multicharacter collating elements.  As
100
+ * preliminary hooks for dealing with such things, we also carry along
101
+ * a string of multi-character elements, and decide the size of the
102
+ * vectors at run time.
103
+ */
104
+typedef struct {
105
+	uch *ptr;		/* -> uch [csetsize] */
106
+	uch mask;		/* bit within array */
107
+	uch hash;		/* hash code */
108
+	size_t smultis;		/* presumably the size of the multis buffer -- TODO confirm in regcomp.c */
109
+	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
110
+} cset;
111
+/* note that CHadd and CHsub are unsafe (cs and c are each evaluated twice), and CHIN doesn't yield 0/1 (it yields the masked byte) */
112
+#define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
113
+#define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
114
+#define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
115
+#define	MCadd(p, cs, cp)	mcadd(p, cs, cp)	/* cli_regcomp() internal fns */
116
+#define	MCsub(p, cs, cp)	mcsub(p, cs, cp)
117
+#define	MCin(p, cs, cp)	mcin(p, cs, cp)
118
+
119
+/* stuff for character categories */
120
+typedef unsigned char cat_t;
121
+
122
+/*
123
+ * main compiled-expression structure
124
+ */
125
+struct re_guts {		/* reached through regex_t.re_g; released by cli_regfree() */
126
+	int magic;		/* MAGIC2 while valid; cli_regfree() resets it to 0 */
127
+#		define	MAGIC2	((('R'^0200)<<8)|'E')
128
+	sop *strip;		/* malloced area for strip */
129
+	int csetsize;		/* number of bits in a cset vector */
130
+	int ncsets;		/* number of csets in use */
131
+	cset *sets;		/* -> cset [ncsets] */
132
+	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
133
+	int cflags;		/* copy of cli_regcomp() cflags argument */
134
+	sopno nstates;		/* = number of sops */
135
+	sopno firststate;	/* the initial OEND (normally 0) */
136
+	sopno laststate;	/* the final OEND */
137
+	int iflags;		/* internal flags */
138
+#		define	USEBOL	01	/* used ^ */
139
+#		define	USEEOL	02	/* used $ */
140
+#		define	BAD	04	/* something wrong */
141
+	int nbol;		/* number of ^ used */
142
+	int neol;		/* number of $ used */
143
+	int ncategories;	/* how many character categories */
144
+	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
145
+	char *must;		/* match must contain this string */
146
+	int mlen;		/* length of must */
147
+	size_t nsub;		/* copy of re_nsub */
148
+	int backrefs;		/* does it use back references? */
149
+	sopno nplus;		/* how deep does it nest +s? */
150
+	/* catspace must be last */
151
+	cat_t catspace[1];	/* actually [NC] */
152
+};
153
+
154
+/* misc utilities */
155
+#define	OUT	(CHAR_MAX+1)	/* a non-character value */
156
+#define	ISWORD(c)	(isalnum(c) || (c) == '_')	/* word character: alphanumeric or underscore; c is evaluated twice */
0 157
new file mode 100644
... ...
@@ -0,0 +1,162 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
35
+ */
36
+
37
+/*
38
+ * the outer shell of cli_regexec()
39
+ *
40
+ * This file includes engine.c *twice*, after muchos fiddling with the
41
+ * macros that code uses.  This lets the same code operate on two different
42
+ * representations for state sets.
43
+ */
44
+#include <sys/types.h>
45
+#include <stdio.h>
46
+#include <stdlib.h>
47
+#include <string.h>
48
+#include <limits.h>
49
+#include <ctype.h>
50
+#include "others.h"
51
+#include "regex.h"
52
+
53
+#include "utils.h"
54
+#include "regex2.h"
55
+
56
+/* macros for manipulating states, small version: the whole state set is one long, one bit per state */
57
+#define	states	long
58
+#define	states1	long		/* for later use in cli_regexec() decision; written as long because macros expand lazily and `states' is char * by then */
59
+#define	CLEAR(v)	((v) = 0)
60
+#define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
61
+#define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
62
+#define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
63
+#define	ASSIGN(d, s)	((d) = (s))
64
+#define	EQ(a, b)	((a) == (b))
65
+#define	STATEVARS	long dummy	/* dummy version */
66
+#define	STATESETUP(m, n)	/* nothing */
67
+#define	STATETEARDOWN(m)	/* nothing */
68
+#define	SETUP(v)	((v) = 0)
69
+#define	onestate	long
70
+#define	INIT(o, n)	((o) = (unsigned long)1 << (n))
71
+#define	INC(o)		((o) <<= 1)
72
+#define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
73
+/* some abbreviations; note that some of these know variable names! */
74
+/* do "if I'm here, I can also be there" etc without branches */
75
+#define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
76
+#define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
77
+#define	ISSETBACK(v, n)		(((v) & ((unsigned long)here >> (n))) != 0)
78
+/* function names */
79
+#define SNAMES			/* engine.c looks after details */
80
+
81
+#include "engine.c"
82
+
83
+/* now undo things (states1 is not in this list; cli_regexec() still uses it) */
84
+#undef	states
85
+#undef	CLEAR
86
+#undef	SET0
87
+#undef	SET1
88
+#undef	ISSET
89
+#undef	ASSIGN
90
+#undef	EQ
91
+#undef	STATEVARS
92
+#undef	STATESETUP
93
+#undef	STATETEARDOWN
94
+#undef	SETUP
95
+#undef	onestate
96
+#undef	INIT
97
+#undef	INC
98
+#undef	ISSTATEIN
99
+#undef	FWD
100
+#undef	BACK
101
+#undef	ISSETBACK
102
+#undef	SNAMES
103
+
104
+/* macros for manipulating states, large version: a state set is a char vector, one byte per state */
105
+#define	states	char *
106
+#define	CLEAR(v)	memset(v, 0, m->g->nstates)
107
+#define	SET0(v, n)	((v)[n] = 0)
108
+#define	SET1(v, n)	((v)[n] = 1)
109
+#define	ISSET(v, n)	((v)[n])
110
+#define	ASSIGN(d, s)	memmove(d, s, m->g->nstates)
111
+#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
112
+#define	STATEVARS	long vn; char *space	/* vn: next unused vector index; space: backing store for all vectors */
113
+#define	STATESETUP(m, nv)	{ (m)->space = cli_malloc((nv)*(m)->g->nstates); \
114
+				if ((m)->space == NULL) return(REG_ESPACE); \
115
+				(m)->vn = 0; }
116
+#define	STATETEARDOWN(m)	{ free((m)->space); }
117
+#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])	/* hand out the next nstates-byte vector */
118
+#define	onestate	long
119
+#define	INIT(o, n)	((o) = (n))	/* here a onestate is a plain index, not a bit mask */
120
+#define	INC(o)	((o)++)
121
+#define	ISSTATEIN(v, o)	((v)[o])
122
+/* some abbreviations; note that some of these know variable names! */
123
+/* do "if I'm here, I can also be there" etc without branches */
124
+#define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
125
+#define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
126
+#define	ISSETBACK(v, n)	((v)[here - (n)])
127
+/* function names */
128
+#define	LNAMES			/* flag */
129
+
130
+#include "engine.c"
131
+
132
+/*
133
+ - cli_regexec - interface for matching
134
+ *
135
+ * We put this here so we can exploit knowledge of the state representation
136
+ * when choosing which matcher to call.  Also, by this point the matchers
137
+ * have been prototyped.
138
+ */
139
+int				/* 0 success, REG_NOMATCH failure */
140
+cli_regexec(const regex_t *preg, const char *string, size_t nmatch,
141
+    regmatch_t pmatch[], int eflags)
142
+{
143
+	struct re_guts *g = preg->re_g;
144
+#ifdef REDEBUG
145
+#	define	GOODFLAGS(f)	(f)	/* debug build: every flag is passed through */
146
+#else
147
+#	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))	/* drop everything else, REG_LARGE included */
148
+#endif
149
+
150
+	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)	/* not a live compiled pattern */
151
+		return(REG_BADPAT);
152
+	assert(!(g->iflags&BAD));
153
+	if (g->iflags&BAD)		/* backstop for no-debug case */
154
+		return(REG_BADPAT);
155
+	eflags = GOODFLAGS(eflags);
156
+
157
+	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))	/* few enough states for the one-word matcher */
158
+		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
159
+	else				/* too many states, or REG_LARGE requested */
160
+		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
161
+}
0 162
new file mode 100644
... ...
@@ -0,0 +1,73 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)regfree.c	8.3 (Berkeley) 3/20/94
35
+ */
36
+
37
+#include <sys/types.h>
38
+#include <stdio.h>
39
+#include <stdlib.h>
40
+#include "others.h"
41
+#include "regex.h"
42
+
43
+#include "utils.h"
44
+#include "regex2.h"
45
+
46
+/*
47
+ - cli_regfree - free everything (everything owned by preg->re_g; preg itself is not freed)
48
+ */
49
+void
50
+cli_regfree(regex_t *preg)
51
+{
52
+	struct re_guts *g;
53
+
54
+	if (preg->re_magic != MAGIC1)	/* oops */
55
+		return;			/* nice to complain, but hard */
56
+
57
+	g = preg->re_g;
58
+	if (g == NULL || g->magic != MAGIC2)	/* oops again */
59
+		return;
60
+	preg->re_magic = 0;		/* mark it invalid */
61
+	g->magic = 0;			/* mark it invalid */
62
+
63
+	if (g->strip != NULL)
64
+		free((char *)g->strip);
65
+	if (g->sets != NULL)
66
+		free((char *)g->sets);
67
+	if (g->setbits != NULL)
68
+		free((char *)g->setbits);
69
+	if (g->must != NULL)
70
+		free(g->must);
71
+	free((char *)g);		/* preg->re_g is not reset and still holds this address */
72
+}
0 73
new file mode 100644
... ...
@@ -0,0 +1,52 @@
0
+/*
1
+ * This code is derived from OpenBSD's libc, original license follows:
2
+ *
3
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+
18
+#include <sys/types.h>
19
+#include <string.h>
20
+
21
+#include "regex.h"
22
+/*
23
+ * Copy src to string dst of size siz.  At most siz-1 characters
24
+ * will be copied.  Always NUL terminates (unless siz == 0).
25
+ * Returns strlen(src); if retval >= siz, truncation occurred.
26
+ */
27
+size_t
28
+cli_strlcpy(char *dst, const char *src, size_t siz)
29
+{
30
+	char *d = dst;		/* write cursor */
31
+	const char *s = src;	/* read cursor; its final position gives the return value */
32
+	size_t n = siz;		/* bytes of dst still available, counting the NUL slot */
33
+
34
+	/* Copy as many bytes as will fit */
35
+	if (n != 0) {
36
+		while (--n != 0) {
37
+			if ((*d++ = *s++) == '\0')	/* NUL copied: done, n stays non-zero */
38
+				break;
39
+		}
40
+	}
41
+
42
+	/* Not enough room in dst, add NUL and traverse rest of src */
43
+	if (n == 0) {
44
+		if (siz != 0)
45
+			*d = '\0';		/* NUL-terminate dst */
46
+		while (*s++)		/* leave s one past src's NUL, as the early-exit path does */
47
+			;
48
+	}
49
+
50
+	return(s - src - 1);	/* count does not include NUL */
51
+}
0 52
new file mode 100644
... ...
@@ -0,0 +1,59 @@
0
+/*-
1
+ * This code is derived from OpenBSD's libc/regex, original license follows:
2
+ *
3
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
4
+ * Copyright (c) 1992, 1993, 1994
5
+ *	The Regents of the University of California.  All rights reserved.
6
+ *
7
+ * This code is derived from software contributed to Berkeley by
8
+ * Henry Spencer.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ *    notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ *    notice, this list of conditions and the following disclaimer in the
17
+ *    documentation and/or other materials provided with the distribution.
18
+ * 3. Neither the name of the University nor the names of its contributors
19
+ *    may be used to endorse or promote products derived from this software
20
+ *    without specific prior written permission.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
+ * SUCH DAMAGE.
33
+ *
34
+ *	@(#)utils.h	8.3 (Berkeley) 3/20/94
35
+ */
36
+
37
+/* utility definitions */
38
+#define	DUPMAX		_POSIX2_RE_DUP_MAX	/* xxx is this right? */
39
+#define	INFINITY	(DUPMAX + 1)	/* NOTE(review): same name as the C99 <math.h> INFINITY macro -- confirm no includer also pulls in <math.h> */
40
+#define	NC		(CHAR_MAX - CHAR_MIN + 1)	/* number of distinct char values */
41
+typedef unsigned char uch;
42
+
43
+/* switch off assertions (if not already off) if no REDEBUG */
44
+#ifdef CL_DEBUG		/* CL_DEBUG turns REDEBUG on, which keeps assert() active */
45
+#define REDEBUG
46
+#endif
47
+
48
+#ifndef REDEBUG
49
+#ifndef NDEBUG
50
+#define	NDEBUG	/* no assertions please */
51
+#endif
52
+#endif
53
+#include <assert.h>	/* included after the NDEBUG decision above so that it takes effect */
54
+
55
+/* for old systems with bcopy() but no memmove() */
56
+#ifdef USEBCOPY
57
+#define	memmove(d, s, c)	bcopy(s, d, c)
58
+#endif
... ...
@@ -52,9 +52,7 @@
52 52
 #include <limits.h>
53 53
 #include <sys/types.h>
54 54
 
55
-#ifdef	HAVE_REGEX_H
56
-#include <regex.h>
57
-#endif
55
+#include "regex/regex.h"
58 56
 
59 57
 
60 58
 #include "clamav.h"
... ...
@@ -357,7 +355,6 @@ static struct tree_node* stack_pop(struct node_stack* stack)
357 357
 }
358 358
 
359 359
 /* Initialization & loading */
360
-
361 360
 /* Initializes @matcher, allocating necessary substructures */
362 361
 int init_regex_list(struct regex_matcher* matcher)
363 362
 {
... ...
@@ -1194,7 +1191,7 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co
1194 1194
 							 preg=cli_malloc(sizeof(*preg));
1195 1195
 							 if(!preg)
1196 1196
 								 return CL_EMEM;
1197
-							 rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
1197
+							 rc = cli_regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL));
1198 1198
 							 leaf->preg=preg;
1199 1199
 							 if(rc)
1200 1200
 								 return rc;
... ...
@@ -1275,7 +1272,7 @@ static int match_node(struct tree_node* node,const unsigned char* c,size_t len,c
1275 1275
 				const struct leaf_info* leaf = node->u.leaf;
1276 1276
 				/*isleaf = 1;*/
1277 1277
 				if(leaf->preg) {
1278
-					rc = !regexec(leaf->preg,(const char*)c,0,NULL,0);
1278
+					rc = !cli_regexec(leaf->preg,(const char*)c,0,NULL,0);
1279 1279
 				}
1280 1280
 				else  {
1281 1281
 					massert(*c==node->c && "We know this has to match[2]");
... ...
@@ -1394,7 +1391,7 @@ static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node
1394 1394
 		stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */
1395 1395
 		stack_push_once(&matcher->node_stack,node);
1396 1396
 		if(leaf->preg) {
1397
-			regfree(leaf->preg);
1397
+			cli_regfree(leaf->preg);
1398 1398
 			free(leaf->preg);
1399 1399
 			leaf->preg=NULL;
1400 1400
 		}