git-svn: trunk@3225
Török Edvin authored on 2007/09/18 03:54:56... | ... |
@@ -1,3 +1,10 @@ |
1 |
+Mon Sep 17 21:06:59 EEST 2007(edwin) |
|
2 |
+------------------------------------ |
|
3 |
+ * libclamav/regex/: add regcomp(), regexec() impl. from OpenBSD's libc. |
|
4 |
+ This code is licensed under the 3-clause BSD. |
|
5 |
+ This will be used instead of system provided regexec()/regcomp() to |
|
6 |
+ have consistent behaviour across platforms. |
|
7 |
+ |
|
1 | 8 |
Mon Sep 17 17:12:27 BST 2007 (njh) |
2 | 9 |
---------------------------------- |
3 | 10 |
* libclamav/mbox.c: Bugs 665/667 |
... | ... |
@@ -19993,8 +19993,7 @@ fi |
19993 | 19993 |
|
19994 | 19994 |
|
19995 | 19995 |
|
19996 |
- |
|
19997 |
-for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h |
|
19996 |
+for ac_header in stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h |
|
19998 | 19997 |
do |
19999 | 19998 |
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` |
20000 | 19999 |
if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then |
... | ... |
@@ -40,7 +40,7 @@ AC_DEFINE(SCANBUFF, 131072, [scan buffer size]) |
40 | 40 |
AC_DEFINE(FILEBUFF, 8192, [file i/o buffer size]) |
41 | 41 |
|
42 | 42 |
AC_HEADER_STDC |
43 |
-AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h regex.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h) |
|
43 |
+AC_CHECK_HEADERS(stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h iconv.h stdbool.h pwd.h grp.h) |
|
44 | 44 |
AC_CHECK_HEADER(syslog.h,AC_DEFINE(USE_SYSLOG,1,[use syslog]),) |
45 | 45 |
|
46 | 46 |
AC_TYPE_OFF_T |
... | ... |
@@ -110,6 +110,11 @@ libclamav_la_SOURCES = \ |
110 | 110 |
is_tar.h \ |
111 | 111 |
tnef.c \ |
112 | 112 |
tnef.h \ |
113 |
+ regex/strlcpy.c \ |
|
114 |
+ regex/regcomp.c \ |
|
115 |
+ regex/regerror.c \ |
|
116 |
+ regex/regexec.c \ |
|
117 |
+ regex/regfree.c \ |
|
113 | 118 |
unrar/unrar15.c \ |
114 | 119 |
unrar/unrar20.h \ |
115 | 120 |
unrar/unrarcmd.h \ |
... | ... |
@@ -58,7 +58,7 @@ host_triplet = @host@ |
58 | 58 |
target_triplet = @target@ |
59 | 59 |
subdir = libclamav |
60 | 60 |
DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ |
61 |
- $(srcdir)/Makefile.in |
|
61 |
+ $(srcdir)/Makefile.in COPYING |
|
62 | 62 |
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 |
63 | 63 |
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ |
64 | 64 |
$(top_srcdir)/configure.in |
... | ... |
@@ -84,10 +84,11 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \ |
84 | 84 |
pe.lo upx.lo htmlnorm.lo chmunpack.lo rebuildpe.lo petite.lo \ |
85 | 85 |
wwunpack.lo unsp.lo aspack.lo packlibs.lo fsg.lo mew.lo \ |
86 | 86 |
upack.lo line.lo untar.lo unzip.lo special.lo binhex.lo \ |
87 |
- is_tar.lo tnef.lo unrar15.lo unrarvm.lo unrar.lo \ |
|
88 |
- unrarfilter.lo unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo \ |
|
89 |
- LZMADecode.lo bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo \ |
|
90 |
- yc.lo elf.lo sis.lo uuencode.lo pst.lo phishcheck.lo \ |
|
87 |
+ is_tar.lo tnef.lo strlcpy.lo regcomp.lo regerror.lo regexec.lo \ |
|
88 |
+ regfree.lo unrar15.lo unrarvm.lo unrar.lo unrarfilter.lo \ |
|
89 |
+ unrarppm.lo unrar20.lo unrarcmd.lo unarj.lo LZMADecode.lo \ |
|
90 |
+ bzlib.lo infblock.lo nulsft.lo pdf.lo spin.lo yc.lo elf.lo \ |
|
91 |
+ sis.lo uuencode.lo pst.lo phishcheck.lo \ |
|
91 | 92 |
phish_domaincheck_db.lo phish_whitelist.lo regex_list.lo \ |
92 | 93 |
sha256.lo mspack.lo cab.lo entconv.lo hashtab.lo dconf.lo \ |
93 | 94 |
lockdb.lo |
... | ... |
@@ -325,6 +326,11 @@ libclamav_la_SOURCES = \ |
325 | 325 |
is_tar.h \ |
326 | 326 |
tnef.c \ |
327 | 327 |
tnef.h \ |
328 |
+ regex/strlcpy.c \ |
|
329 |
+ regex/regcomp.c \ |
|
330 |
+ regex/regerror.c \ |
|
331 |
+ regex/regexec.c \ |
|
332 |
+ regex/regfree.c \ |
|
328 | 333 |
unrar/unrar15.c \ |
329 | 334 |
unrar/unrar20.h \ |
330 | 335 |
unrar/unrarcmd.h \ |
... | ... |
@@ -505,7 +511,11 @@ distclean-compile: |
505 | 505 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pst.Plo@am__quote@ |
506 | 506 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdb.Plo@am__quote@ |
507 | 507 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rebuildpe.Plo@am__quote@ |
508 |
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regcomp.Plo@am__quote@ |
|
509 |
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regerror.Plo@am__quote@ |
|
508 | 510 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_list.Plo@am__quote@ |
511 |
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexec.Plo@am__quote@ |
|
512 |
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regfree.Plo@am__quote@ |
|
509 | 513 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rtf.Plo@am__quote@ |
510 | 514 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scanners.Plo@am__quote@ |
511 | 515 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@ |
... | ... |
@@ -514,6 +524,7 @@ distclean-compile: |
514 | 514 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/special.Plo@am__quote@ |
515 | 515 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spin.Plo@am__quote@ |
516 | 516 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str.Plo@am__quote@ |
517 |
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strlcpy.Plo@am__quote@ |
|
517 | 518 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/table.Plo@am__quote@ |
518 | 519 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/text.Plo@am__quote@ |
519 | 520 |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tnef.Plo@am__quote@ |
... | ... |
@@ -556,6 +567,41 @@ distclean-compile: |
556 | 556 |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
557 | 557 |
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< |
558 | 558 |
|
559 |
+strlcpy.lo: regex/strlcpy.c |
|
560 |
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT strlcpy.lo -MD -MP -MF "$(DEPDIR)/strlcpy.Tpo" -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c; \ |
|
561 |
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/strlcpy.Tpo" "$(DEPDIR)/strlcpy.Plo"; else rm -f "$(DEPDIR)/strlcpy.Tpo"; exit 1; fi |
|
562 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/strlcpy.c' object='strlcpy.lo' libtool=yes @AMDEPBACKSLASH@ |
|
563 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
564 |
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o strlcpy.lo `test -f 'regex/strlcpy.c' || echo '$(srcdir)/'`regex/strlcpy.c |
|
565 |
+ |
|
566 |
+regcomp.lo: regex/regcomp.c |
|
567 |
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regcomp.lo -MD -MP -MF "$(DEPDIR)/regcomp.Tpo" -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c; \ |
|
568 |
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regcomp.Tpo" "$(DEPDIR)/regcomp.Plo"; else rm -f "$(DEPDIR)/regcomp.Tpo"; exit 1; fi |
|
569 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regcomp.c' object='regcomp.lo' libtool=yes @AMDEPBACKSLASH@ |
|
570 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
571 |
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regcomp.lo `test -f 'regex/regcomp.c' || echo '$(srcdir)/'`regex/regcomp.c |
|
572 |
+ |
|
573 |
+regerror.lo: regex/regerror.c |
|
574 |
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regerror.lo -MD -MP -MF "$(DEPDIR)/regerror.Tpo" -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c; \ |
|
575 |
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regerror.Tpo" "$(DEPDIR)/regerror.Plo"; else rm -f "$(DEPDIR)/regerror.Tpo"; exit 1; fi |
|
576 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regerror.c' object='regerror.lo' libtool=yes @AMDEPBACKSLASH@ |
|
577 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
578 |
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regerror.lo `test -f 'regex/regerror.c' || echo '$(srcdir)/'`regex/regerror.c |
|
579 |
+ |
|
580 |
+regexec.lo: regex/regexec.c |
|
581 |
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regexec.lo -MD -MP -MF "$(DEPDIR)/regexec.Tpo" -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c; \ |
|
582 |
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regexec.Tpo" "$(DEPDIR)/regexec.Plo"; else rm -f "$(DEPDIR)/regexec.Tpo"; exit 1; fi |
|
583 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regexec.c' object='regexec.lo' libtool=yes @AMDEPBACKSLASH@ |
|
584 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
585 |
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regexec.lo `test -f 'regex/regexec.c' || echo '$(srcdir)/'`regex/regexec.c |
|
586 |
+ |
|
587 |
+regfree.lo: regex/regfree.c |
|
588 |
+@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT regfree.lo -MD -MP -MF "$(DEPDIR)/regfree.Tpo" -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c; \ |
|
589 |
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/regfree.Tpo" "$(DEPDIR)/regfree.Plo"; else rm -f "$(DEPDIR)/regfree.Tpo"; exit 1; fi |
|
590 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='regex/regfree.c' object='regfree.lo' libtool=yes @AMDEPBACKSLASH@ |
|
591 |
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
|
592 |
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o regfree.lo `test -f 'regex/regfree.c' || echo '$(srcdir)/'`regex/regfree.c |
|
593 |
+ |
|
559 | 594 |
unrar15.lo: unrar/unrar15.c |
560 | 595 |
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unrar15.lo -MD -MP -MF "$(DEPDIR)/unrar15.Tpo" -c -o unrar15.lo `test -f 'unrar/unrar15.c' || echo '$(srcdir)/'`unrar/unrar15.c; \ |
561 | 596 |
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/unrar15.Tpo" "$(DEPDIR)/unrar15.Plo"; else rm -f "$(DEPDIR)/unrar15.Tpo"; exit 1; fi |
... | ... |
@@ -353,17 +353,17 @@ static int build_regex(regex_t* preg,const char* regex,int nosub) |
353 | 353 |
{ |
354 | 354 |
int rc; |
355 | 355 |
cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex); |
356 |
- rc = regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0)); |
|
356 |
+ rc = cli_regcomp(preg,regex,REG_EXTENDED|REG_ICASE|(nosub ? REG_NOSUB :0)); |
|
357 | 357 |
if(rc) { |
358 | 358 |
|
359 | 359 |
#ifdef C_WINDOWS |
360 | 360 |
cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n"); |
361 | 361 |
#else |
362 |
- size_t buflen = regerror(rc,preg,NULL,0); |
|
362 |
+ size_t buflen = cli_regerror(rc,preg,NULL,0); |
|
363 | 363 |
char *errbuf = cli_malloc(buflen); |
364 | 364 |
|
365 | 365 |
if(errbuf) { |
366 |
- regerror(rc,preg,errbuf,buflen); |
|
366 |
+ cli_regerror(rc,preg,errbuf,buflen); |
|
367 | 367 |
cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",errbuf); |
368 | 368 |
free(errbuf); |
369 | 369 |
} else |
... | ... |
@@ -446,7 +446,7 @@ static int get_host(const struct phishcheck* s,struct string* dest,const char* U |
446 | 446 |
|
447 | 447 |
static int isCountryCode(const struct phishcheck* s,const char* str) |
448 | 448 |
{ |
449 |
- return str ? !regexec(&s->preg_cctld,str,0,NULL,0) : 0; |
|
449 |
+ return str ? !cli_regexec(&s->preg_cctld,str,0,NULL,0) : 0; |
|
450 | 450 |
} |
451 | 451 |
|
452 | 452 |
static int isTLD(const struct phishcheck* pchk,const char* str,int len) |
... | ... |
@@ -461,7 +461,7 @@ static int isTLD(const struct phishcheck* pchk,const char* str,int len) |
461 | 461 |
return CL_EMEM; |
462 | 462 |
strncpy(s,str,len); |
463 | 463 |
s[len]='\0'; |
464 |
- rc = !regexec(&pchk->preg_tld,s,0,NULL,0); |
|
464 |
+ rc = !cli_regexec(&pchk->preg_tld,s,0,NULL,0); |
|
465 | 465 |
free(s); |
466 | 466 |
return rc ? 1 : 0; |
467 | 467 |
} |
... | ... |
@@ -880,7 +880,7 @@ static char hex2int(const unsigned char* src) |
880 | 880 |
static void free_regex(regex_t* p) |
881 | 881 |
{ |
882 | 882 |
if(p) { |
883 |
- regfree(p); |
|
883 |
+ cli_regfree(p); |
|
884 | 884 |
} |
885 | 885 |
} |
886 | 886 |
|
... | ... |
@@ -977,12 +977,12 @@ void phishing_done(struct cl_engine* engine) |
977 | 977 |
*/ |
978 | 978 |
static int isURL(const struct phishcheck* pchk,const char* URL) |
979 | 979 |
{ |
980 |
- return URL ? !regexec(&pchk->preg,URL,0,NULL,0) : 0; |
|
980 |
+ return URL ? !cli_regexec(&pchk->preg,URL,0,NULL,0) : 0; |
|
981 | 981 |
} |
982 | 982 |
|
983 | 983 |
static int isNumericURL(const struct phishcheck* pchk,const char* URL) |
984 | 984 |
{ |
985 |
- return URL ? !regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0; |
|
985 |
+ return URL ? !cli_regexec(&pchk->preg_numeric,URL,0,NULL,0) : 0; |
|
986 | 986 |
} |
987 | 987 |
|
988 | 988 |
/* Cleans up @urls |
... | ... |
@@ -1013,7 +1013,7 @@ static int url_get_host(const struct phishcheck* pchk, struct url_check* url,str |
1013 | 1013 |
string_free(host); |
1014 | 1014 |
return CL_PHISH_TEXTURL; |
1015 | 1015 |
} |
1016 |
- if(url->flags&CHECK_CLOAKING && !regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) { |
|
1016 |
+ if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) { |
|
1017 | 1017 |
/* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */ |
1018 | 1018 |
string_free(host); |
1019 | 1019 |
return CL_PHISH_HEX_URL; |
... | ... |
@@ -20,9 +20,7 @@ |
20 | 20 |
#ifndef _PHISH_CHECK_H |
21 | 21 |
#define _PHISH_CHECK_H |
22 | 22 |
|
23 |
-#ifdef HAVE_REGEX_H |
|
24 |
-#include <regex.h> |
|
25 |
-#endif |
|
23 |
+#include "regex/regex.h" |
|
26 | 24 |
|
27 | 25 |
#define CL_PHISH_BASE 100 |
28 | 26 |
enum phish_status {CL_PHISH_NODECISION=0,CL_PHISH_CLEAN=CL_PHISH_BASE, CL_PHISH_CLEANUP_OK,CL_PHISH_HOST_OK, CL_PHISH_DOMAIN_OK, |
29 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,68 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+/* character-class table */ |
|
38 |
+static struct cclass { |
|
39 |
+ const char *name; |
|
40 |
+ const char *chars; |
|
41 |
+ const char *multis; |
|
42 |
+} cclasses[] = { |
|
43 |
+ { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
|
44 |
+0123456789", ""} , |
|
45 |
+ { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
|
46 |
+ ""} , |
|
47 |
+ { "blank", " \t", ""} , |
|
48 |
+ { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ |
|
49 |
+\25\26\27\30\31\32\33\34\35\36\37\177", ""} , |
|
50 |
+ { "digit", "0123456789", ""} , |
|
51 |
+ { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
|
52 |
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", |
|
53 |
+ ""} , |
|
54 |
+ { "lower", "abcdefghijklmnopqrstuvwxyz", |
|
55 |
+ ""} , |
|
56 |
+ { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
|
57 |
+0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", |
|
58 |
+ ""} , |
|
59 |
+ { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", |
|
60 |
+ ""} , |
|
61 |
+ { "space", "\t\n\v\f\r ", ""} , |
|
62 |
+ { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", |
|
63 |
+ ""} , |
|
64 |
+ { "xdigit", "0123456789ABCDEFabcdef", |
|
65 |
+ ""} , |
|
66 |
+ { NULL, 0, "" } |
|
67 |
+}; |
0 | 68 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,139 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+/* character-name table */ |
|
38 |
+static struct cname { |
|
39 |
+ const char *name; |
|
40 |
+ char code; |
|
41 |
+} cnames[] = { |
|
42 |
+ { "NUL", '\0' }, |
|
43 |
+ { "SOH", '\001' }, |
|
44 |
+ { "STX", '\002' }, |
|
45 |
+ { "ETX", '\003' }, |
|
46 |
+ { "EOT", '\004' }, |
|
47 |
+ { "ENQ", '\005' }, |
|
48 |
+ { "ACK", '\006' }, |
|
49 |
+ { "BEL", '\007' }, |
|
50 |
+ { "alert", '\007' }, |
|
51 |
+ { "BS", '\010' }, |
|
52 |
+ { "backspace", '\b' }, |
|
53 |
+ { "HT", '\011' }, |
|
54 |
+ { "tab", '\t' }, |
|
55 |
+ { "LF", '\012' }, |
|
56 |
+ { "newline", '\n' }, |
|
57 |
+ { "VT", '\013' }, |
|
58 |
+ { "vertical-tab", '\v' }, |
|
59 |
+ { "FF", '\014' }, |
|
60 |
+ { "form-feed", '\f' }, |
|
61 |
+ { "CR", '\015' }, |
|
62 |
+ { "carriage-return", '\r' }, |
|
63 |
+ { "SO", '\016' }, |
|
64 |
+ { "SI", '\017' }, |
|
65 |
+ { "DLE", '\020' }, |
|
66 |
+ { "DC1", '\021' }, |
|
67 |
+ { "DC2", '\022' }, |
|
68 |
+ { "DC3", '\023' }, |
|
69 |
+ { "DC4", '\024' }, |
|
70 |
+ { "NAK", '\025' }, |
|
71 |
+ { "SYN", '\026' }, |
|
72 |
+ { "ETB", '\027' }, |
|
73 |
+ { "CAN", '\030' }, |
|
74 |
+ { "EM", '\031' }, |
|
75 |
+ { "SUB", '\032' }, |
|
76 |
+ { "ESC", '\033' }, |
|
77 |
+ { "IS4", '\034' }, |
|
78 |
+ { "FS", '\034' }, |
|
79 |
+ { "IS3", '\035' }, |
|
80 |
+ { "GS", '\035' }, |
|
81 |
+ { "IS2", '\036' }, |
|
82 |
+ { "RS", '\036' }, |
|
83 |
+ { "IS1", '\037' }, |
|
84 |
+ { "US", '\037' }, |
|
85 |
+ { "space", ' ' }, |
|
86 |
+ { "exclamation-mark", '!' }, |
|
87 |
+ { "quotation-mark", '"' }, |
|
88 |
+ { "number-sign", '#' }, |
|
89 |
+ { "dollar-sign", '$' }, |
|
90 |
+ { "percent-sign", '%' }, |
|
91 |
+ { "ampersand", '&' }, |
|
92 |
+ { "apostrophe", '\'' }, |
|
93 |
+ { "left-parenthesis", '(' }, |
|
94 |
+ { "right-parenthesis", ')' }, |
|
95 |
+ { "asterisk", '*' }, |
|
96 |
+ { "plus-sign", '+' }, |
|
97 |
+ { "comma", ',' }, |
|
98 |
+ { "hyphen", '-' }, |
|
99 |
+ { "hyphen-minus", '-' }, |
|
100 |
+ { "period", '.' }, |
|
101 |
+ { "full-stop", '.' }, |
|
102 |
+ { "slash", '/' }, |
|
103 |
+ { "solidus", '/' }, |
|
104 |
+ { "zero", '0' }, |
|
105 |
+ { "one", '1' }, |
|
106 |
+ { "two", '2' }, |
|
107 |
+ { "three", '3' }, |
|
108 |
+ { "four", '4' }, |
|
109 |
+ { "five", '5' }, |
|
110 |
+ { "six", '6' }, |
|
111 |
+ { "seven", '7' }, |
|
112 |
+ { "eight", '8' }, |
|
113 |
+ { "nine", '9' }, |
|
114 |
+ { "colon", ':' }, |
|
115 |
+ { "semicolon", ';' }, |
|
116 |
+ { "less-than-sign", '<' }, |
|
117 |
+ { "equals-sign", '=' }, |
|
118 |
+ { "greater-than-sign", '>' }, |
|
119 |
+ { "question-mark", '?' }, |
|
120 |
+ { "commercial-at", '@' }, |
|
121 |
+ { "left-square-bracket", '[' }, |
|
122 |
+ { "backslash", '\\' }, |
|
123 |
+ { "reverse-solidus", '\\' }, |
|
124 |
+ { "right-square-bracket", ']' }, |
|
125 |
+ { "circumflex", '^' }, |
|
126 |
+ { "circumflex-accent", '^' }, |
|
127 |
+ { "underscore", '_' }, |
|
128 |
+ { "low-line", '_' }, |
|
129 |
+ { "grave-accent", '`' }, |
|
130 |
+ { "left-brace", '{' }, |
|
131 |
+ { "left-curly-bracket", '{' }, |
|
132 |
+ { "vertical-line", '|' }, |
|
133 |
+ { "right-brace", '}' }, |
|
134 |
+ { "right-curly-bracket", '}' }, |
|
135 |
+ { "tilde", '~' }, |
|
136 |
+ { "DEL", '\177' }, |
|
137 |
+ { NULL, 0 } |
|
138 |
+}; |
0 | 139 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1020 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+/* |
|
38 |
+ * The matching engine and friends. This file is #included by regexec.c |
|
39 |
+ * after suitable #defines of a variety of macros used herein, so that |
|
40 |
+ * different state representations can be used without duplicating masses |
|
41 |
+ * of code. |
|
42 |
+ */ |
|
43 |
+ |
|
44 |
+#ifdef SNAMES |
|
45 |
+#define matcher smatcher |
|
46 |
+#define fast sfast |
|
47 |
+#define slow sslow |
|
48 |
+#define dissect sdissect |
|
49 |
+#define backref sbackref |
|
50 |
+#define step sstep |
|
51 |
+#define print sprint |
|
52 |
+#define at sat |
|
53 |
+#define match smat |
|
54 |
+#define nope snope |
|
55 |
+#endif |
|
56 |
+#ifdef LNAMES |
|
57 |
+#define matcher lmatcher |
|
58 |
+#define fast lfast |
|
59 |
+#define slow lslow |
|
60 |
+#define dissect ldissect |
|
61 |
+#define backref lbackref |
|
62 |
+#define step lstep |
|
63 |
+#define print lprint |
|
64 |
+#define at lat |
|
65 |
+#define match lmat |
|
66 |
+#define nope lnope |
|
67 |
+#endif |
|
68 |
+ |
|
69 |
+/* another structure passed up and down to avoid zillions of parameters */ |
|
70 |
+struct match { |
|
71 |
+ struct re_guts *g; |
|
72 |
+ int eflags; |
|
73 |
+ regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
|
74 |
+ char *offp; /* offsets work from here */ |
|
75 |
+ char *beginp; /* start of string -- virtual NUL precedes */ |
|
76 |
+ char *endp; /* end of string -- virtual NUL here */ |
|
77 |
+ char *coldp; /* can be no match starting before here */ |
|
78 |
+ char **lastpos; /* [nplus+1] */ |
|
79 |
+ STATEVARS; |
|
80 |
+ states st; /* current states */ |
|
81 |
+ states fresh; /* states for a fresh start */ |
|
82 |
+ states tmp; /* temporary */ |
|
83 |
+ states empty; /* empty set of states */ |
|
84 |
+}; |
|
85 |
+ |
|
86 |
+static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int); |
|
87 |
+static char *dissect(struct match *, char *, char *, sopno, sopno); |
|
88 |
+static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int); |
|
89 |
+static char *fast(struct match *, char *, char *, sopno, sopno); |
|
90 |
+static char *slow(struct match *, char *, char *, sopno, sopno); |
|
91 |
+static states step(struct re_guts *, sopno, sopno, states, int, states); |
|
92 |
+#define MAX_RECURSION 100 |
|
93 |
+#define BOL (OUT+1) |
|
94 |
+#define EOL (BOL+1) |
|
95 |
+#define BOLEOL (BOL+2) |
|
96 |
+#define NOTHING (BOL+3) |
|
97 |
+#define BOW (BOL+4) |
|
98 |
+#define EOW (BOL+5) |
|
99 |
+#define CODEMAX (BOL+5) /* highest code used */ |
|
100 |
+#define NONCHAR(c) ((c) > CHAR_MAX) |
|
101 |
+#define NNONCHAR (CODEMAX-CHAR_MAX) |
|
102 |
+#ifdef REDEBUG |
|
103 |
+static void print(struct match *, char *, states, int, FILE *); |
|
104 |
+#endif |
|
105 |
+#ifdef REDEBUG |
|
106 |
+static void at(struct match *, char *, char *, char *, sopno, sopno); |
|
107 |
+#endif |
|
108 |
+#ifdef REDEBUG |
|
109 |
+static char *pchar(int); |
|
110 |
+#endif |
|
111 |
+ |
|
112 |
+#ifdef REDEBUG |
|
113 |
+#define SP(t, s, c) print(m, t, s, c, stdout) |
|
114 |
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
|
115 |
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); } |
|
116 |
+static int nope = 0; |
|
117 |
+#else |
|
118 |
+#define SP(t, s, c) /* nothing */ |
|
119 |
+#define AT(t, p1, p2, s1, s2) /* nothing */ |
|
120 |
+#define NOTE(s) /* nothing */ |
|
121 |
+#endif |
|
122 |
+ |
|
123 |
+/* |
|
124 |
+ - matcher - the actual matching engine |
|
125 |
+ */ |
|
126 |
+static int /* 0 success, REG_NOMATCH failure */ |
|
127 |
+matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], |
|
128 |
+ int eflags) |
|
129 |
+{ |
|
130 |
+ char *endp; |
|
131 |
+ size_t i; |
|
132 |
+ struct match mv; |
|
133 |
+ struct match *m = &mv; |
|
134 |
+ char *dp; |
|
135 |
+ const sopno gf = g->firststate+1; /* +1 for OEND */ |
|
136 |
+ const sopno gl = g->laststate; |
|
137 |
+ char *start; |
|
138 |
+ char *stop; |
|
139 |
+ |
|
140 |
+ /* simplify the situation where possible */ |
|
141 |
+ if (g->cflags&REG_NOSUB) |
|
142 |
+ nmatch = 0; |
|
143 |
+ if (eflags&REG_STARTEND) { |
|
144 |
+ start = string + pmatch[0].rm_so; |
|
145 |
+ stop = string + pmatch[0].rm_eo; |
|
146 |
+ } else { |
|
147 |
+ start = string; |
|
148 |
+ stop = start + strlen(start); |
|
149 |
+ } |
|
150 |
+ if (stop < start) |
|
151 |
+ return(REG_INVARG); |
|
152 |
+ |
|
153 |
+ /* prescreening; this does wonders for this rather slow code */ |
|
154 |
+ if (g->must != NULL) { |
|
155 |
+ for (dp = start; dp < stop; dp++) |
|
156 |
+ if (*dp == g->must[0] && stop - dp >= g->mlen && |
|
157 |
+ memcmp(dp, g->must, (size_t)g->mlen) == 0) |
|
158 |
+ break; |
|
159 |
+ if (dp == stop) /* we didn't find g->must */ |
|
160 |
+ return(REG_NOMATCH); |
|
161 |
+ } |
|
162 |
+ |
|
163 |
+ /* match struct setup */ |
|
164 |
+ m->g = g; |
|
165 |
+ m->eflags = eflags; |
|
166 |
+ m->pmatch = NULL; |
|
167 |
+ m->lastpos = NULL; |
|
168 |
+ m->offp = string; |
|
169 |
+ m->beginp = start; |
|
170 |
+ m->endp = stop; |
|
171 |
+ STATESETUP(m, 4); |
|
172 |
+ SETUP(m->st); |
|
173 |
+ SETUP(m->fresh); |
|
174 |
+ SETUP(m->tmp); |
|
175 |
+ SETUP(m->empty); |
|
176 |
+ CLEAR(m->empty); |
|
177 |
+ |
|
178 |
+ /* this loop does only one repetition except for backrefs */ |
|
179 |
+ for (;;) { |
|
180 |
+ endp = fast(m, start, stop, gf, gl); |
|
181 |
+ if (endp == NULL) { /* a miss */ |
|
182 |
+ free(m->pmatch); |
|
183 |
+ free(m->lastpos); |
|
184 |
+ STATETEARDOWN(m); |
|
185 |
+ return(REG_NOMATCH); |
|
186 |
+ } |
|
187 |
+ if (nmatch == 0 && !g->backrefs) |
|
188 |
+ break; /* no further info needed */ |
|
189 |
+ |
|
190 |
+ /* where? */ |
|
191 |
+ assert(m->coldp != NULL); |
|
192 |
+ for (;;) { |
|
193 |
+ NOTE("finding start"); |
|
194 |
+ endp = slow(m, m->coldp, stop, gf, gl); |
|
195 |
+ if (endp != NULL) |
|
196 |
+ break; |
|
197 |
+ assert(m->coldp < m->endp); |
|
198 |
+ m->coldp++; |
|
199 |
+ } |
|
200 |
+ if (nmatch == 1 && !g->backrefs) |
|
201 |
+ break; /* no further info needed */ |
|
202 |
+ |
|
203 |
+ /* oh my, he wants the subexpressions... */ |
|
204 |
+ if (m->pmatch == NULL) |
|
205 |
+ m->pmatch = (regmatch_t *)cli_malloc((m->g->nsub + 1) * |
|
206 |
+ sizeof(regmatch_t)); |
|
207 |
+ if (m->pmatch == NULL) { |
|
208 |
+ STATETEARDOWN(m); |
|
209 |
+ return(REG_ESPACE); |
|
210 |
+ } |
|
211 |
+ for (i = 1; i <= m->g->nsub; i++) |
|
212 |
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
|
213 |
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) { |
|
214 |
+ NOTE("dissecting"); |
|
215 |
+ dp = dissect(m, m->coldp, endp, gf, gl); |
|
216 |
+ } else { |
|
217 |
+ if (g->nplus > 0 && m->lastpos == NULL) |
|
218 |
+ m->lastpos = (char **)cli_malloc((g->nplus+1) * |
|
219 |
+ sizeof(char *)); |
|
220 |
+ if (g->nplus > 0 && m->lastpos == NULL) { |
|
221 |
+ free(m->pmatch); |
|
222 |
+ STATETEARDOWN(m); |
|
223 |
+ return(REG_ESPACE); |
|
224 |
+ } |
|
225 |
+ NOTE("backref dissect"); |
|
226 |
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
|
227 |
+ } |
|
228 |
+ if (dp != NULL) |
|
229 |
+ break; |
|
230 |
+ |
|
231 |
+ /* uh-oh... we couldn't find a subexpression-level match */ |
|
232 |
+ assert(g->backrefs); /* must be back references doing it */ |
|
233 |
+ assert(g->nplus == 0 || m->lastpos != NULL); |
|
234 |
+ for (;;) { |
|
235 |
+ if (dp != NULL || endp <= m->coldp) |
|
236 |
+ break; /* defeat */ |
|
237 |
+ NOTE("backoff"); |
|
238 |
+ endp = slow(m, m->coldp, endp-1, gf, gl); |
|
239 |
+ if (endp == NULL) |
|
240 |
+ break; /* defeat */ |
|
241 |
+ /* try it on a shorter possibility */ |
|
242 |
+#ifndef NDEBUG |
|
243 |
+ for (i = 1; i <= m->g->nsub; i++) { |
|
244 |
+ assert(m->pmatch[i].rm_so == -1); |
|
245 |
+ assert(m->pmatch[i].rm_eo == -1); |
|
246 |
+ } |
|
247 |
+#endif |
|
248 |
+ NOTE("backoff dissect"); |
|
249 |
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
|
250 |
+ } |
|
251 |
+ assert(dp == NULL || dp == endp); |
|
252 |
+ if (dp != NULL) /* found a shorter one */ |
|
253 |
+ break; |
|
254 |
+ |
|
255 |
+ /* despite initial appearances, there is no match here */ |
|
256 |
+ NOTE("false alarm"); |
|
257 |
+ if (m->coldp == stop) |
|
258 |
+ break; |
|
259 |
+ start = m->coldp + 1; /* recycle starting later */ |
|
260 |
+ } |
|
261 |
+ |
|
262 |
+ /* fill in the details if requested */ |
|
263 |
+ if (nmatch > 0) { |
|
264 |
+ pmatch[0].rm_so = m->coldp - m->offp; |
|
265 |
+ pmatch[0].rm_eo = endp - m->offp; |
|
266 |
+ } |
|
267 |
+ if (nmatch > 1) { |
|
268 |
+ assert(m->pmatch != NULL); |
|
269 |
+ for (i = 1; i < nmatch; i++) |
|
270 |
+ if (i <= m->g->nsub) |
|
271 |
+ pmatch[i] = m->pmatch[i]; |
|
272 |
+ else { |
|
273 |
+ pmatch[i].rm_so = -1; |
|
274 |
+ pmatch[i].rm_eo = -1; |
|
275 |
+ } |
|
276 |
+ } |
|
277 |
+ |
|
278 |
+ if (m->pmatch != NULL) |
|
279 |
+ free((char *)m->pmatch); |
|
280 |
+ if (m->lastpos != NULL) |
|
281 |
+ free((char *)m->lastpos); |
|
282 |
+ STATETEARDOWN(m); |
|
283 |
+ return(0); |
|
284 |
+} |
|
285 |
+ |
|
286 |
+/* |
|
287 |
+ - dissect - figure out what matched what, no back references |
|
288 |
+ */ |
|
289 |
+static char * /* == stop (success) always */ |
|
290 |
+dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
|
291 |
+{ |
|
292 |
+ int i; |
|
293 |
+ sopno ss; /* start sop of current subRE */ |
|
294 |
+ sopno es; /* end sop of current subRE */ |
|
295 |
+ char *sp; /* start of string matched by it */ |
|
296 |
+ char *stp; /* string matched by it cannot pass here */ |
|
297 |
+ char *rest; /* start of rest of string */ |
|
298 |
+ char *tail; /* string unmatched by rest of RE */ |
|
299 |
+ sopno ssub; /* start sop of subsubRE */ |
|
300 |
+ sopno esub; /* end sop of subsubRE */ |
|
301 |
+ char *ssp; /* start of string matched by subsubRE */ |
|
302 |
+ char *sep; /* end of string matched by subsubRE */ |
|
303 |
+ char *oldssp; /* previous ssp */ |
|
304 |
+ char *dp; |
|
305 |
+ |
|
306 |
+ AT("diss", start, stop, startst, stopst); |
|
307 |
+ sp = start; |
|
308 |
+ for (ss = startst; ss < stopst; ss = es) { |
|
309 |
+ /* identify end of subRE */ |
|
310 |
+ es = ss; |
|
311 |
+ switch (OP(m->g->strip[es])) { |
|
312 |
+ case OPLUS_: |
|
313 |
+ case OQUEST_: |
|
314 |
+ es += OPND(m->g->strip[es]); |
|
315 |
+ break; |
|
316 |
+ case OCH_: |
|
317 |
+ while (OP(m->g->strip[es]) != O_CH) |
|
318 |
+ es += OPND(m->g->strip[es]); |
|
319 |
+ break; |
|
320 |
+ } |
|
321 |
+ es++; |
|
322 |
+ |
|
323 |
+ /* figure out what it matched */ |
|
324 |
+ switch (OP(m->g->strip[ss])) { |
|
325 |
+ case OEND: |
|
326 |
+ assert(nope); |
|
327 |
+ break; |
|
328 |
+ case OCHAR: |
|
329 |
+ sp++; |
|
330 |
+ break; |
|
331 |
+ case OBOL: |
|
332 |
+ case OEOL: |
|
333 |
+ case OBOW: |
|
334 |
+ case OEOW: |
|
335 |
+ break; |
|
336 |
+ case OANY: |
|
337 |
+ case OANYOF: |
|
338 |
+ sp++; |
|
339 |
+ break; |
|
340 |
+ case OBACK_: |
|
341 |
+ case O_BACK: |
|
342 |
+ assert(nope); |
|
343 |
+ break; |
|
344 |
+ /* cases where length of match is hard to find */ |
|
345 |
+ case OQUEST_: |
|
346 |
+ stp = stop; |
|
347 |
+ for (;;) { |
|
348 |
+ /* how long could this one be? */ |
|
349 |
+ rest = slow(m, sp, stp, ss, es); |
|
350 |
+ assert(rest != NULL); /* it did match */ |
|
351 |
+ /* could the rest match the rest? */ |
|
352 |
+ tail = slow(m, rest, stop, es, stopst); |
|
353 |
+ if (tail == stop) |
|
354 |
+ break; /* yes! */ |
|
355 |
+ /* no -- try a shorter match for this one */ |
|
356 |
+ stp = rest - 1; |
|
357 |
+ assert(stp >= sp); /* it did work */ |
|
358 |
+ } |
|
359 |
+ ssub = ss + 1; |
|
360 |
+ esub = es - 1; |
|
361 |
+ /* did innards match? */ |
|
362 |
+ if (slow(m, sp, rest, ssub, esub) != NULL) { |
|
363 |
+ dp = dissect(m, sp, rest, ssub, esub); |
|
364 |
+ assert(dp == rest); |
|
365 |
+ } else /* no */ |
|
366 |
+ assert(sp == rest); |
|
367 |
+ sp = rest; |
|
368 |
+ break; |
|
369 |
+ case OPLUS_: |
|
370 |
+ stp = stop; |
|
371 |
+ for (;;) { |
|
372 |
+ /* how long could this one be? */ |
|
373 |
+ rest = slow(m, sp, stp, ss, es); |
|
374 |
+ assert(rest != NULL); /* it did match */ |
|
375 |
+ /* could the rest match the rest? */ |
|
376 |
+ tail = slow(m, rest, stop, es, stopst); |
|
377 |
+ if (tail == stop) |
|
378 |
+ break; /* yes! */ |
|
379 |
+ /* no -- try a shorter match for this one */ |
|
380 |
+ stp = rest - 1; |
|
381 |
+ assert(stp >= sp); /* it did work */ |
|
382 |
+ } |
|
383 |
+ ssub = ss + 1; |
|
384 |
+ esub = es - 1; |
|
385 |
+ ssp = sp; |
|
386 |
+ oldssp = ssp; |
|
387 |
+ for (;;) { /* find last match of innards */ |
|
388 |
+ sep = slow(m, ssp, rest, ssub, esub); |
|
389 |
+ if (sep == NULL || sep == ssp) |
|
390 |
+ break; /* failed or matched null */ |
|
391 |
+ oldssp = ssp; /* on to next try */ |
|
392 |
+ ssp = sep; |
|
393 |
+ } |
|
394 |
+ if (sep == NULL) { |
|
395 |
+ /* last successful match */ |
|
396 |
+ sep = ssp; |
|
397 |
+ ssp = oldssp; |
|
398 |
+ } |
|
399 |
+ assert(sep == rest); /* must exhaust substring */ |
|
400 |
+ assert(slow(m, ssp, sep, ssub, esub) == rest); |
|
401 |
+ dp = dissect(m, ssp, sep, ssub, esub); |
|
402 |
+ assert(dp == sep); |
|
403 |
+ sp = rest; |
|
404 |
+ break; |
|
405 |
+ case OCH_: |
|
406 |
+ stp = stop; |
|
407 |
+ for (;;) { |
|
408 |
+ /* how long could this one be? */ |
|
409 |
+ rest = slow(m, sp, stp, ss, es); |
|
410 |
+ assert(rest != NULL); /* it did match */ |
|
411 |
+ /* could the rest match the rest? */ |
|
412 |
+ tail = slow(m, rest, stop, es, stopst); |
|
413 |
+ if (tail == stop) |
|
414 |
+ break; /* yes! */ |
|
415 |
+ /* no -- try a shorter match for this one */ |
|
416 |
+ stp = rest - 1; |
|
417 |
+ assert(stp >= sp); /* it did work */ |
|
418 |
+ } |
|
419 |
+ ssub = ss + 1; |
|
420 |
+ esub = ss + OPND(m->g->strip[ss]) - 1; |
|
421 |
+ assert(OP(m->g->strip[esub]) == OOR1); |
|
422 |
+ for (;;) { /* find first matching branch */ |
|
423 |
+ if (slow(m, sp, rest, ssub, esub) == rest) |
|
424 |
+ break; /* it matched all of it */ |
|
425 |
+ /* that one missed, try next one */ |
|
426 |
+ assert(OP(m->g->strip[esub]) == OOR1); |
|
427 |
+ esub++; |
|
428 |
+ assert(OP(m->g->strip[esub]) == OOR2); |
|
429 |
+ ssub = esub + 1; |
|
430 |
+ esub += OPND(m->g->strip[esub]); |
|
431 |
+ if (OP(m->g->strip[esub]) == OOR2) |
|
432 |
+ esub--; |
|
433 |
+ else |
|
434 |
+ assert(OP(m->g->strip[esub]) == O_CH); |
|
435 |
+ } |
|
436 |
+ dp = dissect(m, sp, rest, ssub, esub); |
|
437 |
+ assert(dp == rest); |
|
438 |
+ sp = rest; |
|
439 |
+ break; |
|
440 |
+ case O_PLUS: |
|
441 |
+ case O_QUEST: |
|
442 |
+ case OOR1: |
|
443 |
+ case OOR2: |
|
444 |
+ case O_CH: |
|
445 |
+ assert(nope); |
|
446 |
+ break; |
|
447 |
+ case OLPAREN: |
|
448 |
+ i = OPND(m->g->strip[ss]); |
|
449 |
+ assert(0 < i && i <= m->g->nsub); |
|
450 |
+ m->pmatch[i].rm_so = sp - m->offp; |
|
451 |
+ break; |
|
452 |
+ case ORPAREN: |
|
453 |
+ i = OPND(m->g->strip[ss]); |
|
454 |
+ assert(0 < i && i <= m->g->nsub); |
|
455 |
+ m->pmatch[i].rm_eo = sp - m->offp; |
|
456 |
+ break; |
|
457 |
+ default: /* uh oh */ |
|
458 |
+ assert(nope); |
|
459 |
+ break; |
|
460 |
+ } |
|
461 |
+ } |
|
462 |
+ |
|
463 |
+ assert(sp == stop); |
|
464 |
+ return(sp); |
|
465 |
+} |
|
466 |
+ |
|
467 |
+/* |
|
468 |
+ - backref - figure out what matched what, figuring in back references |
|
469 |
+ */ |
|
470 |
+static char * /* == stop (success) or NULL (failure) */ |
|
471 |
+backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, |
|
472 |
+ sopno lev, int rec) /* PLUS nesting level */ |
|
473 |
+{ |
|
474 |
+ int i; |
|
475 |
+ sopno ss; /* start sop of current subRE */ |
|
476 |
+ char *sp; /* start of string matched by it */ |
|
477 |
+ sopno ssub; /* start sop of subsubRE */ |
|
478 |
+ sopno esub; /* end sop of subsubRE */ |
|
479 |
+ char *ssp; /* start of string matched by subsubRE */ |
|
480 |
+ char *dp; |
|
481 |
+ size_t len; |
|
482 |
+ int hard; |
|
483 |
+ sop s; |
|
484 |
+ regoff_t offsave; |
|
485 |
+ cset *cs; |
|
486 |
+ |
|
487 |
+ AT("back", start, stop, startst, stopst); |
|
488 |
+ sp = start; |
|
489 |
+ |
|
490 |
+ /* get as far as we can with easy stuff */ |
|
491 |
+ hard = 0; |
|
492 |
+ for (ss = startst; !hard && ss < stopst; ss++) |
|
493 |
+ switch (OP(s = m->g->strip[ss])) { |
|
494 |
+ case OCHAR: |
|
495 |
+ if (sp == stop || *sp++ != (char)OPND(s)) |
|
496 |
+ return(NULL); |
|
497 |
+ break; |
|
498 |
+ case OANY: |
|
499 |
+ if (sp == stop) |
|
500 |
+ return(NULL); |
|
501 |
+ sp++; |
|
502 |
+ break; |
|
503 |
+ case OANYOF: |
|
504 |
+ cs = &m->g->sets[OPND(s)]; |
|
505 |
+ if (sp == stop || !CHIN(cs, *sp++)) |
|
506 |
+ return(NULL); |
|
507 |
+ break; |
|
508 |
+ case OBOL: |
|
509 |
+ if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || |
|
510 |
+ (sp < m->endp && *(sp-1) == '\n' && |
|
511 |
+ (m->g->cflags&REG_NEWLINE)) ) |
|
512 |
+ { /* yes */ } |
|
513 |
+ else |
|
514 |
+ return(NULL); |
|
515 |
+ break; |
|
516 |
+ case OEOL: |
|
517 |
+ if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || |
|
518 |
+ (sp < m->endp && *sp == '\n' && |
|
519 |
+ (m->g->cflags&REG_NEWLINE)) ) |
|
520 |
+ { /* yes */ } |
|
521 |
+ else |
|
522 |
+ return(NULL); |
|
523 |
+ break; |
|
524 |
+ case OBOW: |
|
525 |
+ if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || |
|
526 |
+ (sp < m->endp && *(sp-1) == '\n' && |
|
527 |
+ (m->g->cflags&REG_NEWLINE)) || |
|
528 |
+ (sp > m->beginp && |
|
529 |
+ !ISWORD(*(sp-1))) ) && |
|
530 |
+ (sp < m->endp && ISWORD(*sp)) ) |
|
531 |
+ { /* yes */ } |
|
532 |
+ else |
|
533 |
+ return(NULL); |
|
534 |
+ break; |
|
535 |
+ case OEOW: |
|
536 |
+ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || |
|
537 |
+ (sp < m->endp && *sp == '\n' && |
|
538 |
+ (m->g->cflags&REG_NEWLINE)) || |
|
539 |
+ (sp < m->endp && !ISWORD(*sp)) ) && |
|
540 |
+ (sp > m->beginp && ISWORD(*(sp-1))) ) |
|
541 |
+ { /* yes */ } |
|
542 |
+ else |
|
543 |
+ return(NULL); |
|
544 |
+ break; |
|
545 |
+ case O_QUEST: |
|
546 |
+ break; |
|
547 |
+ case OOR1: /* matches null but needs to skip */ |
|
548 |
+ ss++; |
|
549 |
+ s = m->g->strip[ss]; |
|
550 |
+ do { |
|
551 |
+ assert(OP(s) == OOR2); |
|
552 |
+ ss += OPND(s); |
|
553 |
+ } while (OP(s = m->g->strip[ss]) != O_CH); |
|
554 |
+ /* note that the ss++ gets us past the O_CH */ |
|
555 |
+ break; |
|
556 |
+ default: /* have to make a choice */ |
|
557 |
+ hard = 1; |
|
558 |
+ break; |
|
559 |
+ } |
|
560 |
+ if (!hard) { /* that was it! */ |
|
561 |
+ if (sp != stop) |
|
562 |
+ return(NULL); |
|
563 |
+ return(sp); |
|
564 |
+ } |
|
565 |
+ ss--; /* adjust for the for's final increment */ |
|
566 |
+ |
|
567 |
+ /* the hard stuff */ |
|
568 |
+ AT("hard", sp, stop, ss, stopst); |
|
569 |
+ s = m->g->strip[ss]; |
|
570 |
+ switch (OP(s)) { |
|
571 |
+ case OBACK_: /* the vilest depths */ |
|
572 |
+ i = OPND(s); |
|
573 |
+ assert(0 < i && i <= m->g->nsub); |
|
574 |
+ if (m->pmatch[i].rm_eo == -1) |
|
575 |
+ return(NULL); |
|
576 |
+ assert(m->pmatch[i].rm_so != -1); |
|
577 |
+ len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
|
578 |
+ if (len == 0 && rec++ > MAX_RECURSION) |
|
579 |
+ return(NULL); |
|
580 |
+ assert(stop - m->beginp >= len); |
|
581 |
+ if (sp > stop - len) |
|
582 |
+ return(NULL); /* not enough left to match */ |
|
583 |
+ ssp = m->offp + m->pmatch[i].rm_so; |
|
584 |
+ if (memcmp(sp, ssp, len) != 0) |
|
585 |
+ return(NULL); |
|
586 |
+ while (m->g->strip[ss] != SOP(O_BACK, i)) |
|
587 |
+ ss++; |
|
588 |
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
|
589 |
+ break; |
|
590 |
+ case OQUEST_: /* to null or not */ |
|
591 |
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
|
592 |
+ if (dp != NULL) |
|
593 |
+ return(dp); /* not */ |
|
594 |
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); |
|
595 |
+ break; |
|
596 |
+ case OPLUS_: |
|
597 |
+ assert(m->lastpos != NULL); |
|
598 |
+ assert(lev+1 <= m->g->nplus); |
|
599 |
+ m->lastpos[lev+1] = sp; |
|
600 |
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
|
601 |
+ break; |
|
602 |
+ case O_PLUS: |
|
603 |
+ if (sp == m->lastpos[lev]) /* last pass matched null */ |
|
604 |
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
|
605 |
+ /* try another pass */ |
|
606 |
+ m->lastpos[lev] = sp; |
|
607 |
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); |
|
608 |
+ if (dp == NULL) |
|
609 |
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
|
610 |
+ else |
|
611 |
+ return(dp); |
|
612 |
+ break; |
|
613 |
+ case OCH_: /* find the right one, if any */ |
|
614 |
+ ssub = ss + 1; |
|
615 |
+ esub = ss + OPND(s) - 1; |
|
616 |
+ assert(OP(m->g->strip[esub]) == OOR1); |
|
617 |
+ for (;;) { /* find first matching branch */ |
|
618 |
+ dp = backref(m, sp, stop, ssub, esub, lev, rec); |
|
619 |
+ if (dp != NULL) |
|
620 |
+ return(dp); |
|
621 |
+ /* that one missed, try next one */ |
|
622 |
+ if (OP(m->g->strip[esub]) == O_CH) |
|
623 |
+ return(NULL); /* there is none */ |
|
624 |
+ esub++; |
|
625 |
+ assert(OP(m->g->strip[esub]) == OOR2); |
|
626 |
+ ssub = esub + 1; |
|
627 |
+ esub += OPND(m->g->strip[esub]); |
|
628 |
+ if (OP(m->g->strip[esub]) == OOR2) |
|
629 |
+ esub--; |
|
630 |
+ else |
|
631 |
+ assert(OP(m->g->strip[esub]) == O_CH); |
|
632 |
+ } |
|
633 |
+ break; |
|
634 |
+ case OLPAREN: /* must undo assignment if rest fails */ |
|
635 |
+ i = OPND(s); |
|
636 |
+ assert(0 < i && i <= m->g->nsub); |
|
637 |
+ offsave = m->pmatch[i].rm_so; |
|
638 |
+ m->pmatch[i].rm_so = sp - m->offp; |
|
639 |
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
|
640 |
+ if (dp != NULL) |
|
641 |
+ return(dp); |
|
642 |
+ m->pmatch[i].rm_so = offsave; |
|
643 |
+ return(NULL); |
|
644 |
+ break; |
|
645 |
+ case ORPAREN: /* must undo assignment if rest fails */ |
|
646 |
+ i = OPND(s); |
|
647 |
+ assert(0 < i && i <= m->g->nsub); |
|
648 |
+ offsave = m->pmatch[i].rm_eo; |
|
649 |
+ m->pmatch[i].rm_eo = sp - m->offp; |
|
650 |
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
|
651 |
+ if (dp != NULL) |
|
652 |
+ return(dp); |
|
653 |
+ m->pmatch[i].rm_eo = offsave; |
|
654 |
+ return(NULL); |
|
655 |
+ break; |
|
656 |
+ default: /* uh oh */ |
|
657 |
+ assert(nope); |
|
658 |
+ break; |
|
659 |
+ } |
|
660 |
+ |
|
661 |
+ /* "can't happen" */ |
|
662 |
+ assert(nope); |
|
663 |
+ /* NOTREACHED */ |
|
664 |
+} |
|
665 |
+ |
|
666 |
+/* |
|
667 |
+ - fast - step through the string at top speed |
|
668 |
+ */ |
|
669 |
+static char * /* where tentative match ended, or NULL */ |
|
670 |
+fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
|
671 |
+{ |
|
672 |
+ states st = m->st; |
|
673 |
+ states fresh = m->fresh; |
|
674 |
+ states tmp = m->tmp; |
|
675 |
+ char *p = start; |
|
676 |
+ int c = (start == m->beginp) ? OUT : *(start-1); |
|
677 |
+ int lastc; /* previous c */ |
|
678 |
+ int flagch; |
|
679 |
+ int i; |
|
680 |
+ char *coldp; /* last p after which no match was underway */ |
|
681 |
+ |
|
682 |
+ CLEAR(st); |
|
683 |
+ SET1(st, startst); |
|
684 |
+ st = step(m->g, startst, stopst, st, NOTHING, st); |
|
685 |
+ ASSIGN(fresh, st); |
|
686 |
+ SP("start", st, *p); |
|
687 |
+ coldp = NULL; |
|
688 |
+ for (;;) { |
|
689 |
+ /* next character */ |
|
690 |
+ lastc = c; |
|
691 |
+ c = (p == m->endp) ? OUT : *p; |
|
692 |
+ if (EQ(st, fresh)) |
|
693 |
+ coldp = p; |
|
694 |
+ |
|
695 |
+ /* is there an EOL and/or BOL between lastc and c? */ |
|
696 |
+ flagch = '\0'; |
|
697 |
+ i = 0; |
|
698 |
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) || |
|
699 |
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) { |
|
700 |
+ flagch = BOL; |
|
701 |
+ i = m->g->nbol; |
|
702 |
+ } |
|
703 |
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) || |
|
704 |
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) { |
|
705 |
+ flagch = (flagch == BOL) ? BOLEOL : EOL; |
|
706 |
+ i += m->g->neol; |
|
707 |
+ } |
|
708 |
+ if (i != 0) { |
|
709 |
+ for (; i > 0; i--) |
|
710 |
+ st = step(m->g, startst, stopst, st, flagch, st); |
|
711 |
+ SP("boleol", st, c); |
|
712 |
+ } |
|
713 |
+ |
|
714 |
+ /* how about a word boundary? */ |
|
715 |
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
|
716 |
+ (c != OUT && ISWORD(c)) ) { |
|
717 |
+ flagch = BOW; |
|
718 |
+ } |
|
719 |
+ if ( (lastc != OUT && ISWORD(lastc)) && |
|
720 |
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
|
721 |
+ flagch = EOW; |
|
722 |
+ } |
|
723 |
+ if (flagch == BOW || flagch == EOW) { |
|
724 |
+ st = step(m->g, startst, stopst, st, flagch, st); |
|
725 |
+ SP("boweow", st, c); |
|
726 |
+ } |
|
727 |
+ |
|
728 |
+ /* are we done? */ |
|
729 |
+ if (ISSET(st, stopst) || p == stop) |
|
730 |
+ break; /* NOTE BREAK OUT */ |
|
731 |
+ |
|
732 |
+ /* no, we must deal with this character */ |
|
733 |
+ ASSIGN(tmp, st); |
|
734 |
+ ASSIGN(st, fresh); |
|
735 |
+ assert(c != OUT); |
|
736 |
+ st = step(m->g, startst, stopst, tmp, c, st); |
|
737 |
+ SP("aft", st, c); |
|
738 |
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
|
739 |
+ p++; |
|
740 |
+ } |
|
741 |
+ |
|
742 |
+ assert(coldp != NULL); |
|
743 |
+ m->coldp = coldp; |
|
744 |
+ if (ISSET(st, stopst)) |
|
745 |
+ return(p+1); |
|
746 |
+ else |
|
747 |
+ return(NULL); |
|
748 |
+} |
|
749 |
+ |
|
750 |
+/* |
|
751 |
+ - slow - step through the string more deliberately |
|
752 |
+ */ |
|
753 |
+static char * /* where it ended */ |
|
754 |
+slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) |
|
755 |
+{ |
|
756 |
+ states st = m->st; |
|
757 |
+ states empty = m->empty; |
|
758 |
+ states tmp = m->tmp; |
|
759 |
+ char *p = start; |
|
760 |
+ int c = (start == m->beginp) ? OUT : *(start-1); |
|
761 |
+ int lastc; /* previous c */ |
|
762 |
+ int flagch; |
|
763 |
+ int i; |
|
764 |
+ char *matchp; /* last p at which a match ended */ |
|
765 |
+ |
|
766 |
+ AT("slow", start, stop, startst, stopst); |
|
767 |
+ CLEAR(st); |
|
768 |
+ SET1(st, startst); |
|
769 |
+ SP("sstart", st, *p); |
|
770 |
+ st = step(m->g, startst, stopst, st, NOTHING, st); |
|
771 |
+ matchp = NULL; |
|
772 |
+ for (;;) { |
|
773 |
+ /* next character */ |
|
774 |
+ lastc = c; |
|
775 |
+ c = (p == m->endp) ? OUT : *p; |
|
776 |
+ |
|
777 |
+ /* is there an EOL and/or BOL between lastc and c? */ |
|
778 |
+ flagch = '\0'; |
|
779 |
+ i = 0; |
|
780 |
+ if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) || |
|
781 |
+ (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) { |
|
782 |
+ flagch = BOL; |
|
783 |
+ i = m->g->nbol; |
|
784 |
+ } |
|
785 |
+ if ( (c == '\n' && m->g->cflags&REG_NEWLINE) || |
|
786 |
+ (c == OUT && !(m->eflags&REG_NOTEOL)) ) { |
|
787 |
+ flagch = (flagch == BOL) ? BOLEOL : EOL; |
|
788 |
+ i += m->g->neol; |
|
789 |
+ } |
|
790 |
+ if (i != 0) { |
|
791 |
+ for (; i > 0; i--) |
|
792 |
+ st = step(m->g, startst, stopst, st, flagch, st); |
|
793 |
+ SP("sboleol", st, c); |
|
794 |
+ } |
|
795 |
+ |
|
796 |
+ /* how about a word boundary? */ |
|
797 |
+ if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
|
798 |
+ (c != OUT && ISWORD(c)) ) { |
|
799 |
+ flagch = BOW; |
|
800 |
+ } |
|
801 |
+ if ( (lastc != OUT && ISWORD(lastc)) && |
|
802 |
+ (flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
|
803 |
+ flagch = EOW; |
|
804 |
+ } |
|
805 |
+ if (flagch == BOW || flagch == EOW) { |
|
806 |
+ st = step(m->g, startst, stopst, st, flagch, st); |
|
807 |
+ SP("sboweow", st, c); |
|
808 |
+ } |
|
809 |
+ |
|
810 |
+ /* are we done? */ |
|
811 |
+ if (ISSET(st, stopst)) |
|
812 |
+ matchp = p; |
|
813 |
+ if (EQ(st, empty) || p == stop) |
|
814 |
+ break; /* NOTE BREAK OUT */ |
|
815 |
+ |
|
816 |
+ /* no, we must deal with this character */ |
|
817 |
+ ASSIGN(tmp, st); |
|
818 |
+ ASSIGN(st, empty); |
|
819 |
+ assert(c != OUT); |
|
820 |
+ st = step(m->g, startst, stopst, tmp, c, st); |
|
821 |
+ SP("saft", st, c); |
|
822 |
+ assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
|
823 |
+ p++; |
|
824 |
+ } |
|
825 |
+ |
|
826 |
+ return(matchp); |
|
827 |
+} |
|
828 |
+ |
|
829 |
+ |
|
830 |
+/* |
|
831 |
+ - step - map set of states reachable before char to set reachable after |
|
832 |
+ */ |
|
833 |
+static states |
|
834 |
+step(struct re_guts *g, |
|
835 |
+ sopno start, /* start state within strip */ |
|
836 |
+ sopno stop, /* state after stop state within strip */ |
|
837 |
+ states bef, /* states reachable before */ |
|
838 |
+ int ch, /* character or NONCHAR code */ |
|
839 |
+ states aft) /* states already known reachable after */ |
|
840 |
+{ |
|
841 |
+ cset *cs; |
|
842 |
+ sop s; |
|
843 |
+ sopno pc; |
|
844 |
+ onestate here; /* note, macros know this name */ |
|
845 |
+ sopno look; |
|
846 |
+ int i; |
|
847 |
+ |
|
848 |
+ for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { |
|
849 |
+ s = g->strip[pc]; |
|
850 |
+ switch (OP(s)) { |
|
851 |
+ case OEND: |
|
852 |
+ assert(pc == stop-1); |
|
853 |
+ break; |
|
854 |
+ case OCHAR: |
|
855 |
+ /* only characters can match */ |
|
856 |
+ assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
|
857 |
+ if (ch == (char)OPND(s)) |
|
858 |
+ FWD(aft, bef, 1); |
|
859 |
+ break; |
|
860 |
+ case OBOL: |
|
861 |
+ if (ch == BOL || ch == BOLEOL) |
|
862 |
+ FWD(aft, bef, 1); |
|
863 |
+ break; |
|
864 |
+ case OEOL: |
|
865 |
+ if (ch == EOL || ch == BOLEOL) |
|
866 |
+ FWD(aft, bef, 1); |
|
867 |
+ break; |
|
868 |
+ case OBOW: |
|
869 |
+ if (ch == BOW) |
|
870 |
+ FWD(aft, bef, 1); |
|
871 |
+ break; |
|
872 |
+ case OEOW: |
|
873 |
+ if (ch == EOW) |
|
874 |
+ FWD(aft, bef, 1); |
|
875 |
+ break; |
|
876 |
+ case OANY: |
|
877 |
+ if (!NONCHAR(ch)) |
|
878 |
+ FWD(aft, bef, 1); |
|
879 |
+ break; |
|
880 |
+ case OANYOF: |
|
881 |
+ cs = &g->sets[OPND(s)]; |
|
882 |
+ if (!NONCHAR(ch) && CHIN(cs, ch)) |
|
883 |
+ FWD(aft, bef, 1); |
|
884 |
+ break; |
|
885 |
+ case OBACK_: /* ignored here */ |
|
886 |
+ case O_BACK: |
|
887 |
+ FWD(aft, aft, 1); |
|
888 |
+ break; |
|
889 |
+ case OPLUS_: /* forward, this is just an empty */ |
|
890 |
+ FWD(aft, aft, 1); |
|
891 |
+ break; |
|
892 |
+ case O_PLUS: /* both forward and back */ |
|
893 |
+ FWD(aft, aft, 1); |
|
894 |
+ i = ISSETBACK(aft, OPND(s)); |
|
895 |
+ BACK(aft, aft, OPND(s)); |
|
896 |
+ if (!i && ISSETBACK(aft, OPND(s))) { |
|
897 |
+ /* oho, must reconsider loop body */ |
|
898 |
+ pc -= OPND(s) + 1; |
|
899 |
+ INIT(here, pc); |
|
900 |
+ } |
|
901 |
+ break; |
|
902 |
+ case OQUEST_: /* two branches, both forward */ |
|
903 |
+ FWD(aft, aft, 1); |
|
904 |
+ FWD(aft, aft, OPND(s)); |
|
905 |
+ break; |
|
906 |
+ case O_QUEST: /* just an empty */ |
|
907 |
+ FWD(aft, aft, 1); |
|
908 |
+ break; |
|
909 |
+ case OLPAREN: /* not significant here */ |
|
910 |
+ case ORPAREN: |
|
911 |
+ FWD(aft, aft, 1); |
|
912 |
+ break; |
|
913 |
+ case OCH_: /* mark the first two branches */ |
|
914 |
+ FWD(aft, aft, 1); |
|
915 |
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
|
916 |
+ FWD(aft, aft, OPND(s)); |
|
917 |
+ break; |
|
918 |
+ case OOR1: /* done a branch, find the O_CH */ |
|
919 |
+ if (ISSTATEIN(aft, here)) { |
|
920 |
+ for (look = 1; |
|
921 |
+ OP(s = g->strip[pc+look]) != O_CH; |
|
922 |
+ look += OPND(s)) |
|
923 |
+ assert(OP(s) == OOR2); |
|
924 |
+ FWD(aft, aft, look); |
|
925 |
+ } |
|
926 |
+ break; |
|
927 |
+ case OOR2: /* propagate OCH_'s marking */ |
|
928 |
+ FWD(aft, aft, 1); |
|
929 |
+ if (OP(g->strip[pc+OPND(s)]) != O_CH) { |
|
930 |
+ assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
|
931 |
+ FWD(aft, aft, OPND(s)); |
|
932 |
+ } |
|
933 |
+ break; |
|
934 |
+ case O_CH: /* just empty */ |
|
935 |
+ FWD(aft, aft, 1); |
|
936 |
+ break; |
|
937 |
+ default: /* ooooops... */ |
|
938 |
+ assert(nope); |
|
939 |
+ break; |
|
940 |
+ } |
|
941 |
+ } |
|
942 |
+ |
|
943 |
+ return(aft); |
|
944 |
+} |
|
945 |
+ |
|
946 |
+#ifdef REDEBUG |
|
947 |
+/* |
|
948 |
+ - print - print a set of states |
|
949 |
+ */ |
|
950 |
+static void |
|
951 |
+print(struct match *m, char *caption, states st, int ch, FILE *d) |
|
952 |
+{ |
|
953 |
+ struct re_guts *g = m->g; |
|
954 |
+ int i; |
|
955 |
+ int first = 1; |
|
956 |
+ |
|
957 |
+ if (!(m->eflags&REG_TRACE)) |
|
958 |
+ return; |
|
959 |
+ |
|
960 |
+ (void)fprintf(d, "%s", caption); |
|
961 |
+ if (ch != '\0') |
|
962 |
+ (void)fprintf(d, " %s", pchar(ch)); |
|
963 |
+ for (i = 0; i < g->nstates; i++) |
|
964 |
+ if (ISSET(st, i)) { |
|
965 |
+ (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
|
966 |
+ first = 0; |
|
967 |
+ } |
|
968 |
+ (void)fprintf(d, "\n"); |
|
969 |
+} |
|
970 |
+ |
|
971 |
+/* |
|
972 |
+ - at - print current situation |
|
973 |
+ */ |
|
974 |
+static void |
|
975 |
+at(struct match *m, char *title, char *start, char *stop, sopno startst, |
|
976 |
+ sopno stopst) |
|
977 |
+{ |
|
978 |
+ if (!(m->eflags&REG_TRACE)) |
|
979 |
+ return; |
|
980 |
+ |
|
981 |
+ (void)printf("%s %s-", title, pchar(*start)); |
|
982 |
+ (void)printf("%s ", pchar(*stop)); |
|
983 |
+ (void)printf("%ld-%ld\n", (long)startst, (long)stopst); |
|
984 |
+} |
|
985 |
+ |
|
986 |
+#ifndef PCHARDONE |
|
987 |
+#define PCHARDONE /* never again */ |
|
988 |
+/* |
|
989 |
+ - pchar - make a character printable |
|
990 |
+ * |
|
991 |
+ * Is this identical to regchar() over in debug.c? Well, yes. But a |
|
992 |
+ * duplicate here avoids having a debugging-capable regexec.o tied to |
|
993 |
+ * a matching debug.o, and this is convenient. It all disappears in |
|
994 |
+ * the non-debug compilation anyway, so it doesn't matter much. |
|
995 |
+ */ |
|
996 |
+static char * /* -> representation */ |
|
997 |
+pchar(int ch) |
|
998 |
+{ |
|
999 |
+ static char pbuf[10]; |
|
1000 |
+ |
|
1001 |
+ if (isprint(ch) || ch == ' ') |
|
1002 |
+ (void)snprintf(pbuf, sizeof pbuf, "%c", ch); |
|
1003 |
+ else |
|
1004 |
+ (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); |
|
1005 |
+ return(pbuf); |
|
1006 |
+} |
|
1007 |
+#endif |
|
1008 |
+#endif |
|
1009 |
+ |
|
1010 |
+#undef matcher |
|
1011 |
+#undef fast |
|
1012 |
+#undef slow |
|
1013 |
+#undef dissect |
|
1014 |
+#undef backref |
|
1015 |
+#undef step |
|
1016 |
+#undef print |
|
1017 |
+#undef at |
|
1018 |
+#undef match |
|
1019 |
+#undef nope |
0 | 1020 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1519 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+#include <sys/types.h> |
|
38 |
+#include <stdio.h> |
|
39 |
+#include <string.h> |
|
40 |
+#include <ctype.h> |
|
41 |
+#include <limits.h> |
|
42 |
+#include <stdlib.h> |
|
43 |
+#include "others.h" |
|
44 |
+#include "regex.h" |
|
45 |
+ |
|
46 |
+#include "utils.h" |
|
47 |
+#include "regex2.h" |
|
48 |
+ |
|
49 |
+#include "cclass.h" |
|
50 |
+#include "cname.h" |
|
51 |
+ |
|
52 |
/*
 * parse structure, passed up and down to avoid global variables and
 * other clumsinesses
 *
 * cli_regcomp() keeps one of these on its stack for the duration of a
 * compilation and hands a pointer to it (always named `p', which the
 * parser macros rely on) to every helper.
 */
struct parse {
	char *next;		/* next character in RE */
	char *end;		/* end of string (-> NUL normally) */
	int error;		/* has an error been seen? */
	sop *strip;		/* malloced strip */
	sopno ssize;		/* malloced strip size (allocated) */
	sopno slen;		/* malloced strip length (used) */
	int ncsalloc;		/* number of csets allocated */
	struct re_guts *g;	/* compiled-RE internals being filled in */
#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
	sopno pend[NPAREN];	/* -> ) ([0] unused) */
};
|
69 |
+ |
|
70 |
+static void p_ere(struct parse *, int); |
|
71 |
+static void p_ere_exp(struct parse *); |
|
72 |
+static void p_str(struct parse *); |
|
73 |
+static void p_bre(struct parse *, int, int); |
|
74 |
+static int p_simp_re(struct parse *, int); |
|
75 |
+static int p_count(struct parse *); |
|
76 |
+static void p_bracket(struct parse *); |
|
77 |
+static void p_b_term(struct parse *, cset *); |
|
78 |
+static void p_b_cclass(struct parse *, cset *); |
|
79 |
+static void p_b_eclass(struct parse *, cset *); |
|
80 |
+static char p_b_symbol(struct parse *); |
|
81 |
+static char p_b_coll_elem(struct parse *, int); |
|
82 |
+static char othercase(int); |
|
83 |
+static void bothcases(struct parse *, int); |
|
84 |
+static void ordinary(struct parse *, int); |
|
85 |
+static void nonnewline(struct parse *); |
|
86 |
+static void repeat(struct parse *, sopno, int, int); |
|
87 |
+static int seterr(struct parse *, int); |
|
88 |
+static cset *allocset(struct parse *); |
|
89 |
+static void freeset(struct parse *, cset *); |
|
90 |
+static int freezeset(struct parse *, cset *); |
|
91 |
+static int firstch(struct parse *, cset *); |
|
92 |
+static int nch(struct parse *, cset *); |
|
93 |
+static void mcadd(struct parse *, cset *, const char *); |
|
94 |
+static void mcinvert(struct parse *, cset *); |
|
95 |
+static void mccase(struct parse *, cset *); |
|
96 |
+static int isinsets(struct re_guts *, int); |
|
97 |
+static int samesets(struct re_guts *, int, int); |
|
98 |
+static void categorize(struct parse *, struct re_guts *); |
|
99 |
+static sopno dupl(struct parse *, sopno, sopno); |
|
100 |
+static void doemit(struct parse *, sop, size_t); |
|
101 |
+static void doinsert(struct parse *, sop, size_t, sopno); |
|
102 |
+static void dofwd(struct parse *, sopno, sop); |
|
103 |
+static void enlarge(struct parse *, sopno); |
|
104 |
+static void stripsnug(struct parse *, struct re_guts *); |
|
105 |
+static void findmust(struct parse *, struct re_guts *); |
|
106 |
+static sopno pluscount(struct parse *, struct re_guts *); |
|
107 |
+ |
|
108 |
+static char nuls[10]; /* place to point scanner in event of error */ |
|
109 |
+ |
|
110 |
/*
 * macros for use with parse structure
 * BEWARE: these know that the parse structure is named `p' !!!
 *
 * Input side: PEEK/PEEK2 look at the current/next character without
 * consuming it and do no bounds check themselves; MORE/MORE2 are the
 * bounds checks.  SEE/SEETWO combine both.  EAT/EATTWO consume the
 * character(s) only when they match and evaluate to 1 or 0.
 */
#define	PEEK()	(*p->next)
#define	PEEK2()	(*(p->next+1))
#define	MORE()	(p->next < p->end)
#define	MORE2()	(p->next+1 < p->end)
#define	SEE(c)	(MORE() && PEEK() == (c))
#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
/* unconditional advance by 1, 2 or n characters */
#define	NEXT()	(p->next++)
#define	NEXT2()	(p->next += 2)
#define	NEXTn(n)	(p->next += (n))
/* fetch the current character and step past it */
#define	GETNEXT()	(*p->next++)
/* error reporting: REQUIRE calls SETERROR(e) only when `co' is false */
#define	SETERROR(e)	seterr(p, (e))
#define	REQUIRE(co, e)	(void)((co) || SETERROR(e))
#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
/* note: MUSTEAT consumes a character whenever one remains, match or not */
#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
/* strip side: thin wrappers over doemit()/doinsert()/dofwd() */
#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
/* positions are expressed relative to the used length of the strip */
#define	HERE()		(p->slen)
#define	THERE()		(p->slen - 1)
#define	THERETHERE()	(p->slen - 2)
/* forget the last n strip entries by shrinking the used length */
#define	DROP(n)	(p->slen -= (n))
|
139 |
+ |
|
140 |
+#ifndef NDEBUG |
|
141 |
+static int never = 0; /* for use in asserts; shuts lint up */ |
|
142 |
+#else |
|
143 |
+#define never 0 /* some <assert.h>s have bugs too */ |
|
144 |
+#endif |
|
145 |
+ |
|
146 |
+/* |
|
147 |
+ - cli_regcomp - interface for parser and compilation |
|
148 |
+ */ |
|
149 |
+int /* 0 success, otherwise REG_something */ |
|
150 |
+cli_regcomp(regex_t *preg, const char *pattern, int cflags) |
|
151 |
+{ |
|
152 |
+ struct parse pa; |
|
153 |
+ struct re_guts *g; |
|
154 |
+ struct parse *p = &pa; |
|
155 |
+ int i; |
|
156 |
+ size_t len; |
|
157 |
+#ifdef REDEBUG |
|
158 |
+# define GOODFLAGS(f) (f) |
|
159 |
+#else |
|
160 |
+# define GOODFLAGS(f) ((f)&~REG_DUMP) |
|
161 |
+#endif |
|
162 |
+ |
|
163 |
+ cflags = GOODFLAGS(cflags); |
|
164 |
+ if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) |
|
165 |
+ return(REG_INVARG); |
|
166 |
+ |
|
167 |
+ if (cflags®_PEND) { |
|
168 |
+ if (preg->re_endp < pattern) |
|
169 |
+ return(REG_INVARG); |
|
170 |
+ len = preg->re_endp - pattern; |
|
171 |
+ } else |
|
172 |
+ len = strlen((const char *)pattern); |
|
173 |
+ |
|
174 |
+ /* do the mallocs early so failure handling is easy */ |
|
175 |
+ g = (struct re_guts *)cli_malloc(sizeof(struct re_guts) + |
|
176 |
+ (NC-1)*sizeof(cat_t)); |
|
177 |
+ if (g == NULL) |
|
178 |
+ return(REG_ESPACE); |
|
179 |
+ p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ |
|
180 |
+ p->strip = (sop *)cli_calloc(p->ssize, sizeof(sop)); |
|
181 |
+ p->slen = 0; |
|
182 |
+ if (p->strip == NULL) { |
|
183 |
+ free((char *)g); |
|
184 |
+ return(REG_ESPACE); |
|
185 |
+ } |
|
186 |
+ |
|
187 |
+ /* set things up */ |
|
188 |
+ p->g = g; |
|
189 |
+ p->next = (char *)pattern; /* convenience; we do not modify it */ |
|
190 |
+ p->end = p->next + len; |
|
191 |
+ p->error = 0; |
|
192 |
+ p->ncsalloc = 0; |
|
193 |
+ for (i = 0; i < NPAREN; i++) { |
|
194 |
+ p->pbegin[i] = 0; |
|
195 |
+ p->pend[i] = 0; |
|
196 |
+ } |
|
197 |
+ g->csetsize = NC; |
|
198 |
+ g->sets = NULL; |
|
199 |
+ g->setbits = NULL; |
|
200 |
+ g->ncsets = 0; |
|
201 |
+ g->cflags = cflags; |
|
202 |
+ g->iflags = 0; |
|
203 |
+ g->nbol = 0; |
|
204 |
+ g->neol = 0; |
|
205 |
+ g->must = NULL; |
|
206 |
+ g->mlen = 0; |
|
207 |
+ g->nsub = 0; |
|
208 |
+ g->ncategories = 1; /* category 0 is "everything else" */ |
|
209 |
+ g->categories = &g->catspace[-(CHAR_MIN)]; |
|
210 |
+ (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); |
|
211 |
+ g->backrefs = 0; |
|
212 |
+ |
|
213 |
+ /* do it */ |
|
214 |
+ EMIT(OEND, 0); |
|
215 |
+ g->firststate = THERE(); |
|
216 |
+ if (cflags®_EXTENDED) |
|
217 |
+ p_ere(p, OUT); |
|
218 |
+ else if (cflags®_NOSPEC) |
|
219 |
+ p_str(p); |
|
220 |
+ else |
|
221 |
+ p_bre(p, OUT, OUT); |
|
222 |
+ EMIT(OEND, 0); |
|
223 |
+ g->laststate = THERE(); |
|
224 |
+ |
|
225 |
+ /* tidy up loose ends and fill things in */ |
|
226 |
+ categorize(p, g); |
|
227 |
+ stripsnug(p, g); |
|
228 |
+ findmust(p, g); |
|
229 |
+ g->nplus = pluscount(p, g); |
|
230 |
+ g->magic = MAGIC2; |
|
231 |
+ preg->re_nsub = g->nsub; |
|
232 |
+ preg->re_g = g; |
|
233 |
+ preg->re_magic = MAGIC1; |
|
234 |
+#ifndef REDEBUG |
|
235 |
+ /* not debugging, so can't rely on the assert() in cli_regexec() */ |
|
236 |
+ if (g->iflags&BAD) |
|
237 |
+ SETERROR(REG_ASSERT); |
|
238 |
+#endif |
|
239 |
+ |
|
240 |
+ /* win or lose, we're done */ |
|
241 |
+ if (p->error != 0) /* lose */ |
|
242 |
+ cli_regfree(preg); |
|
243 |
+ return(p->error); |
|
244 |
+} |
|
245 |
+ |
|
246 |
+/* |
|
247 |
+ - p_ere - ERE parser top level, concatenation and alternation |
|
248 |
+ */ |
|
249 |
+static void |
|
250 |
+p_ere(struct parse *p, int stop) /* character this ERE should end at */ |
|
251 |
+{ |
|
252 |
+ char c; |
|
253 |
+ sopno prevback; |
|
254 |
+ sopno prevfwd; |
|
255 |
+ sopno conc; |
|
256 |
+ int first = 1; /* is this the first alternative? */ |
|
257 |
+ |
|
258 |
+ for (;;) { |
|
259 |
+ /* do a bunch of concatenated expressions */ |
|
260 |
+ conc = HERE(); |
|
261 |
+ while (MORE() && (c = PEEK()) != '|' && c != stop) |
|
262 |
+ p_ere_exp(p); |
|
263 |
+ REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
|
264 |
+ |
|
265 |
+ if (!EAT('|')) |
|
266 |
+ break; /* NOTE BREAK OUT */ |
|
267 |
+ |
|
268 |
+ if (first) { |
|
269 |
+ INSERT(OCH_, conc); /* offset is wrong */ |
|
270 |
+ prevfwd = conc; |
|
271 |
+ prevback = conc; |
|
272 |
+ first = 0; |
|
273 |
+ } |
|
274 |
+ ASTERN(OOR1, prevback); |
|
275 |
+ prevback = THERE(); |
|
276 |
+ AHEAD(prevfwd); /* fix previous offset */ |
|
277 |
+ prevfwd = HERE(); |
|
278 |
+ EMIT(OOR2, 0); /* offset is very wrong */ |
|
279 |
+ } |
|
280 |
+ |
|
281 |
+ if (!first) { /* tail-end fixups */ |
|
282 |
+ AHEAD(prevfwd); |
|
283 |
+ ASTERN(O_CH, prevback); |
|
284 |
+ } |
|
285 |
+ |
|
286 |
+ assert(!MORE() || SEE(stop)); |
|
287 |
+} |
|
288 |
+ |
|
289 |
+/* |
|
290 |
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op |
|
291 |
+ */ |
|
292 |
+static void |
|
293 |
+p_ere_exp(struct parse *p) |
|
294 |
+{ |
|
295 |
+ char c; |
|
296 |
+ sopno pos; |
|
297 |
+ int count; |
|
298 |
+ int count2; |
|
299 |
+ sopno subno; |
|
300 |
+ int wascaret = 0; |
|
301 |
+ |
|
302 |
+ assert(MORE()); /* caller should have ensured this */ |
|
303 |
+ c = GETNEXT(); |
|
304 |
+ |
|
305 |
+ pos = HERE(); |
|
306 |
+ switch (c) { |
|
307 |
+ case '(': |
|
308 |
+ REQUIRE(MORE(), REG_EPAREN); |
|
309 |
+ p->g->nsub++; |
|
310 |
+ subno = p->g->nsub; |
|
311 |
+ if (subno < NPAREN) |
|
312 |
+ p->pbegin[subno] = HERE(); |
|
313 |
+ EMIT(OLPAREN, subno); |
|
314 |
+ if (!SEE(')')) |
|
315 |
+ p_ere(p, ')'); |
|
316 |
+ if (subno < NPAREN) { |
|
317 |
+ p->pend[subno] = HERE(); |
|
318 |
+ assert(p->pend[subno] != 0); |
|
319 |
+ } |
|
320 |
+ EMIT(ORPAREN, subno); |
|
321 |
+ MUSTEAT(')', REG_EPAREN); |
|
322 |
+ break; |
|
323 |
+#ifndef POSIX_MISTAKE |
|
324 |
+ case ')': /* happens only if no current unmatched ( */ |
|
325 |
+ /* |
|
326 |
+ * You may ask, why the ifndef? Because I didn't notice |
|
327 |
+ * this until slightly too late for 1003.2, and none of the |
|
328 |
+ * other 1003.2 regular-expression reviewers noticed it at |
|
329 |
+ * all. So an unmatched ) is legal POSIX, at least until |
|
330 |
+ * we can get it fixed. |
|
331 |
+ */ |
|
332 |
+ SETERROR(REG_EPAREN); |
|
333 |
+ break; |
|
334 |
+#endif |
|
335 |
+ case '^': |
|
336 |
+ EMIT(OBOL, 0); |
|
337 |
+ p->g->iflags |= USEBOL; |
|
338 |
+ p->g->nbol++; |
|
339 |
+ wascaret = 1; |
|
340 |
+ break; |
|
341 |
+ case '$': |
|
342 |
+ EMIT(OEOL, 0); |
|
343 |
+ p->g->iflags |= USEEOL; |
|
344 |
+ p->g->neol++; |
|
345 |
+ break; |
|
346 |
+ case '|': |
|
347 |
+ SETERROR(REG_EMPTY); |
|
348 |
+ break; |
|
349 |
+ case '*': |
|
350 |
+ case '+': |
|
351 |
+ case '?': |
|
352 |
+ SETERROR(REG_BADRPT); |
|
353 |
+ break; |
|
354 |
+ case '.': |
|
355 |
+ if (p->g->cflags®_NEWLINE) |
|
356 |
+ nonnewline(p); |
|
357 |
+ else |
|
358 |
+ EMIT(OANY, 0); |
|
359 |
+ break; |
|
360 |
+ case '[': |
|
361 |
+ p_bracket(p); |
|
362 |
+ break; |
|
363 |
+ case '\\': |
|
364 |
+ REQUIRE(MORE(), REG_EESCAPE); |
|
365 |
+ c = GETNEXT(); |
|
366 |
+ ordinary(p, c); |
|
367 |
+ break; |
|
368 |
+ case '{': /* okay as ordinary except if digit follows */ |
|
369 |
+ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); |
|
370 |
+ /* FALLTHROUGH */ |
|
371 |
+ default: |
|
372 |
+ ordinary(p, c); |
|
373 |
+ break; |
|
374 |
+ } |
|
375 |
+ |
|
376 |
+ if (!MORE()) |
|
377 |
+ return; |
|
378 |
+ c = PEEK(); |
|
379 |
+ /* we call { a repetition if followed by a digit */ |
|
380 |
+ if (!( c == '*' || c == '+' || c == '?' || |
|
381 |
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) |
|
382 |
+ return; /* no repetition, we're done */ |
|
383 |
+ NEXT(); |
|
384 |
+ |
|
385 |
+ REQUIRE(!wascaret, REG_BADRPT); |
|
386 |
+ switch (c) { |
|
387 |
+ case '*': /* implemented as +? */ |
|
388 |
+ /* this case does not require the (y|) trick, noKLUDGE */ |
|
389 |
+ INSERT(OPLUS_, pos); |
|
390 |
+ ASTERN(O_PLUS, pos); |
|
391 |
+ INSERT(OQUEST_, pos); |
|
392 |
+ ASTERN(O_QUEST, pos); |
|
393 |
+ break; |
|
394 |
+ case '+': |
|
395 |
+ INSERT(OPLUS_, pos); |
|
396 |
+ ASTERN(O_PLUS, pos); |
|
397 |
+ break; |
|
398 |
+ case '?': |
|
399 |
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
|
400 |
+ INSERT(OCH_, pos); /* offset slightly wrong */ |
|
401 |
+ ASTERN(OOR1, pos); /* this one's right */ |
|
402 |
+ AHEAD(pos); /* fix the OCH_ */ |
|
403 |
+ EMIT(OOR2, 0); /* offset very wrong... */ |
|
404 |
+ AHEAD(THERE()); /* ...so fix it */ |
|
405 |
+ ASTERN(O_CH, THERETHERE()); |
|
406 |
+ break; |
|
407 |
+ case '{': |
|
408 |
+ count = p_count(p); |
|
409 |
+ if (EAT(',')) { |
|
410 |
+ if (isdigit((uch)PEEK())) { |
|
411 |
+ count2 = p_count(p); |
|
412 |
+ REQUIRE(count <= count2, REG_BADBR); |
|
413 |
+ } else /* single number with comma */ |
|
414 |
+ count2 = INFINITY; |
|
415 |
+ } else /* just a single number */ |
|
416 |
+ count2 = count; |
|
417 |
+ repeat(p, pos, count, count2); |
|
418 |
+ if (!EAT('}')) { /* error heuristics */ |
|
419 |
+ while (MORE() && PEEK() != '}') |
|
420 |
+ NEXT(); |
|
421 |
+ REQUIRE(MORE(), REG_EBRACE); |
|
422 |
+ SETERROR(REG_BADBR); |
|
423 |
+ } |
|
424 |
+ break; |
|
425 |
+ } |
|
426 |
+ |
|
427 |
+ if (!MORE()) |
|
428 |
+ return; |
|
429 |
+ c = PEEK(); |
|
430 |
+ if (!( c == '*' || c == '+' || c == '?' || |
|
431 |
+ (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) |
|
432 |
+ return; |
|
433 |
+ SETERROR(REG_BADRPT); |
|
434 |
+} |
|
435 |
+ |
|
436 |
+/* |
|
437 |
+ - p_str - string (no metacharacters) "parser" |
|
438 |
+ */ |
|
439 |
+static void |
|
440 |
+p_str(struct parse *p) |
|
441 |
+{ |
|
442 |
+ REQUIRE(MORE(), REG_EMPTY); |
|
443 |
+ while (MORE()) |
|
444 |
+ ordinary(p, GETNEXT()); |
|
445 |
+} |
|
446 |
+ |
|
447 |
+/* |
|
448 |
+ - p_bre - BRE parser top level, anchoring and concatenation |
|
449 |
+ * Giving end1 as OUT essentially eliminates the end1/end2 check. |
|
450 |
+ * |
|
451 |
+ * This implementation is a bit of a kludge, in that a trailing $ is first |
|
452 |
+ * taken as an ordinary character and then revised to be an anchor. The |
|
453 |
+ * only undesirable side effect is that '$' gets included as a character |
|
454 |
+ * category in such cases. This is fairly harmless; not worth fixing. |
|
455 |
+ * The amount of lookahead needed to avoid this kludge is excessive. |
|
456 |
+ */ |
|
457 |
+static void |
|
458 |
+p_bre(struct parse *p, |
|
459 |
+ int end1, /* first terminating character */ |
|
460 |
+ int end2) /* second terminating character */ |
|
461 |
+{ |
|
462 |
+ sopno start = HERE(); |
|
463 |
+ int first = 1; /* first subexpression? */ |
|
464 |
+ int wasdollar = 0; |
|
465 |
+ |
|
466 |
+ if (EAT('^')) { |
|
467 |
+ EMIT(OBOL, 0); |
|
468 |
+ p->g->iflags |= USEBOL; |
|
469 |
+ p->g->nbol++; |
|
470 |
+ } |
|
471 |
+ while (MORE() && !SEETWO(end1, end2)) { |
|
472 |
+ wasdollar = p_simp_re(p, first); |
|
473 |
+ first = 0; |
|
474 |
+ } |
|
475 |
+ if (wasdollar) { /* oops, that was a trailing anchor */ |
|
476 |
+ DROP(1); |
|
477 |
+ EMIT(OEOL, 0); |
|
478 |
+ p->g->iflags |= USEEOL; |
|
479 |
+ p->g->neol++; |
|
480 |
+ } |
|
481 |
+ |
|
482 |
+ REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ |
|
483 |
+} |
|
484 |
+ |
|
485 |
+/* |
|
486 |
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition |
|
487 |
+ */ |
|
488 |
+static int /* was the simple RE an unbackslashed $? */ |
|
489 |
+p_simp_re(struct parse *p, |
|
490 |
+ int starordinary) /* is a leading * an ordinary character? */ |
|
491 |
+{ |
|
492 |
+ int c; |
|
493 |
+ int count; |
|
494 |
+ int count2; |
|
495 |
+ sopno pos; |
|
496 |
+ int i; |
|
497 |
+ sopno subno; |
|
498 |
+# define BACKSL (1<<CHAR_BIT) |
|
499 |
+ |
|
500 |
+ pos = HERE(); /* repetion op, if any, covers from here */ |
|
501 |
+ |
|
502 |
+ assert(MORE()); /* caller should have ensured this */ |
|
503 |
+ c = GETNEXT(); |
|
504 |
+ if (c == '\\') { |
|
505 |
+ REQUIRE(MORE(), REG_EESCAPE); |
|
506 |
+ c = BACKSL | GETNEXT(); |
|
507 |
+ } |
|
508 |
+ switch (c) { |
|
509 |
+ case '.': |
|
510 |
+ if (p->g->cflags®_NEWLINE) |
|
511 |
+ nonnewline(p); |
|
512 |
+ else |
|
513 |
+ EMIT(OANY, 0); |
|
514 |
+ break; |
|
515 |
+ case '[': |
|
516 |
+ p_bracket(p); |
|
517 |
+ break; |
|
518 |
+ case BACKSL|'{': |
|
519 |
+ SETERROR(REG_BADRPT); |
|
520 |
+ break; |
|
521 |
+ case BACKSL|'(': |
|
522 |
+ p->g->nsub++; |
|
523 |
+ subno = p->g->nsub; |
|
524 |
+ if (subno < NPAREN) |
|
525 |
+ p->pbegin[subno] = HERE(); |
|
526 |
+ EMIT(OLPAREN, subno); |
|
527 |
+ /* the MORE here is an error heuristic */ |
|
528 |
+ if (MORE() && !SEETWO('\\', ')')) |
|
529 |
+ p_bre(p, '\\', ')'); |
|
530 |
+ if (subno < NPAREN) { |
|
531 |
+ p->pend[subno] = HERE(); |
|
532 |
+ assert(p->pend[subno] != 0); |
|
533 |
+ } |
|
534 |
+ EMIT(ORPAREN, subno); |
|
535 |
+ REQUIRE(EATTWO('\\', ')'), REG_EPAREN); |
|
536 |
+ break; |
|
537 |
+ case BACKSL|')': /* should not get here -- must be user */ |
|
538 |
+ case BACKSL|'}': |
|
539 |
+ SETERROR(REG_EPAREN); |
|
540 |
+ break; |
|
541 |
+ case BACKSL|'1': |
|
542 |
+ case BACKSL|'2': |
|
543 |
+ case BACKSL|'3': |
|
544 |
+ case BACKSL|'4': |
|
545 |
+ case BACKSL|'5': |
|
546 |
+ case BACKSL|'6': |
|
547 |
+ case BACKSL|'7': |
|
548 |
+ case BACKSL|'8': |
|
549 |
+ case BACKSL|'9': |
|
550 |
+ i = (c&~BACKSL) - '0'; |
|
551 |
+ assert(i < NPAREN); |
|
552 |
+ if (p->pend[i] != 0) { |
|
553 |
+ assert(i <= p->g->nsub); |
|
554 |
+ EMIT(OBACK_, i); |
|
555 |
+ assert(p->pbegin[i] != 0); |
|
556 |
+ assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); |
|
557 |
+ assert(OP(p->strip[p->pend[i]]) == ORPAREN); |
|
558 |
+ (void) dupl(p, p->pbegin[i]+1, p->pend[i]); |
|
559 |
+ EMIT(O_BACK, i); |
|
560 |
+ } else |
|
561 |
+ SETERROR(REG_ESUBREG); |
|
562 |
+ p->g->backrefs = 1; |
|
563 |
+ break; |
|
564 |
+ case '*': |
|
565 |
+ REQUIRE(starordinary, REG_BADRPT); |
|
566 |
+ /* FALLTHROUGH */ |
|
567 |
+ default: |
|
568 |
+ ordinary(p, (char)c); |
|
569 |
+ break; |
|
570 |
+ } |
|
571 |
+ |
|
572 |
+ if (EAT('*')) { /* implemented as +? */ |
|
573 |
+ /* this case does not require the (y|) trick, noKLUDGE */ |
|
574 |
+ INSERT(OPLUS_, pos); |
|
575 |
+ ASTERN(O_PLUS, pos); |
|
576 |
+ INSERT(OQUEST_, pos); |
|
577 |
+ ASTERN(O_QUEST, pos); |
|
578 |
+ } else if (EATTWO('\\', '{')) { |
|
579 |
+ count = p_count(p); |
|
580 |
+ if (EAT(',')) { |
|
581 |
+ if (MORE() && isdigit((uch)PEEK())) { |
|
582 |
+ count2 = p_count(p); |
|
583 |
+ REQUIRE(count <= count2, REG_BADBR); |
|
584 |
+ } else /* single number with comma */ |
|
585 |
+ count2 = INFINITY; |
|
586 |
+ } else /* just a single number */ |
|
587 |
+ count2 = count; |
|
588 |
+ repeat(p, pos, count, count2); |
|
589 |
+ if (!EATTWO('\\', '}')) { /* error heuristics */ |
|
590 |
+ while (MORE() && !SEETWO('\\', '}')) |
|
591 |
+ NEXT(); |
|
592 |
+ REQUIRE(MORE(), REG_EBRACE); |
|
593 |
+ SETERROR(REG_BADBR); |
|
594 |
+ } |
|
595 |
+ } else if (c == '$') /* $ (but not \$) ends it */ |
|
596 |
+ return(1); |
|
597 |
+ |
|
598 |
+ return(0); |
|
599 |
+} |
|
600 |
+ |
|
601 |
+/* |
|
602 |
+ - p_count - parse a repetition count |
|
603 |
+ */ |
|
604 |
+static int /* the value */ |
|
605 |
+p_count(struct parse *p) |
|
606 |
+{ |
|
607 |
+ int count = 0; |
|
608 |
+ int ndigits = 0; |
|
609 |
+ |
|
610 |
+ while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { |
|
611 |
+ count = count*10 + (GETNEXT() - '0'); |
|
612 |
+ ndigits++; |
|
613 |
+ } |
|
614 |
+ |
|
615 |
+ REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); |
|
616 |
+ return(count); |
|
617 |
+} |
|
618 |
+ |
|
619 |
+/* |
|
620 |
+ - p_bracket - parse a bracketed character list |
|
621 |
+ * |
|
622 |
+ * Note a significant property of this code: if the allocset() did SETERROR, |
|
623 |
+ * no set operations are done. |
|
624 |
+ */ |
|
625 |
+static void |
|
626 |
+p_bracket(struct parse *p) |
|
627 |
+{ |
|
628 |
+ cset *cs; |
|
629 |
+ int invert = 0; |
|
630 |
+ |
|
631 |
+ /* Dept of Truly Sickening Special-Case Kludges */ |
|
632 |
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { |
|
633 |
+ EMIT(OBOW, 0); |
|
634 |
+ NEXTn(6); |
|
635 |
+ return; |
|
636 |
+ } |
|
637 |
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { |
|
638 |
+ EMIT(OEOW, 0); |
|
639 |
+ NEXTn(6); |
|
640 |
+ return; |
|
641 |
+ } |
|
642 |
+ |
|
643 |
+ if ((cs = allocset(p)) == NULL) { |
|
644 |
+ /* allocset did set error status in p */ |
|
645 |
+ return; |
|
646 |
+ } |
|
647 |
+ |
|
648 |
+ if (EAT('^')) |
|
649 |
+ invert++; /* make note to invert set at end */ |
|
650 |
+ if (EAT(']')) |
|
651 |
+ CHadd(cs, ']'); |
|
652 |
+ else if (EAT('-')) |
|
653 |
+ CHadd(cs, '-'); |
|
654 |
+ while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) |
|
655 |
+ p_b_term(p, cs); |
|
656 |
+ if (EAT('-')) |
|
657 |
+ CHadd(cs, '-'); |
|
658 |
+ MUSTEAT(']', REG_EBRACK); |
|
659 |
+ |
|
660 |
+ if (p->error != 0) { /* don't mess things up further */ |
|
661 |
+ freeset(p, cs); |
|
662 |
+ return; |
|
663 |
+ } |
|
664 |
+ |
|
665 |
+ if (p->g->cflags®_ICASE) { |
|
666 |
+ int i; |
|
667 |
+ int ci; |
|
668 |
+ |
|
669 |
+ for (i = p->g->csetsize - 1; i >= 0; i--) |
|
670 |
+ if (CHIN(cs, i) && isalpha(i)) { |
|
671 |
+ ci = othercase(i); |
|
672 |
+ if (ci != i) |
|
673 |
+ CHadd(cs, ci); |
|
674 |
+ } |
|
675 |
+ if (cs->multis != NULL) |
|
676 |
+ mccase(p, cs); |
|
677 |
+ } |
|
678 |
+ if (invert) { |
|
679 |
+ int i; |
|
680 |
+ |
|
681 |
+ for (i = p->g->csetsize - 1; i >= 0; i--) |
|
682 |
+ if (CHIN(cs, i)) |
|
683 |
+ CHsub(cs, i); |
|
684 |
+ else |
|
685 |
+ CHadd(cs, i); |
|
686 |
+ if (p->g->cflags®_NEWLINE) |
|
687 |
+ CHsub(cs, '\n'); |
|
688 |
+ if (cs->multis != NULL) |
|
689 |
+ mcinvert(p, cs); |
|
690 |
+ } |
|
691 |
+ |
|
692 |
+ assert(cs->multis == NULL); /* xxx */ |
|
693 |
+ |
|
694 |
+ if (nch(p, cs) == 1) { /* optimize singleton sets */ |
|
695 |
+ ordinary(p, firstch(p, cs)); |
|
696 |
+ freeset(p, cs); |
|
697 |
+ } else |
|
698 |
+ EMIT(OANYOF, freezeset(p, cs)); |
|
699 |
+} |
|
700 |
+ |
|
701 |
+/* |
|
702 |
+ - p_b_term - parse one term of a bracketed character list |
|
703 |
+ */ |
|
704 |
+static void |
|
705 |
+p_b_term(struct parse *p, cset *cs) |
|
706 |
+{ |
|
707 |
+ char c; |
|
708 |
+ char start, finish; |
|
709 |
+ int i; |
|
710 |
+ |
|
711 |
+ /* classify what we've got */ |
|
712 |
+ switch ((MORE()) ? PEEK() : '\0') { |
|
713 |
+ case '[': |
|
714 |
+ c = (MORE2()) ? PEEK2() : '\0'; |
|
715 |
+ break; |
|
716 |
+ case '-': |
|
717 |
+ SETERROR(REG_ERANGE); |
|
718 |
+ return; /* NOTE RETURN */ |
|
719 |
+ break; |
|
720 |
+ default: |
|
721 |
+ c = '\0'; |
|
722 |
+ break; |
|
723 |
+ } |
|
724 |
+ |
|
725 |
+ switch (c) { |
|
726 |
+ case ':': /* character class */ |
|
727 |
+ NEXT2(); |
|
728 |
+ REQUIRE(MORE(), REG_EBRACK); |
|
729 |
+ c = PEEK(); |
|
730 |
+ REQUIRE(c != '-' && c != ']', REG_ECTYPE); |
|
731 |
+ p_b_cclass(p, cs); |
|
732 |
+ REQUIRE(MORE(), REG_EBRACK); |
|
733 |
+ REQUIRE(EATTWO(':', ']'), REG_ECTYPE); |
|
734 |
+ break; |
|
735 |
+ case '=': /* equivalence class */ |
|
736 |
+ NEXT2(); |
|
737 |
+ REQUIRE(MORE(), REG_EBRACK); |
|
738 |
+ c = PEEK(); |
|
739 |
+ REQUIRE(c != '-' && c != ']', REG_ECOLLATE); |
|
740 |
+ p_b_eclass(p, cs); |
|
741 |
+ REQUIRE(MORE(), REG_EBRACK); |
|
742 |
+ REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); |
|
743 |
+ break; |
|
744 |
+ default: /* symbol, ordinary character, or range */ |
|
745 |
+/* xxx revision needed for multichar stuff */ |
|
746 |
+ start = p_b_symbol(p); |
|
747 |
+ if (SEE('-') && MORE2() && PEEK2() != ']') { |
|
748 |
+ /* range */ |
|
749 |
+ NEXT(); |
|
750 |
+ if (EAT('-')) |
|
751 |
+ finish = '-'; |
|
752 |
+ else |
|
753 |
+ finish = p_b_symbol(p); |
|
754 |
+ } else |
|
755 |
+ finish = start; |
|
756 |
+/* xxx what about signed chars here... */ |
|
757 |
+ REQUIRE(start <= finish, REG_ERANGE); |
|
758 |
+ for (i = start; i <= finish; i++) |
|
759 |
+ CHadd(cs, i); |
|
760 |
+ break; |
|
761 |
+ } |
|
762 |
+} |
|
763 |
+ |
|
764 |
+/* |
|
765 |
+ - p_b_cclass - parse a character-class name and deal with it |
|
766 |
+ */ |
|
767 |
+static void |
|
768 |
+p_b_cclass(struct parse *p, cset *cs) |
|
769 |
+{ |
|
770 |
+ char *sp = p->next; |
|
771 |
+ struct cclass *cp; |
|
772 |
+ size_t len; |
|
773 |
+ const char *u; |
|
774 |
+ char c; |
|
775 |
+ |
|
776 |
+ while (MORE() && isalpha(PEEK())) |
|
777 |
+ NEXT(); |
|
778 |
+ len = p->next - sp; |
|
779 |
+ for (cp = cclasses; cp->name != NULL; cp++) |
|
780 |
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
|
781 |
+ break; |
|
782 |
+ if (cp->name == NULL) { |
|
783 |
+ /* oops, didn't find it */ |
|
784 |
+ SETERROR(REG_ECTYPE); |
|
785 |
+ return; |
|
786 |
+ } |
|
787 |
+ |
|
788 |
+ u = cp->chars; |
|
789 |
+ while ((c = *u++) != '\0') |
|
790 |
+ CHadd(cs, c); |
|
791 |
+ for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) |
|
792 |
+ MCadd(p, cs, u); |
|
793 |
+} |
|
794 |
+ |
|
795 |
+/* |
|
796 |
+ - p_b_eclass - parse an equivalence-class name and deal with it |
|
797 |
+ * |
|
798 |
+ * This implementation is incomplete. xxx |
|
799 |
+ */ |
|
800 |
+static void |
|
801 |
+p_b_eclass(struct parse *p, cset *cs) |
|
802 |
+{ |
|
803 |
+ char c; |
|
804 |
+ |
|
805 |
+ c = p_b_coll_elem(p, '='); |
|
806 |
+ CHadd(cs, c); |
|
807 |
+} |
|
808 |
+ |
|
809 |
+/* |
|
810 |
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol |
|
811 |
+ */ |
|
812 |
+static char /* value of symbol */ |
|
813 |
+p_b_symbol(struct parse *p) |
|
814 |
+{ |
|
815 |
+ char value; |
|
816 |
+ |
|
817 |
+ REQUIRE(MORE(), REG_EBRACK); |
|
818 |
+ if (!EATTWO('[', '.')) |
|
819 |
+ return(GETNEXT()); |
|
820 |
+ |
|
821 |
+ /* collating symbol */ |
|
822 |
+ value = p_b_coll_elem(p, '.'); |
|
823 |
+ REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); |
|
824 |
+ return(value); |
|
825 |
+} |
|
826 |
+ |
|
827 |
+/* |
|
828 |
+ - p_b_coll_elem - parse a collating-element name and look it up |
|
829 |
+ */ |
|
830 |
+static char /* value of collating element */ |
|
831 |
+p_b_coll_elem(struct parse *p, |
|
832 |
+ int endc) /* name ended by endc,']' */ |
|
833 |
+{ |
|
834 |
+ char *sp = p->next; |
|
835 |
+ struct cname *cp; |
|
836 |
+ int len; |
|
837 |
+ |
|
838 |
+ while (MORE() && !SEETWO(endc, ']')) |
|
839 |
+ NEXT(); |
|
840 |
+ if (!MORE()) { |
|
841 |
+ SETERROR(REG_EBRACK); |
|
842 |
+ return(0); |
|
843 |
+ } |
|
844 |
+ len = p->next - sp; |
|
845 |
+ for (cp = cnames; cp->name != NULL; cp++) |
|
846 |
+ if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
|
847 |
+ return(cp->code); /* known name */ |
|
848 |
+ if (len == 1) |
|
849 |
+ return(*sp); /* single character */ |
|
850 |
+ SETERROR(REG_ECOLLATE); /* neither */ |
|
851 |
+ return(0); |
|
852 |
+} |
|
853 |
+ |
|
854 |
/*
 - othercase - return the case counterpart of an alphabetic
 *
 * The argument is reduced to an unsigned char value first.  An upper-case
 * letter yields its lower-case form and vice versa; an alphabetic that is
 * neither comes back unchanged.
 */
static char			/* if no counterpart, return ch */
othercase(int ch)
{
	const int c = (unsigned char)ch;

	assert(isalpha(c));
	if (isupper(c))
		return((unsigned char)tolower(c));
	if (islower(c))
		return((unsigned char)toupper(c));
	/* peculiar, but could happen */
	return(c);
}
|
869 |
+ |
|
870 |
+/* |
|
871 |
+ - bothcases - emit a dualcase version of a two-case character |
|
872 |
+ * |
|
873 |
+ * Boy, is this implementation ever a kludge... |
|
874 |
+ */ |
|
875 |
+static void |
|
876 |
+bothcases(struct parse *p, int ch) |
|
877 |
+{ |
|
878 |
+ char *oldnext = p->next; |
|
879 |
+ char *oldend = p->end; |
|
880 |
+ char bracket[3]; |
|
881 |
+ |
|
882 |
+ ch = (uch)ch; |
|
883 |
+ assert(othercase(ch) != ch); /* p_bracket() would recurse */ |
|
884 |
+ p->next = bracket; |
|
885 |
+ p->end = bracket+2; |
|
886 |
+ bracket[0] = ch; |
|
887 |
+ bracket[1] = ']'; |
|
888 |
+ bracket[2] = '\0'; |
|
889 |
+ p_bracket(p); |
|
890 |
+ assert(p->next == bracket+2); |
|
891 |
+ p->next = oldnext; |
|
892 |
+ p->end = oldend; |
|
893 |
+} |
|
894 |
+ |
|
895 |
+/* |
|
896 |
+ - ordinary - emit an ordinary character |
|
897 |
+ */ |
|
898 |
+static void |
|
899 |
+ordinary(struct parse *p, int ch) |
|
900 |
+{ |
|
901 |
+ cat_t *cap = p->g->categories; |
|
902 |
+ |
|
903 |
+ if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) |
|
904 |
+ bothcases(p, ch); |
|
905 |
+ else { |
|
906 |
+ EMIT(OCHAR, (uch)ch); |
|
907 |
+ if (cap[ch] == 0) |
|
908 |
+ cap[ch] = p->g->ncategories++; |
|
909 |
+ } |
|
910 |
+} |
|
911 |
+ |
|
912 |
+/* |
|
913 |
+ - nonnewline - emit REG_NEWLINE version of OANY |
|
914 |
+ * |
|
915 |
+ * Boy, is this implementation ever a kludge... |
|
916 |
+ */ |
|
917 |
+static void |
|
918 |
+nonnewline(struct parse *p) |
|
919 |
+{ |
|
920 |
+ char *oldnext = p->next; |
|
921 |
+ char *oldend = p->end; |
|
922 |
+ char bracket[4]; |
|
923 |
+ |
|
924 |
+ p->next = bracket; |
|
925 |
+ p->end = bracket+3; |
|
926 |
+ bracket[0] = '^'; |
|
927 |
+ bracket[1] = '\n'; |
|
928 |
+ bracket[2] = ']'; |
|
929 |
+ bracket[3] = '\0'; |
|
930 |
+ p_bracket(p); |
|
931 |
+ assert(p->next == bracket+3); |
|
932 |
+ p->next = oldnext; |
|
933 |
+ p->end = oldend; |
|
934 |
+} |
|
935 |
+ |
|
936 |
+/* |
|
937 |
+ - repeat - generate code for a bounded repetition, recursively if needed |
|
938 |
+ */ |
|
939 |
+static void |
|
940 |
+repeat(struct parse *p, |
|
941 |
+ sopno start, /* operand from here to end of strip */ |
|
942 |
+ int from, /* repeated from this number */ |
|
943 |
+ int to) /* to this number of times (maybe INFINITY) */ |
|
944 |
+{ |
|
945 |
+ sopno finish = HERE(); |
|
946 |
+# define N 2 |
|
947 |
+# define INF 3 |
|
948 |
+# define REP(f, t) ((f)*8 + (t)) |
|
949 |
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) |
|
950 |
+ sopno copy; |
|
951 |
+ |
|
952 |
+ if (p->error != 0) /* head off possible runaway recursion */ |
|
953 |
+ return; |
|
954 |
+ |
|
955 |
+ assert(from <= to); |
|
956 |
+ |
|
957 |
+ switch (REP(MAP(from), MAP(to))) { |
|
958 |
+ case REP(0, 0): /* must be user doing this */ |
|
959 |
+ DROP(finish-start); /* drop the operand */ |
|
960 |
+ break; |
|
961 |
+ case REP(0, 1): /* as x{1,1}? */ |
|
962 |
+ case REP(0, N): /* as x{1,n}? */ |
|
963 |
+ case REP(0, INF): /* as x{1,}? */ |
|
964 |
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
|
965 |
+ INSERT(OCH_, start); /* offset is wrong... */ |
|
966 |
+ repeat(p, start+1, 1, to); |
|
967 |
+ ASTERN(OOR1, start); |
|
968 |
+ AHEAD(start); /* ... fix it */ |
|
969 |
+ EMIT(OOR2, 0); |
|
970 |
+ AHEAD(THERE()); |
|
971 |
+ ASTERN(O_CH, THERETHERE()); |
|
972 |
+ break; |
|
973 |
+ case REP(1, 1): /* trivial case */ |
|
974 |
+ /* done */ |
|
975 |
+ break; |
|
976 |
+ case REP(1, N): /* as x?x{1,n-1} */ |
|
977 |
+ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
|
978 |
+ INSERT(OCH_, start); |
|
979 |
+ ASTERN(OOR1, start); |
|
980 |
+ AHEAD(start); |
|
981 |
+ EMIT(OOR2, 0); /* offset very wrong... */ |
|
982 |
+ AHEAD(THERE()); /* ...so fix it */ |
|
983 |
+ ASTERN(O_CH, THERETHERE()); |
|
984 |
+ copy = dupl(p, start+1, finish+1); |
|
985 |
+ assert(copy == finish+4); |
|
986 |
+ repeat(p, copy, 1, to-1); |
|
987 |
+ break; |
|
988 |
+ case REP(1, INF): /* as x+ */ |
|
989 |
+ INSERT(OPLUS_, start); |
|
990 |
+ ASTERN(O_PLUS, start); |
|
991 |
+ break; |
|
992 |
+ case REP(N, N): /* as xx{m-1,n-1} */ |
|
993 |
+ copy = dupl(p, start, finish); |
|
994 |
+ repeat(p, copy, from-1, to-1); |
|
995 |
+ break; |
|
996 |
+ case REP(N, INF): /* as xx{n-1,INF} */ |
|
997 |
+ copy = dupl(p, start, finish); |
|
998 |
+ repeat(p, copy, from-1, to); |
|
999 |
+ break; |
|
1000 |
+ default: /* "can't happen" */ |
|
1001 |
+ SETERROR(REG_ASSERT); /* just in case */ |
|
1002 |
+ break; |
|
1003 |
+ } |
|
1004 |
+} |
|
1005 |
+ |
|
1006 |
+/* |
|
1007 |
+ - seterr - set an error condition |
|
1008 |
+ */ |
|
1009 |
+static int /* useless but makes type checking happy */ |
|
1010 |
+seterr(struct parse *p, int e) |
|
1011 |
+{ |
|
1012 |
+ if (p->error == 0) /* keep earliest error condition */ |
|
1013 |
+ p->error = e; |
|
1014 |
+ p->next = nuls; /* try to bring things to a halt */ |
|
1015 |
+ p->end = nuls; |
|
1016 |
+ return(0); /* make the return value well-defined */ |
|
1017 |
+} |
|
1018 |
+ |
|
1019 |
+/* |
|
1020 |
+ - allocset - allocate a set of characters for [] |
|
1021 |
+ */ |
|
1022 |
+static cset * |
|
1023 |
+allocset(struct parse *p) |
|
1024 |
+{ |
|
1025 |
+ int no = p->g->ncsets++; |
|
1026 |
+ size_t nc; |
|
1027 |
+ size_t nbytes; |
|
1028 |
+ cset *cs; |
|
1029 |
+ size_t css = (size_t)p->g->csetsize; |
|
1030 |
+ int i; |
|
1031 |
+ |
|
1032 |
+ if (no >= p->ncsalloc) { /* need another column of space */ |
|
1033 |
+ void *ptr; |
|
1034 |
+ |
|
1035 |
+ p->ncsalloc += CHAR_BIT; |
|
1036 |
+ nc = p->ncsalloc; |
|
1037 |
+ assert(nc % CHAR_BIT == 0); |
|
1038 |
+ nbytes = nc / CHAR_BIT * css; |
|
1039 |
+ |
|
1040 |
+ ptr = (cset *)cli_realloc((char *)p->g->sets, nc * sizeof(cset)); |
|
1041 |
+ if (ptr == NULL) |
|
1042 |
+ goto nomem; |
|
1043 |
+ p->g->sets = ptr; |
|
1044 |
+ |
|
1045 |
+ ptr = (uch *)cli_realloc((char *)p->g->setbits, nbytes); |
|
1046 |
+ if (ptr == NULL) |
|
1047 |
+ goto nomem; |
|
1048 |
+ p->g->setbits = ptr; |
|
1049 |
+ |
|
1050 |
+ for (i = 0; i < no; i++) |
|
1051 |
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); |
|
1052 |
+ |
|
1053 |
+ (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); |
|
1054 |
+ } |
|
1055 |
+ |
|
1056 |
+ cs = &p->g->sets[no]; |
|
1057 |
+ cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
|
1058 |
+ cs->mask = 1 << ((no) % CHAR_BIT); |
|
1059 |
+ cs->hash = 0; |
|
1060 |
+ cs->smultis = 0; |
|
1061 |
+ cs->multis = NULL; |
|
1062 |
+ |
|
1063 |
+ return(cs); |
|
1064 |
+nomem: |
|
1065 |
+ free(p->g->sets); |
|
1066 |
+ p->g->sets = NULL; |
|
1067 |
+ free(p->g->setbits); |
|
1068 |
+ p->g->setbits = NULL; |
|
1069 |
+ |
|
1070 |
+ SETERROR(REG_ESPACE); |
|
1071 |
+ /* caller's responsibility not to do set ops */ |
|
1072 |
+ return(NULL); |
|
1073 |
+} |
|
1074 |
+ |
|
1075 |
+/* |
|
1076 |
+ - freeset - free a now-unused set |
|
1077 |
+ */ |
|
1078 |
+static void |
|
1079 |
+freeset(struct parse *p, cset *cs) |
|
1080 |
+{ |
|
1081 |
+ size_t i; |
|
1082 |
+ cset *top = &p->g->sets[p->g->ncsets]; |
|
1083 |
+ size_t css = (size_t)p->g->csetsize; |
|
1084 |
+ |
|
1085 |
+ for (i = 0; i < css; i++) |
|
1086 |
+ CHsub(cs, i); |
|
1087 |
+ if (cs == top-1) /* recover only the easy case */ |
|
1088 |
+ p->g->ncsets--; |
|
1089 |
+} |
|
1090 |
+ |
|
1091 |
+/* |
|
1092 |
+ - freezeset - final processing on a set of characters |
|
1093 |
+ * |
|
1094 |
+ * The main task here is merging identical sets. This is usually a waste |
|
1095 |
+ * of time (although the hash code minimizes the overhead), but can win |
|
1096 |
+ * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash |
|
1097 |
+ * is done using addition rather than xor -- all ASCII [aA] sets xor to |
|
1098 |
+ * the same value! |
|
1099 |
+ */ |
|
1100 |
+static int /* set number */ |
|
1101 |
+freezeset(struct parse *p, cset *cs) |
|
1102 |
+{ |
|
1103 |
+ uch h = cs->hash; |
|
1104 |
+ size_t i; |
|
1105 |
+ cset *top = &p->g->sets[p->g->ncsets]; |
|
1106 |
+ cset *cs2; |
|
1107 |
+ size_t css = (size_t)p->g->csetsize; |
|
1108 |
+ |
|
1109 |
+ /* look for an earlier one which is the same */ |
|
1110 |
+ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) |
|
1111 |
+ if (cs2->hash == h && cs2 != cs) { |
|
1112 |
+ /* maybe */ |
|
1113 |
+ for (i = 0; i < css; i++) |
|
1114 |
+ if (!!CHIN(cs2, i) != !!CHIN(cs, i)) |
|
1115 |
+ break; /* no */ |
|
1116 |
+ if (i == css) |
|
1117 |
+ break; /* yes */ |
|
1118 |
+ } |
|
1119 |
+ |
|
1120 |
+ if (cs2 < top) { /* found one */ |
|
1121 |
+ freeset(p, cs); |
|
1122 |
+ cs = cs2; |
|
1123 |
+ } |
|
1124 |
+ |
|
1125 |
+ return((int)(cs - p->g->sets)); |
|
1126 |
+} |
|
1127 |
+ |
|
1128 |
+/* |
|
1129 |
+ - firstch - return first character in a set (which must have at least one) |
|
1130 |
+ */ |
|
1131 |
+static int /* character; there is no "none" value */ |
|
1132 |
+firstch(struct parse *p, cset *cs) |
|
1133 |
+{ |
|
1134 |
+ size_t i; |
|
1135 |
+ size_t css = (size_t)p->g->csetsize; |
|
1136 |
+ |
|
1137 |
+ for (i = 0; i < css; i++) |
|
1138 |
+ if (CHIN(cs, i)) |
|
1139 |
+ return((char)i); |
|
1140 |
+ assert(never); |
|
1141 |
+ return(0); /* arbitrary */ |
|
1142 |
+} |
|
1143 |
+ |
|
1144 |
+/* |
|
1145 |
+ - nch - number of characters in a set |
|
1146 |
+ */ |
|
1147 |
+static int |
|
1148 |
+nch(struct parse *p, cset *cs) |
|
1149 |
+{ |
|
1150 |
+ size_t i; |
|
1151 |
+ size_t css = (size_t)p->g->csetsize; |
|
1152 |
+ int n = 0; |
|
1153 |
+ |
|
1154 |
+ for (i = 0; i < css; i++) |
|
1155 |
+ if (CHIN(cs, i)) |
|
1156 |
+ n++; |
|
1157 |
+ return(n); |
|
1158 |
+} |
|
1159 |
+ |
|
1160 |
+/* |
|
1161 |
+ - mcadd - add a collating element to a cset |
|
1162 |
+ */ |
|
1163 |
+static void |
|
1164 |
+mcadd( struct parse *p, cset *cs, const char *cp) |
|
1165 |
+{ |
|
1166 |
+ size_t oldend = cs->smultis; |
|
1167 |
+ void *np; |
|
1168 |
+ |
|
1169 |
+ cs->smultis += strlen(cp) + 1; |
|
1170 |
+ if (cs->multis == NULL) |
|
1171 |
+ np = cli_malloc(cs->smultis); |
|
1172 |
+ else |
|
1173 |
+ np = cli_realloc(cs->multis, cs->smultis); |
|
1174 |
+ if (np == NULL) { |
|
1175 |
+ if (cs->multis) |
|
1176 |
+ free(cs->multis); |
|
1177 |
+ cs->multis = NULL; |
|
1178 |
+ SETERROR(REG_ESPACE); |
|
1179 |
+ return; |
|
1180 |
+ } |
|
1181 |
+ cs->multis = np; |
|
1182 |
+ |
|
1183 |
+ cli_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); |
|
1184 |
+} |
|
1185 |
+ |
|
1186 |
+/* |
|
1187 |
+ - mcinvert - invert the list of collating elements in a cset |
|
1188 |
+ * |
|
1189 |
+ * This would have to know the set of possibilities. Implementation |
|
1190 |
+ * is deferred. |
|
1191 |
+ */ |
|
1192 |
+/* ARGSUSED */ |
|
1193 |
+static void |
|
1194 |
+mcinvert(struct parse *p, cset *cs) |
|
1195 |
+{ |
|
1196 |
+ assert(cs->multis == NULL); /* xxx */ |
|
1197 |
+} |
|
1198 |
+ |
|
1199 |
+/* |
|
1200 |
+ - mccase - add case counterparts of the list of collating elements in a cset |
|
1201 |
+ * |
|
1202 |
+ * This would have to know the set of possibilities. Implementation |
|
1203 |
+ * is deferred. |
|
1204 |
+ */ |
|
1205 |
+/* ARGSUSED */ |
|
1206 |
+static void |
|
1207 |
+mccase(struct parse *p, cset *cs) |
|
1208 |
+{ |
|
1209 |
+ assert(cs->multis == NULL); /* xxx */ |
|
1210 |
+} |
|
1211 |
+ |
|
1212 |
+/* |
|
1213 |
+ - isinsets - is this character in any sets? |
|
1214 |
+ */ |
|
1215 |
+static int /* predicate */ |
|
1216 |
+isinsets(struct re_guts *g, int c) |
|
1217 |
+{ |
|
1218 |
+ uch *col; |
|
1219 |
+ int i; |
|
1220 |
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
|
1221 |
+ unsigned uc = (uch)c; |
|
1222 |
+ |
|
1223 |
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
|
1224 |
+ if (col[uc] != 0) |
|
1225 |
+ return(1); |
|
1226 |
+ return(0); |
|
1227 |
+} |
|
1228 |
+ |
|
1229 |
+/* |
|
1230 |
+ - samesets - are these two characters in exactly the same sets? |
|
1231 |
+ */ |
|
1232 |
+static int /* predicate */ |
|
1233 |
+samesets(struct re_guts *g, int c1, int c2) |
|
1234 |
+{ |
|
1235 |
+ uch *col; |
|
1236 |
+ int i; |
|
1237 |
+ int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
|
1238 |
+ unsigned uc1 = (uch)c1; |
|
1239 |
+ unsigned uc2 = (uch)c2; |
|
1240 |
+ |
|
1241 |
+ for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
|
1242 |
+ if (col[uc1] != col[uc2]) |
|
1243 |
+ return(0); |
|
1244 |
+ return(1); |
|
1245 |
+} |
|
1246 |
+ |
|
1247 |
+/* |
|
1248 |
+ - categorize - sort out character categories |
|
1249 |
+ */ |
|
1250 |
+static void |
|
1251 |
+categorize(struct parse *p, struct re_guts *g) |
|
1252 |
+{ |
|
1253 |
+ cat_t *cats = g->categories; |
|
1254 |
+ int c; |
|
1255 |
+ int c2; |
|
1256 |
+ cat_t cat; |
|
1257 |
+ |
|
1258 |
+ /* avoid making error situations worse */ |
|
1259 |
+ if (p->error != 0) |
|
1260 |
+ return; |
|
1261 |
+ |
|
1262 |
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
|
1263 |
+ if (cats[c] == 0 && isinsets(g, c)) { |
|
1264 |
+ cat = g->ncategories++; |
|
1265 |
+ cats[c] = cat; |
|
1266 |
+ for (c2 = c+1; c2 <= CHAR_MAX; c2++) |
|
1267 |
+ if (cats[c2] == 0 && samesets(g, c, c2)) |
|
1268 |
+ cats[c2] = cat; |
|
1269 |
+ } |
|
1270 |
+} |
|
1271 |
+ |
|
1272 |
+/* |
|
1273 |
+ - dupl - emit a duplicate of a bunch of sops |
|
1274 |
+ */ |
|
1275 |
+static sopno /* start of duplicate */ |
|
1276 |
+dupl(struct parse *p, |
|
1277 |
+ sopno start, /* from here */ |
|
1278 |
+ sopno finish) /* to this less one */ |
|
1279 |
+{ |
|
1280 |
+ sopno ret = HERE(); |
|
1281 |
+ sopno len = finish - start; |
|
1282 |
+ |
|
1283 |
+ assert(finish >= start); |
|
1284 |
+ if (len == 0) |
|
1285 |
+ return(ret); |
|
1286 |
+ enlarge(p, p->ssize + len); /* this many unexpected additions */ |
|
1287 |
+ assert(p->ssize >= p->slen + len); |
|
1288 |
+ (void) memmove((char *)(p->strip + p->slen), |
|
1289 |
+ (char *)(p->strip + start), (size_t)len*sizeof(sop)); |
|
1290 |
+ p->slen += len; |
|
1291 |
+ return(ret); |
|
1292 |
+} |
|
1293 |
+ |
|
1294 |
+/* |
|
1295 |
+ - doemit - emit a strip operator |
|
1296 |
+ * |
|
1297 |
+ * It might seem better to implement this as a macro with a function as |
|
1298 |
+ * hard-case backup, but it's just too big and messy unless there are |
|
1299 |
+ * some changes to the data structures. Maybe later. |
|
1300 |
+ */ |
|
1301 |
+static void |
|
1302 |
+doemit(struct parse *p, sop op, size_t opnd) |
|
1303 |
+{ |
|
1304 |
+ /* avoid making error situations worse */ |
|
1305 |
+ if (p->error != 0) |
|
1306 |
+ return; |
|
1307 |
+ |
|
1308 |
+ /* deal with oversize operands ("can't happen", more or less) */ |
|
1309 |
+ assert(opnd < 1<<OPSHIFT); |
|
1310 |
+ |
|
1311 |
+ /* deal with undersized strip */ |
|
1312 |
+ if (p->slen >= p->ssize) |
|
1313 |
+ enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ |
|
1314 |
+ assert(p->slen < p->ssize); |
|
1315 |
+ |
|
1316 |
+ /* finally, it's all reduced to the easy case */ |
|
1317 |
+ p->strip[p->slen++] = SOP(op, opnd); |
|
1318 |
+} |
|
1319 |
+ |
|
1320 |
+/* |
|
1321 |
+ - doinsert - insert a sop into the strip |
|
1322 |
+ */ |
|
1323 |
+static void |
|
1324 |
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos) |
|
1325 |
+{ |
|
1326 |
+ sopno sn; |
|
1327 |
+ sop s; |
|
1328 |
+ int i; |
|
1329 |
+ |
|
1330 |
+ /* avoid making error situations worse */ |
|
1331 |
+ if (p->error != 0) |
|
1332 |
+ return; |
|
1333 |
+ |
|
1334 |
+ sn = HERE(); |
|
1335 |
+ EMIT(op, opnd); /* do checks, ensure space */ |
|
1336 |
+ assert(HERE() == sn+1); |
|
1337 |
+ s = p->strip[sn]; |
|
1338 |
+ |
|
1339 |
+ /* adjust paren pointers */ |
|
1340 |
+ assert(pos > 0); |
|
1341 |
+ for (i = 1; i < NPAREN; i++) { |
|
1342 |
+ if (p->pbegin[i] >= pos) { |
|
1343 |
+ p->pbegin[i]++; |
|
1344 |
+ } |
|
1345 |
+ if (p->pend[i] >= pos) { |
|
1346 |
+ p->pend[i]++; |
|
1347 |
+ } |
|
1348 |
+ } |
|
1349 |
+ |
|
1350 |
+ memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], |
|
1351 |
+ (HERE()-pos-1)*sizeof(sop)); |
|
1352 |
+ p->strip[pos] = s; |
|
1353 |
+} |
|
1354 |
+ |
|
1355 |
+/* |
|
1356 |
+ - dofwd - complete a forward reference |
|
1357 |
+ */ |
|
1358 |
+static void |
|
1359 |
+dofwd(struct parse *p, sopno pos, sop value) |
|
1360 |
+{ |
|
1361 |
+ /* avoid making error situations worse */ |
|
1362 |
+ if (p->error != 0) |
|
1363 |
+ return; |
|
1364 |
+ |
|
1365 |
+ assert(value < 1<<OPSHIFT); |
|
1366 |
+ p->strip[pos] = OP(p->strip[pos]) | value; |
|
1367 |
+} |
|
1368 |
+ |
|
1369 |
+/* |
|
1370 |
+ - enlarge - enlarge the strip |
|
1371 |
+ */ |
|
1372 |
+static void |
|
1373 |
+enlarge(struct parse *p, sopno size) |
|
1374 |
+{ |
|
1375 |
+ sop *sp; |
|
1376 |
+ |
|
1377 |
+ if (p->ssize >= size) |
|
1378 |
+ return; |
|
1379 |
+ |
|
1380 |
+ sp = (sop *)cli_realloc(p->strip, size*sizeof(sop)); |
|
1381 |
+ if (sp == NULL) { |
|
1382 |
+ SETERROR(REG_ESPACE); |
|
1383 |
+ return; |
|
1384 |
+ } |
|
1385 |
+ p->strip = sp; |
|
1386 |
+ p->ssize = size; |
|
1387 |
+} |
|
1388 |
+ |
|
1389 |
+/* |
|
1390 |
+ - stripsnug - compact the strip |
|
1391 |
+ */ |
|
1392 |
+static void |
|
1393 |
+stripsnug(struct parse *p, struct re_guts *g) |
|
1394 |
+{ |
|
1395 |
+ g->nstates = p->slen; |
|
1396 |
+ g->strip = (sop *)cli_realloc((char *)p->strip, p->slen * sizeof(sop)); |
|
1397 |
+ if (g->strip == NULL) { |
|
1398 |
+ SETERROR(REG_ESPACE); |
|
1399 |
+ g->strip = p->strip; |
|
1400 |
+ } |
|
1401 |
+} |
|
1402 |
+ |
|
1403 |
+/* |
|
1404 |
+ - findmust - fill in must and mlen with longest mandatory literal string |
|
1405 |
+ * |
|
1406 |
+ * This algorithm could do fancy things like analyzing the operands of | |
|
1407 |
+ * for common subsequences. Someday. This code is simple and finds most |
|
1408 |
+ * of the interesting cases. |
|
1409 |
+ * |
|
1410 |
+ * Note that must and mlen got initialized during setup. |
|
1411 |
+ */ |
|
1412 |
+static void |
|
1413 |
+findmust(struct parse *p, struct re_guts *g) |
|
1414 |
+{ |
|
1415 |
+ sop *scan; |
|
1416 |
+ sop *start; |
|
1417 |
+ sop *newstart; |
|
1418 |
+ sopno newlen; |
|
1419 |
+ sop s; |
|
1420 |
+ char *cp; |
|
1421 |
+ sopno i; |
|
1422 |
+ |
|
1423 |
+ /* avoid making error situations worse */ |
|
1424 |
+ if (p->error != 0) |
|
1425 |
+ return; |
|
1426 |
+ |
|
1427 |
+ /* find the longest OCHAR sequence in strip */ |
|
1428 |
+ newlen = 0; |
|
1429 |
+ scan = g->strip + 1; |
|
1430 |
+ do { |
|
1431 |
+ s = *scan++; |
|
1432 |
+ switch (OP(s)) { |
|
1433 |
+ case OCHAR: /* sequence member */ |
|
1434 |
+ if (newlen == 0) /* new sequence */ |
|
1435 |
+ newstart = scan - 1; |
|
1436 |
+ newlen++; |
|
1437 |
+ break; |
|
1438 |
+ case OPLUS_: /* things that don't break one */ |
|
1439 |
+ case OLPAREN: |
|
1440 |
+ case ORPAREN: |
|
1441 |
+ break; |
|
1442 |
+ case OQUEST_: /* things that must be skipped */ |
|
1443 |
+ case OCH_: |
|
1444 |
+ scan--; |
|
1445 |
+ do { |
|
1446 |
+ scan += OPND(s); |
|
1447 |
+ s = *scan; |
|
1448 |
+ /* assert() interferes w debug printouts */ |
|
1449 |
+ if (OP(s) != O_QUEST && OP(s) != O_CH && |
|
1450 |
+ OP(s) != OOR2) { |
|
1451 |
+ g->iflags |= BAD; |
|
1452 |
+ return; |
|
1453 |
+ } |
|
1454 |
+ } while (OP(s) != O_QUEST && OP(s) != O_CH); |
|
1455 |
+ /* fallthrough */ |
|
1456 |
+ default: /* things that break a sequence */ |
|
1457 |
+ if (newlen > g->mlen) { /* ends one */ |
|
1458 |
+ start = newstart; |
|
1459 |
+ g->mlen = newlen; |
|
1460 |
+ } |
|
1461 |
+ newlen = 0; |
|
1462 |
+ break; |
|
1463 |
+ } |
|
1464 |
+ } while (OP(s) != OEND); |
|
1465 |
+ |
|
1466 |
+ if (g->mlen == 0) /* there isn't one */ |
|
1467 |
+ return; |
|
1468 |
+ |
|
1469 |
+ /* turn it into a character string */ |
|
1470 |
+ g->must = cli_malloc((size_t)g->mlen + 1); |
|
1471 |
+ if (g->must == NULL) { /* argh; just forget it */ |
|
1472 |
+ g->mlen = 0; |
|
1473 |
+ return; |
|
1474 |
+ } |
|
1475 |
+ cp = g->must; |
|
1476 |
+ scan = start; |
|
1477 |
+ for (i = g->mlen; i > 0; i--) { |
|
1478 |
+ while (OP(s = *scan++) != OCHAR) |
|
1479 |
+ continue; |
|
1480 |
+ assert(cp < g->must + g->mlen); |
|
1481 |
+ *cp++ = (char)OPND(s); |
|
1482 |
+ } |
|
1483 |
+ assert(cp == g->must + g->mlen); |
|
1484 |
+ *cp++ = '\0'; /* just on general principles */ |
|
1485 |
+} |
|
1486 |
+ |
|
1487 |
+/* |
|
1488 |
+ - pluscount - count + nesting |
|
1489 |
+ */ |
|
1490 |
+static sopno /* nesting depth */ |
|
1491 |
+pluscount(struct parse *p, struct re_guts *g) |
|
1492 |
+{ |
|
1493 |
+ sop *scan; |
|
1494 |
+ sop s; |
|
1495 |
+ sopno plusnest = 0; |
|
1496 |
+ sopno maxnest = 0; |
|
1497 |
+ |
|
1498 |
+ if (p->error != 0) |
|
1499 |
+ return(0); /* there may not be an OEND */ |
|
1500 |
+ |
|
1501 |
+ scan = g->strip + 1; |
|
1502 |
+ do { |
|
1503 |
+ s = *scan++; |
|
1504 |
+ switch (OP(s)) { |
|
1505 |
+ case OPLUS_: |
|
1506 |
+ plusnest++; |
|
1507 |
+ break; |
|
1508 |
+ case O_PLUS: |
|
1509 |
+ if (plusnest > maxnest) |
|
1510 |
+ maxnest = plusnest; |
|
1511 |
+ plusnest--; |
|
1512 |
+ break; |
|
1513 |
+ } |
|
1514 |
+ } while (OP(s) != OEND); |
|
1515 |
+ if (plusnest != 0) |
|
1516 |
+ g->iflags |= BAD; |
|
1517 |
+ return(maxnest); |
|
1518 |
+} |
0 | 1519 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,132 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+#include <sys/types.h> |
|
38 |
+#include <stdio.h> |
|
39 |
+#include <string.h> |
|
40 |
+#include <ctype.h> |
|
41 |
+#include <limits.h> |
|
42 |
+#include <stdlib.h> |
|
43 |
+#include "others.h" |
|
44 |
+#include "regex.h" |
|
45 |
+ |
|
46 |
+#include "utils.h" |
|
47 |
+ |
|
48 |
+static const char *regatoi(const regex_t *, char *, int); |
|
49 |
+ |
|
50 |
+static struct rerr { |
|
51 |
+ int code; |
|
52 |
+ const char *name; |
|
53 |
+ const char *explain; |
|
54 |
+} rerrs[] = { |
|
55 |
+ { REG_NOMATCH, "REG_NOMATCH", "cli_regexec() failed to match" }, |
|
56 |
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, |
|
57 |
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, |
|
58 |
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, |
|
59 |
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, |
|
60 |
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, |
|
61 |
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, |
|
62 |
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, |
|
63 |
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, |
|
64 |
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, |
|
65 |
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" }, |
|
66 |
+ { REG_ESPACE, "REG_ESPACE", "out of memory" }, |
|
67 |
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, |
|
68 |
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, |
|
69 |
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, |
|
70 |
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, |
|
71 |
+ { 0, "", "*** unknown regexp error code ***" } |
|
72 |
+}; |
|
73 |
+ |
|
74 |
+/* |
|
75 |
+ - cli_regerror - the interface to error numbers |
|
76 |
+ = extern size_t cli_regerror(int, const regex_t *, char *, size_t); |
|
77 |
+ */ |
|
78 |
+/* ARGSUSED */ |
|
79 |
+size_t |
|
80 |
+cli_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) |
|
81 |
+{ |
|
82 |
+ struct rerr *r; |
|
83 |
+ size_t len; |
|
84 |
+ int target = errcode &~ REG_ITOA; |
|
85 |
+ const char *s; |
|
86 |
+ char convbuf[50]; |
|
87 |
+ |
|
88 |
+ if (errcode == REG_ATOI) |
|
89 |
+ s = regatoi(preg, convbuf, sizeof convbuf); |
|
90 |
+ else { |
|
91 |
+ for (r = rerrs; r->code != 0; r++) |
|
92 |
+ if (r->code == target) |
|
93 |
+ break; |
|
94 |
+ |
|
95 |
+ if (errcode®_ITOA) { |
|
96 |
+ if (r->code != 0) { |
|
97 |
+ assert(strlen(r->name) < sizeof(convbuf)); |
|
98 |
+ (void) cli_strlcpy(convbuf, r->name, sizeof convbuf); |
|
99 |
+ } else |
|
100 |
+ (void)snprintf(convbuf, sizeof convbuf, |
|
101 |
+ "REG_0x%x", target); |
|
102 |
+ s = convbuf; |
|
103 |
+ } else |
|
104 |
+ s = r->explain; |
|
105 |
+ } |
|
106 |
+ |
|
107 |
+ len = strlen(s) + 1; |
|
108 |
+ if (errbuf_size > 0) { |
|
109 |
+ cli_strlcpy(errbuf, s, errbuf_size); |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ return(len); |
|
113 |
+} |
|
114 |
+ |
|
115 |
+/* |
|
116 |
+ - regatoi - internal routine to implement REG_ATOI |
|
117 |
+ */ |
|
118 |
+static const char * |
|
119 |
+regatoi(const regex_t *preg, char *localbuf, int localbufsize) |
|
120 |
+{ |
|
121 |
+ struct rerr *r; |
|
122 |
+ |
|
123 |
+ for (r = rerrs; r->code != 0; r++) |
|
124 |
+ if (strcmp(r->name, preg->re_endp) == 0) |
|
125 |
+ break; |
|
126 |
+ if (r->code == 0) |
|
127 |
+ return("0"); |
|
128 |
+ |
|
129 |
+ (void)snprintf(localbuf, localbufsize, "%d", r->code); |
|
130 |
+ return(localbuf); |
|
131 |
+} |
0 | 132 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,102 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer of the University of Toronto. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
#ifndef _REGEX_H_
#define	_REGEX_H_

#include <sys/types.h>

/*
 * Types.
 */
typedef off_t regoff_t;

/* A compiled regular expression. */
typedef struct {
	int re_magic;
	size_t re_nsub;		/* number of parenthesized subexpressions */
	const char *re_endp;	/* end pointer for REG_PEND */
	struct re_guts *re_g;	/* none of your business :-) */
} regex_t;

/* Offsets of one match. */
typedef struct {
	regoff_t rm_so;		/* start of match */
	regoff_t rm_eo;		/* end of match */
} regmatch_t;

/*
 * cli_regcomp() flags (octal bit values).
 */
#define	REG_BASIC	0000
#define	REG_EXTENDED	0001
#define	REG_ICASE	0002
#define	REG_NOSUB	0004
#define	REG_NEWLINE	0010
#define	REG_NOSPEC	0020
#define	REG_PEND	0040
#define	REG_DUMP	0200

/*
 * cli_regerror() flags: the error codes themselves, followed by the two
 * conversion requests.
 */
#define	REG_NOMATCH	 1
#define	REG_BADPAT	 2
#define	REG_ECOLLATE	 3
#define	REG_ECTYPE	 4
#define	REG_EESCAPE	 5
#define	REG_ESUBREG	 6
#define	REG_EBRACK	 7
#define	REG_EPAREN	 8
#define	REG_EBRACE	 9
#define	REG_BADBR	10
#define	REG_ERANGE	11
#define	REG_ESPACE	12
#define	REG_BADRPT	13
#define	REG_EMPTY	14
#define	REG_ASSERT	15
#define	REG_INVARG	16
#define	REG_ATOI	255	/* convert name to number (!) */
#define	REG_ITOA	0400	/* convert number to name (!) */

/*
 * cli_regexec() flags (octal bit values).
 */
#define	REG_NOTBOL	00001
#define	REG_NOTEOL	00002
#define	REG_STARTEND	00004
#define	REG_TRACE	00400	/* tracing of execution */
#define	REG_LARGE	01000	/* force large representation */
#define	REG_BACKR	02000	/* force use of backref code */

/*
 * Entry points.
 */
int	cli_regcomp(regex_t *, const char *, int);
size_t	cli_regerror(int, const regex_t *, char *, size_t);
int	cli_regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
void	cli_regfree(regex_t *);
size_t	cli_strlcpy(char *dst, const char *src, size_t siz);

#endif /* !_REGEX_H_ */
0 | 102 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,157 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
/*
 * internals of regex_t
 */
#define MAGIC1  ((('r'^0200)<<8) | 'e')

/*
 * The internal representation is a *strip*, a sequence of
 * operators ending with an endmarker.  (Some terminology etc. is a
 * historical relic of earlier versions which used multiple strips.)
 * Certain oddities in the representation are there to permit running
 * the machinery backwards; in particular, any deviation from sequential
 * flow must be marked at both its source and its destination.  Some
 * fine points:
 *
 * - OPLUS_ and O_PLUS are *inside* the loop they create.
 * - OQUEST_ and O_QUEST are *outside* the bypass they create.
 * - OCH_ and O_CH are *outside* the multi-way branch they create, while
 *   OOR1 and OOR2 are respectively the end and the beginning of one of
 *   the branches.  Note that there is an implicit OOR2 following OCH_
 *   and an implicit OOR1 preceding O_CH.
 *
 * In state representations, an operator's bit is on to signify a state
 * immediately *preceding* "execution" of that operator.
 *
 * Each sop packs an operator code in the top 5 bits of a 32-bit value
 * (OPRMASK) and its operand in the low 27 bits (OPDMASK).
 */
typedef unsigned long sop;      /* strip operator */
typedef long sopno;
#define OPRMASK 0xf8000000LU
#define OPDMASK 0x07ffffffLU
#define OPSHIFT ((unsigned)27)
#define OP(n)   ((n)&OPRMASK)           /* extract operator code */
#define OPND(n) ((n)&OPDMASK)           /* extract operand */
#define SOP(op, opnd)   ((op)|(opnd))   /* build a sop */
/* operators                       meaning         operand                 */
/*                                                 (back, fwd are offsets) */
#define OEND    (1LU<<OPSHIFT)  /* endmarker       -                       */
#define OCHAR   (2LU<<OPSHIFT)  /* character       unsigned char           */
#define OBOL    (3LU<<OPSHIFT)  /* left anchor     -                       */
#define OEOL    (4LU<<OPSHIFT)  /* right anchor    -                       */
#define OANY    (5LU<<OPSHIFT)  /* .               -                       */
#define OANYOF  (6LU<<OPSHIFT)  /* [...]           set number              */
#define OBACK_  (7LU<<OPSHIFT)  /* begin \d        paren number            */
#define O_BACK  (8LU<<OPSHIFT)  /* end \d          paren number            */
#define OPLUS_  (9LU<<OPSHIFT)  /* + prefix        fwd to suffix           */
#define O_PLUS  (10LU<<OPSHIFT) /* + suffix        back to prefix          */
#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix        fwd to suffix           */
#define O_QUEST (12LU<<OPSHIFT) /* ? suffix        back to prefix          */
#define OLPAREN (13LU<<OPSHIFT) /* (               fwd to )                */
#define ORPAREN (14LU<<OPSHIFT) /* )               back to (               */
#define OCH_    (15LU<<OPSHIFT) /* begin choice    fwd to OOR2             */
#define OOR1    (16LU<<OPSHIFT) /* | pt. 1         back to OOR1 or OCH_    */
#define OOR2    (17LU<<OPSHIFT) /* | pt. 2         fwd to OOR2 or O_CH     */
#define O_CH    (18LU<<OPSHIFT) /* end choice      back to OOR1            */
#define OBOW    (19LU<<OPSHIFT) /* begin word      -                       */
#define OEOW    (20LU<<OPSHIFT) /* end word        -                       */
|
91 |
+ |
|
92 |
+/* |
|
93 |
+ * Structure for [] character-set representation. Character sets are |
|
94 |
+ * done as bit vectors, grouped 8 to a byte vector for compactness. |
|
95 |
+ * The individual set therefore has both a pointer to the byte vector |
|
96 |
+ * and a mask to pick out the relevant bit of each byte. A hash code |
|
97 |
+ * simplifies testing whether two sets could be identical. |
|
98 |
+ * |
|
99 |
+ * This will get trickier for multicharacter collating elements. As |
|
100 |
+ * preliminary hooks for dealing with such things, we also carry along |
|
101 |
+ * a string of multi-character elements, and decide the size of the |
|
102 |
+ * vectors at run time. |
|
103 |
+ */ |
|
104 |
+typedef struct { |
|
105 |
+ uch *ptr; /* -> uch [csetsize] */ |
|
106 |
+ uch mask; /* bit within array */ |
|
107 |
+ uch hash; /* hash code */ |
|
108 |
+ size_t smultis; |
|
109 |
+ char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ |
|
110 |
+} cset; |
|
111 |
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ |
|
112 |
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) |
|
113 |
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) |
|
114 |
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) |
|
115 |
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* cli_regcomp() internal fns */ |
|
116 |
+#define MCsub(p, cs, cp) mcsub(p, cs, cp) |
|
117 |
+#define MCin(p, cs, cp) mcin(p, cs, cp) |
|
118 |
+ |
|
119 |
+/* stuff for character categories */ |
|
120 |
+typedef unsigned char cat_t; |
|
121 |
+ |
|
122 |
+/* |
|
123 |
+ * main compiled-expression structure |
|
124 |
+ */ |
|
125 |
+struct re_guts { |
|
126 |
+ int magic; |
|
127 |
+# define MAGIC2 ((('R'^0200)<<8)|'E') |
|
128 |
+ sop *strip; /* malloced area for strip */ |
|
129 |
+ int csetsize; /* number of bits in a cset vector */ |
|
130 |
+ int ncsets; /* number of csets in use */ |
|
131 |
+ cset *sets; /* -> cset [ncsets] */ |
|
132 |
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ |
|
133 |
+ int cflags; /* copy of cli_regcomp() cflags argument */ |
|
134 |
+ sopno nstates; /* = number of sops */ |
|
135 |
+ sopno firststate; /* the initial OEND (normally 0) */ |
|
136 |
+ sopno laststate; /* the final OEND */ |
|
137 |
+ int iflags; /* internal flags */ |
|
138 |
+# define USEBOL 01 /* used ^ */ |
|
139 |
+# define USEEOL 02 /* used $ */ |
|
140 |
+# define BAD 04 /* something wrong */ |
|
141 |
+ int nbol; /* number of ^ used */ |
|
142 |
+ int neol; /* number of $ used */ |
|
143 |
+ int ncategories; /* how many character categories */ |
|
144 |
+ cat_t *categories; /* ->catspace[-CHAR_MIN] */ |
|
145 |
+ char *must; /* match must contain this string */ |
|
146 |
+ int mlen; /* length of must */ |
|
147 |
+ size_t nsub; /* copy of re_nsub */ |
|
148 |
+ int backrefs; /* does it use back references? */ |
|
149 |
+ sopno nplus; /* how deep does it nest +s? */ |
|
150 |
+ /* catspace must be last */ |
|
151 |
+ cat_t catspace[1]; /* actually [NC] */ |
|
152 |
+}; |
|
153 |
+ |
|
154 |
/* misc utilities */
#define OUT (CHAR_MAX+1)    /* a non-character value */
/*
 * Word-character test.  The argument is converted to unsigned char before
 * it reaches isalnum(): passing a negative value (a plain `char` holding a
 * byte >= 0x80 on signed-char platforms) to a <ctype.h> function is
 * undefined behaviour.
 */
#define ISWORD(c)   (isalnum((unsigned char)(c)) || (c) == '_')
0 | 157 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,162 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+/* |
|
38 |
+ * the outer shell of cli_regexec() |
|
39 |
+ * |
|
40 |
+ * This file includes engine.c *twice*, after muchos fiddling with the |
|
41 |
+ * macros that code uses. This lets the same code operate on two different |
|
42 |
+ * representations for state sets. |
|
43 |
+ */ |
|
44 |
+#include <sys/types.h> |
|
45 |
+#include <stdio.h> |
|
46 |
+#include <stdlib.h> |
|
47 |
+#include <string.h> |
|
48 |
+#include <limits.h> |
|
49 |
+#include <ctype.h> |
|
50 |
+#include "others.h" |
|
51 |
+#include "regex.h" |
|
52 |
+ |
|
53 |
+#include "utils.h" |
|
54 |
+#include "regex2.h" |
|
55 |
+ |
|
56 |
/*
 * macros for manipulating states, small version
 *
 * A state set is a bit mask held in a single long.
 *
 * states1 is spelled out as `long` (and `states` derives from it) rather
 * than the other way round: macros are expanded where they are used, and
 * by the time cli_regexec() evaluates sizeof(states1) `states` has been
 * redefined to `char *` for the large version.  Defining states1 as
 * `states` would therefore measure a pointer, which is wider than long on
 * LLP64 targets and would route too-large patterns to the small matcher.
 */
#define states1 long        /* for later use in cli_regexec() decision */
#define states  states1
#define CLEAR(v)    ((v) = 0)
#define SET0(v, n)  ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n)  ((v) |= (unsigned long)1 << (n))
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
#define ASSIGN(d, s)    ((d) = (s))
#define EQ(a, b)    ((a) == (b))
#define STATEVARS   long dummy  /* dummy version */
#define STATESETUP(m, n)    /* nothing */
#define STATETEARDOWN(m)    /* nothing */
#define SETUP(v)    ((v) = 0)
#define onestate    long
#define INIT(o, n)  ((o) = (unsigned long)1 << (n))
#define INC(o)  ((o) <<= 1)
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(dst, src, n)    ((dst) |= ((unsigned long)(src)&(here)) << (n))
#define BACK(dst, src, n)   ((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
/* function names */
#define SNAMES      /* engine.c looks after details */
|
80 |
+ |
|
81 |
+#include "engine.c" |
|
82 |
+ |
|
83 |
+/* now undo things */ |
|
84 |
+#undef states |
|
85 |
+#undef CLEAR |
|
86 |
+#undef SET0 |
|
87 |
+#undef SET1 |
|
88 |
+#undef ISSET |
|
89 |
+#undef ASSIGN |
|
90 |
+#undef EQ |
|
91 |
+#undef STATEVARS |
|
92 |
+#undef STATESETUP |
|
93 |
+#undef STATETEARDOWN |
|
94 |
+#undef SETUP |
|
95 |
+#undef onestate |
|
96 |
+#undef INIT |
|
97 |
+#undef INC |
|
98 |
+#undef ISSTATEIN |
|
99 |
+#undef FWD |
|
100 |
+#undef BACK |
|
101 |
+#undef ISSETBACK |
|
102 |
+#undef SNAMES |
|
103 |
+ |
|
104 |
/*
 * macros for manipulating states, large version
 *
 * A state set is a char array with one byte per state (m->g->nstates
 * bytes).  All sets of one match are carved out of the single m->space
 * allocation made by STATESETUP and handed out by SETUP.
 */
#define states  char *
#define CLEAR(v)    memset(v, 0, m->g->nstates)
#define SET0(v, n)  ((v)[n] = 0)
#define SET1(v, n)  ((v)[n] = 1)
#define ISSET(v, n) ((v)[n])
#define ASSIGN(d, s)    memmove(d, s, m->g->nstates)
#define EQ(a, b)    (memcmp(a, b, m->g->nstates) == 0)
#define STATEVARS   long vn; char *space
/* NB: returns REG_ESPACE from the *enclosing function* on allocation failure */
#define STATESETUP(m, nv)   { (m)->space = cli_malloc((nv)*(m)->g->nstates); \
                if ((m)->space == NULL) return(REG_ESPACE); \
                (m)->vn = 0; }
#define STATETEARDOWN(m)    { free((m)->space); }
#define SETUP(v)    ((v) = &m->space[m->vn++ * m->g->nstates])
#define onestate    long
#define INIT(o, n)  ((o) = (n))
#define INC(o)  ((o)++)
#define ISSTATEIN(v, o) ((v)[o])
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(dst, src, n)    ((dst)[here+(n)] |= (src)[here])
#define BACK(dst, src, n)   ((dst)[here-(n)] |= (src)[here])
#define ISSETBACK(v, n) ((v)[here - (n)])
/* function names */
#define LNAMES      /* flag */
|
129 |
+ |
|
130 |
+#include "engine.c" |
|
131 |
+ |
|
132 |
+/* |
|
133 |
+ - cli_regexec - interface for matching |
|
134 |
+ * |
|
135 |
+ * We put this here so we can exploit knowledge of the state representation |
|
136 |
+ * when choosing which matcher to call. Also, by this point the matchers |
|
137 |
+ * have been prototyped. |
|
138 |
+ */ |
|
139 |
+int /* 0 success, REG_NOMATCH failure */ |
|
140 |
+cli_regexec(const regex_t *preg, const char *string, size_t nmatch, |
|
141 |
+ regmatch_t pmatch[], int eflags) |
|
142 |
+{ |
|
143 |
+ struct re_guts *g = preg->re_g; |
|
144 |
+#ifdef REDEBUG |
|
145 |
+# define GOODFLAGS(f) (f) |
|
146 |
+#else |
|
147 |
+# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) |
|
148 |
+#endif |
|
149 |
+ |
|
150 |
+ if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) |
|
151 |
+ return(REG_BADPAT); |
|
152 |
+ assert(!(g->iflags&BAD)); |
|
153 |
+ if (g->iflags&BAD) /* backstop for no-debug case */ |
|
154 |
+ return(REG_BADPAT); |
|
155 |
+ eflags = GOODFLAGS(eflags); |
|
156 |
+ |
|
157 |
+ if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) |
|
158 |
+ return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); |
|
159 |
+ else |
|
160 |
+ return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); |
|
161 |
+} |
0 | 162 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,73 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
+#include <sys/types.h> |
|
38 |
+#include <stdio.h> |
|
39 |
+#include <stdlib.h> |
|
40 |
+#include "others.h" |
|
41 |
+#include "regex.h" |
|
42 |
+ |
|
43 |
+#include "utils.h" |
|
44 |
+#include "regex2.h" |
|
45 |
+ |
|
46 |
+/* |
|
47 |
+ - cli_regfree - free everything |
|
48 |
+ */ |
|
49 |
+void |
|
50 |
+cli_regfree(regex_t *preg) |
|
51 |
+{ |
|
52 |
+ struct re_guts *g; |
|
53 |
+ |
|
54 |
+ if (preg->re_magic != MAGIC1) /* oops */ |
|
55 |
+ return; /* nice to complain, but hard */ |
|
56 |
+ |
|
57 |
+ g = preg->re_g; |
|
58 |
+ if (g == NULL || g->magic != MAGIC2) /* oops again */ |
|
59 |
+ return; |
|
60 |
+ preg->re_magic = 0; /* mark it invalid */ |
|
61 |
+ g->magic = 0; /* mark it invalid */ |
|
62 |
+ |
|
63 |
+ if (g->strip != NULL) |
|
64 |
+ free((char *)g->strip); |
|
65 |
+ if (g->sets != NULL) |
|
66 |
+ free((char *)g->sets); |
|
67 |
+ if (g->setbits != NULL) |
|
68 |
+ free((char *)g->setbits); |
|
69 |
+ if (g->must != NULL) |
|
70 |
+ free(g->must); |
|
71 |
+ free((char *)g); |
|
72 |
+} |
0 | 73 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,52 @@ |
0 |
+/* |
|
1 |
+ * This code is derived from OpenBSD's libc, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> |
|
4 |
+ * |
|
5 |
+ * Permission to use, copy, modify, and distribute this software for any |
|
6 |
+ * purpose with or without fee is hereby granted, provided that the above |
|
7 |
+ * copyright notice and this permission notice appear in all copies. |
|
8 |
+ * |
|
9 |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|
10 |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|
11 |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
|
12 |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
13 |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
|
14 |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
|
15 |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <sys/types.h> |
|
19 |
+#include <string.h> |
|
20 |
+ |
|
21 |
+#include "regex.h" |
|
22 |
/*
 * Copy src to string dst of size siz.  At most siz-1 characters
 * will be copied.  Always NUL terminates (unless siz == 0).
 * Returns strlen(src); if retval >= siz, truncation occurred.
 *
 * dst  destination buffer of at least siz bytes (untouched when siz == 0)
 * src  NUL-terminated source string; always read up to its terminator
 * siz  total size of dst, including room for the terminating NUL
 */
size_t
cli_strlcpy(char *dst, const char *src, size_t siz)
{
    char *d = dst;
    const char *s = src;
    size_t n = siz;

    /* Copy as many bytes as will fit */
    if (n != 0) {
        while (--n != 0) {
            if ((*d++ = *s++) == '\0')
                break;
        }
    }

    /* Not enough room in dst, add NUL and traverse rest of src */
    if (n == 0) {
        if (siz != 0)
            *d = '\0';      /* NUL-terminate dst */
        while (*s++)
            ;
    }

    /* s is one past src's NUL here; count does not include NUL */
    return (size_t)(s - src - 1);
}
0 | 52 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,59 @@ |
0 |
+/*- |
|
1 |
+ * This code is derived from OpenBSD's libc/regex, original license follows: |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
|
4 |
+ * Copyright (c) 1992, 1993, 1994 |
|
5 |
+ * The Regents of the University of California. All rights reserved. |
|
6 |
+ * |
|
7 |
+ * This code is derived from software contributed to Berkeley by |
|
8 |
+ * Henry Spencer. |
|
9 |
+ * |
|
10 |
+ * Redistribution and use in source and binary forms, with or without |
|
11 |
+ * modification, are permitted provided that the following conditions |
|
12 |
+ * are met: |
|
13 |
+ * 1. Redistributions of source code must retain the above copyright |
|
14 |
+ * notice, this list of conditions and the following disclaimer. |
|
15 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
16 |
+ * notice, this list of conditions and the following disclaimer in the |
|
17 |
+ * documentation and/or other materials provided with the distribution. |
|
18 |
+ * 3. Neither the name of the University nor the names of its contributors |
|
19 |
+ * may be used to endorse or promote products derived from this software |
|
20 |
+ * without specific prior written permission. |
|
21 |
+ * |
|
22 |
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
23 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
24 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
25 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
26 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
27 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
28 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
29 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
30 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
31 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
32 |
+ * SUCH DAMAGE. |
|
33 |
+ * |
|
34 |
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94 |
|
35 |
+ */ |
|
36 |
+ |
|
37 |
/* utility definitions */
#define DUPMAX      _POSIX2_RE_DUP_MAX  /* xxx is this right? */
#define INFINITY    (DUPMAX + 1)
#define NC          (CHAR_MAX - CHAR_MIN + 1)   /* number of distinct char values */
typedef unsigned char uch;

/*
 * switch off assertions (if not already off) if no REDEBUG;
 * a CL_DEBUG build turns REDEBUG (and with it the assertions) on
 */
#ifdef CL_DEBUG
#define REDEBUG
#endif

#ifndef REDEBUG
#ifndef NDEBUG
#define NDEBUG  /* no assertions please */
#endif
#endif
#include <assert.h>

/* for old systems with bcopy() but no memmove() */
#ifdef USEBCOPY
#define memmove(d, s, c)    bcopy(s, d, c)
#endif
... | ... |
@@ -52,9 +52,7 @@ |
52 | 52 |
#include <limits.h> |
53 | 53 |
#include <sys/types.h> |
54 | 54 |
|
55 |
-#ifdef HAVE_REGEX_H |
|
56 |
-#include <regex.h> |
|
57 |
-#endif |
|
55 |
+#include "regex/regex.h" |
|
58 | 56 |
|
59 | 57 |
|
60 | 58 |
#include "clamav.h" |
... | ... |
@@ -357,7 +355,6 @@ static struct tree_node* stack_pop(struct node_stack* stack) |
357 | 357 |
} |
358 | 358 |
|
359 | 359 |
/* Initialization & loading */ |
360 |
- |
|
361 | 360 |
/* Initializes @matcher, allocating necesarry substructures */ |
362 | 361 |
int init_regex_list(struct regex_matcher* matcher) |
363 | 362 |
{ |
... | ... |
@@ -1194,7 +1191,7 @@ static int add_pattern(struct regex_matcher* matcher,const unsigned char* pat,co |
1194 | 1194 |
preg=cli_malloc(sizeof(*preg)); |
1195 | 1195 |
if(!preg) |
1196 | 1196 |
return CL_EMEM; |
1197 |
- rc = regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL)); |
|
1197 |
+ rc = cli_regcomp(preg,(const char*)token.u.start,REG_EXTENDED|(bol?0:REG_NOTBOL)); |
|
1198 | 1198 |
leaf->preg=preg; |
1199 | 1199 |
if(rc) |
1200 | 1200 |
return rc; |
... | ... |
@@ -1275,7 +1272,7 @@ static int match_node(struct tree_node* node,const unsigned char* c,size_t len,c |
1275 | 1275 |
const struct leaf_info* leaf = node->u.leaf; |
1276 | 1276 |
/*isleaf = 1;*/ |
1277 | 1277 |
if(leaf->preg) { |
1278 |
- rc = !regexec(leaf->preg,(const char*)c,0,NULL,0); |
|
1278 |
+ rc = !cli_regexec(leaf->preg,(const char*)c,0,NULL,0); |
|
1279 | 1279 |
} |
1280 | 1280 |
else { |
1281 | 1281 |
massert(*c==node->c && "We know this has to match[2]"); |
... | ... |
@@ -1394,7 +1391,7 @@ static void destroy_tree_internal(struct regex_matcher* matcher,struct tree_node |
1394 | 1394 |
stack_push_once(&matcher->node_stack,(struct tree_node*)node->u.leaf);/* cast to make compiler happy, and to not make another stack implementation for storing void* */ |
1395 | 1395 |
stack_push_once(&matcher->node_stack,node); |
1396 | 1396 |
if(leaf->preg) { |
1397 |
- regfree(leaf->preg); |
|
1397 |
+ cli_regfree(leaf->preg); |
|
1398 | 1398 |
free(leaf->preg); |
1399 | 1399 |
leaf->preg=NULL; |
1400 | 1400 |
} |