Browse code

add Javascript normalizer (not yet enabled)

git-svn: trunk@3917

Török Edvin authored on 2008/07/08 04:10:50
Showing 30 changed files
... ...
@@ -1,3 +1,7 @@
1
+Mon Jul  7 21:42:18 EEST 2008 (edwin)
2
+-------------------------------------
3
+  * build system, libclamav/jsparse: add JavaScript normalizer (not yet enabled)
4
+
1 5
 Mon Jul  7 15:41:02 CEST 2008 (tk)
2 6
 ----------------------------------
3 7
   * libclamav/ole2_extract.c, sigtool: make sigtool compatible with the new
... ...
@@ -150,6 +150,7 @@ EGREP = @EGREP@
150 150
 EXEEXT = @EXEEXT@
151 151
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
152 152
 GETENT = @GETENT@
153
+GPERF = @GPERF@
153 154
 GREP = @GREP@
154 155
 HAVE_LIBGMP = @HAVE_LIBGMP@
155 156
 INSTALL = @INSTALL@
... ...
@@ -132,6 +132,7 @@ EGREP = @EGREP@
132 132
 EXEEXT = @EXEEXT@
133 133
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
134 134
 GETENT = @GETENT@
135
+GPERF = @GPERF@
135 136
 GREP = @GREP@
136 137
 HAVE_LIBGMP = @HAVE_LIBGMP@
137 138
 INSTALL = @INSTALL@
... ...
@@ -117,6 +117,7 @@ EGREP = @EGREP@
117 117
 EXEEXT = @EXEEXT@
118 118
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
119 119
 GETENT = @GETENT@
120
+GPERF = @GPERF@
120 121
 GREP = @GREP@
121 122
 HAVE_LIBGMP = @HAVE_LIBGMP@
122 123
 INSTALL = @INSTALL@
... ...
@@ -137,6 +137,7 @@ EGREP = @EGREP@
137 137
 EXEEXT = @EXEEXT@
138 138
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
139 139
 GETENT = @GETENT@
140
+GPERF = @GPERF@
140 141
 GREP = @GREP@
141 142
 HAVE_LIBGMP = @HAVE_LIBGMP@
142 143
 INSTALL = @INSTALL@
... ...
@@ -126,6 +126,7 @@ EGREP = @EGREP@
126 126
 EXEEXT = @EXEEXT@
127 127
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
128 128
 GETENT = @GETENT@
129
+GPERF = @GPERF@
129 130
 GREP = @GREP@
130 131
 HAVE_LIBGMP = @HAVE_LIBGMP@
131 132
 INSTALL = @INSTALL@
... ...
@@ -119,6 +119,7 @@ EGREP = @EGREP@
119 119
 EXEEXT = @EXEEXT@
120 120
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
121 121
 GETENT = @GETENT@
122
+GPERF = @GPERF@
122 123
 GREP = @GREP@
123 124
 HAVE_LIBGMP = @HAVE_LIBGMP@
124 125
 INSTALL = @INSTALL@
... ...
@@ -867,6 +867,7 @@ LIBTOOL
867 867
 VERSIONSCRIPTFLAG
868 868
 VERSIONSCRIPT_TRUE
869 869
 VERSIONSCRIPT_FALSE
870
+GPERF
870 871
 LCHECK
871 872
 MAINTAINER_MODE_TRUE
872 873
 MAINTAINER_MODE_FALSE
... ...
@@ -4767,7 +4768,7 @@ ia64-*-hpux*)
4767 4767
   ;;
4768 4768
 *-*-irix6*)
4769 4769
   # Find out which ABI we are using.
4770
-  echo '#line 4770 "configure"' > conftest.$ac_ext
4770
+  echo '#line 4771 "configure"' > conftest.$ac_ext
4771 4771
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
4772 4772
   (eval $ac_compile) 2>&5
4773 4773
   ac_status=$?
... ...
@@ -6857,11 +6858,11 @@ else
6857 6857
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
6858 6858
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
6859 6859
    -e 's:$: $lt_compiler_flag:'`
6860
-   (eval echo "\"\$as_me:6860: $lt_compile\"" >&5)
6860
+   (eval echo "\"\$as_me:6861: $lt_compile\"" >&5)
6861 6861
    (eval "$lt_compile" 2>conftest.err)
6862 6862
    ac_status=$?
6863 6863
    cat conftest.err >&5
6864
-   echo "$as_me:6864: \$? = $ac_status" >&5
6864
+   echo "$as_me:6865: \$? = $ac_status" >&5
6865 6865
    if (exit $ac_status) && test -s "$ac_outfile"; then
6866 6866
      # The compiler can only warn and ignore the option if not recognized
6867 6867
      # So say no if there are warnings other than the usual output.
... ...
@@ -7147,11 +7148,11 @@ else
7147 7147
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
7148 7148
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
7149 7149
    -e 's:$: $lt_compiler_flag:'`
7150
-   (eval echo "\"\$as_me:7150: $lt_compile\"" >&5)
7150
+   (eval echo "\"\$as_me:7151: $lt_compile\"" >&5)
7151 7151
    (eval "$lt_compile" 2>conftest.err)
7152 7152
    ac_status=$?
7153 7153
    cat conftest.err >&5
7154
-   echo "$as_me:7154: \$? = $ac_status" >&5
7154
+   echo "$as_me:7155: \$? = $ac_status" >&5
7155 7155
    if (exit $ac_status) && test -s "$ac_outfile"; then
7156 7156
      # The compiler can only warn and ignore the option if not recognized
7157 7157
      # So say no if there are warnings other than the usual output.
... ...
@@ -7251,11 +7252,11 @@ else
7251 7251
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
7252 7252
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
7253 7253
    -e 's:$: $lt_compiler_flag:'`
7254
-   (eval echo "\"\$as_me:7254: $lt_compile\"" >&5)
7254
+   (eval echo "\"\$as_me:7255: $lt_compile\"" >&5)
7255 7255
    (eval "$lt_compile" 2>out/conftest.err)
7256 7256
    ac_status=$?
7257 7257
    cat out/conftest.err >&5
7258
-   echo "$as_me:7258: \$? = $ac_status" >&5
7258
+   echo "$as_me:7259: \$? = $ac_status" >&5
7259 7259
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
7260 7260
    then
7261 7261
      # The compiler can only warn and ignore the option if not recognized
... ...
@@ -9628,7 +9629,7 @@ else
9628 9628
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
9629 9629
   lt_status=$lt_dlunknown
9630 9630
   cat > conftest.$ac_ext <<EOF
9631
-#line 9631 "configure"
9631
+#line 9632 "configure"
9632 9632
 #include "confdefs.h"
9633 9633
 
9634 9634
 #if HAVE_DLFCN_H
... ...
@@ -9728,7 +9729,7 @@ else
9728 9728
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
9729 9729
   lt_status=$lt_dlunknown
9730 9730
   cat > conftest.$ac_ext <<EOF
9731
-#line 9731 "configure"
9731
+#line 9732 "configure"
9732 9732
 #include "confdefs.h"
9733 9733
 
9734 9734
 #if HAVE_DLFCN_H
... ...
@@ -11570,6 +11571,12 @@ else
11570 11570
 fi
11571 11571
 
11572 11572
 
11573
+# it is not fatal if gperf is missing
11574
+
11575
+GPERF=${GPERF-"${am_missing_run}gperf"}
11576
+
11577
+
11578
+
11573 11579
 
11574 11580
 
11575 11581
 
... ...
@@ -20179,6 +20186,7 @@ LIBTOOL!$LIBTOOL$ac_delim
20179 20179
 VERSIONSCRIPTFLAG!$VERSIONSCRIPTFLAG$ac_delim
20180 20180
 VERSIONSCRIPT_TRUE!$VERSIONSCRIPT_TRUE$ac_delim
20181 20181
 VERSIONSCRIPT_FALSE!$VERSIONSCRIPT_FALSE$ac_delim
20182
+GPERF!$GPERF$ac_delim
20182 20183
 LCHECK!$LCHECK$ac_delim
20183 20184
 MAINTAINER_MODE_TRUE!$MAINTAINER_MODE_TRUE$ac_delim
20184 20185
 MAINTAINER_MODE_FALSE!$MAINTAINER_MODE_FALSE$ac_delim
... ...
@@ -20212,7 +20220,7 @@ LIBOBJS!$LIBOBJS$ac_delim
20212 20212
 LTLIBOBJS!$LTLIBOBJS$ac_delim
20213 20213
 _ACEOF
20214 20214
 
20215
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 36; then
20215
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 37; then
20216 20216
     break
20217 20217
   elif $ac_last_try; then
20218 20218
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
... ...
@@ -289,6 +289,10 @@ EOF
289 289
 AC_SUBST([VERSIONSCRIPTFLAG])
290 290
 AM_CONDITIONAL([VERSIONSCRIPT], test "x$ac_cv_ld_version_script" = "xyes")
291 291
 
292
+# it is not fatal if gperf is missing
293
+AM_MISSING_PROG(GPERF, gperf)
294
+AC_SUBST(GPERF)
295
+
292 296
 AC_CHECK_HEADERS([stdint.h unistd.h sys/int_types.h dlfcn.h inttypes.h sys/inttypes.h memory.h ndir.h stdlib.h strings.h string.h sys/mman.h sys/param.h sys/stat.h sys/types.h malloc.h poll.h limits.h sys/filio.h sys/uio.h termios.h stdbool.h pwd.h grp.h])
293 297
 AC_CHECK_HEADER([syslog.h],AC_DEFINE([USE_SYSLOG],1,[use syslog]),)
294 298
 
... ...
@@ -96,6 +96,7 @@ EGREP = @EGREP@
96 96
 EXEEXT = @EXEEXT@
97 97
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
98 98
 GETENT = @GETENT@
99
+GPERF = @GPERF@
99 100
 GREP = @GREP@
100 101
 HAVE_LIBGMP = @HAVE_LIBGMP@
101 102
 INSTALL = @INSTALL@
... ...
@@ -101,6 +101,7 @@ EGREP = @EGREP@
101 101
 EXEEXT = @EXEEXT@
102 102
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
103 103
 GETENT = @GETENT@
104
+GPERF = @GPERF@
104 105
 GREP = @GREP@
105 106
 HAVE_LIBGMP = @HAVE_LIBGMP@
106 107
 INSTALL = @INSTALL@
... ...
@@ -94,6 +94,7 @@ EGREP = @EGREP@
94 94
 EXEEXT = @EXEEXT@
95 95
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
96 96
 GETENT = @GETENT@
97
+GPERF = @GPERF@
97 98
 GREP = @GREP@
98 99
 HAVE_LIBGMP = @HAVE_LIBGMP@
99 100
 INSTALL = @INSTALL@
... ...
@@ -122,6 +122,7 @@ EGREP = @EGREP@
122 122
 EXEEXT = @EXEEXT@
123 123
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
124 124
 GETENT = @GETENT@
125
+GPERF = @GPERF@
125 126
 GREP = @GREP@
126 127
 HAVE_LIBGMP = @HAVE_LIBGMP@
127 128
 INSTALL = @INSTALL@
... ...
@@ -188,7 +188,10 @@ libclamav_la_SOURCES = \
188 188
 	textnorm.c \
189 189
 	textnorm.h \
190 190
 	dlp.c \
191
-	dlp.h
191
+	dlp.h \
192
+	jsparse/js-norm.c \
193
+	jsparse/js-norm.h \
194
+	jsparse/lexglobal.h
192 195
 
193 196
 libclamav_internal_utils_la_SOURCES=str.c \
194 197
 				    str.h \
... ...
@@ -199,4 +202,33 @@ libclamav_internal_utils_la_LDFLAGS=-static
199 199
 libclamav_internal_utils_la_CFLAGS=-DCLI_MEMFUNSONLY
200 200
 lib_LTLIBRARIES = libclamav.la
201 201
 noinst_LTLIBRARIES = libclamav_internal_utils.la
202
-EXTRA_DIST = regex/engine.c libclamav.map
202
+EXTRA_DIST = regex/engine.c libclamav.map jsparse-keywords.gperf \
203
+	     jsparse/generated/operators.h jsparse/generated/keywords.h jsparse/future_reserved_words.list \
204
+	     jsparse/keywords.list jsparse/special_keywords.list jsparse/operators.gperf
205
+
206
+if MAINTAINER_MODE
207
+BUILT_SOURCES=jsparse/generated/operators.h jsparse/generated/keywords.h jsparse-keywords.gperf
208
+
209
+GPERF_FLAGS=-E -t -L ANSI-C -C -F ', TOK_ERROR' -c
210
+
211
+# Assemble the gperf input from the word lists; printf is used because `echo -e` is not portable across /bin/sh implementations.
212
+jsparse-keywords.gperf: jsparse/keywords.list jsparse/future_reserved_words.list jsparse/special_keywords.list
213
+	printf 'struct keyword { const char *name; int val; };\n%%%%\n' >keywords-g-tmp
214
+	for i in `cat @srcdir@/jsparse/keywords.list`; do j=`echo $$i |tr \[a-z\] \[A-Z\]`; echo "$$i, TOK_$$j" >>keywords-g-tmp; done
215
+	for i in `cat @srcdir@/jsparse/future_reserved_words.list`; do echo "$$i, TOK_FUTURE_RESERVED_WORD" >>keywords-g-tmp; done
216
+	cat @srcdir@/jsparse/special_keywords.list >>keywords-g-tmp && mv keywords-g-tmp $@
217
+
218
+jsparse/generated/operators.h: jsparse/operators.gperf
219
+	$(GPERF) $(GPERF_FLAGS) -H op_hash -N in_op_set -W oplist $< >operators-tmp-g
220
+	grep -v '^#line' <operators-tmp-g | sed -e 's/^const struct/static const struct/' -e 's/register //g' >operators-tmp
221
+	rm operators-tmp-g
222
+	mv operators-tmp @srcdir@/jsparse/generated/operators.h
223
+
224
+jsparse/generated/keywords.h: jsparse-keywords.gperf
225
+	$(GPERF) $(GPERF_FLAGS) $< >keywords-tmp-g
226
+	grep -v '^#line' <keywords-tmp-g |  sed -e 's/^const struct/static const struct/' -e 's/register //g' >keywords-tmp
227
+	rm keywords-tmp-g
228
+	mv keywords-tmp @srcdir@/jsparse/generated/keywords.h
229
+
230
+CLEANFILES=jsparse-keywords.gperf @srcdir@/jsparse/generated/operators.h @srcdir@/jsparse/generated/keywords.h
231
+endif
... ...
@@ -89,7 +89,7 @@ am_libclamav_la_OBJECTS = matcher-ac.lo matcher-bm.lo matcher.lo \
89 89
 	infblock.lo pdf.lo spin.lo yc.lo elf.lo sis.lo uuencode.lo \
90 90
 	phishcheck.lo phish_domaincheck_db.lo phish_whitelist.lo \
91 91
 	regex_list.lo mspack.lo cab.lo entconv.lo hashtab.lo dconf.lo \
92
-	lzma_iface.lo explode.lo textnorm.lo dlp.lo
92
+	lzma_iface.lo explode.lo textnorm.lo dlp.lo js-norm.lo
93 93
 libclamav_la_OBJECTS = $(am_libclamav_la_OBJECTS)
94 94
 libclamav_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
95 95
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
... ...
@@ -165,6 +165,7 @@ EGREP = @EGREP@
165 165
 EXEEXT = @EXEEXT@
166 166
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
167 167
 GETENT = @GETENT@
168
+GPERF = @GPERF@
168 169
 GREP = @GREP@
169 170
 HAVE_LIBGMP = @HAVE_LIBGMP@
170 171
 INSTALL = @INSTALL@
... ...
@@ -423,7 +424,10 @@ libclamav_la_SOURCES = \
423 423
 	textnorm.c \
424 424
 	textnorm.h \
425 425
 	dlp.c \
426
-	dlp.h
426
+	dlp.h \
427
+	jsparse/js-norm.c \
428
+	jsparse/js-norm.h \
429
+	jsparse/lexglobal.h
427 430
 
428 431
 libclamav_internal_utils_la_SOURCES = str.c \
429 432
 				    str.h \
... ...
@@ -434,8 +438,15 @@ libclamav_internal_utils_la_LDFLAGS = -static
434 434
 libclamav_internal_utils_la_CFLAGS = -DCLI_MEMFUNSONLY
435 435
 lib_LTLIBRARIES = libclamav.la
436 436
 noinst_LTLIBRARIES = libclamav_internal_utils.la
437
-EXTRA_DIST = regex/engine.c libclamav.map
438
-all: all-recursive
437
+EXTRA_DIST = regex/engine.c libclamav.map jsparse-keywords.gperf \
438
+	     jsparse-operators.h jsparse-keywords.h jsparse/future_reserved_words\
439
+	     jsparse/keywords jsparse/special_keywords jsparse/operators.gperf
440
+
441
+@MAINTAINER_MODE_TRUE@BUILT_SOURCES = jsparse/generated/operators.h jsparse/generated/keywords.h jsparse-keywords.gperf
442
+@MAINTAINER_MODE_TRUE@GPERF_FLAGS = -E -t -L ANSI-C -C -F ', TOK_ERROR' -c
443
+@MAINTAINER_MODE_TRUE@CLEANFILES = jsparse-keywords.gperf @srcdir@/jsparse/generated/operators.h @srcdir@/jsparse/generated/keywords.h
444
+all: $(BUILT_SOURCES)
445
+	$(MAKE) $(AM_MAKEFLAGS) all-recursive
439 446
 
440 447
 .SUFFIXES:
441 448
 .SUFFIXES: .c .lo .o .obj
... ...
@@ -536,6 +547,7 @@ distclean-compile:
536 536
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/infblock.Plo@am__quote@
537 537
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/inflate64.Plo@am__quote@
538 538
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/is_tar.Plo@am__quote@
539
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/js-norm.Plo@am__quote@
539 540
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_internal_utils_la-others.Plo@am__quote@
540 541
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_internal_utils_la-str.Plo@am__quote@
541 542
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/line.Plo@am__quote@
... ...
@@ -666,6 +678,13 @@ infblock.lo: nsis/infblock.c
666 666
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
667 667
 @am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o infblock.lo `test -f 'nsis/infblock.c' || echo '$(srcdir)/'`nsis/infblock.c
668 668
 
669
+js-norm.lo: jsparse/js-norm.c
670
+@am__fastdepCC_TRUE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT js-norm.lo -MD -MP -MF $(DEPDIR)/js-norm.Tpo -c -o js-norm.lo `test -f 'jsparse/js-norm.c' || echo '$(srcdir)/'`jsparse/js-norm.c
671
+@am__fastdepCC_TRUE@	mv -f $(DEPDIR)/js-norm.Tpo $(DEPDIR)/js-norm.Plo
672
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='jsparse/js-norm.c' object='js-norm.lo' libtool=yes @AMDEPBACKSLASH@
673
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
674
+@am__fastdepCC_FALSE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o js-norm.lo `test -f 'jsparse/js-norm.c' || echo '$(srcdir)/'`jsparse/js-norm.c
675
+
669 676
 libclamav_internal_utils_la-str.lo: str.c
670 677
 @am__fastdepCC_TRUE@	$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_internal_utils_la_CFLAGS) $(CFLAGS) -MT libclamav_internal_utils_la-str.lo -MD -MP -MF $(DEPDIR)/libclamav_internal_utils_la-str.Tpo -c -o libclamav_internal_utils_la-str.lo `test -f 'str.c' || echo '$(srcdir)/'`str.c
671 678
 @am__fastdepCC_TRUE@	mv -f $(DEPDIR)/libclamav_internal_utils_la-str.Tpo $(DEPDIR)/libclamav_internal_utils_la-str.Plo
... ...
@@ -877,14 +896,16 @@ distdir: $(DISTFILES)
877 877
 	  fi; \
878 878
 	done
879 879
 check-am: all-am
880
-check: check-recursive
880
+check: $(BUILT_SOURCES)
881
+	$(MAKE) $(AM_MAKEFLAGS) check-recursive
881 882
 all-am: Makefile $(LTLIBRARIES) $(HEADERS)
882 883
 installdirs: installdirs-recursive
883 884
 installdirs-am:
884 885
 	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \
885 886
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
886 887
 	done
887
-install: install-recursive
888
+install: $(BUILT_SOURCES)
889
+	$(MAKE) $(AM_MAKEFLAGS) install-recursive
888 890
 install-exec: install-exec-recursive
889 891
 install-data: install-data-recursive
890 892
 uninstall: uninstall-recursive
... ...
@@ -901,6 +922,7 @@ install-strip:
901 901
 mostlyclean-generic:
902 902
 
903 903
 clean-generic:
904
+	-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
904 905
 
905 906
 distclean-generic:
906 907
 	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
... ...
@@ -908,6 +930,7 @@ distclean-generic:
908 908
 maintainer-clean-generic:
909 909
 	@echo "This command is intended for maintainers to use"
910 910
 	@echo "it deletes files that may require special tools to rebuild."
911
+	-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
911 912
 clean: clean-recursive
912 913
 
913 914
 clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
... ...
@@ -987,6 +1010,25 @@ uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES
987 987
 	tags tags-recursive uninstall uninstall-am \
988 988
 	uninstall-includeHEADERS uninstall-libLTLIBRARIES
989 989
 
990
+
991
+@MAINTAINER_MODE_TRUE@jsparse-keywords.gperf: jsparse/keywords.list jsparse/future_reserved_words.list jsparse/special_keywords.list
992
+@MAINTAINER_MODE_TRUE@	echo -e "struct keyword { const char *name; int val; };\n%%" >keywords-g-tmp
993
+@MAINTAINER_MODE_TRUE@	for i in `cat @srcdir@/jsparse/keywords.list`; do j=`echo $$i |tr \[a-z\] \[A-Z\]`; echo "$$i, TOK_$$j" >>keywords-g-tmp; done
994
+@MAINTAINER_MODE_TRUE@	for i in `cat @srcdir@/jsparse/future_reserved_words.list`; do echo "$$i, TOK_FUTURE_RESERVED_WORD" >>keywords-g-tmp; done
995
+@MAINTAINER_MODE_TRUE@	cat @srcdir@/jsparse/special_keywords.list >>keywords-g-tmp
996
+@MAINTAINER_MODE_TRUE@	mv keywords-g-tmp $@
997
+
998
+@MAINTAINER_MODE_TRUE@jsparse/generated/operators.h: jsparse/operators.gperf
999
+@MAINTAINER_MODE_TRUE@	$(GPERF) $(GPERF_FLAGS) -H op_hash -N in_op_set -W oplist $< >operators-tmp-g
1000
+@MAINTAINER_MODE_TRUE@	grep -v '^#line' <operators-tmp-g | sed -e 's/^const struct/static const struct/' -e 's/register //g' >operators-tmp
1001
+@MAINTAINER_MODE_TRUE@	rm operators-tmp-g
1002
+@MAINTAINER_MODE_TRUE@	mv operators-tmp @srcdir@/jsparse/generated/operators.h
1003
+
1004
+@MAINTAINER_MODE_TRUE@jsparse/generated/keywords.h: jsparse-keywords.gperf
1005
+@MAINTAINER_MODE_TRUE@	$(GPERF) $(GPERF_FLAGS) $< >keywords-tmp-g
1006
+@MAINTAINER_MODE_TRUE@	grep -v '^#line' <keywords-tmp-g |  sed -e 's/^const struct/static const struct/' -e 's/register //g' >keywords-tmp
1007
+@MAINTAINER_MODE_TRUE@	rm keywords-tmp-g
1008
+@MAINTAINER_MODE_TRUE@	mv keywords-tmp @srcdir@/jsparse/generated/keywords.h
990 1009
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
991 1010
 # Otherwise a system limit (for SysV at least) may be exceeded.
992 1011
 .NOEXPORT:
993 1012
new file mode 100644
... ...
@@ -0,0 +1,31 @@
0
+abstract
1
+boolean
2
+byte
3
+char
4
+class
5
+const
6
+debugger
7
+double
8
+enum
9
+export
10
+extends
11
+final
12
+float
13
+goto
14
+implements
15
+import
16
+int
17
+interface
18
+long
19
+native
20
+package
21
+private
22
+protected
23
+public
24
+short
25
+static
26
+super
27
+synchronized
28
+throws
29
+transient
30
+volatile
0 31
new file mode 100644
... ...
@@ -0,0 +1,194 @@
0
+/* ANSI-C code produced by gperf version 3.0.3 */
1
+/* Command-line: gperf -E -t -L ANSI-C -C -F ', TOK_ERROR' -c jsparse-keywords.gperf  */
2
+/* Computed positions: -k'1-2' */
3
+
4
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
5
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
6
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
7
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
8
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
9
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
10
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
11
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
12
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
13
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
14
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
15
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
16
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
17
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
18
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
19
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
20
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
21
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
22
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
23
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
24
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
25
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
26
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
27
+/* The character set is not based on ISO-646.  */
28
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
29
+#endif
30
+
31
+struct keyword { const char *name; int val; };
32
+/* maximum key range = 100, duplicates = 0 */
33
+
34
+#ifdef __GNUC__
35
+__inline
36
+#else
37
+#ifdef __cplusplus
38
+inline
39
+#endif
40
+#endif
41
+static unsigned int
42
+hash (const char *str, unsigned int len)
43
+{
44
+  static const unsigned char asso_values[] =
45
+    {
46
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
47
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
48
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
49
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
50
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
51
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
52
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
53
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
54
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
55
+      103, 103, 103, 103, 103, 103, 103,  30,   5,   0,
56
+        5,   0,  10,  50,  35,   5, 103, 103,  25,  55,
57
+        0,  20,  35, 103,   0,  40,  15,   5,  45,  55,
58
+       45,  50, 103, 103, 103, 103, 103, 103, 103, 103,
59
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
60
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
61
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
62
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
63
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
64
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
65
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
66
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
67
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
68
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
69
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
70
+      103, 103, 103, 103, 103, 103, 103, 103, 103, 103,
71
+      103, 103, 103, 103, 103, 103
72
+    };
73
+  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
74
+}
75
+
76
+#ifdef __GNUC__
77
+__inline
78
+#ifdef __GNUC_STDC_INLINE__
79
+__attribute__ ((__gnu_inline__))
80
+#endif
81
+#endif
82
+static const struct keyword *
83
+in_word_set (const char *str, unsigned int len)
84
+{
85
+  enum
86
+    {
87
+      TOTAL_KEYWORDS = 59,
88
+      MIN_WORD_LENGTH = 2,
89
+      MAX_WORD_LENGTH = 12,
90
+      MIN_HASH_VALUE = 3,
91
+      MAX_HASH_VALUE = 102
92
+    };
93
+
94
+  static const struct keyword wordlist[] =
95
+    {
96
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
97
+      {"new", TOK_NEW},
98
+      {"enum", TOK_FUTURE_RESERVED_WORD},
99
+      {"", TOK_ERROR},
100
+      {"return", TOK_RETURN},
101
+      {"in", TOK_IN},
102
+      {"int", TOK_FUTURE_RESERVED_WORD},
103
+      {"null", TOK_NULL},
104
+      {"break", TOK_BREAK},
105
+      {"delete", TOK_DELETE},
106
+      {"default", TOK_DEFAULT},
107
+      {"debugger", TOK_FUTURE_RESERVED_WORD},
108
+      {"interface", TOK_FUTURE_RESERVED_WORD},
109
+      {"instanceof", TOK_INSTANCEOF},
110
+      {"", TOK_ERROR},
111
+      {"if", TOK_IF},
112
+      {"try", TOK_TRY},
113
+      {"true", TOK_TRUE},
114
+      {"final", TOK_FUTURE_RESERVED_WORD},
115
+      {"", TOK_ERROR},
116
+      {"finally", TOK_FINALLY},
117
+      {"function", TOK_FUNCTION},
118
+      {"transient", TOK_FUTURE_RESERVED_WORD},
119
+      {"const", TOK_FUTURE_RESERVED_WORD},
120
+      {"", TOK_ERROR},
121
+      {"do", TOK_DO},
122
+      {"continue", TOK_CONTINUE},
123
+      {"else", TOK_ELSE},
124
+      {"class", TOK_FUTURE_RESERVED_WORD},
125
+      {"double", TOK_FUTURE_RESERVED_WORD},
126
+      {"boolean", TOK_FUTURE_RESERVED_WORD},
127
+      {"for", TOK_FOR},
128
+      {"case", TOK_CASE},
129
+      {"catch", TOK_CATCH},
130
+      {"native", TOK_FUTURE_RESERVED_WORD},
131
+      {"", TOK_ERROR}, {"", TOK_ERROR},
132
+      {"char", TOK_FUTURE_RESERVED_WORD},
133
+      {"float", TOK_FUTURE_RESERVED_WORD},
134
+      {"", TOK_ERROR},
135
+      {"private", TOK_FUTURE_RESERVED_WORD},
136
+      {"abstract", TOK_FUTURE_RESERVED_WORD},
137
+      {"protected", TOK_FUTURE_RESERVED_WORD},
138
+      {"false", TOK_FALSE},
139
+      {"public", TOK_FUTURE_RESERVED_WORD},
140
+      {"", TOK_ERROR}, {"", TOK_ERROR},
141
+      {"long", TOK_FUTURE_RESERVED_WORD},
142
+      {"super", TOK_FUTURE_RESERVED_WORD},
143
+      {"export", TOK_FUTURE_RESERVED_WORD},
144
+      {"extends", TOK_FUTURE_RESERVED_WORD},
145
+      {"", TOK_ERROR},
146
+      {"this", TOK_THIS},
147
+      {"throw", TOK_THROW},
148
+      {"throws", TOK_FUTURE_RESERVED_WORD},
149
+      {"", TOK_ERROR}, {"", TOK_ERROR},
150
+      {"byte", TOK_FUTURE_RESERVED_WORD},
151
+      {"", TOK_ERROR},
152
+      {"static", TOK_FUTURE_RESERVED_WORD},
153
+      {"", TOK_ERROR}, {"", TOK_ERROR},
154
+      {"with", TOK_WITH},
155
+      {"", TOK_ERROR},
156
+      {"import", TOK_FUTURE_RESERVED_WORD},
157
+      {"", TOK_ERROR}, {"", TOK_ERROR},
158
+      {"void", TOK_VOID},
159
+      {"implements", TOK_FUTURE_RESERVED_WORD},
160
+      {"typeof", TOK_TYPEOF},
161
+      {"package", TOK_FUTURE_RESERVED_WORD},
162
+      {"volatile", TOK_FUTURE_RESERVED_WORD},
163
+      {"goto", TOK_FUTURE_RESERVED_WORD},
164
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
165
+      {"var", TOK_VAR},
166
+      {"", TOK_ERROR},
167
+      {"short", TOK_FUTURE_RESERVED_WORD},
168
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
169
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
170
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
171
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
172
+      {"", TOK_ERROR}, {"", TOK_ERROR},
173
+      {"while", TOK_WHILE},
174
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
175
+      {"", TOK_ERROR}, {"", TOK_ERROR},
176
+      {"switch", TOK_SWITCH},
177
+      {"synchronized", TOK_FUTURE_RESERVED_WORD}
178
+    };
179
+
180
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
181
+    {
182
+      int key = hash (str, len);
183
+
184
+      if (key <= MAX_HASH_VALUE && key >= 0)
185
+        {
186
+          const char *s = wordlist[key].name;
187
+
188
+          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
189
+            return &wordlist[key];
190
+        }
191
+    }
192
+  return 0;
193
+}
0 194
new file mode 100644
... ...
@@ -0,0 +1,186 @@
0
+/* ANSI-C code produced by gperf version 3.0.3 */
1
+/* Command-line: gperf -E -t -L ANSI-C -C -F ', TOK_ERROR' -c -H op_hash -N in_op_set -W oplist ../../../trunk/libclamav/jsparse/operators.gperf  */
2
+/* Computed positions: -k'1,$' */
3
+
4
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
5
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
6
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
7
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
8
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
9
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
10
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
11
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
12
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
13
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
14
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
15
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
16
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
17
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
18
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
19
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
20
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
21
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
22
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
23
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
24
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
25
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
26
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
27
+/* The character set is not based on ISO-646.  */
28
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
29
+#endif
30
+
31
+struct operator { /* one row of the operator lookup table used by in_op_set() */
32
+	const char *name; /* operator spelling; "" marks an unused hash slot */
33
+	int val; /* TOK_* code associated with the spelling */
34
+};
35
+/* maximum key range = 121, duplicates = 0 */
36
+
37
#if defined(__GNUC__)
__inline
#elif defined(__cplusplus)
inline
#endif
static unsigned int
op_hash (const char *str, unsigned int len)
{
  /* Per-character weights produced by gperf; only the first and the last
   * character of the operator take part in the hash (-k'1,$'). */
  static const unsigned char asso_values[] =
    {
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122,  50, 122, 122, 122,  31,  40, 122,
      122, 122,  21,  30, 122,  25, 122,  16, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122,  45, 122,
       10,   5,   0,  35, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122,  60, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122,  20, 122,  15, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122
    };
  const unsigned char first_ch = (unsigned char)str[0];
  const unsigned char last_ch = (unsigned char)str[len - 1];
  unsigned int hval = len;

  /* hash = length + weight(last char) + weight(first char) */
  hval += asso_values[last_ch];
  hval += asso_values[first_ch];
  return hval;
}
78
+
79
+#ifdef __GNUC__
80
+__inline
81
+#ifdef __GNUC_STDC_INLINE__
82
+__attribute__ ((__gnu_inline__))
83
+#endif
84
+#endif
85
+static const struct operator *
86
+in_op_set (const char *str, unsigned int len)
87
+{
88
+  enum
89
+    {
90
+      TOTAL_KEYWORDS = 39,
91
+      MIN_WORD_LENGTH = 1,
92
+      MAX_WORD_LENGTH = 4,
93
+      MIN_HASH_VALUE = 1,
94
+      MAX_HASH_VALUE = 121
95
+    };
96
+
97
+  static const struct operator oplist[] =
98
+    {
99
+      {"", TOK_ERROR},
100
+      {">",	TOK_GREATER},
101
+      {">>",	TOK_SHIFT_RIGHT},
102
+      {">>>",	TOK_DOUBLESHIFT_RIGHT},
103
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
104
+      {">=",	TOK_GREATEREQUAL},
105
+      {">>=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
106
+      {">>>=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
107
+      {"", TOK_ERROR},
108
+      {"=",	TOK_EQUAL},
109
+      {"==",	TOK_EQUAL_EQUAL},
110
+      {"===",	TOK_TRIPLE_EQUAL},
111
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
112
+      {"<=",	TOK_LESSEQUAL},
113
+      {"<<=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
114
+      {"", TOK_ERROR}, {"", TOK_ERROR},
115
+      {"<",	TOK_LESS},
116
+      {"<<",	TOK_SHIFT_LEFT},
117
+      {"/=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
118
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
119
+      {"|=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
120
+      {"*=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
121
+      {"", TOK_ERROR}, {"", TOK_ERROR},
122
+      {"~",	TOK_TILDE},
123
+      {"-=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
124
+      {"/",	TOK_DIVIDE},
125
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
126
+      {"+=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
127
+      {"%=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
128
+      {"", TOK_ERROR}, {"", TOK_ERROR},
129
+      {"|",	TOK_OR},
130
+      {"||",	TOK_OR_OR},
131
+      {"*",	TOK_MULTIPLY},
132
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
133
+      {"&=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
134
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
135
+      {"-",	TOK_MINUS},
136
+      {"--",	TOK_MINUSMINUS},
137
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
138
+      {"", TOK_ERROR},
139
+      {"!=",	TOK_NOT_EQUAL},
140
+      {"!==",	TOK_NOT_DOUBLEEQUAL},
141
+      {"", TOK_ERROR}, {"", TOK_ERROR},
142
+      {"+",	TOK_PLUS},
143
+      {"++",	TOK_PLUSPLUS},
144
+      {"%",	TOK_PERCENT},
145
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
146
+      {"^=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL},
147
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
148
+      {"?",	TOK_QUESTIONMARK},
149
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
150
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
151
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
152
+      {"&",	TOK_AND},
153
+      {"&&",	TOK_AND_AND},
154
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
155
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
156
+      {"", TOK_ERROR}, {"", TOK_ERROR},
157
+      {":",	TOK_COLON},
158
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
159
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
160
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
161
+      {"!",	TOK_EXCLAMATION},
162
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
163
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
164
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
165
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
166
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
167
+      {"", TOK_ERROR}, {"", TOK_ERROR}, {"", TOK_ERROR},
168
+      {"", TOK_ERROR},
169
+      {"^",	TOK_XOR}
170
+    };
171
+
172
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
173
+    {
174
+      int key = op_hash (str, len);
175
+
176
+      if (key <= MAX_HASH_VALUE && key >= 0)
177
+        {
178
+          const char *s = oplist[key].name;
179
+
180
+          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
181
+            return &oplist[key];
182
+        }
183
+    }
184
+  return 0;
185
+}
0 186
new file mode 100644
... ...
@@ -0,0 +1,1672 @@
0
+/*
1
+ *  Javascript normalizer.
2
+ *
3
+ *  Copyright (C) 2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Török Edvin
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+
22
+#include <stdio.h>
23
+#include <unistd.h>
24
+#include <sys/types.h>
25
+#include <sys/stat.h>
26
+#include <fcntl.h>
27
+#include <stdlib.h>
28
+#include <stdint.h>
29
+#include <string.h>
30
+#include <ctype.h>
31
+#include <assert.h>
32
+#define BUFS 65536
33
+#include "lexglobal.h"
34
+#include "hashtab.h"
35
+#include "others.h"
36
+#include "js-norm.h"
37
+#include "jsparse/generated/operators.h"
38
+#include "jsparse/generated/keywords.h"
39
+
40
+/* ----------- tokenizer ---------------- */
41
+enum tokenizer_state { /* value kept in scanner.state; NOTE(review): presumably the lexer's current mode - the tokenizer body is outside this view */
42
+	Initial,
43
+	MultilineComment,
44
+	SinglelineComment,
45
+	Number,
46
+	DoubleQString,
47
+	SingleQString,
48
+	Identifier
49
+};
50
+
51
+struct text_buffer { /* growable byte buffer, see the textbuffer_* helpers */
52
+	char *data; /* storage, (re)allocated by textbuffer_ensure_capacity */
53
+	size_t pos; /* bytes written so far; the next byte goes to data[pos] */
54
+	size_t capacity; /* bytes currently reserved for data */
55
+};
56
+
57
+typedef struct scanner {
58
+	enum tokenizer_state state;
59
+	struct text_buffer buf; /* text being accumulated; handed off and zeroed by textbuffer_done */
60
+	const char *yytext; /* set by textbuffer_done */
61
+	size_t yylen; /* set by textbuffer_done to buf.pos - 1 */
62
+	const char *in; /* NOTE(review): presumably the input being scanned - confirm in the tokenizer */
63
+	size_t insize;
64
+	size_t pos;
65
+} *yyscan_t;
66
+
67
+typedef int YY_BUFFER_STATE;
68
+
69
+static int yylex( YYSTYPE *lvalp, yyscan_t  );
70
+static void yy_delete_buffer( YY_BUFFER_STATE, yyscan_t);
71
+static YY_BUFFER_STATE yy_scan_bytes( const char *, size_t, yyscan_t scanner );
72
+static const char *yyget_text ( yyscan_t scanner );
73
+static int yyget_leng ( yyscan_t scanner );
74
+static int yylex_init ( yyscan_t * ptr_yy_globals ) ;
75
+static void yyset_debug (int debug_flag ,yyscan_t yyscanner );
76
+static int yylex_destroy ( yyscan_t yyscanner ) ;
77
+/* ----------- tokenizer end ---------------- */
78
+
79
+enum fsm_state { /* per-scope state; scope_new starts every scope in Base */
80
+	Base,
81
+	InsideVar,
82
+	InsideInitializer,
83
+	WaitFunctionName,
84
+	WaitParameterList,
85
+	InsideFunctionDecl
86
+};
87
+
88
+struct scope {
89
+	struct hashtable id_map; /* identifier -> id; -1 means used but not (yet) declared here */
90
+	struct scope *parent;/* hierarchy; scope_lookup walks this chain outwards */
91
+	struct scope *nxt;/* all scopes kept in a list so we can easily free all of them */
92
+	enum fsm_state fsm_state;
93
+	int  last_token;
94
+	unsigned int brackets;
95
+	unsigned int blocks;
96
+};
97
+
98
+struct tokens { /* growable array of tokens */
99
+	yystype *data; /* storage, grown by tokens_ensure_capacity */
100
+	size_t   cnt; /* number of tokens stored */
101
+	size_t   capacity; /* number of tokens data has room for */
102
+};
103
+
104
+/* state for the current JS file being parsed */
105
+struct parser_state {
106
+	unsigned long     var_uniq; /* next id handed out by scope_declare */
107
+	unsigned long     syntax_errors;
108
+	struct scope *global;
109
+	struct scope *current; /* innermost open scope; parent of the next scope_new */
110
+	struct scope *list; /* head of the nxt-linked list of every scope created */
111
+	yyscan_t scanner;
112
+	struct tokens tokens;
113
+};
114
+
115
+static struct scope* scope_new(struct parser_state *state)
116
+{
117
+	struct scope *parent = state->current;
118
+	struct scope *s = cli_calloc(1, sizeof(*s));
119
+	if(!s)
120
+		return NULL;
121
+	if(hashtab_init(&s->id_map, 10) < 0) {
122
+		free(s);
123
+		return NULL;
124
+	}
125
+	s->parent = parent;
126
+	s->fsm_state = Base;
127
+	s->nxt = state->list;
128
+	state->list = s;
129
+	state->current = s;
130
+	return s;
131
+}
132
+
133
+static struct scope* scope_done(struct scope *s)
134
+{
135
+	struct scope* parent = s->parent;
136
+	/* TODO: have a hashtab_destroy */
137
+	hashtab_clear(&s->id_map);
138
+	free(s->id_map.htable);
139
+	free(s);
140
+	return parent;
141
+}
142
+
143
+/* transitions:
144
+ *   Base --(VAR)--> InsideVar
145
+ *   InsideVar --(Identifier)-->InsideInitializer
146
+ *   InsideVar --(anything_else) --> POP (to Base)
147
+ *   InsideInitializer --(COMMA)--> POP (to InsideVar)
148
+ *   InsideInitializer | InsideVar --(SEMICOLON) --> POP (to Base)
149
+ *   InsideInitializer --(BRACKET_OPEN) --> WaitBrClose
150
+ *   InsideInitializer --(PAR_OPEN) --> WaitParClose
151
+ *   WaitBrClose --(BRACKET_OPEN) --> increase depth
152
+ *   WaitBrClose --(BRACKET_CLOSE) --> POP
153
+ *   WaitParClose --(PAR_CLOSE) --> POP
154
+ *   WaitParClose --(PAR_OPEN) --> increase depth
155
+ */
156
+
157
+/* Base --(VAR)--> PUSH, to InsideVar
158
+ * InsideVar --(Identifier)--> InsideInitializer
159
+ * InsideVar --(ELSE)--> POP, inc. syntax_errors
160
+ * InsideInitializer --(COMMA)--> POP (to InsideVar)
161
+ * --(BRACKET_OPEN)--> inc bracket_counter
162
+ * --(PAR_OPEN)--> inc par_counter
163
+ * --(BRACKET_CLOSE) --> dec bracket_counter
164
+ * --(PAR_CLOSE)--> dec par_counter
165
+ * --(VAR)--> PUSH, to InsideVar (if bracket_counter != 0 || par_counter != 0)
166
+ *        --> POP, to InsideVar, inc. syntax_errors (if bracket_counter == 0  && par_counter == 0)
167
+ *  POP only allowed if bracket_counter == 0 && par_counter == 0 
168
+ *
169
+ * InsideInitializer acts differently, make it only a flag
170
+ * ....................
171
+ *
172
+ * Pushing, Popping is done when entering / exiting function scopes,
173
+ * tracking { and function ( is done by the function scope tracker too.
174
+ *
175
+ * we only need to track brackets.
176
+ */
177
+
178
+
179
+/*
180
+ * var x = document;
181
+ * x.writeln(...);
182
+ *
183
+ * ^we must not normalize member method names
184
+ */
185
+
186
+/*
187
+ * Variables are declared at function scope, and their initial value is
188
+ * undefined. At the point where the initializer is, and from there on the value
189
+ * is defined.
190
+ *
191
+ * { doesn't introduce a new variable scope, they are in function's scope too
192
+ *
193
+ * function foo() {
194
+ *  alert(x); -> x exists, undefined
195
+ *  var x=5; 
196
+ *  alert(x); -> x exists, =5
197
+ * }
198
+ * 
199
+ * vs.
200
+ *
201
+ * function bar() {
202
+ *   alert(x);//error, x not declared
203
+ *   x=5;
204
+ *   }
205
+ *
206
+ * vs.
207
+ *
208
+ * but we can declare variables without var, only valid if we use them after
209
+ * assigning.
210
+ *
211
+ * function foobar() {
212
+ *   x=5;
213
+ *   alert(x);//x is defined, value is 5
214
+ *   }
215
+ *
216
+ * other examples:
217
+ * function foo2() {
218
+ *   alert(x); -> x exists, undefined
219
+ *   {
220
+ *       var x=5; -> x equals to 5
221
+ *   }
222
+ *   alert(x); -> x is 5
223
+ * }
224
+ *
225
+ * function foo3() {
226
+ *   var x=4; -> x exists, equals to 4
227
+ *   alert(x); -> x exists, equals to 4
228
+ *   {
229
+ *       var x=5; -> x equals to 5
230
+ *   }
231
+ *   alert(x); -> x is 5
232
+ * }
233
+ *
234
+ * function bar3() {
235
+ *   //same as foo3
236
+ *   var x=4;
237
+ *   alert(x);
238
+ *   { 
239
+ *        x=5;
240
+ *   }
241
+ *   alert(x);
242
+ * }
243
+ *
244
+ */
245
+
246
+
247
+static const char* scope_declare(struct scope *s, const char *token, const size_t len, struct parser_state *state)
248
+{
249
+	const struct element *el = hashtab_insert(&s->id_map, token, len, state->var_uniq++);
250
+	/* hashtab_insert either finds an already existing entry, or allocates a
251
+	 * new one, we return the allocated string */
252
+	return el ? el->key : NULL;
253
+}
254
+
255
+static const char* scope_use(struct scope *s, const char *token, const size_t len)
256
+{
257
+	const struct element *el = hashtab_find(&s->id_map, token, len);
258
+	if(el) {
259
+		/* identifier already found in current scope,
260
+		 * return here to avoid overwriting uniq id */
261
+		return el->key;
262
+	}
263
+	/* identifier not yet in current scope's hashtab, add with ID -1.
264
+	 * Later if we find a declaration it will automatically assign a uniq ID
265
+	 * to it. If not, we'll know that we have to push ID == -1 tokens to an
266
+	 * outer scope.*/
267
+	el = hashtab_insert(&s->id_map, token, len, -1);
268
+	return el ? el->key : NULL;
269
+}
270
+
271
+static long scope_lookup(struct scope *s, const char *token, const size_t len)
272
+{
273
+	while(s) {
274
+		const struct element *el = hashtab_find(&s->id_map, token, len);
275
+		if(el && el->data != -1) {
276
+			return el->data;
277
+		}
278
+		/* not found in current scope, try in outer scope */
279
+		s = s->parent;
280
+	}
281
+	return -1;
282
+}
283
+
284
+static int tokens_ensure_capacity(struct tokens *tokens, size_t cap)
285
+{
286
+	if(tokens->capacity < cap) {
287
+		tokens->capacity = cap + 1024;
288
+		tokens->data = cli_realloc2(tokens->data, tokens->capacity * sizeof(*tokens->data));
289
+		if(!tokens->data)
290
+			return CL_EMEM;
291
+	}
292
+	return CL_SUCCESS;
293
+}
294
+
295
+static int add_token(struct parser_state *state, const yystype *token)
296
+{
297
+	if(tokens_ensure_capacity(&state->tokens, state->tokens.cnt + 1) == -1)
298
+		return -1;
299
+	state->tokens.data[state->tokens.cnt++] = *token;
300
+	return 0;
301
+}
302
+
303
+struct buf { /* fixed-size output buffer written to outfd by buf_outc/buf_outs when full */
304
+	size_t pos; /* number of bytes of buf[] currently filled */
305
+	int outfd; /* file descriptor passed to write() */
306
+	char buf[65536];
307
+};
308
+
309
+static inline int buf_outc(char c, struct buf *buf)
310
+{
311
+	if(buf->pos >= sizeof(buf->buf)) {
312
+		if(write(buf->outfd, buf->buf, sizeof(buf->buf)) != sizeof(buf->buf))
313
+			return CL_EIO;
314
+		buf->pos = 0;
315
+	}
316
+	buf->buf[buf->pos++] = c;
317
+	return CL_SUCCESS;
318
+}
319
+
320
+static inline int buf_outs(const char *s, struct buf *buf)
321
+{
322
+	const size_t buf_len = sizeof(buf->buf);
323
+	size_t len = strlen(s);
324
+	while(buf->pos + len > buf_len) {
325
+		memcpy(buf->buf + buf->pos, s, buf_len - buf->pos);
326
+		len -= (buf_len - buf->pos);
327
+		if(write(buf->outfd, buf->buf, buf_len) < 0)
328
+			return CL_EIO;
329
+		buf->pos = 0;
330
+	}
331
+	memcpy(buf->buf + buf->pos, s, len);
332
+	buf->pos += len;
333
+	return CL_SUCCESS;
334
+}
335
+
336
/* Emit a single space when the previously written character and the next
 * one are both alphanumeric, so that two adjacent tokens do not merge. */
static inline void output_space(char last, char current, struct buf *out)
{
	/* isalnum() is only defined for unsigned char values (and EOF); plain
	 * char may be negative for bytes >= 0x80 */
	if(isalnum((unsigned char)last) && isalnum((unsigned char)current))
		buf_outc(' ', out);
}
341
+
342
+
343
+/* return class of last character */
344
+static char output_token(const yystype *token, struct scope *scope, struct buf *out, char lastchar)
345
+{
346
+	char sbuf[128];
347
+	const char *s = TOKEN_GET(token, cstring);
348
+	/* TODO: use a local buffer, instead of FILE* */
349
+	switch(token->type) {
350
+		case TOK_StringLiteral:
351
+			output_space(lastchar,'"', out);
352
+			buf_outc('"', out);
353
+			if(s) {
354
+				buf_outs(s, out);
355
+			}
356
+			buf_outc('"', out);
357
+			return '\"';
358
+		case TOK_NumericInt:
359
+			output_space(lastchar,'0', out);
360
+			snprintf(sbuf, sizeof(sbuf), "%ld", TOKEN_GET(token, ival));
361
+			buf_outs(sbuf, out);
362
+			return '0';
363
+		case TOK_NumericFloat:
364
+			output_space(lastchar,'0', out);
365
+			snprintf(sbuf, sizeof(sbuf), "%e", TOKEN_GET(token, dval));
366
+			buf_outs(sbuf, out);
367
+			return '0';
368
+		case TOK_IDENTIFIER_NAME:
369
+			/* TODO: lookup identifier name here, and normalize it
370
+			 * */
371
+			output_space(lastchar,'a', out);
372
+			if(s) {
373
+				long id = scope_lookup(scope, s, strlen(s));
374
+				if(id == -1) {
375
+					/* identifier not normalized */
376
+					buf_outs(s, out);
377
+				} else {
378
+					snprintf(sbuf, sizeof(sbuf), "n%03ld",id);
379
+					buf_outs(sbuf, out);
380
+				}
381
+			}
382
+			return 'a';
383
+		case TOK_FUNCTION:
384
+			/*TODO: output function name */
385
+			output_space(lastchar,'a', out);
386
+			buf_outs("function",out);
387
+			return 'a';
388
+		default:
389
+			if(s) {
390
+				const size_t len = strlen(s);
391
+				output_space(lastchar,s[0], out);
392
+				buf_outs(s, out);
393
+				return len ? s[len-1] : '\0';
394
+			}
395
+			return '\0';
396
+	}
397
+}
398
+
399
+/*
400
+ * We can't delete the scope as soon as we see a }, because
401
+ * we still need the hashmap from it.
402
+ *
403
+ * If we would normalize all the identifiers, and output when a scope is closed,
404
+ * then it would be impossible to normalize calls to other functions.
405
+ *
406
+ * So we need to keep all scopes in memory, to do this instead of scope_done, we
407
+ * simply just set current = current->parent when a scope is closed.
408
+ * We keep a list of all scopes created in parser_state->list. When we parsed
409
+ * everything, we output everything, and then delete all scopes.
410
+ *
411
+ * We also need to know where to switch scopes on the second pass, so for
412
+ * TOK_FUNCTION types we will use another pointer, that points to the scope
413
+ * (added to yystype's union).
414
+ *
415
+ * We lookup the identifier in the scope (using scope_lookup, it looks in parent
416
+ * scopes too), if ID is found then output (n%3d, Id),
417
+ * otherwise output the identifier as is.
418
+ *
419
+ * To make  it easier to match sigs, we do a xfrm : 
420
+ * 'function ID1 (..'. => 'n%3d = function (...'
421
+ */
422
+
423
+/*
424
+ * we'll add all identifier to the scope's map
425
+ * those that are not decl. will have initial ID -1
426
+ * if we later see a decl for it in same scope, it'll automatically get a
427
+ * correct ID.
428
+ *
429
+ * When parsing of local scope is done, we take any ID -1 identifiers,
430
+ * and push them up one level (careful not to overwrite existing IDs).
431
+ *
432
+ * it would be nice if the tokens would contain a link to the entry in the
433
+ * hashtab, a link that automatically gets updated when the element is moved
434
+ * (pushed up). This would prevent subsequent lookups in the map,
435
+ * when we want to output the tokens.
436
+ * There is no easy way to do that, so we just do another lookup
437
+ *
438
+ */
439
+
440
+/*
441
+ * This actually works, redefining foo:
442
+ * function foo() {
443
+ *   var foo=5; alert(foo);
444
+ * }
445
+ * So we can't treat function names just as any other identifier?
446
+ * We can, because you can no longer call foo, if you redefined it as a var.
447
+ * So if we rename both foo-s with same name, it will have same behaviour.
448
+ *
449
+ * This means that a new scope should begin after function, and not after
450
+ * function ... (.
451
+ */
452
+
453
+static void scope_free_all(struct scope *p)
454
+{
455
+	struct scope *nxt;
456
+	do {
457
+		nxt = p->nxt;
458
+		scope_done(p);
459
+		p = nxt;
460
+	} while(p);
461
+}
462
+
463
+void cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
464
+static int match_parameters(const yystype *tokens, const char ** param_names, size_t count)
465
+{
466
+	size_t i,j=0;
467
+	if(tokens[0].type != TOK_PAR_OPEN)
468
+		return -1;
469
+	i=1;
470
+	while(count--) {
471
+		const char *token_val = TOKEN_GET(&tokens[i], cstring);
472
+		if(tokens[i].type != TOK_IDENTIFIER_NAME ||
473
+		   !token_val ||
474
+		   strcmp(token_val, param_names[j++]))
475
+			return -1;
476
+		++i;
477
+		if((count && tokens[i].type != TOK_COMMA)
478
+		   || (!count && tokens[i].type != TOK_PAR_CLOSE))
479
+			return -1;
480
+		++i;
481
+	}
482
+	return 0;
483
+}
484
+
485
+static const char *de_packer_3[] = {"p","a","c","k","e","r"};
486
+static const char *de_packer_2[] = {"p","a","c","k","e","d"};
487
+
488
+
489
+#ifndef MAX
490
+#define MAX(a, b) ((a)>(b) ? (a) : (b))
491
+#endif
492
+
493
+static inline char *textbuffer_done(yyscan_t scanner)
494
+{
495
+	/* free unusued memory */
496
+	char *str = cli_realloc(scanner->buf.data, scanner->buf.pos);
497
+	if(!str) {
498
+		str = scanner->buf.data;
499
+	}
500
+	scanner->yytext = scanner->buf.data;
501
+	scanner->yylen = scanner->buf.pos - 1;
502
+	memset(&scanner->buf, 0, sizeof(scanner->buf));
503
+	return str;
504
+}
505
+
506
+static inline int textbuffer_ensure_capacity(struct text_buffer *txtbuf, size_t len)
507
+{
508
+	if (txtbuf->pos + len > txtbuf->capacity) {
509
+		char *d;
510
+		txtbuf->capacity = MAX(txtbuf->pos + len, txtbuf->capacity + 4096);
511
+		d = cli_realloc(txtbuf->data, txtbuf->capacity);
512
+		if(!d)
513
+			return -1;
514
+		txtbuf->data = d;
515
+	}
516
+	return 0;
517
+}
518
+
519
+static inline void textbuffer_append_len(struct text_buffer *txtbuf, const char *s, size_t len)
520
+{
521
+	textbuffer_ensure_capacity(txtbuf, len);
522
+	memcpy(&txtbuf->data[txtbuf->pos], s, len);
523
+	txtbuf->pos += len;
524
+}
525
+
526
+
527
/* Append a NUL-terminated string (without its terminator) to the text buffer. */
static inline void textbuffer_append(struct text_buffer *txtbuf, const char *s)
{
	textbuffer_append_len(txtbuf, s, strlen(s));
}
532
+
533
+static inline void textbuffer_putc(struct text_buffer *txtbuf, const char c)
534
+{
535
+	textbuffer_ensure_capacity(txtbuf, 1);
536
+	txtbuf->data[txtbuf->pos++] = c;
537
+}
538
+#define MODULE "JS-Norm: "
539
+
540
+static void free_token(yystype *token)
541
+{
542
+	if(token->vtype == vtype_string) {
543
+		free(token->val.string);
544
+		token->val.string = NULL;
545
+	}
546
+}
547
+
548
+static int replace_token_range(struct tokens *dst, size_t start, size_t end, const struct tokens *with)
549
+{
550
+	const size_t len = with ? with->cnt : 0;
551
+	size_t i;
552
+	cli_dbgmsg(MODULE "Replacing tokens %lu - %lu with %lu tokens\n",start, end, len);
553
+	if(start >= dst->cnt || end > dst->cnt)
554
+		return -1;
555
+	for(i=start;i<end;i++) {
556
+		free_token(&dst->data[i]);
557
+	}
558
+	if(tokens_ensure_capacity(dst, dst->cnt - (end-start) + len) < 0)
559
+		return CL_EMEM;
560
+	memmove(&dst->data[start+len], &dst->data[end], (dst->cnt - end) * sizeof(dst->data[0]));
561
+	if(with && len > 0) {
562
+		memcpy(&dst->data[start], with->data, len * sizeof(dst->data[0]));
563
+	}
564
+	dst->cnt = dst->cnt - (end-start) + len;
565
+	return CL_SUCCESS;
566
+}
567
+
568
+static int append_tokens(struct tokens *dst, const struct tokens *src)
569
+{
570
+	if(!dst || !src)
571
+		return CL_ENULLARG;
572
+	if(!dst->cnt)
573
+		return CL_SUCCESS;
574
+	if(tokens_ensure_capacity(dst, dst->cnt + src->cnt) == -1)
575
+		return CL_EMEM;
576
+	cli_dbgmsg(MODULE "Appending %lu tokens\n", src->cnt);
577
+	memcpy(&dst->data[dst->cnt], src->data, src->cnt * sizeof(dst->data[0]));
578
+	dst->cnt += src->cnt;
579
+	return CL_SUCCESS;
580
+}
581
+
582
+static void decode_de(yystype *params[], struct text_buffer *txtbuf)
583
+{
584
+	const char *p = TOKEN_GET(params[0], cstring);
585
+	const long a = TOKEN_GET(params[1], ival);
586
+	/*const char *c = params[2];*/
587
+	char *k = TOKEN_GET(params[3], string);
588
+	/*const char *r = params[5];*/
589
+
590
+	unsigned val=0;
591
+	unsigned nsplit = 0;
592
+	const char* o;
593
+	const char **tokens;
594
+
595
+	memset(txtbuf, 0, sizeof(*txtbuf));
596
+	if(!p || !k )
597
+		return;
598
+	for(o = k; *o; o++) if(*o == '|') nsplit++;
599
+	nsplit++;
600
+	tokens = malloc(sizeof(char*)*nsplit);
601
+	if(!tokens) {
602
+		return;
603
+	}
604
+	cli_strtokenize(k,'|',nsplit, tokens);
605
+
606
+	do {
607
+		while(*p && !isalnum(*p)) {
608
+			if(*p=='\\' && (p[1] == '\'' || p[1] == '\"'))
609
+				p++;
610
+			else
611
+				textbuffer_putc(txtbuf, *p++);
612
+		}
613
+		if(!*p) break;
614
+		val = 0;
615
+		o = p;
616
+		while(*p && isalnum(*p)) {
617
+			unsigned x;
618
+			unsigned char v = *p++;
619
+			/* TODO: use a table here */
620
+			if(v >= 'a') x = 10+v-'a';
621
+			else if(v >= 'A') x = 36+v-'A';
622
+			else x = v-'0';
623
+			val = val*a+x;
624
+		}
625
+		if(val >= nsplit || !tokens[val] || !tokens[val][0])
626
+			while(o!=p)
627
+				textbuffer_putc(txtbuf, *o++);
628
+		else	textbuffer_append(txtbuf, tokens[val]);
629
+	} while (*p);
630
+	free(tokens);
631
+	textbuffer_append(txtbuf, "\0");
632
+}
633
+
634
+struct decode_result { /* filled in by handle_de */
635
+	struct text_buffer txtbuf; /* decoded text produced by decode_de */
636
+	size_t pos_begin; /* index of the first token of the decoded call's arguments */
637
+	size_t pos_end; /* index just past the last token covered by the call */
638
+        unsigned append:1; /* 0: tokens are replaced with new token(s),
639
+                            1: old tokens are deleted, new ones appended at the end */
640
+};
641
+
642
+static void handle_de(yystype *tokens, size_t start, const size_t cnt, const char *name, struct decode_result *res)
643
+{
644
+	/* find function decl. end */
645
+	size_t i, nesting = 1, j;
646
+	yystype* parameters [6];
647
+	const size_t parameters_cnt = 6;
648
+
649
+	for(i=start;i < cnt; i++) {
650
+		if(tokens[i].type == TOK_FUNCTION) {
651
+			if(TOKEN_GET(&tokens[i], scope))
652
+				nesting++;
653
+			else
654
+				nesting--;
655
+			if(!nesting)
656
+				break;
657
+		}
658
+	}
659
+	if(nesting)
660
+		return;
661
+	if(name) {
662
+		/* find call to function */
663
+		for(;i+2 < cnt; i++) {
664
+			const char* token_val = TOKEN_GET(&tokens[i], cstring);
665
+			if(tokens[i].type == TOK_IDENTIFIER_NAME &&
666
+			   token_val &&
667
+			   !strcmp(name, token_val) &&
668
+			   tokens[i+1].type == TOK_PAR_OPEN) {
669
+
670
+				i += 2;
671
+				for(j = 0;j < parameters_cnt && i < cnt;j++) {
672
+					parameters[j] = &tokens[i++];
673
+					if(j != parameters_cnt-1)
674
+						while (tokens[i].type != TOK_COMMA && i < cnt) i++;
675
+					else
676
+						while (tokens[i].type != TOK_PAR_CLOSE && i < cnt) i++;
677
+					i++;
678
+				}
679
+				if(j == parameters_cnt)
680
+					decode_de(parameters, &res->txtbuf);
681
+			}
682
+		}
683
+	} else {
684
+		while(i<cnt && tokens[i].type != TOK_PAR_OPEN) i++;
685
+		++i;
686
+		if(i >= cnt) return;
687
+		/* TODO: move this v to another func */
688
+				for(j = 0;j < parameters_cnt && i < cnt;j++) {
689
+					parameters[j] = &tokens[i++];
690
+					if(j != parameters_cnt-1)
691
+						while (tokens[i].type != TOK_COMMA && i < cnt) i++;
692
+					else
693
+						while (tokens[i].type != TOK_PAR_CLOSE && i < cnt) i++;
694
+					i++;
695
+				}
696
+				if(j == parameters_cnt)
697
+					decode_de(parameters, &res->txtbuf);
698
+	}
699
+	res->pos_begin = parameters[0] - tokens;
700
+	res->pos_end = parameters[parameters_cnt-1] - tokens + 1;
701
+	if(tokens[res->pos_end].type == TOK_BRACKET_OPEN &&
702
+			tokens[res->pos_end+1].type == TOK_BRACKET_CLOSE &&
703
+			tokens[res->pos_end+2].type == TOK_PAR_CLOSE)
704
+		res->pos_end += 3; /* {}) */
705
+	else
706
+		res->pos_end++; /* ) */
707
+}
708
+
709
+/* --------- this should be in str.c -------------------------------- */
710
/* Value of each byte when read as a hexadecimal digit, -1 for every byte
 * that is not one ('0'-'9', 'A'-'F', 'a'-'f' are the only valid digits). */
static const int hex_chars[256] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    /* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};

/* Return the value (0..15) of a hexadecimal digit, or -1 if c is not one. */
static inline int cli_hex2int(const char c)
{
	/* index by the byte value; plain char may be negative */
	const unsigned char idx = (unsigned char)c;

	return hex_chars[idx];
}
733
+
734
/* Encode one UTF-16 code unit as UTF-8 into dst (room for 3 bytes needed).
 * U+0000 is written as the byte 0x01 so that the output never contains a
 * NUL. Surrogate values (0xd800 - 0xdfff) are not valid on their own but
 * are encoded like any other value.
 * Returns the number of bytes written (1 to 3). */
static inline size_t output_utf8(uint16_t u, unsigned char* dst)
{
	if(u == 0) {
		dst[0] = 0x1; /* don't add \0, add \1 instead */
		return 1;
	}
	if(u < 0x80) {
		/* 0zzzzzzz */
		dst[0] = u&0xff;
		return 1;
	}
	if(u < 0x800) {
		dst[0] = 0xc0 | (u>>6);   /* 110yyyyy */
		dst[1] = 0x80 | (u & 0x3f); /* 10zzzzzz */
		return 2;
	}
	/* everything else fits in three bytes, since u is at most 0xffff */
	dst[0] = 0xe0 | (u>>12);        /* 1110xxxx */
	dst[1] = 0x80 | ((u>>6)&0x3f); /* 10yyyyyy */
	dst[2] = 0x80 | (u & 0x3f);      /* 10zzzzzz */
	return 3;
}
757
+
758
+/* Append the first len bytes of str to buf, decoding Javascript string
+ * escapes on the way: \0 \b \t \n \v \f \r, \xHH, and \uHHHH (emitted as
+ * UTF-8 through output_utf8()). Any other escaped character is copied
+ * literally. A NUL byte is never stored; 0x01 is written in its place.
+ *
+ * NOTE(review): hex digits are not validated; cli_hex2int() returns -1 for
+ * a non-hex character and that value is combined as is. A \x or \u escape
+ * that is cut off by the end of the input keeps the original (lossy)
+ * handling. */
+static void textbuffer_append_normalize(struct text_buffer *buf, const char *str, size_t len)
+{
+	size_t i;
+	for(i=0;i < len;i++) {
+		char c = str[i];
+		if (c == '\\' && i+1 < len) {
+			i++;
+			switch (str[i]) {
+				case '0':
+					c = 0;
+					break;
+				case 'b':
+					c = 8;
+					break;
+				case 't':
+					c = 9;
+					break;
+				case 'n':
+					c = 10;
+					break;
+				case 'v':
+					c = 11;
+					break;
+				case 'f':
+					c = 12;
+					break;
+				case 'r':
+					c = 13;
+					break;
+				case 'x':
+					if(i+2 < len)
+						c = (cli_hex2int(str[i+1])<<4)|cli_hex2int(str[i+2]);
+					i += 2;
+					break;
+				case 'u':
+					if(i+4 < len) {
+						uint16_t u = (cli_hex2int(str[i+1])<<12) | (cli_hex2int(str[i+2])<<8) |
+							(cli_hex2int(str[i+3])<<4) | cli_hex2int(str[i+4]);
+						textbuffer_ensure_capacity(buf, 4);
+						/* write at the current end of the buffer; writing at
+						 * buf->data would overwrite the start of the text */
+						buf->pos += output_utf8(u, (unsigned char*)&buf->data[buf->pos]);
+						i += 4;
+						continue;
+					}
+					break;
+				default:
+					c = str[i];
+					break;
+			}
+		}
+		if(!c) c = 1; /* we don't insert \0 */
+		textbuffer_putc(buf, c);
+	}
+}
811
+
812
+
813
+/* Decode Javascript unescape()-style sequences in the NUL-terminated
+ * string str: %uHHHH becomes the UTF-8 encoding of that UTF-16 unit and
+ * %HH becomes the single byte HH. Anything else (including a '%' that is
+ * not followed by valid hex digits) is copied unchanged. A decoded NUL is
+ * stored as 0x01.
+ *
+ * Returns a newly allocated string that the caller must free(), or NULL
+ * if the allocation fails. */
+static char *cli_unescape(const char *str)
+{
+	char *R, *shrunk;
+	size_t k, i=0;
+	const size_t len = strlen(str);
+	/* unescaped string is at most as long as original,
+	 * it will usually be shorter */
+	R = cli_malloc(len + 1);
+	if(!R)
+		return NULL;
+	for(k=0;k < len;k++) {
+		unsigned char c = str[k];
+		if (c == '%') {
+			/* isxdigit() needs an unsigned char value, plain char may be
+			 * negative */
+			if(k+5 < len && str[k+1] == 'u' &&
+					isxdigit((unsigned char)str[k+2]) &&
+					isxdigit((unsigned char)str[k+3]) &&
+					isxdigit((unsigned char)str[k+4]) &&
+					isxdigit((unsigned char)str[k+5])) {
+				uint16_t u = (cli_hex2int(str[k+2])<<12) | (cli_hex2int(str[k+3])<<8) |
+					(cli_hex2int(str[k+4])<<4) | cli_hex2int(str[k+5]);
+				i += output_utf8(u, (unsigned char*)&R[i]);
+				k += 5;
+				continue;
+			}
+			if(k+2 < len && isxdigit((unsigned char)str[k+1]) &&
+					isxdigit((unsigned char)str[k+2])) {
+				c = (cli_hex2int(str[k+1])<<4) | cli_hex2int(str[k+2]);
+				k += 2;
+			}
+		}
+		if(!c) c = 1; /* don't add \0 */
+		/* TODO: if c >= 0x80 output UTF-8, and do the same in
+		 * normalize_string, and interpret the full %u sequence ! */
+		R[i++] = c;
+	}
+	R[i++] = '\0';
+	/* shrink to the used size; if that fails keep the larger buffer rather
+	 * than losing the result.
+	 * NOTE(review): assumes cli_realloc leaves R untouched on failure */
+	shrunk = cli_realloc(R, i);
+	return shrunk ? shrunk : R;
+}
847
+
848
+/* ------------ end of str.c ----------------- */
849
+
850
+/* Fold the call `unescape ( "literal" )` into a single string literal
+ * token holding the decoded text.
+ *
+ * start is the index of the token right after `unescape (`; cnt is the
+ * number of tokens. Nothing is changed unless that token is a string
+ * literal immediately followed by `)`, so calls with a longer argument
+ * expression are left alone.
+ *
+ * Returns CL_SUCCESS, or CL_EMEM when decoding or the token replacement
+ * fails. */
+static int handle_unescape(struct tokens *tokens, size_t start, const size_t cnt)
+{
+	const char *escaped;
+	char *R;
+	struct tokens new_tokens;
+	yystype tok;
+
+	/* need tokens start-2 .. start+1: unescape ( "..." ) */
+	if(start < 2 || start + 1 >= cnt)
+		return CL_SUCCESS;
+	if(tokens->data[start].type != TOK_StringLiteral ||
+			tokens->data[start+1].type != TOK_PAR_CLOSE)
+		return CL_SUCCESS;
+	/* string literals are stored with TOKEN_SET(..., string, ...), read
+	 * them back through the same field as handle_df() does */
+	escaped = TOKEN_GET(&tokens->data[start], string);
+	if(!escaped)
+		return CL_SUCCESS;
+
+	R = cli_unescape(escaped);
+	if(!R)
+		return CL_EMEM;
+	tok.type = TOK_StringLiteral;
+	TOKEN_SET(&tok, string, R);
+	new_tokens.capacity = new_tokens.cnt = 1;
+	new_tokens.data = &tok;
+	/* NOTE(review): R presumably leaks if the replacement fails; confirm
+	 * who owns it in replace_token_range() before freeing it here */
+	if(replace_token_range(tokens, start-2, start+2, &new_tokens) < 0)
+		return CL_EMEM;
+	return CL_SUCCESS;
+}
867
+
868
+
869
+/* scriptasylum dot com's JS encoder */
870
+/* Decode the argument of a `dF ( "literal" )` call.
+ *
+ * The last character of the literal is a digit giving a shift amount. The
+ * rest of the literal is unescaped, every byte is decreased by that
+ * amount, and the result is unescaped once more. On success res describes
+ * the token range to remove (start-2 .. start+2), carries the decoded
+ * text in res->txtbuf (data/pos), and has res->append set to 1.
+ * On any failure res is left untouched.
+ *
+ * Side effect: the literal stored in the token is shortened by one
+ * character (the trailing digit is overwritten with NUL). */
+static void handle_df(const yystype *tokens, size_t start, const size_t cnt, struct decode_result *res)
+{
+	char *str, *s1;
+	size_t len, s1_len, i;
+	unsigned char clast;
+	char *R;
+
+	if(tokens[start].type != TOK_StringLiteral)
+		return;
+	str = TOKEN_GET(&tokens[start], string);
+	if(!str)
+		return;
+	len = strlen(str);
+	if(!len)
+		return; /* empty literal: there is no trailing shift digit */
+	clast = str[len-1] - '0';
+
+	str[len-1] = '\0';
+	s1 = cli_unescape(str);
+	if(!s1)
+		return;
+	s1_len = strlen(s1);
+	for(i=0;i<s1_len;i++) {
+		s1[i] -= clast;
+	}
+	R = cli_unescape(s1);
+	free(s1);
+	if(!R)
+		return;
+	res->pos_begin = start-2;
+	res->pos_end = start+2;
+	res->txtbuf.data = R;
+	res->txtbuf.pos = strlen(R);
+	res->append = 1;
+}
899
+
900
+
901
+
902
+/* Prepare `eval ( "literal" )` for in-place decoding.
+ *
+ * start is the index of the token after `eval (`. When that token holds a
+ * string and is directly followed by `)`, ownership of the string moves
+ * from the token to res->txtbuf (the token's string is set to NULL) and
+ * res is set to cover the token range start-2 .. start+2.
+ * Otherwise res is left untouched. */
+static void handle_eval(struct tokens *tokens, size_t start, struct decode_result *res)
+{
+	char *code;
+
+	/* the closing parenthesis must exist inside the token array */
+	if(start + 1 >= tokens->cnt)
+		return;
+	if(tokens->data[start+1].type != TOK_PAR_CLOSE)
+		return;
+	code = TOKEN_GET(&tokens->data[start], string);
+	if(!code)
+		return;
+
+	TOKEN_SET(&tokens->data[start], string, NULL);
+	res->txtbuf.data = code;
+	res->txtbuf.pos = strlen(code);
+	res->pos_begin = start-2;
+	res->pos_end = start+2;
+}
912
+
913
+/* Walk the token list and hand every `unescape (` call site to
+ * handle_unescape(), which folds calls with a constant argument.
+ * The return value of handle_unescape() is not checked. */
+static void run_folders(struct tokens *tokens)
+{
+	size_t idx = 0;
+
+	while(idx < tokens->cnt) {
+		const char *ident = TOKEN_GET(&tokens->data[idx], cstring);
+		const int is_unescape_call =
+			idx+2 < tokens->cnt &&
+			tokens->data[idx].type == TOK_IDENTIFIER_NAME &&
+			ident &&
+			!strcmp("unescape", ident) &&
+			tokens->data[idx+1].type == TOK_PAR_OPEN;
+
+		if(is_unescape_call)
+			handle_unescape(tokens, idx+2, tokens->cnt);
+		idx++;
+	}
+}
927
+
928
+/* Keep state->current in sync with the scope markers in the token stream.
+ *
+ * A TOK_FUNCTION token carrying a scope enters that scope. A TOK_FUNCTION
+ * token without a scope is a dummy marker for the end of a function: it
+ * moves back to the parent scope (if there is one).
+ *
+ * Returns 0 for the dummy end marker, which must not be output, and 1 for
+ * every other token. */
+static inline int state_update_scope(struct parser_state *state, const yystype *token)
+{
+	struct scope *entered;
+
+	if(token->type != TOK_FUNCTION)
+		return 1;
+
+	entered = TOKEN_GET(token, scope);
+	if(entered) {
+		state->current = entered;
+		return 1;
+	}
+
+	/* dummy token marking function end */
+	if(state->current->parent)
+		state->current = state->current->parent;
+	/* don't output this token, it is just a dummy marker */
+	return 0;
+}
945
+
946
+/* Look for known obfuscation patterns in the token list and replace them
+ * with the tokens of the decoded script:
+ *  - function declarations whose parameter list matches de_packer_2 or
+ *    de_packer_3 (handled by handle_de),
+ *  - calls of the form dF( (handled by handle_df),
+ *  - calls of the form eval( (handled by handle_eval).
+ *
+ * When a handler produces text (res.pos_end > res.pos_begin) the text is
+ * tokenized in a temporary, empty token list and the result either
+ * replaces the matched range or, when res.append is set, is appended to
+ * the end after the range has been deleted.
+ *
+ * NOTE(review): after the `function` branch i has moved past the
+ * `function` token, so state_update_scope() is not called for that token;
+ * left as in the original. */
+static void run_decoders(struct parser_state *state)
+{
+	size_t i;
+	const char* name;
+	struct tokens *tokens = &state->tokens;
+
+	for(i = 0; i < tokens->cnt; i++) {
+		const char *cstring = TOKEN_GET(&tokens->data[i], cstring);
+		struct decode_result res;
+		res.pos_begin = res.pos_end = 0;
+		res.append = 0;
+		/* handlers only fill these on success */
+		res.txtbuf.data = NULL;
+		res.txtbuf.pos = 0;
+		if(tokens->data[i].type == TOK_FUNCTION && i+13 < tokens->cnt) {
+			name = NULL;
+			++i;
+			if(tokens->data[i].type == TOK_IDENTIFIER_NAME) {
+				/* take the name from the identifier token itself;
+				 * cstring above still belongs to the `function` token */
+				name = TOKEN_GET(&tokens->data[i], cstring);
+				++i;
+			}
+			if(match_parameters(&tokens->data[i], de_packer_3, sizeof(de_packer_3)/sizeof(de_packer_3[0])) != -1
+			   || match_parameters(&tokens->data[i], de_packer_2, sizeof(de_packer_2)/sizeof(de_packer_2[0])) != -1)  {
+				/* find function decl. end */
+				handle_de(tokens->data, i, tokens->cnt, name, &res);
+			}
+		} else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
+			  cstring &&
+			  !strcmp("dF", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
+			/* TODO: also match signature of dF function (possibly
+			 * declared using unescape */
+
+			handle_df(tokens->data, i+2, tokens->cnt, &res);
+		} else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
+			  cstring &&
+			  !strcmp("eval", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
+			handle_eval(tokens, i+2, &res);
+		}
+		if(res.pos_end > res.pos_begin) {
+			struct tokens parent_tokens;
+			if(res.pos_end < tokens->cnt && tokens->data[res.pos_end].type == TOK_SEMICOLON)
+				res.pos_end++;
+			parent_tokens = state->tokens;/* save current tokens */
+			/* initialize embedded context */
+			memset(&state->tokens, 0, sizeof(state->tokens));
+			cli_js_process_buffer(state, res.txtbuf.data, res.txtbuf.pos);
+			free(res.txtbuf.data);
+			/* state->tokens still refers to the embedded/nested context
+			 * here */
+			if(!res.append) {
+				replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, &state->tokens);
+			} else {
+				/* delete tokens */
+				replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, NULL);
+				append_tokens(&parent_tokens, &state->tokens);
+			}
+			/* end of embedded context, restore tokens state */
+			free(state->tokens.data);
+			state->tokens = parent_tokens;
+		}
+		/* a replacement may have shortened the list below i */
+		if(i < state->tokens.cnt)
+			state_update_scope(state, &state->tokens.data[i]);
+	}
+}
1006
+
1007
+/* Finish parsing: fold constant unescape() calls, run the decoders over
+ * the collected tokens, then release the scanner. Afterwards the state
+ * can still be output and destroyed, but no longer accepts input. */
+void cli_js_parse_done(struct parser_state* state)
+{
+	struct tokens *collected = &state->tokens;
+
+	run_folders(collected);
+	run_decoders(state);
+
+	yylex_destroy(state->scanner);
+	/* make this state invalid for parsing */
+	state->global = NULL;
+}
1015
+
1016
+
1017
+/* Write the normalized script to standard output.
+ * Tokens are emitted in order, starting from the global scope; the dummy
+ * function-end markers only update the current scope and are not printed.
+ * A failed final write is reported with cli_dbgmsg(). */
+void cli_js_output(struct parser_state *state)
+{
+	unsigned idx = 0;
+	struct buf out;
+	char prev = '\0';
+
+	out.pos = 0;
+	out.outfd = STDOUT_FILENO;
+	state->current = state->global;
+	while(idx < state->tokens.cnt) {
+		yystype *tok = &state->tokens.data[idx++];
+
+		if(!state_update_scope(state, tok))
+			continue;
+		prev = output_token(tok, state->current, &out, prev);
+	}
+	if(write(out.outfd, out.buf, out.pos) < 0) {
+		cli_dbgmsg(MODULE "I/O error");
+	}
+}
1033
+
1034
+/* Release everything owned by state: all scopes, the contents of every
+ * token, and the token array itself. */
+void cli_js_destroy(struct parser_state *state)
+{
+	size_t idx = 0;
+
+	scope_free_all(state->list);
+	while(idx < state->tokens.cnt)
+		free_token(&state->tokens.data[idx++]);
+	free(state->tokens.data);
+}
1043
+
1044
+/* buffer is html-normlike "chunk", if original file is bigger than buffer,
1045
+ * we rewind to a space, so we'll know that tokens won't be broken in half at
1046
+ * the end of a buffer. All tokens except string-literals of course.
1047
+ * So we can assume that after the buffer there is either a space, EOF, or a
1048
+ * chunk of text not containing whitespace at all (for which we care only if its
1049
+ * a stringliteral)*/
1050
+/* Tokenize the n bytes at buf and append the tokens to state->tokens.
+ *
+ * While tokens arrive, a small per-scope state machine (fsm_state) tracks
+ * whether an identifier is being declared (var / function name /
+ * parameter) or merely used, so it can be registered with scope_declare()
+ * or looked up with scope_use(). Member names following a '.' are kept
+ * as they are. Adjacent string literals joined by '+' are folded into one
+ * literal. Suspicious constructs only increase state->syntax_errors.
+ *
+ * Does nothing except warn when state was not initialized or
+ * cli_js_parse_done() was already called on it.
+ *
+ * NOTE(review): the results of scope_new() and cli_strdup() are not
+ * checked for allocation failure. */
+void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n)
+{
+	struct scope* current = state->current;
+	YYSTYPE val;
+	int yv;
+	YY_BUFFER_STATE yyb;
+
+	if(!state->global) {
+		/* this state has either not been initialized,
+		 * or cli_js_parse_done() was already called on it */
+		cli_warnmsg(MODULE "invalid state");
+		return;
+	}
+	yyb = yy_scan_bytes(buf, n, state->scanner);
+	memset(&val, 0, sizeof(val));
+	val.vtype = vtype_undefined;
+	/* on EOF yylex will return 0 */
+	while( (yv=yylex(&val, state->scanner)) != 0)
+	{
+		const char *text;
+		size_t leng;
+
+		val.type = yv;
+		switch(yv) {
+			case TOK_VAR:
+				current->fsm_state = InsideVar;
+				break;
+			case TOK_IDENTIFIER_NAME:
+				text = yyget_text(state->scanner);
+				leng = yyget_leng(state->scanner);
+				if(current->last_token == TOK_DOT) {
+					/* this is a member name, don't normalize
+					*/
+					TOKEN_SET(&val, string, cli_strdup(text));
+					val.type = TOK_UNNORM_IDENTIFIER;
+				} else {
+					switch(current->fsm_state) {
+						case WaitParameterList:
+							state->syntax_errors++;
+							/* fall through */
+						case Base:
+						case InsideInitializer:
+							TOKEN_SET(&val, cstring, scope_use(current, text, leng));
+							break;
+						case InsideVar:
+						case InsideFunctionDecl:
+							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
+							current->fsm_state = InsideInitializer;
+							current->brackets = 0;
+							break;
+						case WaitFunctionName:
+							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
+							current->fsm_state = WaitParameterList;
+							break;
+					}
+				}
+				break;
+			case TOK_PAR_OPEN:
+				switch(current->fsm_state) {
+					case WaitFunctionName:
+						/* TODO: function name is null */
+						/* fallthrough */
+					case WaitParameterList:
+						current->fsm_state = InsideFunctionDecl;
+						break;
+					default:
+						/* noop */
+						break;
+				}
+				break;
+			case TOK_PAR_CLOSE:
+				switch(current->fsm_state) {
+					case WaitFunctionName:
+						state->syntax_errors++;
+						break;
+					case WaitParameterList:
+						current->fsm_state = Base;
+						break;
+					default:
+						/* noop */
+						break;
+				}
+				break;
+			case TOK_CURLY_BRACE_OPEN:
+				switch(current->fsm_state) {
+					case WaitFunctionName:
+						/* TODO: function name is null */
+						/* fallthrough */
+					case WaitParameterList:
+					case InsideFunctionDecl:
+						/* in a syntactically correct
+						 * file, we would already be in
+						 * the Base state when we see a {
+						 */
+						current->fsm_state = Base;
+						/* fall-through */
+					case InsideVar:
+					case InsideInitializer:
+						state->syntax_errors++;
+						/* fall-through */
+					case Base:
+					default:
+						current->blocks++;
+						break;
+				}
+				break;
+			case TOK_CURLY_BRACE_CLOSE:
+				if(current->blocks > 0)
+					current->blocks--;
+				else
+					state->syntax_errors++;
+				if(!current->blocks) {
+					if(current->parent) {
+						/* add dummy FUNCTION token to
+						 * mark function end */
+						TOKEN_SET(&val, cstring, "}");
+						add_token(state, &val);
+						TOKEN_SET(&val, scope, NULL);
+						val.type = TOK_FUNCTION;
+
+						state->current = current = current->parent;
+					} else {
+						/* extra } */
+						state->syntax_errors++;
+					}
+				}
+				break;
+			case TOK_BRACKET_OPEN:
+				current->brackets++;
+				break;
+			case TOK_BRACKET_CLOSE:
+				if(current->brackets > 0)
+					current->brackets--;
+				else
+					state->syntax_errors++;
+				break;
+			case TOK_COMMA:
+				if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) {
+					/* initializer ended only if we
+					 * encountered a comma, and [] are
+					 * balanced.
+					 * This avoids switching state on:
+					 * var x = [4,y,u];*/
+					current->fsm_state = InsideVar;
+				}
+				break;
+			case TOK_SEMICOLON:
+				if (current->brackets == 0 && current->blocks == 0) {
+					/* avoid switching state on unbalanced []:
+					 * var x = [test;testi]; */
+					current->fsm_state = Base;
+				}
+				break;
+			case TOK_FUNCTION:
+				current = scope_new(state);
+				current->fsm_state = WaitFunctionName;
+				TOKEN_SET(&val, scope, state->current);
+				break;
+			case TOK_StringLiteral:
+				/* folding "a" + "b" needs both the previous string and
+				 * the '+' to be in the token list already */
+				if(state->tokens.cnt > 1 &&
+				   state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) {
+					/* see if can fold */
+					yystype *prev_string = &state->tokens.data[state->tokens.cnt-2];
+					if(prev_string->type == TOK_StringLiteral) {
+						char *str = TOKEN_GET(prev_string, string);
+						char *joined;
+						size_t str_len;
+
+						if(!str)
+							break; /* nothing to append to */
+						str_len = strlen(str);
+						text = yyget_text(state->scanner);
+						leng = yyget_leng(state->scanner);
+
+						joined = cli_realloc(str, str_len + leng + 1);
+						if(!joined)
+							break; /* keep the tokens unfolded */
+						/* the token text carries no quotes, so the
+						 * copy starts at its first character */
+						strncpy(joined+str_len, text, leng);
+						joined[str_len + leng] = '\0';
+						TOKEN_SET(prev_string, string, joined);
+
+						/* delete TOK_PLUS */
+						free_token(&state->tokens.data[--state->tokens.cnt]);
+
+						free(val.val.string);
+						memset(&val, 0, sizeof(val));
+						val.vtype = vtype_undefined;
+						continue;
+					}
+				}
+				break;
+		}
+		if(val.vtype == vtype_undefined) {
+			text = yyget_text(state->scanner);
+			/* TODO: tokenizer should set it to point to a constant
+			 * string, it currently doesn't do that for operators ,;:=... */
+			TOKEN_SET(&val, string, cli_strdup(text));
+			/* NOTE(review): looks like a tripwire for a token without a
+			 * value, which the tokenizer in this file should never
+			 * produce; confirm before removing */
+			abort();
+		}
+		add_token(state, &val);
+		current->last_token = yv;
+		memset(&val, 0, sizeof(val));
+		val.vtype = vtype_undefined;
+	}
+	yy_delete_buffer(yyb, state->scanner);
+}
1249
+
1250
+/* Initialize *state for parsing: zero it, create the global scope and
+ * allocate the scanner.
+ *
+ * Returns CL_SUCCESS on success, CL_ENULLARG when state is NULL and
+ * CL_EMEM when the scope or the scanner cannot be allocated. After a
+ * failure state->global is NULL, which cli_js_process_buffer() treats as
+ * an invalid state. */
+int cli_js_init(struct parser_state *state)
+{
+	if(!state)
+		return CL_ENULLARG;
+	memset(state, 0, sizeof(*state));
+	if(!scope_new(state)) {
+		return CL_EMEM;
+	}
+	state->global = state->current;
+
+	if(yylex_init(&state->scanner)) {
+		scope_done(state->global);
+		/* don't leave a state that still looks initialized */
+		state->global = NULL;
+		return CL_EMEM;
+	}
+	yyset_debug(1, state->scanner);
+	return CL_SUCCESS;
+}
1267
+
1268
+/* Stand-alone test driver: reads Javascript from standard input until
+ * EOF, normalizes it and prints the result on standard output.
+ * Returns 0 on success and 1 when the parser cannot be initialized. */
+int main(int argc,char** argv)
+{
+	int n;
+	char buf[BUFS+2];
+	struct parser_state state;
+
+	(void)argc;
+	(void)argv;
+
+	/*cli_debug_flag=1;*/
+	printf("Enter javascript:\n");
+	printf("  Terminate with ^D\n");
+
+	/* without a scanner the calls below would dereference NULL */
+	if(cli_js_init(&state) != CL_SUCCESS) {
+		fprintf(stderr, "cannot initialize the javascript parser\n");
+		return 1;
+	}
+	while ( ( n=read(fileno(stdin), buf, BUFS )) >  0)
+	{
+		/*buf[n] = '\0';*/
+		cli_js_process_buffer(&state, buf, n);
+	}
+	if(n < 0)
+		perror("read");
+	cli_js_parse_done(&state);
+	cli_js_output(&state);
+	cli_js_destroy(&state);
+	return 0;
+}
1289
+
1290
+/* TODO: special identifiers in global scope (document, ...) 
1291
+ *
1292
+ * avoid extra strdup: 
1293
+ *  - when string is going to be folded
1294
+ *  - normalize_string, and hashtab_insert - avoid one
1295
+ *
1296
+ * decoded stuff should be parsed in the correct context (not a global one!)
1297
+ *
1298
+ * make unescape, packers handling more generic
1299
+ * memory leaks - manual check
1300
+ * check for allocation failure everywhere
1301
+ * limits
1302
+ * security check
1303
+ * */
1304
+
1305
+
1306
+/*-------------- tokenizer ---------------------*/
1307
+/* Character classes used by the tokenizer tables.
+ * Single-character tokens reuse the value of their token type, so the
+ * class can be returned directly as the token (see CASE_SPECIAL_CHAR). */
+enum char_class {
+	Whitespace = 0,
+	Slash = 1,        /* '/': operator or start of a comment */
+	Operator = 2,
+	DQuote = 3,       /* '"' */
+	SQuote = 4,       /* '\'' */
+	Digit = 5,
+	IdStart = 6,      /* character that can be part of an identifier */
+	BracketOpen = TOK_BRACKET_OPEN,
+	BracketClose = TOK_BRACKET_CLOSE,
+	Comma = TOK_COMMA,
+	CurlyOpen = TOK_CURLY_BRACE_OPEN,
+	CurlyClose = TOK_CURLY_BRACE_CLOSE,
+	ParOpen = TOK_PAR_OPEN,
+	ParClose = TOK_PAR_CLOSE,
+	Dot = TOK_DOT,
+	SemiColon = TOK_SEMICOLON,
+	Nop               /* character is skipped */
+};
1326
+
1327
+/* Two-letter aliases for enum char_class; they keep the 16-column
+ * character tables below readable. */
+#define WS Whitespace
+#define SL Slash
+#define OP Operator
+#define DQ DQuote
+#define SQ SQuote
+#define DG Digit
+#define ID IdStart
+#define BO BracketOpen
+#define BC BracketClose
+#define CM Comma
+#define CO CurlyOpen
+#define CC CurlyClose
+#define PO ParOpen
+#define PC ParClose
+#define DT Dot
+#define SC SemiColon
+#define NA Nop
1344
+
1345
+/* Class of every byte when it starts a new token (tokenizer state
+ * Initial). Each row covers 16 consecutive byte values. */
+static const enum char_class ctype[256] = {
+	/* 0x00 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, WS, WS, WS, NA, WS, NA, NA,
+	/* 0x10 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x20 */ WS, OP, DQ, NA, ID, OP, OP, SQ, PO, PC, OP, OP, CM, OP, DT, SL,
+	/* 0x30 */ DG, DG, DG, DG, DG, DG, DG, DG, DG, DG, OP, SC, OP, OP, OP, OP,
+	/* 0x40 */ NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
+	/* 0x50 */ ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, BO, ID, BC, OP, ID,
+	/* 0x60 */ NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
+	/* 0x70 */ ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, CO, OP, CC, OP, NA,
+	/* 0x80 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x90 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xa0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xb0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xc0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xd0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xe0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xf0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
+};
1363
+
1364
+/* Class of every byte while inside an identifier: ID continues the
+ * identifier, OP is used only for the backslash, and NA ends the
+ * identifier. Each row covers 16 consecutive byte values. */
+static const enum char_class id_ctype[256] = {
+	/* 0x00 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x10 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x20 */ NA, NA, NA, NA, ID, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x30 */ ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA, NA,
+	/* 0x40 */ NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
+	/* 0x50 */ ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, OP, NA, NA, ID,
+	/* 0x60 */ NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
+	/* 0x70 */ ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA,
+	/* 0x80 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0x90 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xa0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xb0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xc0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xd0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xe0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+	/* 0xf0 */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
+};
1382
+
1383
+/* Expands to a complete `case` for a single-character token: stores the
+ * constant text S as the token value and returns the character class,
+ * whose numeric value equals the token type. Relies on the variables
+ * `lvalp` and `cClass` of the enclosing function. */
+#define CASE_SPECIAL_CHAR(C, S) \
+	case C: \
+		TOKEN_SET(lvalp, cstring, (S)); \
+		return cClass;
+
+/* A scanner text buffer that grew beyond this many bytes is shrunk back
+ * to this size by textbuf_clean(). */
+#define BUF_KEEP_SIZE 32768
1386
+
1387
+/* Reset buf for the next token. The allocation is reused, but a buffer
+ * that grew beyond BUF_KEEP_SIZE is shrunk back to that size. If the
+ * shrink fails the larger buffer is simply kept. */
+static void textbuf_clean(struct text_buffer *buf)
+{
+	if(buf->capacity > BUF_KEEP_SIZE) {
+		/* NOTE(review): assumes cli_realloc leaves the old block valid
+		 * when it fails */
+		void *shrunk = cli_realloc(buf->data, BUF_KEEP_SIZE);
+		if(shrunk) {
+			buf->data = shrunk;
+			buf->capacity = BUF_KEEP_SIZE;
+		}
+	}
+	buf->pos = 0;
+}
1395
+
1396
+
1397
+/* Scan a string literal body starting at the current position (the
+ * opening quote has already been consumed) up to the closing quote q.
+ *
+ * The text is appended, with escapes decoded, to the scanner buffer.
+ * If the closing quote is found, it is skipped, the token value is set
+ * to the finished string and TOK_StringLiteral is returned.
+ * If the input ends first, the rest of the input is consumed, the scanner
+ * is switched to `tostate` so the next buffer continues the string, and
+ * 0 is returned.
+ *
+ * A quote is treated as escaped only when it is preceded by an odd number
+ * of backslashes. Backslashes are only looked for inside the current
+ * buffer, so an escape split across two buffers is not recognized. */
+static inline int parseString(YYSTYPE *lvalp, yyscan_t scanner, const char q,
+		enum tokenizer_state tostate)
+{
+	size_t len;
+	/* look for " terminating the string */
+	const char *start = &scanner->in[scanner->pos], *end = start;
+	const char *limit = &scanner->in[scanner->insize];
+
+	for(;;) {
+		const char *p;
+		size_t backslashes = 0;
+
+		end = memchr(end, q, limit - end);
+		if(!end)
+			break;
+		/* never look in front of the string start */
+		for(p = end; p > start && p[-1] == '\\'; p--)
+			backslashes++;
+		if(!(backslashes & 1))
+			break; /* unescaped quote: end of string */
+		++end;
+	}
+	/* end == start is a valid, empty string */
+	len = end ? (size_t)(end - start) : scanner->insize - scanner->pos;
+	textbuffer_append_normalize(&scanner->buf, start, len);
+	if(end) {
+		/* skip over end quote */
+		scanner->pos += len + 1;
+		textbuffer_putc(&scanner->buf, '\0');
+		TOKEN_SET(lvalp, string, textbuffer_done(scanner));
+		scanner->state = Initial;
+		assert(lvalp->val.string);
+		return TOK_StringLiteral;
+	} else {
+		scanner->pos += len;
+		/* unfinished string */
+		scanner->state = tostate;
+		return 0;
+	}
+}
1429
+
1430
+/* Scan a double-quoted string literal; see parseString(). */
+static inline int parseDQString(YYSTYPE *lvalp, yyscan_t scanner)
+{
+	const char quote = '"';
+
+	return parseString(lvalp, scanner, quote, DoubleQString);
+}
1434
+
1435
+/* Scan a single-quoted string literal; see parseString(). */
+static inline int parseSQString(YYSTYPE *lvalp, yyscan_t scanner)
+{
+	const char quote = '\'';
+
+	return parseString(lvalp, scanner, quote, SingleQString);
+}
1439
+
1440
+/* Scan a numeric literal into the scanner buffer.
+ *
+ * Accepts digits, one '.', and - only after a '.' was seen - an 'e'/'E'
+ * followed by a sign or digit. The first character that does not fit ends
+ * the number: it is pushed back, and the token value is set with atof()
+ * (TOK_NumericFloat) or atoi() (TOK_NumericInt).
+ * If the input ends before such a character is seen, the scanner stays in
+ * the Number state and 0 is returned, so the number can continue in the
+ * next buffer. */
+static inline int parseNumber(YYSTYPE *lvalp, yyscan_t scanner)
+{
+	const unsigned char *in = (const unsigned char*)scanner->in;
+	int is_float = 0;
+
+	while(scanner->pos < scanner->insize) {
+		unsigned char ch = in[scanner->pos++];
+		int accepted = 0;
+
+		if(isdigit(ch)) {
+			accepted = 1;
+		} else if(ch == '.' && !is_float) {
+			is_float = 1;
+			accepted = 1;
+		} else if((ch == 'e' || ch == 'E') && is_float) {
+			/* the exponent marker itself is always stored */
+			textbuffer_putc(&scanner->buf, ch);
+			if(scanner->pos < scanner->insize) {
+				ch = in[scanner->pos++];
+				accepted = (ch == '+' || ch == '-' || isdigit(ch));
+			}
+		}
+		if(accepted) {
+			textbuffer_putc(&scanner->buf, ch);
+			continue;
+		}
+
+		/* ch is not part of the number: push it back and finish */
+		scanner->pos--;
+		textbuffer_putc(&scanner->buf, '\0');
+		scanner->state = Initial;
+		if(!is_float) {
+			TOKEN_SET(lvalp, ival, atoi(scanner->buf.data));
+			return TOK_NumericInt;
+		}
+		TOKEN_SET(lvalp, dval, atof(scanner->buf.data));
+		return TOK_NumericFloat;
+	}
+	scanner->state = Number;
+	return 0;
+}
1479
+
1480
+/* Scan an identifier or keyword into the scanner buffer.
+ *
+ * Returns the keyword's token type when the text is found by
+ * in_word_set(), otherwise TOK_IDENTIFIER_NAME with a NULL value (the
+ * caller fills it in). If the input ends while still inside the
+ * identifier, the scanner stays in the Identifier state and 0 is
+ * returned, so the identifier can continue in the next buffer.
+ *
+ * NOTE(review): for a \u escape only the backslash is stored, the 'u' is
+ * consumed without being stored; kept as in the original, confirm whether
+ * that is intended. */
+static inline int parseId(YYSTYPE *lvalp, yyscan_t scanner)
+{
+	const struct keyword *kw;
+	const unsigned char *in = (const unsigned char*)scanner->in;
+	scanner->state = Initial;
+	while(scanner->pos < scanner->insize) {
+		unsigned char c = in[scanner->pos++];
+		enum char_class cClass = id_ctype[c];
+		switch(cClass) {
+			case IdStart:
+				textbuffer_putc(&scanner->buf, c);
+				break;
+			case Operator:
+				/* the table contains OP only for \ */
+				assert(c == '\\');
+				if(scanner->pos < scanner->insize) {
+					if(in[scanner->pos++] == 'u') {
+						textbuffer_putc(&scanner->buf, c);
+						break;
+					}
+				} else {
+					/* backslash is the last input byte: make up for
+					 * the pos-- below so the backslash stays consumed,
+					 * otherwise the scanner would return the same
+					 * empty identifier forever */
+					scanner->pos++;
+				}
+				/* else fallthrough */
+			default:
+				/* character is no longer part of identifier */
+				textbuffer_putc(&scanner->buf, '\0');
+				scanner->pos--;
+				kw = in_word_set(scanner->buf.data, scanner->buf.pos-1);
+				if(kw) {
+					/* we got a keyword */
+					TOKEN_SET(lvalp, cstring, kw->name);
+					return kw->val;
+				}
+				/* it is not a keyword, just an identifier */
+				TOKEN_SET(lvalp, cstring, NULL);
+				return TOK_IDENTIFIER_NAME;
+		}
+	}
+	scanner->state = Identifier;
+	return 0;
+}
1519
+
1520
+/* Smaller of two values; both arguments may be evaluated twice. */
+#ifndef MIN
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
1523
+
1524
+/* Scan an operator at the current position using longest match: prefixes
+ * of up to 5 bytes are looked up with in_op_set(), longest first.
+ * On a match the operator's constant name becomes the token value and its
+ * token type is returned. When nothing matches, one byte is skipped and
+ * TOK_ERROR is returned with a NULL value. */
+static int parseOperator(YYSTYPE *lvalp, yyscan_t scanner)
+{
+	size_t len;
+
+	for(len = MIN(5, scanner->insize - scanner->pos); len != 0; len--) {
+		const struct operator *op = in_op_set(&scanner->in[scanner->pos], len);
+
+		if(!op)
+			continue;
+		TOKEN_SET(lvalp, cstring, op->name);
+		scanner->pos += len;
+		return op->val;
+	}
+	/* never try len == 0 */
+	scanner->pos++;
+	TOKEN_SET(lvalp, cstring, NULL);
+	return TOK_ERROR;
+}
1540
+
1541
+/* Allocate a zero-initialized scanner and store it in *scanner.
+ * Returns 0 on success and -1 when the allocation fails (*scanner is
+ * NULL in that case). */
+static int yylex_init(yyscan_t *scanner)
+{
+	yyscan_t fresh = cli_calloc(1, sizeof(**scanner));
+
+	*scanner = fresh;
+	if(!fresh)
+		return -1;
+	return 0;
+}
1546
+
1547
+/* Free the scanner and its text buffer. A NULL scanner (for example after
+ * a failed yylex_init()) is accepted and ignored. Always returns 0. */
+static int yylex_destroy(yyscan_t scanner)
+{
+	if(!scanner)
+		return 0;
+	free(scanner->buf.data);
+	free(scanner);
+	return 0;
+}
1553
+
1554
+/* Point the scanner at a new input buffer of len bytes and rewind the
+ * read position. The bytes are not copied, so p must stay valid while
+ * the buffer is being scanned. The tokenizer state and the text buffer
+ * are kept, which lets a token continue from the previous buffer.
+ * Always returns 0. */
+static int yy_scan_bytes(const char *p, size_t len, yyscan_t scanner)
+{
+	scanner->pos = 0;
+	scanner->insize = len;
+	scanner->in = p;
+	return 0;
+}
1561
+
1562
+/* Placeholder with a flex-style signature; debugging output is not
+ * implemented and both arguments are ignored. */
+static void yyset_debug (int debug_flag ,yyscan_t yyscanner )
+{
+	/* TODO */
+	(void)debug_flag;
+	(void)yyscanner;
+}
1566
+
1567
+/* Placeholder with a flex-style signature; the scanner does not own the
+ * input buffer, so there is nothing to release yet. */
+static void yy_delete_buffer( YY_BUFFER_STATE yyb, yyscan_t scanner)
+{
+	/* TODO */
+	(void)yyb;
+	(void)scanner;
+}
1571
+
1572
+/* Return the text of the current token: scanner->yytext when it is set,
+ * otherwise the contents of the scanner's text buffer. */
+static const char *yyget_text(yyscan_t scanner)
+{
+	assert(scanner->buf.data || scanner->yytext);
+	if(scanner->yytext)
+		return scanner->yytext;
+	return scanner->buf.data;
+}
1577
+
1578
+/* Return the length of the current token's text: scanner->yylen when it
+ * is non-zero, otherwise the fill level of the text buffer minus its
+ * terminating NUL (0 for an empty buffer). */
+static int yyget_leng(yyscan_t scanner)
+{
+	if(scanner->yylen)
+		return scanner->yylen;
+	/* we have a \0 too */
+	if(scanner->buf.pos > 0)
+		return scanner->buf.pos - 1;
+	return 0;
+}
1583
+
1584
+/* Return the next token from the scanner's current input buffer, storing
+ * its value in *lvalp, or 0 when the buffer is exhausted.
+ *
+ * The scanner state survives between buffers, so a string, identifier,
+ * number or multi-line comment that is cut off at the end of one buffer
+ * continues in the next one.
+ *
+ * NOTE(review): a comment terminator split across two buffers ('*' last
+ * byte, '/' first byte) is not recognized. A single-line comment is
+ * considered finished at the end of the buffer even without a newline;
+ * kept as in the original. */
+static int yylex(YYSTYPE *lvalp, yyscan_t  scanner)
+{
+	const size_t len = scanner->insize;
+	const unsigned char *in = (const unsigned char*)scanner->in;
+	unsigned char lookahead;
+	enum char_class cClass;
+
+	scanner->yytext = NULL;
+	scanner->yylen = 0;
+	while(scanner->pos < scanner->insize) {
+		switch(scanner->state) {
+			case Initial:
+				textbuf_clean(&scanner->buf);
+				cClass = ctype[in[scanner->pos++]];
+				switch(cClass) {
+					case Whitespace:
+						/* eat whitespace */
+						continue;
+					case Slash:
+						if(scanner->pos < len) {
+							lookahead = in[scanner->pos];
+							switch(lookahead) {
+								case '*':
+									scanner->state = MultilineComment;
+									scanner->pos++;
+									continue;
+								case '/':
+									scanner->state = SinglelineComment;
+									scanner->pos++;
+									continue;
+							}
+						}
+						--scanner->pos;
+						return parseOperator(lvalp, scanner);
+					case Operator:
+						--scanner->pos;
+						return parseOperator(lvalp, scanner);
+					case DQuote:
+						return parseDQString(lvalp, scanner);
+					case SQuote:
+						return parseSQString(lvalp, scanner);
+					case Digit:
+						--scanner->pos;
+						return parseNumber(lvalp, scanner);
+					case IdStart:
+						--scanner->pos;
+						return parseId(lvalp,scanner);
+					CASE_SPECIAL_CHAR(BracketOpen, "[");
+					CASE_SPECIAL_CHAR(BracketClose, "]");
+					CASE_SPECIAL_CHAR(Comma, ",");
+					CASE_SPECIAL_CHAR(CurlyOpen, "{");
+					CASE_SPECIAL_CHAR(CurlyClose, "}");
+					CASE_SPECIAL_CHAR(ParOpen, "(");
+					CASE_SPECIAL_CHAR(ParClose, ")");
+					CASE_SPECIAL_CHAR(Dot, ".");
+					CASE_SPECIAL_CHAR(SemiColon, ";");
+					case Nop:
+					       continue;
+				}
+				break;
+			case DoubleQString:
+				return parseString(lvalp, scanner, '"', DoubleQString);
+			case SingleQString:
+				/* stay in the single-quote state if the string is
+				 * still unfinished */
+				return parseString(lvalp, scanner, '\'', SingleQString);
+			case Identifier:
+				return parseId(lvalp, scanner);
+			case MultilineComment:
+				while(scanner->pos+1 < scanner->insize) {
+					if(in[scanner->pos] == '*' && in[scanner->pos+1] == '/') {
+						/* comment is over; the two increments skip
+						 * the terminator itself */
+						scanner->state = Initial;
+						scanner->pos++;
+						break;
+					}
+					scanner->pos++;
+				}
+				/* either the '/' of the terminator, or the last byte
+				 * of a comment that continues in the next buffer */
+				scanner->pos++;
+				break;
+			case Number:
+				return parseNumber(lvalp, scanner);
+			case SinglelineComment:
+				while(scanner->pos < scanner->insize) {
+					if(in[scanner->pos] == '\n')
+						break;
+					scanner->pos++;
+				}
+				scanner->state = Initial;
+				break;
+		}
+	}
+	return 0;
+}
0 1672
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+/*
1
+ *  Javascript normalizer.
2
+ *
3
+ *  Copyright (C) 2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Török Edvin
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+
22
+/* Parser and normalizer state; only forward-declared in this header. */
+struct parser_state;
+
+/* Prepare *state for parsing. Returns a CL_* status code (CL_SUCCESS on
+ * success). */
+int cli_js_init(struct parser_state *state);
+
+/* Tokenize the n bytes at buf and add the tokens to state. May be called
+ * repeatedly with consecutive chunks of one script. */
+void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n);
+
+/* Finish parsing: run the folding and decoding passes and release the
+ * scanner. No further input is accepted afterwards. */
+void cli_js_parse_done(struct parser_state* state);
+
+/* Write the normalized script to standard output. */
+void cli_js_output(struct parser_state *state);
+
+/* Free the scopes and tokens held by state. */
+void cli_js_destroy(struct parser_state *state);
28
+
0 29
new file mode 100644
... ...
@@ -0,0 +1,25 @@
0
+break
1
+case
2
+catch
3
+continue
4
+default
5
+delete
6
+do
7
+else
8
+finally
9
+for
10
+function
11
+if
12
+in
13
+instanceof
14
+new
15
+return
16
+switch
17
+this
18
+throw
19
+try
20
+typeof
21
+var
22
+void
23
+while
24
+with
0 25
new file mode 100644
... ...
@@ -0,0 +1,146 @@
0
+/*
1
+ *  Javascript normalizer.
2
+ *
3
+ *  Copyright (C) 2008 Sourcefire, Inc.
4
+ *
5
+ *  Authors: Török Edvin
6
+ *
7
+ *  This program is free software; you can redistribute it and/or modify
8
+ *  it under the terms of the GNU General Public License version 2 as
9
+ *  published by the Free Software Foundation.
10
+ *
11
+ *  This program is distributed in the hope that it will be useful,
12
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ *  GNU General Public License for more details.
15
+ *
16
+ *  You should have received a copy of the GNU General Public License
17
+ *  along with this program; if not, write to the Free Software
18
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
+ *  MA 02110-1301, USA.
20
+ */
21
+#ifndef YYSTYPE
22
+enum token_type { /* token kinds; numbering starts at 1 (first enumerator), so 0 is never a token type */
23
+	TOK_FUTURE_RESERVED_WORD=1,
24
+	TOK_ERROR,
25
+	TOK_IDENTIFIER_NAME,
26
+	TOK_TRUE,
27
+	TOK_FALSE,
28
+	TOK_NULL,
29
+	TOK_BRACKET_OPEN,
30
+	TOK_BRACKET_CLOSE,
31
+	TOK_COMMA,
32
+	TOK_CURLY_BRACE_OPEN,
33
+	TOK_CURLY_BRACE_CLOSE,
34
+	TOK_PAR_OPEN,
35
+	TOK_PAR_CLOSE,
36
+	TOK_DOT,
37
+	TOK_SEMICOLON,
38
+	TOK_COLON,
39
+	TOK_NEW,
40
+	TOK_NumericInt,
41
+	TOK_NumericFloat,
42
+	TOK_StringLiteral,
43
+	TOK_REGULAR_EXPRESSION_LITERAL,
44
+	TOK_THIS,
45
+	TOK_PLUSPLUS, /* NOTE(review): the operator table in this change lists "--" but has no "++" row -- confirm where this token is produced */
46
+	TOK_MINUSMINUS,
47
+	TOK_DELETE,
48
+	TOK_VOID,
49
+	TOK_TYPEOF,
50
+	TOK_MINUS,
51
+	TOK_TILDE,
52
+	TOK_EXCLAMATION,
53
+	TOK_MULTIPLY,
54
+	TOK_DIVIDE,
55
+	TOK_PERCENT,
56
+	TOK_PLUS,
57
+	TOK_SHIFT_LEFT,
58
+	TOK_SHIFT_RIGHT,
59
+	TOK_DOUBLESHIFT_RIGHT, /* the ">>>" operator */
60
+	TOK_LESS,
61
+	TOK_GREATER,
62
+	TOK_LESSEQUAL,
63
+	TOK_GREATEREQUAL,
64
+	TOK_INSTANCEOF,
65
+	TOK_IN,
66
+	TOK_EQUAL_EQUAL,
67
+	TOK_NOT_EQUAL,
68
+	TOK_TRIPLE_EQUAL,
69
+	TOK_NOT_DOUBLEEQUAL, /* the "!==" operator */
70
+	TOK_AND,
71
+	TOK_XOR,
72
+	TOK_OR,
73
+	TOK_AND_AND,
74
+	TOK_OR_OR,
75
+	TOK_QUESTIONMARK,
76
+	TOK_EQUAL, /* plain "=" */
77
+	TOK_ASSIGNMENT_OPERATOR_NOEQUAL, /* shared by every compound ("op=") assignment row of the operator table */
78
+	TOK_VAR,
79
+	TOK_IF,
80
+	TOK_ELSE,
81
+	TOK_DO,
82
+	TOK_WHILE,
83
+	TOK_FOR,
84
+	TOK_CONTINUE,
85
+	TOK_BREAK,
86
+	TOK_RETURN,
87
+	TOK_WITH,
88
+	TOK_SWITCH,
89
+	TOK_CASE,
90
+	TOK_DEFAULT,
91
+	TOK_THROW,
92
+	TOK_TRY,
93
+	TOK_CATCH,
94
+	TOK_FINALLY,
95
+	TOK_FUNCTION,
96
+	TOK_UNNORM_IDENTIFIER
97
+};
98
+
99
+enum val_type { /* tag for the val union of struct token; TOKEN_SET pastes the same name into vtype_<name> and val.<name> */
100
+	vtype_undefined, /* the only tag with no matching union member */
101
+	vtype_cstring,
102
+	vtype_string,
103
+	vtype_scope,
104
+	vtype_dval,
105
+	vtype_ival
106
+};
107
+
108
+typedef struct token { /* one token: its kind (type) plus a tagged value (val + vtype) */
109
+	union {
110
+		const char *cstring;
111
+		char  *string;
112
+		struct scope *scope;/* for function */
113
+		double dval;
114
+		long   ival;
115
+	} val; /* read only the member selected by vtype */
116
+	enum token_type type;
117
+	enum val_type   vtype; /* records which member of val was last stored */
118
+} yystype; /* YYSTYPE is #defined to this type at the end of this header */
119
+
120
+/* Type-safe token accessors (macros): TOKEN_SET stores VAL in val.VTYPE and tags vtype to match.
121
+ * No ';' after while(0) -- the caller's own ';' completes the statement, keeping if/else safe. */
122
+#define TOKEN_SET(DST, VTYPE, VAL) do {\
123
+	(DST)->vtype = vtype_##VTYPE ; (DST)->val.VTYPE = (VAL); \
124
+} while(0)
125
+
126
+#define cstring_invalid NULL /* <VTYPE>_invalid: what TOKEN_GET evaluates to when the stored vtype is not compatible */
127
+#define string_invalid NULL
128
+#define scope_invalid NULL
129
+/* there isn't really an invalid double, or long value, but we don't care
130
+ * about those values anyway, so -1 will be fine here */
131
+#define dval_invalid -1
132
+#define ival_invalid -1
133
+
134
+/* compatible if same type, or if we request a const char* instead of char*,
135
+ * but not vice versa: a stored cstring is never handed out as a writable char* */
136
+static int vtype_compatible(enum val_type orig, enum val_type req)
137
+{
138
+	return orig == req || (orig == vtype_string && req == vtype_cstring);
139
+}
140
+#define COMPATIBLE(SRC, VTYPE) vtype_compatible((SRC)->vtype, vtype_##VTYPE)
141
+/* TOKEN_GET: val.VTYPE when the stored tag is compatible with VTYPE, otherwise VTYPE##_invalid */
142
+#define TOKEN_GET(SRC, VTYPE) (vtype_compatible((SRC)->vtype, vtype_##VTYPE) ? (SRC)->val.VTYPE : VTYPE##_invalid)
143
+
144
+#define YYSTYPE yystype
145
+#endif
0 146
new file mode 100644
... ...
@@ -0,0 +1,44 @@
0
+struct operator {
1
+	const char *name; /* first column of each row after %%: the operator spelling */
2
+	int val; /* second column: a TOK_* constant */
3
+};
4
+%%
5
+=,	TOK_EQUAL
6
++,	TOK_PLUS
7
+===,	TOK_TRIPLE_EQUAL
8
+>>>,	TOK_DOUBLESHIFT_RIGHT
9
+!==,	TOK_NOT_DOUBLEEQUAL
10
+<<,	TOK_SHIFT_LEFT
11
+<=,	TOK_LESSEQUAL
12
+==,	TOK_EQUAL_EQUAL
13
+>=,	TOK_GREATEREQUAL
14
+>>,	TOK_SHIFT_RIGHT
15
+||,	TOK_OR_OR
16
+--,	TOK_MINUSMINUS
17
+!=,	TOK_NOT_EQUAL
18
+&&,	TOK_AND_AND
19
+*=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
20
+/=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
21
+"%=", 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
22
++=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
23
+-=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
24
+<<=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
25
+>>=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
26
+>>>=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
27
+&=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
28
+^=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
29
+|=, 	TOK_ASSIGNMENT_OPERATOR_NOEQUAL
30
+&,	TOK_AND
31
+:,	TOK_COLON
32
+/,	TOK_DIVIDE
33
+!,	TOK_EXCLAMATION
34
+>,	TOK_GREATER
35
+<,	TOK_LESS
36
+-,	TOK_MINUS
37
+*,	TOK_MULTIPLY
38
+|,	TOK_OR
39
+"%",	TOK_PERCENT
40
+?,	TOK_QUESTIONMARK
41
+~,	TOK_TILDE
42
+^,	TOK_XOR
0 43
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+null, TOK_NULL
1
+true, TOK_TRUE
2
+false, TOK_FALSE
... ...
@@ -114,6 +114,7 @@ EGREP = @EGREP@
114 114
 EXEEXT = @EXEEXT@
115 115
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
116 116
 GETENT = @GETENT@
117
+GPERF = @GPERF@
117 118
 GREP = @GREP@
118 119
 HAVE_LIBGMP = @HAVE_LIBGMP@
119 120
 INSTALL = @INSTALL@
... ...
@@ -118,6 +118,7 @@ EGREP = @EGREP@
118 118
 EXEEXT = @EXEEXT@
119 119
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
120 120
 GETENT = @GETENT@
121
+GPERF = @GPERF@
121 122
 GREP = @GREP@
122 123
 HAVE_LIBGMP = @HAVE_LIBGMP@
123 124
 INSTALL = @INSTALL@
... ...
@@ -113,6 +113,7 @@ EGREP = @EGREP@
113 113
 EXEEXT = @EXEEXT@
114 114
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
115 115
 GETENT = @GETENT@
116
+GPERF = @GPERF@
116 117
 GREP = @GREP@
117 118
 HAVE_LIBGMP = @HAVE_LIBGMP@
118 119
 INSTALL = @INSTALL@
... ...
@@ -121,6 +121,7 @@ EGREP = @EGREP@
121 121
 EXEEXT = @EXEEXT@
122 122
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
123 123
 GETENT = @GETENT@
124
+GPERF = @GPERF@
124 125
 GREP = @GREP@
125 126
 HAVE_LIBGMP = @HAVE_LIBGMP@
126 127
 INSTALL = @INSTALL@
... ...
@@ -76,6 +76,7 @@ EGREP = @EGREP@
76 76
 EXEEXT = @EXEEXT@
77 77
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
78 78
 GETENT = @GETENT@
79
+GPERF = @GPERF@
79 80
 GREP = @GREP@
80 81
 HAVE_LIBGMP = @HAVE_LIBGMP@
81 82
 INSTALL = @INSTALL@
... ...
@@ -102,6 +102,7 @@ EGREP = @EGREP@
102 102
 EXEEXT = @EXEEXT@
103 103
 FRESHCLAM_LIBS = @FRESHCLAM_LIBS@
104 104
 GETENT = @GETENT@
105
+GPERF = @GPERF@
105 106
 GREP = @GREP@
106 107
 HAVE_LIBGMP = @HAVE_LIBGMP@
107 108
 INSTALL = @INSTALL@