unit_tests/check_jsnorm.c
5ee56e41
 /*
  *  Unit tests for JS normalizer.
  *
e1cbc270
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2008-2013 Sourcefire, Inc.
5ee56e41
  *
  *  Authors: Török Edvin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
4a6ade44
 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
 
 #include <stdio.h>
 
 #include <stdlib.h>
 #include <limits.h>
 #include <string.h>
 #include <check.h>
377a2330
 #include <fcntl.h>
85398f3e
 #include <ctype.h>
b3c69e92
 #include <errno.h>
b2e7c931
 
4a6ade44
 #include "../libclamav/clamav.h"
 #include "../libclamav/others.h"
5be3029f
 #include "../libclamav/dconf.h"
 #include "../libclamav/htmlnorm.h"
4a6ade44
 #include "../libclamav/jsparse/js-norm.h"
 #include "../libclamav/jsparse/lexglobal.h"
 #include "../libclamav/jsparse/textbuf.h"
 #include "../libclamav/jsparse/generated/keywords.h"
 #include "../libclamav/jsparse/generated/operators.h"
a39b29cb
 #include "checks.h"
4a6ade44
 
 /* One table-driven lookup case: a candidate word plus the expected verdict. */
 struct test {
288057e9
     const char *str; /* text handed to the lookup function under test */
     int is;          /* non-zero: lookup must find it; zero: lookup must return NULL */
4a6ade44
 };
 
 /* Cases for in_word_set(): entries flagged 1 must be reported as keywords,
  * entries flagged 0 must not. */
 static struct test kw_test[] = {
288057e9
     {"new", 1},
     {"eval", 0},
     {"function", 1},
     {"eval1", 0},
     {"ne", 0}};
4a6ade44
 
 /* Cases for in_op_set(): "-" must be found, "---" must be rejected. */
 static struct test op_test[] = {
288057e9
     {"-", 1},
     {"---", 0}};
4a6ade44
 
99f74adc
 #ifdef CHECK_HAVE_LOOPS
288057e9
 START_TEST(test_keywords)
4a6ade44
 {
     const struct keyword *kw = in_word_set(kw_test[_i].str, strlen(kw_test[_i].str));
288057e9
     if (kw_test[_i].is) {
         fail_unless(kw && !strcmp(kw->name, kw_test[_i].str), "keyword mismatch");
4a6ade44
     } else {
288057e9
         fail_unless(!kw, "non-keyword detected as keyword");
4a6ade44
     }
 }
 END_TEST
 
288057e9
 START_TEST(test_operators)
4a6ade44
 {
288057e9
     const struct operator*op = in_op_set(op_test[_i].str, strlen(op_test[_i].str));
     if (op_test[_i].is)
         fail_unless(op && !strcmp(op->name, op_test[_i].str), "operator mismatch");
4a6ade44
     else
288057e9
         fail_unless(!op, "non-operator detected as operator");
4a6ade44
 }
 END_TEST
61b295f4
 #endif /* CHECK_HAVE_LOOPS */
4a6ade44
 
288057e9
 START_TEST(test_token_string)
4a6ade44
 {
288057e9
     char str[] = "test";
     yystype tok;
     memset(&tok, 0, sizeof(tok));
 
     TOKEN_SET(&tok, string, str);
     fail_unless(TOKEN_GET(&tok, string) == str, "token string get/set");
     fail_unless(TOKEN_GET(&tok, cstring) == str, "token string->cstring");
     fail_unless(TOKEN_GET(&tok, scope) == NULL, "token string->scope");
     fail_unless(TOKEN_GET(&tok, ival) == -1, "token string->ival");
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_token_cstring)
4a6ade44
 {
288057e9
     const char *str = "test";
     yystype tok;
     memset(&tok, 0, sizeof(tok));
 
     TOKEN_SET(&tok, cstring, str);
     fail_unless(TOKEN_GET(&tok, string) == NULL, "token cstring->string");
     fail_unless(TOKEN_GET(&tok, cstring) == str, "token string->cstring");
     fail_unless(TOKEN_GET(&tok, scope) == NULL, "token string->scope");
     fail_unless(TOKEN_GET(&tok, ival) == -1, "token string->ival");
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_token_scope)
4a6ade44
 {
288057e9
     struct scope *sc = (struct scope *)0xdeadbeef;
     yystype tok;
     memset(&tok, 0, sizeof(tok));
 
     TOKEN_SET(&tok, scope, sc);
     fail_unless(TOKEN_GET(&tok, string) == NULL, "token scope->string");
     fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token scope->cstring");
     fail_unless(TOKEN_GET(&tok, scope) == sc, "token scope->scope");
     fail_unless(TOKEN_GET(&tok, ival) == -1, "token scope->ival");
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_token_ival)
4a6ade44
 {
288057e9
     int val = 0x1234567;
     yystype tok;
     memset(&tok, 0, sizeof(tok));
 
     TOKEN_SET(&tok, ival, val);
     fail_unless(TOKEN_GET(&tok, string) == NULL, "token ival->string");
     fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token ival->cstring");
     fail_unless(TOKEN_GET(&tok, scope) == NULL, "token ival->scope");
     fail_unless(TOKEN_GET(&tok, dval) - -1 < 1e-9, "token ival->dval");
     fail_unless(TOKEN_GET(&tok, ival) == val, "token ival->ival");
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_token_dval)
4a6ade44
 {
288057e9
     double val = 0.12345;
     yystype tok;
     memset(&tok, 0, sizeof(tok));
 
     TOKEN_SET(&tok, dval, val);
     fail_unless(TOKEN_GET(&tok, string) == NULL, "token dval->string");
     fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token dval->cstring");
     fail_unless(TOKEN_GET(&tok, scope) == NULL, "token dval->scope");
     fail_unless(TOKEN_GET(&tok, dval) - val < 1e-9, "token dval->dval");
     fail_unless(TOKEN_GET(&tok, ival) == -1, "token dval->ival");
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_init_destroy)
4a6ade44
 {
288057e9
     struct parser_state *state = cli_js_init();
     fail_unless(!!state, "cli_js_init()");
     cli_js_destroy(state);
     cli_js_destroy(NULL);
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(test_init_parse_destroy)
4a6ade44
 {
288057e9
     const char buf[]           = "function (p) { return \"anonymous\";}";
     struct parser_state *state = cli_js_init();
     fail_unless(!!state, "cli_js_init()");
     cli_js_process_buffer(state, buf, strlen(buf));
     cli_js_process_buffer(state, buf, strlen(buf));
     cli_js_parse_done(state);
     cli_js_destroy(state);
4a6ade44
 }
 END_TEST
 
288057e9
 START_TEST(js_begin_end)
5be3029f
 {
288057e9
     char buf[16384] = "</script>";
     size_t p;
 
     for (p = strlen(buf); p < 8191; p++) {
         buf[p++] = 'a';
         buf[p]   = ' ';
     }
     strncpy(buf + 8192, " stuff stuff <script language='javascript'> function () {}", 8192);
     fail_unless(html_normalise_mem((unsigned char *)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
5be3029f
 }
 END_TEST
 
288057e9
 START_TEST(multiple_scripts)
5be3029f
 {
288057e9
     char buf[] = "</script> stuff"
                  "<script language='Javascript'> function foo() {} </script>"
                  "<script language='Javascript'> function bar() {} </script>";
5be3029f
 
288057e9
     fail_unless(!!dconf, "failed to init dconf");
     fail_unless(html_normalise_mem((unsigned char *)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
     /* TODO: test that both had been normalized */
377a2330
 }
 END_TEST
 
 /* Parser shared by the tokenizer tests: created in jstest_setup() and
  * destroyed in jstest_teardown(). */
 static struct parser_state *state;
 /* Scratch directory path returned by cli_gentemp() in jstest_setup();
  * removed and freed in jstest_teardown(). */
 static char *tmpdir = NULL;
 
 static void jstest_setup(void)
 {
288057e9
     cl_init(CL_INIT_DEFAULT);
     state = cli_js_init();
     fail_unless(!!state, "js init");
     tmpdir = cli_gentemp(NULL);
     fail_unless(!!tmpdir, "js tmp dir");
     fail_unless_fmt(mkdir(tmpdir, 0700) == 0, "tempdir mkdir of %s failed: %s", tmpdir, strerror(errno));
377a2330
 }
 
 static void jstest_teardown(void)
 {
288057e9
     if (tmpdir) {
         cli_rmdirs(tmpdir);
         free(tmpdir);
     }
     cli_js_destroy(state);
     state = NULL;
377a2330
 }
 
 /*
  * Runs `in` through the shared parser, writes the result below tmpdir and
  * compares the "javascript" output file with `expected`.
  *
  * in:       NUL-terminated JavaScript source.
  * expected: NUL-terminated text the output file is compared against.
  * split:    non-zero feeds the input in two halves, zero feeds it in one call.
  */
 static void tokenizer_test(const char *in, const char *expected, int split)
 {
     char outpath[1024];
     const size_t in_len      = strlen(in);
     const ssize_t expect_len = strlen(expected);
     int out_fd;

     if (!split) {
         cli_js_process_buffer(state, in, in_len);
     } else {
         /* The first call gets the lower half, the second call the remainder. */
         const size_t half = in_len / 2;

         cli_js_process_buffer(state, in, half);
         cli_js_process_buffer(state, in + half, in_len - half);
     }
     cli_js_parse_done(state);
     cli_js_output(state, tmpdir);

     snprintf(outpath, 1023, "%s/javascript", tmpdir);
     out_fd = open(outpath, O_RDONLY);
     if (out_fd < 0) {
         /* Clean up by hand before the failure is reported. */
         jstest_teardown();
         fail_fmt("failed to open output file: %s", outpath);
     }

     diff_file_mem(out_fd, expected, expect_len);
 }
 
 /* Case 0 input: a function with float literals, an escaped quote, line and
  * block comments, and a string literal containing a raw newline. */
 static const char jstest_buf0[] =
288057e9
     "function foo(a, b) {\n"
     "var x = 1.9e2*2*a/ 4.;\n"
     "var y = 'test\\'tst';//var\n"
     "x=b[5],/* multiline\nvar z=6;\nsome*some/other**/"
     "z=x/y;/* multiline oneline */var t=z/a;\n"
     "z=[test;testi];"
     "document.writeln('something\n');}";
377a2330
 
 /* Case 0 expected output: comments gone, declared names replaced by nNNN
  * identifiers, numbers rewritten (1.9e2 -> 190, 4. -> 4), strings double-quoted. */
 static const char jstest_expected0[] =
288057e9
     "<script>function n000(n001,n002){"
     "var n003=190*2*n001/4;"
     "var n004=\"test\'tst\";"
     "n003=n002[5],"
     "z=n003/n004;var n005=z/n001;"
     "z=[test;testi];"
     "document.writeln(\"something \");}</script>";
377a2330
 
 /* Case 1 input: identifier containing a \uXXXX escape. */
 static const char jstest_buf1[] =
288057e9
     "function () { var id\\u1234tx;}";
377a2330
 
 /* Case 1 expected output: the whole identifier becomes n000. */
 static const char jstest_expected1[] =
288057e9
     "<script>function(){var n000;}</script>";
377a2330
 
 /* Case 2 input: string literals joined with '+', mixed quote styles. */
 static const char jstest_buf2[] =
288057e9
     "function () { var tst=\"a\"+'bc'+     'd'; }";
377a2330
 
 /* Case 2 expected output: the concatenation is folded into one literal. */
 static const char jstest_expected2[] =
288057e9
     "<script>function(){var n000=\"abcd\";}</script>";
377a2330
 
 /* Case 3 input: a dF() call carrying an encoded payload. */
 static const char jstest_buf3[] =
288057e9
     "dF('bmfsu%2639%2638x11u%2638%263%3A%264C1');";
377a2330
 
 /* Case 3 expected output: the payload appears decoded in place of the call. */
 static const char jstest_expected3[] =
288057e9
     "<script>alert(\"w00t\");</script>";
 
533e76aa
 /* Filler run of 'b' characters spliced into the case 7 and case 8 string literals. */
 #define B64 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
377a2330
 
 /* TODO: document.write should be normalized too */
81040d73
 /* Case 4 input. Stored letter-rotated (ROT13); prepare() decodes it in place
  * before the suite is built, which is why the array is not const.
  * NOTE(review): presumably stored encoded so this source file does not contain
  * the plain script text - confirm. */
 static char jstest_buf4[] =
288057e9
     "qbphzrag.jevgr(harfpncr('%3P%73%63%72%69%70%74%20%6P%61%6R%67%75%61%67%65%3Q%22%6N%61%76%61%73%63%72%69%70%74%22%3R%66%75%6R%63%74%69%6S%6R%20%64%46%28%73%29%7O%76%61%72%20%73%31%3Q%75%6R%65%73%63%61%70%65%28%73%2R%73%75%62%73%74%72%28%30%2P%73%2R%6P%65%6R%67%74%68%2Q%31%29%29%3O%20%76%61%72%20%74%3Q%27%27%3O%66%6S%72%28%69%3Q%30%3O%69%3P%73%31%2R%6P%65%6R%67%74%68%3O%69%2O%2O%29%74%2O%3Q%53%74%72%69%6R%67%2R%66%72%6S%6Q%43%68%61%72%43%6S%64%65%28%73%31%2R%63%68%61%72%43%6S%64%65%41%74%28%69%29%2Q%73%2R%73%75%62%73%74%72%28%73%2R%6P%65%6R%67%74%68%2Q%31%2P%31%29%29%3O%64%6S%63%75%6Q%65%6R%74%2R%77%72%69%74%65%28%75%6R%65%73%63%61%70%65%28%74%29%29%3O%7Q%3P%2S%73%63%72%69%70%74%3R'));riny(qS('tV%285%3O%285%3Nsdwjl%28585%3N7%28586Q%28585%3N7%3P%7P55l%28585%3N7%3P%28585%3N7%28586R%28585%3N8T5%285%3N%285%3P%286R3'));";
377a2330
 
81040d73
 /* Case 4 expected output, also stored letter-rotated and decoded in place by
  * prepare(). */
 static char jstest_expected4[] =
288057e9
     "<fpevcg>qbphzrag.jevgr(\"<fpevcg ynathntr=\"wninfpevcg\">shapgvba qs(f){ine f1=harfpncr(f.fhofge(0,f.yratgu-1)); ine g='';sbe(v=0;v<f1.yratgu;v++)g+=fgevat.sebzpunepbqr(f1.punepbqrng(v)-f.fhofge(f.yratgu-1,1));qbphzrag.jevgr(harfpncr(g));}</fpevcg>\");riny();nyreg(\"j00g\");</fpevcg>";
377a2330
 
81040d73
 /* Case 5 input. Stored letter-rotated (ROT13) and decoded in place by
  * prepare(), hence not const. */
 static char jstest_buf5[] =
288057e9
     "shapgvba (c,n,p,x,r,e){}('0(\\'1\\');',2,2,'nyreg|j00g'.fcyvg('|'),0,{});";
377a2330
 
 /* Case 5 expected output (stored in plain text): parameters renamed and the
  * packed call arguments replaced by the unpacked alert() statement. */
 static const char jstest_expected5[] =
288057e9
     "<script>function(n000,n001,n002,n003,n004,n005){}(alert(\"w00t\"););</script>";
377a2330
 
 /* Case 6 input: a named function $ with the (p,a,c,k,e,d) parameter list,
  * called later with packed arguments. */
 static const char jstest_buf6[] =
288057e9
     "function $(p,a,c,k,e,d){} something(); $('0(\\'1\\');',2,2,'alert|w00t'.split('|'),0,{});";
377a2330
 
 /* Case 6 expected output: the definition is renamed, the later call keeps the
  * name $ and carries the unpacked alert() statement. */
 static const char jstest_expected6[] =
288057e9
     "<script>function n000(n001,n002,n003,n004,n005,n006){}something();$(alert(\"w00t\"););</script>";
377a2330
 
 /* Case 7 input: long double-quoted string literal (padded with B64). */
 static const char jstest_buf7[] =
288057e9
     "var z=\"tst" B64 "tst\";";
377a2330
 
 /* Case 7 expected output: literal preserved, variable renamed. */
 static const char jstest_expected7[] =
288057e9
     "<script>var n000=\"tst" B64 "tst\";</script>";
377a2330
 
 /* Case 8 input: same as case 7 but single-quoted. */
 static const char jstest_buf8[] =
288057e9
     "var z=\'tst" B64 "tst\';";
377a2330
 
 /* Case 8 expected output: identical to case 7 (quotes become double quotes). */
 static const char jstest_expected8[] =
288057e9
     "<script>var n000=\"tst" B64 "tst\";</script>";
533e76aa
 
81040d73
 /* Case 9 input. Stored letter-rotated (ROT13) and decoded in place by
  * prepare(), hence not const. */
 static char jstest_buf9[] =
288057e9
     "riny(harfpncr('%61%6p%65%72%74%28%27%74%65%73%74%27%29%3o'));";
533e76aa
 
 /* Case 9 expected output (stored in plain text). */
 static const char jstest_expected9[] =
288057e9
     "<script>alert(\"test\");</script>";
533e76aa
 
 /* Case 10 input: syntactically broken code that uses $ and dF as identifiers
  * and leaves the last function body unclosed. */
 static const char jstest_buf10[] =
288057e9
     "function $ $() dF(x); function (p,a,c,k,e,r){function $(){}";
533e76aa
 
 /* Case 10 expected output: identifiers renamed, broken structure passed through. */
 static const char jstest_expected10[] =
288057e9
     "<script>function n000 n000()n001(x);function(n002,n003,n004,n005,n006,n007){function n008(){}</script>";
533e76aa
 
 /* Case 11 input: integer literal followed by a space before ';'. */
 static const char jstest_buf11[] =
288057e9
     "var x=123456789 ;";
533e76aa
 
 /* Case 11 expected output: the integer is kept digit for digit. */
 static const char jstest_expected11[] =
288057e9
     "<script>var n000=123456789;</script>";
377a2330
 
dfc0c031
 /* Case 12 input: string literal containing a \u0000 escape. */
 static const char jstest_buf12[] =
288057e9
     "var x='test\\u0000test';";
dfc0c031
 
 /* Case 12 expected output: the escape is expected to come out as byte 0x01. */
 static const char jstest_expected12[] =
288057e9
     "<script>var n000=\"test\x1test\";</script>";
dfc0c031
 
72733fba
 /* Case 13 input: declaration followed by a backslash sequence that is not a
  * \u escape. */
 static const char jstest_buf13[] =
288057e9
     "var x\\s12345";
72733fba
 
 /* Case 13 expected output: only the renamed declaration remains. */
 static const char jstest_expected13[] =
288057e9
     "<script>var n000</script>";
72733fba
 
a66b62f8
 /* Case 14 input: unescape() call whose string literal and parentheses are
  * never closed. */
 static const char jstest_buf14[] =
288057e9
     "document.write(unescape('test%20test";
a66b62f8
 
 /* Case 14 expected output: the literal is decoded and terminated, with one
  * closing parenthesis. */
 static const char jstest_expected14[] =
288057e9
     "<script>document.write(\"test test\")</script>";
72733fba
 
377a2330
 /* Input/expected pairs walked by the tokenizer_basic and tokenizer_split loop
  * tests; the loop index _i selects the entry. */
 static struct {
288057e9
     const char *in;       /* JavaScript source passed to tokenizer_test() */
     const char *expected; /* text the normalizer output file must match */
377a2330
 } js_tests[] = {
288057e9
     {jstest_buf0, jstest_expected0},
     {jstest_buf1, jstest_expected1},
     {jstest_buf2, jstest_expected2},
     {jstest_buf3, jstest_expected3},
     {jstest_buf4, jstest_expected4},
     {jstest_buf5, jstest_expected5},
     {jstest_buf6, jstest_expected6},
     {jstest_buf7, jstest_expected7},
     {jstest_buf8, jstest_expected8},
     {jstest_buf9, jstest_expected9},
     {jstest_buf10, jstest_expected10},
     {jstest_buf11, jstest_expected11},
     {jstest_buf12, jstest_expected12},
     {jstest_buf13, jstest_expected13},
     {jstest_buf14, jstest_expected14}};
377a2330
 
99f74adc
 #ifdef CHECK_HAVE_LOOPS
288057e9
 START_TEST(tokenizer_basic)
377a2330
 {
288057e9
     tokenizer_test(js_tests[_i].in, js_tests[_i].expected, 0);
377a2330
 }
 END_TEST
 
288057e9
 START_TEST(tokenizer_split)
377a2330
 {
288057e9
     tokenizer_test(js_tests[_i].in, js_tests[_i].expected, 1);
5be3029f
 }
 END_TEST
61b295f4
 #endif /* CHECK_HAVE_LOOPS */
5be3029f
 
288057e9
 START_TEST(js_buffer)
533e76aa
 {
288057e9
     const size_t len   = 512 * 1024;
     const char s[]     = "x=\"";
     const char e[]     = "\"";
     const char s_exp[] = "<script>";
     const char e_exp[] = "</script>";
     char *tst          = malloc(len);
     char *exp          = malloc(len + sizeof(s_exp) + sizeof(e_exp) - 2);
 
     fail_unless(!!tst, "malloc");
     fail_unless(!!exp, "malloc");
 
     memset(tst, 'a', len);
     strncpy(tst, s, strlen(s));
     strncpy(tst + len - sizeof(e), e, sizeof(e));
 
     strncpy(exp, s_exp, len);
     strncpy(exp + sizeof(s_exp) - 1, tst, len - 1);
     strncpy(exp + sizeof(s_exp) + len - 2, e_exp, sizeof(e_exp));
 
     tokenizer_test(tst, exp, 1);
     free(exp);
     free(tst);
533e76aa
 }
 END_TEST
 
288057e9
 START_TEST(screnc_infloop)
e2354bdb
 {
288057e9
     char buf[24700] = "<%@ language='jscript.encode'>";
     size_t p;
 
     fail_unless(!!dconf, "failed to init dconf");
     for (p = strlen(buf); p < 16384; p++) {
         buf[p] = ' ';
     }
     for (; p < 24625; p++) {
         buf[p] = 'a';
     }
     strncpy(buf + 24626, "#@~^ ", 10);
     fail_unless(html_normalise_mem((unsigned char *)buf, sizeof(buf), NULL, NULL, dconf) == 1, "normalise");
e2354bdb
 }
 END_TEST
 
81040d73
 /*
  * Rotates every ASCII letter of `s` by 13 places, in place (ROT13).
  *
  * s: NUL-terminated, writable string.
  *
  * Only bytes in the range 'A'..'z' are looked up, which is exactly the span
  * the table covers; the six punctuation characters between 'Z' and 'a' map to
  * themselves. Every other byte is left untouched. The explicit range check
  * replaces isalpha(), which is undefined for negative char values and could
  * accept locale-specific letters that index outside the table.
  */
 static void prepare_s(char *s)
 {
     static const char xlat[] = "NOPQRSTUVWXYZABCDEFGHIJKLM[\\]^_`nopqrstuvwxyzabcdefghijklm";

     for (; *s; s++) {
         const unsigned char c = (unsigned char)*s;

         if (c >= 'A' && c <= 'z') {
             *s = xlat[c - 'A'];
         }
     }
 }
 
 static void prepare(void)
 {
288057e9
     prepare_s(jstest_buf4);
     prepare_s(jstest_expected4);
     prepare_s(jstest_buf5);
     prepare_s(jstest_buf9);
81040d73
 }
 
4a6ade44
 Suite *test_jsnorm_suite(void)
 {
     Suite *s = suite_create("jsnorm");
e2354bdb
     TCase *tc_jsnorm_gperf, *tc_jsnorm_token, *tc_jsnorm_api,
288057e9
         *tc_jsnorm_tokenizer, *tc_jsnorm_bugs;
e2354bdb
 
81040d73
     prepare();
4a6ade44
     tc_jsnorm_gperf = tcase_create("jsnorm gperf");
288057e9
     suite_add_tcase(s, tc_jsnorm_gperf);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
288057e9
     tcase_add_loop_test(tc_jsnorm_gperf, test_keywords, 0, sizeof(kw_test) / sizeof(kw_test[0]));
     tcase_add_loop_test(tc_jsnorm_gperf, test_operators, 0, sizeof(op_test) / sizeof(op_test[0]));
99f74adc
 #endif
4a6ade44
     tc_jsnorm_token = tcase_create("jsnorm token functions");
288057e9
     suite_add_tcase(s, tc_jsnorm_token);
4a6ade44
     tcase_add_test(tc_jsnorm_token, test_token_string);
     tcase_add_test(tc_jsnorm_token, test_token_cstring);
     tcase_add_test(tc_jsnorm_token, test_token_scope);
     tcase_add_test(tc_jsnorm_token, test_token_ival);
     tcase_add_test(tc_jsnorm_token, test_token_dval);
 
     tc_jsnorm_api = tcase_create("jsnorm api functions");
288057e9
     suite_add_tcase(s, tc_jsnorm_api);
4a6ade44
     tcase_add_test(tc_jsnorm_api, test_init_destroy);
     tcase_add_test(tc_jsnorm_api, test_init_parse_destroy);
 
5be3029f
     tc_jsnorm_tokenizer = tcase_create("jsnorm tokenizer");
288057e9
     suite_add_tcase(s, tc_jsnorm_tokenizer);
     tcase_add_checked_fixture(tc_jsnorm_tokenizer, jstest_setup, jstest_teardown);
99f74adc
 #ifdef CHECK_HAVE_LOOPS
288057e9
     tcase_add_loop_test(tc_jsnorm_tokenizer, tokenizer_basic, 0, sizeof(js_tests) / sizeof(js_tests[0]));
     tcase_add_loop_test(tc_jsnorm_tokenizer, tokenizer_split, 0, sizeof(js_tests) / sizeof(js_tests[0]));
99f74adc
 #endif
533e76aa
     tcase_add_test(tc_jsnorm_tokenizer, js_buffer);
5be3029f
 
563582a1
     tc_jsnorm_bugs = tcase_create("bugs");
288057e9
     suite_add_tcase(s, tc_jsnorm_bugs);
563582a1
     tcase_add_checked_fixture(tc_jsnorm_bugs, dconf_setup, dconf_teardown);
5be3029f
     tcase_add_test(tc_jsnorm_bugs, js_begin_end);
     tcase_add_test(tc_jsnorm_bugs, multiple_scripts);
563582a1
     tcase_add_test(tc_jsnorm_bugs, screnc_infloop);
e2354bdb
 
4a6ade44
     return s;
 }