libclamav/jsparse/js-norm.c
fd08e02e
 /*
  *  Javascript normalizer.
  *
c442ca9c
  *  Copyright (C) 2013-2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
  *  Copyright (C) 2008-2013 Sourcefire, Inc.
fd08e02e
  *
  *  Authors: Török Edvin
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *  MA 02110-1301, USA.
  */
8be1d5a4
 #ifdef HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif
fd08e02e
 
 #include <stdio.h>
61eb9432
 
 #ifdef HAVE_UNISTD_H
fd08e02e
 #include <unistd.h>
61eb9432
 #endif
fd08e02e
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <assert.h>
61eb9432
 
60d8d2c3
 #include "clamav.h"
b3722aeb
 #include "jsparse/lexglobal.h"
fd08e02e
 #include "hashtab.h"
 #include "others.h"
bd02147f
 #include "str.h"
fd08e02e
 #include "js-norm.h"
 #include "jsparse/generated/operators.h"
 #include "jsparse/generated/keywords.h"
4a6ade44
 #include "jsparse/textbuf.h"
fd08e02e
 
 /* ----------- tokenizer ---------------- */
 /* States of the tokenizer. cli_js_parse_done() inspects the scanner's final
  * state: DoubleQString / SingleQString mean the input ended inside a string
  * literal, and the matching closing quote is then fed to the parser.
  * NOTE(review): the other states presumably name the kind of token being
  * scanned; the state machine itself is in yylex(), not visible here. */
 enum tokenizer_state {
 	Initial,
 	MultilineComment,
 	SinglelineComment,
 	Number,
 	DoubleQString,
 	SingleQString,
72733fba
 	Identifier,
 	Dummy
fd08e02e
 };
 
 
 /* Tokenizer context; yyscan_t is a pointer to it. */
 typedef struct scanner {
 	struct text_buffer buf;	/* text being accumulated; handed over and zeroed by textbuffer_done() */
 	const char *yytext;	/* text of the last finished token (set by textbuffer_done()) */
 	size_t yylen;		/* textbuffer_done() sets this to buf.pos - 1 */
 	const char *in;		/* presumably the input passed to yy_scan_bytes() -- TODO confirm */
 	size_t insize;		/* presumably the size of `in` -- TODO confirm */
 	size_t pos;		/* presumably the read offset into `in` -- TODO confirm */
72733fba
 	size_t lastpos;
 	enum tokenizer_state state;	/* read by cli_js_parse_done() to detect an unterminated string */
 	enum tokenizer_state last_state;
fd08e02e
 } *yyscan_t;
 
 /* Tokenizer interface used by the parser below. The definitions are not part
  * of this section (presumably further down in this file). */
 typedef int YY_BUFFER_STATE;
 
 /* returns the next token type, 0 on EOF (see cli_js_process_buffer()) */
 static int yylex( YYSTYPE *lvalp, yyscan_t  );
 static YY_BUFFER_STATE yy_scan_bytes( const char *, size_t, yyscan_t scanner );
 static const char *yyget_text ( yyscan_t scanner );
 static int yyget_leng ( yyscan_t scanner );
 /* a non-zero return is treated as failure by cli_js_init() */
 static int yylex_init ( yyscan_t * ptr_yy_globals ) ;
 static int yylex_destroy ( yyscan_t yyscanner ) ;
 /* ----------- tokenizer end ---------------- */
 
 /* Per-scope declaration tracking state, driven by cli_js_process_buffer(). */
 enum fsm_state {
 	Base,			/* identifiers are recorded as uses (scope_use) */
 	InsideVar,		/* entered on TOK_VAR: the next identifier is declared */
 	InsideInitializer,	/* entered after a declared identifier; identifiers are uses */
 	WaitFunctionName,	/* initial state of a scope created for TOK_FUNCTION */
 	WaitParameterList,	/* function name seen, '(' expected */
 	InsideFunctionDecl	/* entered on '(' after `function`: the next identifier is declared */
 };
 
 /* One scope: the global one plus one per `function` token. */
 struct scope {
cc447ac8
 	/* identifier -> id; -1 marks an identifier used but not declared here */
 	struct cli_hashtable id_map;
fd08e02e
 	struct scope *parent;/* hierarchy */
 	struct scope *nxt;/* all scopes kept in a list so we can easily free all of them */
 	enum fsm_state fsm_state;
 	int  last_token;	/* type of the last token added while this scope was current */
 	unsigned int brackets;	/* count of open TOK_BRACKET_OPEN tokens */
 	unsigned int blocks;	/* count of open TOK_CURLY_BRACE_OPEN tokens */
 };
 
 /* Growable array of tokens. */
 struct tokens {
 	yystype *data;		/* heap array, grown by tokens_ensure_capacity() */
 	size_t   cnt;		/* number of tokens in use */
 	size_t   capacity;	/* number of tokens allocated in data */
 };
 
 /* state for the current JS file being parsed */
 struct parser_state {
094104a7
 	unsigned long     var_uniq;	/* next id handed out by scope_declare() */
 	unsigned long     syntax_errors;	/* incremented on unexpected tokens, never fatal */
fd08e02e
 	struct scope *global;	/* outermost scope, created by cli_js_init() */
 	struct scope *current;	/* scope that new tokens belong to */
 	struct scope *list;	/* head of the list of all scopes (linked via scope->nxt) */
 	yyscan_t scanner;	/* set to NULL once cli_js_parse_done() destroyed it */
 	struct tokens tokens;
fe389c84
 	unsigned int      rec;	/* depth counter used by run_decoders(), limit 16 */
fd08e02e
 };
 
 static struct scope* scope_new(struct parser_state *state)
 {
 	struct scope *parent = state->current;
 	struct scope *s = cli_calloc(1, sizeof(*s));
 	if(!s)
 		return NULL;
cc447ac8
 	if(cli_hashtab_init(&s->id_map, 10) < 0) {
fd08e02e
 		free(s);
 		return NULL;
 	}
 	s->parent = parent;
 	s->fsm_state = Base;
 	s->nxt = state->list;
 	state->list = s;
 	state->current = s;
 	return s;
 }
 
 /*
  * Free a single scope together with its identifier map.
  * Returns the parent of the freed scope.
  */
 static struct scope* scope_done(struct scope *s)
 {
 	struct scope *up = s->parent;
 
 	/* TODO: have a hashtab_destroy; until then clear the map and free its
 	 * table by hand */
 	cli_hashtab_clear(&s->id_map);
 	free(s->id_map.htable);
 	free(s);
 	return up;
 }
 
 /* transitions:
  *   Base --(VAR)--> InsideVar
  *   InsideVar --(Identifier)-->InsideInitializer
  *   InsideVar --(anything_else) --> POP (to Base)
  *   InsideInitializer --(COMMA)--> POP (to InsideVar)
  *   InsideInitializer | InsideVar --(SEMICOLON) --> POP (to Base)
  *   InsideInitializer --(BRACKET_OPEN) --> WaitBrClose
  *   InsideInitializer --(PAR_OPEN) --> WaitParClose
  *   WaitBrClose --(BRACKET_OPEN) --> increase depth
  *   WaitBrClose --(BRACKET_CLOSE) --> POP
  *   WaitParClose --(PAR_CLOSE) --> POP
  *   WaitParClose --(PAR_OPEN) --> increase depth
  */
 
 /* Base --(VAR)--> PUSH, to InsideVar
  * InsideVar --(Identifier)--> InsideInitializer
  * InsideVar --(ELSE)--> POP, inc. syntax_errors
  * InsideInitializer --(COMMA)--> POP (to InsideVar)
  * --(BRACKET_OPEN)--> inc bracket_counter
  * --(PAR_OPEN)--> inc par_counter
  * --(BRACKET_CLOSE) --> dec bracket_counter
  * --(PAR_CLOSE)--> dec par_counter
  * --(VAR)--> PUSH, to InsideVar (if bracket_counter != 0 || par_counter != 0)
  *        --> POP, to InsideVar, inc. syntax_errors (if bracket_counter == 0  && par_counter == 0)
  *  POP only allowed if bracket_counter == 0 && par_counter == 0 
  *
  * InsideInitializer acts differently, make it only a flag
  * ....................
  *
  * Pushing and popping are done when entering / exiting function scopes,
  * tracking { and function ( is done by the function scope tracker too.
  *
  * we only need to track brackets.
  */
 
 
 /*
  * var x = document;
  * x.writeln(...);
  *
  * ^we must not normalize member method names
  */
 
 /*
  * Variables are declared at function scope, and their initial value is
  * undefined. At the point where the initializer is, and from there on the value
  * is defined.
  *
  * { doesn't introduce a new variable scope, they are in function's scope too
  *
  * function foo() {
  *  alert(x); -> x exists, undefined
  *  var x=5; 
  *  alert(x); -> x exists, =5
  * }
  * 
  * vs.
  *
  * function bar() {
  *   alert(x);//error, x not declared
  *   x=5;
  *   }
  *
  * vs.
  *
  * but we can declare variables without var, only valid if we use them after
  * assigning.
  *
  * function foobar() {
  *   x=5;
  *   alert(x);//x is defined, value is 5
  *   }
  *
  * other examples:
  * function foo2() {
  *   alert(x); -> x exists, undefined
  *   {
  *       var x=5; -> x equals to 5
  *   }
  *   alert(x); -> x is 5
  * }
  *
  * function foo3() {
  *   var x=4; -> x exists, equals to 4
  *   alert(x); -> x exists, equals to 4
  *   {
  *       var x=5; -> x equals to 5
  *   }
  *   alert(x); -> x is 5
  * }
  *
  * function bar3() {
  *   //same as foo3
  *   var x=4;
  *   alert(x);
  *   { 
  *        x=5;
  *   }
  *   alert(x);
  * }
  *
  */
 
 
 static const char* scope_declare(struct scope *s, const char *token, const size_t len, struct parser_state *state)
 {
cc447ac8
 	const struct cli_element *el = cli_hashtab_insert(&s->id_map, token, len, state->var_uniq++);
 	/* cli_hashtab_insert either finds an already existing entry, or allocates a
fd08e02e
 	 * new one, we return the allocated string */
 	return el ? el->key : NULL;
 }
 
 static const char* scope_use(struct scope *s, const char *token, const size_t len)
 {
cc447ac8
 	const struct cli_element *el = cli_hashtab_find(&s->id_map, token, len);
fd08e02e
 	if(el) {
 		/* identifier already found in current scope,
 		 * return here to avoid overwriting uniq id */
 		return el->key;
 	}
 	/* identifier not yet in current scope's hashtab, add with ID -1.
 	 * Later if we find a declaration it will automatically assign a uniq ID
 	 * to it. If not, we'll know that we have to push ID == -1 tokens to an
 	 * outer scope.*/
cc447ac8
 	el = cli_hashtab_insert(&s->id_map, token, len, -1);
fd08e02e
 	return el ? el->key : NULL;
 }
 
 static long scope_lookup(struct scope *s, const char *token, const size_t len)
 {
 	while(s) {
cc447ac8
 		const struct cli_element *el = cli_hashtab_find(&s->id_map, token, len);
fd08e02e
 		if(el && el->data != -1) {
 			return el->data;
 		}
 		/* not found in current scope, try in outer scope */
 		s = s->parent;
 	}
 	return -1;
 }
 
 static int tokens_ensure_capacity(struct tokens *tokens, size_t cap)
 {
 	if(tokens->capacity < cap) {
b0033c8f
 	        yystype *data;
377a2330
 		cap += 1024;
b0033c8f
 		/* Keep old data if OOM */
 		data = cli_realloc(tokens->data, cap * sizeof(*tokens->data));
 		if(!data)
fd08e02e
 			return CL_EMEM;
b0033c8f
 		tokens->data = data;
377a2330
 		tokens->capacity = cap;
fd08e02e
 	}
 	return CL_SUCCESS;
 }
 
 static int add_token(struct parser_state *state, const yystype *token)
 {
b0033c8f
 	if(tokens_ensure_capacity(&state->tokens, state->tokens.cnt + 1))
fd08e02e
 		return -1;
 	state->tokens.data[state->tokens.cnt++] = *token;
 	return 0;
 }
 
 /* Output buffer in front of a file descriptor. */
 struct buf {
 	size_t pos;		/* number of bytes currently held in buf */
 	int outfd;		/* descriptor that full buffers are written to */
 	char buf[65536];
 };
 
 static inline int buf_outc(char c, struct buf *buf)
 {
 	if(buf->pos >= sizeof(buf->buf)) {
 		if(write(buf->outfd, buf->buf, sizeof(buf->buf)) != sizeof(buf->buf))
871177cd
 			return CL_EWRITE;
fd08e02e
 		buf->pos = 0;
 	}
 	buf->buf[buf->pos++] = c;
 	return CL_SUCCESS;
 }
 
 static inline int buf_outs(const char *s, struct buf *buf)
 {
 	const size_t buf_len = sizeof(buf->buf);
6ba8b950
 	size_t i;
 
 	i = buf->pos;
 	while(*s) {
 		while(i < buf_len && *s) {
2686c7ba
 			if(isspace(*s & 0xff))
1279faf6
 				buf->buf[i++] = ' ';
 			else
 				buf->buf[i++] = tolower((unsigned char)(*s));
 			++s;
6ba8b950
 		}
 		if(i == buf_len) {
 			if(write(buf->outfd, buf->buf, buf_len) < 0)
871177cd
 				return CL_EWRITE;
6ba8b950
 		       i = 0;
 		}
fd08e02e
 	}
6ba8b950
 	buf->pos = i;
fd08e02e
 	return CL_SUCCESS;
 }
 
 /*
  * Emit a separating space when the previously written character and the
  * first character of the next token are both alphanumeric, so that two
  * adjacent tokens do not merge into one.
  */
 static inline void output_space(char last, char current, struct buf *out)
 {
 	/* `last` may be any byte of the input (including >= 0x80, which is
 	 * negative where char is signed); isalnum() is only defined for
 	 * unsigned char values and EOF */
 	if(isalnum((unsigned char)last) && isalnum((unsigned char)current))
 		buf_outc(' ', out);
 }
 
 
 /* return class of last character */
 static char output_token(const yystype *token, struct scope *scope, struct buf *out, char lastchar)
 {
 	char sbuf[128];
 	const char *s = TOKEN_GET(token, cstring);
 	/* TODO: use a local buffer, instead of FILE* */
 	switch(token->type) {
 		case TOK_StringLiteral:
 			output_space(lastchar,'"', out);
 			buf_outc('"', out);
 			if(s) {
 				buf_outs(s, out);
 			}
 			buf_outc('"', out);
 			return '\"';
 		case TOK_NumericInt:
 			output_space(lastchar,'0', out);
 			snprintf(sbuf, sizeof(sbuf), "%ld", TOKEN_GET(token, ival));
 			buf_outs(sbuf, out);
 			return '0';
 		case TOK_NumericFloat:
 			output_space(lastchar,'0', out);
377a2330
 			snprintf(sbuf, sizeof(sbuf), "%g", TOKEN_GET(token, dval));
fd08e02e
 			buf_outs(sbuf, out);
 			return '0';
 		case TOK_IDENTIFIER_NAME:
 			output_space(lastchar,'a', out);
 			if(s) {
 				long id = scope_lookup(scope, s, strlen(s));
 				if(id == -1) {
 					/* identifier not normalized */
 					buf_outs(s, out);
 				} else {
 					snprintf(sbuf, sizeof(sbuf), "n%03ld",id);
 					buf_outs(sbuf, out);
 				}
 			}
 			return 'a';
 		case TOK_FUNCTION:
 			output_space(lastchar,'a', out);
 			buf_outs("function",out);
 			return 'a';
 		default:
 			if(s) {
 				const size_t len = strlen(s);
 				output_space(lastchar,s[0], out);
 				buf_outs(s, out);
 				return len ? s[len-1] : '\0';
 			}
 			return '\0';
 	}
 }
 
 /*
  * We can't delete the scope as soon as we see a }, because
  * we still need the hashmap from it.
  *
  * If we would normalize all the identifiers, and output when a scope is closed,
  * then it would be impossible to normalize calls to other functions.
  *
  * So we need to keep all scopes in memory, to do this instead of scope_done, we
  * simply just set current = current->parent when a scope is closed.
  * We keep a list of all scopes created in parser_state. Once we have parsed
  * everything, we output everything, and then delete all scopes.
  *
  * We also need to know where to switch scopes on the second pass, so for
  * TOK_FUNCTION types we will use another pointer, that points to the scope
  * (added to yystype's union).
  *
  * We lookup the identifier in the scope (using scope_lookup, it looks in parent
  * scopes too), if ID is found then output (n%3d, Id),
  * otherwise output the identifier as is.
  *
  * To make  it easier to match sigs, we do a xfrm : 
  * 'function ID1 (..'. => 'n%3d = function (...'
  */
 
 /*
  * we'll add all identifier to the scope's map
  * those that are not decl. will have initial ID -1
  * if we later see a decl for it in same scope, it'll automatically get a
  * correct ID.
  *
  * When parsing of local scope is done, we take any ID -1 identifiers,
  * and push them up one level (careful not to overwrite existing IDs).
  *
  * it would be nice if the tokens would contain a link to the entry in the
  * hashtab, a link that automatically gets updated when the element is moved
  * (pushed up). This would prevent subsequent lookups in the map,
  * when we want to output the tokens.
  * There is no easy way to do that, so we just do another lookup
  *
  */
 
 /*
  * This actually works, redefining foo:
  * function foo() {
  *   var foo=5; alert(foo);
  * }
  * So we can't treat function names just as any other identifier?
  * We can, because you can no longer call foo, if you redefined it as a var.
  * So if we rename both foo-s with same name, it will have same behaviour.
  *
  * This means that a new scope should begin after function, and not after
  * function ... (.
  */
 
 /*
  * Free every scope on the list starting at `p` (linked through ->nxt).
  * A NULL list is accepted and is a no-op.
  */
 static void scope_free_all(struct scope *p)
 {
 	while(p) {
 		/* fetch the link first, scope_done() frees p */
 		struct scope *nxt = p->nxt;
 		scope_done(p);
 		p = nxt;
 	}
 }
 
72fb25ea
 size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
 
 /*
  * Check whether `tokens` starts with the parameter list
  * "( name0 , name1 , ... )" where the names are the `count` strings in
  * `param_names`.
  * Returns 0 on a match, -1 otherwise.
  * The caller must guarantee that 1 + 2*count tokens are readable.
  */
 static int match_parameters(const yystype *tokens, const char ** param_names, size_t count)
 {
 	const yystype *cur = tokens;
 	size_t n;
 
 	if(cur->type != TOK_PAR_OPEN)
 		return -1;
 	cur++;
 	for(n = 0; n < count; n++) {
 		const char *ident = TOKEN_GET(cur, cstring);
 		const int is_last = (n + 1 == count);
 
 		if(cur->type != TOK_IDENTIFIER_NAME || !ident || strcmp(ident, param_names[n]))
 			return -1;
 		cur++;
 		/* the separator is ',' between names and ')' after the last */
 		if(is_last) {
 			if(cur->type != TOK_PAR_CLOSE)
 				return -1;
 		} else if(cur->type != TOK_COMMA) {
 			return -1;
 		}
 		cur++;
 	}
 	return 0;
 }
 
 /* Parameter-name lists that run_decoders() matches (via match_parameters())
  * against a function declaration before handing it to handle_de(). */
 static const char *de_packer_3[] = {"p","a","c","k","e","r"};
 static const char *de_packer_2[] = {"p","a","c","k","e","d"};
 
 /*
  * Finish the text accumulated in scanner->buf: shrink the allocation to the
  * used size, publish it as yytext/yylen and reset the buffer.
  * Returns the text; it is no longer referenced by scanner->buf afterwards.
  */
 static inline char *textbuffer_done(yyscan_t scanner)
 {
 	char *text = scanner->buf.data;
 	char *shrunk = cli_realloc(text, scanner->buf.pos);
 
 	/* if shrinking failed keep using the original allocation */
 	if(shrunk)
 		text = shrunk;
 	scanner->yytext = text;
 	scanner->yylen = scanner->buf.pos - 1;
 	memset(&scanner->buf, 0, sizeof(scanner->buf));
 	return text;
 }
 
 #define MODULE "JS-Norm: "
 
 /* Release the heap string owned by `token`, if its value is a string. */
 static void free_token(yystype *token)
 {
 	if(token->vtype != vtype_string)
 		return;
 	free(token->val.string);
 	/* guard against a second free of the same token */
 	token->val.string = NULL;
 }
 
 static int replace_token_range(struct tokens *dst, size_t start, size_t end, const struct tokens *with)
 {
 	const size_t len = with ? with->cnt : 0;
 	size_t i;
8af6e5a2
 	cli_dbgmsg(MODULE "Replacing tokens %lu - %lu with %lu tokens\n", (unsigned long)start,
                    (unsigned long)end, (unsigned long)len);
fd08e02e
 	if(start >= dst->cnt || end > dst->cnt)
 		return -1;
 	for(i=start;i<end;i++) {
 		free_token(&dst->data[i]);
 	}
4b2ade1d
 	if(tokens_ensure_capacity(dst, dst->cnt - (end-start) + len))
fd08e02e
 		return CL_EMEM;
 	memmove(&dst->data[start+len], &dst->data[end], (dst->cnt - end) * sizeof(dst->data[0]));
 	if(with && len > 0) {
 		memcpy(&dst->data[start], with->data, len * sizeof(dst->data[0]));
 	}
 	dst->cnt = dst->cnt - (end-start) + len;
 	return CL_SUCCESS;
 }
 
 static int append_tokens(struct tokens *dst, const struct tokens *src)
 {
 	if(!dst || !src)
 		return CL_ENULLARG;
4b2ade1d
 	if(tokens_ensure_capacity(dst, dst->cnt + src->cnt))
fd08e02e
 		return CL_EMEM;
8af6e5a2
 	cli_dbgmsg(MODULE "Appending %lu tokens\n", (unsigned long)(src->cnt));
fd08e02e
 	memcpy(&dst->data[dst->cnt], src->data, src->cnt * sizeof(dst->data[0]));
 	dst->cnt += src->cnt;
 	return CL_SUCCESS;
 }
 
 /*
  * Decode a packed script given the six call arguments (p,a,c,k,e,r|d).
  *
  * params[0] (p) is the packed text, params[1] (a) the numeric base and
  * params[3] (k) a '|' separated word list, which is split in place.
  * Every alphanumeric run in p is read as a number in base a and replaced
  * with the word of that index; runs whose index is out of range, or whose
  * word is empty, are copied unchanged. A backslash in front of a quote is
  * dropped.
  *
  * The result is stored in `txtbuf`, which is zeroed first; it stays empty
  * if p or k is missing or the word table cannot be allocated.
  */
 static void decode_de(yystype *params[], struct text_buffer *txtbuf)
 {
 	const char *p = TOKEN_GET(params[0], cstring);
 	const long a = TOKEN_GET(params[1], ival);
 	/*const char *c = params[2];*/
 	char *k = TOKEN_GET(params[3], string);
 	/*const char *r = params[5];*/
 
 	unsigned val;
 	unsigned nsplit = 0;
 	const char *o;
 	const char **tokens;
 
 	memset(txtbuf, 0, sizeof(*txtbuf));
 	if(!p || !k)
 		return;
 	/* number of words = number of separators + 1 */
 	for(o = k; *o; o++) {
 		if(*o == '|')
 			nsplit++;
 	}
 	nsplit++;
 	tokens = malloc(sizeof(char*)*nsplit);
 	if(!tokens) {
 		return;
 	}
 	cli_strtokenize(k, '|', nsplit, tokens);
 
 	do {
 		/* copy everything up to the next alphanumeric run; the casts
 		 * keep isalnum() defined for bytes >= 0x80 */
 		while(*p && !isalnum((unsigned char)*p)) {
 			if(*p == '\\' && (p[1] == '\'' || p[1] == '\"'))
 				p++;
 			else
 				textbuffer_putc(txtbuf, *p++);
 		}
 		if(!*p)
 			break;
 		val = 0;
 		o = p;
 		while(*p && isalnum((unsigned char)*p)) {
 			unsigned x;
 			unsigned char v = *p++;
 			/* TODO: use a table here */
 			if(v >= 'a') x = 10+v-'a';
 			else if(v >= 'A') x = 36+v-'A';
 			else x = v-'0';
 			val = val*a+x;
 		}
 		if(val >= nsplit || !tokens[val] || !tokens[val][0]) {
 			/* no replacement word: keep the original text */
 			while(o != p)
 				textbuffer_putc(txtbuf, *o++);
 		} else {
 			textbuffer_append(txtbuf, tokens[val]);
 		}
 	} while(*p);
 	free(tokens);
 	textbuffer_append(txtbuf, "\0");
 }
 
 /* Output of the handle_*() decoders, consumed by run_decoders(). */
 struct decode_result {
 	struct text_buffer txtbuf;	/* decoded script text; data is freed by run_decoders() */
 	/* token range [pos_begin, pos_end) the decoded text stands for;
 	 * run_decoders() only acts when pos_end > pos_begin */
 	size_t pos_begin;
 	size_t pos_end;
         unsigned append:1; /* 0: tokens are replaced with new token(s),
                             1: old tokens are deleted, new ones appended at the end */
 };
 
 /*
  * Collect the six arguments of a call, starting at tokens[*pos] (the token
  * right after the opening parenthesis). Only the first token of every
  * argument is recorded; the rest is skipped up to the next ',' (or ')' for
  * the last argument).
  * *pos is advanced past everything that was consumed.
  * Returns 1 if all six arguments were found, 0 if the tokens ran out.
  */
 static int de_collect_arguments(yystype *tokens, size_t *pos, const size_t cnt, yystype **args)
 {
 	const size_t args_cnt = 6;
 	size_t i = *pos, j;
 
 	for(j = 0; j < args_cnt && i < cnt; j++) {
 		const int terminator = (j != args_cnt-1) ? TOK_COMMA : TOK_PAR_CLOSE;
 
 		args[j] = &tokens[i++];
 		/* check the index before reading the token */
 		while(i < cnt && tokens[i].type != terminator)
 			i++;
 		i++;
 	}
 	*pos = i;
 	return j == args_cnt;
 }
 
 /*
  * Handle a function declared with the (p,a,c,k,e,r) / (p,a,c,k,e,d)
  * parameter list. `start` is the index of the parameter list, `cnt` the
  * number of tokens, `name` the function name or NULL for an anonymous
  * function.
  *
  * The end of the function is located first; then either the calls to `name`
  * or, for an anonymous function, the call right after the function are
  * decoded with decode_de().
  *
  * On success res->txtbuf holds the decoded text and res->pos_begin /
  * res->pos_end the range of call tokens it stands for (the last decoded
  * call, if there were several). `res` is untouched when nothing was decoded.
  */
 static void handle_de(yystype *tokens, size_t start, const size_t cnt, const char *name, struct decode_result *res)
 {
 	size_t i, nesting = 1;
 	yystype *parameters[6];
 	yystype *candidate[6];
 	const size_t parameters_cnt = 6;
 	int decoded = 0;
 
 	/* find function decl. end: a FUNCTION token carrying a scope opens a
 	 * function, one without a scope is the dummy end marker */
 	for(i = start; i < cnt; i++) {
 		if(tokens[i].type == TOK_FUNCTION) {
 			if(TOKEN_GET(&tokens[i], scope))
 				nesting++;
 			else
 				nesting--;
 			if(!nesting)
 				break;
 		}
 	}
 	if(nesting)
 		return;
 	memset(parameters, 0, sizeof(parameters));
 	if(name) {
 		/* find call to function */
 		for(; i+2 < cnt; i++) {
 			const char *token_val = TOKEN_GET(&tokens[i], cstring);
 
 			if(tokens[i].type != TOK_IDENTIFIER_NAME || !token_val ||
 			   strcmp(name, token_val) || tokens[i+1].type != TOK_PAR_OPEN)
 				continue;
 			i += 2;
 			/* an incomplete call must not clobber the arguments of
 			 * a call that was already decoded */
 			if(!de_collect_arguments(tokens, &i, cnt, candidate))
 				continue;
 			if(decoded)
 				free(res->txtbuf.data); /* superseded by this call */
 			memcpy(parameters, candidate, sizeof(parameters));
 			decode_de(parameters, &res->txtbuf);
 			decoded = 1;
 		}
 	} else {
 		while(i < cnt && tokens[i].type != TOK_PAR_OPEN)
 			i++;
 		++i;
 		if(i >= cnt)
 			return;
 		if(de_collect_arguments(tokens, &i, cnt, candidate)) {
 			memcpy(parameters, candidate, sizeof(parameters));
 			decode_de(parameters, &res->txtbuf);
 			decoded = 1;
 		}
 	}
 	if(decoded) {
 		res->pos_begin = parameters[0] - tokens;
 		res->pos_end = parameters[parameters_cnt-1] - tokens + 1;
 		/* only look at the trailing tokens if they exist */
 		if(res->pos_end + 2 < cnt &&
 		   tokens[res->pos_end].type == TOK_BRACKET_OPEN &&
 		   tokens[res->pos_end+1].type == TOK_BRACKET_CLOSE &&
 		   tokens[res->pos_end+2].type == TOK_PAR_CLOSE)
 			res->pos_end += 3; /* {}) */
 		else
 			res->pos_end++; /* ) */
 	}
 }
 
08402afa
 static int handle_unescape(struct tokens *tokens, size_t start)
fd08e02e
 {
 	if(tokens->data[start].type == TOK_StringLiteral) {
 		char *R;
 		struct tokens new_tokens;
 		yystype tok;
 
 		R = cli_unescape(TOKEN_GET(&tokens->data[start], cstring));
 		tok.type = TOK_StringLiteral;
 		TOKEN_SET(&tok, string, R);
 		new_tokens.capacity = new_tokens.cnt = 1;
 		new_tokens.data = &tok;
 		if(replace_token_range(tokens, start-2, start+2, &new_tokens) < 0)
 			return CL_EMEM;
 	}
 	return CL_SUCCESS;
 }
 
 
 /* scriptasylum dot com's JS encoder */
08402afa
 static void handle_df(const yystype *tokens, size_t start, struct decode_result *res)
fd08e02e
 {
 	char *str, *s1;
 	size_t len, s1_len, i;
 	unsigned char clast;
 	char *R;
 
 	if(tokens[start].type != TOK_StringLiteral)
 		return;
 	str = TOKEN_GET(&tokens[start], string);
 	if(!str)
 		return;
 	len = strlen(str);
2e67cefc
 	if(!len)
 		return;
fd08e02e
 	clast = str[len-1] - '0';
 
 	str[len-1] = '\0';
 	s1 = cli_unescape(str);
 	s1_len = strlen(s1);
 	for(i=0;i<s1_len;i++) {
 		s1[i] -= clast;
 	}
 	R = cli_unescape(s1);
 	free(s1);
 	res->pos_begin = start-2;
 	res->pos_end = start+2;
 	res->txtbuf.data = R;
 	res->txtbuf.pos = strlen(R);
 	res->append = 1;
 }
 
 
 
 /*
  * Handle eval("literal"): `start` is the index of the argument, i.e. two
  * tokens after `eval`.
  *
  * If the argument is a string immediately followed by ')', the string is
  * taken away from the token (ownership moves to res->txtbuf) and `res` is
  * set to cover the four call tokens. Otherwise nothing is changed.
  */
 static void handle_eval(struct tokens *tokens, size_t start, struct decode_result *res)
 {
 	char *text;
 
 	/* the token after the argument has to exist before it is inspected */
 	if(start + 1 >= tokens->cnt)
 		return;
 	text = TOKEN_GET(&tokens->data[start], string);
 	if(!text || tokens->data[start+1].type != TOK_PAR_CLOSE)
 		return;
 
 	/* detach the string so that freeing the token does not free it */
 	TOKEN_SET(&tokens->data[start], string, NULL);
 	res->txtbuf.data = text;
 	res->txtbuf.pos = strlen(text);
 	res->pos_begin = start-2;
 	res->pos_end = start+2;
 }
 
 static void run_folders(struct tokens *tokens)
 {
   size_t i;
 
   for(i = 0; i < tokens->cnt; i++) {
 	  const char *cstring = TOKEN_GET(&tokens->data[i], cstring);
 	  if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 		    cstring &&
 		    !strcmp("unescape", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 
08402afa
 		  handle_unescape(tokens, i+2);
fd08e02e
 	  }
   }
 }
 
 static inline int state_update_scope(struct parser_state *state, const yystype *token)
 {
 	if(token->type == TOK_FUNCTION) {
 		struct scope *scope = TOKEN_GET(token, scope);
 		if(scope) {
 			state->current = scope;
 		}
 		else {
 			/* dummy token marking function end */
 			if(state->current->parent)
 				state->current = state->current->parent;
 			/* don't output this token, it is just a dummy marker */
 			return 0;
 		}
 	}
 	return 1;
 }
 
 /*
  * Walk the token list looking for known encoders/packers:
  *  - functions declared with the (p,a,c,k,e,r) / (p,a,c,k,e,d) parameters,
  *  - calls to dF(...),
  *  - calls to eval(...).
  * When a handler produces decoded text, that text is tokenized into a
  * temporary token list, which then replaces the original call tokens (or,
  * for `append` results, is appended after deleting them).
  */
 static void run_decoders(struct parser_state *state)
 {
 	size_t i;
 	const char* name;
 	struct tokens *tokens = &state->tokens;
 
 	for(i = 0; i < tokens->cnt; i++) {
 		const char *cstring = TOKEN_GET(&tokens->data[i], cstring);
 		struct decode_result res;
 
 		/* zero everything, including txtbuf, so that no handler path can
 		 * leave an indeterminate pointer behind */
 		memset(&res, 0, sizeof(res));
 		if(tokens->data[i].type == TOK_FUNCTION && i+13 < tokens->cnt) {
 			name = NULL;
 			++i;
 			if(tokens->data[i].type == TOK_IDENTIFIER_NAME) {
 				cstring = TOKEN_GET(&tokens->data[i], cstring);
 				name = cstring;
 				++i;
 			}
 			/* match_parameters() reads tokens i .. i+12; with a
 			 * function name present the check above is one short */
 			if(i+12 < tokens->cnt &&
 			   (match_parameters(&tokens->data[i], de_packer_3, sizeof(de_packer_3)/sizeof(de_packer_3[0])) != -1
 			    || match_parameters(&tokens->data[i], de_packer_2, sizeof(de_packer_2)/sizeof(de_packer_2[0])) != -1)) {
 				/* find function decl. end */
 				handle_de(tokens->data, i, tokens->cnt, name, &res);
 			}
 		} else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 			  cstring &&
 			  !strcmp("dF", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 			/* TODO: also match signature of dF function (possibly
 			 * declared using unescape */
 
 			handle_df(tokens->data, i+2, &res);
 		} else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 			  cstring &&
 			  !strcmp("eval", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 			handle_eval(tokens, i+2, &res);
 		}
 		if(res.pos_end > res.pos_begin) {
 			struct tokens parent_tokens;
 			int rc;
 
 			if(res.pos_end < tokens->cnt && tokens->data[res.pos_end].type == TOK_SEMICOLON)
 				res.pos_end++;
 			parent_tokens = state->tokens;/* save current tokens */
 			/* initialize embedded context */
 			memset(&state->tokens, 0, sizeof(state->tokens));
 			if(++state->rec > 16)
 				cli_dbgmsg(MODULE "recursion limit reached\n");
 			else {
 				cli_js_process_buffer(state, res.txtbuf.data, res.txtbuf.pos);
 				--state->rec;
 			}
 			free(res.txtbuf.data);
 			/* state->tokens still refers to the embedded/nested context
 			 * here */
 			if(!res.append) {
 				rc = replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, &state->tokens);
 			} else {
 				/* delete tokens */
 				replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, NULL);
 				rc = append_tokens(&parent_tokens, &state->tokens);
 			}
 			if(rc != CL_SUCCESS) {
 				/* the nested tokens were not taken over by the
 				 * parent list, so their values are still ours */
 				size_t j;
 				for(j = 0; j < state->tokens.cnt; j++)
 					free_token(&state->tokens.data[j]);
 			}
 			/* end of embedded context, restore tokens state */
 			free(state->tokens.data);
 			state->tokens = parent_tokens;
 		}
 		/* the list may have shrunk below i after a replacement */
 		if(i < state->tokens.cnt)
 			state_update_scope(state, &state->tokens.data[i]);
 	}
 }
 
 void cli_js_parse_done(struct parser_state* state)
 {
a66b62f8
 	struct tokens * tokens = &state->tokens;
 	size_t par_balance = 0, i;
 	char end = '\0';
 	YYSTYPE val;
 
8be1d5a4
 	cli_dbgmsg(MODULE "in cli_js_parse_done()\n");
a66b62f8
 	/* close unfinished token */
 	switch (state->scanner->state) {
 		case DoubleQString:
 			end = '"';
 			break;
 		case SingleQString:
 			end = '\'';
 			break;
b9b47784
 		default: /* make gcc happy */
 			break;
a66b62f8
 	}
 	if (end != '\0')
 		cli_js_process_buffer(state, &end, 1);
22cb38ed
 	/* close remaining parenthesis */
a66b62f8
 	for (i=0;i<tokens->cnt;i++) {
 		if (tokens->data[i].type == TOK_PAR_OPEN)
 			par_balance++;
 		else if (tokens->data[i].type == TOK_PAR_CLOSE && par_balance > 0)
 			par_balance--;
 	}
 	if (par_balance > 0) {
 		memset(&val, 0, sizeof(val));
 		val.type = TOK_PAR_CLOSE;
 		TOKEN_SET(&val, cstring, ")");
 		while (par_balance-- > 0) {
 			add_token(state, &val);
 		}
 	}
 
22cb38ed
 	/* we had to close unfinished strings, parenthesis,
a66b62f8
 	 * so that the folders/decoders can run properly */
fd08e02e
 	run_folders(&state->tokens);
 	run_decoders(state);
 
 	yylex_destroy(state->scanner);
377a2330
 	state->scanner = NULL;
fd08e02e
 }
 
 
8be1d5a4
 void cli_js_output(struct parser_state *state, const char *tempdir)
fd08e02e
 {
 	unsigned i;
 	struct buf buf;
 	char lastchar = '\0';
8be1d5a4
 	char filename[1024];
 
58481352
 	snprintf(filename, 1024, "%s"PATHSEP"javascript", tempdir);
8be1d5a4
 
fd08e02e
 	buf.pos = 0;
8be1d5a4
 	buf.outfd = open(filename, O_CREAT | O_WRONLY, 0600);
 	if(buf.outfd < 0) {
 		cli_errmsg(MODULE "cannot open output file for writing: %s\n", filename);
 		return;
 	}
 	/* append to file */
 	if(lseek(buf.outfd, 0, SEEK_END) != 0) {
 		/* separate multiple scripts with \n */
 		buf_outc('\n', &buf);
 	}
1279faf6
 	buf_outs("<script>", &buf);
fd08e02e
 	state->current = state->global;
 	for(i = 0; i < state->tokens.cnt; i++) {
 		if(state_update_scope(state, &state->tokens.data[i]))
 			lastchar = output_token(&state->tokens.data[i], state->current, &buf, lastchar);
 	}
1279faf6
 	/* add /script if not already there */
 	if(buf.pos < 9 || memcmp(buf.buf + buf.pos - 9, "</script>", 9))
 		buf_outs("</script>", &buf);
fd08e02e
 	if(write(buf.outfd, buf.buf, buf.pos) < 0) {
1405207a
 		cli_dbgmsg(MODULE "I/O error\n");
fd08e02e
 	}
8be1d5a4
 	close(buf.outfd);
 	cli_dbgmsg(MODULE "dumped/appended normalized script to: %s\n",filename);
fd08e02e
 }
 
 void cli_js_destroy(struct parser_state *state)
 {
 	size_t i;
377a2330
 	if(!state)
 		return;
fd08e02e
 	scope_free_all(state->list);
 	for(i=0;i<state->tokens.cnt;i++) {
 		free_token(&state->tokens.data[i]);
 	}
 	free(state->tokens.data);
8be1d5a4
 	/* detect use after free */
377a2330
 	if(state->scanner)
 		yylex_destroy(state->scanner);
8be1d5a4
 	memset(state, 0x55, sizeof(*state));
 	free(state);
 	cli_dbgmsg(MODULE "cli_js_destroy() done\n");
fd08e02e
 }
 
 /* buffer is html-normlike "chunk", if original file is bigger than buffer,
  * we rewind to a space, so we'll know that tokens won't be broken in half at
  * the end of a buffer. All tokens except string-literals of course.
  * So we can assume that after the buffer there is either a space, EOF, or a
  * chunk of text not containing whitespace at all (for which we care only if its
  * a stringliteral)*/
 void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n)
 {
 	struct scope* current = state->current;
 	YYSTYPE val;
 	int yv;
 	YY_BUFFER_STATE yyb;
 
 	if(!state->global) {
 		/* this state has either not been initialized,
 		 * or cli_js_parse_done() was already called on it */
1405207a
 		cli_warnmsg(MODULE "invalid state\n");
fd08e02e
 		return;
 	}
 	yyb = yy_scan_bytes(buf, n, state->scanner);
 	memset(&val, 0, sizeof(val));
 	val.vtype = vtype_undefined;
 	/* on EOF yylex will return 0 */
 	while( (yv=yylex(&val, state->scanner)) != 0)
 	{
 		const char *text;
 		size_t leng;
 
 		val.type = yv;
 		switch(yv) {
 			case TOK_VAR:
 				current->fsm_state = InsideVar;
 				break;
 			case TOK_IDENTIFIER_NAME:
 				text = yyget_text(state->scanner);
 				leng = yyget_leng(state->scanner);
 				if(current->last_token == TOK_DOT) {
 					/* this is a member name, don't normalize
 					*/
 					TOKEN_SET(&val, string, cli_strdup(text));
 					val.type = TOK_UNNORM_IDENTIFIER;
 				} else {
 					switch(current->fsm_state) {
 						case WaitParameterList:
 							state->syntax_errors++;
 							/* fall through */
 						case Base:
 						case InsideInitializer:
 							TOKEN_SET(&val, cstring, scope_use(current, text, leng));
 							break;
 						case InsideVar:
 						case InsideFunctionDecl:
 							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
 							current->fsm_state = InsideInitializer;
 							current->brackets = 0;
 							break;
 						case WaitFunctionName:
 							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
 							current->fsm_state = WaitParameterList;
 							break;
 					}
 				}
 				break;
 			case TOK_PAR_OPEN:
 				switch(current->fsm_state) {
 					case WaitFunctionName:
 						/* fallthrough */
 					case WaitParameterList:
 						current->fsm_state = InsideFunctionDecl;
 						break;
 					default:
 						/* noop */
 						break;
 				}
 				break;
 			case TOK_PAR_CLOSE:
 				switch(current->fsm_state) {
 					case WaitFunctionName:
 						state->syntax_errors++;
 						break;
 					case WaitParameterList:
 						current->fsm_state = Base;
 						break;
 					default:
 						/* noop */
 						break;
 				}
 				break;
 			case TOK_CURLY_BRACE_OPEN:
 				switch(current->fsm_state) {
 					case WaitFunctionName:
 						/* fallthrough */
 					case WaitParameterList:
 					case InsideFunctionDecl:
 						/* in a syntactically correct
 						 * file, we would already be in
 						 * the Base state when we see a {
 						 */
 						current->fsm_state = Base;
 						/* fall-through */
 					case InsideVar:
 					case InsideInitializer:
 						state->syntax_errors++;
 						/* fall-through */
 					case Base:
 					default:
 						current->blocks++;
 						break;
 				}
 				break;
 					case TOK_CURLY_BRACE_CLOSE:
 				if(current->blocks > 0)
 					current->blocks--;
 				else
 					state->syntax_errors++;
 				if(!current->blocks) {
 					if(current->parent) {
 						/* add dummy FUNCTION token to
 						 * mark function end */
 						TOKEN_SET(&val, cstring, "}");
 						add_token(state, &val);
 						TOKEN_SET(&val, scope, NULL);
 						val.type = TOK_FUNCTION;
 
 						state->current = current = current->parent;
 					} else{
 						/* extra } */
 						state->syntax_errors++;
 				}
 				}
 				break;
 			case TOK_BRACKET_OPEN:
 				current->brackets++;
 				break;
 			case TOK_BRACKET_CLOSE:
 				if(current->brackets > 0)
 					current->brackets--;
 				else
 					state->syntax_errors++;
 				break;
 			case TOK_COMMA:
 				if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) {
 					/* initializer ended only if we
 					 * encountered a comma, and [] are
 					 * balanced.
 					 * This avoids switching state on:
 					 * var x = [4,y,u];*/
 					current->fsm_state = InsideVar;
 				}
 				break;
 			case TOK_SEMICOLON:
 				if (current->brackets == 0 && current->blocks == 0) {
 					/* avoid switching state on unbalanced []:
 					 * var x = [test;testi]; */
 					current->fsm_state = Base;
 				}
 				break;
 			case TOK_FUNCTION:
 				current = scope_new(state);
 				current->fsm_state = WaitFunctionName;
 				TOKEN_SET(&val, scope, state->current);
 				break;
 			case TOK_StringLiteral:
8af6e5a2
 				if(state->tokens.cnt > 1 && state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) {
fd08e02e
 					/* see if can fold */
 					yystype *prev_string = &state->tokens.data[state->tokens.cnt-2];
 					if(prev_string->type == TOK_StringLiteral) {
 						char *str = TOKEN_GET(prev_string, string);
 						size_t str_len = strlen(str);
 
 						text = yyget_text(state->scanner);
 						leng = yyget_leng(state->scanner);
 
 
 						/* delete TOK_PLUS */
 						free_token(&state->tokens.data[--state->tokens.cnt]);
 
 						str = cli_realloc(str, str_len + leng + 1);
b0033c8f
 						if (!str)
 						    break;
377a2330
 						strncpy(str+str_len, text, leng);
fd08e02e
 						str[str_len + leng] = '\0';
 						TOKEN_SET(prev_string, string, str);
 						free(val.val.string);
 						memset(&val, 0, sizeof(val));
 						val.vtype = vtype_undefined;
 						continue;
 					}
 				}
 				break;
 		}
 		if(val.vtype == vtype_undefined) {
 			text = yyget_text(state->scanner);
 			TOKEN_SET(&val, string, cli_strdup(text));
 			abort();
 		}
 		add_token(state, &val);
 		current->last_token = yv;
 		memset(&val, 0, sizeof(val));
 		val.vtype = vtype_undefined;
 	}
 }
 
8be1d5a4
 struct parser_state *cli_js_init(void)
fd08e02e
 {
8be1d5a4
 	struct parser_state *state = cli_calloc(1, sizeof(*state));
fd08e02e
 	if(!state)
8be1d5a4
 		return NULL;
fd08e02e
 	if(!scope_new(state)) {
8be1d5a4
 		free(state);
 		return NULL;
fd08e02e
 	}
 	state->global = state->current;
 
 	if(yylex_init(&state->scanner)) {
 		scope_done(state->global);
8be1d5a4
 		free(state);
 		return NULL;
fd08e02e
 	}
8be1d5a4
 	cli_dbgmsg(MODULE "cli_js_init() done\n");
 	return state;
fd08e02e
 }
 
 /*-------------- tokenizer ---------------------*/
 /*
  * Character classes used by the hand-written tokenizer to decide how a
  * token starts (ctype[]) and how an identifier continues (id_ctype[]).
  *
  * Whitespace .. IdStart are plain classes with implicit values.
  * BracketOpen .. SemiColon are defined as the matching TOK_* constants, so
  * yylex() can return the class itself as the token type (see
  * CASE_SPECIAL_CHAR). Nop takes the value following SemiColon.
  * NOTE(review): this presumes the TOK_* values used here do not collide
  * with the implicit values of the first seven enumerators - confirm against
  * the token definitions.
  */
 enum char_class {
 	Whitespace,
 	Slash,
 	Operator,
 	DQuote,
 	SQuote,
 	Digit,
 	IdStart,
 	/* single-character tokens: class value == token value */
 	BracketOpen = TOK_BRACKET_OPEN,
 	BracketClose = TOK_BRACKET_CLOSE,
 	Comma = TOK_COMMA,
 	CurlyOpen = TOK_CURLY_BRACE_OPEN,
 	CurlyClose = TOK_CURLY_BRACE_CLOSE,
 	ParOpen = TOK_PAR_OPEN,
 	ParClose = TOK_PAR_CLOSE,
 	Dot = TOK_DOT,
 	SemiColon = TOK_SEMICOLON,
 	/* byte that yylex() skips in the Initial state; ends an identifier in parseId() */
 	Nop
 };
 
 /*
  * Two-letter aliases for the enum char_class members, so that the 256-entry
  * classification tables can be written 16 entries per row.
  */
 #define SL Slash
 #define DG Digit
 #define DQ DQuote
 #define SQ SQuote
 #define ID IdStart
 #define OP Operator
 #define WS Whitespace
 #define BO BracketOpen
 #define BC BracketClose
 #define CM Comma
 #define CO CurlyOpen
 #define CC CurlyClose
 #define PO ParOpen
 #define PC ParClose
 #define DT Dot
 #define SC SemiColon
 #define NA Nop
 
 /*
  * Class of every byte value (0..255) as seen by yylex() in the Initial
  * state, i.e. what kind of token a byte starts. Each row covers 16
  * consecutive byte values (row 0 is 0x00-0x0F, row 2 starts at ' ', ...).
  *
  * Tab, LF, VT, CR and space are whitespace; '$', '_', backslash and the
  * ASCII letters start an identifier; '#', '@', '`', the remaining control
  * characters and all bytes >= 0x7F are NA.
  */
 static const enum char_class ctype[256] = {
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, WS, WS, WS, NA, WS, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	WS, OP, DQ, NA, ID, OP, OP, SQ, PO, PC, OP, OP, CM, OP, DT, SL,
 	DG, DG, DG, DG, DG, DG, DG, DG, DG, DG, OP, SC, OP, OP, OP, OP,
 	NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 	ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, BO, ID, BC, OP, ID,
 	NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 	ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, CO, OP, CC, OP, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 };
 
 /*
  * Class of every byte value (0..255) as seen by parseId() while it is
  * inside an identifier. Each row covers 16 consecutive byte values.
  *
  * '$', '_', the ASCII digits and the ASCII letters continue the identifier
  * (ID); backslash is the only OP entry and triggers parseId()'s check for a
  * following 'u'; every other byte is NA and ends the identifier.
  */
 static const enum char_class id_ctype[256] = {
 	NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, ID, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA, NA,
         NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, OP, NA, NA, ID,
         NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
         ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 };
 
 /*
  * Expands to a complete `case C:` arm that stores the constant string S as
  * the token value and returns cClass as the token type. It expects `lvalp`
  * and `cClass` to be in scope where it is expanded, and it only makes sense
  * for classes whose enum value equals a TOK_* constant.
  */
 #define CASE_SPECIAL_CHAR(C, S) case C: TOKEN_SET(lvalp, cstring, (S)); return cClass;
 
 /* Text buffers whose capacity grew beyond this many bytes are shrunk back
  * to it by textbuf_clean(). */
 #define BUF_KEEP_SIZE 32768
 
 /*
  * Reset a text buffer for reuse: the write position goes back to 0 and a
  * buffer that grew beyond BUF_KEEP_SIZE is shrunk to BUF_KEEP_SIZE.
  *
  * If the shrinking reallocation fails the old (larger) storage is kept;
  * the recorded capacity is still lowered to BUF_KEEP_SIZE.
  */
 static void textbuf_clean(struct text_buffer *buf)
 {
 	char *shrunk;
 
 	buf->pos = 0;
 	if (buf->capacity <= BUF_KEEP_SIZE)
 		return;
 
 	shrunk = cli_realloc(buf->data, BUF_KEEP_SIZE);
 	if (shrunk != NULL)
 		buf->data = shrunk;
 	buf->capacity = BUF_KEEP_SIZE;
 }
 
 /*
  * Scan the body of a string literal delimited by `q`, starting at
  * scanner->pos (the opening quote has already been consumed, or the string
  * is being resumed from a previous chunk of input).
  *
  * The text up to the closing quote - or up to the end of the input when no
  * closing quote is found - is appended to scanner->buf through
  * cli_textbuffer_append_normalize().
  *
  * A quote only terminates the string when it is preceded by an even number
  * of backslashes (zero included): in "a\\" the backslash is itself escaped,
  * so the quote closes the string, while in "a\"b" the quote is escaped.
  * Backslashes are only counted back to the start of the current scan.
  *
  * Returns TOK_StringLiteral with the string stored in *lvalp and the scanner
  * back in the Initial state, or 0 when the input ran out first; in that case
  * the scanner is put in `tostate` so the next yylex() call continues the
  * same string.
  */
 static inline int parseString(YYSTYPE *lvalp, yyscan_t scanner, const char q,
 		enum tokenizer_state tostate)
 {
 	size_t len;
 	char *str;
 	const char *start = &scanner->in[scanner->pos];
 	const char *limit = &scanner->in[scanner->insize];
 	const char *end = start;
 
 	/* look for the unescaped quote terminating the string */
 	for (;;) {
 		const char *p;
 		size_t backslashes = 0;
 
 		end = memchr(end, q, (size_t)(limit - end));
 		if (!end)
 			break;
 		/* count the run of backslashes directly in front of the quote */
 		for (p = end; p > start && p[-1] == '\\'; p--)
 			backslashes++;
 		if ((backslashes & 1) == 0)
 			break; /* not escaped: this is the closing quote */
 		/* escaped quote, keep searching after it */
 		++end;
 	}
 
 	if (end)
 		len = end - start;
 	else
 		len = scanner->insize - scanner->pos;
 	cli_textbuffer_append_normalize(&scanner->buf, start, len);
 
 	if (!end) {
 		/* unfinished string */
 		scanner->pos += len;
 		scanner->state = tostate;
 		return 0;
 	}
 
 	/* skip over end quote */
 	scanner->pos += len + 1;
 	textbuffer_putc(&scanner->buf, '\0');
 	str = textbuffer_done(scanner);
 	if (str) {
 		TOKEN_SET(lvalp, string, str);
 	} else {
 		/* no buffer available: fall back to a constant empty string */
 		TOKEN_SET(lvalp, cstring, "");
 	}
 	scanner->state = Initial;
 	assert(lvalp->val.string);
 	return TOK_StringLiteral;
 }
 
 /* Scan a double-quoted string literal; see parseString() for the result. */
 static inline int parseDQString(YYSTYPE *lvalp, yyscan_t scanner)
 {
 	const char quote = '"';
 
 	return parseString(lvalp, scanner, quote, DoubleQString);
 }
 
 /* Scan a single-quoted string literal; see parseString() for the result. */
 static inline int parseSQString(YYSTYPE *lvalp, yyscan_t scanner)
 {
 	const char quote = '\'';
 
 	return parseString(lvalp, scanner, quote, SingleQString);
 }
 
 /*
  * Scan a numeric literal starting at scanner->pos, appending its characters
  * to scanner->buf.
  *
  * Decimal digits are always accepted; the first '.' switches to float mode;
  * in float mode an 'e'/'E' is accepted, together with a directly following
  * sign or digit. The first byte that is not accepted ends the literal and is
  * left unread for the next token.
  *
  * Returns TOK_NumericInt or TOK_NumericFloat with the value stored in *lvalp
  * and the scanner back in the Initial state. Returns 0 if the text buffer
  * holds no data, or if the input ran out before a terminating byte was seen;
  * in the latter case the scanner is left in the Number state so the next
  * yylex() call resumes here.
  * NOTE(review): is_float is a local, so a literal resumed in the Number
  * state restarts with is_float == 0.
  */
 static inline int parseNumber(YYSTYPE *lvalp, yyscan_t scanner)
 {
 	const unsigned char *in = (const unsigned char*)scanner->in;
 	int is_float = 0;
 
 	while(scanner->pos < scanner->insize) {
 		unsigned char c = in[scanner->pos++];
 		if(isdigit(c)) {
 			textbuffer_putc(&scanner->buf, c);
 			continue;
 		}
 		if(c =='.' && !is_float) {
 			is_float = 1;
 			textbuffer_putc(&scanner->buf, '.');
 			continue;
 		}
 		if((c=='e' || c=='E') && is_float) {
 			textbuffer_putc(&scanner->buf, c);
 			if(scanner->pos < scanner->insize) {
 				c = in[scanner->pos++];
 				if(c == '+' || c == '-' || isdigit(c)) {
 					textbuffer_putc(&scanner->buf, c);
 					continue;
 				}
 			} else {
 				/* the exponent marker was the last input byte and
 				 * is already part of the literal: offset the unread
 				 * below so it is not lexed a second time */
 				scanner->pos++;
 			}
 		}
 		/* unread the byte that ended the literal */
 		scanner->pos--;
 		textbuffer_putc(&scanner->buf, '\0');
 		scanner->state = Initial;
 		if (!scanner->buf.data)
 			return 0;
 		if(is_float) {
 			/* strtod() is what atof() is defined in terms of */
 			TOKEN_SET(lvalp, dval, strtod(scanner->buf.data, NULL));
 			return TOK_NumericFloat;
 		} else {
 			/* strtol() saturates on out-of-range input instead of
 			 * invoking undefined behaviour as atoi() does; the cast
 			 * keeps the int result type atoi() had */
 			TOKEN_SET(lvalp, ival, (int)strtol(scanner->buf.data, NULL, 10));
 			return TOK_NumericInt;
 		}
 	}
 	/* ran out of input in the middle of the literal */
 	scanner->state = Number;
 	return 0;
 }
 
 /*
  * Scan an identifier or keyword starting at scanner->pos, appending its
  * characters to scanner->buf.
  *
  * Bytes classified as IdStart by id_ctype[] are appended. A backslash that
  * is followed by 'u' is appended as well (the 'u' is consumed but not
  * appended). Any other byte ends the identifier.
  *
  * Returns the keyword's token value (token text = keyword name) when the
  * collected text is found by in_word_set(), TOK_IDENTIFIER_NAME (token text
  * NULL) otherwise, or 0 when the input ran out first; in that case the
  * scanner is left in the Identifier state so the next yylex() call resumes
  * here.
  */
 static inline int parseId(YYSTYPE *lvalp, yyscan_t scanner)
 {
 	const unsigned char *bytes = (const unsigned char*)scanner->in;
 	const struct keyword *match;
 
 	scanner->state = Initial;
 	while (scanner->pos < scanner->insize) {
 		const unsigned char ch = bytes[scanner->pos++];
 		const enum char_class cls = id_ctype[ch];
 
 		if (cls == IdStart) {
 			textbuffer_putc(&scanner->buf, ch);
 			continue;
 		}
 		if (cls == Operator) {
 			/* the table contains OP only for \ */
 			assert(ch == '\\');
 			if (scanner->pos < scanner->insize &&
 					bytes[scanner->pos++] == 'u') {
 				textbuffer_putc(&scanner->buf, ch);
 				continue;
 			}
 			/* at end of input: offset the unread done below */
 			if (scanner->pos == scanner->insize) {
 				scanner->pos++;
 			}
 		}
 
 		/* character is no longer part of identifier */
 		scanner->state = Initial;
 		textbuffer_putc(&scanner->buf, '\0');
 		scanner->pos--;
 		match = in_word_set(scanner->buf.data, scanner->buf.pos-1);
 		if (match) {
 			/* we got a keyword */
 			TOKEN_SET(lvalp, cstring, match->name);
 			return match->val;
 		}
 		/* it is not a keyword, just an identifier */
 		TOKEN_SET(lvalp, cstring, NULL);
 		return TOK_IDENTIFIER_NAME;
 	}
 	/* input exhausted in the middle of the identifier */
 	scanner->state = Identifier;
 	return 0;
 }
 
 /*
  * Match the longest operator starting at scanner->pos.
  *
  * Candidate lengths are tried from min(5, remaining input) down to 1 using
  * in_op_set(). On a match the operator name becomes the token text, the
  * position advances past the operator and its token value is returned.
  *
  * The code after the loop is expected to be unreachable (it asserts); with
  * assertions disabled it skips one byte and returns TOK_ERROR.
  */
 static int parseOperator(YYSTYPE *lvalp, yyscan_t scanner)
 {
 	size_t oplen;
 
 	for (oplen = MIN(5, scanner->insize - scanner->pos); oplen != 0; oplen--) {
 		const struct operator *op = in_op_set(&scanner->in[scanner->pos], oplen);
 
 		if (!op)
 			continue;
 		TOKEN_SET(lvalp, cstring, op->name);
 		scanner->pos += oplen;
 		return op->val;
 	}
 
 	/* never reached */
 	assert(0);
 	scanner->pos++;
 	TOKEN_SET(lvalp, cstring, NULL);
 	return TOK_ERROR;
 }
 
 /*
  * Allocate a scanner with cli_calloc() and store it in *scanner.
  * Returns 0 on success, -1 if the allocation failed (*scanner is then NULL).
  */
 static int yylex_init(yyscan_t *scanner)
 {
 	yyscan_t fresh = cli_calloc(1, sizeof(*fresh));
 
 	*scanner = fresh;
 	if (fresh == NULL)
 		return -1;
 	return 0;
 }
 
 /*
  * Release a scanner together with the storage of its text buffer.
  * Always returns 0.
  */
 static int yylex_destroy(yyscan_t scanner)
 {
 	char *text = scanner->buf.data;
 
 	free(text);
 	free(scanner);
 	return 0;
 }
 
 /*
  * Point the scanner at a new input of `len` bytes starting at `p` and
  * rewind it to the beginning of that input.
  *
  * The bookkeeping used by yylex() to detect a lack of progress is reset
  * (lastpos to the largest size_t value, last_state to Dummy). The tokenizer
  * state and the text buffer are not touched here. Always returns 0.
  */
 static int yy_scan_bytes(const char *p, size_t len, yyscan_t scanner)
 {
 	/* new input window */
 	scanner->in     = p;
 	scanner->insize = len;
 	scanner->pos    = 0;
 
 	/* no previous yylex() call to compare against yet */
 	scanner->last_state = Dummy;
 	scanner->lastpos    = (size_t)-1;
 
 	return 0;
 }
 
 /*
  * Text of the current token: scanner->yytext when it is set, otherwise the
  * contents of the scanner's text buffer.
  */
 static const char *yyget_text(yyscan_t scanner)
 {
 	if (scanner->yytext)
 		return scanner->yytext;
 	return scanner->buf.data;
 }
 
 /*
  * Length of the current token: scanner->yylen when it is non-zero,
  * otherwise the number of bytes in the text buffer minus one (the buffer
  * also holds the terminating \0), or 0 for an empty buffer.
  */
 static int yyget_leng(yyscan_t scanner)
 {
 	if (scanner->yylen)
 		return scanner->yylen;
 	if (scanner->buf.pos > 0) {
 		/* we have a \0 too */
 		return scanner->buf.pos - 1;
 	}
 	return 0;
 }
 
 /*
  * Produce the next token from the scanner's input.
  *
  * Returns the token type with its value stored in *lvalp, or 0 when no
  * token could be completed: either the input is exhausted, or a string,
  * number or identifier is still unfinished. In the unfinished case the
  * sub-parser has recorded the matching state in scanner->state, so the next
  * call continues with that construct.
  *
  * The scanner is a small state machine: in the Initial state the next byte
  * is classified through ctype[] and dispatched; every other state continues
  * a construct that was already started.
  */
 static int yylex(YYSTYPE *lvalp, yyscan_t  scanner)
 {
 	const size_t len = scanner->insize;
 	const unsigned char *in = (const unsigned char*)scanner->in;
 	unsigned char lookahead;
 	enum char_class cClass;
 
 	scanner->yytext = NULL;
 	scanner->yylen = 0;
72733fba
 	/* guard against making no progress: same position and same state as
 	 * at the start of the previous call */
 	if(scanner->pos == scanner->lastpos) {
 		if(scanner->last_state == scanner->state) {
 			cli_dbgmsg(MODULE "infloop detected, skipping character\n");
 			scanner->pos++;
 		}
 		/* it's not necessarily an infloop if it changed
 		 * state, and it shouldn't infloop between states */
 	}
 	scanner->lastpos = scanner->pos;
 	scanner->last_state = scanner->state;
fd08e02e
 	while(scanner->pos < scanner->insize) {
 		switch(scanner->state) {
 			case Initial:
 				/* start of a new token: reset the text buffer */
 				textbuf_clean(&scanner->buf);
 				cClass = ctype[in[scanner->pos++]];
 				switch(cClass) {
 					case Whitespace:
 						/* eat whitespace */
 						continue;
 					case Slash:
 						/* either the start of a comment or an operator */
 						if(scanner->pos < len) {
 							lookahead = in[scanner->pos];
 							switch(lookahead) {
 								case '*':
 									scanner->state = MultilineComment;
 									scanner->pos++;
 									continue;
 								case '/':
 									scanner->state = SinglelineComment;
 									scanner->pos++;
 									continue;
 							}
 						}
 						/* not a comment: unread the slash and lex it as an operator */
 						--scanner->pos;
 						return parseOperator(lvalp, scanner);
 					case Operator:
 						--scanner->pos;
 						return parseOperator(lvalp, scanner);
 					case DQuote:
 						/* the opening quote has already been consumed */
 						return parseDQString(lvalp, scanner);
 					case SQuote:
 						return parseSQString(lvalp, scanner);
 					case Digit:
 						/* unread the byte so the sub-parser sees it */
 						--scanner->pos;
 						return parseNumber(lvalp, scanner);
 					case IdStart:
 						--scanner->pos;
 						return parseId(lvalp,scanner);
 					/* single-character tokens: the class value is the token type */
 					CASE_SPECIAL_CHAR(BracketOpen, "[");
 					CASE_SPECIAL_CHAR(BracketClose, "]");
 					CASE_SPECIAL_CHAR(Comma, ",");
 					CASE_SPECIAL_CHAR(CurlyOpen, "{");
 					CASE_SPECIAL_CHAR(CurlyClose, "}");
 					CASE_SPECIAL_CHAR(ParOpen, "(");
 					CASE_SPECIAL_CHAR(ParClose, ")");
 					CASE_SPECIAL_CHAR(Dot, ".");
 					CASE_SPECIAL_CHAR(SemiColon, ";");
 					case Nop:
 					       /* byte with no meaning for the tokenizer: skip it */
 					       continue;
 				}
 				break;
 			case DoubleQString:
 				/* continue a string left unfinished by an earlier call */
 				return parseString(lvalp, scanner, '"', DoubleQString);
 			case SingleQString:
377a2330
 				return parseString(lvalp, scanner, '\'', SingleQString);
fd08e02e
 			case Identifier:
 				return parseId(lvalp, scanner);
 			case MultilineComment:
 				/* skip up to and including the closing star-slash pair; if
 				 * it is not found the input is consumed and the state
 				 * stays MultilineComment */
 				while(scanner->pos+1 < scanner->insize) {
377a2330
 					if(in[scanner->pos] == '*' && in[scanner->pos+1] == '/') {
 						scanner->state = Initial;
 						scanner->pos++;
fd08e02e
 						break;
377a2330
 					}
fd08e02e
 					scanner->pos++;
 				}
377a2330
 				scanner->pos++;
fd08e02e
 				break;
 			case Number:
 				return parseNumber(lvalp, scanner);
 			case SinglelineComment:
 				while(scanner->pos < scanner->insize) {
086ad393
 					/* htmlnorm converts \n to space, so
 					 * stop on space too */
 					if(in[scanner->pos] == '\n' || in[scanner->pos] == ' ')
fd08e02e
 						break;
 					scanner->pos++;
 				}
 				/* back to Initial even when the input ended first */
 				scanner->state = Initial;
 				break;
b9b47784
 			default:
 				assert(0 && "Not reached");
fd08e02e
 		}
 	}
 	/* input exhausted without completing a token */
 	return 0;
 }