888f5794 |
/*
 *  Normalise HTML text.
 *  Decode MS Script Encoder protection.
 *
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 *
 *  Authors: Trog
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
|
b58fdfc2 |
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
|
888f5794 |
#include <stdio.h> |
b58fdfc2 |
#ifdef HAVE_UNISTD_H |
888f5794 |
#include <unistd.h> |
b58fdfc2 |
#endif |
888f5794 |
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h> |
c7029064 |
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif |
e57fa318 |
#include <string.h>
#include <errno.h>
#include <stdio.h> |
a58f7bc5 |
#include <ctype.h> |
e57fa318 |
|
ba65fdc8 |
#include "fmap.h" |
888f5794 |
#include "others.h" |
e57fa318 |
#include "htmlnorm.h" |
888f5794 |
|
3506ac49 |
#include "entconv.h" |
8be1d5a4 |
#include "jsparse/js-norm.h" |
333d724d |
|
e57fa318 |
#define HTML_STR_LENGTH 1024 |
333d724d |
#define MAX_TAG_CONTENTS_LENGTH HTML_STR_LENGTH |
888f5794 |
|
e57fa318 |
/*
 * States of the HTML normaliser state machine driven by cli_html_normalise().
 * The enumerator order fixes the numeric values, so new states must only be
 * appended.
 */
typedef enum {
    HTML_BAD_STATE,          /* reaching this state is reported as an engine error */
    HTML_NORM,               /* initial state: plain text outside of a tag */
    HTML_8BIT,               /* collecting a run of bytes >= 0x80 */
    HTML_COMMENT,            /* entered after "<!" and left at the next '>' */
    HTML_CHAR_REF,           /* entered after '&' */
    HTML_ENTITY_REF_DECODE,
    HTML_SKIP_WS,            /* drop whitespace, then switch to next_state */
    HTML_TRIM_WS,            /* like HTML_SKIP_WS, but emits a single space */
    HTML_TAG,                /* reading the tag name */
    HTML_TAG_ARG,            /* reading an attribute name */
    HTML_TAG_ARG_VAL,        /* reading an attribute value */
    HTML_TAG_ARG_EQUAL,      /* after an attribute name, looking for '=' */
    HTML_PROCESS_TAG,        /* a complete tag has been read */
    HTML_CHAR_REF_DECODE,
    HTML_LOOKFOR_SCRENC,
    HTML_JSDECODE,
    HTML_JSDECODE_LENGTH,
    HTML_JSDECODE_DECRYPT,
    HTML_SPECIAL_CHAR,
    HTML_RFC2397_TYPE,       /* entered when an attribute value starts with "data:" */
    HTML_RFC2397_INIT,
    HTML_RFC2397_DATA,
    HTML_RFC2397_FINISH,
    HTML_RFC2397_ESC,
    HTML_ESCAPE_CHAR
} html_state;
/* Quoting style of the attribute value currently being parsed. */
typedef enum {
    SINGLE_QUOTED,  /* value was opened with ' */
    DOUBLE_QUOTED,  /* value was opened with " */
    NOT_QUOTED      /* bare value; ends at whitespace or '>' */
} quoted_state;
/* Capacity, in bytes, of the write buffer embedded in file_buff_t. */
#define HTML_FILE_BUFF_LEN 8192
/*
 * Buffered output file used by the html_output_*() helpers.
 * Bytes are staged in `buffer` and written to `fd` by html_output_flush().
 */
typedef struct file_buff_tag {
/* descriptor the buffered bytes are written to */
int fd;
/* staging area for pending output */
unsigned char buffer[HTML_FILE_BUFF_LEN];
/* number of valid bytes currently held in `buffer` */
int length;
} file_buff_t;
|
7d4b5f16 |
struct tag_contents {
size_t pos; |
08402afa |
unsigned char contents[MAX_TAG_CONTENTS_LENGTH + 1]; |
7d4b5f16 |
};
|
e57fa318 |
/*
 * Base64 decoding table indexed by byte value: 'A'-'Z' map to 0-25,
 * 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63.
 * Every other byte maps to -1.
 */
static const int base64_chars[256] = {
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
int table_order[] = {
00, 02, 01, 00, 02, 01, 02, 01, 01, 02, 01, 02, 00, 01, 02, 01,
00, 01, 02, 01, 00, 00, 02, 01, 01, 02, 00, 01, 02, 01, 01, 02,
00, 00, 01, 02, 01, 02, 01, 00, 01, 00, 00, 02, 01, 00, 01, 02,
00, 01, 02, 01, 00, 00, 02, 01, 01, 00, 00, 02, 01, 00, 01, 02
}; |
888f5794 |
|
e57fa318 |
/*
 * Three 128-entry substitution tables used by screnc_decode(): an encoded
 * byte below 0x80 is looked up in the row chosen by table_order.
 * An entry of 0xFF tells the decoder that the byte introduces a
 * two-byte escape sequence.
 */
int decrypt_tables[3][128] = {
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43,
0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E,
0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74,
0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F,
0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46,
0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36},
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73,
0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53,
0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D,
0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B,
0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57},
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F,
0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E,
0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39,
0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48,
0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58,
0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65}
};
|
a6de01aa |
/*
 * Find where to cut chunk[0..len) so that it ends just after its last
 * whitespace character.
 *
 * Returns the length up to and including the last whitespace byte found at
 * index >= 1, or `len` unchanged when there is none (nothing to rewind to).
 * `len == 0` returns 0 without touching `chunk`.
 */
static inline unsigned int rewind_tospace(const unsigned char *chunk, unsigned int len)
{
    unsigned int pos = len;

    /* Test the bound first: chunk[pos - 1] must never be read for pos == 0. */
    while (pos > 1 && !isspace(chunk[pos - 1])) {
        pos--;
    }
    if (pos <= 1) {
        /* no usable whitespace: keep the whole chunk */
        return len;
    }
    return pos;
}
888f5794 |
|
a6de01aa |
/* read at most @max_len of data from @m_area or @stream, skipping NULL chars.
 * This used to be called cli_readline, but we don't stop at end-of-line anymore.
 *
 * When @m_area is non-NULL it is used as the data source and its offset is
 * advanced; otherwise @stream is read. If the chunk had to be cut in the
 * middle of a word it is shortened to end after the last whitespace and the
 * source position is moved back accordingly.
 *
 * Returns a NUL-terminated buffer obtained from cli_malloc() (the caller owns
 * it), or NULL on end of data, allocation failure, missing source, or when
 * @max_len is too small to hold at least one byte plus the terminator. */
static unsigned char *cli_readchunk(FILE *stream, m_area_t *m_area, unsigned int max_len)
{
    unsigned char *chunk, *start, *ptr, *end;
    unsigned int chunk_len, count;

    /* max_len - 1 is used as the payload size below; it must not wrap */
    if (max_len < 2) {
        return NULL;
    }

    chunk = (unsigned char *) cli_malloc(max_len);
    if (!chunk) {
        return NULL;
    }

    /* Try and use the memory buffer first */
    if (m_area) {
        /* maximum we can copy into the buffer,
         * we could have less than max_len bytes available */
        chunk_len = MIN(m_area->length-m_area->offset, max_len-1);
        if (!chunk_len) {
            free(chunk);
            return NULL;
        }
        if (m_area->map)
            ptr = (unsigned char *)fmap_need_off_once(m_area->map, m_area->offset, chunk_len);
        else
            ptr = m_area->buffer + m_area->offset;

        /* reject a NULL pointer before doing any arithmetic on it */
        if (!ptr) {
            free(chunk);
            return NULL;
        }
        start = ptr;
        end = ptr - m_area->offset + m_area->length;

        if (start >= end) {
            free(chunk);
            return NULL;
        }

        /* look for NULL chars */
        ptr = memchr(start, 0, chunk_len);
        if (!ptr) {
            /* no NULL chars found, copy all */
            memcpy(chunk, start, chunk_len);
            chunk[chunk_len] = '\0';
            m_area->offset += chunk_len;
            /* point ptr to end of chunk,
             * so we can check and rewind to a space below */
            ptr = start + chunk_len;
        } else {
            /* copy portion that doesn't contain NULL chars */
            chunk_len = ptr - start;
            if (chunk_len < max_len) {
                memcpy(chunk, start, chunk_len);
            } else {
                chunk_len = 0;
                ptr = start;
            }
            if (m_area->map)
                ptr = (unsigned char *)fmap_need_ptr_once(m_area->map, ptr, end - ptr);
            if (!ptr) {
                cli_warnmsg("fmap inconsistency\n");
                ptr = end;
            }
            /* we have unknown number of NULL chars,
             * copy char-by-char and skip them */
            while ((ptr < end) && (chunk_len < max_len-1)) {
                const unsigned char c = *ptr++;
                /* we can't use chunk_len to determine how many bytes we read, since
                 * we skipped chars */
                if (c) {
                    chunk[chunk_len++] = c;
                }
            }
            m_area->offset += ptr - start;
            chunk[chunk_len] = '\0';
        }
        if (ptr && ptr < end && !isspace(*ptr)) {
            /* we hit max_len, rewind to a space */
            count = rewind_tospace(chunk, chunk_len);
            if (count < chunk_len) {
                chunk[count] = '\0';
                m_area->offset -= chunk_len - count;
            }
        }
    } else {
        if (!stream) {
            cli_dbgmsg("No HTML stream\n");
            free(chunk);
            return NULL;
        }
        chunk_len = fread(chunk, 1, max_len-1, stream);
        if (!chunk_len || chunk_len > max_len-1) {
            /* EOF, or prevent overflow */
            free(chunk);
            return NULL;
        }

        /* Look for NULL chars */
        ptr = memchr(chunk, 0, chunk_len);
        if (ptr) {
            /* NULL char found; remember where the read data ends */
            end = chunk + chunk_len;
            /* start of NULL chars, we will copy non-NULL characters
             * to this position */
            chunk_len = ptr - chunk;
            /* find first non-NULL char */
            while ((ptr < end) && !(*ptr)) {
                ptr++;
            }
            /* skip over NULL chars, and move back the rest */
            while ((ptr < end) && (chunk_len < max_len-1)) {
                const unsigned char c = *ptr++;
                if (c) {
                    chunk[chunk_len++] = c;
                }
            }
        }
        chunk[chunk_len] = '\0';
        if (chunk_len == max_len - 1) {
            /* rewind to a space (which includes newline) */
            count = rewind_tospace(chunk, chunk_len);
            /* Only cut the chunk when the stream really moved back; if the
             * seek fails the tail would otherwise be lost, so keep it. */
            if (count < chunk_len &&
                fseek(stream, -(long)(chunk_len - count), SEEK_CUR) == 0) {
                chunk[count] = '\0';
            }
        }
    }
    return chunk;
}
|
e57fa318 |
/*
 * Write any bytes pending in @fbuff to its descriptor and mark the buffer
 * empty. A NULL @fbuff or an empty buffer is a no-op.
 */
static void html_output_flush(file_buff_t *fbuff)
{
    if (!fbuff || fbuff->length <= 0) {
        return;
    }
    cli_writen(fbuff->fd, fbuff->buffer, fbuff->length);
    fbuff->length = 0;
}
|
d5a5fef9 |
/*
 * Append the single byte @c to @fbuff1, flushing first when the buffer is
 * already full. Does nothing when @fbuff1 is NULL.
 */
static inline void html_output_c(file_buff_t *fbuff1, unsigned char c)
{
    if (!fbuff1) {
        return;
    }
    if (fbuff1->length == HTML_FILE_BUFF_LEN) {
        html_output_flush(fbuff1);
    }
    fbuff1->buffer[fbuff1->length] = c;
    fbuff1->length++;
}
|
5cd3f734 |
/*
 * Append @len bytes from @str to @fbuff.
 * The buffer is flushed first when the new data would fill it; data that is
 * at least as large as the whole buffer is written straight to the file.
 * Does nothing when @fbuff is NULL.
 */
static void html_output_str(file_buff_t *fbuff, const unsigned char *str, size_t len)
{
    if (!fbuff) {
        return;
    }

    if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN) {
        html_output_flush(fbuff);
    }

    if (len < HTML_FILE_BUFF_LEN) {
        /* fits: stage it behind whatever is still pending */
        memcpy(fbuff->buffer + fbuff->length, str, len);
        fbuff->length += len;
        return;
    }

    /* too large to buffer: bypass the buffer entirely */
    html_output_flush(fbuff);
    cli_writen(fbuff->fd, str, len);
}
|
fc83da82 |
/*
 * Look up attribute @tag in @tags (exact, case-sensitive match).
 * Returns the stored value of the first match, which may itself be NULL,
 * or NULL when no attribute has that name. The result is not a copy.
 */
static char *html_tag_arg_value(tag_arguments_t *tags, const char *tag)
{
    int idx = 0;

    while (idx < tags->count) {
        if (!strcmp((const char*)tags->tag[idx], tag)) {
            return (char*)tags->value[idx];
        }
        idx++;
    }
    return NULL;
}
|
fc83da82 |
/*
 * Replace the value of the first attribute named @tag in @tags with a
 * duplicate of @value; the previous value is freed.
 * Nothing happens when no attribute has that name.
 */
static void html_tag_arg_set(tag_arguments_t *tags, const char *tag, const char *value)
{
    int idx;

    for (idx = 0; idx < tags->count; idx++) {
        if (strcmp((const char*)tags->tag[idx], tag) != 0) {
            continue;
        }
        free(tags->value[idx]);
        tags->value[idx] = (unsigned char*)cli_strdup(value);
        break;
    }
}
f2b71eb9 |
/*
 * Append the attribute @tag with @value (may be NULL) to @tags.
 *
 * Both strings are duplicated. A value starting with '"' is stored without
 * that leading quote and without its last character. When
 * tags->scanContents is set, a NULL contents slot is added for the new entry.
 *
 * If one of the arrays cannot be grown the whole list is released and reset
 * to empty. If the tag name cannot be duplicated the entry is not added.
 * If only the value cannot be duplicated the entry is stored without a value.
 */
void html_tag_arg_add(tag_arguments_t *tags,
        const char *tag, char *value)
{
    int i;
    unsigned char *dup;

    tags->count++;
    tags->tag = (unsigned char **) cli_realloc2(tags->tag,
            tags->count * sizeof(char *));
    if (!tags->tag) {
        goto abort;
    }
    tags->value = (unsigned char **) cli_realloc2(tags->value,
            tags->count * sizeof(char *));
    if (!tags->value) {
        goto abort;
    }
    if (tags->scanContents) {
        tags->contents = (unsigned char **) cli_realloc2(tags->contents,
                tags->count * sizeof(*tags->contents));
        if (!tags->contents) {
            goto abort;
        }
        tags->contents[tags->count-1] = NULL;
    }

    dup = (unsigned char*)cli_strdup(tag);
    if (!dup) {
        /* A NULL name would crash later lookups (strcmp/strlen), so drop
         * the new slot instead of storing it. */
        tags->count--;
        return;
    }
    tags->tag[tags->count-1] = dup;

    if (value) {
        if (*value == '"') {
            size_t len = strlen((const char*)value+1);

            dup = (unsigned char*)cli_strdup(value+1);
            /* strip the closing quote, but only if the copy exists */
            if (dup && len > 0) {
                dup[len-1] = '\0';
            }
            tags->value[tags->count-1] = dup;
        } else {
            tags->value[tags->count-1] = (unsigned char*)cli_strdup(value);
        }
    } else {
        tags->value[tags->count-1] = NULL;
    }
    return;

abort:
    /* Bad error - can't do 100% recovery */
    tags->count--;
    for (i=0; i < tags->count; i++) {
        if (tags->tag) {
            free(tags->tag[i]);
        }
        if (tags->value) {
            free(tags->value[i]);
        }
        if (tags->contents) {
            if (tags->contents[i])
                free(tags->contents[i]);
        }
    }
    if (tags->tag) {
        free(tags->tag);
    }
    if (tags->value) {
        free(tags->value);
    }
    if (tags->contents)
        free(tags->contents);
    tags->contents = NULL;
    tags->tag = tags->value = NULL;
    tags->count = 0;
    return;
}
/*
 * Emit "<tag name="value" ...>" to @fbuff using the attributes in @tags.
 * Attribute names are written as stored; values are lower-cased and always
 * wrapped in double quotes. Attributes without a value are written bare.
 */
static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags)
{
    int idx;

    html_output_c(fbuff, '<');
    html_output_str(fbuff, (const unsigned char*)tag, strlen(tag));

    for (idx = 0; idx < tags->count; idx++) {
        const unsigned char *name = tags->tag[idx];
        const unsigned char *val = tags->value[idx];

        html_output_c(fbuff, ' ');
        html_output_str(fbuff, name, strlen((const char*)name));
        if (!val) {
            continue;
        }
        html_output_str(fbuff, (const unsigned char*)"=\"", 2);
        for (; *val; val++) {
            html_output_c(fbuff, tolower(*val));
        }
        html_output_c(fbuff, '"');
    }

    html_output_c(fbuff, '>');
}
/*
 * Release every name, value and contents string held by @tags, then the
 * arrays themselves, and reset @tags to the empty state
 * (count 0, all array pointers NULL).
 */
void html_tag_arg_free(tag_arguments_t *tags)
{
    int idx = 0;

    while (idx < tags->count) {
        free(tags->tag[idx]);
        free(tags->value[idx]);
        if (tags->contents) {
            free(tags->contents[idx]);
        }
        idx++;
    }

    free(tags->tag);
    free(tags->value);
    free(tags->contents);

    tags->tag = NULL;
    tags->value = NULL;
    tags->contents = NULL;
    tags->count = 0;
}
|
333d724d |
/**
* the displayed text for an <a href> tag
*/ |
7d4b5f16 |
static inline void html_tag_contents_append(struct tag_contents *cont, const unsigned char* begin,const unsigned char *end) |
333d724d |
{ |
7d4b5f16 |
size_t i; |
13bfb273 |
uint32_t mbchar = 0; |
7d4b5f16 |
if(!begin || !end)
return;
for(i = cont->pos; i < MAX_TAG_CONTENTS_LENGTH && (begin < end);i++) { |
13bfb273 |
uint8_t c = *begin++;
if (mbchar && (c < 0x80 || mbchar >= 0x10000)) {
if (mbchar == 0xE38082 || mbchar == 0xEFBC8E
|| mbchar == 0xEFB992 || |
3afedd07 |
(mbchar == 0xA1 && (c == 0x43 || c == 0x44 || c == 0x4F))) { |
13bfb273 |
cont->contents[i++] = '.'; |
4522746e |
if (mbchar == 0xA1) {
--i;
mbchar = 0;
continue;
} |
13bfb273 |
} else {
uint8_t c0 = mbchar >> 16;
uint8_t c1 = (mbchar >> 8)&0xff;
uint8_t c2 = (mbchar & 0xff);
if (c0 && i+1 < MAX_TAG_CONTENTS_LENGTH)
cont->contents[i++] = c0;
if ((c0 || c1) && i+1 < MAX_TAG_CONTENTS_LENGTH)
cont->contents[i++] = c1;
if (i+1 < MAX_TAG_CONTENTS_LENGTH)
cont->contents[i++] = c2;
}
mbchar = 0;
}
if (c >= 0x80) {
mbchar = (mbchar << 8) | c;
--i;
}
else
cont->contents[i] = c; |
333d724d |
} |
7d4b5f16 |
cont->pos = i; |
333d724d |
}
|
7d4b5f16 |
/*
 * Finish the text collected in @cont: NUL-terminate it, store a heap copy in
 * tags->contents[idx - 1] (@idx is 1-based) and reset @cont for the next tag.
 *
 * The accumulator is reset even when nothing can be stored (no contents
 * array, @idx out of range, or allocation failure), so that stale text is
 * never attached to a later tag.
 */
static inline void html_tag_contents_done(tag_arguments_t *tags,int idx, struct tag_contents *cont)
{
    unsigned char *p;

    /* keep the terminator inside contents[] whatever pos was left at */
    if (cont->pos > MAX_TAG_CONTENTS_LENGTH)
        cont->pos = MAX_TAG_CONTENTS_LENGTH;
    cont->contents[cont->pos++] = '\0';

    if (!tags->contents || idx < 1 || idx > tags->count) {
        cont->pos = 0;
        return;
    }

    p = cli_malloc(cont->pos);
    if (!p) {
        cont->pos = 0;
        return;
    }
    memcpy(p, cont->contents, cont->pos);
    tags->contents[idx-1] = p;
    cont->pos = 0;
}
|
a5f19645 |
/* Decoder state carried across successive calls to screnc_decode(). */
struct screnc_state {
/* number of encoded bytes still to be decoded */
uint32_t length;
/* running sum of the decoded values, compared with the trailing checksum */
uint32_t sum;
/* current position in table_order; advances modulo 64 */
uint8_t table_pos;
};
/* inplace decoding, so that we can normalize it later */ |
745d4b38 |
static void screnc_decode(unsigned char *ptr, struct screnc_state *s) |
a5f19645 |
{
uint8_t value;
unsigned char *dst = ptr;
if(!ptr || !s)
return;
while(s->length > 0 && *ptr) {
if ((*ptr == '\n') || (*ptr == '\r')) {
ptr++;
continue;
}
if (*ptr < 0x80) {
value = decrypt_tables[table_order[s->table_pos]][*ptr];
if (value == 0xFF) { /* special character */
ptr++;
s->length--;
switch (*ptr) {
case '\0':
/* Fixup for end of line */
ptr--;
break;
case 0x21:
value = 0x3c;
break;
case 0x23:
value = 0x0d;
break;
case 0x24:
value = 0x40;
break;
case 0x26:
value = 0x0a;
break;
case 0x2a:
value = 0x3e;
break;
}
}
s->sum += value;
*dst++ = value;
s->table_pos = (s->table_pos + 1) % 64;
} else {
*dst++ = *ptr++;
*dst++ = *ptr; |
d9282b97 |
if (!*ptr)
break; |
a5f19645 |
}
ptr++;
s->length--;
}
if(!s->length) {
size_t remaining; |
b9b47784 |
if(strlen((const char*)ptr) >= 12) { |
a5f19645 |
uint32_t expected;
expected = base64_chars[ptr[0]] << 2;
expected += base64_chars[ptr[1]] >> 4;
expected += (base64_chars[ptr[1]] & 0x0f) << 12;
expected += (base64_chars[ptr[2]] >> 2) << 8;
expected += (base64_chars[ptr[2]] & 0x03) << 22;
expected += base64_chars[ptr[3]] << 16;
expected += (base64_chars[ptr[4]] << 2) << 24;
expected += (base64_chars[ptr[5]] >> 4) << 24;
ptr += 8;
if(s->sum != expected) { |
5cd3f734 |
cli_dbgmsg("screnc_decode: checksum mismatch: %u != %u\n", s->sum, expected); |
a5f19645 |
} else { |
b9b47784 |
if(strncmp((const char*)ptr, "^#~@", 4) != 0) { |
a5f19645 |
cli_dbgmsg("screnc_decode: terminator not found\n");
} else {
cli_dbgmsg("screnc_decode: OK\n");
}
}
ptr += 4;
}
/* copy remaining */ |
b9b47784 |
remaining = strlen((const char*)ptr) + 1; |
a5f19645 |
memmove(dst, ptr, remaining);
} else {
*dst = '\0';
}
}
|
b9b47784 |
/*
 * Hand the script text between @js_begin and @js_end to the JS normaliser.
 * A NULL @js_begin means "from the start of @line"; a NULL @js_end means
 * "up to @ptr". The range is only processed when it is non-empty and both
 * ends lie inside the 8192-byte chunk starting at @line.
 * When @in_script is false the script is complete: the normaliser is
 * finalised, its output is written under @dirname and @js_state is destroyed.
 */
static void js_process(struct parser_state *js_state, const unsigned char *js_begin, const unsigned char *js_end,
        const unsigned char *line, const unsigned char *ptr, int in_script, const char *dirname)
{
    const unsigned char *from = js_begin ? js_begin : line;
    const unsigned char *to = js_end ? js_end : ptr;

    if (to > from &&
        CLI_ISCONTAINED(line, 8192, from, 1) &&
        CLI_ISCONTAINED(line, 8192, to, 1)) {
        cli_js_process_buffer(js_state, (const char*)from, to - from);
    }

    if (in_script) {
        return;
    }

    /* we found a /script, normalize script now */
    cli_js_parse_done(js_state);
    cli_js_output(js_state, dirname);
    cli_js_destroy(js_state);
}
|
462e8e5e |
static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf) |
888f5794 |
{ |
b9b47784 |
int fd_tmp, tag_length = 0, tag_arg_length = 0, binary;
int retval=FALSE, escape=FALSE, value = 0, hex=FALSE, tag_val_length=0; |
7959343d |
int look_for_screnc=FALSE, in_screnc=FALSE,in_script=FALSE, text_space_written=FALSE; |
5e2a487c |
FILE *stream_in = NULL; |
a5f19645 |
html_state state=HTML_NORM, next_state=HTML_BAD_STATE, saved_next_state=HTML_BAD_STATE; |
e57fa318 |
char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1]; |
b9b47784 |
char tag_val[HTML_STR_LENGTH+1], *tmp_file, *arg_value;
unsigned char *line, *ptr, *ptr_screnc = NULL; |
e57fa318 |
tag_arguments_t tag_args; |
b9b47784 |
quoted_state quoted = NOT_QUOTED;
unsigned long length = 0; |
a5f19645 |
struct screnc_state screnc_state; |
d5a5fef9 |
file_buff_t *file_buff_o2, *file_buff_text; |
b9b47784 |
file_buff_t *file_tmp_o1 = NULL; |
333d724d |
int in_ahref=0;/* index of <a> tag, whose contents we are parsing. Indexing starts from 1, 0 means outside of <a>*/
unsigned char* href_contents_begin=NULL;/*beginning of the next portion of <a> contents*/
unsigned char* ptrend=NULL;/*end of <a> contents*/
unsigned char* in_form_action = NULL;/* the action URL of the current <form> tag, if any*/ |
462e8e5e |
|
3506ac49 |
struct entity_conv conv;
unsigned char entity_val[HTML_STR_LENGTH+1];
size_t entity_val_length = 0; |
1f95d81c |
const int dconf_entconv = dconf ? dconf->phishing&PHISHING_CONF_ENTCONV : 1;
const int dconf_js = dirname && (dconf ? dconf->doc&DOC_CONF_JSNORM : 1); /* TODO */ |
462e8e5e |
/* dconf for phishing engine sets scanContents, so no need for a flag here */ |
8be1d5a4 |
struct parser_state *js_state = NULL; |
5be3029f |
const unsigned char *js_begin = NULL, *js_end = NULL; |
7d4b5f16 |
struct tag_contents contents; |
13bfb273 |
uint32_t mbchar = 0; |
4522746e |
uint32_t mbchar2 = 0; |
a92110df |
|
462e8e5e |
tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/ |
7d4b5f16 |
contents.pos = 0; |
e57fa318 |
if (!m_area) {
if (fd < 0) {
cli_dbgmsg("Invalid HTML fd\n");
return FALSE;
}
lseek(fd, 0, SEEK_SET);
fd_tmp = dup(fd);
if (fd_tmp < 0) {
return FALSE;
}
stream_in = fdopen(fd_tmp, "r");
if (!stream_in) {
close(fd_tmp);
return FALSE;
} |
888f5794 |
} |
ea04d2de |
tag_args.count = 0;
tag_args.tag = NULL;
tag_args.value = NULL; |
333d724d |
tag_args.contents = NULL; |
e57fa318 |
if (dirname) { |
58481352 |
snprintf(filename, 1024, "%s"PATHSEP"rfc2397", dirname); |
3506ac49 |
if (mkdir(filename, 0700) && errno != EEXIST) { |
d5a5fef9 |
file_buff_o2 = file_buff_text = NULL; |
ea04d2de |
goto abort;
} |
4e1127c5 |
|
5e733972 |
file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t)); |
ea04d2de |
if (!file_buff_o2) { |
d5a5fef9 |
file_buff_o2 = file_buff_text = NULL; |
ea04d2de |
goto abort; |
e57fa318 |
}
|
d5a5fef9 |
/* this will still contains scripts that are inside comments */ |
58481352 |
snprintf(filename, 1024, "%s"PATHSEP"nocomment.html", dirname); |
e3a6f061 |
file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR); |
72ce4b70 |
if (file_buff_o2->fd == -1) { |
e57fa318 |
cli_dbgmsg("open failed: %s\n", filename); |
5e733972 |
free(file_buff_o2); |
d5a5fef9 |
file_buff_o2 = file_buff_text = NULL; |
ea04d2de |
goto abort; |
e57fa318 |
}
|
0664128a |
file_buff_text = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
if(!file_buff_text) { |
72ce4b70 |
close(file_buff_o2->fd); |
0664128a |
free(file_buff_o2); |
d5a5fef9 |
file_buff_o2 = file_buff_text = NULL; |
0664128a |
goto abort;
} |
d5a5fef9 |
|
58481352 |
snprintf(filename, 1024, "%s"PATHSEP"notags.html", dirname); |
0664128a |
file_buff_text->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR); |
72ce4b70 |
if(file_buff_text->fd == -1) { |
0664128a |
cli_dbgmsg("open failed: %s\n", filename);
close(file_buff_o2->fd);
free(file_buff_o2);
free(file_buff_text); |
d5a5fef9 |
file_buff_o2 = file_buff_text = NULL; |
72ce4b70 |
goto abort; |
0664128a |
} |
5e733972 |
file_buff_o2->length = 0; |
0664128a |
file_buff_text->length = 0; |
e57fa318 |
} else {
file_buff_o2 = NULL; |
0664128a |
file_buff_text = NULL; |
888f5794 |
} |
4e1127c5 |
|
a92110df |
binary = FALSE; |
3506ac49 |
|
b3fc7f97 |
ptr = line = cli_readchunk(stream_in, m_area, 8192); |
3506ac49 |
|
888f5794 |
while (line) { |
333d724d |
if(href_contents_begin)
href_contents_begin=ptr;/*start of a new line, last line already appended to contents see below*/ |
e57fa318 |
while (*ptr && isspace(*ptr)) {
ptr++; |
888f5794 |
} |
e57fa318 |
while (*ptr) { |
a92110df |
if (!binary && *ptr == '\n') { |
ee4e852a |
/* Convert it to a space and re-process */
*ptr = ' '; |
e57fa318 |
continue;
} |
a92110df |
if (!binary && *ptr == '\r') { |
e57fa318 |
ptr++;
continue;
}
switch (state) { |
fc83da82 |
case HTML_SPECIAL_CHAR:
cli_dbgmsg("Impossible, special_char can't occur here\n");
break; |
e57fa318 |
case HTML_BAD_STATE:
/* An engine error has occurred */
cli_dbgmsg("HTML Engine Error\n");
goto abort;
case HTML_SKIP_WS:
if (isspace(*ptr)) {
ptr++;
} else {
state = next_state;
next_state = HTML_BAD_STATE;
}
break;
case HTML_TRIM_WS:
if (isspace(*ptr)) {
ptr++;
} else { |
22b961c2 |
if(!in_script)
html_output_c(file_buff_o2, ' '); |
e57fa318 |
state = next_state;
next_state = HTML_BAD_STATE;
}
break; |
13bfb273 |
case HTML_8BIT:
if (*ptr < 0x80 || mbchar >= 0x10000) {
if (mbchar == 0xE38082 || mbchar == 0xEFBC8E
|| mbchar == 0xEFB992 || |
3afedd07 |
(mbchar == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F))) { |
13bfb273 |
/* bb #4097 */
html_output_c(file_buff_o2, '.');
html_output_c(file_buff_text, '.'); |
4522746e |
if (mbchar == 0xA1) {
ptr++;
mbchar = 0;
continue;
} |
13bfb273 |
} else {
uint8_t c0 = mbchar >> 16;
uint8_t c1 = (mbchar >> 8)&0xff;
uint8_t c2 = (mbchar & 0xff);
if (c0) {
html_output_c(file_buff_o2, c0);
html_output_c(file_buff_text, c0);
}
if (c0 || c1) {
html_output_c(file_buff_o2, c1);
html_output_c(file_buff_text, c1);
}
html_output_c(file_buff_o2, c2);
html_output_c(file_buff_text, c1);
}
mbchar = 0;
state = next_state;
next_state = HTML_NORM;
} else {
mbchar = (mbchar << 8) | *ptr;
ptr++;
}
break; |
e57fa318 |
case HTML_NORM:
if (*ptr == '<') { |
f74bc827 |
ptrend=ptr; /* for use by scanContents */ |
d5a5fef9 |
html_output_c(file_buff_o2, '<');
if (!in_script && !text_space_written) {
html_output_c(file_buff_text, ' '); |
0664128a |
text_space_written = TRUE; |
e57fa318 |
} |
333d724d |
if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
/*append this text portion to the contents of <a>*/ |
7d4b5f16 |
html_tag_contents_append(&contents,href_contents_begin,ptr); |
333d724d |
href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/
} |
e57fa318 |
ptr++;
state = HTML_SKIP_WS;
tag_length=0;
next_state = HTML_TAG;
} else if (isspace(*ptr)) { |
0664128a |
if(!text_space_written && !in_script) { |
d5a5fef9 |
html_output_c(file_buff_text, ' '); |
0664128a |
text_space_written = TRUE;
} |
e57fa318 |
state = HTML_TRIM_WS;
next_state = HTML_NORM;
} else if (*ptr == '&') { |
0664128a |
if(!text_space_written && !in_script) { |
d5a5fef9 |
html_output_c(file_buff_text, ' '); |
0664128a |
text_space_written = TRUE;
} |
e57fa318 |
state = HTML_CHAR_REF;
next_state = HTML_NORM;
ptr++; |
13bfb273 |
} else if (*ptr >= 0x80) {
state = HTML_8BIT;
next_state = HTML_NORM;
mbchar = *ptr;
ptr++; |
e57fa318 |
} else { |
2354901f |
unsigned char c = tolower(*ptr);
/* normalize ' to " for scripts */
if(in_script && c == '\'') c = '"';
html_output_c(file_buff_o2, c); |
d5a5fef9 |
if (!in_script) { |
22b961c2 |
if(*ptr < 0x20) {
if(!text_space_written) {
html_output_c(file_buff_text, ' ');
text_space_written = TRUE;
}
} else { |
2354901f |
html_output_c(file_buff_text, c); |
22b961c2 |
text_space_written = FALSE;
} |
e57fa318 |
}
ptr++;
}
break;
case HTML_TAG:
if ((tag_length == 0) && (*ptr == '!')) {
/* Comment */
if (in_script) { |
d5a5fef9 |
/* we still write scripts to nocomment.html */
html_output_c(file_buff_o2, '!');
} else {
/* Need to rewind in the no-comment output stream */
if (file_buff_o2 && (file_buff_o2->length > 0)) {
file_buff_o2->length--;
} |
e57fa318 |
}
state = HTML_COMMENT;
next_state = HTML_BAD_STATE;
ptr++;
} else if (*ptr == '>') { |
d5a5fef9 |
html_output_c(file_buff_o2, '>'); |
e57fa318 |
ptr++;
tag[tag_length] = '\0';
state = HTML_SKIP_WS;
next_state = HTML_PROCESS_TAG;
} else if (!isspace(*ptr)) { |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(*ptr)); |
6f7c0a15 |
/* if we're inside a script we only care for </script>.*/
if(in_script && tag_length==0 && *ptr != '/') {
state = HTML_NORM;
} |
e57fa318 |
if (tag_length < HTML_STR_LENGTH) {
tag[tag_length++] = tolower(*ptr);
}
ptr++;
} else {
tag[tag_length] = '\0';
state = HTML_SKIP_WS;
tag_arg_length = 0; |
6f7c0a15 |
/* if we'd go to HTML_TAG_ARG whitespace would be inconsistently normalized for in_script*/ |
eb0757aa |
next_state = !in_script ? HTML_TAG_ARG : HTML_PROCESS_TAG; |
e57fa318 |
}
break;
case HTML_TAG_ARG:
if (*ptr == '=') { |
d5a5fef9 |
html_output_c(file_buff_o2, '='); |
e57fa318 |
tag_arg[tag_arg_length] = '\0';
ptr++;
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
tag_val_length = 0;
next_state = HTML_TAG_ARG_VAL;
} else if (isspace(*ptr)) {
ptr++;
tag_arg[tag_arg_length] = '\0';
state = HTML_SKIP_WS;
next_state = HTML_TAG_ARG_EQUAL;
} else if (*ptr == '>') { |
d5a5fef9 |
html_output_c(file_buff_o2, '>'); |
e57fa318 |
if (tag_arg_length > 0) {
tag_arg[tag_arg_length] = '\0';
html_tag_arg_add(&tag_args, tag_arg, NULL);
}
ptr++;
state = HTML_PROCESS_TAG;
next_state = HTML_BAD_STATE;
} else {
if (tag_arg_length == 0) {
/* Start of new tag - add space */ |
d5a5fef9 |
html_output_c(file_buff_o2,' '); |
e57fa318 |
} |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(*ptr)); |
e57fa318 |
if (tag_arg_length < HTML_STR_LENGTH) {
tag_arg[tag_arg_length++] = tolower(*ptr);
}
ptr++;
}
break;
case HTML_TAG_ARG_EQUAL:
if (*ptr == '=') { |
d5a5fef9 |
html_output_c(file_buff_o2, '='); |
e57fa318 |
ptr++;
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
tag_val_length = 0;
next_state = HTML_TAG_ARG_VAL;
} else {
if (tag_arg_length > 0) {
tag_arg[tag_arg_length] = '\0';
html_tag_arg_add(&tag_args, tag_arg, NULL);
}
tag_arg_length=0;
state = HTML_TAG_ARG;
next_state = HTML_BAD_STATE;
}
break;
case HTML_TAG_ARG_VAL: |
a92110df |
if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
/* RFC2397 inline data */
/* Rewind one byte so we don't recursuive */
if (file_buff_o2 && (file_buff_o2->length > 0)) {
file_buff_o2->length--;
} |
4e1127c5 |
|
a92110df |
if (quoted != NOT_QUOTED) { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
a92110df |
}
tag_val_length = 0;
state = HTML_RFC2397_TYPE;
next_state = HTML_TAG_ARG;
} else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
/* RFC2397 inline data */
/* Rewind one byte so we don't recursuive */
if (file_buff_o2 && (file_buff_o2->length > 0)) {
file_buff_o2->length--;
} |
4e1127c5 |
|
a92110df |
if (quoted != NOT_QUOTED) { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
a92110df |
}
tag_val_length = 0;
state = HTML_RFC2397_TYPE;
next_state = HTML_TAG_ARG;
} else if (*ptr == '&') { |
e57fa318 |
state = HTML_CHAR_REF;
next_state = HTML_TAG_ARG_VAL;
ptr++;
} else if (*ptr == '\'') {
if (tag_val_length == 0) {
quoted = SINGLE_QUOTED; |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
} else {
if (!escape && (quoted==SINGLE_QUOTED)) { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
tag_val[tag_val_length] = '\0';
html_tag_arg_add(&tag_args, tag_arg, tag_val);
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
}
} else if (*ptr == '"') {
if (tag_val_length == 0) {
quoted = DOUBLE_QUOTED; |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
} else { |
4e1127c5 |
if (!escape && (quoted==DOUBLE_QUOTED)) { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
tag_val[tag_val_length] = '\0';
html_tag_arg_add(&tag_args, tag_arg, tag_val);
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else { |
d5a5fef9 |
html_output_c(file_buff_o2, '"'); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
}
} else if (isspace(*ptr) || (*ptr == '>')) {
if (quoted == NOT_QUOTED) {
tag_val[tag_val_length] = '\0';
html_tag_arg_add(&tag_args, tag_arg, tag_val);
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else { |
d5a5fef9 |
html_output_c(file_buff_o2, *ptr); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) {
if (isspace(*ptr)) {
tag_val[tag_val_length++] = ' ';
} else {
tag_val[tag_val_length++] = '>';
}
}
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
next_state = HTML_TAG_ARG_VAL;
ptr++;
}
} else { |
4522746e |
if (mbchar2 && (*ptr < 0x80 || mbchar2 >= 0x10000)) {
if (mbchar2 == 0xE38082 || mbchar2 == 0xEFBC8E
|| mbchar2 == 0xEFB992 || |
3afedd07 |
(mbchar2 == 0xA1 && (*ptr == 0x43 || *ptr == 0x44 || *ptr == 0x4F))) { |
4522746e |
html_output_c(file_buff_o2, '.');
if (tag_val_length < HTML_STR_LENGTH)
tag_val[tag_val_length++] = '.';
if (mbchar2 == 0xA1) {
ptr++;
mbchar2 = 0;
continue;
}
} else {
uint8_t c0 = mbchar2 >> 16;
uint8_t c1 = (mbchar2 >> 8)&0xff;
uint8_t c2 = (mbchar2 & 0xff);
if (c0)
html_output_c(file_buff_o2, c0);
if (c0 || c1)
html_output_c(file_buff_o2, c1);
html_output_c(file_buff_o2, c2);
if (c0 && tag_val_length < HTML_STR_LENGTH)
tag_val[tag_val_length++] = c0;
if ((c0 || c1) && tag_val_length < HTML_STR_LENGTH)
tag_val[tag_val_length++] = c1;
if (tag_val_length < HTML_STR_LENGTH)
tag_val[tag_val_length++] = c2;
}
mbchar2 = 0;
}
if (*ptr >= 0x80)
mbchar2 = (mbchar2 << 8) | *ptr;
else { |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(*ptr)); |
e57fa318 |
if (tag_val_length < HTML_STR_LENGTH) { |
a58f7bc5 |
tag_val[tag_val_length++] = *ptr; |
e57fa318 |
} |
4522746e |
}
ptr++; |
e57fa318 |
} |
4e1127c5 |
|
e57fa318 |
if (*ptr == '\\') {
escape = TRUE;
} else {
escape = FALSE;
}
break;
case HTML_COMMENT: |
22b961c2 |
if (in_script && !isspace(*ptr)) { |
94ec8955 |
unsigned char c = tolower(*ptr); |
d5a5fef9 |
/* dump script to nocomment.html, since we no longer have
* comment.html/script.html */ |
94ec8955 |
if(c == '\'') c = '"';
html_output_c(file_buff_o2, c); |
e57fa318 |
}
if (*ptr == '>') {
state = HTML_SKIP_WS; |
d5a5fef9 |
next_state = HTML_NORM; |
e57fa318 |
}
ptr++;
break;
case HTML_PROCESS_TAG: |
4e1127c5 |
|
e57fa318 |
/* Default to no action for this tag */
state = HTML_SKIP_WS;
next_state = HTML_NORM;
if (tag[0] == '/') {
/* End tag */
state = HTML_SKIP_WS;
next_state = HTML_NORM;
if (strcmp(tag, "/script") == 0) { |
8be1d5a4 |
in_script = FALSE; |
6a53bbdf |
if(js_state) { |
5be3029f |
js_end = ptr; |
6a53bbdf |
js_process(js_state, js_begin, js_end, line, ptr, in_script, dirname);
js_state = NULL;
js_begin = js_end = NULL;
} |
0f247775 |
/*don't output newlines in nocomment.html
* html_output_c(file_buff_o2, '\n');*/ |
e57fa318 |
} |
333d724d |
if (hrefs && hrefs->scanContents && in_ahref) {
if(strcmp(tag,"/a") == 0) { |
7d4b5f16 |
html_tag_contents_done(hrefs,in_ahref, &contents); |
333d724d |
in_ahref=0;/* we are no longer inside an <a href>
nesting <a> tags not supported, and shouldn't be supported*/
}
href_contents_begin=ptr;
}
if (strcmp(tag, "/form") == 0) { |
462e8e5e |
if (in_form_action)
free(in_form_action); |
333d724d |
in_form_action = NULL;
} |
e57fa318 |
} else if (strcmp(tag, "script") == 0) {
arg_value = html_tag_arg_value(&tag_args, "language"); |
d5a5fef9 |
/* TODO: maybe we can output all tags only via html_output_tag */ |
b9b47784 |
if (arg_value && (strcasecmp((const char*)arg_value, "jscript.encode") == 0)) { |
e57fa318 |
html_tag_arg_set(&tag_args, "language", "javascript");
state = HTML_SKIP_WS;
next_state = HTML_JSDECODE; |
d5a5fef9 |
/* we already output the old tag, output the new tag now */
html_output_tag(file_buff_o2, tag, &tag_args); |
b9b47784 |
} else if (arg_value && (strcasecmp((const char*)arg_value, "vbscript.encode") == 0)) { |
e57fa318 |
html_tag_arg_set(&tag_args, "language", "vbscript");
state = HTML_SKIP_WS;
next_state = HTML_JSDECODE; |
d5a5fef9 |
/* we already output the old tag, output the new tag now */
html_output_tag(file_buff_o2, tag, &tag_args); |
6a53bbdf |
}
in_script = TRUE;
if(dconf_js && !js_state) {
js_state = cli_js_init();
if(!js_state) { |
1405207a |
cli_dbgmsg("htmlnorm: Failed to initialize js parser\n"); |
8be1d5a4 |
} |
6a53bbdf |
js_begin = ptr;
js_end = NULL; |
e57fa318 |
} |
a5f19645 |
} else if(strcmp(tag, "%@") == 0) {
arg_value = html_tag_arg_value(&tag_args, "language"); |
b9b47784 |
if(arg_value && (strcasecmp((const char*)arg_value,"jscript.encode") == 0||
strcasecmp((const char*)arg_value, "vbscript.encode") == 0)) { |
a5f19645 |
saved_next_state = next_state;
next_state = state; |
e2354bdb |
look_for_screnc = FALSE; |
a5f19645 |
state = HTML_LOOKFOR_SCRENC;
} |
247bc5c6 |
} else if (hrefs) { |
333d724d |
if(in_ahref && !href_contents_begin)
href_contents_begin=ptr; |
247bc5c6 |
if (strcmp(tag, "a") == 0) {
arg_value = html_tag_arg_value(&tag_args, "href"); |
b9b47784 |
if (arg_value && strlen((const char*)arg_value) > 0) { |
333d724d |
if (hrefs->scanContents) { |
b9b47784 |
char* arg_value_title = html_tag_arg_value(&tag_args,"title"); |
333d724d |
/*beginning of an <a> tag*/
if (in_ahref)
/*we encountered nested <a> tags, pretend previous closed*/
if (href_contents_begin) { |
7d4b5f16 |
html_tag_contents_append(&contents, href_contents_begin, ptrend); |
333d724d |
/*add pending contents between tags*/ |
7d4b5f16 |
html_tag_contents_done(hrefs, in_ahref, &contents); |
333d724d |
in_ahref=0;
}
if (arg_value_title) {
/* title is a 'displayed link'*/
html_tag_arg_add(hrefs,"href_title",arg_value_title); |
b9b47784 |
html_tag_contents_append(&contents,(const unsigned char*)arg_value,
(const unsigned char*)arg_value+strlen(arg_value)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
}
if (in_form_action) {
/* form action is the real URL, and href is the 'displayed' */
html_tag_arg_add(hrefs,"form",arg_value); |
7d4b5f16 |
contents.pos = 0;
html_tag_contents_append(&contents, in_form_action, |
b9b47784 |
in_form_action + strlen((const char*)in_form_action)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
}
} |
247bc5c6 |
html_tag_arg_add(hrefs, "href", arg_value); |
333d724d |
if (hrefs->scanContents) {
in_ahref=hrefs->count; /* index of this tag (counted from 1) */
href_contents_begin=ptr;/* contents begin after <a ..> ends */ |
7d4b5f16 |
contents.pos = 0; |
333d724d |
} |
247bc5c6 |
} |
333d724d |
} else if (strcmp(tag,"form") == 0 && hrefs->scanContents) { |
b9b47784 |
const char* arg_action_value = html_tag_arg_value(&tag_args,"action"); |
ec481027 |
if (arg_action_value) { |
4e1127c5 |
if(in_form_action)
free(in_form_action); |
b9b47784 |
in_form_action = (unsigned char*)cli_strdup(arg_action_value); |
ec481027 |
} |
247bc5c6 |
} else if (strcmp(tag, "img") == 0) {
arg_value = html_tag_arg_value(&tag_args, "src");
if (arg_value && strlen(arg_value) > 0) {
html_tag_arg_add(hrefs, "src", arg_value); |
333d724d |
if(hrefs->scanContents && in_ahref)
/* "contents" of an img tag, is the URL of its parent <a> tag */ |
b9b47784 |
hrefs->contents[hrefs->count-1] = (unsigned char*)cli_strdup((const char*)hrefs->value[in_ahref-1]); |
333d724d |
if (in_form_action) {
/* form action is the real URL, and href is the 'displayed' */
html_tag_arg_add(hrefs,"form",arg_value); |
7d4b5f16 |
contents.pos = 0;
html_tag_contents_append(&contents, in_form_action, |
b9b47784 |
in_form_action + strlen((const char*)in_form_action)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
} |
247bc5c6 |
}
arg_value = html_tag_arg_value(&tag_args, "dynsrc");
if (arg_value && strlen(arg_value) > 0) {
html_tag_arg_add(hrefs, "dynsrc", arg_value); |
333d724d |
if(hrefs->scanContents && in_ahref)
/* see above */ |
b9b47784 |
hrefs->contents[hrefs->count-1] = (unsigned char*)cli_strdup((const char*)hrefs->value[in_ahref-1]); |
333d724d |
if (in_form_action) {
/* form action is the real URL, and href is the 'displayed' */
html_tag_arg_add(hrefs,"form",arg_value); |
7d4b5f16 |
contents.pos = 0;
html_tag_contents_append(&contents, in_form_action, |
b9b47784 |
in_form_action + strlen((const char*)in_form_action)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
} |
247bc5c6 |
}
} else if (strcmp(tag, "iframe") == 0) {
arg_value = html_tag_arg_value(&tag_args, "src");
if (arg_value && strlen(arg_value) > 0) {
html_tag_arg_add(hrefs, "iframe", arg_value); |
333d724d |
if(hrefs->scanContents && in_ahref)
/* see above */ |
b9b47784 |
hrefs->contents[hrefs->count-1] = (unsigned char*)cli_strdup((const char*)hrefs->value[in_ahref-1]); |
333d724d |
if (in_form_action) {
/* form action is the real URL, and href is the 'displayed' */
html_tag_arg_add(hrefs,"form",arg_value); |
7d4b5f16 |
contents.pos = 0;
html_tag_contents_append(&contents, in_form_action, |
b9b47784 |
in_form_action + strlen((const char*)in_form_action)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
} |
247bc5c6 |
} |
333d724d |
} else if (strcmp(tag,"area") == 0) {
arg_value = html_tag_arg_value(&tag_args,"href");
if (arg_value && strlen(arg_value) > 0) {
html_tag_arg_add(hrefs, "area", arg_value);
if(hrefs->scanContents && in_ahref)
/* see above */ |
b9b47784 |
hrefs->contents[hrefs->count-1] = (unsigned char*)cli_strdup((const char*)hrefs->value[in_ahref-1]); |
333d724d |
if (in_form_action) {
/* form action is the real URL, and href is the 'displayed' */
html_tag_arg_add(hrefs,"form",arg_value); |
7d4b5f16 |
contents.pos = 0;
html_tag_contents_append(&contents, in_form_action, |
b9b47784 |
in_form_action + strlen((const char*)in_form_action)); |
7d4b5f16 |
html_tag_contents_done(hrefs, hrefs->count, &contents); |
333d724d |
} |
4e1127c5 |
} |
333d724d |
}
/* TODO:imagemaps can have urls too */ |
ec774193 |
} else if (strcmp(tag, "a") == 0) {
/* a/img tags for buff_text can be processed only if we're not processing hrefs */
arg_value = html_tag_arg_value(&tag_args, "href");
if(arg_value && arg_value[0]) { |
b9b47784 |
html_output_str(file_buff_text, (const unsigned char*)arg_value, strlen((const char*)arg_value)); |
d5a5fef9 |
html_output_c(file_buff_text, ' '); |
22b961c2 |
text_space_written = TRUE; |
ec774193 |
}
} else if (strcmp(tag, "img") == 0) {
arg_value = html_tag_arg_value(&tag_args, "src");
if(arg_value && arg_value[0]) { |
b9b47784 |
html_output_str(file_buff_text, (const unsigned char*)arg_value, strlen((const char*)arg_value)); |
d5a5fef9 |
html_output_c(file_buff_text, ' '); |
22b961c2 |
text_space_written = TRUE; |
ec774193 |
} |
e57fa318 |
}
html_tag_arg_free(&tag_args);
break;
case HTML_CHAR_REF:
if (*ptr == '#') {
value = 0;
hex = FALSE;
state = HTML_CHAR_REF_DECODE;
ptr++;
} else { |
462e8e5e |
if(dconf_entconv)
state = HTML_ENTITY_REF_DECODE;
else {
if(next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '&';
} |
d5a5fef9 |
html_output_c(file_buff_o2, '&'); |
3506ac49 |
|
462e8e5e |
state = next_state;
next_state = HTML_BAD_STATE;
} |
3506ac49 |
}
break;
case HTML_ENTITY_REF_DECODE:
if(*ptr == ';') {
size_t i; |
b0b8398b |
const char* normalized; |
3506ac49 |
entity_val[entity_val_length] = '\0';
normalized = entity_norm(&conv, entity_val);
if(normalized) {
for(i=0; i < strlen(normalized); i++) { |
b0b8398b |
const unsigned char c = normalized[i]&0xff; |
d5a5fef9 |
html_output_c(file_buff_o2, c); |
66f7a691 |
if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) { |
3506ac49 |
tag_val[tag_val_length++] = c;
}
}
}
else { |
d5a5fef9 |
html_output_c(file_buff_o2, '&'); |
66f7a691 |
if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '&'; |
4e1127c5 |
} |
3506ac49 |
for(i=0; i < entity_val_length; i++) {
const char c = tolower(entity_val[i]); |
d5a5fef9 |
html_output_c(file_buff_o2, c); |
66f7a691 |
if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) { |
3506ac49 |
tag_val[tag_val_length++] = c;
}
} |
66f7a691 |
if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = ';';
} |
d5a5fef9 |
html_output_c(file_buff_o2, ';'); |
3506ac49 |
}
entity_val_length = 0; |
e57fa318 |
state = next_state;
next_state = HTML_BAD_STATE; |
3506ac49 |
ptr++;
}
else if ( (isalnum(*ptr) || *ptr=='_' || *ptr==':' || (*ptr=='-')) && entity_val_length < HTML_STR_LENGTH) {
entity_val[entity_val_length++] = *ptr++;
}
else {
/* entity too long, or not valid, dump it */
size_t i; |
66f7a691 |
if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '&';
} |
d5a5fef9 |
html_output_c(file_buff_o2, '&'); |
3506ac49 |
for(i=0; i < entity_val_length; i++) {
const char c = tolower(entity_val[i]); |
d5a5fef9 |
html_output_c(file_buff_o2, c); |
66f7a691 |
if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) { |
3506ac49 |
tag_val[tag_val_length++] = c;
}
}
state = next_state;
next_state = HTML_BAD_STATE;
entity_val_length = 0; |
e57fa318 |
}
break;
case HTML_CHAR_REF_DECODE:
if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
hex=TRUE;
ptr++;
} else if (*ptr == ';') { |
73611293 |
if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) { |
462e8e5e |
tag_val[tag_val_length++] = value; /* store encoded values too */ |
73611293 |
}
if(dconf_entconv) { |
3506ac49 |
|
462e8e5e |
if(value < 0x80) |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(value)); |
462e8e5e |
else {
unsigned char buff[10]; |
8b22c9b5 |
unsigned char* out = u16_normalize_tobuffer(value, buff, 10); |
d5a5fef9 |
if(out && out>buff) {
html_output_str(file_buff_o2, buff, out-buff-1); |
8b22c9b5 |
} |
462e8e5e |
}
} else |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(value&0xff)); |
e57fa318 |
state = next_state;
next_state = HTML_BAD_STATE;
ptr++;
} else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
if (hex) {
value *= 16;
} else {
value *= 10;
}
if (isdigit(*ptr)) {
value += (*ptr - '0');
} else {
value += (tolower(*ptr) - 'a' + 10);
}
ptr++;
} else { |
d5a5fef9 |
html_output_c(file_buff_o2, value); |
e57fa318 |
state = next_state;
next_state = HTML_BAD_STATE;
}
break; |
a5f19645 |
case HTML_LOOKFOR_SCRENC:
look_for_screnc = TRUE; |
b9b47784 |
ptr_screnc = (unsigned char*)strstr((char*)ptr, "#@~^"); |
a5f19645 |
if(ptr_screnc) { |
6a53bbdf |
ptr_screnc[0] = '/';
ptr_screnc[1] = '/'; |
a5f19645 |
ptr_screnc += 4;
}
state = next_state;
next_state = saved_next_state;
break; |
e57fa318 |
case HTML_JSDECODE:
/* Check for start marker */ |
b9b47784 |
if (strncmp((const char*)ptr, "#@~^", 4) == 0) { |
6a53bbdf |
ptr[0] = '/';
ptr[1] = '/'; |
e57fa318 |
ptr += 4;
state = HTML_JSDECODE_LENGTH;
next_state = HTML_BAD_STATE;
} else { |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(*ptr)); |
e57fa318 |
ptr++;
}
break;
case HTML_JSDECODE_LENGTH: |
b9b47784 |
if (strlen((const char*)ptr) < 8) { |
e57fa318 |
state = HTML_NORM;
next_state = HTML_BAD_STATE;
break;
} |
a5f19645 |
memset(&screnc_state, 0, sizeof(screnc_state));
screnc_state.length = base64_chars[ptr[0]] << 2;
screnc_state.length += base64_chars[ptr[1]] >> 4;
screnc_state.length += (base64_chars[ptr[1]] & 0x0f) << 12;
screnc_state.length += (base64_chars[ptr[2]] >> 2) << 8;
screnc_state.length += (base64_chars[ptr[2]] & 0x03) << 22;
screnc_state.length += base64_chars[ptr[3]] << 16;
screnc_state.length += (base64_chars[ptr[4]] << 2) << 24;
screnc_state.length += (base64_chars[ptr[5]] >> 4) << 24; |
e57fa318 |
state = HTML_JSDECODE_DECRYPT; |
a5f19645 |
in_screnc = TRUE; |
e57fa318 |
next_state = HTML_BAD_STATE; |
6a53bbdf |
/* for JS normalizer */
ptr[7] = '\n'; |
e57fa318 |
ptr += 8;
break;
case HTML_JSDECODE_DECRYPT: |
a5f19645 |
screnc_decode(ptr, &screnc_state);
if(!screnc_state.length) {
state = HTML_NORM;
next_state = HTML_BAD_STATE;
in_screnc = FALSE; |
e57fa318 |
break; |
a5f19645 |
} else {
state = HTML_NORM;
next_state = HTML_BAD_STATE; |
e57fa318 |
}
break; |
a92110df |
case HTML_RFC2397_TYPE:
if (*ptr == '\'') {
if (!escape && (quoted==SINGLE_QUOTED)) {
/* Early end of data detected. Error */
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
} else if (*ptr == '"') {
if (!escape && (quoted==DOUBLE_QUOTED)) {
/* Early end of data detected. Error */
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
} else if (isspace(*ptr) || (*ptr == '>')) {
if (quoted == NOT_QUOTED) {
/* Early end of data detected. Error */
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
if (isspace(*ptr)) {
tag_val[tag_val_length++] = ' ';
} else {
tag_val[tag_val_length++] = '>';
}
}
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
next_state = HTML_RFC2397_TYPE;
ptr++;
}
} else if (*ptr == ',') {
/* Beginning of data */
tag_val[tag_val_length] = '\0';
state = HTML_RFC2397_INIT;
escape = FALSE;
next_state = HTML_BAD_STATE;
ptr++; |
4e1127c5 |
|
a92110df |
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = tolower(*ptr);
}
ptr++;
}
if (*ptr == '\\') {
escape = TRUE;
} else {
escape = FALSE;
}
break;
case HTML_RFC2397_INIT: |
45d6cbd9 |
if (dirname) {
file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
if (!file_tmp_o1) {
goto abort;
} |
58481352 |
snprintf(filename, 1024, "%s"PATHSEP"rfc2397", dirname); |
45d6cbd9 |
tmp_file = cli_gentemp(filename); |
5fc380f1 |
if(!tmp_file) {
goto abort;
} |
45d6cbd9 |
cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
free(tmp_file); |
e357da7b |
if (file_tmp_o1->fd < 0) { |
45d6cbd9 |
cli_dbgmsg("open failed: %s\n", filename);
goto abort;
}
file_tmp_o1->length = 0; |
4e1127c5 |
|
b9b47784 |
html_output_str(file_tmp_o1, (const unsigned char*)"From html-normalise\n", 20);
html_output_str(file_tmp_o1, (const unsigned char*)"Content-type: ", 14); |
45d6cbd9 |
if ((tag_val_length == 0) && (*tag_val == ';')) { |
b9b47784 |
html_output_str(file_tmp_o1, (const unsigned char*)"text/plain\n", 11); |
45d6cbd9 |
} |
b9b47784 |
html_output_str(file_tmp_o1, (const unsigned char*)tag_val, tag_val_length); |
d5a5fef9 |
html_output_c(file_tmp_o1, '\n'); |
45d6cbd9 |
if (strstr(tag_val, ";base64") != NULL) { |
b9b47784 |
html_output_str(file_tmp_o1, (const unsigned char*)"Content-transfer-encoding: base64\n", 34); |
45d6cbd9 |
} |
d5a5fef9 |
html_output_c(file_tmp_o1, '\n'); |
45d6cbd9 |
} else {
file_tmp_o1 = NULL; |
a92110df |
}
state = HTML_RFC2397_DATA;
binary = TRUE;
break;
case HTML_RFC2397_DATA:
if (*ptr == '&') {
state = HTML_CHAR_REF;
next_state = HTML_RFC2397_DATA;
ptr++;
} else if (*ptr == '%') {
length = 0;
value = 0;
state = HTML_ESCAPE_CHAR;
next_state = HTML_RFC2397_ESC;
ptr++;
} else if (*ptr == '\'') {
if (!escape && (quoted==SINGLE_QUOTED)) {
state = HTML_RFC2397_FINISH;
ptr++;
} else { |
d5a5fef9 |
html_output_c(file_tmp_o1, *ptr); |
a92110df |
ptr++;
}
} else if (*ptr == '\"') { |
379870fa |
if (!escape && (quoted==DOUBLE_QUOTED)) { |
a92110df |
state = HTML_RFC2397_FINISH;
ptr++;
} else { |
d5a5fef9 |
html_output_c(file_tmp_o1, *ptr); |
a92110df |
ptr++;
}
} else if (isspace(*ptr) || (*ptr == '>')) {
if (quoted == NOT_QUOTED) {
state = HTML_RFC2397_FINISH;
ptr++;
} else { |
d5a5fef9 |
html_output_c(file_tmp_o1, *ptr); |
a92110df |
ptr++;
}
} else { |
d5a5fef9 |
html_output_c(file_tmp_o1, *ptr); |
a92110df |
ptr++;
}
if (*ptr == '\\') {
escape = TRUE;
} else {
escape = FALSE;
}
break;
case HTML_RFC2397_FINISH: |
45d6cbd9 |
if(file_tmp_o1) {
html_output_flush(file_tmp_o1);
close(file_tmp_o1->fd);
free(file_tmp_o1); |
4b2400fd |
file_tmp_o1 = NULL; |
45d6cbd9 |
} |
a92110df |
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
next_state = HTML_TAG_ARG;
binary = FALSE;
break;
case HTML_RFC2397_ESC:
if (length == 2) { |
d5a5fef9 |
html_output_c(file_tmp_o1, value); |
a92110df |
} else if (length == 1) { |
d5a5fef9 |
html_output_c(file_tmp_o1, '%');
html_output_c(file_tmp_o1, value+'0'); |
a92110df |
} else { |
d5a5fef9 |
html_output_c(file_tmp_o1, '%'); |
a92110df |
}
state = HTML_RFC2397_DATA; |
4e1127c5 |
break; |
a92110df |
case HTML_ESCAPE_CHAR:
value *= 16;
length++;
if (isxdigit(*ptr)) {
if (isdigit(*ptr)) {
value += (*ptr - '0');
} else {
value += (tolower(*ptr) - 'a' + 10);
}
} else {
state = next_state;
}
if (length == 2) {
state = next_state;
}
ptr++; |
4e1127c5 |
break; |
888f5794 |
}
} |
333d724d |
if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
/* end of line, append contents now, resume on next line */ |
7d4b5f16 |
html_tag_contents_append(&contents,href_contents_begin,ptr); |
333d724d |
ptrend = NULL; |
8be1d5a4 |
if(js_state) { |
6a53bbdf |
js_process(js_state, js_begin, js_end, line, ptr, in_script, dirname); |
8be1d5a4 |
js_begin = js_end = NULL;
if(!in_script) {
js_state = NULL;
}
} |
a5f19645 |
if(look_for_screnc && ptr_screnc) {
/* start found, and stuff before it already processed */
ptr = ptr_screnc;
ptr_screnc = NULL;
state = HTML_JSDECODE_LENGTH;
next_state = HTML_BAD_STATE;
continue;
} |
b3fc7f97 |
free(line);
ptr = line = cli_readchunk(stream_in, m_area, 8192); |
a5f19645 |
if (in_screnc) {
state = HTML_JSDECODE_DECRYPT;
next_state = HTML_BAD_STATE; |
e2354bdb |
} else if(look_for_screnc && !ptr_screnc &&
state != HTML_LOOKFOR_SCRENC) { |
a5f19645 |
saved_next_state = next_state;
next_state = state;
state = HTML_LOOKFOR_SCRENC;
} |
e2354bdb |
if(next_state == state) {
/* safeguard against infloop */
cli_dbgmsg("htmlnorm.c: next_state == state, changing next_state\n");
next_state = HTML_BAD_STATE;
} |
e57fa318 |
} |
4e1127c5 |
if(dconf_entconv) {
/* handle "unfinished" entities */ |
3506ac49 |
size_t i; |
b0b8398b |
const char* normalized; |
3506ac49 |
entity_val[entity_val_length] = '\0';
normalized = entity_norm(&conv, entity_val);
if(normalized) {
for(i=0; i < strlen(normalized); i++) |
d5a5fef9 |
html_output_c(file_buff_o2, normalized[i]&0xff); |
3506ac49 |
}
else {
if(entity_val_length) { |
d5a5fef9 |
html_output_c(file_buff_o2, '&'); |
3506ac49 |
for(i=0; i < entity_val_length; i++) |
d5a5fef9 |
html_output_c(file_buff_o2, tolower(entity_val[i])); |
3506ac49 |
}
}
} |
e57fa318 |
retval = TRUE;
abort: |
333d724d |
if (in_form_action)
free(in_form_action); |
7d4b5f16 |
if (in_ahref) /* tag not closed, force closing */
html_tag_contents_done(hrefs, in_ahref, &contents); |
462e8e5e |
|
8be1d5a4 |
if(js_state) { |
fb6208fd |
/* output script so far */
cli_js_parse_done(js_state);
cli_js_output(js_state, dirname); |
8be1d5a4 |
cli_js_destroy(js_state);
js_state = NULL;
} |
e57fa318 |
html_tag_arg_free(&tag_args);
if (!m_area) {
fclose(stream_in); |
888f5794 |
} |
3d0ca3cf |
if (file_buff_o2) {
html_output_flush(file_buff_o2); |
e357da7b |
if(file_buff_o2->fd != -1)
close(file_buff_o2->fd); |
3d0ca3cf |
free(file_buff_o2);
} |
0664128a |
if(file_buff_text) {
html_output_flush(file_buff_text); |
e357da7b |
if(file_buff_text->fd != -1)
close(file_buff_text->fd); |
0664128a |
free(file_buff_text);
} |
4b2400fd |
if(file_tmp_o1) {
html_output_flush(file_tmp_o1);
close(file_tmp_o1->fd);
free(file_tmp_o1);
} |
e57fa318 |
return retval;
}
|
462e8e5e |
/*
 * Normalise an HTML document that is held entirely in memory.
 *
 * Describes the caller's buffer with an m_area_t (no fmap attached, read
 * offset at the start of the buffer) and hands it to cli_html_normalise().
 * The first argument of that call is -1 (presumably "no file descriptor";
 * confirm against cli_html_normalise's prototype).
 *
 * in_buff  start of the HTML data
 * in_size  number of bytes available at in_buff
 * dirname, hrefs, dconf
 *          forwarded to cli_html_normalise() unchanged
 *
 * Returns the value returned by cli_html_normalise().
 */
int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf)
{
    m_area_t mem_area;
    int status;

    /* Plain memory buffer: no map, reading begins at offset 0. */
    mem_area.map    = NULL;
    mem_area.offset = 0;
    mem_area.length = in_size;
    mem_area.buffer = in_buff;

    status = cli_html_normalise(-1, &mem_area, dirname, hrefs, dconf);
    return status;
}
|
49cc1e3c |
/*
 * Normalise an HTML document that is accessed through an fmap.
 *
 * Builds an m_area_t that covers the whole map (length taken from
 * map->len, read offset 0) and hands it to cli_html_normalise().
 *
 * map      file map holding the HTML data; must not be NULL, it is
 *          dereferenced here
 * dirname, hrefs, dconf
 *          forwarded to cli_html_normalise() unchanged
 *
 * Returns the value returned by cli_html_normalise().
 */
int html_normalise_map(fmap_t *map, const char *dirname, tag_arguments_t *hrefs, const struct cli_dconf *dconf)
{
    m_area_t m_area;

    /*
     * Zero the descriptor first, as html_screnc_decode() does, so that the
     * members not assigned below (e.g. .buffer) are never passed on with
     * indeterminate values.
     */
    memset(&m_area, 0, sizeof(m_area));
    m_area.length = map->len;
    m_area.offset = 0;
    m_area.map = map;

    return cli_html_normalise(-1, &m_area, dirname, hrefs, dconf);
}
|
32f7e1d7 |
/*
 * Decode a Microsoft Script Encoder section found in a mapped file.
 *
 * The input is read in chunks through cli_readchunk().  The function looks
 * for the "#@~^" start marker, reads the 8 characters that follow it (the
 * first 6 are turned into the encoded length via the base64_chars table),
 * then runs screnc_decode() over the remaining data and writes the result,
 * wrapped in "<script>" ... "</script>", to "<dirname>/screnc.html".
 *
 * map      file map holding the encoded data; must not be NULL, it is
 *          dereferenced here
 * dirname  directory in which screnc.html is created
 *
 * Returns TRUE once the output has been written (even when the input ended
 * before the announced length was consumed; that case is only logged), and
 * FALSE when the output file cannot be opened, the marker is not found, or
 * the input ends before the 8 length characters have been read.
 */
int html_screnc_decode(fmap_t *map, const char *dirname)
{
    int count, retval = FALSE;
    unsigned char *line = NULL, tmpstr[6];
    unsigned char *ptr = NULL, filename[1024];
    int ofd;
    struct screnc_state screnc_state;
    m_area_t m_area;

    memset(&m_area, 0, sizeof(m_area));
    m_area.length = map->len;
    m_area.offset = 0;
    m_area.map = map;

    snprintf((char*)filename, sizeof(filename), "%s"PATHSEP"screnc.html", dirname);
    ofd = open((const char*)filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);

    if (ofd < 0) {
        cli_dbgmsg("open failed: %s\n", filename);
        return FALSE;
    }

    /* Skip chunks until one contains the start marker. */
    while ((line = cli_readchunk(NULL, &m_area, 8192)) != NULL) {
        ptr = (unsigned char*)strstr((char*)line, "#@~^");
        if (ptr) {
            break;
        }
        free(line);
    }
    if (!line) {
        goto abort;
    }

    /* Calculate the length of the encoded string */
    ptr += 4;
    count = 0;
    do {
        if (! *ptr) {
            /* Current chunk exhausted: continue in the next one. */
            free(line);
            ptr = line = cli_readchunk(NULL, &m_area, 8192);
            if (!line) {
                goto abort;
            }
        }
        /* 8 characters are consumed, only the first 6 carry the length. */
        if (count < 6)
            tmpstr[count] = *ptr;
        count++;
        ptr++;
    } while (count < 8);

    memset(&screnc_state, 0, sizeof(screnc_state));
    screnc_state.length = base64_chars[tmpstr[0]] << 2;
    screnc_state.length += base64_chars[tmpstr[1]] >> 4;
    screnc_state.length += (base64_chars[tmpstr[1]] & 0x0f) << 12;
    screnc_state.length += (base64_chars[tmpstr[2]] >> 2) << 8;
    screnc_state.length += (base64_chars[tmpstr[2]] & 0x03) << 22;
    screnc_state.length += base64_chars[tmpstr[3]] << 16;
    screnc_state.length += (base64_chars[tmpstr[4]] << 2) << 24;
    screnc_state.length += (base64_chars[tmpstr[5]] >> 4) << 24;

    cli_writen(ofd, "<script>", strlen("<script>"));
    while (screnc_state.length && line) {
        /* screnc_decode() apparently decodes in place: the same buffer is
         * written out right after the call. */
        screnc_decode(ptr, &screnc_state);
        cli_writen(ofd, ptr, strlen((const char*)ptr));
        free(line);
        /* Never keep a dangling pointer; the cleanup code frees 'line'. */
        line = NULL;
        if (screnc_state.length) {
            ptr = line = cli_readchunk(NULL, &m_area, 8192);
        }
    }
    cli_writen(ofd, "</script>", strlen("</script>"));
    if (screnc_state.length)
        cli_dbgmsg("html_screnc_decode: missing %u bytes\n", screnc_state.length);
    retval = TRUE;

abort:
    close(ofd);
    /*
     * 'line' is still owned here when the decoded length was 0 and the
     * decode loop never ran; previously that chunk was leaked.  On every
     * other path it is NULL and free() is a no-op.
     */
    free(line);
    return retval;
}