... | ... |
@@ -1,3 +1,15 @@ |
1 |
+Mon Aug 24 22:09:12 CEST 2009 (tk) |
|
2 |
+---------------------------------- |
|
3 |
+ * libclamav: improve handling of PDF files (bb#1682) |
|
4 |
+ |
|
5 |
+Fri Aug 21 15:53:35 CEST 2009 (tk) |
|
6 |
+---------------------------------- |
|
7 |
+ * libclamav: handle relative offsets with cli_ac_data; fix offset logic |
|
8 |
+ |
|
9 |
+Fri Aug 21 02:17:11 CEST 2009 (acab) |
|
10 |
+------------------------------------ |
|
11 |
+ * libclamav/ishield.c: properly free() header |
|
12 |
+ |
|
1 | 13 |
Fri Aug 21 00:56:03 CEST 2009 (acab) |
2 | 14 |
------------------------------------ |
3 | 15 |
* build system: upgrade to autoconf 2.64 and automake 1.11 (bb#1528) |
... | ... |
@@ -173,7 +173,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine) |
173 | 173 |
if(!root) |
174 | 174 |
return ret; |
175 | 175 |
|
176 |
- if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)) |
|
176 |
+ if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) |
|
177 | 177 |
return ret; |
178 | 178 |
|
179 | 179 |
sret = cli_ac_scanbuff(buff, bread, NULL, NULL, NULL, engine->root[0], &mdata, 0, ret, NULL, AC_SCAN_FT, NULL); |
... | ... |
@@ -183,7 +183,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine) |
183 | 183 |
if(sret >= CL_TYPENO) { |
184 | 184 |
ret = sret; |
185 | 185 |
} else { |
186 |
- if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)) |
|
186 |
+ if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) |
|
187 | 187 |
return ret; |
188 | 188 |
|
189 | 189 |
decoded = (unsigned char *) cli_utf16toascii((char *) buff, bread); |
... | ... |
@@ -217,7 +217,7 @@ cli_file_t cli_filetype2(int desc, const struct cl_engine *engine) |
217 | 217 |
* However when detecting whether a file is HTML or not, we need exact conversion. |
218 | 218 |
* (just eliminating zeros and matching would introduce false positives) */ |
219 | 219 |
if(encoding_normalize_toascii(&in_area, encoding, &out_area) >= 0 && out_area.length > 0) { |
220 |
- if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN)) |
|
220 |
+ if(cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) |
|
221 | 221 |
return ret; |
222 | 222 |
|
223 | 223 |
if(out_area.length > 0) { |
... | ... |
@@ -769,7 +769,7 @@ inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uin |
769 | 769 |
return 1; |
770 | 770 |
} |
771 | 771 |
|
772 |
-int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint8_t tracklen) |
|
772 |
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen) |
|
773 | 773 |
{ |
774 | 774 |
unsigned int i; |
775 | 775 |
|
... | ... |
@@ -779,12 +779,24 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, |
779 | 779 |
return CL_ENULLARG; |
780 | 780 |
} |
781 | 781 |
|
782 |
- data->partsigs = partsigs; |
|
782 |
+ data->reloffsigs = reloffsigs; |
|
783 |
+ if(reloffsigs) { |
|
784 |
+ data->offset = (uint32_t *) cli_malloc(reloffsigs * 2 * sizeof(uint32_t)); |
|
785 |
+ if(!data->offset) { |
|
786 |
+ cli_errmsg("cli_ac_init: Can't allocate memory for data->offset\n"); |
|
787 |
+ return CL_EMEM; |
|
788 |
+ } |
|
789 |
+ for(i = 0; i < reloffsigs * 2; i += 2) |
|
790 |
+ data->offset[i] = CLI_OFF_NONE; |
|
791 |
+ } |
|
783 | 792 |
|
793 |
+ data->partsigs = partsigs; |
|
784 | 794 |
if(partsigs) { |
785 | 795 |
data->offmatrix = (int32_t ***) cli_calloc(partsigs, sizeof(int32_t **)); |
786 | 796 |
if(!data->offmatrix) { |
787 | 797 |
cli_errmsg("cli_ac_init: Can't allocate memory for data->offmatrix\n"); |
798 |
+ if(reloffsigs) |
|
799 |
+ free(data->offset); |
|
788 | 800 |
return CL_EMEM; |
789 | 801 |
} |
790 | 802 |
} |
... | ... |
@@ -795,6 +807,8 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, |
795 | 795 |
if(!data->lsigcnt) { |
796 | 796 |
if(partsigs) |
797 | 797 |
free(data->offmatrix); |
798 |
+ if(reloffsigs) |
|
799 |
+ free(data->offset); |
|
798 | 800 |
cli_errmsg("cli_ac_init: Can't allocate memory for data->lsigcnt\n"); |
799 | 801 |
return CL_EMEM; |
800 | 802 |
} |
... | ... |
@@ -803,17 +817,19 @@ int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, |
803 | 803 |
free(data->lsigcnt); |
804 | 804 |
if(partsigs) |
805 | 805 |
free(data->offmatrix); |
806 |
+ if(reloffsigs) |
|
807 |
+ free(data->offset); |
|
806 | 808 |
cli_errmsg("cli_ac_init: Can't allocate memory for data->lsigcnt[0]\n"); |
807 | 809 |
return CL_EMEM; |
808 | 810 |
} |
809 | 811 |
for(i = 1; i < lsigs; i++) |
810 | 812 |
data->lsigcnt[i] = data->lsigcnt[0] + 64 * i; |
811 |
- } |
|
812 |
- |
|
813 |
+ } |
|
814 |
+ |
|
813 | 815 |
return CL_SUCCESS; |
814 | 816 |
} |
815 | 817 |
|
816 |
-int cli_ac_caloff(struct cli_matcher *root, int fd) |
|
818 |
+int cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *data, int fd) |
|
817 | 819 |
{ |
818 | 820 |
int ret; |
819 | 821 |
unsigned int i; |
... | ... |
@@ -824,8 +840,8 @@ int cli_ac_caloff(struct cli_matcher *root, int fd) |
824 | 824 |
for(i = 0; i < root->ac_reloff_num; i++) { |
825 | 825 |
patt = root->ac_reloff[i]; |
826 | 826 |
if(fd == -1) { |
827 |
- patt->offset_min = CLI_OFF_NONE; |
|
828 |
- } else if((ret = cli_caloff(NULL, &info, fd, root->type, patt->offdata, &patt->offset_min, &patt->offset_max))) { |
|
827 |
+ data->offset[patt->offset_min] = CLI_OFF_NONE; |
|
828 |
+ } else if((ret = cli_caloff(NULL, &info, fd, root->type, patt->offdata, &data->offset[patt->offset_min], &data->offset[patt->offset_max]))) { |
|
829 | 829 |
cli_errmsg("cli_ac_caloff: Can't calculate relative offset in signature for %s\n", patt->virname); |
830 | 830 |
if(info.exeinfo.section) |
831 | 831 |
free(info.exeinfo.section); |
... | ... |
@@ -859,6 +875,11 @@ void cli_ac_freedata(struct cli_ac_data *data) |
859 | 859 |
free(data->lsigcnt); |
860 | 860 |
data->lsigs = 0; |
861 | 861 |
} |
862 |
+ |
|
863 |
+ if(data && data->reloffsigs) { |
|
864 |
+ free(data->offset); |
|
865 |
+ data->reloffsigs = 0; |
|
866 |
+ } |
|
862 | 867 |
} |
863 | 868 |
|
864 | 869 |
inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx) |
... | ... |
@@ -926,7 +947,7 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v |
926 | 926 |
while(patt) { |
927 | 927 |
bp = i + 1 - patt->depth; |
928 | 928 |
pt = patt; |
929 |
- /* |
|
929 |
+ /* |
|
930 | 930 |
while(pt) { |
931 | 931 |
if((pt->type && !(mode & AC_SCAN_FT)) || (!pt->type && !(mode & AC_SCAN_VIR))) { |
932 | 932 |
pt = pt->next_same; |
... | ... |
@@ -958,9 +979,16 @@ int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **v |
958 | 958 |
} |
959 | 959 |
realoff = offset + bp - pt->prefix_length; |
960 | 960 |
if(pt->offset_min != CLI_OFF_ANY && (!pt->sigid || pt->partno == 1)) { |
961 |
- if(pt->offset_max > realoff || pt->offset_min < realoff) { |
|
962 |
- pt = pt->next_same; |
|
963 |
- continue; |
|
961 |
+ if(pt->offdata[0] == CLI_OFF_ABSOLUTE) { |
|
962 |
+ if(pt->offset_max < realoff || pt->offset_min > realoff) { |
|
963 |
+ pt = pt->next_same; |
|
964 |
+ continue; |
|
965 |
+ } |
|
966 |
+ } else { |
|
967 |
+ if(mdata->offset[pt->offset_min] == CLI_OFF_NONE || mdata->offset[pt->offset_max] < realoff || mdata->offset[pt->offset_min] > realoff) { |
|
968 |
+ pt = pt->next_same; |
|
969 |
+ continue; |
|
970 |
+ } |
|
964 | 971 |
} |
965 | 972 |
} |
966 | 973 |
if(pt->sigid) { /* it's a partial signature */ |
... | ... |
@@ -1477,6 +1505,8 @@ int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hex |
1477 | 1477 |
return CL_EMEM; |
1478 | 1478 |
} |
1479 | 1479 |
root->ac_reloff[root->ac_reloff_num] = new; |
1480 |
+ new->offset_min = root->ac_reloff_num * 2; |
|
1481 |
+ new->offset_max = new->offset_min + 1; |
|
1480 | 1482 |
root->ac_reloff_num++; |
1481 | 1483 |
} |
1482 | 1484 |
|
... | ... |
@@ -33,8 +33,9 @@ |
33 | 33 |
|
34 | 34 |
struct cli_ac_data { |
35 | 35 |
int32_t ***offmatrix; |
36 |
- uint32_t partsigs, lsigs; |
|
36 |
+ uint32_t partsigs, lsigs, reloffsigs; |
|
37 | 37 |
uint32_t **lsigcnt; |
38 |
+ uint32_t *offset; |
|
38 | 39 |
}; |
39 | 40 |
|
40 | 41 |
struct cli_ac_alt { |
... | ... |
@@ -79,13 +80,13 @@ struct cli_ac_result { |
79 | 79 |
#include "matcher.h" |
80 | 80 |
|
81 | 81 |
int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern); |
82 |
-int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint8_t tracklen); |
|
82 |
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint32_t lsigs, uint32_t reloffsigs, uint8_t tracklen); |
|
83 | 83 |
int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigned int *cnt, uint64_t *ids, unsigned int parse_only); |
84 | 84 |
void cli_ac_freedata(struct cli_ac_data *data); |
85 | 85 |
int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, void **customdata, struct cli_ac_result **res, const struct cli_matcher *root, struct cli_ac_data *mdata, uint32_t offset, cli_file_t ftype, struct cli_matched_type **ftoffset, unsigned int mode, const cli_ctx *ctx); |
86 | 86 |
int cli_ac_buildtrie(struct cli_matcher *root); |
87 | 87 |
int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth); |
88 |
-int cli_ac_caloff(struct cli_matcher *root, int fd); |
|
88 |
+int cli_ac_caloff(const struct cli_matcher *root, struct cli_ac_data *data, int fd); |
|
89 | 89 |
void cli_ac_free(struct cli_matcher *root); |
90 | 90 |
int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t rtype, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, const uint32_t *lsigid, unsigned int options); |
91 | 91 |
|
... | ... |
@@ -55,8 +55,12 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const |
55 | 55 |
cli_errmsg("cli_bm_addpatt: Can't calculate offset for signature %s\n", pattern->virname); |
56 | 56 |
return ret; |
57 | 57 |
} |
58 |
- if(pattern->offdata[0] != CLI_OFF_ANY && pattern->offdata[0] != CLI_OFF_ABSOLUTE) |
|
59 |
- root->bm_reloff_num++; |
|
58 |
+ if(pattern->offdata[0] != CLI_OFF_ANY) { |
|
59 |
+ if(pattern->offdata[0] == CLI_OFF_ABSOLUTE) |
|
60 |
+ root->bm_absoff_num++; |
|
61 |
+ else |
|
62 |
+ root->bm_reloff_num++; |
|
63 |
+ } |
|
60 | 64 |
|
61 | 65 |
#if BM_MIN_LENGTH == BM_BLOCK_SIZE |
62 | 66 |
/* try to load balance bm_suffix (at the cost of bm_shift) */ |
... | ... |
@@ -154,7 +158,7 @@ void cli_bm_free(struct cli_matcher *root) |
154 | 154 |
|
155 | 155 |
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, int fd) |
156 | 156 |
{ |
157 |
- uint32_t i, j, off; |
|
157 |
+ uint32_t i, j, off, off_min, off_max; |
|
158 | 158 |
uint8_t found, pchain, shift; |
159 | 159 |
uint16_t idx, idxchk; |
160 | 160 |
struct cli_bm_patt *p; |
... | ... |
@@ -163,7 +167,6 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v |
163 | 163 |
struct cli_target_info info; |
164 | 164 |
int ret; |
165 | 165 |
|
166 |
- |
|
167 | 166 |
if(!root || !root->bm_shift) |
168 | 167 |
return CL_CLEAN; |
169 | 168 |
|
... | ... |
@@ -226,16 +229,19 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v |
226 | 226 |
if(found && p->length + p->prefix_length == j) { |
227 | 227 |
if(p->offset_min != CLI_OFF_ANY) { |
228 | 228 |
if(p->offdata[0] != CLI_OFF_ABSOLUTE) { |
229 |
- ret = cli_caloff(NULL, &info, fd, root->type, p->offdata, &p->offset_min, &p->offset_max); |
|
229 |
+ ret = cli_caloff(NULL, &info, fd, root->type, p->offdata, &off_min, &off_max); |
|
230 | 230 |
if(ret != CL_SUCCESS) { |
231 | 231 |
cli_errmsg("cli_bm_scanbuff: Can't calculate relative offset in signature for %s\n", p->virname); |
232 | 232 |
if(info.exeinfo.section) |
233 | 233 |
free(info.exeinfo.section); |
234 | 234 |
return ret; |
235 | 235 |
} |
236 |
+ } else { |
|
237 |
+ off_min = p->offset_min; |
|
238 |
+ off_max = p->offset_max; |
|
236 | 239 |
} |
237 | 240 |
off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE; |
238 |
- if(p->offset_max > off || p->offset_min < off) { |
|
241 |
+ if(off_max < off || off_min > off) { |
|
239 | 242 |
p = p->next; |
240 | 243 |
continue; |
241 | 244 |
} |
... | ... |
@@ -73,7 +73,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset, |
73 | 73 |
|
74 | 74 |
if(troot) { |
75 | 75 |
|
76 |
- if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) |
|
76 |
+ if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) |
|
77 | 77 |
return ret; |
78 | 78 |
|
79 | 79 |
if(troot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, troot, offset, -1)) != CL_VIRUS) |
... | ... |
@@ -86,7 +86,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset, |
86 | 86 |
return ret; |
87 | 87 |
} |
88 | 88 |
|
89 |
- if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) |
|
89 |
+ if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) |
|
90 | 90 |
return ret; |
91 | 91 |
|
92 | 92 |
if(groot->ac_only || (ret = cli_bm_scanbuff(buffer, length, virname, groot, offset, -1)) != CL_VIRUS) |
... | ... |
@@ -366,11 +366,11 @@ int cli_scandesc(int desc, cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struc |
366 | 366 |
} |
367 | 367 |
|
368 | 368 |
if(!ftonly) |
369 |
- if((ret = cli_ac_caloff(groot, desc)) || (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) |
|
369 |
+ if((ret = cli_ac_initdata(&gdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) || (ret = cli_ac_caloff(groot, &gdata, desc))) |
|
370 | 370 |
return ret; |
371 | 371 |
|
372 | 372 |
if(troot) { |
373 |
- if((ret = cli_ac_caloff(troot, desc)) || (ret = cli_ac_initdata(&tdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) { |
|
373 |
+ if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)) || (ret = cli_ac_caloff(troot, &tdata, desc))) { |
|
374 | 374 |
if(!ftonly) |
375 | 375 |
cli_ac_freedata(&gdata); |
376 | 376 |
return ret; |
... | ... |
@@ -76,7 +76,7 @@ struct cli_matcher { |
76 | 76 |
struct cli_bm_patt **bm_suffix; |
77 | 77 |
struct cli_hashset md5_sizes_hs; |
78 | 78 |
uint32_t *soff, soff_len; /* for PE section sigs */ |
79 |
- uint32_t bm_patterns, bm_reloff_num; |
|
79 |
+ uint32_t bm_patterns, bm_reloff_num, bm_absoff_num; |
|
80 | 80 |
|
81 | 81 |
/* Extended Aho-Corasick */ |
82 | 82 |
uint32_t ac_partsigs, ac_nodes, ac_patterns, ac_lsigs; |
... | ... |
@@ -84,7 +84,7 @@ struct cli_matcher { |
84 | 84 |
struct cli_ac_node *ac_root, **ac_nodetable; |
85 | 85 |
struct cli_ac_patt **ac_pattable; |
86 | 86 |
struct cli_ac_patt **ac_reloff; |
87 |
- uint32_t ac_reloff_num; |
|
87 |
+ uint32_t ac_reloff_num, ac_absoff_num; |
|
88 | 88 |
uint8_t ac_mindepth, ac_maxdepth; |
89 | 89 |
|
90 | 90 |
uint16_t maxpatlen; |
... | ... |
@@ -52,6 +52,7 @@ static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $"; |
52 | 52 |
#include "pdf.h" |
53 | 53 |
#include "scanners.h" |
54 | 54 |
#include "fmap.h" |
55 |
+#include "str.h" |
|
55 | 56 |
|
56 | 57 |
#ifndef O_BINARY |
57 | 58 |
#define O_BINARY 0 |
... | ... |
@@ -67,7 +68,6 @@ static int flatedecode(unsigned char *buf, off_t len, int fout, cli_ctx *ctx); |
67 | 67 |
static int ascii85decode(const char *buf, off_t len, unsigned char *output); |
68 | 68 |
static const char *pdf_nextlinestart(const char *ptr, size_t len); |
69 | 69 |
static const char *pdf_nextobject(const char *ptr, size_t len); |
70 |
-static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns); |
|
71 | 70 |
|
72 | 71 |
int |
73 | 72 |
cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
... | ... |
@@ -83,6 +83,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
83 | 83 |
unsigned int files; |
84 | 84 |
struct stat statb; |
85 | 85 |
struct F_MAP *map; |
86 |
+ int opt_failed = 0; |
|
86 | 87 |
|
87 | 88 |
cli_dbgmsg("in cli_pdf(%s)\n", dir); |
88 | 89 |
|
... | ... |
@@ -146,7 +147,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
146 | 146 |
* q points to the end of the trailer section |
147 | 147 |
*/ |
148 | 148 |
trailerlength = (long)(q - trailerstart); |
149 |
- if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) { |
|
149 |
+ if(cli_memstr(trailerstart, trailerlength, "Encrypt", 7)) { |
|
150 | 150 |
/* |
151 | 151 |
* This tends to mean that the file is, in effect, read-only |
152 | 152 |
* http://www.cs.cmu.edu/~dst/Adobe/Gallery/anon21jul01-pdf-encryption.txt |
... | ... |
@@ -239,7 +240,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
239 | 239 |
|
240 | 240 |
bytesleft -= (off_t)((q - p) + 3); |
241 | 241 |
objstart = p = &q[3]; |
242 |
- objend = cli_pmemstr(p, bytesleft, "endobj", 6); |
|
242 |
+ objend = cli_memstr(p, bytesleft, "endobj", 6); |
|
243 | 243 |
if(objend == NULL) { |
244 | 244 |
cli_dbgmsg("cli_pdf: No matching endobj\n"); |
245 | 245 |
break; |
... | ... |
@@ -249,7 +250,7 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
249 | 249 |
objlen = (unsigned long)(objend - objstart); |
250 | 250 |
|
251 | 251 |
/* Is this object a stream? */ |
252 |
- streamstart = cli_pmemstr(objstart, objlen, "stream", 6); |
|
252 |
+ streamstart = cli_memstr(objstart, objlen, "stream", 6); |
|
253 | 253 |
if(streamstart == NULL) |
254 | 254 |
continue; |
255 | 255 |
|
... | ... |
@@ -275,7 +276,6 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
275 | 275 |
*/ |
276 | 276 |
if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) { |
277 | 277 |
const char *r, *nq; |
278 |
- int opt_failed = 0; |
|
279 | 278 |
size_t len; |
280 | 279 |
char b[14]; |
281 | 280 |
|
... | ... |
@@ -287,10 +287,15 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
287 | 287 |
length = (unsigned long)strlen(b); |
288 | 288 |
/* optimization: assume objects |
289 | 289 |
* are sequential */ |
290 |
- nq = q; |
|
291 |
- len = buf + size - q; |
|
290 |
+ if(!opt_failed) { |
|
291 |
+ nq = q; |
|
292 |
+ len = buf + size - q; |
|
293 |
+ } else { |
|
294 |
+ nq = buf; |
|
295 |
+ len = q - buf; |
|
296 |
+ } |
|
292 | 297 |
do { |
293 |
- r = cli_pmemstr(nq, len, b, length); |
|
298 |
+ r = cli_memstr(nq, len, b, length); |
|
294 | 299 |
if (r > nq) { |
295 | 300 |
const char x = *(r-1); |
296 | 301 |
if (x == '\n' || x=='\r') { |
... | ... |
@@ -299,8 +304,8 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
299 | 299 |
} |
300 | 300 |
} |
301 | 301 |
if (r) { |
302 |
- len -= r+1-nq; |
|
303 |
- nq = r + 1; |
|
302 |
+ len -= r + length - nq; |
|
303 |
+ nq = r + length; |
|
304 | 304 |
} else if (!opt_failed) { |
305 | 305 |
/* we failed optimized match, |
306 | 306 |
* try matching from the beginning |
... | ... |
@@ -382,9 +387,9 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
382 | 382 |
break; |
383 | 383 |
len -= (int)(q - streamstart); |
384 | 384 |
streamstart = q; |
385 |
- streamend = cli_pmemstr(streamstart, len, "endstream\n", 10); |
|
385 |
+ streamend = cli_memstr(streamstart, len, "endstream\n", 10); |
|
386 | 386 |
if(streamend == NULL) { |
387 |
- streamend = cli_pmemstr(streamstart, len, "endstream\r", 10); |
|
387 |
+ streamend = cli_memstr(streamstart, len, "endstream\r", 10); |
|
388 | 388 |
if(streamend == NULL) { |
389 | 389 |
cli_dbgmsg("cli_pdf: No endstream\n"); |
390 | 390 |
break; |
... | ... |
@@ -532,6 +537,10 @@ cli_pdf(const char *dir, int desc, cli_ctx *ctx, off_t offset) |
532 | 532 |
rc = CL_EUNLINK; |
533 | 533 |
break; |
534 | 534 |
} |
535 |
+ if(cli_updatelimits(ctx, real_streamlen) != CL_SUCCESS) { |
|
536 |
+ rc = CL_CLEAN; |
|
537 |
+ break; |
|
538 |
+ } |
|
535 | 539 |
continue; |
536 | 540 |
} else |
537 | 541 |
tableInsert(md5table, md5str, 1); |
... | ... |
@@ -714,7 +723,7 @@ ascii85decode(const char *buf, off_t len, unsigned char *output) |
714 | 714 |
int quintet = 0; |
715 | 715 |
int ret = 0; |
716 | 716 |
|
717 |
- if(cli_pmemstr(buf, len, "~>", 2) == NULL) |
|
717 |
+ if(cli_memstr(buf, len, "~>", 2) == NULL) |
|
718 | 718 |
cli_dbgmsg("cli_pdf: ascii85decode: no EOF marker found\n"); |
719 | 719 |
|
720 | 720 |
ptr = buf; |
... | ... |
@@ -842,43 +851,3 @@ pdf_nextobject(const char *ptr, size_t len) |
842 | 842 |
} |
843 | 843 |
return NULL; |
844 | 844 |
} |
845 |
- |
|
846 |
-/* |
|
847 |
- * like cli_memstr - but returns the location of the match |
|
848 |
- * FIXME: need a case insensitive version |
|
849 |
- */ |
|
850 |
-static const char * |
|
851 |
-cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns) |
|
852 |
-{ |
|
853 |
- const char *pt, *hay; |
|
854 |
- size_t n; |
|
855 |
- |
|
856 |
- if(haystack == needle) |
|
857 |
- return haystack; |
|
858 |
- |
|
859 |
- if(hs < ns) |
|
860 |
- return NULL; |
|
861 |
- |
|
862 |
- if(memcmp(haystack, needle, ns) == 0) |
|
863 |
- return haystack; |
|
864 |
- |
|
865 |
- pt = hay = haystack; |
|
866 |
- n = hs; |
|
867 |
- |
|
868 |
- while((pt = memchr(hay, needle[0], n)) != NULL) { |
|
869 |
- n -= (size_t)(pt - hay); |
|
870 |
- if(n < ns) |
|
871 |
- break; |
|
872 |
- |
|
873 |
- if(memcmp(pt, needle, ns) == 0) |
|
874 |
- return pt; |
|
875 |
- |
|
876 |
- if(hay == pt) { |
|
877 |
- n--; |
|
878 |
- hay++; |
|
879 |
- } else |
|
880 |
- hay = pt; |
|
881 |
- } |
|
882 |
- |
|
883 |
- return NULL; |
|
884 |
-} |
... | ... |
@@ -2152,7 +2152,7 @@ int cl_engine_compile(struct cl_engine *engine) |
2152 | 2152 |
if((root = engine->root[i])) { |
2153 | 2153 |
if((ret = cli_ac_buildtrie(root))) |
2154 | 2154 |
return ret; |
2155 |
- cli_dbgmsg("matcher[%u]: %s: AC sigs: %u (reloff: %u) BM sigs: %u (reloff: %u) %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->bm_patterns, root->bm_reloff_num, root->ac_only ? "(ac_only mode)" : ""); |
|
2155 |
+ cli_dbgmsg("matcher[%u]: %s: AC sigs: %u (reloff: %u, absoff: %u) BM sigs: %u (reloff: %u, absoff: %u) %s\n", i, cli_mtargets[i].name, root->ac_patterns, root->ac_reloff_num, root->ac_absoff_num, root->bm_patterns, root->bm_reloff_num, root->bm_absoff_num, root->ac_only ? "(ac_only mode)" : ""); |
|
2156 | 2156 |
} |
2157 | 2157 |
} |
2158 | 2158 |
|
... | ... |
@@ -289,7 +289,7 @@ int regex_list_match(struct regex_matcher* matcher,char* real_url,const char* di |
289 | 289 |
buffer[buffer_len]=0; |
290 | 290 |
cli_dbgmsg("Looking up in regex_list: %s\n", buffer); |
291 | 291 |
|
292 |
- if((rc = cli_ac_initdata(&mdata, 0, 0, CLI_DEFAULT_AC_TRACKLEN))) |
|
292 |
+ if((rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN))) |
|
293 | 293 |
return rc; |
294 | 294 |
|
295 | 295 |
bufrev = cli_strdup(buffer); |
... | ... |
@@ -1062,10 +1062,10 @@ static int cli_scanscript(int desc, cli_ctx *ctx) |
1062 | 1062 |
text_normalize_init(&state, normalized, SCANBUFF + maxpatlen); |
1063 | 1063 |
ret = CL_CLEAN; |
1064 | 1064 |
|
1065 |
- if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) |
|
1065 |
+ if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) |
|
1066 | 1066 |
return ret; |
1067 | 1067 |
|
1068 |
- if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, CLI_DEFAULT_AC_TRACKLEN))) { |
|
1068 |
+ if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) { |
|
1069 | 1069 |
cli_ac_freedata(&tmdata); |
1070 | 1070 |
return ret; |
1071 | 1071 |
} |
... | ... |
@@ -369,37 +369,33 @@ char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *out |
369 | 369 |
return output; |
370 | 370 |
} |
371 | 371 |
|
372 |
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns) |
|
372 |
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns) |
|
373 | 373 |
{ |
374 |
- const char *pt, *hay; |
|
375 |
- int n; |
|
374 |
+ unsigned int i, s1, s2; |
|
376 | 375 |
|
377 |
- |
|
378 |
- if(hs < ns) |
|
376 |
+ if(!hs || !ns || hs < ns) |
|
379 | 377 |
return NULL; |
380 | 378 |
|
381 |
- if(haystack == needle) |
|
382 |
- return haystack; |
|
383 |
- |
|
384 |
- if(!memcmp(haystack, needle, ns)) |
|
379 |
+ if(needle == haystack) |
|
385 | 380 |
return haystack; |
386 | 381 |
|
387 |
- pt = hay = haystack; |
|
388 |
- n = hs; |
|
382 |
+ if(ns == 1) |
|
383 |
+ return memchr(haystack, needle[0], hs); |
|
389 | 384 |
|
390 |
- while((pt = memchr(hay, needle[0], n)) != NULL) { |
|
391 |
- n -= (int) (pt - hay); |
|
392 |
- if(n < ns) |
|
393 |
- break; |
|
394 |
- |
|
395 |
- if(!memcmp(pt, needle, ns)) |
|
396 |
- return pt; |
|
397 |
- |
|
398 |
- if(hay == pt) { |
|
399 |
- n--; |
|
400 |
- hay++; |
|
385 |
+ if(needle[0] == needle[1]) { |
|
386 |
+ s1 = 2; |
|
387 |
+ s2 = 1; |
|
388 |
+ } else { |
|
389 |
+ s1 = 1; |
|
390 |
+ s2 = 2; |
|
391 |
+ } |
|
392 |
+ for(i = 0; i <= hs - ns; ) { |
|
393 |
+ if(needle[1] != haystack[i + 1]) { |
|
394 |
+ i += s1; |
|
401 | 395 |
} else { |
402 |
- hay = pt; |
|
396 |
+ if((needle[0] == haystack[i]) && !memcmp(needle + 2, haystack + i + 2, ns - 2)) |
|
397 |
+ return &haystack[i]; |
|
398 |
+ i += s2; |
|
403 | 399 |
} |
404 | 400 |
} |
405 | 401 |
|
... | ... |
@@ -42,7 +42,7 @@ int cli_hex2num(const char *hex); |
42 | 42 |
char *cli_str2hex(const char *string, unsigned int len); |
43 | 43 |
char *cli_utf16toascii(const char *str, unsigned int length); |
44 | 44 |
char *cli_strtokbuf(const char *input, int fieldno, const char *delim, char *output); |
45 |
-const char *cli_memstr(const char *haystack, int hs, const char *needle, int ns); |
|
45 |
+const char *cli_memstr(const char *haystack, unsigned int hs, const char *needle, unsigned int ns); |
|
46 | 46 |
char *cli_strrcpy(char *dest, const char *source); |
47 | 47 |
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens); |
48 | 48 |
int cli_isnumber(const char *str); |
... | ... |
@@ -76,7 +76,7 @@ START_TEST (test_ac_scanbuff) { |
76 | 76 |
ret = cli_ac_buildtrie(root); |
77 | 77 |
fail_unless(ret == CL_SUCCESS, "cli_ac_buildtrie() failed"); |
78 | 78 |
|
79 |
- ret = cli_ac_initdata(&mdata, root->ac_partsigs, 0, CLI_DEFAULT_AC_TRACKLEN); |
|
79 |
+ ret = cli_ac_initdata(&mdata, root->ac_partsigs, 0, 0, CLI_DEFAULT_AC_TRACKLEN); |
|
80 | 80 |
fail_unless(ret == CL_SUCCESS, "cli_ac_initdata() failed"); |
81 | 81 |
|
82 | 82 |
for(i = 0; ac_testdata[i].data; i++) { |