Browse code

fuzz - 12166 - Fix for a 4-byte out-of-bounds write wherein a member variable of an invalid struct pointer is set to zero. The fix adds bounds checking to the Uniq storage 'add' function as well as error code checks. Also includes a lot of new inline documentation.

Micah Snyder authored on 2019/01/23 04:05:05
Showing 10 changed files
... ...
@@ -101,6 +101,7 @@ CLAMAV_PRIVATE {
101 101
     cli_ppt_vba_read;
102 102
     cli_wm_readdir;
103 103
     cli_wm_decrypt_macro;
104
+    cli_free_vba_project;
104 105
     cli_readn;
105 106
     cli_str2hex;
106 107
     cli_hashfile;
... ...
@@ -3,9 +3,9 @@
3 3
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4 4
  *
5 5
  *  Authors: Trog
6
- * 
6
+ *
7 7
  *  Summary: Extract component parts of OLE2 files (e.g. MS Office Documents).
8
- * 
8
+ *
9 9
  *  Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf.
10 10
  *
11 11
  *  This program is free software; you can redistribute it and/or modify
... ...
@@ -822,10 +822,18 @@ handler_writefile(ole2_header_t * hdr, property_t * prop, const char *dir, cli_c
822 822
         return CL_SUCCESS;
823 823
     }
824 824
     name = get_property_name2(prop->name, prop->name_size);
825
-    if (name)
826
-        cnt = uniq_add(hdr->U, name, strlen(name), &hash);
827
-    else
828
-        cnt = uniq_add(hdr->U, NULL, 0, &hash);
825
+    if (name) {
826
+        if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) {
827
+            free(name);
828
+            cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
829
+            return CL_BREAK;
830
+        }
831
+    } else {
832
+        if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) {
833
+            cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
834
+            return CL_BREAK;
835
+        }
836
+    }
829 837
     snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt);
830 838
     newname[sizeof(newname) - 1] = '\0';
831 839
     cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "<empty>", newname);
... ...
@@ -1215,7 +1215,8 @@ static int vba_scandata(const unsigned char *data, unsigned int len, cli_ctx *ct
1215 1215
 
1216 1216
 static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1217 1217
 {
1218
-    int ret = CL_CLEAN, i, j, fd, data_len, hasmacros = 0;
1218
+    cl_error_t ret = CL_CLEAN;
1219
+    int i, j, fd, data_len, hasmacros = 0;
1219 1220
     vba_project_t *vba_project;
1220 1221
     DIR *dd;
1221 1222
     struct dirent *dent;
... ...
@@ -1229,15 +1230,19 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1229 1229
     char *fullname, vbaname[1024];
1230 1230
     unsigned char *data;
1231 1231
     char *hash;
1232
-    uint32_t hashcnt;
1232
+    uint32_t hashcnt           = 0;
1233 1233
     unsigned int viruses_found = 0;
1234 1234
 
1235 1235
     cli_dbgmsg("VBADir: %s\n", dirname);
1236
-    hashcnt = uniq_get(U, "_vba_project", 12, NULL);
1237
-    while (hashcnt--)
1238
-    {
1239
-        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt)))
1236
+    if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1237
+        cli_dbgmsg("VBADir: uniq_get('_vba_project') failed with ret code (%d)!\n", ret);
1238
+        return ret;
1239
+    }
1240
+    while (hashcnt) {
1241
+        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1242
+            hashcnt--;
1240 1243
             continue;
1244
+        }
1241 1245
 
1242 1246
         for (i = 0; i < vba_project->count; i++)
1243 1247
         {
... ...
@@ -1246,8 +1251,10 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1246 1246
                 snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", vba_project->dir, vba_project->name[i], j);
1247 1247
                 vbaname[sizeof(vbaname) - 1] = '\0';
1248 1248
                 fd = open(vbaname, O_RDONLY | O_BINARY);
1249
-                if (fd == -1)
1249
+                if (fd == -1) {
1250
+                    hashcnt--;
1250 1251
                     continue;
1252
+                }
1251 1253
                 cli_dbgmsg("VBADir: Decompress VBA project '%s_%u'\n", vba_project->name[i], j);
1252 1254
                 data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
1253 1255
                 close(fd);
... ...
@@ -1299,29 +1306,30 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1299 1299
             }
1300 1300
         }
1301 1301
 
1302
-        free(vba_project->name);
1303
-        free(vba_project->colls);
1304
-        free(vba_project->dir);
1305
-        free(vba_project->offset);
1306
-        free(vba_project);
1302
+        cli_free_vba_project(vba_project);
1303
+        vba_project = NULL;
1304
+
1307 1305
         if (ret == CL_VIRUS && !SCAN_ALLMATCHES)
1308 1306
             break;
1307
+
1308
+        hashcnt--;
1309 1309
     }
1310 1310
 
1311
-    if ((ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) &&
1312
-        (hashcnt = uniq_get(U, "powerpoint document", 19, &hash)))
1313
-    {
1314
-        while (hashcnt--)
1315
-        {
1311
+    if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1312
+        if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1313
+            cli_dbgmsg("VBADir: uniq_get('powerpoint document') failed with ret code (%d)!\n", ret);
1314
+            return ret;
1315
+        }
1316
+        while (hashcnt) {
1316 1317
             snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1317 1318
             vbaname[sizeof(vbaname) - 1] = '\0';
1318 1319
             fd = open(vbaname, O_RDONLY | O_BINARY);
1319
-            if (fd == -1)
1320
+            if (fd == -1) {
1321
+                hashcnt--;
1320 1322
                 continue;
1321
-            if ((fullname = cli_ppt_vba_read(fd, ctx)))
1322
-            {
1323
-                if (cli_scandir(fullname, ctx) == CL_VIRUS)
1324
-                {
1323
+            }
1324
+            if ((fullname = cli_ppt_vba_read(fd, ctx))) {
1325
+                if (cli_scandir(fullname, ctx) == CL_VIRUS) {
1325 1326
                     ret = CL_VIRUS;
1326 1327
                     viruses_found++;
1327 1328
                 }
... ...
@@ -1330,23 +1338,28 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1330 1330
                 free(fullname);
1331 1331
             }
1332 1332
             close(fd);
1333
+            hashcnt--;
1333 1334
         }
1334 1335
     }
1335 1336
 
1336
-    if ((ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) &&
1337
-        (hashcnt = uniq_get(U, "worddocument", 12, &hash)))
1338
-    {
1339
-        while (hashcnt--)
1340
-        {
1337
+    if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1338
+        if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1339
+            cli_dbgmsg("VBADir: uniq_get('worddocument') failed with ret code (%d)!\n", ret);
1340
+            return ret;
1341
+        }
1342
+        while (hashcnt) {
1341 1343
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1342 1344
             vbaname[sizeof(vbaname) - 1] = '\0';
1343 1345
             fd = open(vbaname, O_RDONLY | O_BINARY);
1344
-            if (fd == -1)
1346
+            if (fd == -1) {
1347
+                hashcnt--;
1345 1348
                 continue;
1349
+            }
1346 1350
 
1347 1351
             if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd)))
1348 1352
             {
1349 1353
                 close(fd);
1354
+                hashcnt--;
1350 1355
                 continue;
1351 1356
             }
1352 1357
 
... ...
@@ -1379,20 +1392,16 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1379 1379
             }
1380 1380
 
1381 1381
             close(fd);
1382
-            free(vba_project->name);
1383
-            free(vba_project->colls);
1384
-            free(vba_project->dir);
1385
-            free(vba_project->offset);
1386
-            free(vba_project->key);
1387
-            free(vba_project->length);
1388
-            free(vba_project);
1389
-            if (ret == CL_VIRUS)
1390
-            {
1382
+            cli_free_vba_project(vba_project);
1383
+            vba_project = NULL;
1384
+
1385
+            if (ret == CL_VIRUS) {
1391 1386
                 if (SCAN_ALLMATCHES)
1392 1387
                     viruses_found++;
1393 1388
                 else
1394 1389
                     break;
1395 1390
             }
1391
+            hashcnt--;
1396 1392
         }
1397 1393
     }
1398 1394
 
... ...
@@ -1401,11 +1410,12 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1401 1401
 
1402 1402
 #if HAVE_JSON
1403 1403
     /* JSON Output Summary Information */
1404
-    if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL))
1405
-    {
1406
-        hashcnt = uniq_get(U, "_5_summaryinformation", 21, &hash);
1407
-        while (hashcnt--)
1408
-        {
1404
+    if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1405
+        if (CL_SUCCESS != (ret = uniq_get(U, "_5_summaryinformation", 21, &hash, &hashcnt))) {
1406
+            cli_dbgmsg("VBADir: uniq_get('_5_summaryinformation') failed with ret code (%d)!\n", ret);
1407
+            return ret;
1408
+        }
1409
+        while (hashcnt) {
1409 1410
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1410 1411
             vbaname[sizeof(vbaname) - 1] = '\0';
1411 1412
 
... ...
@@ -1417,11 +1427,14 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1417 1417
                 cli_ole2_summary_json(ctx, fd, 0);
1418 1418
                 close(fd);
1419 1419
             }
1420
+            hashcnt--;
1420 1421
         }
1421 1422
 
1422
-        hashcnt = uniq_get(U, "_5_documentsummaryinformation", 29, &hash);
1423
-        while (hashcnt--)
1424
-        {
1423
+        if (CL_SUCCESS != (ret = uniq_get(U, "_5_documentsummaryinformation", 29, &hash, &hashcnt))) {
1424
+            cli_dbgmsg("VBADir: uniq_get('_5_documentsummaryinformation') failed with ret code (%d)!\n", ret);
1425
+            return ret;
1426
+        }
1427
+        while (hashcnt) {
1425 1428
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1426 1429
             vbaname[sizeof(vbaname) - 1] = '\0';
1427 1430
 
... ...
@@ -1433,14 +1446,17 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1433 1433
                 cli_ole2_summary_json(ctx, fd, 1);
1434 1434
                 close(fd);
1435 1435
             }
1436
+            hashcnt--;
1436 1437
         }
1437 1438
     }
1438 1439
 #endif
1439 1440
 
1440 1441
     /* Check directory for embedded OLE objects */
1441
-    hashcnt = uniq_get(U, "_1_ole10native", 14, &hash);
1442
-    while (hashcnt--)
1443
-    {
1442
+    if (CL_SUCCESS != (ret = uniq_get(U, "_1_ole10native", 14, &hash, &hashcnt))) {
1443
+        cli_dbgmsg("VBADir: uniq_get('_1_ole10native') failed with ret code (%d)!\n", ret);
1444
+        return ret;
1445
+    }
1446
+    while (hashcnt) {
1444 1447
         snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1445 1448
         vbaname[sizeof(vbaname) - 1] = '\0';
1446 1449
 
... ...
@@ -1452,6 +1468,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1452 1452
             if (ret != CL_CLEAN && !(ret == CL_VIRUS && SCAN_ALLMATCHES))
1453 1453
                 return ret;
1454 1454
         }
1455
+        hashcnt--;
1455 1456
     }
1456 1457
 
1457 1458
     /* ACAB: since we now hash filenames and handle collisions we
... ...
@@ -46,6 +46,7 @@ struct uniq *uniq_init(uint32_t count) {
46 46
     uniq_free(U);
47 47
     return NULL;
48 48
   }
49
+    U->max_unique_items = count;
49 50
 
50 51
   return U;
51 52
 }
... ...
@@ -55,18 +56,39 @@ void uniq_free(struct uniq *U) {
55 55
   free(U);
56 56
 }
57 57
 
58
-uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
58
+cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
59
+{
60
+  cl_error_t status = CL_EARG;
59 61
   unsigned int i;
60 62
   uint8_t digest[16];
61 63
   struct UNIQMD5 *m = NULL;
62 64
 
63
-  cl_hash_data("md5", key, key_len, digest, NULL);
65
+    if (!U) {
66
+        /* Invalid args */
67
+        goto done;
68
+    }
69
+
70
+    /* Uniq adds are limited by the maximum allocated in uniq_init(). */
71
+    if (U->cur_unique_items >= U->max_unique_items) {
72
+        /* Attempted to add more uniq items than may be stored. */
73
+        status = CL_EMAXSIZE;
74
+        goto done;
75
+    }
76
+
77
+    /* Make a hash of the item string */
78
+    if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
79
+        /* Failed to create hash of item. */
80
+        status = CL_EFORMAT;
81
+        goto done;
82
+    }
64 83
 
84
+    /* Check for md5 digest match in md5 collection */
65 85
   if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest)
66 86
     for(m=&U->md5s[U->idx[*digest]]; m; m=m->next)
67 87
       if(!memcmp(&digest[1], &m->md5[1], 15)) break;
68 88
   
69 89
   if(!m) {
90
+        /* No match. Add new md5 to list */
70 91
     const char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
71 92
 
72 93
     m = &U->md5s[U->items];
... ...
@@ -85,27 +107,85 @@ uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhas
85 85
       m->md5[i] = digest[i];
86 86
     }
87 87
     m->name[32] = '\0';
88
+
89
+        /* Increment # of unique items. */
90
+        U->cur_unique_items++;
88 91
   }
89 92
 
93
+    /* Increment total # of items. */
90 94
   U->items++;
95
+
96
+    /* Increment # items matching this md5 digest (probably just this 1). */
97
+    m->count++;
98
+
99
+    /* Pass back the ascii hash, if requested. */
91 100
   if(rhash) *rhash = m->name;
92
-  return m->count++;
101
+
102
+    /* Pass back the count, if requested. */
103
+    if (count) *count = m->count;
104
+
105
+    status = CL_SUCCESS;
106
+
107
+done:
108
+    return status;
93 109
 }
94 110
 
95
-uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
111
+cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
112
+{
113
+  cl_error_t status = CL_EARG;
96 114
   uint8_t digest[16];
97 115
   struct UNIQMD5 *m = NULL;
116
+    uint32_t idx      = 0;
98 117
 
99
-  cl_hash_data("md5", key, key_len, digest, NULL);
118
+    if (!U || !count) {
119
+        /* Invalid args */
120
+        goto done;
121
+    }
100 122
 
101
-  if(!U->items || U->md5s[U->idx[*digest]].md5[0]!=*digest)
102
-    return 0;
123
+    *count = 0;
103 124
 
104
-  for(m=&U->md5s[U->idx[*digest]]; m; m=m->next) {
105
-    if(memcmp(&digest[1], &m->md5[1], 15)) continue;
106
-    if(rhash) *rhash = m->name;
107
-    return m->count;
125
+    if (!U->items) {
126
+        goto not_found;
127
+    }
128
+
129
+    /* Make a hash of the item string */
130
+    if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
131
+        /* Failed to create hash of item. */
132
+        status = CL_EFORMAT;
133
+        goto done;
134
+    }
135
+
136
+    /* Get the md5s array index for the bucket list head. */
137
+    idx = U->idx[*digest];
138
+    m   = &U->md5s[idx];
139
+
140
+    if (m->md5[0] != *digest) {
141
+        /*
142
+         * If the first byte of the digest doesn't actually match,
143
+         * then the item has never been added.
144
+         * This is a common scenario because the idx table is initialized
145
+         * to 0's.
146
+         */
147
+        goto not_found;
148
+    }
149
+
150
+    do {
151
+        if (0 == memcmp(&digest[1], &m->md5[1], 15)) {
152
+            /* The item-hash matched.
153
+             * Pass back the ascii hash value (if requested).
154
+             * Return the count of matching items (will be 1+).
155
+             */
156
+            if (rhash)
157
+                *rhash = m->name;
158
+            *count = m->count;
159
+            break;
108 160
   }
161
+        m = m->next;
162
+    } while (NULL != m);
163
+
164
+not_found:
165
+    status = CL_SUCCESS;
109 166
 
110
-  return 0;
167
+done:
168
+    return status;
111 169
 }
... ...
@@ -6,6 +6,54 @@
6 6
  *
7 7
  *  Authors: aCaB <acab@clamav.net>
8 8
  *
9
+ *  Uniq implements a structure that stores the count of duplicate items.
10
+ *  The count can be retrieved by item name (if you know it).
11
+ *  Additionally, you can retrieve the ascii md5 hash at the same time.
12
+ *
13
+ *  This is essentially a tiny hash table of hashes.
14
+ *  The hashes are in an array instead of dynamically added.
15
+ *  This is faster than alloc'ing for each unique item added, but means a max # of unique items must be defined at init.
16
+ *
17
+ *  Example where:
18
+ *   items = 6
19
+ *   max_unique_items = 5
20
+ *   cur_unique_items = 4
21
+ *   md5 #1 has been added 3 times
22
+ *   Two md5's start with the same first byte (#0 and #3)
23
+ *
24
+ *    idx:
25
+ *      -00--01--02--03--04--05--06--07-...
26
+ *      | 0 | 0 | 0 | 2 | 1 | 0 | 0 | ...
27
+ *      ------------------------------...
28
+ *
29
+ *    md5s:
30
+ *      ------------------------------
31
+ *   0  | next:  Address of #3
32
+ *      | count: 1
33
+ *      | md5:   0x01,0x98,0x23,0xa8,0xfd,...
34
+ *      | name:  "019823a8fd..."
35
+ *      ------------------------------
36
+ *   1  | next:  NULL
37
+ *      | count: 3
38
+ *      | md5:   0x03,0x98,0x23,0xa8,0xfd,...
39
+ *      | name:  "019823a8fd..."
40
+ *      ------------------------------
41
+ *   2  | next:  NULL
42
+ *      | count: 1
43
+ *      | md5:   0x01,0x98,0x23,0xa8,0xfd,...
44
+ *      | name:  "019823a8fd..."
45
+ *      ------------------------------
46
+ *   3  | next:  NULL
47
+ *      | count: 1
48
+ *      | md5:   0x01,0xdd,0x2f,0x87,0x6a,...
49
+ *      | name:  "01dd2f876a..."
50
+ *      ------------------------------
51
+ *   4  | next:  NULL
52
+ *      | count: 0
53
+ *      | md5:   0x00,0x00,0x00,0x00,0x00,...
54
+ *      | name:  "\0\0\0\0\0..."
55
+ *      ------------------------------
56
+ *
9 57
  *  This program is free software; you can redistribute it and/or modify
10 58
  *  it under the terms of the GNU General Public License version 2 as
11 59
  *  published by the Free Software Foundation.
... ...
@@ -24,25 +72,85 @@
24 24
 #ifndef _UNIQ_H
25 25
 #define _UNIQ_H
26 26
 
27
+#include "clamav.h"
27 28
 #include "clamav-types.h"
28 29
 
30
+/**
31
+ * @brief Store the count of each unique item.
32
+ *
33
+ * These elements are allocated as an array in struct uniq, but they are also
34
+ * linked together using the `next` pointers to form impromptu buckets,
35
+ * categorized using the first byte of each md5.
36
+ */
29 37
 struct UNIQMD5 {
30
-  struct UNIQMD5 *next;
31
-  uint32_t count;
32
-  uint8_t md5[16];
33
-  char name[33];
38
+    struct UNIQMD5 *next; /**< Pointer to next UNIQMD5 where the first two bytes are the same. */
39
+    uint32_t count;       /**< Number of times this item has been added. (# duplicates). */
40
+    uint8_t md5[16];      /**< Binary md5 hash of the item. */
41
+    char name[33];        /**< Ascii md5 hash of the item. */
34 42
 };
35 43
 
44
+/**
45
+ * @brief The main Uniq store structure.
46
+ *
47
+ * Includes array of uniq md5 hashes, and an index table to optimize searches
48
+ * into the hash array, categorized by the first byte of the md5.
49
+ */
36 50
 struct uniq {
37
-  struct UNIQMD5 *md5s;
38
-  uint32_t items;
39
-  uint32_t idx[256];
51
+    struct UNIQMD5 *md5s;      /**< Array of UNIQMD5 structs. */
52
+    uint32_t items;            /**< Total # of items added (including duplicates) */
53
+    uint32_t cur_unique_items; /**< The # of md5s currently stored in the array. */
54
+    uint32_t max_unique_items; /**< The # of md5s that can be stored the array. */
55
+    uint32_t idx[256];         /**< Array of indices into the md5s array.
56
+                                    Each index represents a linked-list of md5s
57
+                                    sharing the common trait that the first
58
+                                    byte is the same. */
40 59
 };
41 60
 
61
+/**
62
+ * @brief Initialize a Uniq store to count the number of uniq string items.
63
+ *
64
+ * The Uniq store must be free'd with uniq_free().
65
+ * uniq_add()'s will fail if they exceed the number of unique strings initialized with count.
66
+ *
67
+ * @param count         The max number of unique string items that may be added.
68
+ * @return struct uniq* A pointer to the Uniq store object. Will return NULL on failure.
69
+ */
42 70
 struct uniq *uniq_init(uint32_t);
71
+
72
+/**
73
+ * @brief Free the Uniq store and associated memory.
74
+ */
43 75
 void uniq_free(struct uniq *);
44
-uint32_t uniq_add(struct uniq *, const char *, uint32_t, char **);
45
-uint32_t uniq_get(struct uniq *, const char *, uint32_t, char **);
76
+
77
+/**
78
+ * @brief Add to the uniq (item md5) count.
79
+ *
80
+ * Adds an item to the list of known items.
81
+ * Increments the count if the item has been seen before.
82
+ * The optional rhash pointer will be valid until `uniq_free()` is called.
83
+ *
84
+ * @param U             The Uniq count store.
85
+ * @param item          (optional) The item to hash and count.
86
+ * @param item_len      The length, in bytes, of the item. May be 0.
87
+ * @param[out] rhash    (optional) A pointer to the item's md5 hash (in ascii).
88
+ * @param[out] count    (optional) The number of times this unique item has been added.
89
+ * @return cl_error_t   CL_SUCCESS if successful, else an error code.
90
+ */
91
+cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t, char **rhash, uint32_t *count);
92
+
93
+/**
94
+ * @brief Retrieve the number of times an item has been added to the Uniq count store.
95
+ *
96
+ * The optional rhash pointer will be valid until `uniq_free()` is called.
97
+ *
98
+ * @param U             The Uniq count store.
99
+ * @param item          (optional) The item to hash and count.
100
+ * @param item_len      The length, in bytes, of the item. May be 0.
101
+ * @param[out] rhash    (optional) A pointer to the item's md5 hash (in ascii).
102
+ * @param[out] count    The number of times this unique item has been added.
103
+ * @return cl_error_t   CL_SUCCESS if successful, else an error code.
104
+ */
105
+cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t, char **rhash, uint32_t *count);
46 106
 
47 107
 
48 108
 #endif
... ...
@@ -294,6 +294,7 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
294 294
 	struct vba56_header v56h;
295 295
 	off_t seekback;
296 296
 	char fullname[1024], *hash;
297
+    uint32_t hashcnt = 0;
297 298
 
298 299
 	cli_dbgmsg("in cli_vba_readdir()\n");
299 300
 
... ...
@@ -304,8 +305,13 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
304 304
 	 * _VBA_PROJECT files are embedded within office documents (OLE2)
305 305
 	 */
306 306
 	
307
-	if (!uniq_get(U, "_vba_project", 12, &hash))
307
+    if (CL_SUCCESS != uniq_get(U, "_vba_project", 12, &hash, &hashcnt)) {
308
+        cli_dbgmsg("vba_readdir: uniq_get('_vba_project') failed. Unable to check # of embedded vba proj files\n");
308 309
 		return NULL;
310
+    }
311
+    if (hashcnt == 0) {
312
+        return NULL;
313
+    }
309 314
 	snprintf(fullname, sizeof(fullname), "%s"PATHSEP"%s_%u", dir, hash, which);
310 315
 	fullname[sizeof(fullname)-1] = '\0';
311 316
 	fd = open(fullname, O_RDONLY|O_BINARY);
... ...
@@ -448,7 +454,13 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
448 448
 		}
449 449
 		ptr = get_unicode_name((const char *)buf, length, big_endian);
450 450
 		if(ptr == NULL) break;
451
-		if (!(vba_project->colls[i]=uniq_get(U, ptr, strlen(ptr), &hash))) {
451
+        if (CL_SUCCESS != uniq_get(U, ptr, strlen(ptr), &hash, &hashcnt)) {
452
+            cli_dbgmsg("vba_readdir: uniq_get('%s') failed.\n", ptr);
453
+            free(ptr);
454
+            break;
455
+        }
456
+        vba_project->colls[i] = hashcnt;
457
+        if (0 == vba_project->colls[i]) {
452 458
 			cli_dbgmsg("vba_readdir: cannot find project %s (%s)\n", ptr, hash);
453 459
 			free(ptr);
454 460
 			break;
... ...
@@ -1308,7 +1320,7 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1308 1308
 {
1309 1309
 	vba_project_t *ret;
1310 1310
 
1311
-	ret = (vba_project_t *) cli_malloc(sizeof(struct vba_project_tag));
1311
+    ret = (vba_project_t *)cli_calloc(1, sizeof(struct vba_project_tag));
1312 1312
 
1313 1313
 	if(ret == NULL) {
1314 1314
         cli_errmsg("create_vba_project: Unable to allocate memory for vba project structure\n");
... ...
@@ -1320,16 +1332,8 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1320 1320
 	ret->dir = cli_strdup(dir);
1321 1321
 	ret->offset = (uint32_t *)cli_malloc (sizeof(uint32_t) * record_count);
1322 1322
 
1323
-	if((ret->name == NULL) || (ret->dir == NULL) || (ret->offset == NULL)) {
1324
-		if(ret->dir)
1325
-			free(ret->dir);
1326
-		if(ret->colls)
1327
-			free(ret->colls);
1328
-		if(ret->name)
1329
-			free(ret->name);
1330
-		if(ret->offset)
1331
-			free(ret->offset);
1332
-		free(ret);
1323
+    if ((ret->colls == NULL) || (ret->name == NULL) || (ret->dir == NULL) || (ret->offset == NULL)) {
1324
+        cli_free_vba_project(ret);
1333 1325
         cli_errmsg("create_vba_project: Unable to allocate memory for vba project elements\n");
1334 1326
 		return NULL;
1335 1327
 	}
... ...
@@ -1338,3 +1342,32 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1338 1338
 
1339 1339
 	return ret;
1340 1340
 }
1341
+
1342
+/**
1343
+ * @brief Free up the memory associated with the vba_project_t type.
1344
+ *
1345
+ * @param project A vba_project_t type allocated by one of these:
1346
+ *  - create_vba_project()
1347
+ *  - cli_wm_readdir()
1348
+ *  - cli_vba_readdir()
1349
+ */
1350
+void cli_free_vba_project(vba_project_t *vba_project)
1351
+{
1352
+    if (vba_project) {
1353
+        if (vba_project->dir)
1354
+            free(vba_project->dir);
1355
+        if (vba_project->colls)
1356
+            free(vba_project->colls);
1357
+        if (vba_project->name)
1358
+            free(vba_project->name);
1359
+        if (vba_project->offset)
1360
+            free(vba_project->offset);
1361
+        if (vba_project->length)
1362
+            free(vba_project->length);
1363
+        if (vba_project->key)
1364
+            free(vba_project->key);
1365
+        free(vba_project);
1366
+    }
1367
+
1368
+    return;
1369
+}
1341 1370
\ No newline at end of file
... ...
@@ -41,6 +41,8 @@ typedef struct vba_project_tag {
41 41
 
42 42
 vba_project_t	*cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which);
43 43
 vba_project_t	*cli_wm_readdir(int fd);
44
+void 			cli_free_vba_project(vba_project_t *vba_project);
45
+
44 46
 unsigned char	*cli_vba_inflate(int fd, off_t offset, int *size);
45 47
 int	cli_scan_ole10(int fd, cli_ctx *ctx);
46 48
 char	*cli_ppt_vba_read(int fd, cli_ctx *ctx);
... ...
@@ -1107,8 +1107,10 @@ static int sigtool_scandir (const char *dirname, int hex_output)
1107 1107
 
1108 1108
 int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1109 1109
 {
1110
-    int ret = CL_CLEAN, i, fd, data_len;
1111
-    vba_project_t *vba_project;
1110
+    cl_error_t status = CL_CLEAN;
1111
+    cl_error_t ret;
1112
+    int i, fd, data_len;
1113
+    vba_project_t *vba_project = NULL;
1112 1114
     DIR *dd;
1113 1115
     struct dirent *dent;
1114 1116
     STATBUF statbuf;
... ...
@@ -1117,14 +1119,22 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1117 1117
     uint32_t hashcnt;
1118 1118
     unsigned int j;
1119 1119
 
1120
-    hashcnt = uniq_get(U, "_vba_project", 12, NULL);
1121
-    while(hashcnt--) {
1122
-	if(!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) continue;
1120
+    if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1121
+        logg("!ScanDir -> uniq_get('_vba_project') failed.\n");
1122
+        return ret;
1123
+    }
1124
+
1125
+    while (hashcnt) {
1126
+        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1127
+            hashcnt--;
1128
+            continue;
1129
+        }
1123 1130
 
1124 1131
 	for(i = 0; i < vba_project->count; i++) {
1125 1132
 	    for(j = 0; j < vba_project->colls[i]; j++) {
1126 1133
 		snprintf(vbaname, 1024, "%s"PATHSEP"%s_%u", vba_project->dir, vba_project->name[i], j);
1127 1134
 		vbaname[sizeof(vbaname)-1] = '\0';
1135
+
1128 1136
 		fd = open(vbaname, O_RDONLY|O_BINARY);
1129 1137
 		if(fd == -1) continue;
1130 1138
 		data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
... ...
@@ -1139,39 +1149,53 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1139 1139
 	    }
1140 1140
 	}
1141 1141
 
1142
-	free(vba_project->name);
1143
-	free(vba_project->colls);
1144
-	free(vba_project->dir);
1145
-	free(vba_project->offset);
1146
-	free(vba_project);
1142
+        cli_free_vba_project(vba_project);
1143
+        vba_project = NULL;
1144
+
1145
+        hashcnt--;
1147 1146
     }
1148 1147
 
1148
+    if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1149
+        logg("!ScanDir -> uniq_get('powerpoint document') failed.\n");
1150
+        return ret;
1151
+    }
1149 1152
 
1150
-    if((hashcnt = uniq_get(U, "powerpoint document", 19, &hash))) {
1151
-	while(hashcnt--) {
1153
+    while (hashcnt) {
1152 1154
 	    snprintf(vbaname, 1024, "%s"PATHSEP"%s_%u", dirname, hash, hashcnt);
1153 1155
 	    vbaname[sizeof(vbaname)-1] = '\0';
1156
+
1154 1157
 	    fd = open(vbaname, O_RDONLY|O_BINARY);
1155
-	    if (fd == -1) continue;
1158
+        if (fd == -1) {
1159
+            hashcnt--;
1160
+            continue;
1161
+        }
1156 1162
 	    if ((fullname = cli_ppt_vba_read(fd, NULL))) {
1157 1163
 	      sigtool_scandir(fullname, hex_output);
1158 1164
 	      cli_rmdirs(fullname);
1159 1165
 	      free(fullname);
1160 1166
 	    }
1161 1167
 	    close(fd);
1168
+        hashcnt--;
1162 1169
 	}
1163
-    }
1164 1170
 
1171
+    if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1172
+        logg("!ScanDir -> uniq_get('worddocument') failed.\n");
1173
+        return ret;
1174
+    }
1165 1175
 
1166
-    if ((hashcnt = uniq_get(U, "worddocument", 12, &hash))) {
1167
-	while(hashcnt--) {
1176
+    while (hashcnt) {
1168 1177
 	    snprintf(vbaname, sizeof(vbaname), "%s"PATHSEP"%s_%u", dirname, hash, hashcnt);
1169 1178
 	    vbaname[sizeof(vbaname)-1] = '\0';
1179
+
1170 1180
 	    fd = open(vbaname, O_RDONLY|O_BINARY);
1171
-	    if (fd == -1) continue;
1181
+        if (fd == -1) {
1182
+            hashcnt--;
1183
+            continue;
1184
+        }
1172 1185
 	    
1173 1186
 	    if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd))) {
1174 1187
 		close(fd);
1188
+            hashcnt--;
1175 1189
 		continue;
1176 1190
 	    }
1177 1191
 
... ...
@@ -1187,14 +1211,9 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1187 1187
 	    }
1188 1188
 
1189 1189
 	    close(fd);
1190
-	    free(vba_project->name);
1191
-	    free(vba_project->colls);
1192
-	    free(vba_project->dir);
1193
-	    free(vba_project->offset);
1194
-	    free(vba_project->key);
1195
-	    free(vba_project->length);
1196
-	    free(vba_project);
1197
-	}
1190
+        cli_free_vba_project(vba_project);
1191
+        vba_project = NULL;
1192
+        hashcnt--;
1198 1193
     }
1199 1194
 
1200 1195
     if ((dd = opendir (dirname)) != NULL) {
... ...
@@ -1221,5 +1240,5 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1221 1221
 
1222 1222
 
1223 1223
     closedir (dd);
1224
-    return ret;
1224
+    return status;
1225 1225
 }
... ...
@@ -28,6 +28,7 @@
28 28
 #include <stdlib.h>
29 29
 #include <string.h>
30 30
 
31
+#include "../libclamav/clamav.h"
31 32
 #include "../libclamav/uniq.h"
32 33
 #include "checks.h"
33 34
 
... ...
@@ -59,12 +60,16 @@ START_TEST (test_uniq_known) {
59 59
   fail_unless(U!=0, "uniq_init");
60 60
 
61 61
   for(i=0; tests[i].expected; i++) {
62
-    u = uniq_add(U, tests[i].key, tests[i].key_len, &hash);
63
-    fail_unless_fmt(u==0 && strcmp(hash, tests[i].expected)==0, "uniq_add(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
62
+        if (CL_SUCCESS != uniq_add(U, tests[i].key, tests[i].key_len, &hash, &u)) {
63
+            fail("uniq_add(%s) failed.", tests[i].key);
64
+        }
65
+        fail_unless_fmt(u == 1 && strcmp(hash, tests[i].expected) == 0, "uniq_add(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
64 66
   }
65 67
 
66 68
   for(i=0; tests[i].expected; i++) {
67
-    u = uniq_get(U, tests[i].key, tests[i].key_len, &hash);
69
+        if (CL_SUCCESS != uniq_get(U, tests[i].key, tests[i].key_len, &hash, &u)) {
70
+            fail("uniq_get(%s) failed.", tests[i].key);
71
+        }
68 72
     fail_unless_fmt(u==1 && strcmp(hash, tests[i].expected)==0, "uniq_get(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
69 73
   }
70 74
 
... ...
@@ -82,11 +87,16 @@ START_TEST (test_uniq_colls) {
82 82
   fail_unless(U!=0, "uniq_init");
83 83
 
84 84
   for(j=4; j>0; j--)
85
-    for (i=0; i<j; i++)
86
-      u = uniq_add(U, tests[i], strlen(tests[i]), NULL);
85
+        for (i = 0; i < j; i++) {
86
+            if (CL_SUCCESS != uniq_add(U, tests[i], strlen(tests[i]), NULL, &u)) {
87
+                fail("uniq_add(%s) failed.", tests[i]);
88
+            }
89
+        }
87 90
   
88 91
   for (i=0; i<4; i++) {
89
-    u = uniq_add(U, tests[i], strlen(tests[i]), NULL);
92
+        if (CL_SUCCESS != uniq_get(U, tests[i], strlen(tests[i]), NULL, &u)) {
93
+            fail("uniq_get(%s) failed.", tests[i]);
94
+        }
90 95
     fail_unless_fmt(u+i==4, "uniq_get(%s) = %u - expected %u", tests[i], u, 4-i);
91 96
   }
92 97
 
... ...
@@ -179,6 +179,7 @@ EXPORTS cli_hwp5header @44381 NONAME
179 179
 EXPORTS cli_scanhwp5_stream @44382 NONAME
180 180
 EXPORTS cli_scanhwp3 @44383 NONAME
181 181
 EXPORTS cli_genhash_pe @44384 NONAME
182
+EXPORTS cli_free_vba_project @44385 NONAME
182 183
 
183 184
 ; compatibility layer, tommath, zlib
184 185
 EXPORTS w32_srand @44269 NONAME