Browse code

fuzz - 12166 - Fix for 4-byte out of bounds write wherein an invalid struct pointer member variable is set to zero. The fix adds bounds checking to the Uniq storage 'add' function as well as error code checks. Included a lot of new inline documentation.

Micah Snyder authored on 2019/01/23 04:05:05
Showing 10 changed files
... ...
@@ -101,6 +101,7 @@ CLAMAV_PRIVATE {
101 101
     cli_ppt_vba_read;
102 102
     cli_wm_readdir;
103 103
     cli_wm_decrypt_macro;
104
+    cli_free_vba_project;
104 105
     cli_readn;
105 106
     cli_str2hex;
106 107
     cli_hashfile;
... ...
@@ -3,9 +3,9 @@
3 3
  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4 4
  *
5 5
  *  Authors: Trog
6
- * 
6
+ *
7 7
  *  Summary: Extract component parts of OLE2 files (e.g. MS Office Documents).
8
- * 
8
+ *
9 9
  *  Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf.
10 10
  *
11 11
  *  This program is free software; you can redistribute it and/or modify
... ...
@@ -814,10 +814,18 @@ handler_writefile(ole2_header_t * hdr, property_t * prop, const char *dir, cli_c
814 814
         return CL_SUCCESS;
815 815
     }
816 816
     name = get_property_name2(prop->name, prop->name_size);
817
-    if (name)
818
-        cnt = uniq_add(hdr->U, name, strlen(name), &hash);
819
-    else
820
-        cnt = uniq_add(hdr->U, NULL, 0, &hash);
817
+    if (name) {
818
+        if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) {
819
+            free(name);
820
+            cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
821
+            return CL_BREAK;
822
+        }
823
+    } else {
824
+        if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) {
825
+            cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
826
+            return CL_BREAK;
827
+        }
828
+    }
821 829
     snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt);
822 830
     newname[sizeof(newname) - 1] = '\0';
823 831
     cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "<empty>", newname);
... ...
@@ -1041,7 +1041,8 @@ static int vba_scandata(const unsigned char *data, unsigned int len, cli_ctx *ct
1041 1041
 
1042 1042
 static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1043 1043
 {
1044
-    int ret = CL_CLEAN, i, j, fd, data_len, hasmacros = 0;
1044
+    cl_error_t ret = CL_CLEAN;
1045
+    int i, j, fd, data_len, hasmacros = 0;
1045 1046
     vba_project_t *vba_project;
1046 1047
     DIR *dd;
1047 1048
     struct dirent *dent;
... ...
@@ -1055,15 +1056,19 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1055 1055
     char *fullname, vbaname[1024];
1056 1056
     unsigned char *data;
1057 1057
     char *hash;
1058
-    uint32_t hashcnt;
1058
+    uint32_t hashcnt           = 0;
1059 1059
     unsigned int viruses_found = 0;
1060 1060
 
1061 1061
     cli_dbgmsg("VBADir: %s\n", dirname);
1062
-    hashcnt = uniq_get(U, "_vba_project", 12, NULL);
1063
-    while (hashcnt--)
1064
-    {
1065
-        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt)))
1062
+    if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1063
+        cli_dbgmsg("VBADir: uniq_get('_vba_project') failed with ret code (%d)!\n", ret);
1064
+        return ret;
1065
+    }
1066
+    while (hashcnt) {
1067
+        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1068
+            hashcnt--;
1066 1069
             continue;
1070
+        }
1067 1071
 
1068 1072
         for (i = 0; i < vba_project->count; i++)
1069 1073
         {
... ...
@@ -1072,8 +1077,10 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1072 1072
                 snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", vba_project->dir, vba_project->name[i], j);
1073 1073
                 vbaname[sizeof(vbaname) - 1] = '\0';
1074 1074
                 fd = open(vbaname, O_RDONLY | O_BINARY);
1075
-                if (fd == -1)
1075
+                if (fd == -1) {
1076
+                    hashcnt--;
1076 1077
                     continue;
1078
+                }
1077 1079
                 cli_dbgmsg("VBADir: Decompress VBA project '%s_%u'\n", vba_project->name[i], j);
1078 1080
                 data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
1079 1081
                 close(fd);
... ...
@@ -1125,29 +1132,30 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1125 1125
             }
1126 1126
         }
1127 1127
 
1128
-        free(vba_project->name);
1129
-        free(vba_project->colls);
1130
-        free(vba_project->dir);
1131
-        free(vba_project->offset);
1132
-        free(vba_project);
1128
+        cli_free_vba_project(vba_project);
1129
+        vba_project = NULL;
1130
+
1133 1131
         if (ret == CL_VIRUS && !SCAN_ALL)
1134 1132
             break;
1133
+
1134
+        hashcnt--;
1135 1135
     }
1136 1136
 
1137
-    if ((ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALL)) &&
1138
-        (hashcnt = uniq_get(U, "powerpoint document", 19, &hash)))
1139
-    {
1140
-        while (hashcnt--)
1141
-        {
1137
+    if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALL)) {
1138
+        if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1139
+            cli_dbgmsg("VBADir: uniq_get('powerpoint document') failed with ret code (%d)!\n", ret);
1140
+            return ret;
1141
+        }
1142
+        while (hashcnt) {
1142 1143
             snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1143 1144
             vbaname[sizeof(vbaname) - 1] = '\0';
1144 1145
             fd = open(vbaname, O_RDONLY | O_BINARY);
1145
-            if (fd == -1)
1146
+            if (fd == -1) {
1147
+                hashcnt--;
1146 1148
                 continue;
1147
-            if ((fullname = cli_ppt_vba_read(fd, ctx)))
1148
-            {
1149
-                if (cli_scandir(fullname, ctx) == CL_VIRUS)
1150
-                {
1149
+            }
1150
+            if ((fullname = cli_ppt_vba_read(fd, ctx))) {
1151
+                if (cli_scandir(fullname, ctx) == CL_VIRUS) {
1151 1152
                     ret = CL_VIRUS;
1152 1153
                     viruses_found++;
1153 1154
                 }
... ...
@@ -1156,23 +1164,28 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1156 1156
                 free(fullname);
1157 1157
             }
1158 1158
             close(fd);
1159
+            hashcnt--;
1159 1160
         }
1160 1161
     }
1161 1162
 
1162
-    if ((ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALL)) &&
1163
-        (hashcnt = uniq_get(U, "worddocument", 12, &hash)))
1164
-    {
1165
-        while (hashcnt--)
1166
-        {
1163
+    if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALL)) {
1164
+        if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1165
+            cli_dbgmsg("VBADir: uniq_get('worddocument') failed with ret code (%d)!\n", ret);
1166
+            return ret;
1167
+        }
1168
+        while (hashcnt) {
1167 1169
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1168 1170
             vbaname[sizeof(vbaname) - 1] = '\0';
1169 1171
             fd = open(vbaname, O_RDONLY | O_BINARY);
1170
-            if (fd == -1)
1172
+            if (fd == -1) {
1173
+                hashcnt--;
1171 1174
                 continue;
1175
+            }
1172 1176
 
1173 1177
             if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd)))
1174 1178
             {
1175 1179
                 close(fd);
1180
+                hashcnt--;
1176 1181
                 continue;
1177 1182
             }
1178 1183
 
... ...
@@ -1205,20 +1218,16 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1205 1205
             }
1206 1206
 
1207 1207
             close(fd);
1208
-            free(vba_project->name);
1209
-            free(vba_project->colls);
1210
-            free(vba_project->dir);
1211
-            free(vba_project->offset);
1212
-            free(vba_project->key);
1213
-            free(vba_project->length);
1214
-            free(vba_project);
1215
-            if (ret == CL_VIRUS)
1216
-            {
1208
+            cli_free_vba_project(vba_project);
1209
+            vba_project = NULL;
1210
+
1211
+            if (ret == CL_VIRUS) {
1217 1212
                 if (SCAN_ALL)
1218 1213
                     viruses_found++;
1219 1214
                 else
1220 1215
                     break;
1221 1216
             }
1217
+            hashcnt--;
1222 1218
         }
1223 1219
     }
1224 1220
 
... ...
@@ -1227,11 +1236,12 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1227 1227
 
1228 1228
 #if HAVE_JSON
1229 1229
     /* JSON Output Summary Information */
1230
-    if (ctx->options & CL_SCAN_FILE_PROPERTIES && ctx->wrkproperty != NULL)
1231
-    {
1232
-        hashcnt = uniq_get(U, "_5_summaryinformation", 21, &hash);
1233
-        while (hashcnt--)
1234
-        {
1230
+    if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) {
1231
+        if (CL_SUCCESS != (ret = uniq_get(U, "_5_summaryinformation", 21, &hash, &hashcnt))) {
1232
+            cli_dbgmsg("VBADir: uniq_get('_5_summaryinformation') failed with ret code (%d)!\n", ret);
1233
+            return ret;
1234
+        }
1235
+        while (hashcnt) {
1235 1236
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1236 1237
             vbaname[sizeof(vbaname) - 1] = '\0';
1237 1238
 
... ...
@@ -1243,11 +1253,14 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1243 1243
                 cli_ole2_summary_json(ctx, fd, 0);
1244 1244
                 close(fd);
1245 1245
             }
1246
+            hashcnt--;
1246 1247
         }
1247 1248
 
1248
-        hashcnt = uniq_get(U, "_5_documentsummaryinformation", 29, &hash);
1249
-        while (hashcnt--)
1250
-        {
1249
+        if (CL_SUCCESS != (ret = uniq_get(U, "_5_documentsummaryinformation", 29, &hash, &hashcnt))) {
1250
+            cli_dbgmsg("VBADir: uniq_get('_5_documentsummaryinformation') failed with ret code (%d)!\n", ret);
1251
+            return ret;
1252
+        }
1253
+        while (hashcnt) {
1251 1254
             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1252 1255
             vbaname[sizeof(vbaname) - 1] = '\0';
1253 1256
 
... ...
@@ -1259,14 +1272,17 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1259 1259
                 cli_ole2_summary_json(ctx, fd, 1);
1260 1260
                 close(fd);
1261 1261
             }
1262
+            hashcnt--;
1262 1263
         }
1263 1264
     }
1264 1265
 #endif
1265 1266
 
1266 1267
     /* Check directory for embedded OLE objects */
1267
-    hashcnt = uniq_get(U, "_1_ole10native", 14, &hash);
1268
-    while (hashcnt--)
1269
-    {
1268
+    if (CL_SUCCESS != (ret = uniq_get(U, "_1_ole10native", 14, &hash, &hashcnt))) {
1269
+        cli_dbgmsg("VBADir: uniq_get('_1_ole10native') failed with ret code (%d)!\n", ret);
1270
+        return ret;
1271
+    }
1272
+    while (hashcnt) {
1270 1273
         snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1271 1274
         vbaname[sizeof(vbaname) - 1] = '\0';
1272 1275
 
... ...
@@ -1278,6 +1294,7 @@ static int cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
1278 1278
             if (ret != CL_CLEAN && !(ret == CL_VIRUS && SCAN_ALL))
1279 1279
                 return ret;
1280 1280
         }
1281
+        hashcnt--;
1281 1282
     }
1282 1283
 
1283 1284
     /* ACAB: since we now hash filenames and handle collisions we
... ...
@@ -46,6 +46,7 @@ struct uniq *uniq_init(uint32_t count) {
46 46
     uniq_free(U);
47 47
     return NULL;
48 48
   }
49
+    U->max_unique_items = count;
49 50
 
50 51
   return U;
51 52
 }
... ...
@@ -55,18 +56,39 @@ void uniq_free(struct uniq *U) {
55 55
   free(U);
56 56
 }
57 57
 
58
-uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
58
+cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
59
+{
60
+  cl_error_t status = CL_EARG;
59 61
   unsigned int i;
60 62
   uint8_t digest[16];
61 63
   struct UNIQMD5 *m = NULL;
62 64
 
63
-  cl_hash_data("md5", key, key_len, digest, NULL);
65
+    if (!U) {
66
+        /* Invalid args */
67
+        goto done;
68
+    }
69
+
70
+    /* Uniq adds are limited by the maximum allocated in uniq_init(). */
71
+    if (U->cur_unique_items >= U->max_unique_items) {
72
+        /* Attempted to add more uniq items than may be stored. */
73
+        status = CL_EMAXSIZE;
74
+        goto done;
75
+    }
76
+
77
+    /* Make a hash of the item string */
78
+    if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
79
+        /* Failed to create hash of item. */
80
+        status = CL_EFORMAT;
81
+        goto done;
82
+    }
64 83
 
84
+    /* Check for md5 digest match in md5 collection */
65 85
   if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest)
66 86
     for(m=&U->md5s[U->idx[*digest]]; m; m=m->next)
67 87
       if(!memcmp(&digest[1], &m->md5[1], 15)) break;
68 88
   
69 89
   if(!m) {
90
+        /* No match. Add new md5 to list */
70 91
     const char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
71 92
 
72 93
     m = &U->md5s[U->items];
... ...
@@ -85,27 +107,85 @@ uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhas
85 85
       m->md5[i] = digest[i];
86 86
     }
87 87
     m->name[32] = '\0';
88
+
89
+        /* Increment # of unique items. */
90
+        U->cur_unique_items++;
88 91
   }
89 92
 
93
+    /* Increment total # of items. */
90 94
   U->items++;
95
+
96
+    /* Increment # items matching this md5 digest (probably just this 1). */
97
+    m->count++;
98
+
99
+    /* Pass back the ascii hash, if requested. */
91 100
   if(rhash) *rhash = m->name;
92
-  return m->count++;
101
+
102
+    /* Pass back the count, if requested. */
103
+    if (count) *count = m->count;
104
+
105
+    status = CL_SUCCESS;
106
+
107
+done:
108
+    return status;
93 109
 }
94 110
 
95
-uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
111
+cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t item_len, char **rhash, uint32_t *count)
112
+{
113
+  cl_error_t status = CL_EARG;
96 114
   uint8_t digest[16];
97 115
   struct UNIQMD5 *m = NULL;
116
+    uint32_t idx      = 0;
98 117
 
99
-  cl_hash_data("md5", key, key_len, digest, NULL);
118
+    if (!U || !count) {
119
+        /* Invalid args */
120
+        goto done;
121
+    }
100 122
 
101
-  if(!U->items || U->md5s[U->idx[*digest]].md5[0]!=*digest)
102
-    return 0;
123
+    *count = 0;
103 124
 
104
-  for(m=&U->md5s[U->idx[*digest]]; m; m=m->next) {
105
-    if(memcmp(&digest[1], &m->md5[1], 15)) continue;
106
-    if(rhash) *rhash = m->name;
107
-    return m->count;
125
+    if (!U->items) {
126
+        goto not_found;
127
+    }
128
+
129
+    /* Make a hash of the item string */
130
+    if (NULL == cl_hash_data("md5", item, item_len, digest, NULL)) {
131
+        /* Failed to create hash of item. */
132
+        status = CL_EFORMAT;
133
+        goto done;
134
+    }
135
+
136
+    /* Get the md5s array index for the bucket list head. */
137
+    idx = U->idx[*digest];
138
+    m   = &U->md5s[idx];
139
+
140
+    if (m->md5[0] != *digest) {
141
+        /*
142
+         * If the first byte of the digest doesn't actually match,
143
+         * then the item has never been added.
144
+         * This is a common scenario because the idx table is initialized
145
+         * to 0's.
146
+         */
147
+        goto not_found;
148
+    }
149
+
150
+    do {
151
+        if (0 == memcmp(&digest[1], &m->md5[1], 15)) {
152
+            /* The item-hash matched.
153
+             * Pass back the ascii hash value (if requested).
154
+             * Return the count of matching items (will be 1+).
155
+             */
156
+            if (rhash)
157
+                *rhash = m->name;
158
+            *count = m->count;
159
+            break;
108 160
   }
161
+        m = m->next;
162
+    } while (NULL != m);
163
+
164
+not_found:
165
+    status = CL_SUCCESS;
109 166
 
110
-  return 0;
167
+done:
168
+    return status;
111 169
 }
... ...
@@ -6,6 +6,54 @@
6 6
  *
7 7
  *  Authors: aCaB <acab@clamav.net>
8 8
  *
9
+ *  Uniq implements a structure that stores the count of duplicate items.
10
+ *  The count can be retrieved by item name (if you know it).
11
+ *  Additionally, you can retrieve the ascii md5 hash at the same time.
12
+ *
13
+ *  This is essentially a tiny hash table of hashes.
14
+ *  The hashes are in an array instead of dynamically added.
15
+ *  This is faster than alloc'ing for each unique item added, but means a max # of unique items must be defined at init.
16
+ *
17
+ *  Example where:
18
+ *   items = 6
19
+ *   max_unique_items = 5
20
+ *   cur_unique_items = 4
21
+ *   md5 #1 has been added 3 times
22
+ *   Two md5's start with the same byte (#0 and #3)
23
+ *
24
+ *    idx:
25
+ *      -00--01--02--03--04--05--06--07-...
26
+ *      | 0 | 0 | 0 | 2 | 1 | 0 | 0 | ...
27
+ *      ------------------------------...
28
+ *
29
+ *    md5s:
30
+ *      ------------------------------
31
+ *   0  | next:  Address of #3
32
+ *      | count: 1
33
+ *      | md5:   0x01,0x98,0x23,0xa8,0xfd,...
34
+ *      | name:  "019823a8fd..."
35
+ *      ------------------------------
36
+ *   1  | next:  NULL
37
+ *      | count: 3
38
+ *      | md5:   0x04,0x98,0x23,0xa8,0xfd,...
39
+ *      | name:  "049823a8fd..."
40
+ *      ------------------------------
41
+ *   2  | next:  NULL
42
+ *      | count: 1
43
+ *      | md5:   0x03,0x98,0x23,0xa8,0xfd,...
44
+ *      | name:  "039823a8fd..."
45
+ *      ------------------------------
46
+ *   3  | next:  NULL
47
+ *      | count: 1
48
+ *      | md5:   0x01,0xdd,0x2f,0x87,0x6a,...
49
+ *      | name:  "01dd2f876a..."
50
+ *      ------------------------------
51
+ *   4  | next:  NULL
52
+ *      | count: 0
53
+ *      | md5:   0x00,0x00,0x00,0x00,0x00,...
54
+ *      | name:  "\0\0\0\0\0..."
55
+ *      ------------------------------
56
+ *
9 57
  *  This program is free software; you can redistribute it and/or modify
10 58
  *  it under the terms of the GNU General Public License version 2 as
11 59
  *  published by the Free Software Foundation.
... ...
@@ -24,25 +72,85 @@
24 24
 #ifndef _UNIQ_H
25 25
 #define _UNIQ_H
26 26
 
27
+#include "clamav.h"
27 28
 #include "cltypes.h"
28 29
 
30
+/**
31
+ * @brief Store the count of each unique item.
32
+ *
33
+ * These elements are allocated as an array in struct uniq, but they are also
34
+ * linked together using the `next` pointers to form impromptu buckets,
35
+ * categorized using the first byte of each md5.
36
+ */
29 37
 struct UNIQMD5 {
30
-  struct UNIQMD5 *next;
31
-  uint32_t count;
32
-  uint8_t md5[16];
33
-  char name[33];
38
+    struct UNIQMD5 *next; /**< Pointer to next UNIQMD5 where the first byte is the same. */
39
+    uint32_t count;       /**< Number of times this item has been added. (# duplicates). */
40
+    uint8_t md5[16];      /**< Binary md5 hash of the item. */
41
+    char name[33];        /**< Ascii md5 hash of the item. */
34 42
 };
35 43
 
44
+/**
45
+ * @brief The main Uniq store structure.
46
+ *
47
+ * Includes array of uniq md5 hashes, and an index table to optimize searches
48
+ * into the hash array, categorized by the first byte of the md5.
49
+ */
36 50
 struct uniq {
37
-  struct UNIQMD5 *md5s;
38
-  uint32_t items;
39
-  uint32_t idx[256];
51
+    struct UNIQMD5 *md5s;      /**< Array of UNIQMD5 structs. */
52
+    uint32_t items;            /**< Total # of items added (including duplicates) */
53
+    uint32_t cur_unique_items; /**< The # of md5s currently stored in the array. */
54
+    uint32_t max_unique_items; /**< The # of md5s that can be stored in the array. */
55
+    uint32_t idx[256];         /**< Array of indices into the md5s array.
56
+                                    Each index represents a linked-list of md5s
57
+                                    sharing the common trait that the first
58
+                                    byte is the same. */
40 59
 };
41 60
 
61
+/**
62
+ * @brief Initialize a Uniq store to count the number of uniq string items.
63
+ *
64
+ * The Uniq store must be free'd with uniq_free().
65
+ * uniq_add()'s will fail if they exceed the number of unique strings initialized with count.
66
+ *
67
+ * @param count         The max number of unique string items that may be added.
68
+ * @return struct uniq* A pointer to the Uniq store object. Will return NULL on failure.
69
+ */
42 70
 struct uniq *uniq_init(uint32_t);
71
+
72
+/**
73
+ * @brief Free the Uniq store and associated memory.
74
+ */
43 75
 void uniq_free(struct uniq *);
44
-uint32_t uniq_add(struct uniq *, const char *, uint32_t, char **);
45
-uint32_t uniq_get(struct uniq *, const char *, uint32_t, char **);
76
+
77
+/**
78
+ * @brief Add to the uniq (item md5) count.
79
+ *
80
+ * Adds an item to the list of known items.
81
+ * Increments the count if the item has been seen before.
82
+ * The optional rhash pointer will be valid until `uniq_free()` is called.
83
+ *
84
+ * @param U             The Uniq count store.
85
+ * @param item          (optional) The item to hash and count.
86
+ * @param item_len      The length, in bytes, of the item. May be 0.
87
+ * @param[out] rhash    (optional) A pointer to the item's md5 hash (in ascii).
88
+ * @param[out] count    (optional) The number of times this unique item has been added.
89
+ * @return cl_error_t   CL_SUCCESS if successful, else an error code.
90
+ */
91
+cl_error_t uniq_add(struct uniq *U, const char *item, uint32_t, char **rhash, uint32_t *count);
92
+
93
+/**
94
+ * @brief Retrieve the number of times an item has been added to the Uniq count store.
95
+ *
96
+ * The optional rhash pointer will be valid until `uniq_free()` is called.
97
+ *
98
+ * @param U             The Uniq count store.
99
+ * @param item          (optional) The item to hash and count.
100
+ * @param item_len      The length, in bytes, of the item. May be 0.
101
+ * @param[out] rhash    (optional) A pointer to the item's md5 hash (in ascii).
102
+ * @param[out] count    The number of times this unique item has been added.
103
+ * @return cl_error_t   CL_SUCCESS if successful, else an error code.
104
+ */
105
+cl_error_t uniq_get(struct uniq *U, const char *item, uint32_t, char **rhash, uint32_t *count);
46 106
 
47 107
 
48 108
 #endif
... ...
@@ -294,6 +294,7 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
294 294
 	struct vba56_header v56h;
295 295
 	off_t seekback;
296 296
 	char fullname[1024], *hash;
297
+    uint32_t hashcnt = 0;
297 298
 
298 299
 	cli_dbgmsg("in cli_vba_readdir()\n");
299 300
 
... ...
@@ -304,8 +305,13 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
304 304
 	 * _VBA_PROJECT files are embedded within office documents (OLE2)
305 305
 	 */
306 306
 	
307
-	if (!uniq_get(U, "_vba_project", 12, &hash))
307
+    if (CL_SUCCESS != uniq_get(U, "_vba_project", 12, &hash, &hashcnt)) {
308
+        cli_dbgmsg("vba_readdir: uniq_get('_vba_project') failed. Unable to check # of embedded vba proj files\n");
308 309
 		return NULL;
310
+    }
311
+    if (hashcnt == 0) {
312
+        return NULL;
313
+    }
309 314
 	snprintf(fullname, sizeof(fullname), "%s"PATHSEP"%s_%u", dir, hash, which);
310 315
 	fullname[sizeof(fullname)-1] = '\0';
311 316
 	fd = open(fullname, O_RDONLY|O_BINARY);
... ...
@@ -448,7 +454,13 @@ cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
448 448
 		}
449 449
 		ptr = get_unicode_name((const char *)buf, length, big_endian);
450 450
 		if(ptr == NULL) break;
451
-		if (!(vba_project->colls[i]=uniq_get(U, ptr, strlen(ptr), &hash))) {
451
+        if (CL_SUCCESS != uniq_get(U, ptr, strlen(ptr), &hash, &hashcnt)) {
452
+            cli_dbgmsg("vba_readdir: uniq_get('%s') failed.\n", ptr);
453
+            free(ptr);
454
+            break;
455
+        }
456
+        vba_project->colls[i] = hashcnt;
457
+        if (0 == vba_project->colls[i]) {
452 458
 			cli_dbgmsg("vba_readdir: cannot find project %s (%s)\n", ptr, hash);
453 459
 			free(ptr);
454 460
 			break;
... ...
@@ -1308,7 +1320,7 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1308 1308
 {
1309 1309
 	vba_project_t *ret;
1310 1310
 
1311
-	ret = (vba_project_t *) cli_malloc(sizeof(struct vba_project_tag));
1311
+    ret = (vba_project_t *)cli_calloc(1, sizeof(struct vba_project_tag));
1312 1312
 
1313 1313
 	if(ret == NULL) {
1314 1314
         cli_errmsg("create_vba_project: Unable to allocate memory for vba project structure\n");
... ...
@@ -1320,16 +1332,8 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1320 1320
 	ret->dir = cli_strdup(dir);
1321 1321
 	ret->offset = (uint32_t *)cli_malloc (sizeof(uint32_t) * record_count);
1322 1322
 
1323
-	if((ret->name == NULL) || (ret->dir == NULL) || (ret->offset == NULL)) {
1324
-		if(ret->dir)
1325
-			free(ret->dir);
1326
-		if(ret->colls)
1327
-			free(ret->colls);
1328
-		if(ret->name)
1329
-			free(ret->name);
1330
-		if(ret->offset)
1331
-			free(ret->offset);
1332
-		free(ret);
1323
+    if ((ret->colls == NULL) || (ret->name == NULL) || (ret->dir == NULL) || (ret->offset == NULL)) {
1324
+        cli_free_vba_project(ret);
1333 1325
         cli_errmsg("create_vba_project: Unable to allocate memory for vba project elements\n");
1334 1326
 		return NULL;
1335 1327
 	}
... ...
@@ -1338,3 +1342,32 @@ create_vba_project(int record_count, const char *dir, struct uniq *U)
1338 1338
 
1339 1339
 	return ret;
1340 1340
 }
1341
+
1342
+/**
1343
+ * @brief Free up the memory associated with the vba_project_t type.
1344
+ *
1345
+ * @param vba_project A vba_project_t type allocated by one of these:
1346
+ *  - create_vba_project()
1347
+ *  - cli_wm_readdir()
1348
+ *  - cli_vba_readdir()
1349
+ */
1350
+void cli_free_vba_project(vba_project_t *vba_project)
1351
+{
1352
+    if (vba_project) {
1353
+        if (vba_project->dir)
1354
+            free(vba_project->dir);
1355
+        if (vba_project->colls)
1356
+            free(vba_project->colls);
1357
+        if (vba_project->name)
1358
+            free(vba_project->name);
1359
+        if (vba_project->offset)
1360
+            free(vba_project->offset);
1361
+        if (vba_project->length)
1362
+            free(vba_project->length);
1363
+        if (vba_project->key)
1364
+            free(vba_project->key);
1365
+        free(vba_project);
1366
+    }
1367
+
1368
+    return;
1369
+}
1341 1370
\ No newline at end of file
... ...
@@ -41,6 +41,8 @@ typedef struct vba_project_tag {
41 41
 
42 42
 vba_project_t	*cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which);
43 43
 vba_project_t	*cli_wm_readdir(int fd);
44
+void 			cli_free_vba_project(vba_project_t *vba_project);
45
+
44 46
 unsigned char	*cli_vba_inflate(int fd, off_t offset, int *size);
45 47
 int	cli_scan_ole10(int fd, cli_ctx *ctx);
46 48
 char	*cli_ppt_vba_read(int fd, cli_ctx *ctx);
... ...
@@ -1108,8 +1108,10 @@ static int sigtool_scandir (const char *dirname, int hex_output)
1108 1108
 
1109 1109
 int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1110 1110
 {
1111
-    int ret = CL_CLEAN, i, fd, data_len;
1112
-    vba_project_t *vba_project;
1111
+    cl_error_t status = CL_CLEAN;
1112
+    cl_error_t ret;
1113
+    int i, fd, data_len;
1114
+    vba_project_t *vba_project = NULL;
1113 1115
     DIR *dd;
1114 1116
     struct dirent *dent;
1115 1117
     STATBUF statbuf;
... ...
@@ -1118,14 +1120,22 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1118 1118
     uint32_t hashcnt;
1119 1119
     unsigned int j;
1120 1120
 
1121
-    hashcnt = uniq_get(U, "_vba_project", 12, NULL);
1122
-    while(hashcnt--) {
1123
-	if(!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) continue;
1121
+    if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1122
+        logg("!ScanDir -> uniq_get('_vba_project') failed.\n");
1123
+        return ret;
1124
+    }
1125
+
1126
+    while (hashcnt) {
1127
+        if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1128
+            hashcnt--;
1129
+            continue;
1130
+        }
1124 1131
 
1125 1132
 	for(i = 0; i < vba_project->count; i++) {
1126 1133
 	    for(j = 0; j < vba_project->colls[i]; j++) {
1127 1134
 		snprintf(vbaname, 1024, "%s"PATHSEP"%s_%u", vba_project->dir, vba_project->name[i], j);
1128 1135
 		vbaname[sizeof(vbaname)-1] = '\0';
1136
+
1129 1137
 		fd = open(vbaname, O_RDONLY|O_BINARY);
1130 1138
 		if(fd == -1) continue;
1131 1139
 		data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
... ...
@@ -1140,39 +1150,53 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1140 1140
 	    }
1141 1141
 	}
1142 1142
 
1143
-	free(vba_project->name);
1144
-	free(vba_project->colls);
1145
-	free(vba_project->dir);
1146
-	free(vba_project->offset);
1147
-	free(vba_project);
1143
+        cli_free_vba_project(vba_project);
1144
+        vba_project = NULL;
1145
+
1146
+        hashcnt--;
1148 1147
     }
1149 1148
 
1149
+    if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1150
+        logg("!ScanDir -> uniq_get('powerpoint document') failed.\n");
1151
+        return ret;
1152
+    }
1150 1153
 
1151
-    if((hashcnt = uniq_get(U, "powerpoint document", 19, &hash))) {
1152
-	while(hashcnt--) {
1154
+    while (hashcnt) {
1153 1155
 	    snprintf(vbaname, 1024, "%s"PATHSEP"%s_%u", dirname, hash, hashcnt);
1154 1156
 	    vbaname[sizeof(vbaname)-1] = '\0';
1157
+
1155 1158
 	    fd = open(vbaname, O_RDONLY|O_BINARY);
1156
-	    if (fd == -1) continue;
1159
+        if (fd == -1) {
1160
+            hashcnt--;
1161
+            continue;
1162
+        }
1157 1163
 	    if ((fullname = cli_ppt_vba_read(fd, NULL))) {
1158 1164
 	      sigtool_scandir(fullname, hex_output);
1159 1165
 	      cli_rmdirs(fullname);
1160 1166
 	      free(fullname);
1161 1167
 	    }
1162 1168
 	    close(fd);
1169
+        hashcnt--;
1163 1170
 	}
1164
-    }
1165 1171
 
1172
+    if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1173
+        logg("!ScanDir -> uniq_get('worddocument') failed.\n");
1174
+        return ret;
1175
+    }
1166 1176
 
1167
-    if ((hashcnt = uniq_get(U, "worddocument", 12, &hash))) {
1168
-	while(hashcnt--) {
1177
+    while (hashcnt) {
1169 1178
 	    snprintf(vbaname, sizeof(vbaname), "%s"PATHSEP"%s_%u", dirname, hash, hashcnt);
1170 1179
 	    vbaname[sizeof(vbaname)-1] = '\0';
1180
+
1171 1181
 	    fd = open(vbaname, O_RDONLY|O_BINARY);
1172
-	    if (fd == -1) continue;
1182
+        if (fd == -1) {
1183
+            hashcnt--;
1184
+            continue;
1185
+        }
1173 1186
 	    
1174 1187
 	    if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd))) {
1175 1188
 		close(fd);
1189
+            hashcnt--;
1176 1190
 		continue;
1177 1191
 	    }
1178 1192
 
... ...
@@ -1188,14 +1212,9 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1188 1188
 	    }
1189 1189
 
1190 1190
 	    close(fd);
1191
-	    free(vba_project->name);
1192
-	    free(vba_project->colls);
1193
-	    free(vba_project->dir);
1194
-	    free(vba_project->offset);
1195
-	    free(vba_project->key);
1196
-	    free(vba_project->length);
1197
-	    free(vba_project);
1198
-	}
1191
+        cli_free_vba_project(vba_project);
1192
+        vba_project = NULL;
1193
+        hashcnt--;
1199 1194
     }
1200 1195
 
1201 1196
     if ((dd = opendir (dirname)) != NULL) {
... ...
@@ -1222,5 +1241,5 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
1222 1222
 
1223 1223
 
1224 1224
     closedir (dd);
1225
-    return ret;
1225
+    return status;
1226 1226
 }
... ...
@@ -28,6 +28,7 @@
28 28
 #include <stdlib.h>
29 29
 #include <string.h>
30 30
 
31
+#include "../libclamav/clamav.h"
31 32
 #include "../libclamav/uniq.h"
32 33
 #include "checks.h"
33 34
 
... ...
@@ -59,12 +60,16 @@ START_TEST (test_uniq_known) {
59 59
   fail_unless(U!=0, "uniq_init");
60 60
 
61 61
   for(i=0; tests[i].expected; i++) {
62
-    u = uniq_add(U, tests[i].key, tests[i].key_len, &hash);
63
-    fail_unless_fmt(u==0 && strcmp(hash, tests[i].expected)==0, "uniq_add(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
62
+        if (CL_SUCCESS != uniq_add(U, tests[i].key, tests[i].key_len, &hash, &u)) {
63
+            fail("uniq_add(%s) failed.", tests[i].key);
64
+        }
65
+        fail_unless_fmt(u == 1 && strcmp(hash, tests[i].expected) == 0, "uniq_add(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
64 66
   }
65 67
 
66 68
   for(i=0; tests[i].expected; i++) {
67
-    u = uniq_get(U, tests[i].key, tests[i].key_len, &hash);
69
+        if (CL_SUCCESS != uniq_get(U, tests[i].key, tests[i].key_len, &hash, &u)) {
70
+            fail("uniq_get(%s) failed.", tests[i].key);
71
+        }
68 72
     fail_unless_fmt(u==1 && strcmp(hash, tests[i].expected)==0, "uniq_get(%s) = %u - expected %s, got %s", tests[i].key, u, tests[i].expected, hash);
69 73
   }
70 74
 
... ...
@@ -82,11 +87,16 @@ START_TEST (test_uniq_colls) {
82 82
   fail_unless(U!=0, "uniq_init");
83 83
 
84 84
   for(j=4; j>0; j--)
85
-    for (i=0; i<j; i++)
86
-      u = uniq_add(U, tests[i], strlen(tests[i]), NULL);
85
+        for (i = 0; i < j; i++) {
86
+            if (CL_SUCCESS != uniq_add(U, tests[i], strlen(tests[i]), NULL, &u)) {
87
+                fail("uniq_add(%s) failed.", tests[i]);
88
+            }
89
+        }
87 90
   
88 91
   for (i=0; i<4; i++) {
89
-    u = uniq_add(U, tests[i], strlen(tests[i]), NULL);
92
+        if (CL_SUCCESS != uniq_get(U, tests[i], strlen(tests[i]), NULL, &u)) {
93
+            fail("uniq_get(%s) failed.", tests[i]);
94
+        }
90 95
     fail_unless_fmt(u+i==4, "uniq_get(%s) = %u - expected %u", tests[i], u, 4-i);
91 96
   }
92 97
 
... ...
@@ -179,6 +179,7 @@ EXPORTS cli_hwp5header @44381 NONAME
179 179
 EXPORTS cli_scanhwp5_stream @44382 NONAME
180 180
 EXPORTS cli_scanhwp3 @44383 NONAME
181 181
 EXPORTS cli_genhash_pe @44384 NONAME
182
+EXPORTS cli_free_vba_project @44385 NONAME
182 183
 
183 184
 ; compatibility layer, tommath, zlib
184 185
 EXPORTS w32_srand @44269 NONAME