Browse code

JPEG format validator improvements

Adds debug output to the JPEG format validator to help resolve issues
with unusually formatted JPEGs and to validate that the JPEG parser is
working correctly.

Relaxes the rules around duplicate application markers or application
markers that appear later than expected, due to prior XMP metadata, etc.

Removed the requirement for an application marker to exist, as some
older JPEGs don't appear to use JFIF, Exif, or SPIFF application
extensions.

I tested against a relatively large data set of JPEGs from Mac & Windows
stock photos, personal photos, and assorted downloaded photos. FP rates
when alerting on broken media should be very low.

Micah Snyder authored on 2020/12/02 09:29:02
Showing 3 changed files
... ...
@@ -36,7 +36,7 @@ print "  NULL\n};\n"
36 36
 */
37 37
 
38 38
 static const char *ftypes_int[] = {
39
-    "0:0:0000000c6a5020200d0a870a:JPEG2000:CL_TYPE_ANY:CL_TYPE_JPEG",
39
+    "0:0:0000000c6a5020200d0a870a:JPEG2000:CL_TYPE_ANY:CL_TYPE_GRAPHICS",
40 40
     "0:0:000001b3:MPEG video stream:CL_TYPE_ANY:CL_TYPE_IGNORED",
41 41
     "0:0:000001ba:MPEG sys stream:CL_TYPE_ANY:CL_TYPE_IGNORED",
42 42
     "0:0:1f8b:GZip:CL_TYPE_ANY:CL_TYPE_GZ",
... ...
@@ -32,24 +32,245 @@
32 32
 #ifdef HAVE_UNISTD_H
33 33
 #include <unistd.h>
34 34
 #endif
35
+#include <stdbool.h>
35 36
 #include <time.h>
36 37
 
37 38
 #include "jpeg.h"
38 39
 #include "clamav.h"
39 40
 
40
-cl_error_t cli_parsejpeg(cli_ctx *ctx)
41
+// clang-format off
42
+/*
43
+ * JPEG format highlights
44
+ * ----------------------
45
+ *
46
+ * Links:
47
+ * - https://en.wikipedia.org/wiki/JPEG#Syntax_and_structure
48
+ * - https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
49
+ * - https://en.wikipedia.org/wiki/Exif
50
+ *
51
+ * A JPEG image is a sequence of segments.
52
+ *
53
+ * Each segment starts with a two-byte marker. The first byte is 0xff and is
54
+ * followed by one of the following to identify the segment.
55
+
56
+ * Some segments are simply the 2-byte marker, while others have a payload.
57
+ * Realistically it appears that just the start-of-image and end-of-image lack
58
+ * the 2-byte size field, the rest have it, even the 4-byte DRI segment.
59
+ *
60
+ * All variable-byte payloads have 2-bytes indicating the size which includes
61
+ * the 2-bytes (but not the marker itself).
62
+ *
63
+ * Within entropy-encoded (compressed) data, any 0xff will have an 0x00
64
+ * inserted after it to indicate that it's just and 0xff and _NOT_ a segment
65
+ * marker. Decoders skip the 0x00 byte.
66
+ * This only applies to entropy-encoded data, not to marker payload data.
67
+ * We dont' really worry about this though because this parser stops when it
68
+ * reaches the image data.
69
+ */
70
+
71
+/*
72
+ * JPEG Segment & Entropy Markers.
73
+ */
74
+typedef enum {
75
+    /* Start of Image
76
+     * No payload
77
+     */
78
+    JPEG_MARKER_SEGMENT_SOI_START_OF_IMAGE = 0xD8,
79
+
80
+    /* Start of Frame for a Baseline DCT-based JPEG (S0F0)
81
+     * Variable size payload.
82
+     * Baseline DCT-based JPEG, and specifies the width, height, number of
83
+     * components, and component subsampling
84
+     */
85
+    JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT = 0xC0,
86
+
87
+    /* Start of Frame for a extended sequential DCT-based JPEG (S0F1)
88
+     * Variable size payload.
89
+     */
90
+    JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT = 0xC1,
91
+
92
+    /* Start of Frame for a progressive DCT-based JPEG (S0F2)
93
+     * Variable size payload.
94
+     * Progressive DCT-based JPEG, and specifies the width, height, number of
95
+     * components, and component subsampling
96
+     */
97
+    JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT = 0xC2,
98
+
99
+    /* Start of Frame for a lossless sequential DCT-based JPEG (S0F3)
100
+     * Variable size payload.
101
+     */
102
+    JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT = 0xC3,
103
+
104
+    /* Start of Frame for a differential sequential DCT-based JPEG (S0F5)
105
+     * Variable size payload.
106
+     */
107
+    JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT = 0xC5,
108
+
109
+    /* Start of Frame for a differential progressive DCT-based JPEG (S0F6)
110
+     * Variable size payload.
111
+     */
112
+    JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT = 0xC6,
113
+
114
+    /* Start of Frame for a differential lossless DCT-based JPEG (S0F7)
115
+     * Variable size payload.
116
+     */
117
+    JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT = 0xC7,
118
+
119
+    /* Start of Frame for a differential sequential arithmatic-based JPEG (S0F5)
120
+     * Variable size payload.
121
+     */
122
+    JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH = 0xC9,
123
+
124
+    /* Start of Frame for a differential progressive arithmatic-based JPEG (S0F6)
125
+     * Variable size payload.
126
+     */
127
+    JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH = 0xCA,
128
+
129
+    /* Start of Frame for a differential lossless arithmatic-based JPEG (S0F7)
130
+     * Variable size payload.
131
+     */
132
+    JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH = 0xCB,
133
+
134
+    /* Define Huffman Tables (DHT)
135
+     * Variable size payload.
136
+     * Defines one or more Huffman tables.
137
+     */
138
+    JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES = 0xC4,
139
+
140
+    /* Define Arithmatic Coding Conditioning (DAC)
141
+     * Variable size payload.
142
+     */
143
+    JPEG_MARKER_SEGMENT_DHT_DEFINE_ARITH_CODING = 0xCC,
144
+
145
+    /* Define Quantization Tables (DTQ)
146
+     * Variable size payload.
147
+     * Defines one or more quantization tables.
148
+     */
149
+    JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES = 0xDB,
150
+
151
+    /* Define Restart Interval (DRI)
152
+     * 4-byte payload.
153
+     * Specifies the interval between RSTn markers, in Minimum Coded Units (MCUs).
154
+     * This marker is followed by two bytes indicating the fixed size so it can be
155
+     * treated like any other variable size segment.
156
+     */
157
+    JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL = 0xDD,
158
+
159
+    /* Start of Scan (SOS)
160
+     * Variable size payload
161
+     * This is the start of the JPEG image data, so we'll actually stop parsing
162
+     * when we reach this.
163
+     */
164
+    JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN = 0xDA,
165
+
166
+    /*
167
+     * App-specific markers E0 - EF
168
+     * Variable size payload.
169
+     * Since several vendors might use the *same* APPn marker type, application-
170
+     * specific markers often begin with a standard or vendor name (e.g., "Exif" or
171
+     * "Adobe") or some other identifying string.
172
+     *
173
+     * Some known app specific markers include:
174
+     *   0xE0:
175
+     *     - JFIF
176
+     *   0xE1:
177
+     *     - Exif
178
+     *     - XMP data, starts with http://ns.adobe.com/xap/1.0/\0
179
+     *   0xE2:
180
+     *     - ICC Profile Chunk. There could be multiple of these to fit the entire profile, see http://www.color.org/icc_specs2.xalter and http://www.color.org/specification/ICC1v43_2010-12.pdf Section B.4
181
+     *   0xE8:
182
+     *     - SPIFF. Not a common format, see http://fileformats.archiveteam.org/wiki/SPIFF
183
+     *   0xED:
184
+     *     - IPTC / IMM metadata (a type of comment)
185
+     *   0xEE:
186
+     *     - AdobeRGB (as opposed to sRGB)
187
+     */
188
+    JPEG_MARKER_SEGMENT_APP0 = 0xE0,
189
+    JPEG_MARKER_SEGMENT_APP1 = 0xE1,
190
+    JPEG_MARKER_SEGMENT_APP2 = 0xE2,
191
+    JPEG_MARKER_SEGMENT_APP3 = 0xE3,
192
+    JPEG_MARKER_SEGMENT_APP4 = 0xE4,
193
+    JPEG_MARKER_SEGMENT_APP5 = 0xE5,
194
+    JPEG_MARKER_SEGMENT_APP6 = 0xE6,
195
+    JPEG_MARKER_SEGMENT_APP7 = 0xE7,
196
+    JPEG_MARKER_SEGMENT_APP8 = 0xE8,
197
+    JPEG_MARKER_SEGMENT_APP9 = 0xE9,
198
+    JPEG_MARKER_SEGMENT_APP10 = 0xEA,
199
+    JPEG_MARKER_SEGMENT_APP11 = 0xEB,
200
+    JPEG_MARKER_SEGMENT_APP12 = 0xEC,
201
+    JPEG_MARKER_SEGMENT_APP13 = 0xED,
202
+    JPEG_MARKER_SEGMENT_APP14 = 0xEE,
203
+    JPEG_MARKER_SEGMENT_APP15 = 0xEF,
204
+
205
+    /* DTI (?)
206
+     *
207
+     */
208
+    JPEG_MARKER_SEGMENT_DTI = 0xF1,
209
+
210
+    /* DTT (?)
211
+     *
212
+     */
213
+    JPEG_MARKER_SEGMENT_DTT = 0xF2,
214
+
215
+    /* JPG7
216
+     * Variable size payload (?)
217
+     */
218
+    JPEG_MARKER_SEGMENT_JPG7 = 0xF7,
219
+
220
+    /* Comment (COM)
221
+     * Variable size payload.
222
+     */
223
+    JPEG_MARKER_SEGMENT_COM_COMMENT = 0xFE,
224
+
225
+    /* End of Image
226
+     * No payload
227
+     */
228
+    JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE = 0xD9,
229
+
230
+    /* Entropy-encoded (aka compressed) data markers.
231
+     *
232
+     * These aren't referenced since we don't parse the image data.
233
+     */
234
+    JPEG_MARKER_NOT_A_MARKER_0x00 = 0x00,
235
+    JPEG_MARKER_NOT_A_MARKER_0xFF = 0xFF,
236
+
237
+    /* Reset entropy-markers are inserted every r macroblocks, where r is the restart interval set by a DRI marker.
238
+     * Not used if there was no DRI segment-marker.
239
+     * The low three bits of the marker code cycle in value from 0 to 7 (i.e. D0 - D7).
240
+     */
241
+    JPEG_MARKER_ENTROPY_RST0_RESET = 0xD0,
242
+    JPEG_MARKER_ENTROPY_RST1_RESET = 0xD1,
243
+    JPEG_MARKER_ENTROPY_RST2_RESET = 0xD2,
244
+    JPEG_MARKER_ENTROPY_RST3_RESET = 0xD3,
245
+    JPEG_MARKER_ENTROPY_RST4_RESET = 0xD4,
246
+    JPEG_MARKER_ENTROPY_RST5_RESET = 0xD5,
247
+    JPEG_MARKER_ENTROPY_RST6_RESET = 0xD6,
248
+    JPEG_MARKER_ENTROPY_RST7_RESET = 0xD7,
249
+} jpeg_marker_t;
250
+
251
+// clang-format on
252
+
253
+cl_error_t
254
+cli_parsejpeg(cli_ctx *ctx)
41 255
 {
42 256
     cl_error_t status = CL_CLEAN;
43 257
 
44 258
     fmap_t *map = NULL;
45
-    unsigned char marker, prev_marker, prev_segment = 0, buff[8];
259
+    jpeg_marker_t marker, prev_marker, prev_segment = JPEG_MARKER_NOT_A_MARKER_0x00;
260
+    uint8_t buff[strlen("ICC_PROFILE") + 2]; /* Using length "ICC_PROFILE" + 2 because it's the longest we'll read. */
46 261
     uint16_t len_u16;
47
-    unsigned int offset = 0, i, len, comment = 0, segment = 0, app = 0;
262
+    unsigned int offset = 0, i, len, segment = 0;
263
+    bool found_comment = false;
264
+    bool found_app     = false;
265
+
266
+    uint32_t num_JFIF  = 0;
267
+    uint32_t num_Exif  = 0;
268
+    uint32_t num_SPIFF = 0;
48 269
 
49 270
     cli_dbgmsg("in cli_parsejpeg()\n");
50 271
 
51 272
     if (NULL == ctx) {
52
-        cli_dbgmsg("JPEG: passed context was NULL\n");
273
+        cli_dbgmsg("passed context was NULL\n");
53 274
         status = CL_EARG;
54 275
         goto done;
55 276
     }
... ...
@@ -67,17 +288,19 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
67 67
 
68 68
     while (1) {
69 69
         segment++;
70
-        prev_marker = 0;
70
+        prev_marker = JPEG_MARKER_NOT_A_MARKER_0x00;
71 71
         for (i = 0; offset < map->len && i < 16; i++) {
72
-            if (fmap_readn(map, &marker, offset, sizeof(marker)) == sizeof(marker)) {
73
-                offset += sizeof(marker);
72
+            uint8_t marker_u8;
73
+            if (fmap_readn(map, &marker_u8, offset, sizeof(marker_u8)) == sizeof(marker_u8)) {
74
+                offset += sizeof(marker_u8);
74 75
             } else {
75 76
                 cli_errmsg("JPEG: Failed to read marker, file corrupted?\n");
76 77
                 cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadMarker");
77 78
                 goto done;
78 79
             }
80
+            marker = (jpeg_marker_t)marker_u8;
79 81
 
80
-            if (prev_marker == 0xff && marker != 0xff)
82
+            if (prev_marker == JPEG_MARKER_NOT_A_MARKER_0xFF && marker != JPEG_MARKER_NOT_A_MARKER_0xFF)
81 83
                 break;
82 84
             prev_marker = marker;
83 85
         }
... ...
@@ -94,7 +317,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
94 94
             goto done;
95 95
         }
96 96
         len = (unsigned int)be16_to_host(len_u16);
97
-        cli_dbgmsg("JPEG: Marker %02x, length %u\n", marker, len);
97
+        cli_dbgmsg("segment[%d] = 0x%02x, Length %u\n", segment, marker, len);
98 98
 
99 99
         if (len < 2) {
100 100
             cli_warnmsg("JPEG: Invalid segment size\n");
... ...
@@ -111,85 +334,116 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
111 111
         offset += len;
112 112
 
113 113
         switch (marker) {
114
-            case 0xe0: /* JFIF */
115
-                if (app) {
116
-                    cli_warnmsg("JPEG: Duplicate Application Marker\n");
117
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker");
118
-                    status = CL_EPARSE;
119
-                    goto done;
120
-                }
121
-                if (segment != 1 && (segment != 2 || !comment)) {
122
-                    cli_warnmsg("JPEG: JFIF marker at wrong position\n");
123
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition");
124
-                    status = CL_EPARSE;
125
-                    goto done;
126
-                }
127
-                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 5) != 5 || memcmp(buff, "JFIF\0", 5)) {
128
-                    cli_warnmsg("JPEG: No JFIF marker\n");
129
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoJFIFmarker");
130
-                    status = CL_EPARSE;
131
-                    goto done;
132
-                }
133
-                if (len < 16) {
134
-                    cli_warnmsg("JPEG: JFIF header too short\n");
135
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort");
136
-                    status = CL_EPARSE;
137
-                    goto done;
114
+            case JPEG_MARKER_SEGMENT_APP0:
115
+                /*
116
+                 * JFIF, maybe
117
+                 */
118
+                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("JFIF") + 1) == strlen("JFIF") + 1 && 0 == memcmp(buff, "JFIF\0", strlen("JFIF") + 1)) {
119
+                    /* Found a JFIF marker */
120
+                    if (found_app && num_JFIF > 0) {
121
+                        cli_warnmsg("JPEG: Duplicate Application Marker found (JFIF)\n");
122
+                        cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
123
+                        cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker");
124
+                        status = CL_EPARSE;
125
+                        goto done;
126
+                    }
127
+                    if (!(segment == 1 ||
128
+                          (segment == 2 && found_comment) ||
129
+                          (segment == 2 && num_Exif > 0) ||
130
+                          (segment == 3 && found_comment && num_Exif > 0))) {
131
+                        /* The JFIF segment is technically required to appear first, though it has been observed
132
+                        * appearing in segment 2 in functional images when segment 1 is a comment or an Exif segment.
133
+                        * If segment 1 wasn't a comment or Exif, then the file structure is unusual. */
134
+                        cli_warnmsg("JPEG: JFIF marker at wrong position, found in segment # %d\n", segment);
135
+                        cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
136
+                        cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition");
137
+                        status = CL_EPARSE;
138
+                        goto done;
139
+                    }
140
+                    if (len < 16) {
141
+                        cli_warnmsg("JPEG: JFIF header too short\n");
142
+                        cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort");
143
+                        status = CL_EPARSE;
144
+                        goto done;
145
+                    }
146
+                    cli_dbgmsg(" JFIF application marker\n");
147
+                    found_app = true;
148
+                    num_JFIF += 1;
149
+                } else {
150
+                    /* Found something else. Eg could be an Ocad Revision # (eg "Ocad$Rev: 14797 $"), for example.
151
+                       Whatever it is, we don't really care for now */
152
+                    cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
138 153
                 }
139
-                app = 0xe0;
140 154
                 break;
141 155
 
142
-            case 0xe1: /* EXIF */
143
-                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 7) != 7) {
144
-                    cli_warnmsg("JPEG: Can't read Exif header\n");
145
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadExifHeader");
146
-                    status = CL_EPARSE;
147
-                    goto done;
148
-                }
149
-                if (!memcmp(buff, "Exif\0\0", 6)) {
150
-                    if (app && app != 0xe0) {
151
-                        cli_warnmsg("JPEG: Duplicate Application Marker\n");
156
+            case JPEG_MARKER_SEGMENT_APP1:
157
+                /*
158
+                 * Exif, or maybe XMP data
159
+                 */
160
+                if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Exif") + 2) == strlen("Exif") + 2) && (0 == memcmp(buff, "Exif\0\0", strlen("Exif") + 2))) {
161
+                    if (found_app && (num_Exif > 0 || num_SPIFF > 0)) {
162
+                        cli_warnmsg("JPEG: Duplicate Application Marker found (Exif)\n");
163
+                        cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
152 164
                         cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifDupAppMarker");
153 165
                         status = CL_EPARSE;
154 166
                         goto done;
155 167
                     }
156
-                    if (segment > 3 && !comment && app != 0xe0) {
168
+                    if (segment > 3 && !found_comment && num_JFIF > 0) {
169
+                        /* If Exif was found after segment 3 and previous segments weren't a comment or JFIF, something is unusual. */
157 170
                         cli_warnmsg("JPEG: Exif marker at wrong position\n");
158 171
                         cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderBadPosition");
159 172
                         status = CL_EPARSE;
160 173
                         goto done;
161 174
                     }
162
-                } else if (!memcmp(buff, "http://", 7)) {
163
-                    cli_dbgmsg("JPEG: XMP data in segment %u\n", segment);
175
+                    if (len < 16) {
176
+                        cli_warnmsg("JPEG: Exif header too short\n");
177
+                        cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort");
178
+                        status = CL_EPARSE;
179
+                        goto done;
180
+                    }
181
+                    cli_dbgmsg(" Exif application marker\n");
182
+                    found_app = true;
183
+                    num_Exif += 1;
184
+                } else if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("http://")) == strlen("http://")) && (0 == memcmp(buff, "http://", strlen("http://")))) {
185
+                    cli_dbgmsg(" XMP metadata\n");
186
+                    found_comment = true;
164 187
                 } else {
165
-                    cli_warnmsg("JPEG: Invalid Exif header\n");
166
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidExifHeader");
167
-                    status = CL_EPARSE;
168
-                    goto done;
188
+                    cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
169 189
                 }
170
-                if (len < 16) {
171
-                    cli_warnmsg("JPEG: Exif header too short\n");
172
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort");
173
-                    status = CL_EPARSE;
174
-                    goto done;
190
+                break;
191
+
192
+            case JPEG_MARKER_SEGMENT_APP2:
193
+                /*
194
+                 * ICC Profile
195
+                 */
196
+                if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("ICC_PROFILE") + 2) == strlen("ICC_PROFILE") + 2) &&
197
+                    (0 == memcmp(buff, "ICC_PROFILE\0", strlen("ICC_PROFILE" + 1)))) {
198
+                    /* Found ICC Profile Chunk. Let's print out the chunk #, which follows "ICC_PROFILE\0"... */
199
+                    uint8_t chunk_no = buff[strlen("ICC_PROFILE") + 1];
200
+                    cli_dbgmsg(" ICC Profile, chunk # %d\n", chunk_no);
201
+                } else {
202
+                    cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
175 203
                 }
176
-                app = 0xe1;
177 204
                 break;
178 205
 
179
-            case 0xe8: /* SPIFF */
180
-                if (app) {
181
-                    cli_warnmsg("JPEG: Duplicate Application Marker\n");
206
+            case JPEG_MARKER_SEGMENT_APP8:
207
+                /*
208
+                 * SPIFF
209
+                 */
210
+                if (found_app) {
211
+                    cli_warnmsg("JPEG: Duplicate Application Marker found (SPIFF)\n");
212
+                    cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
182 213
                     cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFdupAppMarker");
183 214
                     status = CL_EPARSE;
184 215
                     goto done;
185 216
                 }
186
-                if (segment != 1 && (segment != 2 || !comment)) {
217
+                if (segment != 1 && (segment != 2 || !found_comment)) {
187 218
                     cli_warnmsg("JPEG: SPIFF marker at wrong position\n");
188 219
                     cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFmarkerBadPosition");
189 220
                     status = CL_EPARSE;
190 221
                     goto done;
191 222
                 }
192
-                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 6) != 6 || memcmp(buff, "SPIFF\0", 6)) {
223
+                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("SPIFF") + 1) != strlen("SPIFF") + 1 || memcmp(buff, "SPIFF\0", strlen("SPIFF") + 1)) {
193 224
                     cli_warnmsg("JPEG: No SPIFF marker\n");
194 225
                     cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoSPIFFmarker");
195 226
                     status = CL_EPARSE;
... ...
@@ -201,11 +455,75 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
201 201
                     status = CL_EPARSE;
202 202
                     goto done;
203 203
                 }
204
-                app = 0xe8;
204
+                cli_dbgmsg(" SPIFF application marker\n");
205
+                found_app = true;
206
+                num_SPIFF += 1;
207
+                break;
208
+
209
+            case JPEG_MARKER_SEGMENT_APP13:
210
+                /*
211
+                 * IPTC / IMM metadata (comment), probably
212
+                 */
213
+                cli_dbgmsg(" IPTC IMM metadata segment marker\n");
214
+                found_comment = true;
215
+                break;
216
+
217
+            case JPEG_MARKER_SEGMENT_APP14:
218
+                /*
219
+                 * Adobe RGB, probably
220
+                 */
221
+                if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Adobe") + 1) != strlen("Adobe") + 1 || 0 != memcmp(buff, "Adobe\0", strlen("Adobe") + 1)) {
222
+                    /* Not Adobe, dunno what this is. */
223
+                    cli_dbgmsg(" Unfamiliar application marker: 0x%02x\n", marker);
224
+                    break;
225
+                }
226
+                cli_dbgmsg(" AdobeRGB application marker\n");
227
+                break;
228
+
229
+            case JPEG_MARKER_SEGMENT_APP3:
230
+            case JPEG_MARKER_SEGMENT_APP4:
231
+            case JPEG_MARKER_SEGMENT_APP5:
232
+            case JPEG_MARKER_SEGMENT_APP6:
233
+            case JPEG_MARKER_SEGMENT_APP7:
234
+            case JPEG_MARKER_SEGMENT_APP9:
235
+            case JPEG_MARKER_SEGMENT_APP10:
236
+            case JPEG_MARKER_SEGMENT_APP11:
237
+            case JPEG_MARKER_SEGMENT_APP12:
238
+            case JPEG_MARKER_SEGMENT_APP15:
239
+                /*
240
+                 * Unknown
241
+                 */
242
+                cli_dbgmsg(" Unfamiliar application marker: 0x%02x\n", marker);
205 243
                 break;
206 244
 
207
-            case 0xf7: /* JPG7 */
208
-                if (app) {
245
+            case JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT:
246
+            case JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT:
247
+            case JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT:
248
+            case JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT:
249
+            case JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT:
250
+            case JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT:
251
+            case JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT:
252
+            case JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH:
253
+            case JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH:
254
+            case JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH:
255
+                cli_dbgmsg(" Start of Frame (S0F) %02x\n", (uint8_t)marker);
256
+                break;
257
+
258
+            case JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES:
259
+                cli_dbgmsg(" Huffman Tables definitions (DHT)\n");
260
+                break;
261
+
262
+            case JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES:
263
+                cli_dbgmsg(" Quantization Tables definitions (DQT)\n");
264
+                break;
265
+
266
+            case JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL:
267
+                cli_dbgmsg(" Restart Interval definition (DRI)\n");
268
+                break;
269
+
270
+            case JPEG_MARKER_SEGMENT_JPG7: /* JPG7 */
271
+                cli_dbgmsg(" JPG7 segment marker\n");
272
+                if (found_app) {
209 273
                     cli_warnmsg("JPEG: Application Marker before JPG7\n");
210 274
                     cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.AppMarkerBeforeJPG7");
211 275
                     status = CL_EPARSE;
... ...
@@ -213,31 +531,38 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
213 213
                 }
214 214
                 goto done;
215 215
 
216
-            case 0xda: /* SOS */
217
-                if (!app) {
218
-                    cli_warnmsg("JPEG: Invalid file structure\n");
219
-                    cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidStructure");
220
-                    status = CL_EPARSE;
221
-                    goto done;
216
+            case JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN: /* SOS */
217
+                cli_dbgmsg(" Start of Scan (SOS) segment marker\n");
218
+                if (!found_app) {
219
+                    cli_dbgmsg(" Found the Start-of-Scan segment without identifying the JPEG application type.\n");
222 220
                 }
221
+                /* What follows would be scan data (compressed image data),
222
+                 * parsing is not presently required for validation purposes
223
+                 * so we'll just call it quits. */
223 224
                 goto done;
224 225
 
225
-            case 0xd9: /* EOI */
226
+            case JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE: /* EOI (End of Image) */
227
+                cli_dbgmsg(" End of Image (EOI) segment marker\n");
228
+                /*
229
+                 * We shouldn't reach this marker because we exit out when we hit the Start of Scan marker.
230
+                 */
226 231
                 cli_warnmsg("JPEG: No image in jpeg\n");
227 232
                 cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoImages");
228 233
                 status = CL_EPARSE;
229 234
                 goto done;
230 235
 
231
-            case 0xfe: /* COM */
232
-                comment = 1;
236
+            case JPEG_MARKER_SEGMENT_COM_COMMENT: /* COM (comment) */
237
+                cli_dbgmsg(" Comment (COM) segment marker\n");
238
+                found_comment = true;
233 239
                 break;
234 240
 
235
-            case 0xed: /* IPTC */
236
-                comment = 1;
241
+            case JPEG_MARKER_SEGMENT_DTI: /* DTI */
242
+                cli_dbgmsg(" DTI segment marker\n");
237 243
                 break;
238 244
 
239
-            case 0xf2: /* DTT */
240
-                if (prev_segment != 0xf1) {
245
+            case JPEG_MARKER_SEGMENT_DTT: /* DTT */
246
+                cli_dbgmsg(" DTT segment marker\n");
247
+                if (prev_segment != JPEG_MARKER_SEGMENT_DTI) {
241 248
                     cli_warnmsg("JPEG: No DTI segment before DTT\n");
242 249
                     cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.DTTMissingDTISegment");
243 250
                     status = CL_EPARSE;
... ...
@@ -246,8 +571,10 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx)
246 246
                 break;
247 247
 
248 248
             default:
249
+                /* Some unknown marker we don't presently handle, don't worry about it. */
249 250
                 break;
250 251
         }
252
+
251 253
         prev_segment = marker;
252 254
     }
253 255
 
... ...
@@ -4171,7 +4171,10 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
4171 4171
 
4172 4172
         case CL_TYPE_GRAPHICS:
4173 4173
             /*
4174
-             * This case is for unhandled graphics types such as BMP.
4174
+             * This case is for unhandled graphics types such as BMP, JPEG 2000, etc.
4175
+             *
4176
+             * Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997)
4177
+             * JPEG 2000 is not handled by cli_scanjpeg or cli_parsejpeg.
4175 4178
              */
4176 4179
             break;
4177 4180