Adds debug output to the JPEG format validator to help resolve issues
with unusually formatted JPEGs and to validate that the JPEG parser is
working correctly.
Relaxes the rules around duplicate application markers or application
markers that appear later than expected, due to prior XMP metadata, etc.
Removed the requirement for an application marker to exist, as some
older JPEGs don't appear to use JFIF, Exif, or SPIFF application
extensions.
I tested against a relatively large data set of JPEGs from Mac & Windows
stock photos, personal photos, and assorted downloaded photos. FP rates
when alerting on broken media should be very low.
... | ... |
@@ -36,7 +36,7 @@ print " NULL\n};\n" |
36 | 36 |
*/ |
37 | 37 |
|
38 | 38 |
static const char *ftypes_int[] = { |
39 |
- "0:0:0000000c6a5020200d0a870a:JPEG2000:CL_TYPE_ANY:CL_TYPE_JPEG", |
|
39 |
+ "0:0:0000000c6a5020200d0a870a:JPEG2000:CL_TYPE_ANY:CL_TYPE_GRAPHICS", |
|
40 | 40 |
"0:0:000001b3:MPEG video stream:CL_TYPE_ANY:CL_TYPE_IGNORED", |
41 | 41 |
"0:0:000001ba:MPEG sys stream:CL_TYPE_ANY:CL_TYPE_IGNORED", |
42 | 42 |
"0:0:1f8b:GZip:CL_TYPE_ANY:CL_TYPE_GZ", |
... | ... |
@@ -32,24 +32,245 @@ |
32 | 32 |
#ifdef HAVE_UNISTD_H |
33 | 33 |
#include <unistd.h> |
34 | 34 |
#endif |
35 |
+#include <stdbool.h> |
|
35 | 36 |
#include <time.h> |
36 | 37 |
|
37 | 38 |
#include "jpeg.h" |
38 | 39 |
#include "clamav.h" |
39 | 40 |
|
40 |
-cl_error_t cli_parsejpeg(cli_ctx *ctx) |
|
41 |
+// clang-format off |
|
42 |
+/* |
|
43 |
+ * JPEG format highlights |
|
44 |
+ * ---------------------- |
|
45 |
+ * |
|
46 |
+ * Links: |
|
47 |
+ * - https://en.wikipedia.org/wiki/JPEG#Syntax_and_structure |
|
48 |
+ * - https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format |
|
49 |
+ * - https://en.wikipedia.org/wiki/Exif |
|
50 |
+ * |
|
51 |
+ * A JPEG image is a sequence of segments. |
|
52 |
+ * |
|
53 |
+ * Each segment starts with a two-byte marker. The first byte is 0xff and is |
|
54 |
+ * followed by one of the following to identify the segment. |
|
55 |
+ |
|
56 |
+ * Some segments are simply the 2-byte marker, while others have a payload. |
|
57 |
+ * Realistically it appears that just the start-of-image and end-of-image lack |
|
58 |
+ * the 2-byte size field, the rest have it, even the 4-byte DRI segment. |
|
59 |
+ * |
|
60 |
+ * All variable-byte payloads have 2-bytes indicating the size which includes |
|
61 |
+ * the 2-bytes (but not the marker itself). |
|
62 |
+ * |
|
63 |
+ * Within entropy-encoded (compressed) data, any 0xff will have an 0x00 |
|
64 |
+ * inserted after it to indicate that it's just and 0xff and _NOT_ a segment |
|
65 |
+ * marker. Decoders skip the 0x00 byte. |
|
66 |
+ * This only applies to entropy-encoded data, not to marker payload data. |
|
67 |
+ * We dont' really worry about this though because this parser stops when it |
|
68 |
+ * reaches the image data. |
|
69 |
+ */ |
|
70 |
+ |
|
71 |
+/* |
|
72 |
+ * JPEG Segment & Entropy Markers. |
|
73 |
+ */ |
|
74 |
+typedef enum { |
|
75 |
+ /* Start of Image |
|
76 |
+ * No payload |
|
77 |
+ */ |
|
78 |
+ JPEG_MARKER_SEGMENT_SOI_START_OF_IMAGE = 0xD8, |
|
79 |
+ |
|
80 |
+ /* Start of Frame for a Baseline DCT-based JPEG (S0F0) |
|
81 |
+ * Variable size payload. |
|
82 |
+ * Baseline DCT-based JPEG, and specifies the width, height, number of |
|
83 |
+ * components, and component subsampling |
|
84 |
+ */ |
|
85 |
+ JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT = 0xC0, |
|
86 |
+ |
|
87 |
+ /* Start of Frame for a extended sequential DCT-based JPEG (S0F1) |
|
88 |
+ * Variable size payload. |
|
89 |
+ */ |
|
90 |
+ JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT = 0xC1, |
|
91 |
+ |
|
92 |
+ /* Start of Frame for a progressive DCT-based JPEG (S0F2) |
|
93 |
+ * Variable size payload. |
|
94 |
+ * Progressive DCT-based JPEG, and specifies the width, height, number of |
|
95 |
+ * components, and component subsampling |
|
96 |
+ */ |
|
97 |
+ JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT = 0xC2, |
|
98 |
+ |
|
99 |
+ /* Start of Frame for a lossless sequential DCT-based JPEG (S0F3) |
|
100 |
+ * Variable size payload. |
|
101 |
+ */ |
|
102 |
+ JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT = 0xC3, |
|
103 |
+ |
|
104 |
+ /* Start of Frame for a differential sequential DCT-based JPEG (S0F5) |
|
105 |
+ * Variable size payload. |
|
106 |
+ */ |
|
107 |
+ JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT = 0xC5, |
|
108 |
+ |
|
109 |
+ /* Start of Frame for a differential progressive DCT-based JPEG (S0F6) |
|
110 |
+ * Variable size payload. |
|
111 |
+ */ |
|
112 |
+ JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT = 0xC6, |
|
113 |
+ |
|
114 |
+ /* Start of Frame for a differential lossless DCT-based JPEG (S0F7) |
|
115 |
+ * Variable size payload. |
|
116 |
+ */ |
|
117 |
+ JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT = 0xC7, |
|
118 |
+ |
|
119 |
+ /* Start of Frame for a differential sequential arithmatic-based JPEG (S0F5) |
|
120 |
+ * Variable size payload. |
|
121 |
+ */ |
|
122 |
+ JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH = 0xC9, |
|
123 |
+ |
|
124 |
+ /* Start of Frame for a differential progressive arithmatic-based JPEG (S0F6) |
|
125 |
+ * Variable size payload. |
|
126 |
+ */ |
|
127 |
+ JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH = 0xCA, |
|
128 |
+ |
|
129 |
+ /* Start of Frame for a differential lossless arithmatic-based JPEG (S0F7) |
|
130 |
+ * Variable size payload. |
|
131 |
+ */ |
|
132 |
+ JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH = 0xCB, |
|
133 |
+ |
|
134 |
+ /* Define Huffman Tables (DHT) |
|
135 |
+ * Variable size payload. |
|
136 |
+ * Defines one or more Huffman tables. |
|
137 |
+ */ |
|
138 |
+ JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES = 0xC4, |
|
139 |
+ |
|
140 |
+ /* Define Arithmatic Coding Conditioning (DAC) |
|
141 |
+ * Variable size payload. |
|
142 |
+ */ |
|
143 |
+ JPEG_MARKER_SEGMENT_DHT_DEFINE_ARITH_CODING = 0xCC, |
|
144 |
+ |
|
145 |
+ /* Define Quantization Tables (DTQ) |
|
146 |
+ * Variable size payload. |
|
147 |
+ * Defines one or more quantization tables. |
|
148 |
+ */ |
|
149 |
+ JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES = 0xDB, |
|
150 |
+ |
|
151 |
+ /* Define Restart Interval (DRI) |
|
152 |
+ * 4-byte payload. |
|
153 |
+ * Specifies the interval between RSTn markers, in Minimum Coded Units (MCUs). |
|
154 |
+ * This marker is followed by two bytes indicating the fixed size so it can be |
|
155 |
+ * treated like any other variable size segment. |
|
156 |
+ */ |
|
157 |
+ JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL = 0xDD, |
|
158 |
+ |
|
159 |
+ /* Start of Scan (SOS) |
|
160 |
+ * Variable size payload |
|
161 |
+ * This is the start of the JPEG image data, so we'll actually stop parsing |
|
162 |
+ * when we reach this. |
|
163 |
+ */ |
|
164 |
+ JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN = 0xDA, |
|
165 |
+ |
|
166 |
+ /* |
|
167 |
+ * App-specific markers E0 - EF |
|
168 |
+ * Variable size payload. |
|
169 |
+ * Since several vendors might use the *same* APPn marker type, application- |
|
170 |
+ * specific markers often begin with a standard or vendor name (e.g., "Exif" or |
|
171 |
+ * "Adobe") or some other identifying string. |
|
172 |
+ * |
|
173 |
+ * Some known app specific markers include: |
|
174 |
+ * 0xE0: |
|
175 |
+ * - JFIF |
|
176 |
+ * 0xE1: |
|
177 |
+ * - Exif |
|
178 |
+ * - XMP data, starts with http://ns.adobe.com/xap/1.0/\0 |
|
179 |
+ * 0xE2: |
|
180 |
+ * - ICC Profile Chunk. There could be multiple of these to fit the entire profile, see http://www.color.org/icc_specs2.xalter and http://www.color.org/specification/ICC1v43_2010-12.pdf Section B.4 |
|
181 |
+ * 0xE8: |
|
182 |
+ * - SPIFF. Not a common format, see http://fileformats.archiveteam.org/wiki/SPIFF |
|
183 |
+ * 0xED: |
|
184 |
+ * - IPTC / IMM metadata (a type of comment) |
|
185 |
+ * 0xEE: |
|
186 |
+ * - AdobeRGB (as opposed to sRGB) |
|
187 |
+ */ |
|
188 |
+ JPEG_MARKER_SEGMENT_APP0 = 0xE0, |
|
189 |
+ JPEG_MARKER_SEGMENT_APP1 = 0xE1, |
|
190 |
+ JPEG_MARKER_SEGMENT_APP2 = 0xE2, |
|
191 |
+ JPEG_MARKER_SEGMENT_APP3 = 0xE3, |
|
192 |
+ JPEG_MARKER_SEGMENT_APP4 = 0xE4, |
|
193 |
+ JPEG_MARKER_SEGMENT_APP5 = 0xE5, |
|
194 |
+ JPEG_MARKER_SEGMENT_APP6 = 0xE6, |
|
195 |
+ JPEG_MARKER_SEGMENT_APP7 = 0xE7, |
|
196 |
+ JPEG_MARKER_SEGMENT_APP8 = 0xE8, |
|
197 |
+ JPEG_MARKER_SEGMENT_APP9 = 0xE9, |
|
198 |
+ JPEG_MARKER_SEGMENT_APP10 = 0xEA, |
|
199 |
+ JPEG_MARKER_SEGMENT_APP11 = 0xEB, |
|
200 |
+ JPEG_MARKER_SEGMENT_APP12 = 0xEC, |
|
201 |
+ JPEG_MARKER_SEGMENT_APP13 = 0xED, |
|
202 |
+ JPEG_MARKER_SEGMENT_APP14 = 0xEE, |
|
203 |
+ JPEG_MARKER_SEGMENT_APP15 = 0xEF, |
|
204 |
+ |
|
205 |
+ /* DTI (?) |
|
206 |
+ * |
|
207 |
+ */ |
|
208 |
+ JPEG_MARKER_SEGMENT_DTI = 0xF1, |
|
209 |
+ |
|
210 |
+ /* DTT (?) |
|
211 |
+ * |
|
212 |
+ */ |
|
213 |
+ JPEG_MARKER_SEGMENT_DTT = 0xF2, |
|
214 |
+ |
|
215 |
+ /* JPG7 |
|
216 |
+ * Variable size payload (?) |
|
217 |
+ */ |
|
218 |
+ JPEG_MARKER_SEGMENT_JPG7 = 0xF7, |
|
219 |
+ |
|
220 |
+ /* Comment (COM) |
|
221 |
+ * Variable size payload. |
|
222 |
+ */ |
|
223 |
+ JPEG_MARKER_SEGMENT_COM_COMMENT = 0xFE, |
|
224 |
+ |
|
225 |
+ /* End of Image |
|
226 |
+ * No payload |
|
227 |
+ */ |
|
228 |
+ JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE = 0xD9, |
|
229 |
+ |
|
230 |
+ /* Entropy-encoded (aka compressed) data markers. |
|
231 |
+ * |
|
232 |
+ * These aren't referenced since we don't parse the image data. |
|
233 |
+ */ |
|
234 |
+ JPEG_MARKER_NOT_A_MARKER_0x00 = 0x00, |
|
235 |
+ JPEG_MARKER_NOT_A_MARKER_0xFF = 0xFF, |
|
236 |
+ |
|
237 |
+ /* Reset entropy-markers are inserted every r macroblocks, where r is the restart interval set by a DRI marker. |
|
238 |
+ * Not used if there was no DRI segment-marker. |
|
239 |
+ * The low three bits of the marker code cycle in value from 0 to 7 (i.e. D0 - D7). |
|
240 |
+ */ |
|
241 |
+ JPEG_MARKER_ENTROPY_RST0_RESET = 0xD0, |
|
242 |
+ JPEG_MARKER_ENTROPY_RST1_RESET = 0xD1, |
|
243 |
+ JPEG_MARKER_ENTROPY_RST2_RESET = 0xD2, |
|
244 |
+ JPEG_MARKER_ENTROPY_RST3_RESET = 0xD3, |
|
245 |
+ JPEG_MARKER_ENTROPY_RST4_RESET = 0xD4, |
|
246 |
+ JPEG_MARKER_ENTROPY_RST5_RESET = 0xD5, |
|
247 |
+ JPEG_MARKER_ENTROPY_RST6_RESET = 0xD6, |
|
248 |
+ JPEG_MARKER_ENTROPY_RST7_RESET = 0xD7, |
|
249 |
+} jpeg_marker_t; |
|
250 |
+ |
|
251 |
+// clang-format on |
|
252 |
+ |
|
253 |
+cl_error_t |
|
254 |
+cli_parsejpeg(cli_ctx *ctx) |
|
41 | 255 |
{ |
42 | 256 |
cl_error_t status = CL_CLEAN; |
43 | 257 |
|
44 | 258 |
fmap_t *map = NULL; |
45 |
- unsigned char marker, prev_marker, prev_segment = 0, buff[8]; |
|
259 |
+ jpeg_marker_t marker, prev_marker, prev_segment = JPEG_MARKER_NOT_A_MARKER_0x00; |
|
260 |
+ uint8_t buff[strlen("ICC_PROFILE") + 2]; /* Using length "ICC_PROFILE" + 2 because it's the longest we'll read. */ |
|
46 | 261 |
uint16_t len_u16; |
47 |
- unsigned int offset = 0, i, len, comment = 0, segment = 0, app = 0; |
|
262 |
+ unsigned int offset = 0, i, len, segment = 0; |
|
263 |
+ bool found_comment = false; |
|
264 |
+ bool found_app = false; |
|
265 |
+ |
|
266 |
+ uint32_t num_JFIF = 0; |
|
267 |
+ uint32_t num_Exif = 0; |
|
268 |
+ uint32_t num_SPIFF = 0; |
|
48 | 269 |
|
49 | 270 |
cli_dbgmsg("in cli_parsejpeg()\n"); |
50 | 271 |
|
51 | 272 |
if (NULL == ctx) { |
52 |
- cli_dbgmsg("JPEG: passed context was NULL\n"); |
|
273 |
+ cli_dbgmsg("passed context was NULL\n"); |
|
53 | 274 |
status = CL_EARG; |
54 | 275 |
goto done; |
55 | 276 |
} |
... | ... |
@@ -67,17 +288,19 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
67 | 67 |
|
68 | 68 |
while (1) { |
69 | 69 |
segment++; |
70 |
- prev_marker = 0; |
|
70 |
+ prev_marker = JPEG_MARKER_NOT_A_MARKER_0x00; |
|
71 | 71 |
for (i = 0; offset < map->len && i < 16; i++) { |
72 |
- if (fmap_readn(map, &marker, offset, sizeof(marker)) == sizeof(marker)) { |
|
73 |
- offset += sizeof(marker); |
|
72 |
+ uint8_t marker_u8; |
|
73 |
+ if (fmap_readn(map, &marker_u8, offset, sizeof(marker_u8)) == sizeof(marker_u8)) { |
|
74 |
+ offset += sizeof(marker_u8); |
|
74 | 75 |
} else { |
75 | 76 |
cli_errmsg("JPEG: Failed to read marker, file corrupted?\n"); |
76 | 77 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadMarker"); |
77 | 78 |
goto done; |
78 | 79 |
} |
80 |
+ marker = (jpeg_marker_t)marker_u8; |
|
79 | 81 |
|
80 |
- if (prev_marker == 0xff && marker != 0xff) |
|
82 |
+ if (prev_marker == JPEG_MARKER_NOT_A_MARKER_0xFF && marker != JPEG_MARKER_NOT_A_MARKER_0xFF) |
|
81 | 83 |
break; |
82 | 84 |
prev_marker = marker; |
83 | 85 |
} |
... | ... |
@@ -94,7 +317,7 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
94 | 94 |
goto done; |
95 | 95 |
} |
96 | 96 |
len = (unsigned int)be16_to_host(len_u16); |
97 |
- cli_dbgmsg("JPEG: Marker %02x, length %u\n", marker, len); |
|
97 |
+ cli_dbgmsg("segment[%d] = 0x%02x, Length %u\n", segment, marker, len); |
|
98 | 98 |
|
99 | 99 |
if (len < 2) { |
100 | 100 |
cli_warnmsg("JPEG: Invalid segment size\n"); |
... | ... |
@@ -111,85 +334,116 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
111 | 111 |
offset += len; |
112 | 112 |
|
113 | 113 |
switch (marker) { |
114 |
- case 0xe0: /* JFIF */ |
|
115 |
- if (app) { |
|
116 |
- cli_warnmsg("JPEG: Duplicate Application Marker\n"); |
|
117 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker"); |
|
118 |
- status = CL_EPARSE; |
|
119 |
- goto done; |
|
120 |
- } |
|
121 |
- if (segment != 1 && (segment != 2 || !comment)) { |
|
122 |
- cli_warnmsg("JPEG: JFIF marker at wrong position\n"); |
|
123 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition"); |
|
124 |
- status = CL_EPARSE; |
|
125 |
- goto done; |
|
126 |
- } |
|
127 |
- if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 5) != 5 || memcmp(buff, "JFIF\0", 5)) { |
|
128 |
- cli_warnmsg("JPEG: No JFIF marker\n"); |
|
129 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoJFIFmarker"); |
|
130 |
- status = CL_EPARSE; |
|
131 |
- goto done; |
|
132 |
- } |
|
133 |
- if (len < 16) { |
|
134 |
- cli_warnmsg("JPEG: JFIF header too short\n"); |
|
135 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort"); |
|
136 |
- status = CL_EPARSE; |
|
137 |
- goto done; |
|
114 |
+ case JPEG_MARKER_SEGMENT_APP0: |
|
115 |
+ /* |
|
116 |
+ * JFIF, maybe |
|
117 |
+ */ |
|
118 |
+ if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("JFIF") + 1) == strlen("JFIF") + 1 && 0 == memcmp(buff, "JFIF\0", strlen("JFIF") + 1)) { |
|
119 |
+ /* Found a JFIF marker */ |
|
120 |
+ if (found_app && num_JFIF > 0) { |
|
121 |
+ cli_warnmsg("JPEG: Duplicate Application Marker found (JFIF)\n"); |
|
122 |
+ cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF); |
|
123 |
+ cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker"); |
|
124 |
+ status = CL_EPARSE; |
|
125 |
+ goto done; |
|
126 |
+ } |
|
127 |
+ if (!(segment == 1 || |
|
128 |
+ (segment == 2 && found_comment) || |
|
129 |
+ (segment == 2 && num_Exif > 0) || |
|
130 |
+ (segment == 3 && found_comment && num_Exif > 0))) { |
|
131 |
+ /* The JFIF segment is technically required to appear first, though it has been observed |
|
132 |
+ * appearing in segment 2 in functional images when segment 1 is a comment or an Exif segment. |
|
133 |
+ * If segment 1 wasn't a comment or Exif, then the file structure is unusual. */ |
|
134 |
+ cli_warnmsg("JPEG: JFIF marker at wrong position, found in segment # %d\n", segment); |
|
135 |
+ cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF); |
|
136 |
+ cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition"); |
|
137 |
+ status = CL_EPARSE; |
|
138 |
+ goto done; |
|
139 |
+ } |
|
140 |
+ if (len < 16) { |
|
141 |
+ cli_warnmsg("JPEG: JFIF header too short\n"); |
|
142 |
+ cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort"); |
|
143 |
+ status = CL_EPARSE; |
|
144 |
+ goto done; |
|
145 |
+ } |
|
146 |
+ cli_dbgmsg(" JFIF application marker\n"); |
|
147 |
+ found_app = true; |
|
148 |
+ num_JFIF += 1; |
|
149 |
+ } else { |
|
150 |
+ /* Found something else. Eg could be an Ocad Revision # (eg "Ocad$Rev: 14797 $"), for example. |
|
151 |
+ Whatever it is, we don't really care for now */ |
|
152 |
+ cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker); |
|
138 | 153 |
} |
139 |
- app = 0xe0; |
|
140 | 154 |
break; |
141 | 155 |
|
142 |
- case 0xe1: /* EXIF */ |
|
143 |
- if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 7) != 7) { |
|
144 |
- cli_warnmsg("JPEG: Can't read Exif header\n"); |
|
145 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadExifHeader"); |
|
146 |
- status = CL_EPARSE; |
|
147 |
- goto done; |
|
148 |
- } |
|
149 |
- if (!memcmp(buff, "Exif\0\0", 6)) { |
|
150 |
- if (app && app != 0xe0) { |
|
151 |
- cli_warnmsg("JPEG: Duplicate Application Marker\n"); |
|
156 |
+ case JPEG_MARKER_SEGMENT_APP1: |
|
157 |
+ /* |
|
158 |
+ * Exif, or maybe XMP data |
|
159 |
+ */ |
|
160 |
+ if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Exif") + 2) == strlen("Exif") + 2) && (0 == memcmp(buff, "Exif\0\0", strlen("Exif") + 2))) { |
|
161 |
+ if (found_app && (num_Exif > 0 || num_SPIFF > 0)) { |
|
162 |
+ cli_warnmsg("JPEG: Duplicate Application Marker found (Exif)\n"); |
|
163 |
+ cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF); |
|
152 | 164 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifDupAppMarker"); |
153 | 165 |
status = CL_EPARSE; |
154 | 166 |
goto done; |
155 | 167 |
} |
156 |
- if (segment > 3 && !comment && app != 0xe0) { |
|
168 |
+ if (segment > 3 && !found_comment && num_JFIF > 0) { |
|
169 |
+ /* If Exif was found after segment 3 and previous segments weren't a comment or JFIF, something is unusual. */ |
|
157 | 170 |
cli_warnmsg("JPEG: Exif marker at wrong position\n"); |
158 | 171 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderBadPosition"); |
159 | 172 |
status = CL_EPARSE; |
160 | 173 |
goto done; |
161 | 174 |
} |
162 |
- } else if (!memcmp(buff, "http://", 7)) { |
|
163 |
- cli_dbgmsg("JPEG: XMP data in segment %u\n", segment); |
|
175 |
+ if (len < 16) { |
|
176 |
+ cli_warnmsg("JPEG: Exif header too short\n"); |
|
177 |
+ cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort"); |
|
178 |
+ status = CL_EPARSE; |
|
179 |
+ goto done; |
|
180 |
+ } |
|
181 |
+ cli_dbgmsg(" Exif application marker\n"); |
|
182 |
+ found_app = true; |
|
183 |
+ num_Exif += 1; |
|
184 |
+ } else if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("http://")) == strlen("http://")) && (0 == memcmp(buff, "http://", strlen("http://")))) { |
|
185 |
+ cli_dbgmsg(" XMP metadata\n"); |
|
186 |
+ found_comment = true; |
|
164 | 187 |
} else { |
165 |
- cli_warnmsg("JPEG: Invalid Exif header\n"); |
|
166 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidExifHeader"); |
|
167 |
- status = CL_EPARSE; |
|
168 |
- goto done; |
|
188 |
+ cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker); |
|
169 | 189 |
} |
170 |
- if (len < 16) { |
|
171 |
- cli_warnmsg("JPEG: Exif header too short\n"); |
|
172 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort"); |
|
173 |
- status = CL_EPARSE; |
|
174 |
- goto done; |
|
190 |
+ break; |
|
191 |
+ |
|
192 |
+ case JPEG_MARKER_SEGMENT_APP2: |
|
193 |
+ /* |
|
194 |
+ * ICC Profile |
|
195 |
+ */ |
|
196 |
+ if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("ICC_PROFILE") + 2) == strlen("ICC_PROFILE") + 2) && |
|
197 |
+ (0 == memcmp(buff, "ICC_PROFILE\0", strlen("ICC_PROFILE" + 1)))) { |
|
198 |
+ /* Found ICC Profile Chunk. Let's print out the chunk #, which follows "ICC_PROFILE\0"... */ |
|
199 |
+ uint8_t chunk_no = buff[strlen("ICC_PROFILE") + 1]; |
|
200 |
+ cli_dbgmsg(" ICC Profile, chunk # %d\n", chunk_no); |
|
201 |
+ } else { |
|
202 |
+ cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker); |
|
175 | 203 |
} |
176 |
- app = 0xe1; |
|
177 | 204 |
break; |
178 | 205 |
|
179 |
- case 0xe8: /* SPIFF */ |
|
180 |
- if (app) { |
|
181 |
- cli_warnmsg("JPEG: Duplicate Application Marker\n"); |
|
206 |
+ case JPEG_MARKER_SEGMENT_APP8: |
|
207 |
+ /* |
|
208 |
+ * SPIFF |
|
209 |
+ */ |
|
210 |
+ if (found_app) { |
|
211 |
+ cli_warnmsg("JPEG: Duplicate Application Marker found (SPIFF)\n"); |
|
212 |
+ cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF); |
|
182 | 213 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFdupAppMarker"); |
183 | 214 |
status = CL_EPARSE; |
184 | 215 |
goto done; |
185 | 216 |
} |
186 |
- if (segment != 1 && (segment != 2 || !comment)) { |
|
217 |
+ if (segment != 1 && (segment != 2 || !found_comment)) { |
|
187 | 218 |
cli_warnmsg("JPEG: SPIFF marker at wrong position\n"); |
188 | 219 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFmarkerBadPosition"); |
189 | 220 |
status = CL_EPARSE; |
190 | 221 |
goto done; |
191 | 222 |
} |
192 |
- if (fmap_readn(map, buff, offset - len + sizeof(len_u16), 6) != 6 || memcmp(buff, "SPIFF\0", 6)) { |
|
223 |
+ if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("SPIFF") + 1) != strlen("SPIFF") + 1 || memcmp(buff, "SPIFF\0", strlen("SPIFF") + 1)) { |
|
193 | 224 |
cli_warnmsg("JPEG: No SPIFF marker\n"); |
194 | 225 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoSPIFFmarker"); |
195 | 226 |
status = CL_EPARSE; |
... | ... |
@@ -201,11 +455,75 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
201 | 201 |
status = CL_EPARSE; |
202 | 202 |
goto done; |
203 | 203 |
} |
204 |
- app = 0xe8; |
|
204 |
+ cli_dbgmsg(" SPIFF application marker\n"); |
|
205 |
+ found_app = true; |
|
206 |
+ num_SPIFF += 1; |
|
207 |
+ break; |
|
208 |
+ |
|
209 |
+ case JPEG_MARKER_SEGMENT_APP13: |
|
210 |
+ /* |
|
211 |
+ * IPTC / IMM metadata (comment), probably |
|
212 |
+ */ |
|
213 |
+ cli_dbgmsg(" IPTC IMM metadata segment marker\n"); |
|
214 |
+ found_comment = true; |
|
215 |
+ break; |
|
216 |
+ |
|
217 |
+ case JPEG_MARKER_SEGMENT_APP14: |
|
218 |
+ /* |
|
219 |
+ * Adobe RGB, probably |
|
220 |
+ */ |
|
221 |
+ if (fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Adobe") + 1) != strlen("Adobe") + 1 || 0 != memcmp(buff, "Adobe\0", strlen("Adobe") + 1)) { |
|
222 |
+ /* Not Adobe, dunno what this is. */ |
|
223 |
+ cli_dbgmsg(" Unfamiliar application marker: 0x%02x\n", marker); |
|
224 |
+ break; |
|
225 |
+ } |
|
226 |
+ cli_dbgmsg(" AdobeRGB application marker\n"); |
|
227 |
+ break; |
|
228 |
+ |
|
229 |
+ case JPEG_MARKER_SEGMENT_APP3: |
|
230 |
+ case JPEG_MARKER_SEGMENT_APP4: |
|
231 |
+ case JPEG_MARKER_SEGMENT_APP5: |
|
232 |
+ case JPEG_MARKER_SEGMENT_APP6: |
|
233 |
+ case JPEG_MARKER_SEGMENT_APP7: |
|
234 |
+ case JPEG_MARKER_SEGMENT_APP9: |
|
235 |
+ case JPEG_MARKER_SEGMENT_APP10: |
|
236 |
+ case JPEG_MARKER_SEGMENT_APP11: |
|
237 |
+ case JPEG_MARKER_SEGMENT_APP12: |
|
238 |
+ case JPEG_MARKER_SEGMENT_APP15: |
|
239 |
+ /* |
|
240 |
+ * Unknown |
|
241 |
+ */ |
|
242 |
+ cli_dbgmsg(" Unfamiliar application marker: 0x%02x\n", marker); |
|
205 | 243 |
break; |
206 | 244 |
|
207 |
- case 0xf7: /* JPG7 */ |
|
208 |
- if (app) { |
|
245 |
+ case JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT: |
|
246 |
+ case JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT: |
|
247 |
+ case JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT: |
|
248 |
+ case JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT: |
|
249 |
+ case JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT: |
|
250 |
+ case JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT: |
|
251 |
+ case JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT: |
|
252 |
+ case JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH: |
|
253 |
+ case JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH: |
|
254 |
+ case JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH: |
|
255 |
+ cli_dbgmsg(" Start of Frame (S0F) %02x\n", (uint8_t)marker); |
|
256 |
+ break; |
|
257 |
+ |
|
258 |
+ case JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES: |
|
259 |
+ cli_dbgmsg(" Huffman Tables definitions (DHT)\n"); |
|
260 |
+ break; |
|
261 |
+ |
|
262 |
+ case JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES: |
|
263 |
+ cli_dbgmsg(" Quantization Tables definitions (DQT)\n"); |
|
264 |
+ break; |
|
265 |
+ |
|
266 |
+ case JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL: |
|
267 |
+ cli_dbgmsg(" Restart Interval definition (DRI)\n"); |
|
268 |
+ break; |
|
269 |
+ |
|
270 |
+ case JPEG_MARKER_SEGMENT_JPG7: /* JPG7 */ |
|
271 |
+ cli_dbgmsg(" JPG7 segment marker\n"); |
|
272 |
+ if (found_app) { |
|
209 | 273 |
cli_warnmsg("JPEG: Application Marker before JPG7\n"); |
210 | 274 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.AppMarkerBeforeJPG7"); |
211 | 275 |
status = CL_EPARSE; |
... | ... |
@@ -213,31 +531,38 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
213 | 213 |
} |
214 | 214 |
goto done; |
215 | 215 |
|
216 |
- case 0xda: /* SOS */ |
|
217 |
- if (!app) { |
|
218 |
- cli_warnmsg("JPEG: Invalid file structure\n"); |
|
219 |
- cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidStructure"); |
|
220 |
- status = CL_EPARSE; |
|
221 |
- goto done; |
|
216 |
+ case JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN: /* SOS */ |
|
217 |
+ cli_dbgmsg(" Start of Scan (SOS) segment marker\n"); |
|
218 |
+ if (!found_app) { |
|
219 |
+ cli_dbgmsg(" Found the Start-of-Scan segment without identifying the JPEG application type.\n"); |
|
222 | 220 |
} |
221 |
+ /* What follows would be scan data (compressed image data), |
|
222 |
+ * parsing is not presently required for validation purposes |
|
223 |
+ * so we'll just call it quits. */ |
|
223 | 224 |
goto done; |
224 | 225 |
|
225 |
- case 0xd9: /* EOI */ |
|
226 |
+ case JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE: /* EOI (End of Image) */ |
|
227 |
+ cli_dbgmsg(" End of Image (EOI) segment marker\n"); |
|
228 |
+ /* |
|
229 |
+ * We shouldn't reach this marker because we exit out when we hit the Start of Scan marker. |
|
230 |
+ */ |
|
226 | 231 |
cli_warnmsg("JPEG: No image in jpeg\n"); |
227 | 232 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoImages"); |
228 | 233 |
status = CL_EPARSE; |
229 | 234 |
goto done; |
230 | 235 |
|
231 |
- case 0xfe: /* COM */ |
|
232 |
- comment = 1; |
|
236 |
+ case JPEG_MARKER_SEGMENT_COM_COMMENT: /* COM (comment) */ |
|
237 |
+ cli_dbgmsg(" Comment (COM) segment marker\n"); |
|
238 |
+ found_comment = true; |
|
233 | 239 |
break; |
234 | 240 |
|
235 |
- case 0xed: /* IPTC */ |
|
236 |
- comment = 1; |
|
241 |
+ case JPEG_MARKER_SEGMENT_DTI: /* DTI */ |
|
242 |
+ cli_dbgmsg(" DTI segment marker\n"); |
|
237 | 243 |
break; |
238 | 244 |
|
239 |
- case 0xf2: /* DTT */ |
|
240 |
- if (prev_segment != 0xf1) { |
|
245 |
+ case JPEG_MARKER_SEGMENT_DTT: /* DTT */ |
|
246 |
+ cli_dbgmsg(" DTT segment marker\n"); |
|
247 |
+ if (prev_segment != JPEG_MARKER_SEGMENT_DTI) { |
|
241 | 248 |
cli_warnmsg("JPEG: No DTI segment before DTT\n"); |
242 | 249 |
cli_append_possibly_unwanted(ctx, "Heuristics.Broken.Media.JPEG.DTTMissingDTISegment"); |
243 | 250 |
status = CL_EPARSE; |
... | ... |
@@ -246,8 +571,10 @@ cl_error_t cli_parsejpeg(cli_ctx *ctx) |
246 | 246 |
break; |
247 | 247 |
|
248 | 248 |
default: |
249 |
+ /* Some unknown marker we don't presently handle, don't worry about it. */ |
|
249 | 250 |
break; |
250 | 251 |
} |
252 |
+ |
|
251 | 253 |
prev_segment = marker; |
252 | 254 |
} |
253 | 255 |
|
... | ... |
@@ -4171,7 +4171,10 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type) |
4171 | 4171 |
|
4172 | 4172 |
case CL_TYPE_GRAPHICS: |
4173 | 4173 |
/* |
4174 |
- * This case is for unhandled graphics types such as BMP. |
|
4174 |
+ * This case is for unhandled graphics types such as BMP, JPEG 2000, etc. |
|
4175 |
+ * |
|
4176 |
+ * Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997) |
|
4177 |
+ * JPEG 2000 is not handled by cli_scanjpeg or cli_parsejpeg. |
|
4175 | 4178 |
*/ |
4176 | 4179 |
break; |
4177 | 4180 |
|