... | ... |
@@ -62,7 +62,35 @@ ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encodi |
62 | 62 |
#endif |
63 | 63 |
/* applies in both cases */ |
64 | 64 |
if (sctx->codepage == 20127 || sctx->codepage == 65001) { |
65 |
- outbuf = cli_strdup(begin); |
|
65 |
+ char *track; |
|
66 |
+ int bcnt, scnt; |
|
67 |
+ |
|
68 |
+ outbuf = cli_calloc(1, sz); |
|
69 |
+ if (!(outbuf)) |
|
70 |
+ return NULL; |
|
71 |
+ memcpy(outbuf, begin, sz); |
|
72 |
+ |
|
73 |
+ track = outbuf+sz-1; |
|
74 |
+ if ((sctx->codepage == 65001) && (*track & 0x80)) { /* UTF-8 with a most significant bit */ |
|
75 |
+ /* locate the start of the last character */ |
|
76 |
+ for (bcnt = 1; (track != outbuf); track--, bcnt++) { |
|
77 |
+ if (((uint8_t)*track & 0xC0) != 0x80) |
|
78 |
+ break; |
|
79 |
+ } |
|
80 |
+ |
|
81 |
+ /* count number of set (1) significant bits */ |
|
82 |
+ for (scnt = 0; scnt < sizeof(uint8_t)*8; scnt++) { |
|
83 |
+ if (((uint8_t)*track & (0x80 >> scnt)) == 0) |
|
84 |
+ break; |
|
85 |
+ } |
|
86 |
+ |
|
87 |
+ if (bcnt != scnt) { |
|
88 |
+ cli_dbgmsg("ole2_convert_utf: cleaning out %d bytes from incomplete " |
|
89 |
+ "utf-8 character length %d\n", bcnt, scnt); |
|
90 |
+ for (; bcnt > 0; bcnt--, track++) |
|
91 |
+ *track = '\0'; |
|
92 |
+ } |
|
93 |
+ } |
|
66 | 94 |
return outbuf; |
67 | 95 |
} |
68 | 96 |
|