... | ... |
@@ -78,7 +78,18 @@ static const struct key_entry msxml_keys[] = { |
78 | 78 |
}; |
79 | 79 |
static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry); |
80 | 80 |
|
81 |
+enum msxml_state { /* Scanner state kept in struct msxml_cbdata and advanced one input byte at a time by msxml_read_cb() to detect numeric character entities ("&#" + digits, or "&#x" + hex digits) that lack a closing ';' */ |
|
82 |
+ MSXML_STATE_NORMAL = 0, /* not inside an entity; a '&' byte moves to ENTITY_START_1 */ |
|
83 |
+ MSXML_STATE_ENTITY_START_1, /* '&' was seen; '#' moves to ENTITY_START_2, any other byte returns to NORMAL */ |
|
84 |
+ MSXML_STATE_ENTITY_START_2, /* "&#" was seen; 'x' selects ENTITY_HEX, a decimal digit selects ENTITY_DEC, any other byte returns to NORMAL */ |
|
85 |
+ MSXML_STATE_ENTITY_HEX, /* consuming [0-9a-fA-F]; the first byte outside that set moves to ENTITY_CLOSE */ |
|
86 |
+ MSXML_STATE_ENTITY_DEC, /* consuming [0-9]; the first non-digit byte moves to ENTITY_CLOSE */ |
|
87 |
+ MSXML_STATE_ENTITY_CLOSE, /* transient: the reader emits a ';' if the current byte is not one, then resets to NORMAL before copying that byte */ |
|
88 |
+ MSXML_STATE_ENTITY_NONE /* NOTE(review): not referenced by the state machine visible in msxml_read_cb(); intended use unclear, confirm or remove */ |
|
89 |
+}; |
|
90 |
+ |
|
81 | 91 |
struct msxml_cbdata { |
92 |
+ enum msxml_state state; |
|
82 | 93 |
fmap_t *map; |
83 | 94 |
const unsigned char *window; |
84 | 95 |
off_t winpos, mappos; |
... | ... |
@@ -145,8 +156,14 @@ int msxml_read_cb(void *ctx, char *buffer, int len) |
145 | 145 |
wbytes = 0; |
146 | 146 |
rbytes = cbdata->winsize - cbdata->winpos; |
147 | 147 |
|
148 |
+ /* copying loop with preprocessing */ |
|
148 | 149 |
while (wbytes < len) { |
149 |
- size_t written = MIN(rbytes, len); |
|
150 |
+ const unsigned char *read_from; |
|
151 |
+ char *write_to = buffer + wbytes; |
|
152 |
+ enum msxml_state *state; |
|
153 |
+#if MSXML_VERBIOSE |
|
154 |
+ size_t written; |
|
155 |
+#endif |
|
150 | 156 |
|
151 | 157 |
if (!rbytes) { |
152 | 158 |
if ((winret = msxml_read_cb_new_window(cbdata)) < 0) |
... | ... |
@@ -159,16 +176,68 @@ int msxml_read_cb(void *ctx, char *buffer, int len) |
159 | 159 |
rbytes = cbdata->winsize; |
160 | 160 |
} |
161 | 161 |
|
162 |
+#if MSXML_VERBIOSE |
|
162 | 163 |
written = MIN(rbytes, len - wbytes); |
163 |
- |
|
164 |
- cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->%llu\n", |
|
164 |
+ cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n", |
|
165 | 165 |
(long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize, |
166 | 166 |
(long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written)); |
167 |
+#endif |
|
167 | 168 |
|
168 |
- memcpy(buffer + wbytes, cbdata->window + cbdata->winpos, written); |
|
169 |
+ read_from = cbdata->window + cbdata->winpos; |
|
170 |
+ state = &(cbdata->state); |
|
171 |
+ |
|
172 |
+ while (rbytes > 0 && wbytes < len) { |
|
173 |
+ switch (*state) { |
|
174 |
+ case MSXML_STATE_NORMAL: |
|
175 |
+ if ((*read_from) == '&') |
|
176 |
+ *state = MSXML_STATE_ENTITY_START_1; |
|
177 |
+ break; |
|
178 |
+ case MSXML_STATE_ENTITY_START_1: |
|
179 |
+ if ((*read_from) == '#') |
|
180 |
+ *state = MSXML_STATE_ENTITY_START_2; |
|
181 |
+ else |
|
182 |
+ *state = MSXML_STATE_NORMAL; |
|
183 |
+ break; |
|
184 |
+ case MSXML_STATE_ENTITY_START_2: |
|
185 |
+ if ((*read_from) == 'x') |
|
186 |
+ *state = MSXML_STATE_ENTITY_HEX; |
|
187 |
+ else if (((*read_from) >= '0') && ((*read_from) <= '9')) |
|
188 |
+ *state = MSXML_STATE_ENTITY_DEC; |
|
189 |
+ else |
|
190 |
+ *state = MSXML_STATE_NORMAL; |
|
191 |
+ break; |
|
192 |
+ case MSXML_STATE_ENTITY_HEX: |
|
193 |
+ if ((((*read_from) >= '0') && ((*read_from) <= '9')) || |
|
194 |
+ (((*read_from) >= 'a') && ((*read_from) <= 'f')) || |
|
195 |
+ (((*read_from) >= 'A') && ((*read_from) <= 'F'))) {} |
|
196 |
+ else |
|
197 |
+ *state = MSXML_STATE_ENTITY_CLOSE; |
|
198 |
+ break; |
|
199 |
+ case MSXML_STATE_ENTITY_DEC: |
|
200 |
+ if (((*read_from) >= '0') && ((*read_from) <= '9')) {} |
|
201 |
+ else |
|
202 |
+ *state = MSXML_STATE_ENTITY_CLOSE; |
|
203 |
+ break; |
|
204 |
+ default: |
|
205 |
+ cli_errmsg("unknown *state: %d\n", *state); |
|
206 |
+ } |
|
169 | 207 |
|
170 |
- wbytes += written; |
|
171 |
- rbytes -= written; |
|
208 |
+ if (*state == MSXML_STATE_ENTITY_CLOSE) { |
|
209 |
+ if ((*read_from) != ';') { |
|
210 |
+ cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n", |
|
211 |
+ (int)(read_from - cbdata->window)); |
|
212 |
+ (*write_to++) = ';'; |
|
213 |
+ wbytes++; |
|
214 |
+ } |
|
215 |
+ *state = MSXML_STATE_NORMAL; |
|
216 |
+ if (wbytes >= len) |
|
217 |
+ break; |
|
218 |
+ } |
|
219 |
+ |
|
220 |
+ *(write_to++) = *(read_from++); |
|
221 |
+ rbytes--; |
|
222 |
+ wbytes++; |
|
223 |
+ } |
|
172 | 224 |
} |
173 | 225 |
|
174 | 226 |
cbdata->winpos = cbdata->winsize - rbytes; |