Browse code

pcre: moved diagnostics from matcher to regex execution level; pcre: added diagnostics on matching using ovector and fullinfo

Kevin Lin authored on 2014/09/04 02:00:27
Showing 2 changed files
... ...
@@ -202,7 +202,6 @@ static inline void lsig_sub_matched(const struct cli_matcher *root, struct cli_a
202 202
     }
203 203
 }
204 204
 
205
-#define DISABLE_PCRE_REPORT 0
206 205
 int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const struct cli_matcher *root, struct cli_ac_data *mdata, cli_ctx *ctx)
207 206
 {
208 207
     struct cli_pcre_meta **metatable = root->pcre_metatable, *pm = NULL;
... ...
@@ -239,22 +238,6 @@ int cli_pcre_scanbuf(const unsigned char *buffer, uint32_t length, const struct
239 239
                 lsig_sub_matched(root, mdata, pm->lsigid[0], pm->lsigid[1], ovector[0], 0);
240 240
             }
241 241
 
242
-            /* print out additional diagnostics if cli_debug_flag is set, TODO: is there a right way to reference the debug flag?  */
243
-            if (!DISABLE_PCRE_REPORT && cli_debug_flag) {
244
-                cli_dbgmsg("PCRE Execution Report:\n");
245
-                if (rc > 0) {
246
-                    /* TODO: handle results from full_info */
247
-                }
248
-                else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
249
-                    cli_dbgmsg("no match found\n");
250
-                }
251
-                else {
252
-                    cli_dbgmsg("error occurred in pcre_match: %d\n", rc);
253
-                    /* error handled later */
254
-                }
255
-                cli_dbgmsg("PCRE Execution Report End\n");
256
-            }
257
-
258 242
             /* move off to the end of the match for next match; 
259 243
              * NOTE: misses matches starting within the last match */
260 244
             offset = ovector[1];
... ...
@@ -305,22 +288,6 @@ int cli_pcre_ucondscanbuf(const unsigned char *buffer, uint32_t length, const st
305 305
                 lsig_sub_matched(root, mdata, pm->lsigid[0], pm->lsigid[1], ovector[0], 0);
306 306
             }
307 307
 
308
-            /* print out additional diagnostics if cli_debug_flag is set, TODO: is there a right way to reference the debug flag?  */
309
-            if (!DISABLE_PCRE_REPORT && cli_debug_flag) {
310
-                cli_dbgmsg("PCRE Execution Report:\n");
311
-                if (rc > 0) {
312
-                    /* TODO: handle results from full_info */
313
-                }
314
-                else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
315
-                    cli_dbgmsg("no match found\n");
316
-                }
317
-                else {
318
-                    cli_dbgmsg("error occurred in pcre_match: %d\n", rc);
319
-                    /* error handled later */
320
-                }
321
-                cli_dbgmsg("PCRE Execution Report End\n");
322
-            }
323
-
324 308
             /* move off to the end of the match for next match; 
325 309
              * NOTE: misses matches starting within the last match */
326 310
             offset = ovector[1];
... ...
@@ -139,22 +139,113 @@ int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, l
139 139
     return CL_SUCCESS;
140 140
 }
141 141
 
142
-/* TODO: fix this function */
142
+#define DISABLE_PCRE_REPORT 0 /* non-zero suppresses the PCRE execution report printed by cli_pcre_match */
143
+#define MATCH_MAXLEN 1028 /* cap on the number of matched bytes hex-dumped per capture group in the debug report */
144
+
145
+/* TODO: audit this function, how to handle the named substring name? */
146
+static void named_substr_print(struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector, size_t ovlen)
147
+{
148
+    int i, j, length, namecount, trunc;
149
+    unsigned char *tabptr;
150
+    int name_entry_size;
151
+    unsigned char *name_table;
152
+    const char *start;
153
+    char outstr[2*MATCH_MAXLEN+1];
154
+
155
+    /* determine if there are named substrings */
156
+    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
157
+    if (namecount <= 0) {
158
+        cli_dbgmsg("named_substr: no named substrings\n");
159
+    }
160
+    else {
161
+        cli_dbgmsg("named_substr: named substrings\n");
162
+
163
+        /* extract named substring translation table */
164
+        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
165
+        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
166
+
167
+        /* print named substring information */
168
+        tabptr = name_table;
169
+        for (i = 0; i < namecount; i++) {
170
+            int n = (tabptr[0] << 8) | tabptr[1];
171
+
172
+            start = buffer + ovector[2*n];
173
+            length = ovector[2*n+1] - ovector[2*n];
174
+
175
+            trunc = 0;
176
+            if (length > MATCH_MAXLEN) {
177
+                trunc = 1;
178
+                length = MATCH_MAXLEN;
179
+            }
180
+
181
+            for (j = 0; j < length; ++j)
182
+                snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
183
+
184
+            cli_dbgmsg("named_substr:  (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
185
+                       outstr, trunc ? " (trunc)":"");
186
+            /*cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
187
+                         ovector[2*n+1] - ovector[2*n], subject + ovector[2*n], trunc ? " (trunc)":"");*/
188
+            tabptr += name_entry_size;
189
+        }
190
+    }
191
+}
192
+
193
+/* TODO: audit this function */
143 194
 int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, int *ovector, size_t ovlen)
144 195
 {
145
-    int rc, startoffset;
196
+    int rc, startoffset, i, j, length, trunc;
197
+    const char *start;
198
+    char outstr[2*MATCH_MAXLEN+1];
146 199
 
147 200
     if (ovlen % 3) {
148 201
         cli_dbgmsg("cli_pcre_match: ovector length is not a multiple of 3\n");
149 202
         return CL_EARG;
150 203
     }
151 204
 
205
+    /* set the startoffset, override if a value is specified */
152 206
     startoffset = pd->search_offset;
153 207
     if (override_offset >= 0)
154 208
         startoffset = override_offset;
155 209
 
210
+    /* execute the pcre */
156 211
     rc = pcre_exec(pd->re, pd->ex, buffer, buflen, startoffset, options, ovector, ovlen);
157 212
 
213
+    /* print out additional diagnostics if cli_debug_flag is set */
214
+    if (!DISABLE_PCRE_REPORT && cli_debug_flag) {
215
+        cli_dbgmsg("\n");
216
+        cli_dbgmsg("cli_pcre_match: PCRE Execution Report:\n");
217
+        if (rc > 0) {
218
+            /* print out full-match and capture groups */
219
+            for (i = 0; i < rc; ++i) {
220
+                start = buffer + ovector[2*i];
221
+                length = ovector[2*i+1] - ovector[2*i];
222
+
223
+                trunc = 0;
224
+                if (length > MATCH_MAXLEN) {
225
+                    trunc = 1;
226
+                    length = MATCH_MAXLEN;
227
+                }
228
+
229
+                for (j = 0; j < length; ++j) 
230
+                    snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j));
231
+
232
+                cli_dbgmsg("cli_pcre_match:  %d: %s%s\n", i, outstr, trunc ? " (trunc)":"");
233
+                //cli_dbgmsg("cli_pcre_match:  %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
234
+            }
235
+
236
+            named_substr_print(pd, buffer, ovector, ovlen);
237
+        }
238
+        else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) {
239
+            cli_dbgmsg("cli_pcre_match: no match found\n");
240
+        }
241
+        else {
242
+            cli_dbgmsg("cli_pcre_match: error occurred in pcre_match: %d\n", rc);
243
+            /* error handled by caller */
244
+        }
245
+        cli_dbgmsg("cli_pcre_match: PCRE Execution Report End\n");
246
+        cli_dbgmsg("\n");
247
+    }
248
+
158 249
     return rc;
159 250
 }
160 251