Browse code

bb12389 - fast AC sig load - courtesy of Alberto Wu

This commit addresses the signature load time issue in the following steps:
1. Loaded list items are allocated but left unattached; only a reference to the owning node is set on them for further processing. This does not increase memory usage, because the node reference shares storage with the `next` pointer through a union. See the changes to insert_list and matcher-ac.h.
2. Before the tries are built, the whole list of entries is sorted by node, then by pattern, then by partno. This requires O(N log(N)) time.
3. The list is processed linearly, one node at a time and the `next_same` chain is built. Each next_same chain head is also extracted. This requires O(N) time.
4. The list of heads is sorted by partno. This requires O(M log(M)) time on average with M<=N.
5. The list of heads is processed linearly and the `next` chain is built. This has O(M) complexity.

The commit also improves scan-time performance by adding checks to:
1. Place longer lists earlier in the trie.
2. Keep close patterns close, rather than scattering them further apart.

This reduces memory cache faults, improving both load-time and scan-time performance.

Micah Snyder (micasnyd) authored on 2019/11/09 07:06:13
Showing 2 changed files
... ...
@@ -104,7 +104,8 @@ static inline int insert_list(struct cli_matcher *root, struct cli_ac_patt *patt
104 104
         cli_errmsg("cli_ac_addpatt: Can't allocate memory for list node\n");
105 105
         return CL_EMEM;
106 106
     }
107
-    new->me = pattern;
107
+    new->me   = pattern;
108
+    new->node = pt;
108 109
 
109 110
     root->ac_lists++;
110 111
     newtable = MPOOL_REALLOC(root->mempool, root->ac_listtable, root->ac_lists * sizeof(struct cli_ac_list *));
... ...
@@ -117,111 +118,183 @@ static inline int insert_list(struct cli_matcher *root, struct cli_ac_patt *patt
117 117
 
118 118
     root->ac_listtable                     = newtable;
119 119
     root->ac_listtable[root->ac_lists - 1] = new;
120
+    return CL_SUCCESS;
121
+}
120 122
 
121
-    ph           = pt->list;
122
-    ph_add_after = ph_prev = NULL;
123
-    while (ph) {
124
-        php = ph->me;
125
-        if (!ph_add_after && php->partno <= pattern->partno && (!ph->next || ph->next->me->partno > pattern->partno))
126
-            ph_add_after = ph;
127
-        if ((php->length[0] == pattern->length[0]) && (php->prefix_length[0] == pattern->prefix_length[0]) && (php->ch[0] == pattern->ch[0]) && (php->ch[1] == pattern->ch[1]) && (php->boundary == pattern->boundary)) {
128
-            if (!memcmp(php->pattern, pattern->pattern, php->length[0] * sizeof(uint16_t)) && !memcmp(php->prefix, pattern->prefix, php->prefix_length[0] * sizeof(uint16_t))) {
129
-                if (!php->special && !pattern->special) {
130
-                    match = 1;
131
-                } else if (php->special == pattern->special) {
132
-                    match = 1;
133
-                    for (i = 0; i < php->special; i++) {
134
-                        a1 = php->special_table[i];
135
-                        a2 = pattern->special_table[i];
136
-
137
-                        if (a1->num != a2->num) {
138
-                            match = 0;
139
-                            break;
140
-                        }
123
+#define RETURN_RES_IF_NE(uia, uib) \
124
+    do {                           \
125
+        if (uia < uib) return -1;  \
126
+        if (uia > uib) return +1;  \
127
+    } while (0)
141 128
 
142
-                        if (a1->negative != a2->negative) {
143
-                            match = 0;
144
-                            break;
145
-                        }
129
+static int patt_cmp_fn(const struct cli_ac_patt *a, const struct cli_ac_patt *b)
130
+{
131
+    unsigned int i;
132
+    int res;
133
+    RETURN_RES_IF_NE(a->length[0], b->length[0]);
134
+    RETURN_RES_IF_NE(a->prefix_length[0], b->prefix_length[0]);
135
+    RETURN_RES_IF_NE(a->ch[0], b->ch[0]);
136
+    RETURN_RES_IF_NE(a->ch[1], b->ch[1]);
137
+    RETURN_RES_IF_NE(a->boundary, b->boundary);
138
+
139
+    res = memcmp(a->pattern, b->pattern, a->length[0] * sizeof(uint16_t));
140
+    if (res) return res;
141
+    res = memcmp(a->prefix, b->prefix, a->prefix_length[0] * sizeof(uint16_t));
142
+    if (res) return res;
143
+
144
+    RETURN_RES_IF_NE(a->special, b->special);
145
+    if (!a->special && !b->special)
146
+        return 0;
146 147
 
147
-                        if (a1->type != a2->type) {
148
-                            match = 0;
149
-                            break;
150
-                        } else if (a1->type == AC_SPECIAL_ALT_CHAR) {
151
-                            if (memcmp((a1->alt).byte, (a2->alt).byte, a1->num)) {
152
-                                match = 0;
153
-                                break;
154
-                            }
155
-                        } else if (a1->type == AC_SPECIAL_ALT_STR_FIXED) {
156
-                            if (a1->len != a2->len) {
157
-                                match = 0;
158
-                                break;
159
-                            }
148
+    for (i = 0; i < a->special; i++) {
149
+        struct cli_ac_special *spcl_a = a->special_table[i], *spcl_b = b->special_table[i];
150
+
151
+        RETURN_RES_IF_NE(spcl_a->num, spcl_b->num);
152
+        RETURN_RES_IF_NE(spcl_a->negative, spcl_b->negative);
153
+        RETURN_RES_IF_NE(spcl_a->type, spcl_b->type);
154
+
155
+        if (spcl_a->type == AC_SPECIAL_ALT_CHAR) {
156
+            res = memcmp((spcl_a->alt).byte, (spcl_b->alt).byte, spcl_a->num);
157
+            if (res) return res;
158
+        } else if (spcl_a->type == AC_SPECIAL_ALT_STR_FIXED) {
159
+            unsigned int j;
160
+            RETURN_RES_IF_NE(spcl_a->len[0], spcl_b->len[0]);
161
+            for (j = 0; j < spcl_a->num; j++) {
162
+                res = memcmp((spcl_a->alt).f_str[j], (spcl_b->alt).f_str[j], spcl_a->len[0]);
163
+                if (res) return res;
164
+            }
165
+        } else if (spcl_a->type == AC_SPECIAL_ALT_STR) {
166
+            struct cli_alt_node *alt_a = (spcl_a->alt).v_str, *alt_b = (spcl_b->alt).v_str;
167
+            while (alt_a && alt_b) {
168
+                RETURN_RES_IF_NE(alt_a->len, alt_b->len);
169
+                res = memcmp(alt_a->str, alt_b->str, alt_a->len);
170
+                if (res) return res;
171
+                alt_a = alt_a->next;
172
+                alt_b = alt_b->next;
173
+            }
174
+            RETURN_RES_IF_NE(alt_a, alt_b);
175
+        }
176
+    }
177
+    return 0;
178
+}
160 179
 
161
-                            for (j = 0; j < a1->num; j++) {
162
-                                if (memcmp((a1->alt).f_str[j], (a2->alt).f_str[j], a1->len[0]))
163
-                                    break;
164
-                            }
180
+static int sort_list_fn(const void *a, const void *b)
181
+{
182
+    const struct cli_ac_node *node_a = (*(const struct cli_ac_list **)a)->node;
183
+    const struct cli_ac_node *node_b = (*(const struct cli_ac_list **)b)->node;
184
+    const struct cli_ac_patt *patt_a = (*(const struct cli_ac_list **)a)->me;
185
+    const struct cli_ac_patt *patt_b = (*(const struct cli_ac_list **)b)->me;
186
+    int res;
187
+
188
+    /* 1. Group by owning node
189
+     * (this is for assigning entries to nodes) */
190
+    RETURN_RES_IF_NE(node_a, node_b);
191
+
192
+    /* 2. Group together equal pattern in a node
193
+     * (this is for building the next_same list) */
194
+    res = patt_cmp_fn(patt_a, patt_b);
195
+    if (res)
196
+        return res;
197
+
198
+    /* 3. Sort equal patterns in a node by partno in ascending order
199
+     * (this is required by the matcher) */
200
+    RETURN_RES_IF_NE(patt_a->partno, patt_b->partno);
201
+
202
+    /* 4. Keep close patterns close
203
+     * (this is for performance) */
204
+    RETURN_RES_IF_NE(patt_a, patt_b);
165 205
 
166
-                            if (j < a1->num) {
167
-                                match = 0;
168
-                                break;
169
-                            }
170
-                        } else if (a1->type == AC_SPECIAL_ALT_STR) {
171
-                            b1 = (a1->alt).v_str;
172
-                            b2 = (a2->alt).v_str;
173
-                            while (b1 && b2) {
174
-                                if ((b1->len != b2->len) || memcmp(b1->str, b2->str, b1->len))
175
-                                    break;
176
-                                b1 = b1->next;
177
-                                b2 = b2->next;
178
-                            }
206
+    return 0;
207
+}
179 208
 
180
-                            if (b1 || b2) {
181
-                                match = 0;
182
-                                break;
183
-                            }
184
-                        }
185
-                    }
186
-                } else {
187
-                    match = 0;
188
-                }
209
+static int sort_heads_by_partno_fn(const void *a, const void *b)
210
+{
211
+    const struct cli_ac_list *list_a = *(const struct cli_ac_list **)a;
212
+    const struct cli_ac_list *list_b = *(const struct cli_ac_list **)b;
213
+    const struct cli_ac_patt *patt_a = list_a->me;
214
+    const struct cli_ac_patt *patt_b = list_b->me;
189 215
 
190
-                if (match) {
191
-                    if (pattern->partno < php->partno) {
192
-                        new->next_same = ph;
193
-                        if (ph_prev)
194
-                            ph_prev->next = ph->next;
195
-                        else
196
-                            pt->list = ph->next;
197
-
198
-                        ph->next = NULL;
199
-                        break;
200
-                    } else {
201
-                        while (ph->next_same && ph->next_same->me->partno < pattern->partno)
202
-                            ph = ph->next_same;
216
+    /* 1. Sort heads by partno
217
+     * (this is required by the matcher) */
218
+    RETURN_RES_IF_NE(patt_a->partno, patt_b->partno);
203 219
 
204
-                        new->next_same = ph->next_same;
205
-                        ph->next_same  = new;
206
-                        return CL_SUCCESS;
207
-                    }
208
-                }
209
-            }
210
-        }
220
+    /* 2. Place longer lists earlier
221
+     * (this is for performance) */
211 222
 
212
-        ph_prev = ph;
213
-        ph      = ph->next;
223
+    while (1) {
224
+        if (!list_a->next_same) {
225
+            if (!list_b->next_same)
226
+                break;
227
+            return +1;
228
+        }
229
+        if (!list_b->next_same)
230
+            return -1;
231
+        list_a = list_a->next_same;
232
+        list_b = list_b->next_same;
214 233
     }
215 234
 
216
-    if (ph_add_after) {
217
-        new->next          = ph_add_after->next;
218
-        ph_add_after->next = new;
219
-    } else {
220
-        new->next = pt->list;
221
-        pt->list  = new;
235
+    /* 3. Keep close patterns close
236
+     * (this is for performance) */
237
+    RETURN_RES_IF_NE(patt_a, patt_b);
238
+
239
+    return 0;
240
+}
241
+
242
+static inline void link_node_lists(struct cli_ac_list **listtable, unsigned int nentries)
243
+{
244
+    struct cli_ac_list *prev = listtable[0];
245
+    struct cli_ac_node *node = prev->node;
246
+    unsigned int i, nheads = 1;
247
+
248
+    /* Link equal patterns in the next_same list (entries are already sorted by partno asc) */
249
+    for (i = 1; i < nentries; i++) {
250
+        int ret = patt_cmp_fn(prev->me, listtable[i]->me);
251
+        if (ret) {
252
+            /* This is a new head of a next_same chain */
253
+            prev = listtable[i];
254
+            if (i != nheads) {
255
+                /* Move heads towards the beginning of the table */
256
+                listtable[i]      = listtable[nheads];
257
+                listtable[nheads] = prev;
258
+            }
259
+            nheads++;
260
+        } else {
261
+            prev->next_same = listtable[i];
262
+            prev->next      = NULL;
263
+            prev            = listtable[i];
264
+        }
222 265
     }
223 266
 
224
-    return CL_SUCCESS;
267
+    cli_qsort(listtable, nheads, sizeof(listtable[0]), sort_heads_by_partno_fn);
268
+
269
+    /* Link heads in the next list */
270
+    node->list = listtable[0];
271
+    for (i = 1; i < nheads; i++)
272
+        listtable[i - 1]->next = listtable[i];
273
+    listtable[nheads - 1]->next = NULL;
274
+}
275
+
276
+static void link_lists(struct cli_matcher *root)
277
+{
278
+    struct cli_ac_node *curnode;
279
+    unsigned int i, grouplen;
280
+
281
+    if (!root->ac_lists)
282
+        return;
283
+
284
+    /* Group the list by owning node, pattern equality and sort by partno */
285
+    cli_qsort(root->ac_listtable, root->ac_lists, sizeof(root->ac_listtable[0]), sort_list_fn);
286
+
287
+    curnode = root->ac_listtable[0]->node;
288
+    for (i = 1, grouplen = 1; i <= root->ac_lists; i++, grouplen++) {
289
+        if (i == root->ac_lists || root->ac_listtable[i]->node != curnode) {
290
+            link_node_lists(&root->ac_listtable[i - grouplen], grouplen);
291
+            if (i < root->ac_lists) {
292
+                grouplen = 0;
293
+                curnode  = root->ac_listtable[i]->node;
294
+            }
295
+        }
296
+    }
225 297
 }
226 298
 
227 299
 static inline struct cli_ac_node *add_new_node(struct cli_matcher *root, uint16_t i, uint16_t len)
... ...
@@ -495,6 +568,8 @@ cl_error_t cli_ac_buildtrie(struct cli_matcher *root)
495 495
     if (root->filter)
496 496
         cli_dbgmsg("Using filter for trie %d\n", root->type);
497 497
 
498
+    link_lists(root);
499
+
498 500
     return ac_maketrans(root);
499 501
 }
500 502
 
... ...
@@ -108,7 +108,11 @@ struct cli_ac_patt {
108 108
 
109 109
 struct cli_ac_list {
110 110
     struct cli_ac_patt *me;
111
-    struct cli_ac_list *next, *next_same;
111
+    union {
112
+        struct cli_ac_node *node;
113
+        struct cli_ac_list *next;
114
+    };
115
+    struct cli_ac_list *next_same;
112 116
 };
113 117
 
114 118
 struct cli_ac_node {