1From 7fde66f32dcfbdc5df97fbffe411c0d7fd60fa50 Mon Sep 17 00:00:00 2001
2From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
3Date: Wed, 29 Nov 2023 21:29:31 +0100
4Subject: [PATCH 2/2] Shrink size of static binary search tree
5
6This also makes it more efficient on the data cache.
7---
8 source/lexbor/core/sbst.h                   | 10 +++++-----
9 source/lexbor/html/tokenizer/state.c        |  2 +-
10 utils/lexbor/html/tmp/tokenizer_res.h       |  2 +-
11 utils/lexbor/html/tokenizer_entities_bst.py |  8 ++++----
12 utils/lexbor/lexbor/LXB.py                  |  2 +-
13 5 files changed, 12 insertions(+), 12 deletions(-)
14
15diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
16index b0fbc54..40e0e91 100755
17--- a/source/lexbor/core/sbst.h
18+++ b/source/lexbor/core/sbst.h
19@@ -19,12 +19,12 @@ extern "C" {
20 typedef struct {
21     lxb_char_t key;
22
23-    void       *value;
24-    size_t     value_len;
25+    lxb_char_t         value[6];
26+    unsigned char      value_len;
27
28-    size_t     left;
29-    size_t     right;
30-    size_t     next;
31+    unsigned short     left;
32+    unsigned short     right;
33+    unsigned short     next;
34 }
35 lexbor_sbst_entry_static_t;
36
37diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
38index 70ca391..2f3414f 100755
39--- a/source/lexbor/html/tokenizer/state.c
40+++ b/source/lexbor/html/tokenizer/state.c
41@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
42             goto done;
43         }
44
45-        if (entry->value != NULL) {
46+        if (entry->value[0] != 0) {
47             tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
48             tkz->entity_match = entry;
49         }
50diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
51index b3701d5..73ab66e 100755
52--- a/utils/lexbor/html/tmp/tokenizer_res.h
53+++ b/utils/lexbor/html/tmp/tokenizer_res.h
54@@ -6,7 +6,7 @@
55
56 /*
57  * Caution!!! Important!!!
58- * This file generated by the script
59+ * This file is generated by the script
60  * "utils/lexbor/html/tokenizer_entities_bst.py"!
61  * Do not change this file!
62  */
63diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
64index ee7dcb4..7cd1335 100755
65--- a/utils/lexbor/html/tokenizer_entities_bst.py
66+++ b/utils/lexbor/html/tokenizer_entities_bst.py
67@@ -1,6 +1,6 @@
68
69 import json
70-import sys, re, os
71+import sys, os
72
73 # Find and append run script run dir to module search path
74 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
75@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
76
77 def entities_bst_create(index):
78     bst = {}
79-    bst[0] = ["\0", 0, 0, 0, "NULL"]
80+    bst[0] = ["\0", 0, 0, 0, "{0}"]
81
82     begin = 1
83     idx = end = entities_bst_create_tree(index, bst, begin)
84@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
85         assert len(index[ split[0] ]['values']) < 2, 'Double values'
86
87         if len(index[ split[0] ]['values']) == 0:
88-            value = "NULL"
89+            value = "{0}"
90         else:
91             value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
92
93@@ -210,5 +210,5 @@ def entities_bst_print(bst):
94
95 if __name__ == "__main__":
96     entities_bst("tmp/tokenizer_res.h",
97-                 "../../../source/lexbor/html/tokenizer_res.h",
98+                 "../../../source/lexbor/html/tokenizer_res.h",
99                  "data/entities.json");
100diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
101index 3e75812..b068ea3 100755
102--- a/utils/lexbor/lexbor/LXB.py
103+++ b/utils/lexbor/lexbor/LXB.py
104@@ -27,7 +27,7 @@ class Temp:
105
106         for line in fh:
107             for name in self.patterns:
108-                line = re.sub(name, '\n'.join(self.patterns[name]), line)
109+                line = line.replace(name, '\n'.join(self.patterns[name]))
110             self.buffer.append(line)
111         fh.close()
112
113--
1142.43.0
115
116