From 16daa8e860e393ff39613b908550b0982a2210f2 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 29 Nov 2023 21:29:31 +0100
Subject: [PATCH 5/6] Shrink size of static binary search tree

This also makes it more efficient on the data cache.
---
 source/lexbor/core/sbst.h                   | 10 +++++-----
 source/lexbor/html/tokenizer/state.c        |  2 +-
 utils/lexbor/html/tmp/tokenizer_res.h       |  2 +-
 utils/lexbor/html/tokenizer_entities_bst.py |  8 ++++----
 utils/lexbor/lexbor/LXB.py                  |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
index b0fbc54..40e0e91 100644
--- a/source/lexbor/core/sbst.h
+++ b/source/lexbor/core/sbst.h
@@ -19,12 +19,12 @@ extern "C" {
 typedef struct {
     lxb_char_t key;
 
-    void *value;
-    size_t value_len;
+    lxb_char_t value[6];
+    unsigned char value_len;
 
-    size_t left;
-    size_t right;
-    size_t next;
+    unsigned short left;
+    unsigned short right;
+    unsigned short next;
 }
 lexbor_sbst_entry_static_t;
 
diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
index 70ca391..2f3414f 100644
--- a/source/lexbor/html/tokenizer/state.c
+++ b/source/lexbor/html/tokenizer/state.c
@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
         goto done;
     }
 
-    if (entry->value != NULL) {
+    if (entry->value[0] != 0) {
         tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
         tkz->entity_match = entry;
     }
diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
index b3701d5..73ab66e 100644
--- a/utils/lexbor/html/tmp/tokenizer_res.h
+++ b/utils/lexbor/html/tmp/tokenizer_res.h
@@ -6,7 +6,7 @@
 
 /*
  * Caution!!! Important!!!
- * This file generated by the script
+ * This file is generated by the script
  * "utils/lexbor/html/tokenizer_entities_bst.py"!
  * Do not change this file!
  */
diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
index ee7dcb4..7cd1335 100755
--- a/utils/lexbor/html/tokenizer_entities_bst.py
+++ b/utils/lexbor/html/tokenizer_entities_bst.py
@@ -1,6 +1,6 @@
 
 import json
-import sys, re, os
+import sys, os
 
 # Find and append run script run dir to module search path
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
 
 def entities_bst_create(index):
     bst = {}
-    bst[0] = ["\0", 0, 0, 0, "NULL"]
+    bst[0] = ["\0", 0, 0, 0, "{0}"]
 
     begin = 1
     idx = end = entities_bst_create_tree(index, bst, begin)
@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
     assert len(index[ split[0] ]['values']) < 2, 'Double values'
 
     if len(index[ split[0] ]['values']) == 0:
-        value = "NULL"
+        value = "{0}"
     else:
         value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
 
@@ -210,5 +210,5 @@ def entities_bst_print(bst):
 
 if __name__ == "__main__":
     entities_bst("tmp/tokenizer_res.h",
-                 "../../../source/lexbor/html/tokenizer_res.h",
+                 "../../../source/lexbor/html/tokenizer_res.h",
                  "data/entities.json");
diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
index 2370c66..c41e645 100755
--- a/utils/lexbor/lexbor/LXB.py
+++ b/utils/lexbor/lexbor/LXB.py
@@ -27,7 +27,7 @@ class Temp:
 
         for line in fh:
             for name in self.patterns:
-                line = re.sub(name, '\n'.join(self.patterns[name]), line)
+                line = line.replace(name, '\n'.join(self.patterns[name]))
             self.buffer.append(line)
         fh.close()
 
-- 
2.44.0
