From 0514a1cb2f45ab6dd814118780d56a713f4925a2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:57:48 +0200 Subject: [PATCH 3/6] Patch utilities and data structure to be able to generate smaller lookup tables Changed the generation script to check if everything fits in 32-bits. And change the actual field types to 32-bits. This decreases the hash tables in size. --- source/lexbor/core/shs.h | 4 ++-- utils/lexbor/encoding/multi-byte.py | 2 +- utils/lexbor/encoding/single-byte.py | 2 +- utils/lexbor/lexbor/LXB.py | 12 +++++++++--- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/source/lexbor/core/shs.h b/source/lexbor/core/shs.h index 7a63a07..c84dfaa 100644 --- a/source/lexbor/core/shs.h +++ b/source/lexbor/core/shs.h @@ -27,9 +27,9 @@ lexbor_shs_entry_t; typedef struct { uint32_t key; - void *value; + uint32_t value; - size_t next; + uint32_t next; } lexbor_shs_hash_t; diff --git a/utils/lexbor/encoding/multi-byte.py b/utils/lexbor/encoding/multi-byte.py index ff9ddf1..f8af2d4 100755 --- a/utils/lexbor/encoding/multi-byte.py +++ b/utils/lexbor/encoding/multi-byte.py @@ -151,7 +151,7 @@ class MultiByte: key_id = entries[1].decode('utf-8') - hash_key.append(key_id, '(void *) {}'.format(idx)) + hash_key.append(key_id, idx) return hash_key.create(rate = 1) diff --git a/utils/lexbor/encoding/single-byte.py b/utils/lexbor/encoding/single-byte.py index 9a85d54..ec2023c 100755 --- a/utils/lexbor/encoding/single-byte.py +++ b/utils/lexbor/encoding/single-byte.py @@ -128,7 +128,7 @@ class SingleByte: entries = values[idx] key_id = entries[1].decode('utf-8') - hash_key.append(key_id, '(void *) {}'.format(idx + 0x80)) + hash_key.append(key_id, idx + 0x80) return hash_key.create(rate = 1) diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py index 3e75812..2370c66 100755 --- a/utils/lexbor/lexbor/LXB.py +++ b/utils/lexbor/lexbor/LXB.py @@ -94,7 +94,7 @@ class HashKey: def append(self, key_id, value): self.buffer.append([self.hash_id(int(key_id, 0)), value]) - def create(self, terminate_value = '{0, NULL, 0}', rate = 2, is_const = True, data_before = None): + def create(self, terminate_value = '{0, 0, 0}', rate = 2, is_const = True, data_before = None): test = self.test(int(self.max_table_size / 1.2), int(self.max_table_size * 1.2)) rate_dn = rate - 1 @@ -142,9 +142,12 @@ class HashKey: entry = table[idx] if entry: + assert entry[0] < 2**32 + assert entry[1] < 2**32 + assert entry[2] < 2**32 result.append("{{{}, {}, {}}},".format(entry[0], entry[1], entry[2])) else: - result.append("{0, NULL, 0},") + result.append("{0, 0, 0},") if int(idx) % rate == rate_dn: result.append("\n ") @@ -154,9 +157,12 @@ class HashKey: if len(table): entry = table[-1] if entry: + assert entry[0] < 2**32 + assert entry[1] < 2**32 + assert entry[2] < 2**32 result.append("{{{}, {}, {}}}\n".format(entry[0], entry[1], entry[2])) else: - result.append("{0, NULL, 0}\n") + result.append("{0, 0, 0}\n") result.append("};") -- 2.44.0