mirror of
https://github.com/php/php-src.git
synced 2025-08-16 14:08:47 +02:00
128 lines
4.2 KiB
Diff
128 lines
4.2 KiB
Diff
From 16daa8e860e393ff39613b908550b0982a2210f2 Mon Sep 17 00:00:00 2001
|
|
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
|
|
Date: Wed, 29 Nov 2023 21:29:31 +0100
|
|
Subject: [PATCH 5/6] Shrink size of static binary search tree
|
|
|
|
This also makes it more efficient on the data cache.
|
|
---
|
|
source/lexbor/core/sbst.h | 19 ++++++++++++++-----
|
|
source/lexbor/html/tokenizer/state.c | 2 +-
|
|
utils/lexbor/html/tmp/tokenizer_res.h | 2 +-
|
|
utils/lexbor/html/tokenizer_entities_bst.py | 8 ++++----
|
|
utils/lexbor/lexbor/LXB.py | 2 +-
|
|
5 files changed, 21 insertions(+), 12 deletions(-)
|
|
|
|
diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
|
|
index b0fbc54..15a1d40 100644
|
|
--- a/source/lexbor/core/sbst.h
|
|
+++ b/source/lexbor/core/sbst.h
|
|
@@ -15,16 +15,25 @@ extern "C" {
|
|
|
|
#include "lexbor/core/base.h"
|
|
|
|
+#ifdef __has_attribute
|
|
+# if __has_attribute(nonstring) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 15
|
|
+# define LXB_NONSTRING __attribute__((nonstring))
|
|
+# else
|
|
+# define LXB_NONSTRING
|
|
+# endif
|
|
+#else
|
|
+# define LXB_NONSTRING
|
|
+#endif
|
|
|
|
typedef struct {
|
|
lxb_char_t key;
|
|
|
|
- void *value;
|
|
- size_t value_len;
|
|
+ lxb_char_t value[6] LXB_NONSTRING;
|
|
+ unsigned char value_len;
|
|
|
|
- size_t left;
|
|
- size_t right;
|
|
- size_t next;
|
|
+ unsigned short left;
|
|
+ unsigned short right;
|
|
+ unsigned short next;
|
|
}
|
|
lexbor_sbst_entry_static_t;
|
|
|
|
diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
|
|
index 158aade..207b909 100644
|
|
--- a/source/lexbor/html/tokenizer/state.c
|
|
+++ b/source/lexbor/html/tokenizer/state.c
|
|
@@ -1820,7 +1820,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
|
|
goto done;
|
|
}
|
|
|
|
- if (entry->value != NULL) {
|
|
+ if (entry->value[0] != 0) {
|
|
tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
|
|
tkz->entity_match = entry;
|
|
}
|
|
diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
|
|
index b3701d5..73ab66e 100644
|
|
--- a/utils/lexbor/html/tmp/tokenizer_res.h
|
|
+++ b/utils/lexbor/html/tmp/tokenizer_res.h
|
|
@@ -6,7 +6,7 @@
|
|
|
|
/*
|
|
* Caution!!! Important!!!
|
|
- * This file generated by the script
|
|
+ * This file is generated by the script
|
|
* "utils/lexbor/html/tokenizer_entities_bst.py"!
|
|
* Do not change this file!
|
|
*/
|
|
diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
|
|
index 4fa0999..8bd83b2 100755
|
|
--- a/utils/lexbor/html/tokenizer_entities_bst.py
|
|
+++ b/utils/lexbor/html/tokenizer_entities_bst.py
|
|
@@ -1,6 +1,6 @@
|
|
|
|
import json
|
|
-import sys, re, os
|
|
+import sys, os
|
|
|
|
# Find and append run script run dir to module search path
|
|
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
|
|
|
|
def entities_bst_create(index):
|
|
bst = {}
|
|
- bst[0] = ["\0", 0, 0, 0, "NULL"]
|
|
+ bst[0] = ["\0", 0, 0, 0, "{0}"]
|
|
|
|
begin = 1
|
|
idx = end = entities_bst_create_tree(index, bst, begin)
|
|
@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
|
|
assert len(index[ split[0] ]['values']) < 2, 'Double values'
|
|
|
|
if len(index[ split[0] ]['values']) == 0:
|
|
- value = "NULL"
|
|
+ value = "{0}"
|
|
else:
|
|
value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
|
|
|
|
@@ -210,5 +210,5 @@ def entities_bst_print(bst):
|
|
|
|
if __name__ == "__main__":
|
|
entities_bst("tmp/tokenizer_res.h",
|
|
- "../../../source/lexbor/html/tokenizer_res.h",
|
|
+ "../../../source/lexbor/html/tokenizer/res.h",
|
|
"data/entities.json");
|
|
diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
|
|
index 3e75812..b068ea3 100755
|
|
--- a/utils/lexbor/lexbor/LXB.py
|
|
+++ b/utils/lexbor/lexbor/LXB.py
|
|
@@ -27,7 +27,7 @@ class Temp:
|
|
|
|
for line in fh:
|
|
for name in self.patterns:
|
|
- line = re.sub(name, '\n'.join(self.patterns[name]), line)
|
|
+ line = line.replace(name, '\n'.join(self.patterns[name]))
|
|
self.buffer.append(line)
|
|
fh.close()
|
|
|
|
--
|
|
2.49.0
|
|
|