File: 0005-Shrink-size-of-static-binary-search-tree.patch

package info (click to toggle)
php8.4 8.4.11-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 208,108 kB
  • sloc: ansic: 1,060,628; php: 35,345; sh: 11,866; cpp: 7,201; pascal: 4,913; javascript: 3,091; asm: 2,810; yacc: 2,411; makefile: 689; xml: 446; python: 301; awk: 148
file content (128 lines) | stat: -rw-r--r-- 4,251 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
From 16daa8e860e393ff39613b908550b0982a2210f2 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 29 Nov 2023 21:29:31 +0100
Subject: [PATCH 5/6] Shrink size of static binary search tree

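Store each entry's value inline in a fixed 6-byte array with a one-byte
length, and narrow the left/right/next indices to unsigned short, instead
of using a pointer plus size_t fields. The smaller entries are also
friendlier to the data cache.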
---
 source/lexbor/core/sbst.h                   | 19 ++++++++++++++-----
 source/lexbor/html/tokenizer/state.c        |  2 +-
 utils/lexbor/html/tmp/tokenizer_res.h       |  2 +-
 utils/lexbor/html/tokenizer_entities_bst.py |  8 ++++----
 utils/lexbor/lexbor/LXB.py                  |  2 +-
 5 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/source/lexbor/core/sbst.h b/source/lexbor/core/sbst.h
index b0fbc54..15a1d40 100644
--- a/source/lexbor/core/sbst.h
+++ b/source/lexbor/core/sbst.h
@@ -15,16 +15,25 @@ extern "C" {
 
 #include "lexbor/core/base.h"
 
+#ifdef __has_attribute
+# if __has_attribute(nonstring) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 15
+#  define LXB_NONSTRING __attribute__((nonstring))
+# else
+#  define LXB_NONSTRING
+# endif
+#else
+# define LXB_NONSTRING
+#endif
 
 typedef struct {
     lxb_char_t key;
 
-    void       *value;
-    size_t     value_len;
+    lxb_char_t         value[6] LXB_NONSTRING;
+    unsigned char      value_len;
 
-    size_t     left;
-    size_t     right;
-    size_t     next;
+    unsigned short     left;
+    unsigned short     right;
+    unsigned short     next;
 }
 lexbor_sbst_entry_static_t;
 
diff --git a/source/lexbor/html/tokenizer/state.c b/source/lexbor/html/tokenizer/state.c
index 158aade..207b909 100644
--- a/source/lexbor/html/tokenizer/state.c
+++ b/source/lexbor/html/tokenizer/state.c
@@ -1820,7 +1820,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
             goto done;
         }
 
-        if (entry->value != NULL) {
+        if (entry->value[0] != 0) {
             tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
             tkz->entity_match = entry;
         }
diff --git a/utils/lexbor/html/tmp/tokenizer_res.h b/utils/lexbor/html/tmp/tokenizer_res.h
index b3701d5..73ab66e 100644
--- a/utils/lexbor/html/tmp/tokenizer_res.h
+++ b/utils/lexbor/html/tmp/tokenizer_res.h
@@ -6,7 +6,7 @@
 
 /*
  * Caution!!! Important!!!
- * This file generated by the script
+ * This file is generated by the script
  * "utils/lexbor/html/tokenizer_entities_bst.py"!
  * Do not change this file!
  */
diff --git a/utils/lexbor/html/tokenizer_entities_bst.py b/utils/lexbor/html/tokenizer_entities_bst.py
index 4fa0999..8bd83b2 100755
--- a/utils/lexbor/html/tokenizer_entities_bst.py
+++ b/utils/lexbor/html/tokenizer_entities_bst.py
@@ -1,6 +1,6 @@
 
 import json
-import sys, re, os
+import sys, os
 
 # Find and append run script run dir to module search path
 ABS_PATH = os.path.dirname(os.path.abspath(__file__))
@@ -62,7 +62,7 @@ def entities_bst_create_layer(name, entry, index):
 
 def entities_bst_create(index):
     bst = {}
-    bst[0] = ["\0", 0, 0, 0, "NULL"]
+    bst[0] = ["\0", 0, 0, 0, "{0}"]
 
     begin = 1
     idx = end = entities_bst_create_tree(index, bst, begin)
@@ -114,7 +114,7 @@ def entities_bst_create_tree(index, bst, idx):
         assert len(index[ split[0] ]['values']) < 2, 'Double values'
 
         if len(index[ split[0] ]['values']) == 0:
-            value = "NULL"
+            value = "{0}"
         else:
             value = '"{}"'.format(toHex(index[ split[0] ]['values'][0]['characters']))
 
@@ -210,5 +210,5 @@ def entities_bst_print(bst):
 
 if __name__ == "__main__":
     entities_bst("tmp/tokenizer_res.h",
-                 "../../../source/lexbor/html/tokenizer_res.h",
+                 "../../../source/lexbor/html/tokenizer/res.h",
                  "data/entities.json");
diff --git a/utils/lexbor/lexbor/LXB.py b/utils/lexbor/lexbor/LXB.py
index 3e75812..b068ea3 100755
--- a/utils/lexbor/lexbor/LXB.py
+++ b/utils/lexbor/lexbor/LXB.py
@@ -27,7 +27,7 @@ class Temp:
 
         for line in fh:
             for name in self.patterns:
-                line = re.sub(name, '\n'.join(self.patterns[name]), line)
+                line = line.replace(name, '\n'.join(self.patterns[name]))
             self.buffer.append(line)
         fh.close()
 
-- 
2.49.0