mirror of
https://github.com/php/php-src.git
synced 2025-08-15 13:38:49 +02:00
Update Lexbor (#14729)
This commit is contained in:
parent
62a3bbd0e3
commit
4df90af44f
7 changed files with 89 additions and 9 deletions
|
@ -71,7 +71,7 @@ lexbor_cached_power_bin(int exp, int *dec_exp);
|
|||
/*
|
||||
* Inline functions
|
||||
*/
|
||||
#if (LEXBOR_HAVE_BUILTIN_CLZLL)
|
||||
#ifdef LEXBOR_HAVE_BUILTIN_CLZLL
|
||||
#define nxt_leading_zeros64(x) (((x) == 0) ? 64 : __builtin_clzll(x))
|
||||
|
||||
#else
|
||||
|
@ -199,7 +199,7 @@ lexbor_diyfp_sub(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
|
|||
lxb_inline lexbor_diyfp_t
|
||||
lexbor_diyfp_mul(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
|
||||
{
|
||||
#if (LEXBOR_HAVE_UNSIGNED_INT128)
|
||||
#ifdef LEXBOR_HAVE_UNSIGNED_INT128
|
||||
|
||||
uint64_t l, h;
|
||||
lxb_uint128_t u128;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Alexander Borisov
|
||||
* Copyright (C) 2018-2024 Alexander Borisov
|
||||
*
|
||||
* Author: Alexander Borisov <borisov@lexbor.com>
|
||||
*/
|
||||
|
@ -14,9 +14,6 @@ extern "C" {
|
|||
#include "lexbor/core/base.h"
|
||||
|
||||
|
||||
#ifdef LEXBOR_WITH_PERF
|
||||
|
||||
|
||||
LXB_API void *
|
||||
lexbor_perf_create(void);
|
||||
|
||||
|
@ -36,8 +33,6 @@ LXB_API double
|
|||
lexbor_perf_in_sec(void *perf);
|
||||
|
||||
|
||||
#endif /* LEXBOR_WITH_PERF */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
|
68
ext/dom/lexbor/lexbor/core/swar.h
Normal file
68
ext/dom/lexbor/lexbor/core/swar.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright (C) 2024 Alexander Borisov
|
||||
*
|
||||
* Author: Niels Dossche <nielsdos@php.net>
|
||||
*/
|
||||
|
||||
#ifndef LEXBOR_SWAR_H
|
||||
#define LEXBOR_SWAR_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include "lexbor/core/base.h"
|
||||
|
||||
|
||||
/*
|
||||
* Based on techniques from https://graphics.stanford.edu/~seander/bithacks.html
|
||||
*/
|
||||
/* A size_t with 0x01 in every byte (all-ones divided by 0xFF). */
#define LEXBOR_SWAR_ONES (~((size_t) 0) / 0xFF)
|
||||
/* Broadcast the byte value x (0..0xFF) into every byte of a size_t. */
/* NOTE: x is expanded unparenthesised inside the product; pass a simple value. */
#define LEXBOR_SWAR_REPEAT(x) (LEXBOR_SWAR_ONES * (x))
|
||||
/*
 * Non-zero if and only if at least one byte of v is 0x00. The lowest set bit
 * of the result is the 0x80 bit of the lowest-order zero byte; higher 0x80
 * bits may be set spuriously because the subtraction borrows across bytes.
 * v is evaluated twice.
 */
#define LEXBOR_SWAR_HAS_ZERO(v) (((v) - LEXBOR_SWAR_ONES) & ~(v) & LEXBOR_SWAR_REPEAT(0x80))
|
||||
/*
 * Reads the first byte in memory of a uint16_t holding 1: yields 1 on a
 * little-endian host and 0 on a big-endian one.
 */
#define LEXBOR_SWAR_IS_LITTLE_ENDIAN (*(unsigned char *) &(uint16_t){1})
|
||||
|
||||
|
||||
/*
|
||||
* When handling hot loops that search for a set of characters,
|
||||
* this function can be used to quickly move the data pointer much
|
||||
* closer to the first occurrence of such a character.
|
||||
*/
|
||||
lxb_inline const lxb_char_t *
|
||||
lexbor_swar_seek4(const lxb_char_t *data, const lxb_char_t *end,
|
||||
lxb_char_t c1, lxb_char_t c2, lxb_char_t c3, lxb_char_t c4)
|
||||
{
|
||||
size_t bytes, matches, t1, t2, t3, t4;
|
||||
|
||||
if (LEXBOR_SWAR_IS_LITTLE_ENDIAN) {
|
||||
while (data + sizeof(size_t) <= end) {
|
||||
memcpy(&bytes, data, sizeof(size_t));
|
||||
|
||||
t1 = bytes ^ LEXBOR_SWAR_REPEAT(c1);
|
||||
t2 = bytes ^ LEXBOR_SWAR_REPEAT(c2);
|
||||
t3 = bytes ^ LEXBOR_SWAR_REPEAT(c3);
|
||||
t4 = bytes ^ LEXBOR_SWAR_REPEAT(c4);
|
||||
matches = LEXBOR_SWAR_HAS_ZERO(t1) | LEXBOR_SWAR_HAS_ZERO(t2)
|
||||
| LEXBOR_SWAR_HAS_ZERO(t3) | LEXBOR_SWAR_HAS_ZERO(t4);
|
||||
|
||||
if (matches) {
|
||||
data += ((((matches - 1) & LEXBOR_SWAR_ONES) * LEXBOR_SWAR_ONES)
|
||||
>> (sizeof(size_t) * 8 - 8)) - 1;
|
||||
break;
|
||||
} else {
|
||||
data += sizeof(size_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* LEXBOR_SWAR_H */
|
||||
|
|
@ -62,6 +62,16 @@ lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz)
|
|||
parser->rules_end = parser->rules_begin + lxb_rules_length;
|
||||
parser->rules = parser->rules_begin;
|
||||
|
||||
/*
|
||||
* Zero those parameters that can be used (passed to the function).
|
||||
* The parser->rules->phase parameter will be assigned at the end of the
|
||||
* parsing.
|
||||
*
|
||||
* The point is that parser->rules[0] is used as a stub before exiting
|
||||
* parsing.
|
||||
*/
|
||||
parser->rules->context = NULL;
|
||||
|
||||
/* Temp */
|
||||
parser->pos = NULL;
|
||||
parser->str.length = 0;
|
||||
|
|
|
@ -477,6 +477,8 @@ name_state:
|
|||
data++;
|
||||
}
|
||||
|
||||
*name_end = data;
|
||||
|
||||
spaces_state:
|
||||
|
||||
data = lxb_html_encoding_skip_spaces(data, end);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#define LEXBOR_STR_RES_MAP_HEX
|
||||
#define LEXBOR_STR_RES_MAP_NUM
|
||||
#include "lexbor/core/str_res.h"
|
||||
#include "lexbor/core/swar.h"
|
||||
|
||||
#define LXB_HTML_TOKENIZER_RES_ENTITIES_SBST
|
||||
#include "lexbor/html/tokenizer/res.h"
|
||||
|
@ -226,6 +227,8 @@ lxb_html_tokenizer_state_data(lxb_html_tokenizer_t *tkz,
|
|||
{
|
||||
lxb_html_tokenizer_state_begin_set(tkz, data);
|
||||
|
||||
data = lexbor_swar_seek4(data, end, 0x3C, 0x26, 0x0D, 0x00);
|
||||
|
||||
while (data != end) {
|
||||
switch (*data) {
|
||||
/* U+003C LESS-THAN SIGN (<) */
|
||||
|
@ -906,6 +909,8 @@ lxb_html_tokenizer_state_attribute_value_double_quoted(lxb_html_tokenizer_t *tkz
|
|||
|
||||
lxb_html_tokenizer_state_begin_set(tkz, data);
|
||||
|
||||
data = lexbor_swar_seek4(data, end, 0x22, 0x26, 0x0D, 0x00);
|
||||
|
||||
while (data != end) {
|
||||
switch (*data) {
|
||||
/* U+0022 QUOTATION MARK (") */
|
||||
|
|
|
@ -175,7 +175,7 @@ lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
|
|||
* We can create function for this, but...
|
||||
*
|
||||
* The "in head noscript" insertion mode uses this
|
||||
* is you change this code, please, change it in in head noscript" mode
|
||||
* If you change this code, please change it in the "in head noscript" mode too
|
||||
*/
|
||||
case LXB_TAG__TEXT: {
|
||||
lxb_html_token_t ws_token = {0};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue