Update Lexbor (#14729)

This commit is contained in:
Niels Dossche 2024-06-29 13:42:56 -07:00 committed by GitHub
parent 62a3bbd0e3
commit 4df90af44f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 89 additions and 9 deletions

View file

@ -71,7 +71,7 @@ lexbor_cached_power_bin(int exp, int *dec_exp);
/*
* Inline functions
*/
#if (LEXBOR_HAVE_BUILTIN_CLZLL)
#ifdef LEXBOR_HAVE_BUILTIN_CLZLL
#define nxt_leading_zeros64(x) (((x) == 0) ? 64 : __builtin_clzll(x))
#else
@ -199,7 +199,7 @@ lexbor_diyfp_sub(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
lxb_inline lexbor_diyfp_t
lexbor_diyfp_mul(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
{
#if (LEXBOR_HAVE_UNSIGNED_INT128)
#ifdef LEXBOR_HAVE_UNSIGNED_INT128
uint64_t l, h;
lxb_uint128_t u128;

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Alexander Borisov
* Copyright (C) 2018-2024 Alexander Borisov
*
* Author: Alexander Borisov <borisov@lexbor.com>
*/
@ -14,9 +14,6 @@ extern "C" {
#include "lexbor/core/base.h"
#ifdef LEXBOR_WITH_PERF
LXB_API void *
lexbor_perf_create(void);
@ -36,8 +33,6 @@ LXB_API double
lexbor_perf_in_sec(void *perf);
#endif /* LEXBOR_WITH_PERF */
#ifdef __cplusplus
} /* extern "C" */
#endif

View file

@ -0,0 +1,68 @@
/*
* Copyright (C) 2024 Alexander Borisov
*
* Author: Niels Dossche <nielsdos@php.net>
*/
#ifndef LEXBOR_SWAR_H
#define LEXBOR_SWAR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "lexbor/core/base.h"
/*
* Based on techniques from https://graphics.stanford.edu/~seander/bithacks.html
*/
/* A size_t with the value 0x01 in every byte (0x0101...01). */
#define LEXBOR_SWAR_ONES (~((size_t) 0) / 0xFF)
/* Broadcasts the byte value x (0..0xFF) into every byte of a size_t. */
#define LEXBOR_SWAR_REPEAT(x) (LEXBOR_SWAR_ONES * (x))
/*
* Non-zero if and only if at least one byte of v is zero.
* Bit 7 of the least significant zero byte is always set in the result;
* flag bits in more significant bytes may be set spuriously because of
* borrow propagation, so only the lowest set bit is exact.
* Note: v is evaluated twice, so pass an expression without side effects.
*/
#define LEXBOR_SWAR_HAS_ZERO(v) (((v) - LEXBOR_SWAR_ONES) & ~(v) & LEXBOR_SWAR_REPEAT(0x80))
/*
* Reads the first byte in memory of a uint16_t holding 1: yields 1 on a
* little-endian host and 0 on a big-endian host.
* NOTE(review): this relies on a C99 compound literal, which is not standard
* C++ -- confirm that no C++ translation unit expands this macro.
*/
#define LEXBOR_SWAR_IS_LITTLE_ENDIAN (*(unsigned char *) &(uint16_t){1})
/*
 * When handling hot loops that search for a set of characters,
 * this function can be used to quickly move the data pointer much
 * closer to the first occurrence of such a character.
 *
 * The buffer is examined one machine word (sizeof(size_t) bytes) at a time.
 * Scanning stops at the first word that contains c1, c2, c3 or c4, or when
 * fewer than sizeof(size_t) bytes remain.
 *
 * data: current position in the buffer.
 * end:  one past the last byte of the buffer.
 * c1..c4: the byte values to look for.
 *
 * Returns a pointer in the range [data, end]:
 *   - if a word containing one of the bytes was found, the pointer to the
 *     first such byte inside that word;
 *   - otherwise the position where word-wise scanning stopped (fewer than
 *     sizeof(size_t) bytes are left).
 * On big-endian hosts no scanning is done and data is returned unchanged.
 * In every case the caller must continue with an ordinary byte-wise loop
 * starting at the returned pointer.
 */
lxb_inline const lxb_char_t *
lexbor_swar_seek4(const lxb_char_t *data, const lxb_char_t *end,
                  lxb_char_t c1, lxb_char_t c2, lxb_char_t c3, lxb_char_t c4)
{
    size_t bytes, matches, t1, t2, t3, t4;
    size_t m1, m2, m3, m4;

    if (LEXBOR_SWAR_IS_LITTLE_ENDIAN) {
        /* Loop-invariant: each needle replicated into every byte lane. */
        m1 = LEXBOR_SWAR_REPEAT(c1);
        m2 = LEXBOR_SWAR_REPEAT(c2);
        m3 = LEXBOR_SWAR_REPEAT(c3);
        m4 = LEXBOR_SWAR_REPEAT(c4);

        /*
         * Compare the remaining length instead of computing
         * data + sizeof(size_t): that sum may point more than one element
         * past the end of the buffer, which is undefined behaviour and may
         * wrap around. The data < end test keeps the original "do nothing"
         * result for an empty (or inverted) range.
         */
        while (data < end && (size_t) (end - data) >= sizeof(size_t)) {
            /* memcpy avoids unaligned access and strict-aliasing problems. */
            memcpy(&bytes, data, sizeof(size_t));

            /* XOR turns every byte equal to the needle into a zero byte. */
            t1 = bytes ^ m1;
            t2 = bytes ^ m2;
            t3 = bytes ^ m3;
            t4 = bytes ^ m4;

            matches = LEXBOR_SWAR_HAS_ZERO(t1) | LEXBOR_SWAR_HAS_ZERO(t2)
                      | LEXBOR_SWAR_HAS_ZERO(t3) | LEXBOR_SWAR_HAS_ZERO(t4);

            if (matches) {
                /*
                 * (matches - 1) sets every bit below the lowest flag bit.
                 * Masking with ONES keeps one bit per byte up to and
                 * including the matching byte; multiplying by ONES sums
                 * those bits into the top byte. The sum minus one is the
                 * index of the first matching byte (little-endian order).
                 */
                data += ((((matches - 1) & LEXBOR_SWAR_ONES) * LEXBOR_SWAR_ONES)
                         >> (sizeof(size_t) * 8 - 8)) - 1;
                break;
            }

            data += sizeof(size_t);
        }
    }

    return data;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* LEXBOR_SWAR_H */

View file

@ -62,6 +62,16 @@ lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz)
parser->rules_end = parser->rules_begin + lxb_rules_length;
parser->rules = parser->rules_begin;
/*
* Zero those parameters that can be used (passed to the function).
* The parser->rules->phase parameter will be assigned at the end of the
* parsing.
*
* The point is that parser->rules[0] is used as a stub before exiting
* parsing.
*/
parser->rules->context = NULL;
/* Temp */
parser->pos = NULL;
parser->str.length = 0;

View file

@ -477,6 +477,8 @@ name_state:
data++;
}
*name_end = data;
spaces_state:
data = lxb_html_encoding_skip_spaces(data, end);

View file

@ -15,6 +15,7 @@
#define LEXBOR_STR_RES_MAP_HEX
#define LEXBOR_STR_RES_MAP_NUM
#include "lexbor/core/str_res.h"
#include "lexbor/core/swar.h"
#define LXB_HTML_TOKENIZER_RES_ENTITIES_SBST
#include "lexbor/html/tokenizer/res.h"
@ -226,6 +227,8 @@ lxb_html_tokenizer_state_data(lxb_html_tokenizer_t *tkz,
{
lxb_html_tokenizer_state_begin_set(tkz, data);
data = lexbor_swar_seek4(data, end, 0x3C, 0x26, 0x0D, 0x00);
while (data != end) {
switch (*data) {
/* U+003C LESS-THAN SIGN (<) */
@ -906,6 +909,8 @@ lxb_html_tokenizer_state_attribute_value_double_quoted(lxb_html_tokenizer_t *tkz
lxb_html_tokenizer_state_begin_set(tkz, data);
data = lexbor_swar_seek4(data, end, 0x22, 0x26, 0x0D, 0x00);
while (data != end) {
switch (*data) {
/* U+0022 QUOTATION MARK (") */

View file

@ -175,7 +175,7 @@ lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
* We can create function for this, but...
*
* The "in head noscript" insertion mode use this
* is you change this code, please, change it in in head noscript" mode
* If you change this code, please change it in the "in head noscript" mode too.
*/
case LXB_TAG__TEXT: {
lxb_html_token_t ws_token = {0};