Add Lexbor files for URL handling (#18656)

Relates to #14461 and https://wiki.php.net/rfc/url_parsing_api
2025-08-15 13:38:49 +02:00 · 2025-05-27 13:06:02 +02:00 · 2025-05-27 13:06:02 +02:00 · 400b7b8c74
commit 400b7b8c74
parent d585a5609d
15 changed files with 210811 additions and 1 deletions
--- a/codecov.yml
+++ b/codecov.yml
@ -7,6 +7,9 @@ ignore:
  - "ext/lexbor/lexbor/html"
  - "ext/lexbor/lexbor/ns"
  - "ext/lexbor/lexbor/ports"
+  - "ext/lexbor/lexbor/punycode"
  - "ext/lexbor/lexbor/tag"
+  - "ext/lexbor/lexbor/unicode"
+  - "ext/lexbor/lexbor/url"
  - "ext/pcre/pcre2lib"
  - "ext/uri/uriparser"
--- a/ext/lexbor/config.m4
+++ b/ext/lexbor/config.m4
@ -17,6 +17,7 @@ PHP_NEW_EXTENSION([lexbor], m4_normalize([
    $LEXBOR_DIR/core/hash.c
    $LEXBOR_DIR/core/mem.c
    $LEXBOR_DIR/core/mraw.c
+    $LEXBOR_DIR/core/plog.c
    $LEXBOR_DIR/core/print.c
    $LEXBOR_DIR/core/serialize.c
    $LEXBOR_DIR/core/shs.c
@ -174,7 +175,11 @@ PHP_NEW_EXTENSION([lexbor], m4_normalize([
    $LEXBOR_DIR/html/tree/open_elements.c
    $LEXBOR_DIR/ns/ns.c
    $LEXBOR_DIR/ports/posix/lexbor/core/memory.c
+    $LEXBOR_DIR/punycode/punycode.c
    $LEXBOR_DIR/tag/tag.c
+    $LEXBOR_DIR/unicode/idna.c
+    $LEXBOR_DIR/unicode/unicode.c
+    $LEXBOR_DIR/url/url.c
  ]),
  [no],,
  [-DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 $PHP_LEXBOR_CFLAGS])
@ -193,7 +198,10 @@ PHP_ADD_BUILD_DIR([
  $ext_builddir/$LEXBOR_DIR/html/tree/insertion_mode
  $ext_builddir/$LEXBOR_DIR/ns
  $ext_builddir/$LEXBOR_DIR/ports/posix/lexbor/core
+  $ext_builddir/$LEXBOR_DIR/punycode
  $ext_builddir/$LEXBOR_DIR/tag
+  $ext_builddir/$LEXBOR_DIR/unicode
+  $ext_builddir/$LEXBOR_DIR/url
 ])
 PHP_ADD_INCLUDE([$ext_srcdir])
 PHP_INSTALL_HEADERS([ext/lexbor], m4_normalize([
--- a/ext/lexbor/config.w32
+++ b/ext/lexbor/config.w32
@ -3,7 +3,7 @@
 EXTENSION("lexbor", "php_lexbor.c", false, "/I " + configure_module_dirname + " /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
 PHP_LEXBOR="yes";
 ADD_SOURCES("ext/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "lexbor");
-ADD_SOURCES("ext/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "lexbor");
+ADD_SOURCES("ext/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c plog.c print.c serialize.c shs.c str.c strtod.c", "lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/dom", "interface.c", "lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/dom/interfaces", "attr.c cdata_section.c character_data.c comment.c document.c document_fragment.c document_type.c element.c node.c processing_instruction.c shadow_root.c text.c", "lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/html/tokenizer", "error.c state_comment.c state_doctype.c state_rawtext.c state_rcdata.c state_script.c state.c", "lexbor");
@ -17,7 +17,10 @@ ADD_SOURCES("ext/lexbor/lexbor/css/selectors", "state.c selectors.c selector.c p
 ADD_SOURCES("ext/lexbor/lexbor/css/syntax", "state.c parser.c syntax.c anb.c tokenizer.c token.c","lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/css/syntax/tokenizer", "error.c","lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/ns", "ns.c","lexbor");
+ADD_SOURCES("ext/lexbor/lexbor/punycode", "punycode.c","lexbor");
 ADD_SOURCES("ext/lexbor/lexbor/tag", "tag.c","lexbor");
+ADD_SOURCES("ext/lexbor/lexbor/unicode", "idna.c unicode.c","lexbor");
+ADD_SOURCES("ext/lexbor/lexbor/url", "url.c","lexbor");
 ADD_FLAG("CFLAGS_LEXBOR", "/D LEXBOR_BUILDING /utf-8");

 AC_DEFINE("HAVE_LEXBOR", 1, "Define to 1 if the PHP extension 'lexbor' is available.");
--- a/ext/lexbor/lexbor/punycode/base.h
+++ b/ext/lexbor/lexbor/punycode/base.h
@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2023-2024 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#ifndef LEXBOR_PUNYCODE_BASE_H
+#define LEXBOR_PUNYCODE_BASE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/core/base.h"
+
+
+/* Version components of the punycode module (major, minor, patch). */
+#define LXB_PUNYCODE_VERSION_MAJOR 1
+#define LXB_PUNYCODE_VERSION_MINOR 1
+#define LXB_PUNYCODE_VERSION_PATCH 0
+
+/*
+ * Dotted version string assembled from the three components above with
+ * LEXBOR_STRINGIZE (provided by the included lexbor/core/base.h).
+ */
+#define LEXBOR_PUNYCODE_VERSION_STRING LEXBOR_STRINGIZE(LXB_PUNYCODE_VERSION_MAJOR) "." \
+                                       LEXBOR_STRINGIZE(LXB_PUNYCODE_VERSION_MINOR) "." \
+                                       LEXBOR_STRINGIZE(LXB_PUNYCODE_VERSION_PATCH)
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_PUNYCODE_BASE_H */
--- a/ext/lexbor/lexbor/punycode/punycode.c
+++ b/ext/lexbor/lexbor/punycode/punycode.c
@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2023-2024 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#include "lexbor/punycode/punycode.h"
+#include "lexbor/encoding/encoding.h"
+
+
+/*
+ * Numeric parameters shared by the encoder and decoder in this file.
+ * The names follow the Bootstring parameters of RFC 3492 (linked from
+ * punycode.h).
+ */
+enum {
+    LXB_PUNYCODE_BASE = 36,          /* Number of distinct digit values. */
+    LXB_PUNYCODE_TMIN = 1,           /* Lower clamp of the digit threshold t. */
+    LXB_PUNYCODE_TMAX = 26,          /* Upper clamp of the digit threshold t. */
+    LXB_PUNYCODE_SKEW = 38,          /* Used by lxb_punycode_adapt(). */
+    LXB_PUNYCODE_DAMP = 700,         /* First-time delta divisor in adapt. */
+    LXB_PUNYCODE_INITIAL_BIAS = 72,  /* Bias before the first adaptation. */
+    LXB_PUNYCODE_INITIAL_N = 0x80,   /* First code point that is not basic. */
+    LXB_PUNYCODE_DELIMITER = 0x2D    /* '-' between basic part and digits. */
+};
+
+
+/*
+ * Defined further down: converts decoded code points to UTF-8 and forwards
+ * them to the byte-oriented callback used by lxb_punycode_decode().
+ */
+static lxb_status_t
+lxb_punycode_callback_cp(const lxb_codepoint_t *cps, size_t len, void *ctx);
+
+
+/*
+ * Double the capacity of the encoder's output buffer.
+ *
+ * While *buf still is the caller's stack array (buffer) a new heap block is
+ * allocated and the old content copied over; afterwards the heap block is
+ * resized. On success *buf and *end describe the new block and the returned
+ * pointer is the write position at the old capacity. The p argument is not
+ * consulted.
+ *
+ * On failure nothing is stored in *buf / *end. If resizing a heap block
+ * fails, that block is released and the result of lexbor_free() is returned
+ * (callers treat it as NULL); if the first heap allocation fails, NULL is
+ * returned.
+ */
+lxb_inline lxb_char_t *
+lxb_punycode_encode_realloc(lxb_char_t *p, lxb_char_t **buf,
+                            const lxb_char_t **end, const lxb_char_t *buffer)
+{
+    lxb_char_t *grown;
+    size_t old_capacity = *end - *buf;
+    size_t new_capacity = old_capacity * 2;
+
+    if (*buf != buffer) {
+        /* Already on the heap: resize in place or move. */
+        grown = lexbor_realloc(*buf, new_capacity);
+        if (grown == NULL) {
+            return lexbor_free(*buf);
+        }
+    }
+    else {
+        /* First growth: leave the stack array behind. */
+        grown = lexbor_malloc(new_capacity);
+        if (grown == NULL) {
+            return NULL;
+        }
+
+        memcpy(grown, *buf, old_capacity);
+    }
+
+    *end = grown + new_capacity;
+    *buf = grown;
+
+    return grown + old_capacity;
+}
+
+/*
+ * Double the capacity (counted in code points) of the decoder's output
+ * buffer.
+ *
+ * While *buf still is the caller's stack array (buffer) a new heap block is
+ * allocated and the old content copied over; afterwards the heap block is
+ * resized. On success *buf and *end describe the new block and the returned
+ * pointer is the element at the old capacity. The p argument is not
+ * consulted.
+ *
+ * On failure nothing is stored in *buf / *end. If resizing a heap block
+ * fails, that block is released and the result of lexbor_free() is returned
+ * (callers treat it as NULL); if the first heap allocation fails, NULL is
+ * returned.
+ */
+lxb_inline lxb_codepoint_t *
+lxb_punycode_decode_realloc(lxb_codepoint_t *p, lxb_codepoint_t **buf,
+                            const lxb_codepoint_t **end,
+                            const lxb_codepoint_t *buffer)
+{
+    lxb_codepoint_t *grown;
+    size_t old_count = *end - *buf;
+    size_t new_count = old_count * 2;
+
+    if (*buf != buffer) {
+        /* Already on the heap: resize in place or move. */
+        grown = lexbor_realloc(*buf, new_count * sizeof(lxb_codepoint_t));
+        if (grown == NULL) {
+            return lexbor_free(*buf);
+        }
+    }
+    else {
+        /* First growth: leave the stack array behind. */
+        grown = lexbor_malloc(new_count * sizeof(lxb_codepoint_t));
+        if (grown == NULL) {
+            return NULL;
+        }
+
+        memcpy(grown, *buf, old_count * sizeof(lxb_codepoint_t));
+    }
+
+    *end = grown + new_count;
+    *buf = grown;
+
+    return grown + old_count;
+}
+
+/*
+ * Convert a digit value to its output character: values below 26 are
+ * shifted to start at 97 ('a'), all other values are shifted by 22 so that
+ * 26 lands on 48 ('0').
+ */
+static char
+lxb_punycode_encode_digit(size_t d)
+{
+    if (d < 26) {
+        return d + 97;
+    }
+
+    return d + 22;
+}
+
+/*
+ * Convert an input code point to its digit value, or return
+ * LXB_PUNYCODE_BASE when the code point is not a digit.
+ *
+ * Each range test subtracts the range start and compares against the range
+ * length; this assumes lxb_codepoint_t is an unsigned type (declared outside
+ * this file) so that smaller values wrap around and fail the test.
+ */
+static size_t
+lxb_punycode_decode_digit(lxb_codepoint_t cp)
+{
+    /* 48..57 ('0'..'9') are the digit values 26..35. */
+    if (cp - 48 < 10) {
+        return cp - 22;
+    }
+
+    /* 65..90 ('A'..'Z') are the digit values 0..25. */
+    if (cp - 65 < 26) {
+        return cp - 65;
+    }
+
+    /* 97..122 ('a'..'z') are the digit values 0..25 as well. */
+    if (cp - 97 < 26) {
+        return cp - 97;
+    }
+
+    return LXB_PUNYCODE_BASE;
+}
+
+/*
+ * Compute the next bias value from the delta that was just encoded or
+ * decoded.
+ *
+ * @param[in] delta     Delta of the last processed code point.
+ * @param[in] numpoints Number of code points handled so far; used as a
+ *                      divisor, so it must not be 0.
+ * @param[in] firsttime True for the first adaptation: delta is divided by
+ *                      LXB_PUNYCODE_DAMP instead of being halved.
+ *
+ * @return The new bias.
+ */
+static size_t
+lxb_punycode_adapt(size_t delta, size_t numpoints, bool firsttime)
+{
+    size_t k = 0;
+
+    if (firsttime) {
+        delta = delta / LXB_PUNYCODE_DAMP;
+    }
+    else {
+        delta = delta >> 1;
+    }
+
+    delta += delta / numpoints;
+
+    /* Scale delta down, adding one base to the result per division. */
+    while (delta > ((LXB_PUNYCODE_BASE - LXB_PUNYCODE_TMIN)
+                    * LXB_PUNYCODE_TMAX) / 2)
+    {
+        delta /= LXB_PUNYCODE_BASE - LXB_PUNYCODE_TMIN;
+        k += LXB_PUNYCODE_BASE;
+    }
+
+    return k + (LXB_PUNYCODE_BASE - LXB_PUNYCODE_TMIN + 1)
+           * delta / (delta + LXB_PUNYCODE_SKEW);
+}
+
+/*
+ * Second stage shared by lxb_punycode_encode() and lxb_punycode_encode_cp().
+ *
+ * On entry the caller has copied every code point below 0x80 of the input,
+ * in order, to the output buffer, so (p - buf) is the number of such basic
+ * code points. This function appends the delimiter (only if at least one
+ * basic code point was copied) followed by the digits for all remaining
+ * code points and then hands the result to cb.
+ *
+ * If the input holds nothing but basic code points, no delimiter is added
+ * and cb is called with unchanged set to true.
+ *
+ * @param[in] cps     First input code point.
+ * @param[in] cps_end One past the last input code point.
+ * @param[in] p       Write position in the output buffer.
+ * @param[in] buf     Start of the output buffer.
+ * @param[in] end     One past the end of the output buffer.
+ * @param[in] buffer  The caller's stack array. When buf differs from it,
+ *                    buf is a heap block that this function releases before
+ *                    returning (on failure of a grow step the grow helper
+ *                    has already released it).
+ * @param[in] cb      Receives the encoded bytes; called once, and only when
+ *                    encoding completed without error.
+ * @param[in] ctx     Passed through to cb.
+ *
+ * @return The status returned by cb, LXB_STATUS_ERROR_OVERFLOW if delta
+ *         would overflow, or LXB_STATUS_ERROR_MEMORY_ALLOCATION.
+ */
+static lxb_status_t
+lxb_punycode_encode_body(const lxb_codepoint_t *cps, const lxb_codepoint_t *cps_end,
+                         lxb_char_t *p, lxb_char_t *buf, const lxb_char_t *end,
+                         const lxb_char_t *buffer, lxb_punycode_encode_cb_f cb,
+                         void *ctx)
+{
+    bool unchanged;
+    size_t h, b, n, q, k, t, delta, bias;
+    lxb_status_t status;
+    lxb_codepoint_t cp, m;
+    const lxb_codepoint_t *cps_t, *cps_p;
+
+    n = LXB_PUNYCODE_INITIAL_N;
+    bias = LXB_PUNYCODE_INITIAL_BIAS;
+    delta = 0;
+    b = p - buf;
+
+    /*
+     * cps_p is used as a counter: (cps_p - cps) is the number of code points
+     * handled so far, starting with the b basic ones.
+     */
+    cps_p = cps + b;
+
+    if (cps_p >= cps_end) {
+        unchanged = true;
+        goto done;
+    }
+
+    if (p > buf) {
+        /*
+         * The callers fill the buffer up to and including its last byte, so
+         * p may be equal to end here. Grow first; writing unconditionally
+         * would store one byte past the buffer and the delimiter would be
+         * lost on the next grow, which resets the cursor to the old end.
+         */
+        if (p >= end) {
+            p = lxb_punycode_encode_realloc(p, &buf, &end, buffer);
+            if (p == NULL) {
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        *p++ = LXB_PUNYCODE_DELIMITER;
+    }
+
+    unchanged = false;
+
+    while (cps_p < cps_end) {
+        /* Find the smallest code point that is >= n. */
+        m = UINT32_MAX;
+        cps_t = cps;
+
+        while (cps_t < cps_end) {
+            cp = *cps_t++;
+
+            if (cp >= n && cp < m) {
+                m = cp;
+            }
+        }
+
+        h = (cps_p - cps) + 1;
+
+        /* Reject (m - n) * h if adding it to delta would overflow. */
+        if (m - n > (UINT32_MAX - delta) / h) {
+            status = LXB_STATUS_ERROR_OVERFLOW;
+            goto failed;
+        }
+
+        delta += (m - n) * h;
+        n = m;
+
+        cps_t = cps;
+
+        while (cps_t < cps_end) {
+            cp = *cps_t++;
+
+            if (cp < n) {
+                if (++delta == 0) {
+                    status = LXB_STATUS_ERROR_OVERFLOW;
+                    goto failed;
+                }
+            }
+
+            if (cp == n) {
+                /* Emit delta as a variable-length sequence of digits. */
+                q = delta;
+                k = LXB_PUNYCODE_BASE;
+
+                for (;; k += LXB_PUNYCODE_BASE) {
+                    t = k <= bias ? LXB_PUNYCODE_TMIN :
+                    k >= bias + LXB_PUNYCODE_TMAX
+                    ? LXB_PUNYCODE_TMAX : k - bias;
+
+                    if (q < t) {
+                        break;
+                    }
+
+                    if (p >= end) {
+                        p = lxb_punycode_encode_realloc(p, &buf, &end, buffer);
+                        if (p == NULL) {
+                            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+                        }
+                    }
+
+                    *p++ = lxb_punycode_encode_digit(t + (q - t)
+                                                     % (LXB_PUNYCODE_BASE - t));
+                    q = (q - t) / (LXB_PUNYCODE_BASE - t);
+                }
+
+                h = cps_p - cps;
+
+                if (p >= end) {
+                    p = lxb_punycode_encode_realloc(p, &buf, &end, buffer);
+                    if (p == NULL) {
+                        return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+                    }
+                }
+
+                *p++ = lxb_punycode_encode_digit(q);
+
+                /* h == b holds only for the first non-basic code point. */
+                bias = lxb_punycode_adapt(delta, h + 1, h == b);
+                delta = 0;
+                cps_p += 1;
+            }
+        }
+
+        delta += 1;
+        n += 1;
+    }
+
+done:
+
+    status = cb(buf, p - buf, ctx, unchanged);
+
+failed:
+
+    if (buf != buffer) {
+        (void) lexbor_free(buf);
+    }
+
+    return status;
+}
+
+/*
+ * Encode UTF-8 input to Punycode.
+ *
+ * The input is decoded to code points (kept in a stack array when they fit,
+ * otherwise in a heap block), every code point below 0x80 is copied to the
+ * output buffer and lxb_punycode_encode_body() produces the rest and calls
+ * cb.
+ *
+ * @param[in] data   UTF-8 input. Not NULL.
+ * @param[in] length Length of data in bytes. Can be 0.
+ * @param[in] cb     Receives the encoded bytes; called once, and only when
+ *                   encoding completed without error.
+ * @param[in] ctx    Passed through to cb.
+ *
+ * @return The status returned by cb, LXB_STATUS_ERROR_UNEXPECTED_DATA for
+ *         input that cannot be decoded, LXB_STATUS_ERROR_OVERFLOW or
+ *         LXB_STATUS_ERROR_MEMORY_ALLOCATION.
+ */
+lxb_status_t
+lxb_punycode_encode(const lxb_char_t *data, size_t length,
+                    lxb_punycode_encode_cb_f cb, void *ctx)
+{
+    size_t cp_length;
+    uint8_t len;
+    lxb_char_t *p, *buf;
+    lxb_status_t status;
+    lxb_codepoint_t cp, *cps, *cps_p;
+    const lxb_char_t *data_p, *data_end, *end;
+    const lxb_codepoint_t *cps_end;
+    lxb_char_t buffer[4096];
+    lxb_codepoint_t input[4096];
+
+    /*
+     * Make GCC happy.
+     * length variable can be 0.
+     */
+    input[0] = 0x00;
+
+    p = buffer;
+    buf = buffer;
+    end = buffer + sizeof(buffer);
+
+    data_p = data;
+    data_end = data + length;
+    cp_length = 0;
+
+    /* First pass: count code points to size the code point array. */
+    while (data_p < data_end) {
+        len = lxb_encoding_decode_utf_8_length(*data_p);
+
+        /*
+         * A sequence that claims more bytes than are left is rejected here
+         * so that data_p is never advanced past data_end.
+         */
+        if (len == 0 || (size_t) (data_end - data_p) < len) {
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+
+        data_p += len;
+        cp_length += 1;
+    }
+
+    if (cp_length <= sizeof(input) / sizeof(lxb_codepoint_t)) {
+        cps = input;
+    }
+    else {
+        cps = lexbor_malloc(cp_length * sizeof(lxb_codepoint_t));
+        if (cps == NULL) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+    }
+
+    data_p = data;
+
+    cps_p = cps;
+    cps_end = cps + cp_length;
+
+    /* Second pass: decode and copy the basic code points to the output. */
+    while (data_p < data_end) {
+        cp = lxb_encoding_decode_valid_utf_8_single(&data_p, data_end);
+        if (cp == LXB_ENCODING_DECODE_ERROR) {
+            status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+            goto done;
+        }
+
+        *cps_p++ = cp;
+
+        if (cp < 0x80) {
+            if (p >= end) {
+                p = lxb_punycode_encode_realloc(p, &buf, &end, buffer);
+                if (p == NULL) {
+                    /*
+                     * The helper has already released a heap block, if
+                     * there was one; make sure it is not freed again below.
+                     */
+                    buf = buffer;
+                    status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+                    goto done;
+                }
+            }
+
+            *p++ = cp;
+        }
+    }
+
+    status = lxb_punycode_encode_body(cps, cps_end, p, buf, end, buffer,
+                                      cb, ctx);
+
+    /* The body releases a heap output buffer on every path. */
+    buf = buffer;
+
+done:
+
+    /*
+     * Reached with a heap output buffer only when decoding failed after the
+     * buffer had been grown; without this the block would leak.
+     */
+    if (buf != buffer) {
+        (void) lexbor_free(buf);
+    }
+
+    if (cps != input) {
+        (void) lexbor_free(cps);
+    }
+
+    return status;
+}
+
+/*
+ * Encode code points to Punycode.
+ *
+ * Copies every code point below 0x80 to the output buffer, growing it when
+ * full, and leaves the rest of the work (including the call to cb and the
+ * release of a grown buffer) to lxb_punycode_encode_body().
+ *
+ * @param[in] cps    Input code points. Not NULL.
+ * @param[in] length Number of code points. Can be 0.
+ * @param[in] cb     Callback for the result.
+ * @param[in] ctx    Passed through to cb.
+ *
+ * @return LXB_STATUS_ERROR_MEMORY_ALLOCATION if the buffer cannot grow,
+ *         otherwise the status of lxb_punycode_encode_body().
+ */
+lxb_status_t
+lxb_punycode_encode_cp(const lxb_codepoint_t *cps, size_t length,
+                       lxb_punycode_encode_cb_f cb, void *ctx)
+{
+    size_t idx;
+    lxb_char_t *out, *out_begin;
+    const lxb_char_t *out_end;
+    lxb_char_t buffer[4096];
+
+    out = buffer;
+    out_begin = buffer;
+    out_end = buffer + sizeof(buffer);
+
+    for (idx = 0; idx < length; idx++) {
+        if (cps[idx] >= 0x80) {
+            continue;
+        }
+
+        if (out >= out_end) {
+            out = lxb_punycode_encode_realloc(out, &out_begin, &out_end,
+                                              buffer);
+            if (out == NULL) {
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        *out++ = cps[idx];
+    }
+
+    return lxb_punycode_encode_body(cps, cps + length, out, out_begin,
+                                    out_end, buffer, cb, ctx);
+}
+
+/*
+ * Decode Punycode bytes and deliver the result as bytes.
+ *
+ * Wraps cb/ctx in a lexbor_serialize_ctx_t and runs
+ * lxb_punycode_decode_cb_cp() with lxb_punycode_callback_cp() as the code
+ * point callback, which turns the decoded code points into UTF-8 before
+ * calling cb.
+ */
+lxb_status_t
+lxb_punycode_decode(const lxb_char_t *data, size_t length,
+                    lexbor_serialize_cb_f cb, void *ctx)
+{
+    lxb_status_t status;
+    lexbor_serialize_ctx_t wrapped = {.cb = cb, .ctx = ctx};
+
+    status = lxb_punycode_decode_cb_cp(data, length,
+                                       lxb_punycode_callback_cp, &wrapped);
+
+    return status;
+}
+
+/*
+ * Code point callback used by lxb_punycode_decode(): serializes the code
+ * points to UTF-8 and passes the bytes to the callback stored in ctx
+ * (a lexbor_serialize_ctx_t).
+ *
+ * The required size is computed first; the bytes are written to a stack
+ * array when they fit and to a temporary heap block otherwise. The bytes
+ * are only valid for the duration of the callback.
+ *
+ * @return LXB_STATUS_ERROR_UNEXPECTED_DATA if a code point has no UTF-8
+ *         length, LXB_STATUS_ERROR_MEMORY_ALLOCATION, or the status of the
+ *         wrapped callback.
+ */
+static lxb_status_t
+lxb_punycode_callback_cp(const lxb_codepoint_t *cps, size_t len, void *ctx)
+{
+    size_t idx, total;
+    uint8_t width;
+    lxb_status_t status;
+    lexbor_serialize_ctx_t *wrapped = ctx;
+    lxb_char_t *out, *out_begin, *out_end;
+    lxb_char_t buffer[4096];
+
+    /*
+     * Make GCC happy.
+     * len variable can be 0.
+     */
+    buffer[0] = 0x00;
+
+    total = 0;
+
+    for (idx = 0; idx < len; idx++) {
+        width = lxb_encoding_encode_utf_8_length(cps[idx]);
+        if (width == 0) {
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+
+        total += width;
+    }
+
+    if (total > sizeof(buffer)) {
+        out_begin = lexbor_malloc(total);
+        if (out_begin == NULL) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+
+        out_end = out_begin + total;
+    }
+    else {
+        out_begin = buffer;
+        out_end = buffer + sizeof(buffer);
+    }
+
+    out = out_begin;
+
+    for (idx = 0; idx < len; idx++) {
+        (void) lxb_encoding_encode_utf_8_single(NULL, &out, out_end, cps[idx]);
+    }
+
+    status = wrapped->cb(out_begin, out - out_begin, wrapped->ctx);
+
+    if (out_begin != buffer) {
+        (void) lexbor_free(out_begin);
+    }
+
+    return status;
+}
+
+/*
+ * Decode Punycode given as code points into code points.
+ *
+ * Everything before the last delimiter (0x2D) is copied to the output
+ * unchanged and must be below 0x80. The digits after the delimiter are then
+ * decoded one delta at a time; each delta yields a code point n and a
+ * position i at which n is inserted into the output.
+ *
+ * cb is called once with the complete output and is not called when an
+ * error is detected. The output is only valid during the callback: a grown
+ * buffer is released before returning.
+ *
+ * @return The status returned by cb, LXB_STATUS_ERROR_UNEXPECTED_DATA for
+ *         malformed input, LXB_STATUS_ERROR_OVERFLOW or
+ *         LXB_STATUS_ERROR_MEMORY_ALLOCATION.
+ */
+lxb_status_t
+lxb_punycode_decode_cp(const lxb_codepoint_t *data, size_t length,
+                       lexbor_serialize_cb_cp_f cb, void *ctx)
+{
+    size_t buf_len, digit, oldi, bias, w, k, t, i, h, in;
+    const lxb_codepoint_t *delimiter, *data_p, *data_end;
+    lxb_status_t status;
+    lxb_codepoint_t cp, n;
+    lxb_codepoint_t *p, *buf;
+    const lxb_codepoint_t *end;
+    lxb_codepoint_t buffer[4096];
+
+    p = buffer;
+    buf = buffer;
+    buf_len = sizeof(buffer) / sizeof(lxb_codepoint_t);
+    end = buffer + buf_len;
+
+    data_p = data;
+    data_end = data + length;
+    delimiter = data_end;
+
+    /*
+     * Search backwards for the last delimiter. If there is none, or the
+     * only one is the very first element, delimiter ends up equal to data.
+     */
+    while (delimiter != data) {
+        delimiter -= 1;
+
+        if (*delimiter == LXB_PUNYCODE_DELIMITER) {
+            break;
+        }
+    }
+
+    /* Copy the basic code points that precede the delimiter. */
+    while (data_p < delimiter) {
+        cp = *data_p++;
+
+        if (cp >= 0x80) {
+            status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+            goto done;
+        }
+
+        if (p >= end) {
+            p = lxb_punycode_decode_realloc(p, &buf, &end, buffer);
+            if (p == NULL) {
+                /* The helper has already released a heap buffer, if any. */
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        *p++ = cp;
+    }
+
+    i = 0;
+    n = LXB_PUNYCODE_INITIAL_N;
+    bias = LXB_PUNYCODE_INITIAL_BIAS;
+    /* Digits start right after the delimiter, or at index 0 without one. */
+    data_p = (delimiter != data) ? delimiter + 1: data;
+    in = data_p - data;
+
+    /* One iteration per inserted code point; p tracks the output length. */
+    for (; in < length; p++) {
+        /* Decode one variable-length integer and add it to i. */
+        for (oldi = i, w = 1, k = LXB_PUNYCODE_BASE; ; k += LXB_PUNYCODE_BASE) {
+            if (in >= length) {
+                /* Input ended in the middle of a delta. */
+                status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+                goto done;
+            }
+
+            cp = data[in++];
+            digit = lxb_punycode_decode_digit(cp);
+
+            if (digit >= LXB_PUNYCODE_BASE) {
+                status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+                goto done;
+            }
+
+            /* Reject digit * w if adding it to i would exceed UINT32_MAX. */
+            if (digit > (UINT32_MAX - i) / w) {
+                status = LXB_STATUS_ERROR_OVERFLOW;
+                goto done;
+            }
+
+            i += digit * w;
+            t = k <= bias ? LXB_PUNYCODE_TMIN
+            : k >= bias + LXB_PUNYCODE_TMAX ? LXB_PUNYCODE_TMAX : k - bias;
+
+            /* A digit below the threshold terminates the integer. */
+            if (digit < t) {
+                break;
+            }
+
+            if (w > UINT32_MAX / (LXB_PUNYCODE_BASE - t)) {
+                status = LXB_STATUS_ERROR_OVERFLOW;
+                goto done;
+            }
+
+            w *= (LXB_PUNYCODE_BASE - t);
+        }
+
+        /* Output length after the insertion that follows. */
+        h = (p - buf) + 1;
+
+        bias = lxb_punycode_adapt(i - oldi, h, oldi == 0);
+
+        if (i / h > UINT32_MAX - n) {
+            status = LXB_STATUS_ERROR_OVERFLOW;
+            goto done;
+        }
+
+        /* i wraps around the output: the quotient advances n. */
+        n += i / h;
+        i %= h;
+
+        if (p >= end) {
+            p = lxb_punycode_decode_realloc(p, &buf, &end, buffer);
+            if (p == NULL) {
+                /* The helper has already released a heap buffer, if any. */
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        /* Open a gap at position i and insert n there. */
+        memmove(buf + i + 1, buf + i, ((h - 1) - i) * sizeof(lxb_codepoint_t));
+        buf[i++] = n;
+    }
+
+    status = cb(buf, p - buf, ctx);
+
+done:
+
+    if (buffer != buf) {
+        (void) lexbor_free(buf);
+    }
+
+    return status;
+}
+
+/*
+ * Decode Punycode given as bytes into code points.
+ *
+ * Same procedure as lxb_punycode_decode_cp(), only the input elements are
+ * lxb_char_t: everything before the last delimiter (0x2D) is copied to the
+ * output unchanged and must be below 0x80, then the digits after the
+ * delimiter are decoded one delta at a time; each delta yields a code point
+ * n and a position i at which n is inserted into the output.
+ *
+ * cb is called once with the complete output and is not called when an
+ * error is detected. The output is only valid during the callback: a grown
+ * buffer is released before returning.
+ *
+ * @return The status returned by cb, LXB_STATUS_ERROR_UNEXPECTED_DATA for
+ *         malformed input, LXB_STATUS_ERROR_OVERFLOW or
+ *         LXB_STATUS_ERROR_MEMORY_ALLOCATION.
+ */
+lxb_status_t
+lxb_punycode_decode_cb_cp(const lxb_char_t *data, size_t length,
+                          lexbor_serialize_cb_cp_f cb, void *ctx)
+{
+    size_t buf_len, digit, oldi, bias, w, k, t, i, h, in;
+    const lxb_char_t *delimiter, *data_p, *data_end;
+    lxb_status_t status;
+    lxb_codepoint_t cp, n;
+    lxb_codepoint_t *p, *buf;
+    const lxb_codepoint_t *end;
+    lxb_codepoint_t buffer[4096];
+
+    p = buffer;
+    buf = buffer;
+    buf_len = sizeof(buffer) / sizeof(lxb_codepoint_t);
+    end = buffer + buf_len;
+
+    data_p = data;
+    data_end = data + length;
+    delimiter = data_end;
+
+    /*
+     * Search backwards for the last delimiter. If there is none, or the
+     * only one is the very first byte, delimiter ends up equal to data.
+     */
+    while (delimiter != data) {
+        delimiter -= 1;
+
+        if (*delimiter == LXB_PUNYCODE_DELIMITER) {
+            break;
+        }
+    }
+
+    /* Copy the basic code points that precede the delimiter. */
+    while (data_p < delimiter) {
+        cp = *data_p++;
+
+        if (cp >= 0x80) {
+            status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+            goto done;
+        }
+
+        if (p >= end) {
+            p = lxb_punycode_decode_realloc(p, &buf, &end, buffer);
+            if (p == NULL) {
+                /* The helper has already released a heap buffer, if any. */
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        *p++ = cp;
+    }
+
+    i = 0;
+    n = LXB_PUNYCODE_INITIAL_N;
+    bias = LXB_PUNYCODE_INITIAL_BIAS;
+    /* Digits start right after the delimiter, or at index 0 without one. */
+    data_p = (delimiter != data) ? delimiter + 1: data;
+    in = data_p - data;
+
+    /* One iteration per inserted code point; p tracks the output length. */
+    for (; in < length; p++) {
+        /* Decode one variable-length integer and add it to i. */
+        for (oldi = i, w = 1, k = LXB_PUNYCODE_BASE; ; k += LXB_PUNYCODE_BASE) {
+            if (in >= length) {
+                /* Input ended in the middle of a delta. */
+                status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+                goto done;
+            }
+
+            cp = data[in++];
+            digit = lxb_punycode_decode_digit(cp);
+
+            if (digit >= LXB_PUNYCODE_BASE) {
+                status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+                goto done;
+            }
+
+            /* Reject digit * w if adding it to i would exceed UINT32_MAX. */
+            if (digit > (UINT32_MAX - i) / w) {
+                status = LXB_STATUS_ERROR_OVERFLOW;
+                goto done;
+            }
+
+            i += digit * w;
+            t = k <= bias ? LXB_PUNYCODE_TMIN
+              : k >= bias + LXB_PUNYCODE_TMAX ? LXB_PUNYCODE_TMAX : k - bias;
+
+            /* A digit below the threshold terminates the integer. */
+            if (digit < t) {
+                break;
+            }
+
+            if (w > UINT32_MAX / (LXB_PUNYCODE_BASE - t)) {
+                status = LXB_STATUS_ERROR_OVERFLOW;
+                goto done;
+            }
+
+            w *= (LXB_PUNYCODE_BASE - t);
+        }
+
+        /* Output length after the insertion that follows. */
+        h = (p - buf) + 1;
+
+        bias = lxb_punycode_adapt(i - oldi, h, oldi == 0);
+
+        if (i / h > UINT32_MAX - n) {
+            status = LXB_STATUS_ERROR_OVERFLOW;
+            goto done;
+        }
+
+        /* i wraps around the output: the quotient advances n. */
+        n += i / h;
+        i %= h;
+
+        if (p >= end) {
+            p = lxb_punycode_decode_realloc(p, &buf, &end, buffer);
+            if (p == NULL) {
+                /* The helper has already released a heap buffer, if any. */
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        /* Open a gap at position i and insert n there. */
+        memmove(buf + i + 1, buf + i, ((h - 1) - i) * sizeof(lxb_codepoint_t));
+        buf[i++] = n;
+    }
+
+    status = cb(buf, p - buf, ctx);
+
+done:
+
+    if (buffer != buf) {
+        (void) lexbor_free(buf);
+    }
+
+    return status;
+}
--- a/ext/lexbor/lexbor/punycode/punycode.h
+++ b/ext/lexbor/lexbor/punycode/punycode.h
@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2023 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#ifndef LEXBOR_PUNYCODE_H
+#define LEXBOR_PUNYCODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/punycode/base.h"
+
+
+/*
+ * Callback receiving the result of an encode call.
+ *
+ * data/len is the encoded output; it is only valid during the call.
+ * unchanged is true when the input consisted solely of code points below
+ * 0x80, in which case data holds those code points as they were and no
+ * delimiter was appended. The returned status becomes the return value of
+ * the encode function.
+ */
+typedef lxb_status_t
+(*lxb_punycode_encode_cb_f)(const lxb_char_t *data, size_t len, void *ctx,
+                            bool unchanged);
+
+
+/*
+ * Punycode: A Bootstring encoding of Unicode
+ * for Internationalized Domain Names in Applications (IDNA).
+ *
+ * https://www.rfc-editor.org/rfc/inline-errata/rfc3492.html
+ */
+
+/*
+ * Encoding from characters to characters.
+ *
+ * The input is read as UTF-8.
+ *
+ * @param[in] Input characters for encoding. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results. Called only once when encoding is complete;
+ *            not called if an error is detected.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ *         The status returned by the callback is passed through.
+ */
+LXB_API lxb_status_t
+lxb_punycode_encode(const lxb_char_t *data, size_t length,
+                    lxb_punycode_encode_cb_f cb, void *ctx);
+
+/*
+ * Encoding from code points to characters.
+ *
+ * Same as lxb_punycode_encode() only the input is code points.
+ *
+ * @param[in] Input code points for encoding. Not NULL.
+ * @param[in] Length of code points. Can be 0.
+ * @param[in] Callback for results. Called only once when encoding is complete;
+ *            not called if an error is detected.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ *         The status returned by the callback is passed through.
+ */
+LXB_API lxb_status_t
+lxb_punycode_encode_cp(const lxb_codepoint_t *cps, size_t length,
+                       lxb_punycode_encode_cb_f cb, void *ctx);
+
+/*
+ * Decoding from characters to characters.
+ *
+ * The decoded code points are passed to the callback as UTF-8.
+ *
+ * @param[in] Input characters for decoding. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results. Called only once when decoding is complete;
+ *            not called if an error is detected.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ *         The status returned by the callback is passed through.
+ */
+LXB_API lxb_status_t
+lxb_punycode_decode(const lxb_char_t *data, size_t length,
+                    lexbor_serialize_cb_f cb, void *ctx);
+
+/*
+ * Decoding from code points to code points.
+ *
+ * Same as lxb_punycode_decode() only the input/output is code points.
+ *
+ * @param[in] Input code points for decoding. Not NULL.
+ * @param[in] Length of code points. Can be 0.
+ * @param[in] Callback for results. Called only once when decoding is complete;
+ *            not called if an error is detected.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ *         The status returned by the callback is passed through.
+ */
+LXB_API lxb_status_t
+lxb_punycode_decode_cp(const lxb_codepoint_t *data, size_t length,
+                       lexbor_serialize_cb_cp_f cb, void *ctx);
+
+/*
+ * Decoding from characters to code points.
+ *
+ * Same as lxb_punycode_decode() only the output is code points.
+ *
+ * @param[in] Input characters for decoding. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results. Called only once when decoding is complete;
+ *            not called if an error is detected.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ *         The status returned by the callback is passed through.
+ */
+LXB_API lxb_status_t
+lxb_punycode_decode_cb_cp(const lxb_char_t *data, size_t length,
+                          lexbor_serialize_cb_cp_f cb, void *ctx);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_PUNYCODE_H */
--- a/ext/lexbor/lexbor/unicode/base.h
+++ b/ext/lexbor/lexbor/unicode/base.h
@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2023-2024 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#ifndef LEXBOR_UNICODE_BASE_H
+#define LEXBOR_UNICODE_BASE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/core/base.h"
+#include "lexbor/core/str.h"
+
+
+/* Version components of the unicode module (major, minor, patch). */
+#define LXB_UNICODE_VERSION_MAJOR 0
+#define LXB_UNICODE_VERSION_MINOR 3
+#define LXB_UNICODE_VERSION_PATCH 0
+
+/*
+ * Dotted version string assembled from the three components above with
+ * LEXBOR_STRINGIZE (provided by the included lexbor/core/base.h).
+ */
+#define LXB_UNICODE_VERSION_STRING LEXBOR_STRINGIZE(LXB_UNICODE_VERSION_MAJOR) "." \
+                                   LEXBOR_STRINGIZE(LXB_UNICODE_VERSION_MINOR) "." \
+                                   LEXBOR_STRINGIZE(LXB_UNICODE_VERSION_PATCH)
+
+
+/*
+ * Decomposition type identifiers, stored in the low seven bits of an
+ * lxb_unicode_decomposition_type_t.
+ * NOTE(review): the names look like the formatting tags of Unicode
+ * decomposition mappings (circle, compat, final, ...) - confirm against the
+ * code that generates the data tables.
+ */
+enum {
+    LXB_UNICODE_DECOMPOSITION_TYPE__UNDEF = 0x00,
+    LXB_UNICODE_DECOMPOSITION_TYPE_CIRCLE,
+    LXB_UNICODE_DECOMPOSITION_TYPE_COMPAT,
+    LXB_UNICODE_DECOMPOSITION_TYPE_FINAL,
+    LXB_UNICODE_DECOMPOSITION_TYPE_FONT,
+    LXB_UNICODE_DECOMPOSITION_TYPE_FRACTION,
+    LXB_UNICODE_DECOMPOSITION_TYPE_INITIAL,
+    LXB_UNICODE_DECOMPOSITION_TYPE_ISOLATED,
+    LXB_UNICODE_DECOMPOSITION_TYPE_MEDIAL,
+    LXB_UNICODE_DECOMPOSITION_TYPE_NARROW,
+    LXB_UNICODE_DECOMPOSITION_TYPE_NOBREAK,
+    LXB_UNICODE_DECOMPOSITION_TYPE_SMALL,
+    LXB_UNICODE_DECOMPOSITION_TYPE_SQUARE,
+    LXB_UNICODE_DECOMPOSITION_TYPE_SUB,
+    LXB_UNICODE_DECOMPOSITION_TYPE_SUPER,
+    LXB_UNICODE_DECOMPOSITION_TYPE_VERTICAL,
+    LXB_UNICODE_DECOMPOSITION_TYPE_WIDE,
+    LXB_UNICODE_DECOMPOSITION_TYPE__LAST_ENTRY
+};
+/* Bit 7 is a separate flag carried alongside the type identifier. */
+#define LXB_UNICODE_CANONICAL_SEPARATELY        (1 << 7)
+/* Extracts the flag by shifting the value right by seven bits. */
+#define LXB_UNICODE_IS_CANONICAL_SEPARATELY(a)  ((a) >> 7)
+/* Clears bit 7, leaving only the type identifier. */
+#define LXB_UNICODE_DECOMPOSITION_TYPE(a)       ((a) & ~(1 << 7))
+typedef uint8_t lxb_unicode_decomposition_type_t;
+
+/*
+ * Bit flags (one bit each, so they can be combined) stored in an
+ * lxb_unicode_quick_type_t.
+ * NOTE(review): the names suggest the "Maybe"/"No" values of the Unicode
+ * quick-check properties for NFC, NFD, NFKC and NFKD - confirm.
+ */
+enum {
+    LXB_UNICODE_QUICK__UNDEF     = 0x00,
+    LXB_UNICODE_QUICK_NFC_MAYBE  = 1 << 0,
+    LXB_UNICODE_QUICK_NFC_NO     = 1 << 1,
+    LXB_UNICODE_QUICK_NFD_NO     = 1 << 2,
+    LXB_UNICODE_QUICK_NFKC_MAYBE = 1 << 3,
+    LXB_UNICODE_QUICK_NFKC_NO    = 1 << 4,
+    LXB_UNICODE_QUICK_NFKD_NO    = 1 << 5
+};
+typedef uint8_t lxb_unicode_quick_type_t;
+
+/*
+ * Per-code-point IDNA classification stored in an lxb_unicode_idna_type_t.
+ * NOTE(review): the names look like the status values of the UTS #46
+ * mapping table - confirm.
+ */
+enum {
+    LXB_UNICODE_IDNA__UNDEF = 0x00,
+    LXB_UNICODE_IDNA_DEVIATION,
+    LXB_UNICODE_IDNA_DISALLOWED,
+    LXB_UNICODE_IDNA_IGNORED,
+    LXB_UNICODE_IDNA_MAPPED,
+    LXB_UNICODE_IDNA_VALID
+};
+typedef uint8_t lxb_unicode_idna_type_t;
+
+/* Normalizer state; the structure is defined at the end of this header. */
+typedef struct lxb_unicode_normalizer lxb_unicode_normalizer_t;
+
+/*
+ * One element of the normalizer's working buffer: a code point together
+ * with a ccc value (presumably its Canonical Combining Class, as the field
+ * of the same name in lxb_unicode_normalization_entry_t is documented).
+ */
+typedef struct {
+    lxb_codepoint_t cp;
+    uint8_t         ccc;
+}
+lxb_unicode_buffer_t;
+
+/*
+ * Signature of a normalization-form handler: processes length bytes of
+ * data for the normalizer uc and reports output through cb/ctx.
+ * NOTE(review): is_last presumably marks the final chunk of a stream -
+ * confirm against the implementations.
+ */
+typedef lxb_status_t
+(*lxb_unicode_nf_handler_f)(lxb_unicode_normalizer_t *uc, const lxb_char_t *data,
+                            size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                            bool is_last);
+
+/*
+ * Signature of the handler stored in lxb_unicode_normalizer.decomposition.
+ * It receives one code point and pointers to the buffer start and end,
+ * which are passed by address (presumably so the handler can grow the
+ * buffer), and returns a buffer position.
+ */
+typedef lxb_unicode_buffer_t *
+(*lxb_unicode_de_handler_f)(lxb_unicode_normalizer_t *uc, lxb_codepoint_t cp,
+                            lxb_unicode_buffer_t **buf,
+                            const lxb_unicode_buffer_t **end);
+
+/*
+ * Signature of the handler stored in lxb_unicode_normalizer.composition.
+ * It operates on three positions within the working buffer.
+ */
+typedef void
+(*lxb_unicode_co_handler_f)(lxb_unicode_buffer_t *starter,
+                            lxb_unicode_buffer_t *op, lxb_unicode_buffer_t *p);
+
+
+/*
+ * Per-code-point table entry holding two 16-bit references, one for
+ * normalization data and one for IDNA data (presumably indexes into the
+ * respective tables - confirm against the lookup code).
+ */
+typedef struct {
+    uint16_t normalization; /* lxb_unicode_normalization_t */
+    uint16_t idna;          /* lxb_unicode_idna_t */
+}
+lxb_unicode_entry_t;
+
+/*
+ * Normalization data of one code point.
+ * NOTE(review): length presumably counts the code points of the
+ * decomposition that the decomposition field refers to - confirm.
+ */
+typedef struct {
+    lxb_unicode_decomposition_type_t type;
+    lxb_unicode_quick_type_t         quick;         /* Quick Check.               */
+    uint8_t                          ccc;           /* Canonical Combining Class. */
+    uint8_t                          length;
+    uint16_t                         decomposition; /* lxb_codepoint_t */
+    uint16_t                         composition;   /* lxb_unicode_composition_entry_t */
+}
+lxb_unicode_normalization_entry_t;
+
+/*
+ * IDNA data of one code point: its classification plus a length/index
+ * pair (presumably locating the mapped code points in a separate table -
+ * confirm against the lookup code).
+ */
+typedef struct {
+    lxb_unicode_idna_type_t type;
+    uint8_t                 length;
+    uint16_t                index;
+}
+lxb_unicode_idna_entry_t;
+
+/*
+ * Describes a run of composition candidates for one code point: a start
+ * code point plus the length and index of the run in the
+ * lxb_unicode_composition_cps_t table.
+ */
+typedef struct {
+    uint8_t         length;  /* Length in lxb_unicode_composition_cps_t */
+    uint16_t        index;   /* lxb_unicode_composition_cps_t */
+    lxb_codepoint_t cp;      /* Begin code point in lxb_unicode_composition_cps_t */
+}
+lxb_unicode_composition_entry_t;
+
+/*
+ * A code point paired with an exclusion flag.
+ * NOTE(review): presumably a composition result and whether it is excluded
+ * from composition - confirm.
+ */
+typedef struct {
+    lxb_codepoint_t cp;
+    bool            exclusion;
+}
+lxb_unicode_composition_cp_t;
+
+/*
+ * State of a normalizer: the two handlers selected for it, a working
+ * buffer of lxb_unicode_buffer_t elements with positions into it, and a
+ * few small state fields.
+ * NOTE(review): the roles given below are inferred from names and types
+ * only - confirm against unicode.c.
+ */
+struct lxb_unicode_normalizer {
+    /* Decomposition and composition handlers. */
+    lxb_unicode_de_handler_f   decomposition;
+    lxb_unicode_co_handler_f   composition;
+
+    lxb_unicode_buffer_t       *starter;
+
+    /* Working buffer: start, end and positions within it. */
+    lxb_unicode_buffer_t       *buf;
+    const lxb_unicode_buffer_t *end;
+    lxb_unicode_buffer_t       *p;
+    lxb_unicode_buffer_t       *ican;
+
+    /*
+     * Up to four pending bytes and their count. The spelling "lenght" is
+     * part of the structure's interface and is kept as is.
+     */
+    lxb_char_t                 tmp[4];
+    uint8_t                    tmp_lenght;
+
+    uint8_t                    quick_ccc;
+    lxb_unicode_quick_type_t   quick_type;
+
+    size_t                     flush_cp;
+};
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_UNICODE_BASE_H */
--- a/ext/lexbor/lexbor/unicode/idna.c
+++ b/ext/lexbor/lexbor/unicode/idna.c
@ -0,0 +1,738 @@
+/*
+ * Copyright (C) 2023 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#include "lexbor/unicode/idna.h"
+#include "lexbor/unicode/unicode.h"
+#include "lexbor/punycode/punycode.h"
+#include "lexbor/encoding/encoding.h"
+
+
+/*
+ * Internal context passed through normalization and Punycode decoding.
+ *
+ * Filled in lxb_unicode_idna_processing_body() with the caller's callback,
+ * the caller's opaque context and the caller's flags.
+ */
+typedef struct {
+    lxb_unicode_idna_cb_f   cb;       /* Caller's per-label callback. */
+    void                    *context; /* Caller's opaque context for cb. */
+    lxb_unicode_idna_flag_t flags;    /* LXB_UNICODE_IDNA_FLAG_* bitmap. */
+}
+lxb_unicode_idna_ctx_t;
+
+/*
+ * Output accumulator used by the to_ascii and to_unicode conversions.
+ *
+ * Output starts in the inline `buffer`; when it does not fit, the callbacks
+ * switch `buf` to heap memory.  The *_body() functions free `buf` when it
+ * differs from `buffer`.
+ */
+typedef struct {
+    lxb_char_t              buffer[4096]; /* Inline storage used first. */
+    lxb_char_t              *p;           /* Current write position. */
+    lxb_char_t              *buf;         /* Start of current storage. */
+    const lxb_char_t        *end;         /* End of current storage. */
+    lxb_unicode_idna_flag_t flags;        /* LXB_UNICODE_IDNA_FLAG_* bitmap. */
+}
+lxb_unicode_idna_ascii_ctx_t;
+
+
+/*
+ * Forward declarations of the file-local helpers defined further below.
+ */
+
+/* Shared implementation of the processing functions (bytes or code points). */
+static lxb_status_t
+lxb_unicode_idna_processing_body(lxb_unicode_idna_t *idna, const void *data,
+                                 size_t len, lxb_unicode_idna_cb_f cb, void *ctx,
+                                 lxb_unicode_idna_flag_t flags, bool is_cp);
+
+/* Splits code points into labels at U+002E. */
+static lxb_status_t
+lxb_unicode_idna_norm_c_cb(const lxb_codepoint_t *cps, size_t len, void *ctx);
+
+/* Handles one label: Punycode decoding, validation, user callback. */
+static lxb_status_t
+lxb_unicode_idna_norm_c_send(const lxb_codepoint_t *cps,
+                             const lxb_codepoint_t *p,
+                             lxb_unicode_idna_ctx_t *context);
+
+/* Receives the code points of a Punycode-decoded label. */
+static lxb_status_t
+lxb_unicode_idna_punycode_cb(const lxb_codepoint_t *cps, size_t len, void *ctx);
+
+/* Per-label callback of the to_ascii conversion. */
+static lxb_status_t
+lxb_unicode_idna_to_ascii_cb(const lxb_codepoint_t *part, size_t len,
+                             void *ctx, lxb_status_t status);
+
+/* Shared implementation of the to_ascii functions. */
+static lxb_status_t
+lxb_unicode_idna_to_ascii_body(lxb_unicode_idna_t *idna, const void *data,
+                               size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                               lxb_unicode_idna_flag_t flags, bool is_cp);
+
+/* Appends one Punycode-encoded label to the output accumulator. */
+static lxb_status_t
+lxb_unicode_idna_ascii_puny_cb(const lxb_char_t *data, size_t length, void *ctx,
+                               bool unchanged);
+
+/* Per-label callback of the to_unicode conversion. */
+static lxb_status_t
+lxb_unicode_idna_to_unicode_cb(const lxb_codepoint_t *part, size_t len,
+                               void *ctx, lxb_status_t status);
+
+/* Shared implementation of the to_unicode functions. */
+static lxb_status_t
+lxb_unicode_idna_to_unicode_body(lxb_unicode_idna_t *idna, const void *data,
+                                 size_t length, lexbor_serialize_cb_f cb,
+                                 void *ctx, lxb_unicode_idna_flag_t flags,
+                                 bool is_cp);
+
+/* Shared implementation of the validity criteria functions. */
+static bool
+lxb_unicode_idna_validity_criteria_h(const void *data, size_t length,
+                                     lxb_unicode_idna_flag_t flags, bool is_cp);
+
+/*
+ * Allocate storage for an IDNA object.
+ *
+ * The memory is not initialized here; NULL is returned when the allocation
+ * fails.
+ */
+lxb_unicode_idna_t *
+lxb_unicode_idna_create(void)
+{
+    lxb_unicode_idna_t *idna;
+
+    idna = lexbor_malloc(sizeof(lxb_unicode_idna_t));
+
+    return idna;
+}
+
+/*
+ * Initialize an IDNA object: its embedded normalizer is set up for NFC.
+ *
+ * Returns LXB_STATUS_ERROR_OBJECT_IS_NULL for a NULL object, otherwise the
+ * status of the normalizer initialization.
+ */
+lxb_status_t
+lxb_unicode_idna_init(lxb_unicode_idna_t *idna)
+{
+    if (idna != NULL) {
+        return lxb_unicode_normalizer_init(&idna->normalizer,
+                                           LXB_UNICODE_NFC);
+    }
+
+    return LXB_STATUS_ERROR_OBJECT_IS_NULL;
+}
+
+/*
+ * Clean the embedded normalizer of an IDNA object.
+ */
+void
+lxb_unicode_idna_clean(lxb_unicode_idna_t *idna)
+{
+    lxb_unicode_normalizer_t *normalizer = &idna->normalizer;
+
+    lxb_unicode_normalizer_clean(normalizer);
+}
+
+/*
+ * Release the resources of an IDNA object.
+ *
+ * The embedded normalizer is always destroyed in place.  The object itself
+ * is freed only when self_destroy is true; otherwise it is returned to the
+ * caller.  A NULL object yields NULL.
+ */
+lxb_unicode_idna_t *
+lxb_unicode_idna_destroy(lxb_unicode_idna_t *idna, bool self_destroy)
+{
+    if (idna != NULL) {
+        (void) lxb_unicode_normalizer_destroy(&idna->normalizer, false);
+
+        if (!self_destroy) {
+            return idna;
+        }
+
+        return lexbor_free(idna);
+    }
+
+    return NULL;
+}
+
+/*
+ * Grow a code point buffer that may initially live on the caller's stack.
+ *
+ * The new capacity is four times the current capacity plus `len`.
+ *
+ * @param[in] buf      Current storage (either `buffer` or heap memory).
+ * @param[in] buffer   The caller's initial (non-heap) storage.
+ * @param[in,out] buf_p    Write position; rebased onto the new storage.
+ * @param[in,out] buf_end  End of storage; set to the end of the new storage.
+ * @param[in] len      Additional code points requested.
+ *
+ * @return New storage, or NULL on allocation failure.  On failure heap
+ * storage passed in `buf` is released, so the caller must not free it again.
+ */
+lxb_codepoint_t *
+lxb_unicode_idna_realloc(lxb_codepoint_t *buf, const lxb_codepoint_t *buffer,
+                         lxb_codepoint_t **buf_p, lxb_codepoint_t **buf_end,
+                         size_t len)
+{
+    size_t nlen, used;
+    lxb_codepoint_t *tmp;
+
+    /* Measure before `buf` is possibly invalidated by the reallocation. */
+    used = *buf_p - buf;
+    nlen = ((*buf_end - buf) * 4) + len;
+
+    if (buf == buffer) {
+        tmp = lexbor_malloc(nlen * sizeof(lxb_codepoint_t));
+        if (tmp == NULL) {
+            return NULL;
+        }
+
+        /*
+         * First move from the caller's buffer to the heap: the code points
+         * collected so far have to be carried over explicitly.
+         */
+        memcpy(tmp, buf, used * sizeof(lxb_codepoint_t));
+    }
+    else {
+        tmp = lexbor_realloc(buf, nlen * sizeof(lxb_codepoint_t));
+        if (tmp == NULL) {
+            return lexbor_free(buf);
+        }
+    }
+
+    *buf_p = tmp + used;
+    *buf_end = tmp + nlen;
+
+    return tmp;
+}
+
+/*
+ * Domain name processing for byte input; see
+ * lxb_unicode_idna_processing_body().
+ */
+lxb_status_t
+lxb_unicode_idna_processing(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                            size_t length, lxb_unicode_idna_cb_f cb, void *ctx,
+                            lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = false;
+
+    return lxb_unicode_idna_processing_body(idna, data, length, cb, ctx,
+                                            flags, is_cp);
+}
+
+/*
+ * Domain name processing for code point input; see
+ * lxb_unicode_idna_processing_body().
+ */
+lxb_status_t
+lxb_unicode_idna_processing_cp(lxb_unicode_idna_t *idna,
+                               const lxb_codepoint_t *cps, size_t length,
+                               lxb_unicode_idna_cb_f cb, void *ctx,
+                               lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = true;
+
+    return lxb_unicode_idna_processing_body(idna, cps, length, cb, ctx,
+                                            flags, is_cp);
+}
+
+/*
+ * Shared implementation of lxb_unicode_idna_processing() and
+ * lxb_unicode_idna_processing_cp().
+ *
+ * Step 1: every input code point is classified by lxb_unicode_idna_type();
+ *         ignored code points are dropped, mapped ones are replaced by their
+ *         mapping, everything else is copied unchanged.  Deviation code
+ *         points are treated as mapped when transitional processing is
+ *         requested.
+ * Step 2: the collected code points are normalized if the quick check asks
+ *         for it, then split into labels by lxb_unicode_idna_norm_c_cb().
+ *
+ * `data` holds bytes (decoded as UTF-8) when is_cp is false and code points
+ * when is_cp is true; `len` counts elements of the respective kind.
+ */
+static lxb_status_t
+lxb_unicode_idna_processing_body(lxb_unicode_idna_t *idna, const void *data,
+                                 size_t len, lxb_unicode_idna_cb_f cb, void *ctx,
+                                 lxb_unicode_idna_flag_t flags, bool is_cp)
+{
+    bool renorm;
+    size_t idx, map_len, count;
+    lxb_status_t status;
+    lxb_codepoint_t cp, *out, *out_p, *out_end;
+    const lxb_char_t *cur, *stop;
+    lxb_unicode_idna_type_t type;
+    const lxb_unicode_idna_entry_t *entry;
+    const lxb_codepoint_t *mapped;
+    lxb_unicode_idna_ctx_t context;
+    lxb_codepoint_t buffer[4096];
+
+    out = buffer;
+    out_p = buffer;
+    out_end = buffer + (sizeof(buffer) / sizeof(lxb_codepoint_t));
+
+    /* The input is walked byte-wise in both modes. */
+    cur = data;
+    stop = cur + ((is_cp) ? len * sizeof(lxb_codepoint_t) : len);
+
+    while (cur < stop) {
+        if (!is_cp) {
+            cp = lxb_encoding_decode_valid_utf_8_single(&cur, stop);
+            if (cp > LXB_ENCODING_DECODE_MAX_CODEPOINT) {
+                status = LXB_STATUS_ERROR_UNEXPECTED_DATA;
+                goto done;
+            }
+        }
+        else {
+            cp = *((const lxb_codepoint_t *) cur);
+            cur = (const lxb_char_t *) ((const lxb_codepoint_t *) cur + 1);
+        }
+
+        type = lxb_unicode_idna_type(cp);
+
+        /* With transitional processing a deviation is handled as mapped. */
+        if (type == LXB_UNICODE_IDNA_DEVIATION
+            && (flags & LXB_UNICODE_IDNA_FLAG_TRANSITIONAL_PROCESSING))
+        {
+            type = LXB_UNICODE_IDNA_MAPPED;
+        }
+
+        if (type == LXB_UNICODE_IDNA_IGNORED) {
+            continue;
+        }
+
+        if (type == LXB_UNICODE_IDNA_MAPPED) {
+            entry = lxb_unicode_idna_entry_by_cp(cp);
+            mapped = lxb_unicode_idna_map(entry, &map_len);
+
+            if (out_p + map_len > out_end) {
+                out = lxb_unicode_idna_realloc(out, buffer, &out_p,
+                                               &out_end, map_len);
+                if (out == NULL) {
+                    return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+                }
+            }
+
+            for (idx = 0; idx < map_len; idx++) {
+                *out_p++ = mapped[idx];
+            }
+
+            continue;
+        }
+
+        /* Deviation (non-transitional), disallowed, valid and unknown. */
+
+        if (out_p >= out_end) {
+            out = lxb_unicode_idna_realloc(out, buffer, &out_p,
+                                           &out_end, 1);
+            if (out == NULL) {
+                return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+            }
+        }
+
+        *out_p++ = cp;
+    }
+
+    count = out_p - out;
+
+    context.cb = cb;
+    context.context = ctx;
+    context.flags = flags;
+
+    renorm = lxb_unicode_quick_check_cp(&idna->normalizer, out, count, true);
+
+    if (!renorm) {
+        status = lxb_unicode_idna_norm_c_cb(out, count, &context);
+    }
+    else {
+        lxb_unicode_flush_count_set(&idna->normalizer, UINT32_MAX);
+
+        status = lxb_unicode_normalize_cp(&idna->normalizer, out, count,
+                                          lxb_unicode_idna_norm_c_cb,
+                                          &context, true);
+    }
+
+done:
+
+    if (out != buffer) {
+        (void) lexbor_free(out);
+    }
+
+    return status;
+}
+
+/*
+ * Split a run of code points into labels at U+002E ( . ) FULL STOP and pass
+ * every label to lxb_unicode_idna_norm_c_send().
+ *
+ * A trailing FULL STOP produces one more, zero-length, label.
+ * For example, "muuuu." is reported as "muuuu" followed by "".
+ */
+static lxb_status_t
+lxb_unicode_idna_norm_c_cb(const lxb_codepoint_t *cps, size_t len, void *ctx)
+{
+    lxb_status_t status;
+    lxb_unicode_idna_ctx_t *context = ctx;
+    const lxb_codepoint_t *label, *cur, *end;
+
+    label = cps;
+    end = cps + len;
+
+    for (cur = cps; cur < end; cur++) {
+        /* Only U+002E ( . ) FULL STOP terminates a label. */
+
+        if (*cur != 0x002E) {
+            continue;
+        }
+
+        status = lxb_unicode_idna_norm_c_send(label, cur, context);
+        if (status != LXB_STATUS_OK) {
+            return status;
+        }
+
+        label = cur + 1;
+    }
+
+    /* Remaining label, or the empty label after a trailing FULL STOP. */
+
+    if (end > label || (len >= 1 && end[-1] == '.')) {
+        return lxb_unicode_idna_norm_c_send(label, end, context);
+    }
+
+    return LXB_STATUS_OK;
+}
+
+/*
+ * Process a single label spanning [cps, p).
+ *
+ * A label with an "xn--" prefix (any letter case) is Punycode-decoded; on
+ * success the decoded label has been delivered through
+ * lxb_unicode_idna_punycode_cb() and nothing more is done.  Otherwise the
+ * label is validated as is and handed to the user callback together with
+ * the decoding status (LXB_STATUS_OK when no decoding was attempted).
+ */
+static lxb_status_t
+lxb_unicode_idna_norm_c_send(const lxb_codepoint_t *cps,
+                             const lxb_codepoint_t *p,
+                             lxb_unicode_idna_ctx_t *context)
+{
+    bool valid, has_prefix;
+    size_t len;
+    lxb_status_t status;
+
+    len = p - cps;
+    status = LXB_STATUS_OK;
+
+    /* xn-- or Xn-- or xN-- or XN-- */
+
+    has_prefix = len >= 4
+                 && (cps[0] == 0x0078 || cps[0] == 0x0058)
+                 && (cps[1] == 0x006E || cps[1] == 0x004E)
+                 && cps[2] == 0x002D && cps[3] == 0x002D;
+
+    if (has_prefix) {
+        status = lxb_punycode_decode_cp(cps + 4, len - 4,
+                                        lxb_unicode_idna_punycode_cb,
+                                        context);
+        if (status == LXB_STATUS_OK) {
+            return LXB_STATUS_OK;
+        }
+    }
+
+    valid = lxb_unicode_idna_validity_criteria_cp(cps, len, context->flags);
+    if (!valid) {
+        return LXB_STATUS_ERROR_UNEXPECTED_RESULT;
+    }
+
+    return context->cb(cps, len, context->context, status);
+}
+
+/*
+ * Receives the code points of a successfully Punycode-decoded label,
+ * validates them and forwards them to the user callback with
+ * LXB_STATUS_OK.
+ *
+ * The flags are taken from the internal context.  The user's context is
+ * opaque here: it only has a known layout for the to_ascii/to_unicode
+ * conversions, while lxb_unicode_idna_processing*() accepts any pointer.
+ *
+ * @param[in] cps  Decoded code points of the label.
+ * @param[in] len  Number of code points.
+ * @param[in] ctx  lxb_unicode_idna_ctx_t * set up by the processing body.
+ *
+ * @return LXB_STATUS_ERROR_UNEXPECTED_RESULT if the label is not valid,
+ * otherwise the status returned by the user callback.
+ */
+static lxb_status_t
+lxb_unicode_idna_punycode_cb(const lxb_codepoint_t *cps, size_t len, void *ctx)
+{
+    bool cr;
+    lxb_unicode_idna_ctx_t *context = ctx;
+
+    cr = lxb_unicode_idna_validity_criteria_cp(cps, len, context->flags);
+    if (!cr) {
+        return LXB_STATUS_ERROR_UNEXPECTED_RESULT;
+    }
+
+    return context->cb(cps, len, context->context, LXB_STATUS_OK);
+}
+
+/*
+ * Convert a domain name given as bytes to its ASCII form; see
+ * lxb_unicode_idna_to_ascii_body().
+ */
+lxb_status_t
+lxb_unicode_idna_to_ascii(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                          size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                          lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = false;
+
+    return lxb_unicode_idna_to_ascii_body(idna, data, length, cb, ctx,
+                                          flags, is_cp);
+}
+
+/*
+ * Convert a domain name given as code points to its ASCII form; see
+ * lxb_unicode_idna_to_ascii_body().
+ */
+lxb_status_t
+lxb_unicode_idna_to_ascii_cp(lxb_unicode_idna_t *idna, const lxb_codepoint_t *cps,
+                             size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                             lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = true;
+
+    return lxb_unicode_idna_to_ascii_body(idna, cps, length, cb, ctx,
+                                          flags, is_cp);
+}
+
+/*
+ * Shared implementation of the to_ascii conversions.
+ *
+ * Runs the domain name processing with a callback that Punycode-encodes
+ * every label into a local accumulator, each label followed by a FULL STOP.
+ * On success the last FULL STOP is dropped and the user callback is invoked
+ * once with the assembled result.
+ */
+static lxb_status_t
+lxb_unicode_idna_to_ascii_body(lxb_unicode_idna_t *idna, const void *data,
+                               size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                               lxb_unicode_idna_flag_t flags, bool is_cp)
+{
+    lxb_status_t status;
+    lxb_unicode_idna_ascii_ctx_t context;
+
+    context.buf = context.buffer;
+    context.p = context.buffer;
+    context.end = context.buf + sizeof(context.buffer);
+    context.flags = flags;
+
+    if (is_cp) {
+        status = lxb_unicode_idna_processing_cp(idna, data, length,
+                                                lxb_unicode_idna_to_ascii_cb,
+                                                &context, flags);
+    }
+    else {
+        status = lxb_unicode_idna_processing(idna, data, length,
+                                             lxb_unicode_idna_to_ascii_cb,
+                                             &context, flags);
+    }
+
+    if (status == LXB_STATUS_OK) {
+        /* Remove last U+002E ( . ) FULL STOP. */
+
+        if (context.p > context.buf) {
+            context.p -= 1;
+        }
+
+        status = cb(context.buf, context.p - context.buf, ctx);
+    }
+
+    /* The accumulator may have moved to the heap while growing. */
+
+    if (context.buf != context.buffer) {
+        (void) lexbor_free(context.buf);
+    }
+
+    return status;
+}
+
+/*
+ * Per-label callback of the to_ascii conversion: a label that arrives with
+ * an error status is rejected with that status, any other label is
+ * Punycode-encoded into the accumulator passed as ctx.
+ */
+static lxb_status_t
+lxb_unicode_idna_to_ascii_cb(const lxb_codepoint_t *part, size_t len,
+                             void *ctx, lxb_status_t status)
+{
+    if (status == LXB_STATUS_OK) {
+        return lxb_punycode_encode_cp(part, len,
+                                      lxb_unicode_idna_ascii_puny_cb, ctx);
+    }
+
+    return status;
+}
+
+/*
+ * Append one encoded label to the ASCII accumulator.
+ *
+ * The label is prefixed with "xn--" unless `unchanged` is true, followed by
+ * a FULL STOP and a terminating zero byte (the zero is not counted in the
+ * write position).
+ *
+ * @param[in] data       Label bytes produced by the Punycode encoder.
+ * @param[in] length     Number of bytes in `data`.
+ * @param[in] ctx        lxb_unicode_idna_ascii_ctx_t * accumulator.
+ * @param[in] unchanged  True when no "xn--" prefix must be written.
+ *
+ * @return LXB_STATUS_OK, or LXB_STATUS_ERROR_MEMORY_ALLOCATION when the
+ * accumulator cannot grow.  On failure the accumulator keeps its previous
+ * storage, which the owner releases.
+ */
+static lxb_status_t
+lxb_unicode_idna_ascii_puny_cb(const lxb_char_t *data, size_t length, void *ctx,
+                               bool unchanged)
+{
+    size_t nlen, used;
+    lxb_char_t *tmp;
+    lxb_unicode_idna_ascii_ctx_t *asc = ctx;
+
+    static const lexbor_str_t prefix = lexbor_str("xn--");
+
+    /* Reserve: label + 4 bytes prefix + FULL STOP + terminating zero. */
+
+    if (asc->p + length + 6 > asc->end) {
+        used = asc->p - asc->buf;
+        nlen = ((asc->end - asc->buf) * 4) + length + 6;
+
+        if (asc->buf == asc->buffer) {
+            tmp = lexbor_malloc(nlen);
+
+            /*
+             * First move from the inline buffer to the heap: the labels
+             * written so far have to be carried over explicitly.
+             */
+            if (tmp != NULL) {
+                memcpy(tmp, asc->buf, used);
+            }
+        }
+        else {
+            tmp = lexbor_realloc(asc->buf, nlen);
+        }
+
+        if (tmp == NULL) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+
+        asc->p = tmp + used;
+        asc->buf = tmp;
+        asc->end = tmp + nlen;
+    }
+
+    if (!unchanged) {
+        memcpy(asc->p, prefix.data, prefix.length);
+        asc->p += prefix.length;
+    }
+
+    memcpy(asc->p, data, length);
+
+    asc->p += length;
+    *asc->p++ = '.';
+    *asc->p = 0x00;
+
+    return LXB_STATUS_OK;
+}
+
+/*
+ * Validity criteria check for a label given as bytes; see
+ * lxb_unicode_idna_validity_criteria_h().
+ */
+bool
+lxb_unicode_idna_validity_criteria(const lxb_char_t *data, size_t length,
+                                   lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = false;
+
+    return lxb_unicode_idna_validity_criteria_h(data, length, flags, is_cp);
+}
+
+/*
+ * Validity criteria check for a label given as code points; see
+ * lxb_unicode_idna_validity_criteria_h().
+ */
+bool
+lxb_unicode_idna_validity_criteria_cp(const lxb_codepoint_t *data, size_t length,
+                                      lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = true;
+
+    return lxb_unicode_idna_validity_criteria_h(data, length, flags, is_cp);
+}
+
+/*
+ * Shared implementation of the validity criteria check for one label.
+ *
+ * Checks performed:
+ *   - with LXB_UNICODE_IDNA_FLAG_CHECK_HYPHENS: hyphen positions;
+ *   - without it: the label must not start with "xn--" (any letter case);
+ *   - the label must not contain U+002E ( . ) FULL STOP;
+ *   - every code point must be valid according to lxb_unicode_idna_type();
+ *     deviation code points are accepted only when transitional processing
+ *     is not requested.
+ *
+ * @param[in] data    Bytes (UTF-8) if is_cp is false, code points otherwise.
+ * @param[in] length  Number of bytes or code points respectively.
+ * @param[in] flags   Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ * @param[in] is_cp   Selects the interpretation of `data`.
+ *
+ * @return true if the label passes all checks, otherwise false.
+ */
+static bool
+lxb_unicode_idna_validity_criteria_h(const void *data, size_t length,
+                                     lxb_unicode_idna_flag_t flags, bool is_cp)
+{
+    size_t len;
+    lxb_codepoint_t cp;
+    const lxb_codepoint_t *cps;
+    const lxb_char_t *p, *end;
+    lxb_unicode_idna_type_t type;
+
+    p = data;
+    len = length * ((is_cp) ? sizeof(lxb_codepoint_t) : 1);
+    end = (const lxb_char_t *) data + len;
+
+    if (flags & LXB_UNICODE_IDNA_FLAG_CHECK_HYPHENS) {
+        /* U+002D HYPHEN-MINUS */
+
+        /*
+         * NOTE(review): UTS #46 words this criterion as a hyphen in both
+         * the third and fourth positions; the test below rejects a hyphen
+         * at index 3 or at index 4.  Kept as is -- confirm the intent.
+         */
+
+        if (is_cp) {
+            cps = data;
+
+            if (length > 4) {
+                if (cps[3] == 0x002D || cps[4] == 0x002D) {
+                    return false;
+                }
+            }
+
+            if (length >= 1) {
+                if (cps[0] == 0x002D || cps[length - 1] == 0x002D) {
+                    return false;
+                }
+            }
+        }
+        else {
+            if (length > 4) {
+                if (p[3] == 0x002D || p[4] == 0x002D) {
+                    return false;
+                }
+            }
+
+            if (length >= 1) {
+                /* The last byte of the label, same as in the branch above. */
+                if (p[0] == 0x002D || p[length - 1] == 0x002D) {
+                    return false;
+                }
+            }
+        }
+    }
+    else if (length >= 4) {
+        if (is_cp) {
+            cps = data;
+
+            if (   (cps[0] == 0x0078 || cps[0] == 0x0058)
+                && (cps[1] == 0x006E || cps[1] == 0x004E)
+                &&  cps[2] == 0x002D && cps[3] == 0x002D)
+            {
+                return false;
+            }
+        }
+        else {
+            if (   (p[0] == 0x0078 || p[0] == 0x0058)
+                && (p[1] == 0x006E || p[1] == 0x004E)
+                &&  p[2] == 0x002D && p[3] == 0x002D)
+            {
+                return false;
+            }
+        }
+    }
+
+    while (p < end) {
+        if (!is_cp) {
+            cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
+            if (cp == LXB_ENCODING_DECODE_ERROR) {
+                return false;
+            }
+        }
+        else {
+            cp = *((const lxb_codepoint_t *) p);
+            p = (const lxb_char_t *) ((const lxb_codepoint_t *) p + 1);
+        }
+
+        /* U+002E ( . ) FULL STOP */
+
+        if (cp == 0x002E) {
+            return false;
+        }
+
+        type = lxb_unicode_idna_type(cp);
+
+        switch (type) {
+            case LXB_UNICODE_IDNA_VALID:
+                break;
+
+            case LXB_UNICODE_IDNA_DEVIATION:
+                if (!(flags & LXB_UNICODE_IDNA_FLAG_TRANSITIONAL_PROCESSING)) {
+                    break;
+                }
+
+                /* Fall through. */
+
+            case LXB_UNICODE_IDNA_DISALLOWED:
+            case LXB_UNICODE_IDNA_IGNORED:
+            case LXB_UNICODE_IDNA_MAPPED:
+            default:
+                return false;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Convert a domain name given as bytes to its Unicode form; see
+ * lxb_unicode_idna_to_unicode_body().
+ */
+lxb_status_t
+lxb_unicode_idna_to_unicode(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                            size_t length, lexbor_serialize_cb_f cb,
+                            void *ctx, lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = false;
+
+    return lxb_unicode_idna_to_unicode_body(idna, data, length, cb, ctx,
+                                            flags, is_cp);
+}
+
+/*
+ * Convert a domain name given as code points to its Unicode form; see
+ * lxb_unicode_idna_to_unicode_body().
+ */
+lxb_status_t
+lxb_unicode_idna_to_unicode_cp(lxb_unicode_idna_t *idna,
+                               const lxb_codepoint_t *cps,
+                               size_t length, lexbor_serialize_cb_f cb,
+                               void *ctx, lxb_unicode_idna_flag_t flags)
+{
+    const bool is_cp = true;
+
+    return lxb_unicode_idna_to_unicode_body(idna, cps, length, cb, ctx,
+                                            flags, is_cp);
+}
+
+/*
+ * Shared implementation of the to_unicode conversions.
+ *
+ * Runs the domain name processing with a callback that writes every label
+ * as UTF-8 into a local accumulator, each label followed by a FULL STOP.
+ * On success the last FULL STOP is dropped and the user callback is invoked
+ * once with the assembled result.
+ */
+static lxb_status_t
+lxb_unicode_idna_to_unicode_body(lxb_unicode_idna_t *idna, const void *data,
+                                 size_t length, lexbor_serialize_cb_f cb,
+                                 void *ctx, lxb_unicode_idna_flag_t flags,
+                                 bool is_cp)
+{
+    lxb_status_t status;
+    lxb_unicode_idna_ascii_ctx_t context;
+
+    context.buf = context.buffer;
+    context.p = context.buffer;
+    context.end = context.buf + sizeof(context.buffer);
+    context.flags = flags;
+
+    if (is_cp) {
+        status = lxb_unicode_idna_processing_cp(idna, data, length,
+                                                lxb_unicode_idna_to_unicode_cb,
+                                                &context, flags);
+    }
+    else {
+        status = lxb_unicode_idna_processing(idna, data, length,
+                                             lxb_unicode_idna_to_unicode_cb,
+                                             &context, flags);
+    }
+
+    if (status == LXB_STATUS_OK) {
+        /* Remove last U+002E ( . ) FULL STOP. */
+
+        if (context.p > context.buf) {
+            context.p -= 1;
+        }
+
+        status = cb(context.buf, context.p - context.buf, ctx);
+    }
+
+    /* The accumulator may have moved to the heap while growing. */
+
+    if (context.buf != context.buffer) {
+        (void) lexbor_free(context.buf);
+    }
+
+    return status;
+}
+
+
+/*
+ * Per-label callback of the to_unicode conversion.
+ *
+ * Encodes the label as UTF-8 into the accumulator, followed by a FULL STOP
+ * and a terminating zero byte (the zero is not counted in the write
+ * position).
+ *
+ * @param[in] part    Code points of the label.
+ * @param[in] len     Number of code points.
+ * @param[in] ctx     lxb_unicode_idna_ascii_ctx_t * accumulator.
+ * @param[in] status  Status reported for the label; anything other than
+ *                    LXB_STATUS_OK is returned unchanged.
+ *
+ * @return LXB_STATUS_OK on success, LXB_STATUS_ERROR_UNEXPECTED_DATA if a
+ * code point cannot be encoded, LXB_STATUS_ERROR_MEMORY_ALLOCATION when the
+ * accumulator cannot grow.
+ */
+static lxb_status_t
+lxb_unicode_idna_to_unicode_cb(const lxb_codepoint_t *part, size_t len,
+                               void *ctx, lxb_status_t status)
+{
+    int8_t res;
+    size_t length, nlen, used;
+    lxb_char_t *tmp;
+    const lxb_codepoint_t *p, *end;
+    lxb_unicode_idna_ascii_ctx_t *asc = ctx;
+
+    if (status != LXB_STATUS_OK) {
+        return status;
+    }
+
+    p = part;
+    end = part + len;
+
+    /* First pass: size of the label in UTF-8. */
+
+    length = 0;
+
+    while (p < end) {
+        res = lxb_encoding_encode_utf_8_length(*p++);
+        if (res == 0) {
+            return LXB_STATUS_ERROR_UNEXPECTED_DATA;
+        }
+
+        length += res;
+    }
+
+    /* Reserve: label + FULL STOP + terminating zero. */
+
+    if (asc->p + length + 2 > asc->end) {
+        used = asc->p - asc->buf;
+        nlen = ((asc->end - asc->buf) * 4) + length + 2;
+
+        if (asc->buf == asc->buffer) {
+            tmp = lexbor_malloc(nlen);
+
+            /*
+             * First move from the inline buffer to the heap: the labels
+             * written so far have to be carried over explicitly.
+             */
+            if (tmp != NULL) {
+                memcpy(tmp, asc->buf, used);
+            }
+        }
+        else {
+            tmp = lexbor_realloc(asc->buf, nlen);
+        }
+
+        if (tmp == NULL) {
+            return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
+        }
+
+        asc->p = tmp + used;
+        asc->buf = tmp;
+        asc->end = tmp + nlen;
+    }
+
+    /* Second pass: write the label. */
+
+    p = part;
+
+    while (p < end) {
+        (void) lxb_encoding_encode_utf_8_single(NULL, &asc->p, asc->end, *p++);
+    }
+
+    *asc->p++ = '.';
+    *asc->p = 0x00;
+
+    return LXB_STATUS_OK;
+}
--- a/ext/lexbor/lexbor/unicode/idna.h
+++ b/ext/lexbor/lexbor/unicode/idna.h
@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2023 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ *
+ * UNICODE IDNA COMPATIBILITY PROCESSING
+ * https://www.unicode.org/reports/tr46/
+ */
+
+#ifndef LEXBOR_UNICODE_IDNA_H
+#define LEXBOR_UNICODE_IDNA_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/unicode/base.h"
+
+
+/*
+ * Callback invoked by the domain name processing for every label.
+ *
+ * @param[in] part    Code points of the label.
+ * @param[in] len     Number of code points. Can be 0.
+ * @param[in] ctx     User context given to the processing function.
+ * @param[in] status  LXB_STATUS_OK, or the error status of a failed
+ *                    Punycode decoding of the label.
+ *
+ * A status other than LXB_STATUS_OK returned by the callback is propagated
+ * by the processing functions.
+ */
+typedef lxb_status_t
+(*lxb_unicode_idna_cb_f)(const lxb_codepoint_t *part, size_t len,
+                         void *ctx, lxb_status_t status);
+
+/*
+ * Bitmap of IDNA processing flags.
+ *
+ * Note that the values start at bit 1 (bit 0 is not assigned).
+ */
+typedef enum {
+    LXB_UNICODE_IDNA_FLAG_UNDEF                   = 0x00,
+    LXB_UNICODE_IDNA_FLAG_USE_STD3ASCII_RULES     = 1 << 1,
+    LXB_UNICODE_IDNA_FLAG_CHECK_HYPHENS           = 1 << 2,
+    LXB_UNICODE_IDNA_FLAG_CHECK_BIDI              = 1 << 3, /* Not implemented. */
+    LXB_UNICODE_IDNA_FLAG_CHECK_JOINERS           = 1 << 4, /* Not implemented. */
+    LXB_UNICODE_IDNA_FLAG_TRANSITIONAL_PROCESSING = 1 << 5,
+    LXB_UNICODE_IDNA_FLAG_VERIFY_DNS_LENGTH       = 1 << 6
+}
+lxb_unicode_idna_flag_t;
+
+/*
+ * IDNA processing object.  Holds the normalizer used for the normalization
+ * step of the domain name processing.
+ */
+typedef struct {
+    lxb_unicode_normalizer_t normalizer;
+}
+lxb_unicode_idna_t;
+
+
+/*
+ * Create lxb_unicode_idna_t object.
+ *
+ * The object is only allocated; it has to be initialized with
+ * lxb_unicode_idna_init() before use.
+ *
+ * @return lxb_unicode_idna_t * if successful, otherwise NULL.
+ */
+LXB_API lxb_unicode_idna_t *
+lxb_unicode_idna_create(void);
+
+/*
+ * Initialization of lxb_unicode_idna_t object.
+ *
+ * @param[in] lxb_unicode_idna_t *.  May be NULL; in that case the
+ * LXB_STATUS_ERROR_OBJECT_IS_NULL status is returned.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_init(lxb_unicode_idna_t *idna);
+
+/*
+ * Clears the object.  Returns it to the state it had after initialization.
+ *
+ * @param[in] lxb_unicode_idna_t *. Not NULL (no NULL check is performed).
+ */
+LXB_API void
+lxb_unicode_idna_clean(lxb_unicode_idna_t *idna);
+
+/*
+ * Destroy lxb_unicode_idna_t object.
+ *
+ * Release of occupied resources.
+ *
+ * @param[in] lxb_unicode_idna_t *. Can be NULL.
+ * @param[in] if false: only destroys internal buffers.
+ * if true: destroys the lxb_unicode_idna_t object and all internal buffers.
+ *
+ * @return the given lxb_unicode_idna_t * if it is not NULL and
+ * self_destroy = false, otherwise NULL.
+ */
+LXB_API lxb_unicode_idna_t *
+lxb_unicode_idna_destroy(lxb_unicode_idna_t *idna, bool self_destroy);
+
+/*
+ * Domain name processing.
+ *
+ * Mapping, Normalization (NFC), Converting, Validating.
+ *
+ * Callback will be invoked at each level of the domain name.
+ *
+ * For example:
+ *     lexbor.com -- there will be two callbacks, for "lexbor" and "com".
+ *     lexbor.com. -- there will be three callbacks, the last one with an
+ *                    empty label (length = 0).
+ *
+ * A label starting with "xn--" is decoded from Punycode.  If decoding
+ * fails, the callback receives the undecoded label together with the error
+ * status of the decoding.
+ *
+ * https://www.unicode.org/reports/tr46/#Processing
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input characters for processing. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_processing(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                            size_t length, lxb_unicode_idna_cb_f cb, void *ctx,
+                            lxb_unicode_idna_flag_t flags);
+
+/*
+ * Domain name processing for code points.
+ *
+ * This function is exactly the same as lxb_unicode_idna_processing(), except
+ * that it takes code points instead of characters as input.
+ *
+ * Please, see lxb_unicode_idna_processing() function.
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input code points for processing. Not NULL.
+ * @param[in] Number of code points. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_processing_cp(lxb_unicode_idna_t *idna,
+                               const lxb_codepoint_t *cps, size_t length,
+                               lxb_unicode_idna_cb_f cb, void *ctx,
+                               lxb_unicode_idna_flag_t flags);
+
+/*
+ * Processing and converting domain name to ASCII.
+ *
+ * Does the same thing as lxb_unicode_idna_processing() and additionally
+ * converts each part of the domain name to Punycode.
+ *
+ * Callback will be invoked only once, at the end of processing, with the
+ * parts joined by U+002E ( . ) FULL STOP.
+ *
+ * https://www.unicode.org/reports/tr46/#ToASCII
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input characters for processing. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_to_ascii(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                          size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                          lxb_unicode_idna_flag_t flags);
+
+/*
+ * Processing and converting domain name to ASCII for code points.
+ *
+ * This function is exactly the same as lxb_unicode_idna_to_ascii(), except
+ * that it takes code points instead of characters as input.
+ *
+ * Please, see lxb_unicode_idna_to_ascii() function.
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input code points for processing. Not NULL.
+ * @param[in] Number of code points. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_to_ascii_cp(lxb_unicode_idna_t *idna, const lxb_codepoint_t *cps,
+                             size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                             lxb_unicode_idna_flag_t flags);
+
+/*
+ * Processing and converting domain name to Unicode.
+ *
+ * Does the same thing as lxb_unicode_idna_processing() and serializes the
+ * resulting parts of the domain name as UTF-8.
+ *
+ * Callback will be invoked only once, at the end of processing, with the
+ * parts joined by U+002E ( . ) FULL STOP.
+ *
+ * https://www.unicode.org/reports/tr46/#ToUnicode
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input characters for processing. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_to_unicode(lxb_unicode_idna_t *idna, const lxb_char_t *data,
+                            size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                            lxb_unicode_idna_flag_t flags);
+
+/*
+ * Processing and converting domain name to Unicode for code points.
+ *
+ * This function is exactly the same as lxb_unicode_idna_to_unicode(), except
+ * that it takes code points instead of characters as input.
+ *
+ * Please, see lxb_unicode_idna_to_unicode() function.
+ *
+ * @param[in] lxb_unicode_idna_t *.
+ * @param[in] Input code points for processing. Not NULL.
+ * @param[in] Number of code points. Can be 0.
+ * @param[in] Callback for results of processing.
+ * @param[in] Context for callback.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_idna_to_unicode_cp(lxb_unicode_idna_t *idna, const lxb_codepoint_t *cps,
+                               size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                               lxb_unicode_idna_flag_t flags);
+
+/*
+ * Validity Criteria.
+ *
+ * The function checks a single label of a domain name (a part without
+ * U+002E FULL STOP) for validity according to a number of criteria.
+ * A label containing a FULL STOP is reported as not valid.
+ *
+ * LXB_UNICODE_IDNA_FLAG_CHECK_BIDI and LXB_UNICODE_IDNA_FLAG_CHECK_JOINERS
+ * are not implemented.
+ *
+ * https://www.unicode.org/reports/tr46/#Validity_Criteria
+ *
+ * @param[in] Input characters for processing. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return true if valid, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_idna_validity_criteria(const lxb_char_t *data, size_t length,
+                                   lxb_unicode_idna_flag_t flags);
+
+/*
+ * Validity Criteria for code points.
+ *
+ * Same as lxb_unicode_idna_validity_criteria(), except that it takes code
+ * points as input.
+ *
+ * @param[in] Input code points for processing. Not NULL.
+ * @param[in] Number of code points. Can be 0.
+ * @param[in] Bitmap of IDNA flags (LXB_UNICODE_IDNA_FLAG_*).
+ *
+ * @return true if valid, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_idna_validity_criteria_cp(const lxb_codepoint_t *data, size_t length,
+                                      lxb_unicode_idna_flag_t flags);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_UNICODE_IDNA_H */
--- a/ext/lexbor/lexbor/unicode/res.h
+++ b/ext/lexbor/lexbor/unicode/res.h
--- a/ext/lexbor/lexbor/unicode/unicode.c
+++ b/ext/lexbor/lexbor/unicode/unicode.c
--- a/ext/lexbor/lexbor/unicode/unicode.h
+++ b/ext/lexbor/lexbor/unicode/unicode.h
@ -0,0 +1,405 @@
+/*
+ * Copyright (C) 2023 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#ifndef LEXBOR_UNICODE_H
+#define LEXBOR_UNICODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/unicode/base.h"
+#include "lexbor/unicode/idna.h"
+#include "lexbor/core/array_obj.h"
+
+
+/*
+ * Unicode normalization forms.
+ *
+ * Values of this type are passed to lxb_unicode_normalizer_init() and
+ * lxb_unicode_normalization_form_set() to select the form to work with.
+ */
+typedef enum {
+    LXB_UNICODE_NFC  = 0x00, /* Normalization Form C (NFC).   */
+    LXB_UNICODE_NFD  = 0x01, /* Normalization Form D (NFD).   */
+    LXB_UNICODE_NFKC = 0x02, /* Normalization Form KC (NFKC). */
+    LXB_UNICODE_NFKD = 0x03  /* Normalization Form KD (NFKD). */
+}
+lxb_unicode_form_t;
+
+
+/*
+ * Create lxb_unicode_normalizer_t object.
+ *
+ * @return lxb_unicode_normalizer_t * if successful, otherwise NULL.
+ */
+LXB_API lxb_unicode_normalizer_t *
+lxb_unicode_normalizer_create(void);
+
+/*
+ * Initialization of lxb_unicode_normalizer_t object.
+ *
+ * Support normalization forms:
+ *     Normalization Form D (NFD):   LXB_UNICODE_NFD
+ *     Normalization Form C (NFC):   LXB_UNICODE_NFC
+ *     Normalization Form KD (NFKD): LXB_UNICODE_NFKD
+ *     Normalization Form KC (NFKC): LXB_UNICODE_NFKC
+ *
+ * https://www.unicode.org/reports/tr15/
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Normalization form.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalizer_init(lxb_unicode_normalizer_t *uc,
+                            lxb_unicode_form_t form);
+
+/*
+ * Cleaning of lxb_unicode_normalizer_t object.
+ *
+ * Clears the object.  Returns it to the state it had after initialization.
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ */
+LXB_API void
+lxb_unicode_normalizer_clean(lxb_unicode_normalizer_t *uc);
+
+/*
+ * Destroy lxb_unicode_normalizer_t object.
+ *
+ * Release of occupied resources.
+ *
+ * @param[in] lxb_unicode_normalizer_t *. Can be NULL.
+ * @param[in] if false: only destroys internal buffers.
+ * if true: destroys the lxb_unicode_normalizer_t object and all internal buffers.
+ *
+ * @return lxb_unicode_normalizer_t * if self_destroy = false, otherwise NULL.
+ */
+LXB_API lxb_unicode_normalizer_t *
+lxb_unicode_normalizer_destroy(lxb_unicode_normalizer_t *uc, bool self_destroy);
+
+/*
+ * Unicode normalization forms.
+ *
+ * This is a function with an implementation of the unicode normalization
+ * algorithm.
+ *
+ * The function is designed to work with a stream (chunks).
+ *
+ * Please, see examples for this function in examples/lexbor/unicode directory.
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Input characters for normalization. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Callback for results of normalization.
+ * @param[in] Context for callback.
+ * @param[in] Set to true if the last chunk or the only one chunk is processed.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalize(lxb_unicode_normalizer_t *uc, const lxb_char_t *data,
+                      size_t length, lexbor_serialize_cb_f cb, void *ctx,
+                      bool is_last);
+
+/*
+ * Unicode normalization end.
+ *
+ * The function is used to complete a normalization.
+ * Same as calling the lxb_unicode_normalize() function with is_last = true.
+ *
+ * Use this function only if you do not set is_last = true in
+ * the lxb_unicode_normalize() function.
+ *
+ * For example:
+ *     status = lxb_unicode_normalize(uc, data, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize(uc, data, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize_end(uc, cb, NULL);
+ *
+ *     The same as:
+ *     status = lxb_unicode_normalize(uc, data, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize(uc, data, length, cb, NULL, true);
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Callback for results of normalization.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalize_end(lxb_unicode_normalizer_t *uc, lexbor_serialize_cb_f cb,
+                          void *ctx);
+
+/*
+ * Unicode normalization forms for code points.
+ *
+ * This function is exactly the same as lxb_unicode_normalize() only it takes
+ * code points instead of characters as input.
+ *
+ * Also, unlike the lxb_unicode_normalize() function, a callback will be called
+ * to return code points, not characters.
+ *
+ * The function is designed to work with a stream (chunks).
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Input code points for normalization. Not NULL.
+ * @param[in] Length of code points. Can be 0.
+ * @param[in] Callback for results of normalization.
+ * @param[in] Context for callback.
+ * @param[in] Set to true if the last chunk or the only one chunk is processed.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalize_cp(lxb_unicode_normalizer_t *uc, const lxb_codepoint_t *cps,
+                         size_t length, lexbor_serialize_cb_cp_f cb, void *ctx,
+                         bool is_last);
+
+/*
+ * Unicode normalization end for code points.
+ *
+ * This function is completely similar to lxb_unicode_normalize_end(),
+ * only it takes a function with code points as a callback function.
+ *
+ * Same as calling the lxb_unicode_normalize_cp() function with is_last = true.
+ *
+ * Use this function only if you do not set is_last = true in
+ * the lxb_unicode_normalize_cp() function.
+ *
+ * For example:
+ *     status = lxb_unicode_normalize_cp(uc, cps, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize_cp(uc, cps, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize_cp_end(uc, cb, NULL);
+ *
+ *     The same as:
+ *     status = lxb_unicode_normalize_cp(uc, cps, length, cb, NULL, false);
+ *     status = lxb_unicode_normalize_cp(uc, cps, length, cb, NULL, true);
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Callback for results of normalization.
+ * @param[in] Context for callback.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalize_cp_end(lxb_unicode_normalizer_t *uc,
+                             lexbor_serialize_cb_cp_f cb, void *ctx);
+
+/*
+ * Quick Check.
+ *
+ * The basic normalization algorithm is not simple and requires time
+ * and resources.
+ * This function checks relatively quickly if the text needs to be normalized.
+ *
+ * The function is designed to work with a stream (chunks).
+ *
+ * https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Input characters for checks. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] Set to true if the last chunk or the only one chunk is processed.
+ *
+ * @return true if it needs to be normalized, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_quick_check(lxb_unicode_normalizer_t *uc, const lxb_char_t *data,
+                        size_t length, bool is_last);
+
+/*
+ * Quick Check End.
+ *
+ * The function is used to complete a quick check.
+ * Same as calling the lxb_unicode_quick_check() function with is_last = true.
+ *
+ * Use this function only if you do not set is_last = true in
+ * the lxb_unicode_quick_check() function.
+ *
+ * For example:
+ *     is = lxb_unicode_quick_check(uc, data, length, false);
+ *     is = lxb_unicode_quick_check(uc, data, length, false);
+ *     is = lxb_unicode_quick_check_end(uc);
+ *
+ *     The same as:
+ *     is = lxb_unicode_quick_check(uc, data, length, false);
+ *     is = lxb_unicode_quick_check(uc, data, length, true);
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ *
+ * @return true if it needs to be normalized, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_quick_check_end(lxb_unicode_normalizer_t *uc);
+
+/*
+ * Quick Check for code points.
+ *
+ * Same as lxb_unicode_quick_check() only it takes code points as input.
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ * @param[in] Input code points for checks. Not NULL.
+ * @param[in] Length of code points. Can be 0.
+ * @param[in] Set to true if the last chunk or the only one chunk is processed.
+ *
+ * @return true if it needs to be normalized, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_quick_check_cp(lxb_unicode_normalizer_t *uc,
+                           const lxb_codepoint_t *cps, size_t length,
+                           bool is_last);
+
+/*
+ * Quick Check End for code points.
+ *
+ * Same as lxb_unicode_quick_check_end().
+ *
+ * For example:
+ *     is = lxb_unicode_quick_check_cp(uc, cps, length, false);
+ *     is = lxb_unicode_quick_check_cp(uc, cps, length, false);
+ *     is = lxb_unicode_quick_check_cp_end(uc);
+ *
+ *     The same as:
+ *     is = lxb_unicode_quick_check_cp(uc, cps, length, false);
+ *     is = lxb_unicode_quick_check_cp(uc, cps, length, true);
+ *
+ * @param[in] lxb_unicode_normalizer_t *
+ *
+ * @return true if it needs to be normalized, otherwise false.
+ */
+LXB_API bool
+lxb_unicode_quick_check_cp_end(lxb_unicode_normalizer_t *uc);
+
+/*
+ * Flush.
+ *
+ * Force flush the buffer to the user's callback if possible.
+ *
+ * Please, see lxb_unicode_flush_count_set() function.
+ *
+ * @param[in] lxb_unicode_normalizer_t *.
+ * @param[in] Callback.
+ * @param[in] Callback context.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_flush(lxb_unicode_normalizer_t *uc, lexbor_serialize_cb_f cb,
+                  void *ctx);
+
+/*
+ * Flush for code points.
+ *
+ * Same as lxb_unicode_flush(), but it takes a callback with code points as
+ * input.
+ *
+ * @param[in] lxb_unicode_normalizer_t *.
+ * @param[in] Callback.
+ * @param[in] Callback context.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_flush_cp(lxb_unicode_normalizer_t *uc, lexbor_serialize_cb_cp_f cb,
+                     void *ctx);
+
+/*
+ * Change normalization form.
+ *
+ * You should only apply this function after one of the following actions:
+ *     1. The lxb_unicode_normalize() function was called with is_last = true.
+ *        That is, the processing of the previous type was successfully
+ *        completed.
+ *  OR
+ *     2. The end of normalization function was called:
+ *        lxb_unicode_normalize_end().
+ *  OR
+ *     3. The lxb_unicode_normalizer_t object cleanup function was called:
+ *        lxb_unicode_normalizer_clean().
+ *
+ *
+ * All this is to be able to normalize or quickly check text with different
+ * types without creating new objects.
+ *
+ * @param[in] lxb_unicode_normalizer_t *.
+ * @param[in] Normalization form.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_unicode_normalization_form_set(lxb_unicode_normalizer_t *uc,
+                                   lxb_unicode_form_t form);
+
+/*
+ * Low-level data accessors.
+ *
+ * NOTE(review): the declarations below carry no documentation in this header.
+ * Judging by names and signatures only, they presumably look up per-code-point
+ * data (general entry, composition, normalization, IDNA) -- confirm against
+ * unicode.c before relying on any of them, including what is returned when
+ * nothing is found.
+ */
+LXB_API const lxb_unicode_entry_t *
+lxb_unicode_entry(lxb_codepoint_t cp);
+
+LXB_API const lxb_unicode_composition_cp_t *
+lxb_unicode_compose_entry(lxb_codepoint_t first, lxb_codepoint_t second);
+
+LXB_API lxb_unicode_idna_type_t
+lxb_unicode_idna_type(lxb_codepoint_t cp);
+
+LXB_API const lxb_unicode_composition_cp_t *
+lxb_unicode_composition_cp(lxb_codepoint_t first, lxb_codepoint_t second);
+
+LXB_API const lxb_unicode_normalization_entry_t *
+lxb_unicode_normalization_entry(const lxb_unicode_entry_t *entry);
+
+LXB_API const lxb_unicode_normalization_entry_t *
+lxb_unicode_normalization_entry_by_cp(lxb_codepoint_t cp);
+
+LXB_API const lxb_unicode_normalization_entry_t *
+lxb_unicode_normalization_entry_by_index(uint16_t index);
+
+LXB_API bool
+lxb_unicode_normalization_is_null(const lxb_unicode_normalization_entry_t *entry);
+
+/* The two functions below also report a length through out_length. */
+LXB_API const lxb_codepoint_t *
+lxb_unicode_full_canonical(const lxb_unicode_normalization_entry_t *entry,
+                           size_t *out_length);
+
+LXB_API const lxb_codepoint_t *
+lxb_unicode_full_compatibility(const lxb_unicode_normalization_entry_t *entry,
+                               size_t *out_length);
+
+LXB_API const lxb_unicode_idna_entry_t *
+lxb_unicode_idna_entry(const lxb_unicode_entry_t *entry);
+
+LXB_API const lxb_unicode_idna_entry_t *
+lxb_unicode_idna_entry_by_cp(lxb_codepoint_t cp);
+
+LXB_API const lxb_unicode_idna_entry_t *
+lxb_unicode_idna_entry_by_index(uint16_t index);
+
+LXB_API const lxb_codepoint_t *
+lxb_unicode_idna_map(const lxb_unicode_idna_entry_t *entry,
+                     size_t *out_length);
+
+/*
+ * Inline functions.
+ */
+
+/*
+ * Set how many processed codepoints are buffered before a flush.
+ *
+ * By default, 4096 processed codepoints are accumulated before they are
+ * converted to lxb_char_t and handed to the user via callback.
+ *
+ * If the count is set to 0, the user callback is called for every processed
+ * codepoint.  That is, the result is streamed without accumulation in
+ * the intermediate buffer.
+ *
+ * @param[in] lxb_unicode_normalizer_t *. Not NULL.
+ * @param[in] Count of codepoints in the buffer.
+ */
+lxb_inline void
+lxb_unicode_flush_count_set(lxb_unicode_normalizer_t *uc, size_t count)
+{
+    size_t threshold = count;
+
+    uc->flush_cp = threshold;
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_UNICODE_H */
--- a/ext/lexbor/lexbor/url/base.h
+++ b/ext/lexbor/lexbor/url/base.h
@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2023-2024 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ */
+
+#ifndef LEXBOR_URL_BASE_H
+#define LEXBOR_URL_BASE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/core/base.h"
+#include "lexbor/core/mraw.h"
+#include "lexbor/core/str.h"
+
+
+/* Version of the URL module, split into its numeric components. */
+#define LXB_URL_VERSION_MAJOR 0
+#define LXB_URL_VERSION_MINOR 3
+#define LXB_URL_VERSION_PATCH 0
+
+/* The same version as a "MAJOR.MINOR.PATCH" string literal. */
+#define LXB_URL_VERSION_STRING LEXBOR_STRINGIZE(LXB_URL_VERSION_MAJOR) "."    \
+                               LEXBOR_STRINGIZE(LXB_URL_VERSION_MINOR) "."    \
+                               LEXBOR_STRINGIZE(LXB_URL_VERSION_PATCH)
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_URL_BASE_H */
--- a/ext/lexbor/lexbor/url/url.c
+++ b/ext/lexbor/lexbor/url/url.c
--- a/ext/lexbor/lexbor/url/url.h
+++ b/ext/lexbor/lexbor/url/url.h
@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2023 Alexander Borisov
+ *
+ * Author: Alexander Borisov <borisov@lexbor.com>
+ *
+ * The URL Standard.
+ * By specification: https://url.spec.whatwg.org/
+ */
+
+#ifndef LEXBOR_URL_H
+#define LEXBOR_URL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lexbor/url/base.h"
+#include "lexbor/core/mraw.h"
+#include "lexbor/core/plog.h"
+#include "lexbor/encoding/encoding.h"
+#include "lexbor/unicode/unicode.h"
+
+
+/*
+ * Error type identifiers for URL processing.
+ *
+ * NOTE(review): the names appear to mirror the validation errors of the
+ * URL Standard (https://url.spec.whatwg.org/) -- confirm against url.c.
+ *
+ * LXB_URL_ERROR_TYPE__LAST_ENTRY is the terminating entry, not an error type.
+ * Values start at 0x00 and are sequential, so it equals the number of
+ * error types declared before it.
+ */
+typedef enum {
+    LXB_URL_ERROR_TYPE_DOMAIN_TO_ASCII = 0x00,
+    LXB_URL_ERROR_TYPE_DOMAIN_TO_UNICODE,
+    LXB_URL_ERROR_TYPE_DOMAIN_INVALID_CODE_POINT,
+    LXB_URL_ERROR_TYPE_HOST_INVALID_CODE_POINT,
+    LXB_URL_ERROR_TYPE_IPV4_EMPTY_PART,
+    LXB_URL_ERROR_TYPE_IPV4_TOO_MANY_PARTS,
+    LXB_URL_ERROR_TYPE_IPV4_NON_NUMERIC_PART,
+    LXB_URL_ERROR_TYPE_IPV4_NON_DECIMAL_PART,
+    LXB_URL_ERROR_TYPE_IPV4_OUT_OF_RANGE_PART,
+    LXB_URL_ERROR_TYPE_IPV6_UNCLOSED,
+    LXB_URL_ERROR_TYPE_IPV6_INVALID_COMPRESSION,
+    LXB_URL_ERROR_TYPE_IPV6_TOO_MANY_PIECES,
+    LXB_URL_ERROR_TYPE_IPV6_MULTIPLE_COMPRESSION,
+    LXB_URL_ERROR_TYPE_IPV6_INVALID_CODE_POINT,
+    LXB_URL_ERROR_TYPE_IPV6_TOO_FEW_PIECES,
+    LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_TOO_MANY_PIECES,
+    LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_INVALID_CODE_POINT,
+    LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_OUT_OF_RANGE_PART,
+    LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_TOO_FEW_PARTS,
+    LXB_URL_ERROR_TYPE_INVALID_URL_UNIT,
+    LXB_URL_ERROR_TYPE_SPECIAL_SCHEME_MISSING_FOLLOWING_SOLIDUS,
+    LXB_URL_ERROR_TYPE_MISSING_SCHEME_NON_RELATIVE_URL,
+    LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS,
+    LXB_URL_ERROR_TYPE_INVALID_CREDENTIALS,
+    LXB_URL_ERROR_TYPE_HOST_MISSING,
+    LXB_URL_ERROR_TYPE_PORT_OUT_OF_RANGE,
+    LXB_URL_ERROR_TYPE_PORT_INVALID,
+    LXB_URL_ERROR_TYPE_FILE_INVALID_WINDOWS_DRIVE_LETTER,
+    LXB_URL_ERROR_TYPE_FILE_INVALID_WINDOWS_DRIVE_LETTER_HOST,
+    LXB_URL_ERROR_TYPE__LAST_ENTRY
+}
+lxb_url_error_type_t;
+
+/*
+ * States of the URL parser.
+ *
+ * A value of this type is passed to lxb_url_parse_basic() as the state
+ * override; LXB_URL_STATE__UNDEF is the value to pass for default behavior.
+ *
+ * NOTE(review): the state names appear to follow the state machine of the
+ * URL Standard basic parser -- confirm against url.c.
+ */
+typedef enum {
+    LXB_URL_STATE__UNDEF = 0x00,
+    LXB_URL_STATE_SCHEME_START_STATE,
+    LXB_URL_STATE_SCHEME_STATE,
+    LXB_URL_STATE_NO_SCHEME_STATE,
+    LXB_URL_STATE_SPECIAL_RELATIVE_OR_AUTHORITY_STATE,
+    LXB_URL_STATE_PATH_OR_AUTHORITY_STATE,
+    LXB_URL_STATE_RELATIVE_STATE,
+    LXB_URL_STATE_RELATIVE_SLASH_STATE,
+    LXB_URL_STATE_SPECIAL_AUTHORITY_SLASHES_STATE,
+    LXB_URL_STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE,
+    LXB_URL_STATE_AUTHORITY_STATE,
+    LXB_URL_STATE_HOST_STATE,
+    LXB_URL_STATE_HOSTNAME_STATE,
+    LXB_URL_STATE_PORT_STATE,
+    LXB_URL_STATE_FILE_STATE,
+    LXB_URL_STATE_FILE_SLASH_STATE,
+    LXB_URL_STATE_FILE_HOST_STATE,
+    LXB_URL_STATE_PATH_START_STATE,
+    LXB_URL_STATE_PATH_STATE,
+    LXB_URL_STATE_OPAQUE_PATH_STATE,
+    LXB_URL_STATE_QUERY_STATE,
+    LXB_URL_STATE_FRAGMENT_STATE
+}
+lxb_url_state_t;
+
+/*
+ * URL scheme types.
+ *
+ * New values may only be appended at the end of the list, immediately before
+ * LXB_URL_SCHEMEL_TYPE__LAST_ENTRY; existing values must keep their numbers.
+ *
+ * Please, see lxb_url_scheme_res in /lexbor/url/url.c.
+ */
+typedef enum {
+    LXB_URL_SCHEMEL_TYPE__UNDEF      = 0x00,
+    LXB_URL_SCHEMEL_TYPE__UNKNOWN    = 0x01,
+    LXB_URL_SCHEMEL_TYPE_HTTP        = 0x02,
+    LXB_URL_SCHEMEL_TYPE_HTTPS       = 0x03,
+    LXB_URL_SCHEMEL_TYPE_WS          = 0x04,
+    LXB_URL_SCHEMEL_TYPE_WSS         = 0x05,
+    LXB_URL_SCHEMEL_TYPE_FTP         = 0x06,
+    LXB_URL_SCHEMEL_TYPE_FILE        = 0x07,
+    LXB_URL_SCHEMEL_TYPE__LAST_ENTRY
+}
+lxb_url_scheme_type_t;
+
+/*
+ * Description of a scheme: its name, a port number and its type.
+ *
+ * NOTE(review): presumably the element type of the lxb_url_scheme_res table
+ * in url.c, with `port` being the scheme's default port -- confirm.
+ */
+typedef struct {
+    const lexbor_str_t    name;
+    uint16_t              port;
+    lxb_url_scheme_type_t type;
+}
+lxb_url_scheme_data_t;
+
+/*
+ * Scheme of a URL object: its name and its type.
+ *
+ * The name is what lxb_url_scheme() returns a pointer to.
+ */
+typedef struct {
+    lexbor_str_t          name;
+    lxb_url_scheme_type_t type;
+}
+lxb_url_scheme_t;
+
+/*
+ * Kinds of host a URL can have.
+ *
+ * Stored in the `type` field of lxb_url_host_t.
+ */
+typedef enum {
+    LXB_URL_HOST_TYPE__UNDEF = 0x00,
+    LXB_URL_HOST_TYPE_DOMAIN = 0x01,
+    LXB_URL_HOST_TYPE_OPAQUE = 0x02,
+    LXB_URL_HOST_TYPE_IPV4   = 0x03,
+    LXB_URL_HOST_TYPE_IPV6   = 0x04,
+    LXB_URL_HOST_TYPE_EMPTY  = 0x05
+}
+lxb_url_host_type_t;
+
+/*
+ * Host of a URL.
+ *
+ * The members of `u` share storage.
+ *
+ * NOTE(review): presumably `type` tells which member of `u` is valid
+ * (ipv6 / ipv4 / opaque / domain) -- confirm against url.c.
+ */
+typedef struct {
+    lxb_url_host_type_t type;
+
+    union {
+        uint16_t     ipv6[8];
+        uint32_t     ipv4;
+        lexbor_str_t opaque;
+        lexbor_str_t domain;
+    } u;
+}
+lxb_url_host_t;
+
+/*
+ * Path of a URL.
+ *
+ * `str` is what lxb_url_path_str() returns a pointer to.
+ *
+ * NOTE(review): the meaning of `length` (it is separate from the string's
+ * own length) and the exact semantics of `opaque` are not visible in this
+ * header -- confirm against url.c.
+ */
+typedef struct {
+    lexbor_str_t str;
+    size_t       length;
+    bool         opaque;
+}
+lxb_url_path_t;
+
+/*
+ * URL object.
+ *
+ * Holds the individual components of a URL.  Read access is available
+ * through the inline lxb_url_*() functions declared in this header.
+ *
+ * `mraw` is the memory object associated with the URL; it is the object
+ * destroyed by lxb_url_memory_destroy().
+ *
+ * NOTE(review): presumably `port` is only meaningful when `has_port` is
+ * true -- confirm against url.c.
+ */
+typedef struct {
+    lxb_url_scheme_t   scheme;
+
+    lxb_url_host_t     host;
+
+    lexbor_str_t       username;
+    lexbor_str_t       password;
+
+    uint16_t           port;
+    bool               has_port;
+
+    lxb_url_path_t     path;
+
+    lexbor_str_t       query;
+    lexbor_str_t       fragment;
+
+    lexbor_mraw_t      *mraw;
+}
+lxb_url_t;
+
+/*
+ * URL parser object.
+ *
+ * url:  URL object returned by lxb_url_get().
+ * mraw: memory object returned by lxb_url_mraw() and replaced by
+ *       lxb_url_mraw_set().
+ * log:  NOTE(review): presumably the log of parsing errors -- confirm.
+ * idna: NOTE(review): presumably the IDNA object used for domain
+ *       processing -- confirm.
+ */
+typedef struct {
+    lxb_url_t          *url;
+    lexbor_mraw_t      *mraw;
+    lexbor_plog_t      *log;
+
+    lxb_unicode_idna_t *idna;
+}
+lxb_url_parser_t;
+
+
+/*
+ * Create lxb_url_parser_t object.
+ *
+ * @return lxb_url_parser_t * if successful, otherwise NULL.
+ */
+LXB_API lxb_url_parser_t *
+lxb_url_parser_create(void);
+
+/*
+ * Initialization of lxb_url_parser_t object.
+ *
+ * The parser is not bound to the received URLs in any way. That is, after
+ * parsing the lxb_url_parser_t object can be destroyed and we can continue
+ * working with the received URLs.
+ *
+ * Memory for created URLs is taken from lexbor_mraw_t object, which you can
+ * pass during initialization of lxb_url_parser_t object, or a new lexbor_mraw_t
+ * object will be created during initialization if NULL is passed.
+ *
+ * Each created URL will have a pointer to the lexbor_mraw_t object.
+ *
+ * By destroying the lexbor_mraw_t object you destroy all the URL objects
+ * created by the parser. Use the lxb_url_destroy() function to destroy a
+ * specific URL.
+ *
+ * Destroying the lxb_url_parser_t object with lxb_url_parser_destroy() does
+ * not destroy the lexbor_mraw_t memory object.
+ *
+ * Please, see functions lxb_url_parser_memory_destroy(), lxb_url_destroy(),
+ * lxb_url_memory_destroy().
+ *
+ * @param[in] lxb_url_parser_t *
+ * @param[in] lexbor_mraw_t *. Can be NULL. If NULL is passed, the parser will
+ * create its own memory object and it will be bound to all created URLs.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_url_parser_init(lxb_url_parser_t *parser, lexbor_mraw_t *mraw);
+
+/*
+ * Clears the object. Returns object to states as after initialization.
+ *
+ * This function must be called before the parsing functions can be reused.
+ *
+ * For example:
+ *     lxb_url_parse()
+ *     lxb_url_parser_clean()
+ *     lxb_url_parse()
+ *     lxb_url_destroy()
+ *
+ * @param[in] lxb_url_parser_t *
+ */
+LXB_API void
+lxb_url_parser_clean(lxb_url_parser_t *parser);
+
+/*
+ * Destroy lxb_url_parser_t object.
+ *
+ * Release of occupied resources.
+ * The lexbor_mraw_t memory object is not destroyed in this function.
+ *
+ * @param[in] lxb_url_parser_t *. Can be NULL.
+ * @param[in] if false: only destroys internal buffers.
+ * if true: destroys the lxb_url_parser_t object and all internal buffers.
+ *
+ * @return lxb_url_parser_t * if destroy_self = false, otherwise NULL.
+ */
+LXB_API lxb_url_parser_t *
+lxb_url_parser_destroy(lxb_url_parser_t *parser, bool destroy_self);
+
+/*
+ * Destroys the lexbor_mraw_t object, and thus all associated URLs.
+ *
+ * After that, new URLs cannot be parsed until a new lexbor_mraw_t object is
+ * assigned to the lxb_url_parser_t object.
+ *
+ * @param[in] lxb_url_parser_t *.
+ */
+LXB_API void
+lxb_url_parser_memory_destroy(lxb_url_parser_t *parser);
+
+/*
+ * URL parser.
+ *
+ * This function is an implementation of URL parsing according to the WHATWG
+ * specification.
+ *
+ * @param[in] lxb_url_parser_t *.
+ * @param[in] const lxb_url_t *. Base URL, can be NULL.
+ * @param[in] Input characters. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ *
+ * @return lxb_url_t * if successful, otherwise NULL.
+ */
+LXB_API lxb_url_t *
+lxb_url_parse(lxb_url_parser_t *parser, const lxb_url_t *base_url,
+              const lxb_char_t *data, size_t length);
+
+/*
+ * URL basic parser.
+ *
+ * This function is an implementation of URL parsing according to the WHATWG
+ * specification.
+ *
+ * Use the lxb_url_get() function to get the URL object.
+ *
+ * @param[in] lxb_url_parser_t *.
+ * @param[in] lxb_url_t *. Can be NULL.
+ * @param[in] const lxb_url_t *. Base URL, can be NULL.
+ * @param[in] Input characters. Not NULL.
+ * @param[in] Length of characters. Can be 0.
+ * @param[in] lxb_url_state_t, for default set to LXB_URL_STATE__UNDEF.
+ * @param[in] lxb_encoding_t, default (LXB_ENCODING_DEFAULT) LXB_ENCODING_UTF_8.
+ *
+ * @return LXB_STATUS_OK if successful, otherwise an error status value.
+ */
+LXB_API lxb_status_t
+lxb_url_parse_basic(lxb_url_parser_t *parser, lxb_url_t *url,
+                    const lxb_url_t *base_url,
+                    const lxb_char_t *data, size_t length,
+                    lxb_url_state_t override_state, lxb_encoding_t encoding);
+
+/*
+ * Erase URL.
+ *
+ * Frees all internal memory occupied by the URL object, but does not destroy
+ * the object.
+ *
+ * @param[in] lxb_url_t *.
+ *
+ * The function does not return a value.
+ */
+LXB_API void
+lxb_url_erase(lxb_url_t *url);
+
+/*
+ * Destroys URL.
+ *
+ * @param[in] lxb_url_t *.
+ *
+ * @return NULL.
+ */
+LXB_API lxb_url_t *
+lxb_url_destroy(lxb_url_t *url);
+
+/*
+ * Destroys the lexbor_mraw_t memory object.
+ *
+ * The function will destroy all URLs associated with the lexbor_mraw_t memory
+ * object, including the passed one.
+ *
+ * Keep in mind, if you have a live lxb_url_parser_t parsing object, you will
+ * have a pointer to garbage after calling this function instead of a pointer
+ * to the lexbor_mraw_t object.
+ * In this case you need to assign a new memory object lexbor_mraw_t for the
+ * parser. Use the lxb_url_mraw_set() function.
+ *
+ * @param[in] lxb_url_t *.
+ */
+LXB_API void
+lxb_url_memory_destroy(lxb_url_t *url);
+
+
+/*
+ * Below is an API for modifying the URL object according to the
+ * https://url.spec.whatwg.org/#api specification.
+ *
+ * It is not necessary to pass the lxb_url_parser_t object to API functions.
+ * You need to pass the parser if you want to have logs of parsing.
+ *
+ * All API functions can be passed NULL as "const lxb_char_t *" data.
+ */
+
+LXB_API lxb_status_t
+lxb_url_api_href_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                     const lxb_char_t *href, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_protocol_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                         const lxb_char_t *protocol, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_username_set(lxb_url_t *url,
+                         const lxb_char_t *username, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_password_set(lxb_url_t *url,
+                         const lxb_char_t *password, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_host_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                     const lxb_char_t *host, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_hostname_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                         const lxb_char_t *hostname, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_port_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                     const lxb_char_t *port, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_pathname_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                         const lxb_char_t *pathname, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_search_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                       const lxb_char_t *search, size_t length);
+
+LXB_API lxb_status_t
+lxb_url_api_hash_set(lxb_url_t *url, lxb_url_parser_t *parser,
+                     const lxb_char_t *hash, size_t length);
+
+
+/*
+ * Below are functions for serializing a URL object and its individual
+ * parameters.
+ *
+ * Note that the callback may be called more than once.
+ * For example, the lxb_url_serialize() function will callback multiple times:
+ * 1. http
+ * 2. ://
+ * 3. example.com
+ * and so on.
+ */
+
+LXB_API lxb_status_t
+lxb_url_serialize(const lxb_url_t *url, lexbor_serialize_cb_f cb, void *ctx,
+                  bool exclude_fragment);
+
+LXB_API lxb_status_t
+lxb_url_serialize_scheme(const lxb_url_t *url,
+                         lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_username(const lxb_url_t *url,
+                           lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_password(const lxb_url_t *url,
+                           lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_host(const lxb_url_host_t *host,
+                       lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_host_unicode(lxb_unicode_idna_t *idna,
+                               const lxb_url_host_t *host,
+                               lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_host_ipv4(uint32_t ipv4,
+                            lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_host_ipv6(const uint16_t *ipv6,
+                            lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_port(const lxb_url_t *url,
+                       lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_path(const lxb_url_path_t *path,
+                       lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_query(const lxb_url_t *url,
+                        lexbor_serialize_cb_f cb, void *ctx);
+
+LXB_API lxb_status_t
+lxb_url_serialize_fragment(const lxb_url_t *url,
+                           lexbor_serialize_cb_f cb, void *ctx);
+
+/*
+ * Creates a clone of the object's URL.
+ *
+ * For lexbor_mraw_t *, use url->mraw or another lexbor_mraw_t * object.
+ *
+ * @param[in] lexbor_mraw_t *.
+ * @param[in] lxb_url_t *.
+ *
+ * @return a new URL object if successful, otherwise NULL value.
+ */
+LXB_API lxb_url_t *
+lxb_url_clone(lexbor_mraw_t *mraw, lxb_url_t *url);
+
+/*
+ * Inline functions.
+ */
+
+/*
+ * Get the scheme name of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the scheme name stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_scheme(const lxb_url_t *url)
+{
+    const lexbor_str_t *name = &url->scheme.name;
+
+    return name;
+}
+
+/*
+ * Get the username of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the username stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_username(const lxb_url_t *url)
+{
+    const lexbor_str_t *username = &url->username;
+
+    return username;
+}
+
+/*
+ * Get the password of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the password stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_password(const lxb_url_t *url)
+{
+    const lexbor_str_t *password = &url->password;
+
+    return password;
+}
+
+/*
+ * Get the host of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the host stored inside the URL object.
+ */
+lxb_inline const lxb_url_host_t *
+lxb_url_host(const lxb_url_t *url)
+{
+    const lxb_url_host_t *host = &url->host;
+
+    return host;
+}
+
+/*
+ * Get the port of a URL.
+ *
+ * NOTE(review): presumably the value is only meaningful when
+ * lxb_url_has_port() returns true -- confirm against url.c.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return the port value stored in the URL object.
+ */
+lxb_inline uint16_t
+lxb_url_port(const lxb_url_t *url)
+{
+    uint16_t port = url->port;
+
+    return port;
+}
+
+/*
+ * Get the has_port flag of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return the value of the has_port flag stored in the URL object.
+ */
+lxb_inline bool
+lxb_url_has_port(const lxb_url_t *url)
+{
+    bool present = url->has_port;
+
+    return present;
+}
+
+/*
+ * Get the path of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the path stored inside the URL object.
+ */
+lxb_inline const lxb_url_path_t *
+lxb_url_path(const lxb_url_t *url)
+{
+    const lxb_url_path_t *path = &url->path;
+
+    return path;
+}
+
+/*
+ * Get the string of the path of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the path string stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_path_str(const lxb_url_t *url)
+{
+    const lexbor_str_t *path_str = &url->path.str;
+
+    return path_str;
+}
+
+/*
+ * Get the query of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the query stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_query(const lxb_url_t *url)
+{
+    const lexbor_str_t *query = &url->query;
+
+    return query;
+}
+
+/*
+ * Get the fragment of a URL.
+ *
+ * @param[in] const lxb_url_t *. Not NULL.
+ *
+ * @return pointer to the fragment stored inside the URL object.
+ */
+lxb_inline const lexbor_str_t *
+lxb_url_fragment(const lxb_url_t *url)
+{
+    const lexbor_str_t *fragment = &url->fragment;
+
+    return fragment;
+}
+
+/*
+ * Get the memory object currently assigned to the parser.
+ *
+ * @param[in] lxb_url_parser_t *. Not NULL.
+ *
+ * @return the lexbor_mraw_t * stored in the parser.
+ */
+lxb_inline lexbor_mraw_t *
+lxb_url_mraw(lxb_url_parser_t *parser)
+{
+    lexbor_mraw_t *current = parser->mraw;
+
+    return current;
+}
+
+/*
+ * Assign a memory object to the parser.
+ *
+ * Only the pointer held by the parser is replaced; the previously assigned
+ * memory object is not touched by this function.
+ *
+ * @param[in] lxb_url_parser_t *. Not NULL.
+ * @param[in] lexbor_mraw_t * to store in the parser.
+ */
+lxb_inline void
+lxb_url_mraw_set(lxb_url_parser_t *parser, lexbor_mraw_t *mraw)
+{
+    lexbor_mraw_t **slot = &parser->mraw;
+
+    *slot = mraw;
+}
+
+/*
+ * Get the URL object held by the parser.
+ *
+ * Use it to obtain the URL object after lxb_url_parse_basic().
+ *
+ * @param[in] lxb_url_parser_t *. Not NULL.
+ *
+ * @return the lxb_url_t * stored in the parser.
+ */
+lxb_inline lxb_url_t *
+lxb_url_get(lxb_url_parser_t *parser)
+{
+    lxb_url_t *current = parser->url;
+
+    return current;
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LEXBOR_URL_H */