php-src/ext/pcre/pcre2lib/pcre2_compile_class.c

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pcre2_compile.h"

/* Context for compiling an extended class (eclass).
NOTE(review): the code using this structure is not in this part of the file;
the member notes below are the original ones, lightly expanded. */
typedef struct {
  /* Option bits for eclass: the options bits and the extra options bits. */
  uint32_t options;
  uint32_t xoptions;
  /* Rarely used members. */
  int *errorcodeptr;
  compile_block *cb;
  /* Bitmap is needed. */
  BOOL needs_bitmap;
} eclass_context;

/* Checks the allowed tokens at the end of a class structure in debug mode.
The macro is placed where the "default:" label of a switch would be. When a
new token is added and some loop does not process it, then if the token is
equal to
a) one of the cases listed here:
   the compiler will complain about a duplicated case value;
b) none of the cases listed here:
   the loop without the handler will stop with an assertion failure.

In non-debug builds the macro expands to a plain "default:" label. */

#ifdef PCRE2_DEBUG
#define CLASS_END_CASES(meta) \
  default: \
  PCRE2_ASSERT((meta) <= META_END); \
  /* Fall through */ \
  case META_CLASS: \
  case META_CLASS_NOT: \
  case META_CLASS_EMPTY: \
  case META_CLASS_EMPTY_NOT: \
  case META_CLASS_END: \
  case META_ECLASS_AND: \
  case META_ECLASS_OR: \
  case META_ECLASS_SUB: \
  case META_ECLASS_XOR: \
  case META_ECLASS_NOT:
#else
#define CLASS_END_CASES(meta) \
  default:
#endif

#ifdef SUPPORT_WIDE_CHARS

/* Sift-down step of the heapsort that orders the range lists.

The buffer holds (start, end) pairs, so every heap node occupies two
consecutive uint32_t slots and is keyed by the first one. Indexes and sizes
are expressed in uint32_t slots, not in pairs: the children of the node at
slot i are at slots 2*i + 2 and 2*i + 4.

Arguments:
  buffer      the array of pairs
  size        number of uint32_t slots which belong to the heap
  i           slot index of the node to sift down

Returns:      nothing; the node is moved down until neither of its children
              has a greater key
*/

static void do_heapify(uint32_t *buffer, size_t size, size_t i)
{
for (;;)
  {
  size_t child = (i << 1) + 2;
  size_t largest = i;
  uint32_t key, value;

  /* Pick the greatest key among the node and its (at most two) children. */
  if (child < size && buffer[child] > buffer[largest]) largest = child;
  child += 2;
  if (child < size && buffer[child] > buffer[largest]) largest = child;

  if (largest == i) break;

  /* Exchange the whole (start, end) pair with the greater child. */
  key = buffer[i];
  value = buffer[i + 1];
  buffer[i] = buffer[largest];
  buffer[i + 1] = buffer[largest + 1];
  buffer[largest] = key;
  buffer[largest + 1] = value;

  i = largest;
  }
}

#ifdef SUPPORT_UNICODE

/* Flag bits passed as the 'options' argument of parse_class() and its
helpers. compile_optimize_class() derives them from the compile options:
  PARSE_CLASS_UTF             PCRE2_UTF is set
  PARSE_CLASS_CASELESS_UTF    PCRE2_CASELESS is set together with PCRE2_UTF
                              or PCRE2_UCP
  PARSE_CLASS_RESTRICTED_UTF  PCRE2_EXTRA_CASELESS_RESTRICT is set
  PARSE_CLASS_TURKISH_UTF     PCRE2_EXTRA_TURKISH_CASING is set */
#define PARSE_CLASS_UTF               0x1
#define PARSE_CLASS_CASELESS_UTF      0x2
#define PARSE_CLASS_RESTRICTED_UTF    0x4
#define PARSE_CLASS_TURKISH_UTF       0x8

/* Binary search in the table of nocase ranges. The result points to the
start value of the range which includes the 'c' character passed as argument,
or which directly follows 'c'. The table is probed at odd indexes (the range
ends), and the returned pointer always refers to the even index before the
probe. For characters above MAX_UTF_CODE_POINT the search is skipped, and the
position at offset ucd_nocase_ranges_size is returned. */

static const uint32_t*
get_nocase_range(uint32_t c)
{
const uint32_t *ranges = PRIV(ucd_nocase_ranges);
uint32_t lo = 0;
uint32_t hi = PRIV(ucd_nocase_ranges_size);

if (c > MAX_UTF_CODE_POINT) return ranges + hi;

for (;;)
  {
  /* Index of the range end of the middle element. */
  uint32_t mid = ((lo + hi) >> 1) | 0x1;

  if (ranges[mid] <= c)
    {
    /* The middle range ends at or before 'c': search above it. */
    lo = mid + 1;
    continue;
    }

  /* The middle range ends after 'c'. It is the result unless the previous
  range (if there is one) also ends after 'c'. */
  if (mid <= 1 || ranges[mid - 2] <= c)
    return ranges + (mid - 1);

  hi = mid - 1;
  }
}

/* Get the list of othercase characters, which belongs to the passed range.
Create ranges from these characters, and append them to the buffer argument.

An othercase character which is directly below or above the current range
extends the range by one. Any other character outside of the range is
appended as a single character range. The passed range itself, possibly
extended, is appended last.

Arguments:
  start       first character of the range
  end         last character of the range
  options     PARSE_CLASS_* flags
  buffer      where to store the (start, end) pairs, or NULL when only the
              required size is computed

Returns:      the number of uint32_t values stored (or required when buffer
              is NULL); it is always even and at least 2
*/

static size_t
utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
  uint32_t *buffer)
{
uint32_t new_start = start;
uint32_t new_end = end;
uint32_t c = start;
const uint32_t *list;
/* Local list for characters without a caseless set: the character itself,
optionally its other case, and a NOTACHAR terminator. */
uint32_t tmp[3];
/* The range stored at the end is counted in advance. */
size_t result = 2;
const uint32_t *skip_range = get_nocase_range(c);
uint32_t skip_start = skip_range[0];

#if PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_ASSERT(options & PARSE_CLASS_UTF);
#endif

/* Only the scan is limited here: new_end has already been set from the
original end, so the stored range keeps the original end. */
#if PCRE2_CODE_UNIT_WIDTH == 32
if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
#endif

while (c <= end)
  {
  uint32_t co;

  /* Characters after skip_range[0] and before skip_range[1] are not
  processed: jump to skip_range[1] and select the next nocase range. The new
  position is checked against end by the loop condition. */
  if (c > skip_start)
    {
    c = skip_range[1];
    skip_range += 2;
    skip_start = skip_range[0];
    continue;
    }

  /* Compute caseless set. */

  /* Turkish casing is applied to the 'i' characters only if the caseless
  restriction is not set at the same time. */
  if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
        PARSE_CLASS_TURKISH_UTF &&
      UCD_ANY_I(c))
    {
    co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
    }
  else if ((co = UCD_CASESET(c)) != 0 &&
           (options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
           PRIV(ucd_caseless_sets)[co] < 128)
    {
    co = 0;  /* Ignore the caseless set if it's restricted. */
    }

  if (co != 0)
    list = PRIV(ucd_caseless_sets) + co;
  else
    {
    /* No caseless set is used: build the list from the single other case.
    The list always starts with c, which is inside the range and is therefore
    skipped by the loop below. */
    co = UCD_OTHERCASE(c);
    list = tmp;
    tmp[0] = c;
    tmp[1] = NOTACHAR;

    if (co != c)
      {
      tmp[1] = co;
      tmp[2] = NOTACHAR;
      }
    }
  c++;

  /* Add characters. */
  /* Every "continue" below jumps to the loop condition, which advances list
  to the next character. */
  do
    {
#if PCRE2_CODE_UNIT_WIDTH == 16
    if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
#endif

    if (*list < new_start)
      {
      /* Directly below the range: extend the range downwards. */
      if (*list + 1 == new_start)
        {
        new_start--;
        continue;
        }
      }
    else if (*list > new_end)
      {
      /* Directly above the range: extend the range upwards. */
      if (*list - 1 == new_end)
        {
        new_end++;
        continue;
        }
      }
    else continue;

    /* Not adjacent to the range: store it as a single character range. */
    result += 2;
    if (buffer != NULL)
      {
      buffer[0] = *list;
      buffer[1] = *list;
      buffer += 2;
      }
    }
  while (*(++list) != NOTACHAR);
  }

  /* Store the (possibly extended) original range. */
  if (buffer != NULL)
    {
    buffer[0] = new_start;
    buffer[1] = new_end;
    buffer += 2;
    (void)buffer;
    }
  return result;
}

#endif

/* Converts a NOTACHAR terminated, ascending list of characters to ranges.
Each run of consecutive values becomes one (start, end) pair.

Arguments:
  p           points to the character list
  buffer      where to store the pairs, or NULL when only the required
              size is computed

Returns:      the number of uint32_t values stored (or required)
*/

static size_t
append_char_list(const uint32_t *p, uint32_t *buffer)
{
size_t written = 0;
const uint32_t *first = p;

for (; *first != NOTACHAR; written += 2)
  {
  const uint32_t *last = first;

  /* Find the last character of the run which starts at 'first'. */
  while (last[0] == last[1] - 1) last++;

  PCRE2_ASSERT(*first < 0xffff);

  if (buffer != NULL)
    {
    *buffer++ = *first;
    *buffer++ = *last;
    }

  first = last + 1;
  }

return written;
}

/* Returns the highest character value which can be the end of a range.

In the 8-bit library the result is always MAX_UTF_CODE_POINT, and the options
are not used. In the other libraries the result is computed by
GET_MAX_CHAR_VALUE() from the PARSE_CLASS_UTF bit when Unicode is supported,
and it is MAX_UCHAR_VALUE otherwise. */

static uint32_t
get_highest_char(uint32_t options)
{
(void)options; /* Not referenced in every configuration. */

#if PCRE2_CODE_UNIT_WIDTH == 8
return MAX_UTF_CODE_POINT;
#elif defined SUPPORT_UNICODE
return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
#else
return MAX_UCHAR_VALUE;
#endif
}

/* Converts the complement of a NOTACHAR terminated, ascending list of
characters to ranges. A (start, end) pair is created for the gap before each
run of consecutive characters, and a final pair covers everything from the
end of the last run up to the highest character.

Arguments:
  p           points to the character list; its first value must not be 0
  options     PARSE_CLASS_* flags, used for computing the highest character
  buffer      where to store the pairs, or NULL when only the required
              size is computed

Returns:      the number of uint32_t values stored (or required)
*/
static size_t
append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
{
uint32_t gap_start = 0;
size_t count = 0;

PCRE2_ASSERT(*p > 0);

while (*p != NOTACHAR)
  {
  const uint32_t *run_end = p;

  /* Find the last character of the run which starts at p. */
  while (run_end[0] == run_end[1] - 1) run_end++;

  PCRE2_ASSERT(*p < 0xffff);

  /* The gap before the run. */
  if (buffer != NULL)
    {
    *buffer++ = gap_start;
    *buffer++ = *p - 1;
    }

  count += 2;
  gap_start = *run_end + 1;
  p = run_end + 1;
  }

/* Everything above the last run. */
if (buffer != NULL)
  {
  buffer[0] = gap_start;
  buffer[1] = get_highest_char(options);
  }

return count + 2;
}

/* Stores the range which starts at 0x100 and ends at the highest character.
Nothing is stored when buffer is NULL. Returns the position after the stored
pair, or NULL if buffer is NULL. */

static uint32_t *
append_non_ascii_range(uint32_t options, uint32_t *buffer)
{
  if (buffer != NULL)
    {
    *buffer++ = 0x100;
    *buffer++ = get_highest_char(options);
    }

  return buffer;
}

/* Converts the parsed items of a character class to a list of character
ranges. Each range is stored as a (start, end) pair of uint32_t values.

The items are processed until a value is found which is not handled by the
switch below and is >= META_END. When buffer is NULL, nothing is stored and
only the required size is computed, so the function can be called twice
for the same items: first for getting the size, then for storing the ranges.

Arguments:
  ptr         points to the parsed class items
  options     PARSE_CLASS_* flags
  buffer      where to store the ranges, or NULL

Returns:      the number of uint32_t values stored (or required)
*/

static size_t
parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
{
size_t total_size = 0;
size_t size;
uint32_t meta_arg;
uint32_t start_char;

while (TRUE)
  {
  switch (META_CODE(*ptr))
    {
    case META_ESCAPE:
      meta_arg = META_DATA(*ptr);
      switch (meta_arg)
        {
        /* These escapes add the range from 0x100 to the highest character. */
        case ESC_D:
        case ESC_W:
        case ESC_S:
        buffer = append_non_ascii_range(options, buffer);
        total_size += 2;
        break;

        case ESC_h:
        size = append_char_list(PRIV(hspace_list), buffer);
        total_size += size;
        if (buffer != NULL) buffer += size;
        break;

        case ESC_H:
        size = append_negated_char_list(PRIV(hspace_list), options, buffer);
        total_size += size;
        if (buffer != NULL) buffer += size;
        break;

        case ESC_v:
        size = append_char_list(PRIV(vspace_list), buffer);
        total_size += size;
        if (buffer != NULL) buffer += size;
        break;

        case ESC_V:
        size = append_negated_char_list(PRIV(vspace_list), options, buffer);
        total_size += size;
        if (buffer != NULL) buffer += size;
        break;

        /* The property data is stored in the next item. Only a non-negated
        PT_ANY property adds a range (all characters) to the list, the other
        properties add nothing here. */
        case ESC_p:
        case ESC_P:
        ptr++;
        if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
          {
          if (buffer != NULL)
            {
            buffer[0] = 0;
            buffer[1] = get_highest_char(options);
            buffer += 2;
            }
          total_size += 2;
          }
        break;
        }
      ptr++;
      continue;
    /* A negated POSIX class adds the range from 0x100 to the highest
    character. Both POSIX forms occupy two items. */
    case META_POSIX_NEG:
      buffer = append_non_ascii_range(options, buffer);
      total_size += 2;
      ptr += 2;
      continue;
    case META_POSIX:
      ptr += 2;
      continue;
    case META_BIGVALUE:
      /* Character literal */
      ptr++;
      break;
    CLASS_END_CASES(*ptr)
      if (*ptr >= META_END) return total_size;
      break;
    }

    /* Here *ptr is a literal character, which is either a single character
    or the start of a range. */
    start_char = *ptr;

    if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
      {
      /* Skip the range operator. The end of the range follows, and it may
      be preceded by META_BIGVALUE. */
      ptr += 2;
      PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);

      if (*ptr == META_BIGVALUE) ptr++;

#ifdef EBCDIC
#error "Missing EBCDIC support"
#endif
      }

    /* For a single character *ptr is still equal to start_char here, so the
    range is (start_char, start_char). */

#ifdef SUPPORT_UNICODE
    if (options & PARSE_CLASS_CASELESS_UTF)
      {
      size = utf_caseless_extend(start_char, *ptr++, options, buffer);
      if (buffer != NULL) buffer += size;
      total_size += size;
      continue;
      }
#endif

    if (buffer != NULL)
      {
      buffer[0] = start_char;
      buffer[1] = *ptr;
      buffer += 2;
      }

    ptr++;
    total_size += 2;
  }

  /* Not reached: the loop above is only left by the return statement. */
  return total_size;
}

/* Extra uint32_t values for storing the lengths of range lists in
the worst case: two uint32_t lengths, and a range end for a range
starting before 255. compile_optimize_class() adds this amount to the
allocated size whenever the range list is not empty. */
#define CHAR_LIST_EXTRA_SIZE 3

/* Starting character values for each character list. The entries are read
in array order by compile_optimize_class() while it processes the sorted
ranges from the last one backwards. The number of entries also determines the
initial bit position used for updating char_lists_types. */

static const uint32_t char_list_starts[] = {
#if PCRE2_CODE_UNIT_WIDTH == 32
  XCL_CHAR_LIST_HIGH_32_START,
#endif
#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
  XCL_CHAR_LIST_LOW_32_START,
#endif
  XCL_CHAR_LIST_HIGH_16_START,
  /* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
  which also represents the end of the bitset. */
  XCL_CHAR_LIST_LOW_16_START,
};

/* Pre-processes the items of a character class into a sorted list of
character ranges, and optionally encodes the high ranges as character lists.

parse_class() is called twice: first with a NULL buffer for computing the
number of uint32_t values needed, then for filling the newly allocated buffer,
which directly follows the class_ranges header. The (start, end) pairs are
sorted in place by their start value using heapsort, and the ranges which
overlap or touch each other are merged. If at least six of the merged ranges
end at or above 0x100, the ranges are processed from the last one backwards,
and they are encoded as character lists, which grow downwards from the end of
the buffer.

Arguments:
  start_ptr   points to the parsed class items
  options     the options bits
  xoptions    the extra options bits
  cb          compile data; its memory allocator is used

Returns:      the new class_ranges block, or NULL if the allocation failed
*/

static class_ranges *
compile_optimize_class(uint32_t *start_ptr, uint32_t options,
  uint32_t xoptions, compile_block *cb)
{
class_ranges* cranges;
uint32_t *ptr;
uint32_t *buffer;
uint32_t *dst;
uint32_t class_options = 0;
size_t range_list_size = 0, total_size, i;
uint32_t tmp1, tmp2;
const uint32_t *char_list_next;
uint16_t *next_char;
uint32_t char_list_start, char_list_end;
uint32_t range_start, range_end;

/* Translate the compile options to PARSE_CLASS_* flags. */

#ifdef SUPPORT_UNICODE
if (options & PCRE2_UTF)
  class_options |= PARSE_CLASS_UTF;

if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
  class_options |= PARSE_CLASS_CASELESS_UTF;

if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
  class_options |= PARSE_CLASS_RESTRICTED_UTF;

if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
  class_options |= PARSE_CLASS_TURKISH_UTF;
#endif

/* Compute required space for the range. */

range_list_size = parse_class(start_ptr, class_options, NULL);
PCRE2_ASSERT((range_list_size & 0x1) == 0);

/* Allocate buffer. The total_size also represents the end of the buffer. */

total_size = range_list_size +
   ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);

cranges = cb->cx->memctl.malloc(
  sizeof(class_ranges) + total_size * sizeof(uint32_t),
  cb->cx->memctl.memory_data);

if (cranges == NULL) return NULL;

/* The range_list_size stored here is final only for the two early returns
below, where it is at most 2. In all other cases it is replaced by the size
of the merged list before returning. */

cranges->next = NULL;
cranges->range_list_size = (uint16_t)range_list_size;
cranges->char_lists_types = 0;
cranges->char_lists_size = 0;
cranges->char_lists_start = 0;

if (range_list_size == 0) return cranges;

buffer = (uint32_t*)(cranges + 1);
parse_class(start_ptr, class_options, buffer);

/* Using <= instead of == to help static analysis. */
if (range_list_size <= 2) return cranges;

/* In-place sorting of ranges. */

/* Build a max-heap ordered by the range starts, beginning with the last
pair which has a child. The indexes count uint32_t values, so they are
changed in steps of two. */

i = (((range_list_size >> 2) - 1) << 1);
while (TRUE)
  {
  do_heapify(buffer, range_list_size, i);
  if (i == 0) break;
  i -= 2;
  }

/* Repeatedly exchange the first (greatest) pair with the last pair of the
heap, then restore the heap property for the remaining pairs. */

i = range_list_size - 2;
while (TRUE)
  {
  tmp1 = buffer[i];
  tmp2 = buffer[i + 1];
  buffer[i] = buffer[0];
  buffer[i + 1] = buffer[1];
  buffer[0] = tmp1;
  buffer[1] = tmp2;

  do_heapify(buffer, i, 0);
  if (i == 0) break;
  i -= 2;
  }

/* Merge ranges whenever possible. */
/* Here dst is the last merged range, ptr is the next range to be processed,
and range_list_size counts the values which have not been processed yet. */
dst = buffer;
ptr = buffer + 2;
range_list_size -= 2;

/* The second condition is a very rare corner case, where the end of the last
range is the maximum character. This range cannot be extended further. */

while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
  {
  if (dst[1] + 1 < ptr[0])
    {
    /* There is a gap between the two ranges: start a new merged range. */
    dst += 2;
    dst[0] = ptr[0];
    dst[1] = ptr[1];
    }
  else if (dst[1] < ptr[1]) dst[1] = ptr[1];

  ptr += 2;
  range_list_size -= 2;
  }

PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));

/* When the number of ranges are less than six,
they are not converted to range lists. */

/* Only the ranges which end at or above 0x100 are counted: ptr is set to
the first such range, and dst still refers to the last merged range. */

ptr = buffer;
while (ptr < dst && ptr[1] < 0x100) ptr += 2;
if (dst - ptr < (2 * (6 - 1)))
  {
  cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
  return cranges;
  }

/* Compute character lists structures. */

char_list_next = char_list_starts;
char_list_start = *char_list_next++;
#if PCRE2_CODE_UNIT_WIDTH == 32
char_list_end = XCL_CHAR_LIST_HIGH_32_END;
#elif defined SUPPORT_UNICODE
char_list_end = XCL_CHAR_LIST_LOW_32_END;
#else
char_list_end = XCL_CHAR_LIST_HIGH_16_END;
#endif
/* The list items are stored from the end of the buffer downwards. */
next_char = (uint16_t*)(buffer + total_size);

/* In the loop below tmp1 is the number of items stored for the current
character list, and tmp2 is the bit position of the data of the current
list in char_lists_types. */

tmp1 = 0;
tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
range_start = dst[0];
range_end = dst[1];

while (TRUE)
  {
  /* The current range starts inside the current character list. Items are
  stored as 16 bit values when the list starts below
  XCL_CHAR_LIST_LOW_32_START, and as 32 bit values otherwise (next_char is
  decreased twice in the latter case). */
  if (range_start >= char_list_start)
    {
    if (range_start == range_end || range_end < char_list_end)
      {
      tmp1++;
      next_char--;

      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
      else
        *(uint32_t*)(--next_char) =
          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
      }

    if (range_start < range_end)
      {
      if (range_start > char_list_start)
        {
        tmp1++;
        next_char--;

        if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
          *next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
        else
          *(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
        }
      else
        cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
      }

    PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);

    /* Continue with the previous range if there is one. */
    if (dst > buffer)
      {
      dst -= 2;
      range_start = dst[0];
      range_end = dst[1];
      continue;
      }

    /* All ranges are processed. */
    range_start = 0;
    range_end = 0;
    }

  /* The current range starts below the current character list, but it may
  still end inside or above the list. */
  if (range_end >= char_list_start)
    {
    PCRE2_ASSERT(range_start < char_list_start);

    if (range_end < char_list_end)
      {
      tmp1++;
      next_char--;

      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
      else
        *(uint32_t*)(--next_char) =
          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;

      PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
      }

    cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
    }

  /* Store the item count of the current list. A count which is at least
  XCL_ITEM_COUNT_MASK is stored as a separate value before the items, and
  XCL_ITEM_COUNT_MASK is recorded in char_lists_types instead. */
  if (tmp1 >= XCL_ITEM_COUNT_MASK)
    {
    cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
    next_char--;

    if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
      *next_char = (uint16_t)tmp1;
    else
      *(uint32_t*)(--next_char) = tmp1;
    }
  else
    cranges->char_lists_types |= tmp1 << tmp2;

  if (range_start < XCL_CHAR_LIST_LOW_16_START) break;

  /* Continue with the next character list. */
  PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
  char_list_end = char_list_start - 1;
  char_list_start = *char_list_next++;
  tmp1 = 0;
  tmp2 -= XCL_TYPE_BIT_LEN;
  }

/* The range which starts below XCL_CHAR_LIST_LOW_16_START is kept in the
range list. */
if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;
PCRE2_ASSERT((uint16_t*)dst <= next_char);

/* The size and the start offset of the character lists are byte values. */
cranges->char_lists_size =
  (size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
cranges->range_list_size = (uint16_t)(dst - buffer);
return cranges;
}

#endif /* SUPPORT_WIDE_CHARS */

#ifdef SUPPORT_UNICODE

/* Sets the bits of those characters below 256 in a class bitmap, which have
(or, when negated, which do not have) the given Unicode property.

Bits are only set and never cleared, so the result is merged into the
current content of the bitmap. PT_ANY is handled first: all bits are set when
it is not negated, and nothing is changed when it is negated.

Arguments:
  ptype       the property type (PT_xxx value)
  pdata       the property data; its use depends on ptype
  negated     TRUE if the characters without the property are added
  classbits   the 256-bit (32-byte) bitmap to update

Returns:      nothing
*/

void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
  uint8_t *classbits)
{
/* Update PRIV(xclass) when this function is changed. */
int c, chartype;
const ucd_record *prop;
uint32_t gentype;
BOOL set_bit;

if (ptype == PT_ANY)
  {
  if (!negated) memset(classbits, 0xff, 32);
  return;
  }

for (c = 0; c < 256; c++)
  {
  prop = GET_UCD(c);
  set_bit = FALSE;
  (void)set_bit;

  /* Decide whether character c has the property. */
  switch (ptype)
    {
    case PT_LAMP:
    chartype = prop->chartype;
    set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
    break;

    case PT_GC:
    set_bit = (PRIV(ucp_gentype)[prop->chartype] == pdata);
    break;

    case PT_PC:
    set_bit = (prop->chartype == pdata);
    break;

    case PT_SC:
    set_bit = (prop->script == pdata);
    break;

    case PT_SCX:
    set_bit = (prop->script == pdata ||
      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
    break;

    case PT_ALNUM:
    gentype = PRIV(ucp_gentype)[prop->chartype];
    set_bit = (gentype == ucp_L || gentype == ucp_N);
    break;

    case PT_SPACE:    /* Perl space */
    case PT_PXSPACE:  /* POSIX space */
    switch(c)
      {
      HSPACE_BYTE_CASES:
      VSPACE_BYTE_CASES:
      set_bit = TRUE;
      break;

      default:
      set_bit = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z);
      break;
      }
    break;

    case PT_WORD:
    chartype = prop->chartype;
    gentype = PRIV(ucp_gentype)[chartype];
    set_bit = (gentype == ucp_L || gentype == ucp_N ||
               chartype == ucp_Mn || chartype == ucp_Pc);
    break;

    case PT_UCNC:
    set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
               c == CHAR_GRAVE_ACCENT || c >= 0xa0);
    break;

    case PT_BIDICL:
    set_bit = (UCD_BIDICLASS_PROP(prop) == pdata);
    break;

    case PT_BOOL:
    set_bit = MAPBIT(PRIV(ucd_boolprop_sets) +
                     UCD_BPROPS_PROP(prop), pdata) != 0;
    break;

    case PT_PXGRAPH:
    chartype = prop->chartype;
    gentype = PRIV(ucp_gentype)[chartype];
    set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPRINT:
    chartype = prop->chartype;
    set_bit = (chartype != ucp_Zl && chartype != ucp_Zp &&
       (PRIV(ucp_gentype)[chartype] != ucp_C || chartype == ucp_Cf));
    break;

    case PT_PXPUNCT:
    gentype = PRIV(ucp_gentype)[prop->chartype];
    set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
    break;

    /* The only remaining property type is PT_PXXDIGIT. */
    default:
    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
    set_bit = (c >= CHAR_0 && c <= CHAR_9) ||
              (c >= CHAR_A && c <= CHAR_F) ||
              (c >= CHAR_a && c <= CHAR_f);
    break;
    }

  /* Here classbits points to the byte which contains the bit of character
  c. It is advanced after the last bit of each byte has been processed. */
  if (negated) set_bit = !set_bit;
  if (set_bit) *classbits |= (uint8_t)(1 << (c & 0x7));
  if ((c & 0x7) == 0x7) classbits++;
  }
}

#endif /* SUPPORT_UNICODE */


#ifdef SUPPORT_WIDE_CHARS

/*************************************************
*           XClass related properties            *
*************************************************/

/* An XClass needs to be generated. */
#define XCLASS_REQUIRED 0x1
/* The XClass has 8-bit characters (characters below 256). */
#define XCLASS_HAS_8BIT_CHARS 0x2
/* The XClass has properties. */
#define XCLASS_HAS_PROPS 0x4
/* The XClass has character lists. */
#define XCLASS_HAS_CHAR_LISTS 0x8
/* The XClass matches all characters >= 256. */
#define XCLASS_HIGH_ANY 0x10

#endif


/*************************************************
*   Internal entry point for add range to class  *
*************************************************/

/* This function sets the overall range for characters < 256.
It also handles non-utf case folding.

Only the part of the range which is below 256 is recorded in the bitmap; the
end of the range is limited to 255 for this purpose. If the start of the range
is above this limited end (the range is empty, or it is entirely above 255),
the bitmap is left unchanged.

When PCRE2_CASELESS is set, the other cases of the characters are added as
well:
  - with PCRE2_UTF or PCRE2_UCP (Unicode support only), characters below 128
    are folded using the cb->fcc table, except for the characters accepted by
    UCD_ANY_I() when Turkish casing is active without the caseless
    restriction; for characters 128..255 the other case comes from
    UCD_OTHERCASE(), and it is added only if it is also below 256;
  - otherwise all characters are folded using the cb->fcc table.

Arguments:
  options       the options bits
  xoptions      the extra options bits
  cb            compile data
  start         start of range character
  end           end of range character

Returns:        cb->classbits is updated
*/

static void
add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
  uint32_t start, uint32_t end)
{
uint8_t *classbits = cb->classbits.classbits;
uint32_t c, byte_start, byte_end;
uint32_t classbits_end = (end <= 0xff ? end : 0xff);

/* Nothing to do when no character of the range is below 256. Returning here
is also required for correctness: for a start value within 7 of the maximum
uint32_t value, the "start + 7" computation below would wrap around to a
small number, and the whole bitmap up to classbits_end would be filled. */

if (start > classbits_end) return;

/* If caseless matching is required, scan the range and process alternate
cases. In Unicode, there are 8-bit characters that have alternate cases that
are greater than 255 and vice-versa (though these may be ignored if caseless
restriction is in force). Sometimes we can just extend the original range. */

if ((options & PCRE2_CASELESS) != 0)
  {
#ifdef SUPPORT_UNICODE
  /* UTF mode. This branch is taken if we don't support wide characters (e.g.
  8-bit library, without UTF), but we do treat those characters as Unicode
  (if UCP flag is set). In this case, we only need to expand the character class
  set to include the case pairs which are in the 0-255 codepoint range. */
  if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
    {
    BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
      PCRE2_EXTRA_TURKISH_CASING;

    /* Characters below 128 are folded by the character table. */
    if (start < 128)
      {
      uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127);
      for (c = start; c <= lo_end; c++)
        {
        if (turkish_i && UCD_ANY_I(c)) continue;
        SETBIT(classbits, cb->fcc[c]);
        }
      }

    /* Characters 128..255 are folded by the Unicode data. The other case
    is ignored if it does not fit into the bitmap. */
    if (classbits_end >= 128)
      {
      uint32_t hi_start = (start > 128 ? start : 128);
      for (c = hi_start; c <= classbits_end; c++)
        {
        uint32_t co = UCD_OTHERCASE(c);
        if (co <= 0xff) SETBIT(classbits, co);
        }
      }
    }

  else
#endif  /* SUPPORT_UNICODE */

  /* Not UTF mode */
    {
    for (c = start; c <= classbits_end; c++)
      SETBIT(classbits, cb->fcc[c]);
    }
  }

/* Use the bitmap for characters < 256. Otherwise use extra data. */

/* byte_start is the first byte of the bitmap which is entirely inside the
range, and byte_end is the byte after the last such byte. */

byte_start = (start + 7) >> 3;
byte_end = (classbits_end + 1) >> 3;

/* No byte is fully covered: set the bits one by one. */

if (byte_start >= byte_end)
  {
  for (c = start; c <= classbits_end; c++)
    /* Regardless of start, c will always be <= 255. */
    SETBIT(classbits, c);
  return;
  }

/* Fill the fully covered bytes, then set the remaining bits before and
after them. */

for (c = byte_start; c < byte_end; c++)
  classbits[c] = 0xff;

byte_start <<= 3;
byte_end <<= 3;

for (c = start; c < byte_start; c++)
  SETBIT(classbits, c);

for (c = byte_end; c <= classbits_end; c++)
  SETBIT(classbits, c);
}


#if PCRE2_CODE_UNIT_WIDTH == 8
/*************************************************
*   Internal entry point for add list to class   *
*************************************************/

/* This function is used for adding a list of horizontal or vertical whitespace
characters to a class. The list must be in order so that ranges of characters
can be detected and handled appropriately. This function sets the overall range
so that the internal functions can try to avoid duplication when handling
case-independence.

Arguments:
  options       the options bits
  xoptions      the extra options bits
  cb            contains pointers to tables etc.
  p             points to row of 32-bit values, terminated by NOTACHAR

Returns:        cb->classbits is updated
*/

static void
add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
  const uint32_t *p)
{
const uint32_t *first = p;

/* Only the items below 256 are processed. Each run of consecutive values is
added to the class as a single range. */

while (*first < 256)
  {
  const uint32_t *last = first;

  /* Find the last value of the run which starts at 'first'. */
  while (last[1] == last[0] + 1) last++;

  add_to_class(options, xoptions, cb, *first, *last);
  first = last + 1;
  }
}


/*************************************************
*    Add characters not in a list to a class     *
*************************************************/

/* This function is used for adding the complement of a list of horizontal or
vertical whitespace to a class. The list must be in order.

Arguments:
  options       the options bits
  xoptions      the extra options bits
  cb            contains pointers to tables etc.
  p             points to row of 32-bit values, terminated by NOTACHAR

Returns:        cb->classbits is updated
*/

static void
add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
  const uint32_t *p)
{
/* The characters before the first item of the list. */

if (*p != 0)
  add_to_class(options, xoptions, cb, 0, *p - 1);

/* For each run of consecutive values which starts below 256, add the
characters between the end of the run and the next item of the list. The
added range is limited to 255. */

for (; *p < 256; p++)
  {
  uint32_t gap_start, gap_end;

  while (p[1] == p[0] + 1) p++;

  gap_start = p[0] + 1;
  gap_end = (p[1] <= 255) ? p[1] - 1 : 255;
  add_to_class(options, xoptions, cb, gap_start, gap_end);
  }
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */


/*************************************************
*  Main entry-point to compile a character class *
*************************************************/

/* This function consumes a "leaf", which is a set of characters that will
become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */

uint32_t *
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
  uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
  int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
uint32_t *pptr = start_ptr;
PCRE2_UCHAR *code = *pcode;
BOOL should_flip_negation;
const uint8_t *cbits = cb->cbits;
/* Some functions such as add_to_class() or eclass processing
expects that the bitset is stored in cb->classbits.classbits. */
uint8_t *const classbits = cb->classbits.classbits;

#ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0;
#else  /* No Unicode support */
BOOL utf = FALSE;
#endif

/* Helper variables for OP_XCLASS opcode (for characters > 255). */

#ifdef SUPPORT_WIDE_CHARS
uint32_t xclass_props;
PCRE2_UCHAR *class_uchardata;
class_ranges* cranges;
#endif

/* If an XClass contains a negative special such as \S, we need to flip the
negation flag at the end, so that support for characters > 255 works correctly
(they are all included in the class). An XClass may need to insert specific
matching or non-matching code for wide characters.
*/

should_flip_negation = FALSE;

/* XClass will be used when characters > 255 might match. */

#ifdef SUPPORT_WIDE_CHARS
xclass_props = 0;

#if PCRE2_CODE_UNIT_WIDTH == 8
cranges = NULL;

if (utf)
#endif
  {
  if (lengthptr != NULL)
    {
    cranges = compile_optimize_class(pptr, options, xoptions, cb);

    if (cranges == NULL)
      {
      *errorcodeptr = ERR21;
      return NULL;
      }

    /* Caching the pre-processed character ranges. */
    if (cb->next_cranges != NULL)
      cb->next_cranges->next = cranges;
    else
      cb->cranges = cranges;

    cb->next_cranges = cranges;
    }
  else
    {
    /* Reuse the pre-processed character ranges. */
    cranges = cb->cranges;
    PCRE2_ASSERT(cranges != NULL);
    cb->cranges = cranges->next;
    }

  if (cranges->range_list_size > 0)
    {
    const uint32_t *ranges = (const uint32_t*)(cranges + 1);

    if (ranges[0] <= 255)
      xclass_props |= XCLASS_HAS_8BIT_CHARS;

    if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
        ranges[cranges->range_list_size - 2] <= 256)
      xclass_props |= XCLASS_HIGH_ANY;
    }
  }

class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
#endif /* SUPPORT_WIDE_CHARS */

/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
in a temporary bit of memory, in case the class contains fewer than two
8-bit characters because in that case the compiled code doesn't use the bit
map. */

memset(classbits, 0, 32);

/* Process items until end_ptr is reached. */

while (TRUE)
  {
  uint32_t meta = *(pptr++);
  BOOL local_negate;
  int posix_class;
  int taboffset, tabopt;
  class_bits_storage pbits;
  uint32_t escape, c;

  /* Handle POSIX classes such as [:alpha:] etc. */
  switch (META_CODE(meta))
    {
    case META_POSIX:
    case META_POSIX_NEG:

    local_negate = (meta == META_POSIX_NEG);
    posix_class = *(pptr++);

    if (local_negate) should_flip_negation = TRUE;  /* Note negative special */

    /* If matching is caseless, upper and lower are converted to alpha.
    This relies on the fact that the class table starts with alpha,
    lower, upper as the first 3 entries. */

    if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
      posix_class = 0;

    /* When PCRE2_UCP is set, some of the POSIX classes are converted to
    different escape sequences that use Unicode properties \p or \P.
    Others that are not available via \p or \P have to generate
    XCL_PROP/XCL_NOTPROP directly, which is done here. */

#ifdef SUPPORT_UNICODE
    /* TODO This entire block of code here appears to be unreachable!? I simply
    can't see how it can be hit, given that the frontend parser doesn't emit
    META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
    if ((options & PCRE2_UCP) != 0 &&
        (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
      {
      uint32_t ptype;

      switch(posix_class)
        {
        case PC_GRAPH:
        case PC_PRINT:
        case PC_PUNCT:
        ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
                (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;

        PRIV(update_classbits)(ptype, 0, local_negate, classbits);

        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
          {
          if (lengthptr != NULL)
            *lengthptr += 3;
          else
            {
            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
            *class_uchardata++ = (PCRE2_UCHAR)ptype;
            *class_uchardata++ = 0;
            }
          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
          }
        continue;

        /* For the other POSIX classes (ex: ascii) we are going to
        fall through to the non-UCP case and build a bit map for
        characters with code points less than 256. However, if we are in
        a negated POSIX class, characters with code points greater than
        255 must either all match or all not match, depending on whether
        the whole class is not or is negated. For example, for
        [[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
        they must not.

        In the special case where there are no xclass items, this is
        automatically handled by the use of OP_CLASS or OP_NCLASS, but an
        explicit range is needed for OP_XCLASS. Setting a flag here
        causes the range to be generated later when it is known that
        OP_XCLASS is required. In the 8-bit library this is relevant only in
        utf mode, since no wide characters can exist otherwise. */

        default:
        break;
        }
      }
#endif  /* SUPPORT_UNICODE */

    /* In the non-UCP case, or when UCP makes no difference, we build the
    bit map for the POSIX class in a chunk of local store because we may
    be adding and subtracting from it, and we don't want to subtract bits
    that may be in the main map already. At the end we or the result into
    the bit map that is being built. */

    posix_class *= 3;

    /* Copy in the first table (always present) */

    memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);

    /* If there is a second table, add or remove it as required. */

    taboffset = PRIV(posix_class_maps)[posix_class + 1];
    tabopt = PRIV(posix_class_maps)[posix_class + 2];

    if (taboffset >= 0)
      {
      if (tabopt >= 0)
        for (int i = 0; i < 32; i++)
          pbits.classbits[i] |= cbits[i + taboffset];
      else
        for (int i = 0; i < 32; i++)
          pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
      }

    /* Now see if we need to remove any special characters. An option
    value of 1 removes vertical space and 2 removes underscore. */

    if (tabopt < 0) tabopt = -tabopt;
    if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
      else if (tabopt == 2) pbits.classbits[11] &= 0x7f;

    /* Add the POSIX table or its complement into the main table that is
    being built and we are done. */

      {
      uint32_t *classwords = cb->classbits.classwords;

      if (local_negate)
        for (int i = 0; i < 8; i++)
          classwords[i] |= (uint32_t)(~pbits.classwords[i]);
      else
        for (int i = 0; i < 8; i++)
          classwords[i] |= pbits.classwords[i];
      }

#ifdef SUPPORT_WIDE_CHARS
    /* Every class contains at least one < 256 character. */
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
    continue;               /* End of POSIX handling */

    /* Other than POSIX classes, the only items we should encounter are
    \d-type escapes and literal characters (possibly as ranges). */
    case META_BIGVALUE:
    meta = *(pptr++);
    break;

    case META_ESCAPE:
    escape = META_DATA(meta);

    switch(escape)
      {
      case ESC_d:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
      break;

      case ESC_D:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
      break;

      case ESC_w:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
      break;

      case ESC_W:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
      break;

      /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
      5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
      previously set by something earlier in the character class.
      Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
      we could just adjust the appropriate bit. From PCRE 8.34 we no
      longer treat \s and \S specially. */

      case ESC_s:
      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
      break;

      case ESC_S:
      should_flip_negation = TRUE;
      for (int i = 0; i < 32; i++)
        classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
      break;

      /* When adding the horizontal or vertical space lists to a class, or
      their complements, disable PCRE2_CASELESS, because it just wastes
      time, and in the "not-x" UTF cases can create unwanted duplicates in
      the XCLASS list (provoked by characters that have more than one other
      case and by both cases being in the same "not-x" sublist). */

      case ESC_h:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(hspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_H:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(hspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_v:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(vspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      case ESC_V:
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
      if (cranges != NULL) break;
#endif
      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
        cb, PRIV(vspace_list));
#else
      PCRE2_ASSERT(cranges != NULL);
#endif
      break;

      /* If Unicode is not supported, \P and \p are not allowed and are
      faulted at parse time, so will never appear here. */

#ifdef SUPPORT_UNICODE
      case ESC_p:
      case ESC_P:
        {
        uint32_t ptype = *pptr >> 16;
        uint32_t pdata = *(pptr++) & 0xffff;

        /* The "Any" is processed by PRIV(update_classbits)(). */
        if (ptype == PT_ANY)
          {
#if PCRE2_CODE_UNIT_WIDTH == 8
          if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
#endif
          continue;
          }

        PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);

        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
          {
          if (lengthptr != NULL)
            *lengthptr += 3;
          else
            {
            *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
            *class_uchardata++ = ptype;
            *class_uchardata++ = pdata;
            }
          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
          }
        }
      continue;
#endif
      }

#ifdef SUPPORT_WIDE_CHARS
    /* Every non-property class contains at least one < 256 character. */
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
    /* End handling \d-type escapes */
    continue;

    CLASS_END_CASES(meta)
    /* Literals. */
    if (meta < META_END) break;
    /* Non-literals: end of class contents. */
    goto END_PROCESSING;
    }

  /* A literal character may be followed by a range meta. At parse time
  there are checks for out-of-order characters, for ranges where the two
  characters are equal, and for hyphens that cannot indicate a range. At
  this point, therefore, no checking is needed. */

  c = meta;

  /* Remember if \r or \n were explicitly used */

  if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

  /* Process a character range */

  if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
    {
    uint32_t d;

#ifdef EBCDIC
    BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
#endif
    ++pptr;
    d = *(pptr++);
    if (d == META_BIGVALUE) d = *(pptr++);

    /* Remember an explicit \r or \n, and add the range to the class. */

    if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;

#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
    if (cranges != NULL) continue;
    xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif

    /* In an EBCDIC environment, Perl treats alphabetic ranges specially
    because there are holes in the encoding, and simply using the range
    A-Z (for example) would include the characters in the holes. This
    applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */

#ifdef EBCDIC
    if (range_is_literal &&
         (cb->ctypes[c] & ctype_letter) != 0 &&
         (cb->ctypes[d] & ctype_letter) != 0 &&
         (c <= CHAR_z) == (d <= CHAR_z))
      {
      uint32_t uc = (d <= CHAR_z)? 0 : 64;
      uint32_t C = c - uc;
      uint32_t D = d - uc;

      if (C <= CHAR_i)
        {
        add_to_class(options, xoptions, cb, C + uc,
          ((D < CHAR_i)? D : CHAR_i) + uc);
        C = CHAR_j;
        }

      if (C <= D && C <= CHAR_r)
        {
        add_to_class(options, xoptions, cb, C + uc,
          ((D < CHAR_r)? D : CHAR_r) + uc);
        C = CHAR_s;
        }

      if (C <= D)
        add_to_class(options, xoptions, cb, C + uc, D + uc);
      }
    else
#endif
    /* Not an EBCDIC special range */

    add_to_class(options, xoptions, cb, c, d);
#else
    PCRE2_ASSERT(cranges != NULL);
#endif
    continue;
    }  /* End of range handling */

  /* Character ranges are ignored when class_ranges is present. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#ifdef SUPPORT_UNICODE
  if (cranges != NULL) continue;
  xclass_props |= XCLASS_HAS_8BIT_CHARS;
#endif
  /* Handle a single character. */

  add_to_class(options, xoptions, cb, meta, meta);
#else
  PCRE2_ASSERT(cranges != NULL);
#endif
  }   /* End of main class-processing loop */

END_PROCESSING:

#ifdef SUPPORT_WIDE_CHARS
PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
             (xclass_props & XCLASS_HIGH_ANY) == 0);

if (cranges != NULL)
  {
  uint32_t *range = (uint32_t*)(cranges + 1);
  uint32_t *end = range + cranges->range_list_size;

  while (range < end && range[0] < 256)
    {
    PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
    /* Add range to bitset. If we are in UTF or UCP mode, then clear the
    caseless bit, because the cranges handle caselessness (only) in this
    condition; see the condition for PARSE_CLASS_CASELESS_UTF in
    compile_optimize_class(). */
    add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
        (options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);

    if (range[1] > 255) break;
    range += 2;
    }

  if (cranges->char_lists_size > 0)
    {
    /* The cranges structure is still used and freed later. */
    PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
    xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
    }
  else
    {
    if ((xclass_props & XCLASS_HIGH_ANY) != 0)
      {
      PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
        range[1] >= GET_MAX_CHAR_VALUE(utf));
      should_flip_negation = TRUE;
      range = end;
      }

    while (range < end)
      {
      uint32_t range_start = range[0];
      uint32_t range_end = range[1];

      range += 2;
      xclass_props |= XCLASS_REQUIRED;

      if (range_start < 256) range_start = 256;

      if (lengthptr != NULL)
        {
#ifdef SUPPORT_UNICODE
        if (utf)
          {
          *lengthptr += 1;

          if (range_start < range_end)
            *lengthptr += PRIV(ord2utf)(range_start, class_uchardata);

          *lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
          continue;
          }
#endif  /* SUPPORT_UNICODE */

        *lengthptr += range_start < range_end ? 3 : 2;
        continue;
        }

#ifdef SUPPORT_UNICODE
      if (utf)
        {
        if (range_start < range_end)
          {
          *class_uchardata++ = XCL_RANGE;
          class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
          }
        else
          *class_uchardata++ = XCL_SINGLE;

        class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
        continue;
        }
#endif  /* SUPPORT_UNICODE */

      /* Without UTF support, character values are constrained
      by the bit length, and can only be > 256 for 16-bit and
      32-bit libraries. */
#if PCRE2_CODE_UNIT_WIDTH != 8
      if (range_start < range_end)
        {
        *class_uchardata++ = XCL_RANGE;
        *class_uchardata++ = range_start;
        }
      else
        *class_uchardata++ = XCL_SINGLE;

      *class_uchardata++ = range_end;
#endif  /* PCRE2_CODE_UNIT_WIDTH != 8 */
      }

    if (lengthptr == NULL)
      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
    }
  }
#endif /* SUPPORT_WIDE_CHARS */

/* If there are characters with values > 255, or Unicode property settings
(\p or \P), we have to compile an extended class, with its own opcode,
unless there were no property settings and there was a negated special such
as \S in the class, and PCRE2_UCP is not set, because in that case all
characters > 255 are in or not in the class, so any that were explicitly
given as well can be ignored.

In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
present in a class, we either have to match or not match all wide
characters (depending on whether the whole class is or is not negated).
This requirement is indicated by match_all_or_no_wide_chars being true.
We do this by including an explicit range, which works in both cases.
This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
cannot be any wide characters in 8-bit non-UTF mode.

When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
class where \S etc is present without PCRE2_UCP, causing an extended class
to be compiled, we make sure that all characters > 255 are included by
forcing match_all_or_no_wide_chars to be true.

If, when generating an xclass, there are no characters < 256, we can omit
the bitmap in the actual compiled code. */

#ifdef SUPPORT_WIDE_CHARS  /* Defined for 16/32 bits, or 8-bit with Unicode */
if ((xclass_props & XCLASS_REQUIRED) != 0)
  {
  PCRE2_UCHAR *previous = code;

  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
    *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
  *code++ = OP_XCLASS;
  code += LINK_SIZE;
  *code = negate_class? XCL_NOT:0;
  if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;

  /* If the map is required, move up the extra data to make room for it;
  otherwise just move the code pointer to the end of the extra data. */

  if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
    {
    if (negate_class)
      {
      uint32_t *classwords = cb->classbits.classwords;
      for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
      }

    if (has_bitmap == NULL)
      {
      *code++ |= XCL_MAP;
      (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
        CU2BYTES(class_uchardata - code));
      memcpy(code, classbits, 32);
      code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
      }
    else
      {
      code = class_uchardata;
      if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
        *has_bitmap = TRUE;
      }
    }
  else code = class_uchardata;

  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
    {
    /* Char lists size is an even number, because all items are 16 or 32
    bit values. The character list data is always aligned to 32 bits. */
    size_t char_lists_size = cranges->char_lists_size;
    PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
                 (cb->char_lists_size & 0x3) == 0);

    if (lengthptr != NULL)
      {
      char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

#if PCRE2_CODE_UNIT_WIDTH == 8
      *lengthptr += 2 + LINK_SIZE;
#else
      *lengthptr += 1 + LINK_SIZE;
#endif

      cb->char_lists_size += char_lists_size;

      char_lists_size /= sizeof(PCRE2_UCHAR);

      /* Storage space for character lists is included
      in the maximum pattern size. */
      if (*lengthptr > MAX_PATTERN_SIZE ||
          MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
        {
        *errorcodeptr = ERR20;   /* Pattern is too large */
        return NULL;
        }
      }
    else
      {
      uint8_t *data;

      PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
#if PCRE2_CODE_UNIT_WIDTH == 8
      /* Encode as high / low bytes. */
      code[0] = (uint8_t)(XCL_LIST |
        (cranges->char_lists_types >> 8));
      code[1] = (uint8_t)cranges->char_lists_types;
      code += 2;
#else
      *code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
#endif

      /* Character lists are stored in backwards direction from
      byte code start. The non-dfa/dfa matchers can access these
      lists using the byte code start stored in match blocks.
      Each list is aligned to 32 bit with an optional unused
      16 bit value at the beginning of the character list. */

      cb->char_lists_size += char_lists_size;
      data = (uint8_t*)cb->start_code - cb->char_lists_size;

      memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
        char_lists_size);

      /* Since character lists total size is less than MAX_PATTERN_SIZE,
      their starting offset fits into a value which size is LINK_SIZE. */

      char_lists_size = cb->char_lists_size;
      PUT(code, 0, (uint32_t)(char_lists_size >> 1));
      code += LINK_SIZE;

#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
      if ((char_lists_size & 0x2) != 0)
        {
        /* In debug the unused 16 bit value is set
        to a fixed value and marked unused. */
        ((uint16_t*)data)[-1] = 0x5555;
#ifdef SUPPORT_VALGRIND
        VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
#endif
        }
#endif

      cb->char_lists_size =
        CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
      }
    }

  /* Now fill in the complete length of the item */

  PUT(previous, 1, (int)(code - previous));
  goto DONE;   /* End of class handling */
  }
#endif  /* SUPPORT_WIDE_CHARS */

/* If there are no characters > 255, or they are all to be included or
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
whole class was negated and whether there were negative specials such as \S
(non-UCP) in the class. Then copy the 32-byte map into the code vector,
negating it if necessary. */

if (negate_class)
  {
  uint32_t *classwords = cb->classbits.classwords;

  for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
  }

if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
    cb->classbits.classwords[0] == ~(uint32_t)0)
  {
  const uint32_t *classwords = cb->classbits.classwords;
  int i;

  for (i = 0; i < 8; i++)
    if (classwords[i] != ~(uint32_t)0) break;

  if (i == 8)
    {
    *code++ = OP_ALLANY;
    goto DONE;   /* End of class handling */
    }
  }

*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
memcpy(code, classbits, 32);
code += 32 / sizeof(PCRE2_UCHAR);

DONE:
*pcode = code;
return pptr - 1;
}


/* ===================================================================*/
/* Here follows a block of ECLASS-compiling functions. You may well want to
read them from top to bottom; they are ordered from leafmost (at the top) to
outermost parser (at the bottom of the file). */

/* Apply a logical NOT to a single operand on the ECLASS stack, rewriting the
operand's description (and, when code is being emitted, its code) in place.

Arguments:
  pop_info            the operand to negate; updated in place
  lengthptr           NULL when code is really being written; otherwise only
                        lengths are being accumulated and no code is touched
  preserve_classbits  TRUE to leave pop_info->bits untouched, because the
                        caller deals with the bitmap itself

Returns:              nothing
*/

static void
fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
  BOOL preserve_classbits)
{
const BOOL emitting = (lengthptr == NULL);
uint32_t *words = pop_info->bits.classwords;

if (pop_info->op_single_type == 0)
  {
  /* The operand is already a compound expression. We deliberately do not
  walk into it to push the negation down towards the leaves: doing so would
  make compilation O(n^2), which a hostile pattern could exploit. Just tack
  an explicit ECL_NOT onto the end instead. */

  if (emitting)
    pop_info->code_start[pop_info->length] = ECL_NOT;
  else
    *lengthptr += 1;
  pop_info->length += 1;
  }
else if (pop_info->op_single_type == ECL_NONE)
  {
  /* !NONE is ANY: a single-op item absorbs the negation with no ECL_NOT. */
  pop_info->op_single_type = ECL_ANY;
  if (emitting) pop_info->code_start[0] = pop_info->op_single_type;
  }
else if (pop_info->op_single_type == ECL_ANY)
  {
  /* !ANY is NONE. */
  pop_info->op_single_type = ECL_NONE;
  if (emitting) pop_info->code_start[0] = pop_info->op_single_type;
  }
else
  {
  /* The only remaining single-op item is an XCLASS; toggle XCL_NOT in the
  code unit that follows its opcode and link field. */
  PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
               pop_info->length >= 1 + LINK_SIZE + 1);
  if (emitting) pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
  }

if (preserve_classbits) return;

/* Complement the operand's 256-bit map, one 32-bit word at a time. */
for (int w = 0; w < 8; w++) words[w] = ~words[w];
}


/* Combine two adjacent operands on the ECLASS stack with a binary operator.
The combined operand (code, length, single-op type and bitmap) is described
by *lhs_op_info on return; *rhs_op_info is only read.

For every operator one of ECL_ANY / ECL_NONE is "neutral" (the result is just
the other operand) and the remaining one is "special":

     op       neutral   special   result when one side is special
     ------------------------------------------------------------
     ECL_AND  ANY       NONE      NONE
     ECL_OR   NONE      ANY       ANY
     ECL_XOR  NONE      ANY       negation of the other side

When neither side is ANY or NONE, the operator is appended after the RHS code
(postfix order) and the result becomes a compound item.

Arguments:
  op            ECL_AND, ECL_OR or ECL_XOR
  lhs_op_info   left operand; receives the folded result
  rhs_op_info   right operand
  lengthptr     NULL when code is really being written; otherwise only
                  lengths are being accumulated and no code is touched

Returns:        nothing
*/

static void
fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
  PCRE2_SIZE *lengthptr)
{
BOOL lhs_neutral, rhs_neutral, lhs_special, rhs_special;
BOOL adopt_rhs = FALSE;
BOOL negate_result = FALSE;
uint32_t *lhs_words = lhs_op_info->bits.classwords;
const uint32_t *rhs_words = rhs_op_info->bits.classwords;

/* Classify each side relative to the operator. */

switch (op)
  {
  case ECL_AND:
  lhs_neutral = (lhs_op_info->op_single_type == ECL_ANY);
  rhs_neutral = (rhs_op_info->op_single_type == ECL_ANY);
  lhs_special = (lhs_op_info->op_single_type == ECL_NONE);
  rhs_special = (rhs_op_info->op_single_type == ECL_NONE);
  break;

  case ECL_OR:
  case ECL_XOR:
  lhs_neutral = (lhs_op_info->op_single_type == ECL_NONE);
  rhs_neutral = (rhs_op_info->op_single_type == ECL_NONE);
  lhs_special = (lhs_op_info->op_single_type == ECL_ANY);
  rhs_special = (rhs_op_info->op_single_type == ECL_ANY);
  break;

  default:
  PCRE2_DEBUG_UNREACHABLE();
  return;
  }

/* Decide what happens to the code. The tests are made in this order: RHS
neutral, LHS neutral, RHS special, LHS special, general case. */

if (rhs_neutral)
  {
  /* The result is the LHS as it stands: simply drop the RHS. */
  }
else if (lhs_neutral)
  {
  /* The result is the RHS: it replaces the LHS. */
  adopt_rhs = TRUE;
  }
else if (rhs_special)
  {
  if (op == ECL_XOR)
    {
    /* LHS ^ ANY is !LHS: keep the LHS and negate it. */
    negate_result = TRUE;
    }
  else
    {
    /* AND with NONE, or OR with ANY: the result is the RHS's single op,
    written over the start of the LHS. */
    if (lengthptr == NULL)
      lhs_op_info->code_start[0] = rhs_op_info->op_single_type;
    lhs_op_info->length = 1;
    lhs_op_info->op_single_type = rhs_op_info->op_single_type;
    }
  }
else if (lhs_special)
  {
  if (op == ECL_XOR)
    {
    /* ANY ^ RHS is !RHS: take the RHS, then negate it. */
    adopt_rhs = TRUE;
    negate_result = TRUE;
    }
  /* Otherwise the LHS already is the result: drop the RHS. */
  }
else
  {
  /* Both sides are either ECL_XCLASS or compound operations, so a real
  operator has to be appended after the RHS. */
  if (lengthptr != NULL)
    *lengthptr += 1;
  else
    {
    PCRE2_ASSERT(rhs_op_info->code_start ==
        lhs_op_info->code_start + lhs_op_info->length);
    rhs_op_info->code_start[rhs_op_info->length] = (PCRE2_UCHAR)op;
    }
  lhs_op_info->length += rhs_op_info->length + 1;
  lhs_op_info->op_single_type = 0;
  }

if (adopt_rhs)
  {
  /* Slide the RHS code down over the LHS (the regions may overlap). */
  if (lengthptr == NULL)
    memmove(lhs_op_info->code_start, rhs_op_info->code_start,
            CU2BYTES(rhs_op_info->length));
  lhs_op_info->length = rhs_op_info->length;
  lhs_op_info->op_single_type = rhs_op_info->op_single_type;
  }

/* The classbits are preserved by the negation, because they are dealt with
by the bitmap merge just below. */

if (negate_result)
  fold_negation(lhs_op_info, lengthptr, TRUE);

/* Finally merge the two bitmaps with the same operator. */

for (int w = 0; w < 8; w++)
  {
  if (op == ECL_AND)
    lhs_words[w] &= rhs_words[w];
  else if (op == ECL_OR)
    lhs_words[w] |= rhs_words[w];
  else
    lhs_words[w] ^= rhs_words[w];
  }
}


/* Forward declaration. compile_class_operand() below calls this function when
it meets a class that is flagged CLASS_IS_ECLASS, so the prototype has to be
visible before the operand parser is defined. */

static BOOL
compile_eclass_nested(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode,
  eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);

/* This function compiles one operand of an extended class expression and
describes the result in *pop_info. An operand is one of:

  . an empty class (META_CLASS_EMPTY or META_CLASS_EMPTY_NOT);
  . a nested extended class (META_CLASS or META_CLASS_NOT with the
    CLASS_IS_ECLASS bit set), which is handed to compile_eclass_nested();
  . a "leaf": a bracketed ordinary class, or an unbracketed run of characters,
    ranges and properties. The leaf is compiled by
    PRIV(compile_class_not_nested)() and the opcode it produced is then
    rewritten in place into its ECL_* form.

Arguments:
  context     the eclass context; options, xoptions, needs_bitmap,
                errorcodeptr and cb are used here
  negated     TRUE if the operand is to be compiled in negated form
  pptr        points to the parsed-pattern pointer; updated on success
  pcode       points to the code pointer; updated on success
  pop_info    receives the description of the compiled operand
  lengthptr   if not NULL, lengths are accumulated in *lengthptr and the code
                pointer is rewound to where it started before returning

Returns:      TRUE on success
              FALSE if compiling a nested class or a leaf failed
*/

static BOOL
compile_class_operand(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
  PCRE2_SIZE *lengthptr)
{
uint32_t *ptr = *pptr;
uint32_t *prev_ptr;
PCRE2_UCHAR *code = *pcode;
PCRE2_UCHAR *code_start = code;
PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;
PCRE2_SIZE extra_length;
uint32_t meta = META_CODE(*ptr);

switch (meta)
  {
  /* Empty classes: META_CLASS_EMPTY becomes ECL_NONE and
  META_CLASS_EMPTY_NOT becomes ECL_ANY, or the other way round when "negated"
  is set. The single opcode is written to *code even when lengthptr is not
  NULL; the code pointer is rewound after the switch in that case. */

  case META_CLASS_EMPTY_NOT:
  case META_CLASS_EMPTY:
  ++ptr;
  pop_info->length = 1;
  if ((meta == META_CLASS_EMPTY) == negated)
    {
    *code++ = pop_info->op_single_type = ECL_ANY;
    memset(pop_info->bits.classbits, 0xff, 32);
    }
  else
    {
    *code++ = pop_info->op_single_type = ECL_NONE;
    memset(pop_info->bits.classbits, 0, 32);
    }
  break;

  case META_CLASS:
  case META_CLASS_NOT:
  if ((*ptr & CLASS_IS_ECLASS) != 0)
    {
    /* A nested extended class. This path jumps straight to DONE, so
    pop_info->code_start and the *lengthptr adjustment made after the switch
    are not applied here; presumably compile_eclass_nested() takes care of
    both (its body is not visible from here). */

    if (!compile_eclass_nested(context, negated, &ptr, &code,
                               pop_info, lengthptr))
      return FALSE;

    PCRE2_ASSERT(*ptr == META_CLASS_END);
    ptr++;
    goto DONE;
    }

  /* An ordinary bracketed class: step over the opening meta item and treat
  the contents as a leaf. */

  ptr++;
  /* Fall through */

  default:
  /* Scan forward characters, ranges, and properties.
  For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but
  we still need to collect that fragment up into a "leaf" OP_CLASS. */

  /* The fifth argument is TRUE when exactly one of "this class is
  META_CLASS_NOT" and "negated" holds, that is, the class's own negation
  combined with the inherited one. */

  prev_ptr = ptr;
  ptr = PRIV(compile_class_not_nested)(
    context->options, context->xoptions, ptr, &code,
    (meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
    context->errorcodeptr, context->cb, lengthptr);
  if (ptr == NULL) return FALSE;

  /* We must have a 100% guarantee that ptr increases when
  compile_class_operand() returns, even on Release builds, so that we can
  statically prove our loops terminate. */
  if (ptr <= prev_ptr)
    {
    PCRE2_DEBUG_UNREACHABLE();
    return FALSE;
    }

  /* If we fell through above, consume the closing ']'. */
  if (meta == META_CLASS || meta == META_CLASS_NOT)
    {
    PCRE2_ASSERT(*ptr == META_CLASS_END);
    ptr++;
    }

  /* Regardless of whether (lengthptr == NULL), some data will still be written
  out to *pcode, which we need: we have to peek at it, to transform the opcode
  into the ECLASS version (since we need to hoist up the bitmaps). */
  PCRE2_ASSERT(code > code_start);

  /* extra_length is whatever the leaf compiler added to *lengthptr over and
  above the code units it actually wrote between code_start and code. */
  extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;

  /* Easiest case: convert OP_ALLANY to ECL_ANY */

  if (*code_start == OP_ALLANY)
    {
    PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
    pop_info->length = 1;
    *code_start = pop_info->op_single_type = ECL_ANY;
    memset(pop_info->bits.classbits, 0xff, 32);
    }

  /* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to
  ECL_NONE / ECL_ANY respectively. */

  else if (*code_start == OP_CLASS || *code_start == OP_NCLASS)
    {
    PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
                 extra_length == 0);
    pop_info->length = 1;
    *code_start = pop_info->op_single_type =
        (*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
    memcpy(pop_info->bits.classbits, code_start + 1, 32);
    /* Rewind the code pointer, but make sure we adjust *lengthptr, because we
    do need to reserve that space (even though we only use it temporarily). */
    if (lengthptr != NULL)
      *lengthptr += code - (code_start + 1);
    code = code_start + 1;

    /* Record whether a bitmap is needed. An operand that became ECL_NONE
    requires one only if at least one bit is set in its map; an operand that
    became ECL_ANY always requires one. Once needs_bitmap is TRUE it stays
    TRUE. */

    if (!context->needs_bitmap && *code_start == ECL_NONE)
      {
      uint32_t *classwords = pop_info->bits.classwords;

      for (int i = 0; i < 8; i++)
        if (classwords[i] != 0)
          {
          context->needs_bitmap = TRUE;
          break;
          }
      }
    else
      context->needs_bitmap = TRUE;
    }

  /* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to
  ECL_XCLASS. */

  else
    {
    PCRE2_ASSERT(*code_start == OP_XCLASS);
    *code_start = pop_info->op_single_type = ECL_XCLASS;

    PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);

    /* The bitmap is read back from the compile block's classbits rather than
    from the emitted code, and the operand length includes the extra length
    that the leaf compiler accounted for via *lengthptr. */

    memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
    pop_info->length = (code - code_start) + extra_length;
    }

  break;
  }  /* End of switch(meta) */

/* When only lengths are being computed there is no stable code to point at,
so code_start is recorded as NULL. */

pop_info->code_start = (lengthptr == NULL)? code_start : NULL;

/* When only lengths are being computed, account for the code units that were
written and then rewind, so that the caller sees an unchanged code pointer. */

if (lengthptr != NULL)
  {
  *lengthptr += code - code_start;
  code = code_start;
  }

DONE:
PCRE2_ASSERT(lengthptr == NULL || (code == code_start));

*pptr = ptr;
*pcode = code;
return TRUE;
}


/* This function consumes a run of operands that are combined simply by being
written next to each other (an implicit union). The run stops at the end of
the class or at an explicit extended-class operator.

Arguments:
  context     the eclass context, passed through to the operand compiler
  negated     TRUE if the whole run is to be compiled in negated form
  pptr        points to the parsed-pattern pointer; updated on success
  pcode       points to the code pointer; updated on success
  pop_info    receives the description of the folded result
  lengthptr   NULL when code is really being written; otherwise lengths only

Returns:      TRUE on success, FALSE if compiling any operand failed
*/

static BOOL
compile_class_juxtaposition(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
  PCRE2_SIZE *lengthptr)
{
uint32_t *cursor = *pptr;
PCRE2_UCHAR *out = *pcode;
#ifdef PCRE2_DEBUG
PCRE2_UCHAR *start_code = *pcode;
#endif

/* The negation is folded in at compile time (see the comments in
compile_class_binary_loose()). By De Morgan:

     A juxtapose B     ->   A || B
   !(A juxtapose B)    ->  !A && !B

so both the joining operator and the negation of every operand are fixed for
the whole run. */

const int joiner = negated? ECL_AND : ECL_OR;
const BOOL operand_negated = negated? TRUE : FALSE;

/* A class is never empty here, so there is always a first operand. */

if (!compile_class_operand(context, negated, &cursor, &out, pop_info,
                           lengthptr))
  return FALSE;

/* Keep taking operands until the class ends or an explicit operator (any
meta value from META_ECLASS_AND up to META_ECLASS_NOT) is reached. */

while (*cursor != META_CLASS_END &&
       (*cursor < META_ECLASS_AND || *cursor > META_ECLASS_NOT))
  {
  eclass_op_info rhs;

  if (!compile_class_operand(context, operand_negated, &cursor, &out,
                             &rhs, lengthptr))
    return FALSE;

  /* Emit in postfix (RPN) order: the operator follows both operands. */

  fold_binary(joiner, pop_info, &rhs, lengthptr);
  if (lengthptr == NULL)
    out = pop_info->code_start + pop_info->length;
  }

PCRE2_ASSERT(lengthptr == NULL || out == start_code);

*pptr = cursor;
*pcode = out;
return TRUE;
}


/* This function consumes any unary prefix negation operators, and then the
juxtaposed operands they apply to.

Arguments:
  context     the eclass context, passed through
  negated     TRUE if the expression is to be compiled in negated form
  pptr        points to the parsed-pattern pointer; updated
  pcode       points to the code pointer; updated on success
  pop_info    receives the description of the compiled result
  lengthptr   NULL when code is really being written; otherwise lengths only

Returns:      TRUE on success, FALSE if compiling the operands failed
*/

static BOOL
compile_class_unary(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
  PCRE2_SIZE *lengthptr)
{
uint32_t *cursor;
#ifdef PCRE2_DEBUG
PCRE2_UCHAR *start_code = *pcode;
#endif

/* Each META_ECLASS_NOT simply toggles the negation that is handed down; no
code is generated for the operator itself. */

for (cursor = *pptr; *cursor == META_ECLASS_NOT; ++cursor)
  negated = !negated;

/* Publish the position just past the operators before descending; the callee
works directly on the caller's pointers. */

*pptr = cursor;

/* A class is never empty here, so an operand must follow. */

if (compile_class_juxtaposition(context, negated, pptr, pcode,
                                pop_info, lengthptr))
  {
  PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
  return TRUE;
  }

return FALSE;
}


/* Parse the tightly-binding binary operator, META_ECLASS_AND. Operands are
parsed by compile_class_unary() and combined left to right into pop_info.

Arguments:
  context     shared state, passed through to compile_class_unary()
  negated     TRUE if the whole expression is being negated
  pptr        in/out pointer into the META code
  pcode       in/out pointer into the output code
  pop_info    receives the description of the combined operand
  lengthptr   passed through to the operand and fold helpers

Returns:      TRUE on success, with *pptr and *pcode updated;
              FALSE if an operand failed, with *pptr and *pcode not updated
*/

static BOOL
compile_class_binary_tight(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
  PCRE2_SIZE *lengthptr)
{
/* The "negated" flag is folded at compile time; the reasoning is given in
compile_class_binary_loose(). With only one operator at this level, the
emitted operator and the polarity of each right-hand operand are fixed:
    A && B     ->  A && B
    !(A && B)  ->  !A || !B */

const uint32_t op = negated? ECL_OR : ECL_AND;
const BOOL rhs_negated = negated? TRUE : FALSE;
uint32_t *ptr = *pptr;
PCRE2_UCHAR *code = *pcode;
#ifdef PCRE2_DEBUG
PCRE2_UCHAR *start_code = *pcode;
#endif

/* A class is never empty, so an operand always comes first. */
if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr))
  return FALSE;

while (*ptr == META_ECLASS_AND)
  {
  eclass_op_info rhs_op_info;

  /* Step over the operator; its right-hand operand must come next. */
  ptr++;
  if (!compile_class_unary(context, rhs_negated, &ptr, &code,
                           &rhs_op_info, lengthptr))
    return FALSE;

  /* Merge the new operand into the accumulated one (infix to postfix). */
  fold_binary(op, pop_info, &rhs_op_info, lengthptr);

  /* When real code is being emitted, continue after the folded result. */
  if (lengthptr == NULL)
    code = pop_info->code_start + pop_info->length;
  }

PCRE2_ASSERT(lengthptr == NULL || code == start_code);

*pptr = ptr;
*pcode = code;
return TRUE;
}


/* Parse the loosely-binding binary operators: any META code from
META_ECLASS_OR up to META_ECLASS_XOR, which the body distinguishes as
META_ECLASS_OR, META_ECLASS_SUB, and (otherwise) META_ECLASS_XOR. Operands are
parsed by compile_class_binary_tight() and combined left to right into
pop_info.

Arguments:
  context     shared state, passed through to compile_class_binary_tight()
  negated     TRUE if the whole expression is being negated
  pptr        in/out pointer into the META code
  pcode       in/out pointer into the output code
  pop_info    receives the description of the combined operand
  lengthptr   passed through to the operand and fold helpers

Returns:      TRUE on success, with *pptr and *pcode updated;
              FALSE if an operand failed, with *pptr and *pcode not updated
*/

static BOOL
compile_class_binary_loose(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
  PCRE2_SIZE *lengthptr)
{
uint32_t *ptr = *pptr;
PCRE2_UCHAR *code = *pcode;
#ifdef PCRE2_DEBUG
PCRE2_UCHAR *start_code = *pcode;
#endif

/* Compile-time folding of the "negated" flag.

Negation should be folded away wherever possible so that simple cases reduce
fully. This matters most in 8-bit no-UTF mode, where the expression has to be
completely folded so that no OP_ECLASS is ever emitted (just as OP_XCLASS is
never emitted in that mode).

A useful consequence is that (nearly) every use of the "NOT" operator can be
avoided in the output. Suppose the input so far is:
    !(A ...
The "!" has been seen and the operand "A" is about to be parsed, but it is not
yet known whether a binary operator follows. Either of these may be the full
input:
    !(A && B)
    !(A)
The "!" can nevertheless be pushed into "A" straight away, because for every
binary operator that might follow there is an equivalent expression whose
left-hand side is !A. */

/* A class is never empty, so an operand always comes first. */
if (!compile_class_binary_tight(context, negated, &ptr, &code,
                                pop_info, lengthptr))
  return FALSE;

while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR)
  {
  /* Read the operator and step over it in one go. */
  const uint32_t meta = *ptr++;
  const BOOL is_sub = (meta == META_ECLASS_SUB);
  uint32_t op;
  BOOL op_neg;
  BOOL rhs_negated;
  eclass_op_info rhs_op_info;

  /* The left-hand side A was already compiled with the incoming polarity, so
  under negation it is !A and the rest has to be rewritten to match:

      A || B      ->  A || B
      A -- B      ->  A && !B
      A XOR B     ->  A XOR B

      !(A || B)   ->  !A && !B
      !(A -- B)   ->  !(A && !B)    ->  !A || B
      !(A XOR B)  ->  !(!A XOR !B)  ->  !A XNOR !B

  XNOR is produced as an XOR whose result is then negated (op_neg). */

  if (meta == META_ECLASS_OR)
    op = negated? ECL_AND : ECL_OR;
  else if (is_sub)
    op = negated? ECL_OR : ECL_AND;
  else
    op = ECL_XOR;

  op_neg = negated && meta == META_ECLASS_XOR;
  rhs_negated = negated? !is_sub : is_sub;

  /* The right-hand operand must come next. */
  if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code,
                                  &rhs_op_info, lengthptr))
    return FALSE;

  /* Merge the new operand into the accumulated one (infix to postfix), then
  apply the trailing negation if one is required. */
  fold_binary(op, pop_info, &rhs_op_info, lengthptr);
  if (op_neg)
    fold_negation(pop_info, lengthptr, FALSE);

  /* When real code is being emitted, continue after the folded result. */
  if (lengthptr == NULL)
    code = pop_info->code_start + pop_info->length;
  }

PCRE2_ASSERT(lengthptr == NULL || code == start_code);

*pptr = ptr;
*pcode = code;
return TRUE;
}


/* Translate the META codes of one (possibly nested) extended class into
opcodes written at *pcode.

On entry *pptr must be at a META_CLASS or META_CLASS_NOT code that carries the
CLASS_IS_ECLASS bit. A META_CLASS_NOT opener flips the incoming "negated"
flag. The body is then compiled by compile_class_binary_loose() as a
left-associative sequence of operator applications.

On successful return *pptr is at the matching META_CLASS_END. Note that *pptr
is stepped past the opening code before the body is parsed, so it has been
advanced even when FALSE is returned.

Returns:      TRUE on success; FALSE if the class body failed to compile
*/

static BOOL
compile_eclass_nested(eclass_context *context, BOOL negated,
  uint32_t **pptr, PCRE2_UCHAR **pcode,
  eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
{
uint32_t *ptr = *pptr;
#ifdef PCRE2_DEBUG
PCRE2_UCHAR *start_code = *pcode;
#endif

/* A nested class always has the CLASS_IS_ECLASS bit set on its opener. */
PCRE2_ASSERT(*ptr == (META_CLASS | CLASS_IS_ECLASS) ||
             *ptr == (META_CLASS_NOT | CLASS_IS_ECLASS));

/* A negated opener inverts whatever polarity was inherited. */
if (*ptr == (META_CLASS_NOT | CLASS_IS_ECLASS))
  negated = !negated;

/* Step over the opening code. */
*pptr = ptr + 1;

/* A class is never empty, so an operand always comes first. */
if (compile_class_binary_loose(context, negated, pptr, pcode,
                               pop_info, lengthptr))
  {
  PCRE2_ASSERT(**pptr == META_CLASS_END);
  PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
  return TRUE;
  }

return FALSE;
}

/* Non-static entry point for compiling an extended character class.

A provisional OP_ECLASS header (one opcode unit, LINK_SIZE units for the
length, one flags unit) is written at *pcode and the class body is compiled
after it by compile_eclass_nested(). The folded result returned in op_info
then decides the final form of the item, which is one of:

  OP_ALLANY              the result is ECL_ANY and all eight bitmap words are
                           0xffffffff
  OP_NCLASS / OP_CLASS   the result is ECL_ANY / ECL_NONE; followed by the
                           32-byte bitmap
  OP_XCLASS              the result is ECL_XCLASS (SUPPORT_WIDE_CHARS only)
  OP_ECLASS              the result is not a single operand
                           (SUPPORT_WIDE_CHARS only)

When lengthptr is not NULL, *lengthptr is adjusted to account for the space
needed, and for OP_XCLASS and OP_ECLASS only a truncated header is written.

Arguments:
  options        stored in the context handed to the class parser
  xoptions       stored in the context handed to the class parser
  pptr           in/out pointer into the META code, passed on to
                   compile_eclass_nested()
  pcode          in/out pointer into the output code; updated on success only
  errorcodeptr   stored in the context handed to the class parser
  cb             stored in the context handed to the class parser
  lengthptr      NULL, or pointer to the running length to be adjusted

Returns:         TRUE on success; FALSE if compile_eclass_nested() failed
*/

BOOL
PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
  uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
  compile_block *cb, PCRE2_SIZE *lengthptr)
{
eclass_context context;
eclass_op_info op_info;
/* The length on entry, so that the space requested so far by this call can
be computed later as (*lengthptr - previous_length). */
PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
PCRE2_UCHAR *code = *pcode;
PCRE2_UCHAR *previous;
BOOL allbitsone = TRUE;

context.needs_bitmap = FALSE;
context.options = options;
context.xoptions = xoptions;
context.errorcodeptr = errorcodeptr;
context.cb = cb;

/* Write a provisional OP_ECLASS header; "previous" remembers where the item
starts so that it can be rewound over or patched afterwards. */
previous = code;
*code++ = OP_ECLASS;
code += LINK_SIZE;
*code++ = 0;  /* Flags, currently zero. */
if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
  return FALSE;

if (lengthptr != NULL)
  {
  *lengthptr += code - previous;
  code = previous;
  /* (*lengthptr - previous_length) now holds the amount of buffer that
  we require to make the call to compile_class_nested() with
  lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
  before that call. */
  }

/* Do some useful counting of what's in the bitmap: allbitsone stays TRUE only
if every one of the eight 32-bit words has all of its bits set. */
for (int i = 0; i < 8; i++)
  if (op_info.bits.classwords[i] != 0xffffffff)
    {
    allbitsone = FALSE;
    break;
    }

/* After constant-folding the extended class syntax, it may turn out to be
a simple class after all. In that case, we can unwrap it from the
OP_ECLASS container - and in fact, we must do so, because in 8-bit
no-Unicode mode the matcher is compiled without support for OP_ECLASS. */

/* Without SUPPORT_WIDE_CHARS the block below is unconditional (the result is
only asserted to be a single operand); with it, the block is the "if" branch
and the OP_ECLASS handling further down is its "else". */

#ifndef SUPPORT_WIDE_CHARS
PCRE2_ASSERT(op_info.op_single_type != 0);
#else
if (op_info.op_single_type != 0)
#endif
  {
  /* Rewind back over the OP_ECLASS. */
  code = previous;

  /* If the bits are all ones, and the "high characters" are all matched
  too, we use a special-cased encoding of OP_ALLANY. */

  if (op_info.op_single_type == ECL_ANY && allbitsone)
    {
    /* Advancing code means rewinding lengthptr, at this point. */
    if (lengthptr != NULL) *lengthptr -= 1;
    *code++ = OP_ALLANY;
    }

  /* If the high bits are all matched / all not-matched, then we emit an
  OP_NCLASS/OP_CLASS respectively. */

  else if (op_info.op_single_type == ECL_ANY ||
           op_info.op_single_type == ECL_NONE)
    {
    /* One opcode unit plus the 32-byte bitmap expressed in code units. */
    PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));

    if (lengthptr != NULL)
      {
      /* Only raise the request if what has been asked for so far by this
      call is less than the final item needs. */
      if (required_len > (*lengthptr - previous_length))
      *lengthptr = previous_length + required_len;
      }

    /* Advancing code means rewinding lengthptr, at this point. */
    if (lengthptr != NULL) *lengthptr -= required_len;
    *code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
    memcpy(code, op_info.bits.classbits, 32);
    code += 32 / sizeof(PCRE2_UCHAR);
    }

  /* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
  there, but, we pulled out its bitmap into op_info, so now we have to
  put that back into the OP_XCLASS. */

  else
    {
#ifndef SUPPORT_WIDE_CHARS
    PCRE2_DEBUG_UNREACHABLE();
#else
    BOOL need_map = context.needs_bitmap;
    PCRE2_SIZE required_len;

    PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
    /* The existing OP_XCLASS item, plus room for the bitmap if one is to be
    inserted into it. */
    required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);

    if (lengthptr != NULL)
      {
      /* Don't unconditionally request all the space we need - we may
      already have asked for more during processing of the ECLASS. */
      if (required_len > (*lengthptr - previous_length))
        *lengthptr = previous_length + required_len;

      /* The code we write out here won't be ignored, even during the
      (lengthptr != NULL) phase, because if there's a following quantifier
      it will peek backwards. So we do have to write out a (truncated)
      OP_XCLASS, even on this branch. */
      *lengthptr -= 1 + LINK_SIZE + 1;
      *code++ = OP_XCLASS;
      PUT(code, 0, 1 + LINK_SIZE + 1);
      code += LINK_SIZE;
      *code++ = 0;
      }
    else
      {
      PCRE2_UCHAR *rest;
      PCRE2_SIZE rest_len;
      PCRE2_UCHAR flags;

      /* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
      PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
      rest = op_info.code_start + 1 + LINK_SIZE + 1;
      rest_len = (op_info.code_start + op_info.length) - rest;

      /* First read any data we use, before memmove splats it. */
      flags = op_info.code_start[1 + LINK_SIZE];
      PCRE2_ASSERT((flags & XCL_MAP) == 0);

      /* Next do the memmove before any writes. The data after the existing
      header is moved to where it belongs relative to the start of the item,
      after the new header and (if present) the bitmap. */
      memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
              rest, CU2BYTES(rest_len));

      /* Finally write the header data. */
      *code++ = OP_XCLASS;
      PUT(code, 0, (int)required_len);
      code += LINK_SIZE;
      *code++ = flags | (need_map? XCL_MAP : 0);
      if (need_map)
        {
        memcpy(code, op_info.bits.classbits, 32);
        code += 32 / sizeof(PCRE2_UCHAR);
        }
      /* Step over the data that the memmove above put in place. */
      code += rest_len;
      }
#endif /* SUPPORT_WIDE_CHARS */
    }
  }

/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
to do some adjustment to insert the bitmap if we have one. */

#ifdef SUPPORT_WIDE_CHARS
else
  {
  BOOL need_map = context.needs_bitmap;
  /* Header, optional bitmap, then the compiled class expression. */
  PCRE2_SIZE required_len =
    1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;

  if (lengthptr != NULL)
    {
    /* Only raise the request if what has been asked for so far by this call
    is less than the final item needs. */
    if (required_len > (*lengthptr - previous_length))
      *lengthptr = previous_length + required_len;

    /* As for the XCLASS branch above, we do have to write out a dummy
    OP_ECLASS, because of the backwards peek by the quantifier code. Write
    out a (truncated) OP_ECLASS, even on this branch. */
    *lengthptr -= 1 + LINK_SIZE + 1;
    *code++ = OP_ECLASS;
    PUT(code, 0, 1 + LINK_SIZE + 1);
    code += LINK_SIZE;
    *code++ = 0;
    }
  else
    {
    if (need_map)
      {
      /* Set the ECL_MAP flag in the header, shift everything after the
      header up to open a gap for the bitmap, then copy the bitmap into
      the gap. */
      PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
      previous[1 + LINK_SIZE] |= ECL_MAP;
      memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
              CU2BYTES(code - map_start));
      memcpy(map_start, op_info.bits.classbits, 32);
      code += 32 / sizeof(PCRE2_UCHAR);
      }
    /* Fill in the length field of the header now that the item is complete. */
    PUT(previous, 1, (int)(code - previous));
    }
  }
#endif /* SUPPORT_WIDE_CHARS */

*pcode = code;
return TRUE;
}

/* End of pcre2_compile_class.c */