mirror of
https://github.com/ruby/ruby.git
synced 2025-08-25 14:05:02 +02:00

Essentially, this change updates `yp_unescape_calculate_difference` to
not create syntax errors, and we rely entirely on
`yp_unescape_manipulate_string` to report syntax errors.
To do that, this PR adds another (!) parameter to `unescape`:
`yp_list_t *error_list`. When present, `unescape` reports syntax
errors (and otherwise does not).
However, an edge case that needed to be addressed is reporting syntax
errors in this case:
`?\u{1234 2345}`
In a string context, it's possible to have multiple codepoints by
doing something like `"\u{1234 2345}"`; however, in the character
literal context, this is a syntax error -- only a single codepoint is
allowed.
Unfortunately, when `yp_unescape_manipulate_string` is called, there's
nothing to indicate that we are in a "character literal" context and
that only a single codepoint is valid.
To make this work, this PR:
- introduces a new static utility function in yarp.c,
`yp_char_literal_node_create_and_unescape`, which is called when
we're parsing `YP_TOKEN_CHARACTER_LITERAL`
- introduces a new (unexported) function,
`yp_unescape_manipulate_char_literal` which does the same thing as
`yp_unescape_manipulate_string` but tells `unescape` that only a
single codepoint is expected
f6a65840b5
44 lines
1.7 KiB
C
44 lines
1.7 KiB
C
#ifndef YARP_UNESCAPE_H
|
|
#define YARP_UNESCAPE_H
|
|
|
|
#include "yarp/defines.h"
|
|
#include "yarp/diagnostic.h"
|
|
#include "yarp/parser.h"
|
|
#include "yarp/util/yp_char.h"
|
|
#include "yarp/util/yp_list.h"
|
|
#include "yarp/util/yp_memchr.h"
|
|
#include "yarp/util/yp_string.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
// The type of unescape we are performing. A value of this type is passed as
// the unescape_type argument of the yp_unescape_* functions declared in this
// header to select how much of the input gets unescaped.
|
|
typedef enum {
|
|
// When we're creating a string inside of a list literal like %w, we
|
|
// shouldn't unescape anything.
|
|
YP_UNESCAPE_NONE,
|
|
|
|
// When we're unescaping a single-quoted string, we only need to unescape
|
|
// single quotes and backslashes; everything else is left as written.
|
|
YP_UNESCAPE_MINIMAL,
|
|
|
|
// When we're unescaping a double-quoted string, we need to unescape all
|
|
// escapes.
|
|
YP_UNESCAPE_ALL
|
|
} yp_unescape_type_t;
|
|
|
|
// Unescape the contents of the given token into the given string using the
// given unescape mode (see yp_unescape_type_t).
//
// According to the change description that accompanies this header, this is
// the function relied upon to report syntax errors found in escape sequences.
//
// NOTE(review): no token parameter is declared here; presumably the "given
// token" is reached through `parser` or is already referenced by `string` --
// confirm against the implementation.
|
|
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
|
// Character-literal variant of yp_unescape_manipulate_string. Per the change
// description, it does the same thing but tells the unescaper that only a
// single codepoint is expected, so that a literal such as ?\u{1234 2345} is a
// syntax error even though "\u{1234 2345}" is allowed in a string.
// Declared without YP_EXPORTED_FUNCTION, i.e. it is not part of the exported
// API.
void yp_unescape_manipulate_char_literal(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
|
|
|
// Accepts a source string (`start`, spanning `length` bytes) and a type of
// unescaping, and returns the unescaped version through `result`.
|
|
// The caller must yp_string_free(result); after calling this function.
// NOTE(review): the bool return presumably signals success -- confirm against
// the implementation.
|
|
YP_EXPORTED_FUNCTION bool yp_unescape_string(const uint8_t *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
|
|
|
|
// Returns the number of bytes that encompass the first escape sequence in the
|
|
// given string (`value`), for the given unescape mode.
//
// Per the change description that accompanies this header, this function does
// not create syntax errors itself; reporting is left to
// yp_unescape_manipulate_string.
//
// NOTE(review): `expect_single_codepoint` presumably marks the character
// literal context, where only one codepoint is valid -- confirm against the
// implementation.
|
|
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
|
|
|
|
#endif
|