[ruby/prism] Extract out a couple more token buffer functions

341e027d23
This commit is contained in:
Kevin Newton 2023-10-11 10:12:00 -04:00
parent af8484bc39
commit 973ecf6848

View file

@ -6662,6 +6662,21 @@ typedef struct {
const uint8_t *cursor; const uint8_t *cursor;
} pm_token_buffer_t; } pm_token_buffer_t;
// Push the given byte into the token buffer.
//
// This forwards to pm_buffer_append_u8 on the buffer embedded in the token
// buffer. The token buffer's cursor field is neither read nor modified here, so
// callers remain responsible for keeping the cursor in step with what has been
// pushed.
static inline void
pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_u8(&token_buffer->buffer, byte);
}
// When we're about to return from lexing the current token and we know for sure
// that we have found an escape sequence, this function is called to copy the
// contents of the token buffer into the current string on the parser so that it
// can be attached to the correct node.
//
// NOTE(review): despite the name, no bytes are duplicated in this function. The
// buffer's value pointer and length are passed straight to
// pm_string_owned_init, so parser->current_string presumably takes over that
// allocation -- confirm against pm_string_owned_init before freeing or reusing
// the token buffer after this call.
static inline void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
}
// When we're about to return from lexing the current token, we need to flush // When we're about to return from lexing the current token, we need to flush
// all of the content that we have pushed into the buffer into the current // all of the content that we have pushed into the buffer into the current
// string. If we haven't pushed anything into the buffer, this means that we // string. If we haven't pushed anything into the buffer, this means that we
@ -6675,21 +6690,28 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end); pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
} else { } else {
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor)); pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length); pm_token_buffer_copy(parser, token_buffer);
} }
} }
// When we've found an escape sequence, we need to copy everything up to this // When we've found an escape sequence, we need to copy everything up to this
// point into the buffer because we're about to provide a string that has // point into the buffer because we're about to provide a string that has
// different content than a direct slice of the source. // different content than a direct slice of the source.
//
// It is expected that the parser's current token end will be pointing at one
// byte past the backslash that starts the escape sequence.
static void static void
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) { if (token_buffer->cursor == NULL) {
pm_buffer_init_capacity(&token_buffer->buffer, 16); pm_buffer_init_capacity(&token_buffer->buffer, 16);
pm_buffer_append_bytes(&token_buffer->buffer, parser->current.start, (size_t) (parser->current.end - parser->current.start)); start = parser->current.start;
} else { } else {
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor)); start = token_buffer->cursor;
} }
const uint8_t *end = parser->current.end - 1;
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
} }
// This is a convenience macro that will set the current token type, call the // This is a convenience macro that will set the current token type, call the
@ -7949,9 +7971,8 @@ parser_lex(pm_parser_t *parser) {
// literally. In this case we'll skip past the next character // literally. In this case we'll skip past the next character
// and find the next breakpoint. // and find the next breakpoint.
if (*breakpoint == '\\') { if (*breakpoint == '\\') {
parser->current.end = breakpoint; parser->current.end = breakpoint + 1;
pm_token_buffer_escape(parser, &token_buffer); pm_token_buffer_escape(parser, &token_buffer);
parser->current.end++;
// If we've hit the end of the file, then break out of the // If we've hit the end of the file, then break out of the
// loop by setting the breakpoint to NULL. // loop by setting the breakpoint to NULL.
@ -7967,25 +7988,25 @@ parser_lex(pm_parser_t *parser) {
case '\t': case '\t':
case '\v': case '\v':
case '\\': case '\\':
pm_buffer_append_u8(&token_buffer.buffer, peeked); pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++; parser->current.end++;
break; break;
case '\r': case '\r':
parser->current.end++; parser->current.end++;
if (peek(parser) != '\n') { if (peek(parser) != '\n') {
pm_buffer_append_u8(&token_buffer.buffer, '\r'); pm_token_buffer_push(&token_buffer, '\r');
break; break;
} }
/* fallthrough */ /* fallthrough */
case '\n': case '\n':
pm_buffer_append_u8(&token_buffer.buffer, '\n'); pm_token_buffer_push(&token_buffer, '\n');
if (parser->heredoc_end) { if (parser->heredoc_end) {
// ... if we are on the same line as a heredoc, // ... if we are on the same line as a heredoc,
// flush the heredoc and continue parsing after // flush the heredoc and continue parsing after
// heredoc_end. // heredoc_end.
parser_flush_heredoc_end(parser); parser_flush_heredoc_end(parser);
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer.buffer.value, token_buffer.buffer.length); pm_token_buffer_copy(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT); LEX(PM_TOKEN_STRING_CONTENT);
} else { } else {
// ... else track the newline. // ... else track the newline.
@ -7996,13 +8017,13 @@ parser_lex(pm_parser_t *parser) {
break; break;
default: default:
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) { if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
pm_buffer_append_u8(&token_buffer.buffer, peeked); pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++; parser->current.end++;
} else if (lex_mode->as.list.interpolation) { } else if (lex_mode->as.list.interpolation) {
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE); escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
} else { } else {
pm_buffer_append_u8(&token_buffer.buffer, '\\'); pm_token_buffer_push(&token_buffer, '\\');
pm_buffer_append_u8(&token_buffer.buffer, peeked); pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++; parser->current.end++;
} }