mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
[ruby/prism] Be more defensive in the parser translator lexer
Generally I have been good about safely accessing the tokens but failed
to properly guard against no tokens in places
where it could theoretically happen through invalid syntax.
I added a test case for one occurrence; the other changes are theoretical only.
4a3866af19
This commit is contained in:
parent
a677220aba
commit
a04555c8ab
2 changed files with 32 additions and 15 deletions
|
@ -277,20 +277,20 @@ module Prism
|
|||
when :tCOMMENT
|
||||
if token.type == :EMBDOC_BEGIN
|
||||
|
||||
while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
||||
while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
||||
value += next_token.value
|
||||
index += 1
|
||||
end
|
||||
|
||||
value += next_token.value
|
||||
location = range(token.location.start_offset, lexed[index][0].location.end_offset)
|
||||
location = range(token.location.start_offset, next_token.location.end_offset)
|
||||
index += 1
|
||||
else
|
||||
is_at_eol = value.chomp!.nil?
|
||||
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
|
||||
|
||||
prev_token = lexed[index - 2][0] if index - 2 >= 0
|
||||
next_token = lexed[index][0]
|
||||
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||
next_token, _ = lexed[index]
|
||||
|
||||
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
|
||||
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
|
||||
|
@ -309,7 +309,7 @@ module Prism
|
|||
end
|
||||
end
|
||||
when :tNL
|
||||
next_token = next_token = lexed[index][0]
|
||||
next_token, _ = lexed[index]
|
||||
# Newlines after comments are emitted out of order.
|
||||
if next_token&.type == :COMMENT
|
||||
comment_newline_location = location
|
||||
|
@ -346,8 +346,8 @@ module Prism
|
|||
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
|
||||
value = nil
|
||||
when :tSTRING_BEG
|
||||
next_token = lexed[index][0]
|
||||
next_next_token = lexed[index + 1][0]
|
||||
next_token, _ = lexed[index]
|
||||
next_next_token, _ = lexed[index + 1]
|
||||
basic_quotes = value == '"' || value == "'"
|
||||
|
||||
if basic_quotes && next_token&.type == :STRING_END
|
||||
|
@ -415,7 +415,8 @@ module Prism
|
|||
while token.type == :STRING_CONTENT
|
||||
current_length += token.value.bytesize
|
||||
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
|
||||
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
|
||||
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||
is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
|
||||
# The parser gem only removes indentation when the heredoc is not nested
|
||||
not_nested = heredoc_stack.size == 1
|
||||
if is_percent_array
|
||||
|
@ -434,7 +435,7 @@ module Prism
|
|||
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
|
||||
break
|
||||
end
|
||||
token = lexed[index][0]
|
||||
token, _ = lexed[index]
|
||||
index += 1
|
||||
end
|
||||
else
|
||||
|
@ -489,7 +490,7 @@ module Prism
|
|||
end
|
||||
|
||||
if percent_array?(quote_stack.pop)
|
||||
prev_token = lexed[index - 2][0] if index - 2 >= 0
|
||||
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
|
||||
ends_with_whitespace = prev_token&.type == :WORDS_SEP
|
||||
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
|
||||
|
@ -498,7 +499,7 @@ module Prism
|
|||
end
|
||||
end
|
||||
when :tSYMBEG
|
||||
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
|
||||
if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
|
||||
next_location = token.location.join(next_token.location)
|
||||
type = :tSYMBOL
|
||||
value = next_token.value
|
||||
|
@ -513,13 +514,13 @@ module Prism
|
|||
type = :tIDENTIFIER
|
||||
end
|
||||
when :tXSTRING_BEG
|
||||
if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
|
||||
if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
|
||||
# self.`()
|
||||
type = :tBACK_REF2
|
||||
end
|
||||
quote_stack.push(value)
|
||||
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
|
||||
if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
|
||||
if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
|
||||
index += 1
|
||||
end
|
||||
|
||||
|
@ -595,9 +596,9 @@ module Prism
|
|||
previous_line = -1
|
||||
result = Float::MAX
|
||||
|
||||
while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
|
||||
while (next_token = lexed[next_token_index]&.first)
|
||||
next_token_index += 1
|
||||
next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
|
||||
next_next_token, _ = lexed[next_token_index]
|
||||
first_token_on_line = next_token.location.start_column == 0
|
||||
|
||||
# String content inside nested heredocs and interpolation is ignored
|
||||
|
|
|
@ -163,6 +163,22 @@ module Prism
|
|||
end
|
||||
end
|
||||
|
||||
# Regression test: tokenizing source with invalid syntax (a `when` with no
# condition) must surface as Parser::SyntaxError.
def test_invalid_syntax
|
||||
code = <<~RUBY
|
||||
foo do
|
||||
case bar
|
||||
when
|
||||
end
|
||||
end
|
||||
RUBY
|
||||
buffer = Parser::Source::Buffer.new("(string)")
|
||||
buffer.source = code
|
||||
|
||||
parser = Prism::Translation::Parser33.new
|
||||
# NOTE(review): presumably makes the diagnostics engine raise on every
# reported error, which is why a Parser::SyntaxError is expected below —
# confirm against the parser gem's Diagnostic::Engine docs.
parser.diagnostics.all_errors_are_fatal = true
|
||||
assert_raise(Parser::SyntaxError) { parser.tokenize(buffer) }
|
||||
end
|
||||
|
||||
def test_it_block_parameter_syntax
|
||||
it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue