mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
[ruby/prism] Be more defensive in the parser translator lexer
Generally I have been good about safely accessing the tokens but failed
to properly guard against no tokens in places
where it could theoretically happen through invalid syntax.
I added a test case for one occurrence; the other changes are theoretical only.
4a3866af19
This commit is contained in:
parent
a677220aba
commit
a04555c8ab
2 changed files with 32 additions and 15 deletions
|
@ -277,20 +277,20 @@ module Prism
|
||||||
when :tCOMMENT
|
when :tCOMMENT
|
||||||
if token.type == :EMBDOC_BEGIN
|
if token.type == :EMBDOC_BEGIN
|
||||||
|
|
||||||
while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
|
||||||
value += next_token.value
|
value += next_token.value
|
||||||
index += 1
|
index += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
value += next_token.value
|
value += next_token.value
|
||||||
location = range(token.location.start_offset, lexed[index][0].location.end_offset)
|
location = range(token.location.start_offset, next_token.location.end_offset)
|
||||||
index += 1
|
index += 1
|
||||||
else
|
else
|
||||||
is_at_eol = value.chomp!.nil?
|
is_at_eol = value.chomp!.nil?
|
||||||
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
|
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
|
||||||
|
|
||||||
prev_token = lexed[index - 2][0] if index - 2 >= 0
|
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||||
next_token = lexed[index][0]
|
next_token, _ = lexed[index]
|
||||||
|
|
||||||
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
|
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
|
||||||
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
|
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
|
||||||
|
@ -309,7 +309,7 @@ module Prism
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
when :tNL
|
when :tNL
|
||||||
next_token = next_token = lexed[index][0]
|
next_token, _ = lexed[index]
|
||||||
# Newlines after comments are emitted out of order.
|
# Newlines after comments are emitted out of order.
|
||||||
if next_token&.type == :COMMENT
|
if next_token&.type == :COMMENT
|
||||||
comment_newline_location = location
|
comment_newline_location = location
|
||||||
|
@ -346,8 +346,8 @@ module Prism
|
||||||
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
|
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
|
||||||
value = nil
|
value = nil
|
||||||
when :tSTRING_BEG
|
when :tSTRING_BEG
|
||||||
next_token = lexed[index][0]
|
next_token, _ = lexed[index]
|
||||||
next_next_token = lexed[index + 1][0]
|
next_next_token, _ = lexed[index + 1]
|
||||||
basic_quotes = value == '"' || value == "'"
|
basic_quotes = value == '"' || value == "'"
|
||||||
|
|
||||||
if basic_quotes && next_token&.type == :STRING_END
|
if basic_quotes && next_token&.type == :STRING_END
|
||||||
|
@ -415,7 +415,8 @@ module Prism
|
||||||
while token.type == :STRING_CONTENT
|
while token.type == :STRING_CONTENT
|
||||||
current_length += token.value.bytesize
|
current_length += token.value.bytesize
|
||||||
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
|
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
|
||||||
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
|
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||||
|
is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
|
||||||
# The parser gem only removes indentation when the heredoc is not nested
|
# The parser gem only removes indentation when the heredoc is not nested
|
||||||
not_nested = heredoc_stack.size == 1
|
not_nested = heredoc_stack.size == 1
|
||||||
if is_percent_array
|
if is_percent_array
|
||||||
|
@ -434,7 +435,7 @@ module Prism
|
||||||
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
|
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
token = lexed[index][0]
|
token, _ = lexed[index]
|
||||||
index += 1
|
index += 1
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
@ -489,7 +490,7 @@ module Prism
|
||||||
end
|
end
|
||||||
|
|
||||||
if percent_array?(quote_stack.pop)
|
if percent_array?(quote_stack.pop)
|
||||||
prev_token = lexed[index - 2][0] if index - 2 >= 0
|
prev_token, _ = lexed[index - 2] if index - 2 >= 0
|
||||||
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
|
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
|
||||||
ends_with_whitespace = prev_token&.type == :WORDS_SEP
|
ends_with_whitespace = prev_token&.type == :WORDS_SEP
|
||||||
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
|
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
|
||||||
|
@ -498,7 +499,7 @@ module Prism
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
when :tSYMBEG
|
when :tSYMBEG
|
||||||
if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
|
if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
|
||||||
next_location = token.location.join(next_token.location)
|
next_location = token.location.join(next_token.location)
|
||||||
type = :tSYMBOL
|
type = :tSYMBOL
|
||||||
value = next_token.value
|
value = next_token.value
|
||||||
|
@ -513,13 +514,13 @@ module Prism
|
||||||
type = :tIDENTIFIER
|
type = :tIDENTIFIER
|
||||||
end
|
end
|
||||||
when :tXSTRING_BEG
|
when :tXSTRING_BEG
|
||||||
if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
|
if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
|
||||||
# self.`()
|
# self.`()
|
||||||
type = :tBACK_REF2
|
type = :tBACK_REF2
|
||||||
end
|
end
|
||||||
quote_stack.push(value)
|
quote_stack.push(value)
|
||||||
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
|
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
|
||||||
if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
|
if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
|
||||||
index += 1
|
index += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -595,9 +596,9 @@ module Prism
|
||||||
previous_line = -1
|
previous_line = -1
|
||||||
result = Float::MAX
|
result = Float::MAX
|
||||||
|
|
||||||
while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
|
while (next_token = lexed[next_token_index]&.first)
|
||||||
next_token_index += 1
|
next_token_index += 1
|
||||||
next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
|
next_next_token, _ = lexed[next_token_index]
|
||||||
first_token_on_line = next_token.location.start_column == 0
|
first_token_on_line = next_token.location.start_column == 0
|
||||||
|
|
||||||
# String content inside nested heredocs and interpolation is ignored
|
# String content inside nested heredocs and interpolation is ignored
|
||||||
|
|
|
@ -163,6 +163,22 @@ module Prism
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_invalid_syntax
|
||||||
|
code = <<~RUBY
|
||||||
|
foo do
|
||||||
|
case bar
|
||||||
|
when
|
||||||
|
end
|
||||||
|
end
|
||||||
|
RUBY
|
||||||
|
buffer = Parser::Source::Buffer.new("(string)")
|
||||||
|
buffer.source = code
|
||||||
|
|
||||||
|
parser = Prism::Translation::Parser33.new
|
||||||
|
parser.diagnostics.all_errors_are_fatal = true
|
||||||
|
assert_raise(Parser::SyntaxError) { parser.tokenize(buffer) }
|
||||||
|
end
|
||||||
|
|
||||||
def test_it_block_parameter_syntax
|
def test_it_block_parameter_syntax
|
||||||
it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")
|
it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue