ruby/lib/prism/translation/parser.rb
2024-01-29 16:09:47 +00:00

163 lines
5.2 KiB
Ruby

# frozen_string_literal: true
require "parser"
module Prism
module Translation
# This class is the entry-point for converting a prism syntax tree into the
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
class Parser < ::Parser::Base
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
#
# Every instance is built with the :error and :prism_error symbols and
# exposes the text produced by prism through #message.
class Diagnostic < ::Parser::Diagnostic
# The message generated by prism. Because this reader is defined on the
# subclass, it is the one callers reach when they ask a diagnostic for its
# message.
attr_reader :message
# Initialize a new diagnostic with the given message and location.
#
# message  - the error text reported by prism.
# location - the location of the error, forwarded to the parent
#            initializer unchanged.
def initialize(message, location)
# NOTE(review): the ivar is assigned before calling super, presumably
# because the parent initializer freezes the instance -- confirm against
# the parser gem before reordering.
@message = message
# NOTE(review): positional arguments are presumably (level, reason,
# arguments, location, highlights) -- confirm against ::Parser::Diagnostic.
super(:error, :prism_error, {}, location, [])
end
end
# NOTE(review): presumably consulted by the racc runtime that the parser gem
# is built on; confirm before renaming or removing.
Racc_debug_parser = false # :nodoc:

# Returns the integer 33.
# NOTE(review): presumably the parser gem's encoding of Ruby 3.3 -- confirm.
def version # :nodoc:
  33
end

# The default encoding for Ruby files is UTF-8.
#
# Returns the Encoding::UTF_8 constant.
def default_encoding
  Encoding::UTF_8
end

# Intentionally empty; always returns nil.
# NOTE(review): presumably overrides an error hook of the base parser, since
# errors are reported from the prism result instead -- confirm.
def yyerror # :nodoc:
end
# Parses a source buffer and returns the AST.
#
# The buffer is stored on the parser for the duration of the call and is
# cleared again on the way out, whether or not an error was raised.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  parsed = Prism.parse(code, filepath: source_buffer.name)
  program = unwrap(parsed).value

  build_ast(program, build_offset_cache(code))
ensure
  @source_buffer = nil
end
# Parses a source buffer and returns a two-element array holding the AST
# followed by the source code comments.
#
# The buffer is stored on the parser for the duration of the call and is
# cleared again on the way out, whether or not an error was raised.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)

  parsed = unwrap(Prism.parse(code, filepath: source_buffer.name))
  ast = build_ast(parsed.value, cache)
  comments = build_comments(parsed.comments, cache)

  [ast, comments]
ensure
  @source_buffer = nil
end
# Parses a source buffer and returns a three-element array holding the AST,
# the source code comments, and the tokens emitted by the lexer.
#
# The second parameter is accepted but never read by this method.
#
# The buffer is stored on the parser for the duration of the call and is
# cleared again on the way out, whether or not an error was raised.
def tokenize(source_buffer, _recover = false)
  @source_buffer = source_buffer
  code = source_buffer.source
  cache = build_offset_cache(code)

  lexed = unwrap(Prism.parse_lex(code, filepath: source_buffer.name))
  program, tokens = lexed.value

  ast = build_ast(program, cache)
  comments = build_comments(lexed.comments, cache)
  [ast, comments, build_tokens(tokens, cache)]
ensure
  @source_buffer = nil
end
# Since prism resolves num params for us, no declaration bookkeeping is
# done here. The method only reports whether the first child of the given
# node is named like a numbered parameter, i.e. an underscore followed by a
# single digit from 1 to 9.
def try_declare_numparam(node)
  name = node.children[0]
  name.match?(/\A_[1-9]\z/)
end
private
# If there was an error generated during the parse, then raise an
# appropriate syntax error. Otherwise return the result.
#
# Only the first error of the result is reported. It is wrapped in a
# Diagnostic whose range is computed against the current source buffer and
# raised as a ::Parser::SyntaxError.
def unwrap(result)
  if result.success?
    result
  else
    first_error = result.errors.first
    cache = build_offset_cache(source_buffer.source)

    text = first_error.message
    range = build_range(first_error.location, cache)
    raise ::Parser::SyntaxError, Diagnostic.new(text, range)
  end
end
# Prism deals with offsets in bytes, while the parser gem deals with
# offsets in characters. We need to handle this conversion in order to
# build the parser gem AST.
#
# The returned object answers #[] with the character offset that matches a
# given byte offset:
#
# * If the bytesize of the source is the same as the length, then we can
#   just use the offset directly, so a lambda returning its argument is
#   handed back.
# * Otherwise an array is returned holding one entry per byte of the
#   source, plus a trailing entry so that an end offset equal to the
#   bytesize maps to the source length.
#
# The array is filled in a single pass over the characters of the source.
# This avoids re-slicing the source prefix on every lookup, which made the
# translation of files containing multi-byte characters quadratic in the
# size of the file.
#
# Offsets are expected to sit on character boundaries and within the
# source: in the array form a byte offset inside a multi-byte character
# maps to that character's offset, and an offset past the end maps to nil.
def build_offset_cache(source)
  return ->(offset) { offset } if source.bytesize == source.length

  char_offsets = []
  char_count = 0

  source.each_char do |char|
    # Every byte of this character maps back to the same character offset.
    char.bytesize.times { char_offsets << char_count }
    char_count += 1
  end

  # Entry for the position one past the final byte.
  char_offsets << char_count
end
# Build the parser gem AST from the prism AST.
#
# A Compiler is created with this parser and the offset cache, and the
# program node is asked to accept it. Whatever that call returns is the
# result.
def build_ast(program, offset_cache)
  visitor = Compiler.new(self, offset_cache)
  program.accept(visitor)
end
# Build the parser gem comments from the prism comments.
#
# Each prism comment contributes one ::Parser::Source::Comment, constructed
# from the range that corresponds to the comment's location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end
# Build the parser gem tokens from the prism tokens.
#
# Only the first element of each entry in the given list is handed to the
# Lexer, together with the current source buffer and the offset cache. The
# lexer's array form is returned.
def build_tokens(tokens, offset_cache)
  lexer = Lexer.new(source_buffer, tokens.map { |entry| entry.first }, offset_cache)
  lexer.to_a
end
# Build a range from a prism location.
#
# The start and end offsets of the location are looked up in the offset
# cache, and the resulting pair is attached to the current source buffer
# in a ::Parser::Source::Range.
def build_range(location, offset_cache)
  begin_pos = offset_cache[location.start_offset]
  end_pos = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
end
# These files are loaded from inside the class body, after the methods
# above have been defined. The Compiler and Lexer constants are then marked
# private so they cannot be referenced from outside this class.
require_relative "parser/compiler"
require_relative "parser/lexer"

private_constant :Compiler, :Lexer
end
end
end