# frozen_string_literal: false # # irb/ruby-lex.rb - ruby lexcal analyzer # by Keiju ISHITSUKA(keiju@ruby-lang.org) # require "ripper" require "jruby" if RUBY_ENGINE == "jruby" require_relative "nesting_parser" # :stopdoc: class RubyLex class TerminateLineInput < StandardError def initialize super("Terminate Line Input") end end def initialize(context) @context = context @line_no = 1 @prompt = nil end def self.compile_with_errors_suppressed(code, line_no: 1) begin result = yield code, line_no rescue ArgumentError # Ruby can issue an error for the code if there is an # incomplete magic comment for encoding in it. Force an # expression with a new line before the code in this # case to prevent magic comment handling. To make sure # line numbers in the lexed code remain the same, # decrease the line number by one. code = ";\n#{code}" line_no -= 1 result = yield code, line_no end result end def single_line_command?(code) command = code.split(/\s/, 2).first @context.symbol_alias?(command) || @context.transform_args?(command) end # io functions def set_input(&block) @input = block end def configure_io(io) @io = io if @io.respond_to?(:check_termination) @io.check_termination do |code| if Reline::IOGate.in_pasting? rest = check_termination_in_prev_line(code) if rest Reline.delete_text rest.bytes.reverse_each do |c| Reline.ungetc(c) end true else false end else # Accept any single-line input for symbol aliases or commands that transform args next true if single_line_command?(code) _tokens, _opens, terminated = check_code_state(code) terminated end end end if @io.respond_to?(:dynamic_prompt) @io.dynamic_prompt do |lines| lines << '' if lines.empty? tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context) line_results = IRB::NestingParser.parse_by_line(tokens) tokens_until_line = [] line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset| line_tokens.each do |token, _s| # Avoid appending duplicated token. Tokens that include "\n" like multiline tstring_content can exist in multiple lines. tokens_until_line << token if token != tokens_until_line.last end continue = process_continue(tokens_until_line) prompt(next_opens, continue, line_num_offset) end end end if @io.respond_to?(:auto_indent) and @context.auto_indent_mode @io.auto_indent do |lines, line_index, byte_pointer, is_newline| if is_newline tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context) process_indent_level(tokens, lines) else code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join last_line = lines[line_index]&.byteslice(0, byte_pointer) code += last_line if last_line tokens = self.class.ripper_lex_without_warning(code, context: @context) check_corresponding_token_depth(tokens, lines, line_index) end end end end def set_prompt(&block) @prompt = block end ERROR_TOKENS = [ :on_parse_error, :compile_error, :on_assign_error, :on_alias_error, :on_class_name_error, :on_param_error ] def self.generate_local_variables_assign_code(local_variables) "#{local_variables.join('=')}=nil;" unless local_variables.empty? end # Some part of the code is not included in Ripper's token. # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr. # With interpolated tokens, tokens.map(&:tok).join will be equal to code. def self.interpolate_ripper_ignored_tokens(code, tokens) line_positions = [0] code.lines.each do |line| line_positions << line_positions.last + line.bytesize end prev_byte_pos = 0 interpolated = [] prev_line = 1 tokens.each do |t| line, col = t.pos byte_pos = line_positions[line - 1] + col if prev_byte_pos < byte_pos tok = code.byteslice(prev_byte_pos...byte_pos) pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) prev_line += tok.count("\n") end interpolated << t prev_byte_pos = byte_pos + t.tok.bytesize prev_line += t.tok.count("\n") end if prev_byte_pos < code.bytesize tok = code.byteslice(prev_byte_pos..) pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) end interpolated end def self.ripper_lex_without_warning(code, context: nil) verbose, $VERBOSE = $VERBOSE, nil lvars_code = generate_local_variables_assign_code(context&.local_variables || []) original_code = code if lvars_code code = "#{lvars_code}\n#{code}" line_no = 0 else line_no = 1 end compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| lexer = Ripper::Lexer.new(inner_code, '-', line_no) tokens = [] lexer.scan.each do |t| next if t.pos.first == 0 prev_tk = tokens.last position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize if position_overlapped tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event) else tokens << t end end interpolate_ripper_ignored_tokens(original_code, tokens) end ensure $VERBOSE = verbose end def prompt(opens, continue, line_num_offset) ltype = ltype_from_open_tokens(opens) _indent_level, nesting_level = calc_nesting_depth(opens) @prompt&.call(ltype, nesting_level, opens.any? || continue, @line_no + line_num_offset) end def check_code_state(code) check_target_code = code.gsub(/\s*\z/, '').concat("\n") tokens = self.class.ripper_lex_without_warning(check_target_code, context: @context) opens = IRB::NestingParser.open_tokens(tokens) [tokens, opens, code_terminated?(code, tokens, opens)] end def code_terminated?(code, tokens, opens) opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens) end def save_prompt_to_context_io(opens, continue, line_num_offset) # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`. prompt(opens, continue, line_num_offset) end def readmultiline save_prompt_to_context_io([], false, 0) # multiline return @input.call if @io.respond_to?(:check_termination) # nomultiline code = '' line_offset = 0 loop do line = @input.call unless line return code.empty? ? nil : code end code << line # Accept any single-line input for symbol aliases or commands that transform args return code if single_line_command?(code) tokens, opens, terminated = check_code_state(code) return code if terminated line_offset += 1 continue = process_continue(tokens) save_prompt_to_context_io(opens, continue, line_offset) end end def each_top_level_statement loop do code = readmultiline break unless code if code != "\n" code.force_encoding(@io.encoding) yield code, @line_no end @line_no += code.count("\n") rescue TerminateLineInput end end def process_continue(tokens) # last token is always newline if tokens.size >= 2 and tokens[-2].event == :on_regexp_end # end of regexp literal return false elsif tokens.size >= 2 and tokens[-2].event == :on_semicolon return false elsif tokens.size >= 2 and tokens[-2].event == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2].tok) return false elsif !tokens.empty? and tokens.last.tok == "\\\n" return true elsif tokens.size >= 1 and tokens[-1].event == :on_heredoc_end # "EOH\n" return false elsif tokens.size >= 2 and tokens[-2].state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2].tok !~ /\A\.\.\.?\z/ # end of literal except for regexp # endless range at end of line is not a continue return true end false end def check_code_block(code, tokens) return true if tokens.empty? begin # check if parser error are available verbose, $VERBOSE = $VERBOSE, nil case RUBY_ENGINE when 'ruby' self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) end when 'jruby' JRuby.compile_ir(code) else catch(:valid) do eval("BEGIN { throw :valid, true }\n#{code}") false end end rescue EncodingError # This is for a hash with invalid encoding symbol, {"\xAE": 1} rescue SyntaxError => e case e.message when /unterminated (?:string|regexp) meets end of file/ # "unterminated regexp meets end of file" # # example: # / # # "unterminated string meets end of file" # # example: # ' return true when /syntax error, unexpected end-of-input/ # "syntax error, unexpected end-of-input, expecting keyword_end" # # example: # if true # hoge # if false # fuga # end return true when /syntax error, unexpected keyword_end/ # "syntax error, unexpected keyword_end" # # example: # if ( # end # # example: # end return false when /syntax error, unexpected '\.'/ # "syntax error, unexpected '.'" # # example: # . return false when /unexpected tREGEXP_BEG/ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" # # example: # method / f / return false end ensure $VERBOSE = verbose end last_lex_state = tokens.last.state if last_lex_state.allbits?(Ripper::EXPR_BEG) return false elsif last_lex_state.allbits?(Ripper::EXPR_DOT) return true elsif last_lex_state.allbits?(Ripper::EXPR_CLASS) return true elsif last_lex_state.allbits?(Ripper::EXPR_FNAME) return true elsif last_lex_state.allbits?(Ripper::EXPR_VALUE) return true elsif last_lex_state.allbits?(Ripper::EXPR_ARG) return false end false end # Calculates [indent_level, nesting_level]. nesting_level is used in prompt string. def calc_nesting_depth(opens) indent_level = 0 nesting_level = 0 opens.each do |t| case t.event when :on_heredoc_beg # TODO: indent heredoc when :on_tstring_beg, :on_regexp_beg, :on_symbeg # can be indented if t.tok starts with `%` when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_embexpr_beg # can be indented but not indented in current implementation when :on_embdoc_beg indent_level = 0 else nesting_level += 1 indent_level += 1 end end [indent_level, nesting_level] end def free_indent_token(opens, line_index) last_token = opens.last return unless last_token if last_token.event == :on_heredoc_beg && last_token.pos.first < line_index + 1 # accept extra indent spaces inside heredoc last_token end end def process_indent_level(tokens, lines) opens = IRB::NestingParser.open_tokens(tokens) indent_level, _nesting_level = calc_nesting_depth(opens) indent = indent_level * 2 line_index = lines.size - 2 if free_indent_token(opens, line_index) return [indent, lines[line_index][/^ */].length].max end indent end def check_corresponding_token_depth(tokens, lines, line_index) line_results = IRB::NestingParser.parse_by_line(tokens) result = line_results[line_index] return unless result # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. # Shortest open tokens can be calculated by `opens.take(min_depth)` _tokens, prev_opens, opens, min_depth = result indent_level, _nesting_level = calc_nesting_depth(opens.take(min_depth)) indent = indent_level * 2 free_indent_tok = free_indent_token(opens, line_index) prev_line_free_indent_tok = free_indent_token(prev_opens, line_index - 1) if prev_line_free_indent_tok && prev_line_free_indent_tok != free_indent_tok return indent elsif free_indent_tok return lines[line_index][/^ */].length end prev_indent_level, _prev_nesting_level = calc_nesting_depth(prev_opens) indent if indent_level < prev_indent_level end LTYPE_TOKENS = %i[ on_heredoc_beg on_tstring_beg on_regexp_beg on_symbeg on_backtick on_symbols_beg on_qsymbols_beg on_words_beg on_qwords_beg ] def ltype_from_open_tokens(opens) start_token = opens.reverse_each.find do |tok| LTYPE_TOKENS.include?(tok.event) end return nil unless start_token case start_token&.event when :on_tstring_beg case start_token&.tok when ?" then ?" when /^%.$/ then ?" when /^%Q.$/ then ?" when ?' then ?' when /^%q.$/ then ?' end when :on_regexp_beg then ?/ when :on_symbeg then ?: when :on_backtick then ?` when :on_qwords_beg then ?] when :on_words_beg then ?] when :on_qsymbols_beg then ?] when :on_symbols_beg then ?] when :on_heredoc_beg start_token&.tok =~ /<<[-~]?(['"`])\w+\1/ $1 || ?" else nil end end def check_termination_in_prev_line(code) tokens = self.class.ripper_lex_without_warning(code, context: @context) past_first_newline = false index = tokens.rindex do |t| # traverse first token before last line if past_first_newline if t.tok.include?("\n") true end elsif t.tok.include?("\n") past_first_newline = true false else false end end if index first_token = nil last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] last_line_tokens.each do |t| unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) first_token = t break end end if first_token && first_token.state != Ripper::EXPR_DOT tokens_without_last_line = tokens[0..index] code_without_last_line = tokens_without_last_line.map(&:tok).join opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line) if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line) return last_line_tokens.map(&:tok).join end end end false end end # :startdoc: