mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
Enhance keep_tokens option for RubyVM::AbstractSyntaxTree parsing methods
Implementations of the Language Server Protocol (LSP) sometimes need token information. For example, both `m(1)` and `m(1, )` have the same AST structure apart from node locations, so it is impossible to check for the existence of `,` from the AST. However, in the latter case, it might be better to suggest a list of variables for the second argument. Token information is important for such cases. This commit adds these methods. * Add `keep_tokens` option for `RubyVM::AbstractSyntaxTree.parse`, `.parse_file` and `.of` * Add `RubyVM::AbstractSyntaxTree::Node#tokens` which returns tokens for the node including tokens for descendant nodes. * Add `RubyVM::AbstractSyntaxTree::Node#all_tokens` which returns all tokens for the input script regardless of the receiver node. [Feature #19070] Impacts on memory usage and performance are below: Memory usage: ``` $ cat test.rb root = RubyVM::AbstractSyntaxTree.parse_file(File.expand_path('../test/ruby/test_keyword.rb', __FILE__), keep_tokens: true) $ /usr/bin/time -f %Mkb /usr/local/bin/ruby -v ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] 11408kb # keep_tokens :false $ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb 17508kb # keep_tokens :true $ /usr/bin/time -f %Mkb /usr/local/bin/ruby test.rb 30960kb ``` Performance: ``` $ cat ../ast_keep_tokens.yml prelude: | src = <<~SRC module M class C def m1(a, b) 1 + a + b end end end SRC benchmark: without_keep_tokens: | RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: false) with_keep_tokens: | RubyVM::AbstractSyntaxTree.parse(src, keep_tokens: true) $ make benchmark COMPARE_RUBY="./ruby" ARGS=../ast_keep_tokens.yml /home/kaneko.y/.rbenv/shims/ruby --disable=gems -rrubygems -I../benchmark/lib ../benchmark/benchmark-driver/exe/benchmark-driver \ --executables="compare-ruby::./ruby -I.ext/common --disable-gem" \ --executables="built-ruby::./miniruby -I../lib -I. 
-I.ext/common ../tool/runruby.rb --extout=.ext -- --disable-gems --disable-gem" \ --output=markdown --output-compare -v ../ast_keep_tokens.yml compare-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] built-ruby: ruby 3.2.0dev (2022-11-19T09:41:54Z 19070-keep_tokens d3af1b8057) [x86_64-linux] warming up.. | |compare-ruby|built-ruby| |:--------------------|-----------:|---------:| |without_keep_tokens | 21.659k| 21.303k| | | 1.02x| -| |with_keep_tokens | 6.220k| 5.691k| | | 1.09x| -| ```
This commit is contained in:
parent
bbc4cf5f76
commit
d8601621ed
Notes:
git
2022-11-21 00:02:01 +00:00
9 changed files with 556 additions and 104 deletions
24
node.c
24
node.c
|
@ -1161,6 +1161,12 @@ struct node_buffer_struct {
|
|||
node_buffer_list_t markable;
|
||||
struct rb_ast_local_table_link *local_tables;
|
||||
VALUE mark_hash;
|
||||
// - id (sequence number)
|
||||
// - token_type
|
||||
// - text of token
|
||||
// - location info
|
||||
// Array, whose entry is array
|
||||
VALUE tokens;
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -1187,6 +1193,7 @@ rb_node_buffer_new(void)
|
|||
init_node_buffer_list(&nb->markable, (node_buffer_elem_t*)((size_t)nb->unmarkable.head + bucket_size));
|
||||
nb->local_tables = 0;
|
||||
nb->mark_hash = Qnil;
|
||||
nb->tokens = Qnil;
|
||||
return nb;
|
||||
}
|
||||
|
||||
|
@ -1418,7 +1425,10 @@ rb_ast_update_references(rb_ast_t *ast)
|
|||
void
|
||||
rb_ast_mark(rb_ast_t *ast)
|
||||
{
|
||||
if (ast->node_buffer) rb_gc_mark(ast->node_buffer->mark_hash);
|
||||
if (ast->node_buffer) {
|
||||
rb_gc_mark(ast->node_buffer->mark_hash);
|
||||
rb_gc_mark(ast->node_buffer->tokens);
|
||||
}
|
||||
if (ast->body.compile_option) rb_gc_mark(ast->body.compile_option);
|
||||
if (ast->node_buffer) {
|
||||
node_buffer_t *nb = ast->node_buffer;
|
||||
|
@ -1477,3 +1487,15 @@ rb_ast_add_mark_object(rb_ast_t *ast, VALUE obj)
|
|||
}
|
||||
rb_hash_aset(ast->node_buffer->mark_hash, obj, Qtrue);
|
||||
}
|
||||
|
||||
// Returns the `tokens` value held by ast's node buffer.
// ast->node_buffer is dereferenced without a NULL check in this function.
VALUE
|
||||
rb_ast_tokens(rb_ast_t *ast)
|
||||
{
|
||||
return ast->node_buffer->tokens;
|
||||
}
|
||||
|
||||
// Stores `tokens` into the `tokens` slot of ast's node buffer.
// The store goes through RB_OBJ_WRITE with `ast` as its first argument
// (presumably so the GC write barrier is applied on behalf of `ast` --
// confirm against RB_OBJ_WRITE's definition).
// ast->node_buffer is dereferenced without a NULL check in this function.
void
|
||||
rb_ast_set_tokens(rb_ast_t *ast, VALUE tokens)
|
||||
{
|
||||
RB_OBJ_WRITE(ast, &ast->node_buffer->tokens, tokens);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue