Use Lrama LALR parser generator instead of Bison

https://bugs.ruby-lang.org/issues/19637

Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
Yuichiro Kaneko 2023-05-12 18:25:10 +09:00 committed by GitHub
parent d314fe42f9
commit a1b01e7701
Notes: git 2023-05-12 09:25:38 +00:00
Merged: https://github.com/ruby/ruby/pull/7798

Merged-By: yui-knk <spiketeika@gmail.com>
38 changed files with 5749 additions and 30 deletions


@ -56,7 +56,7 @@ jobs:
ruby-version: ${{ matrix.ruby }}
bundler: none
- run: echo "GNUMAKEFLAGS=-j$((1 + $(nproc --all)))" >> $GITHUB_ENV
- run: sudo apt-get install build-essential autoconf bison libyaml-dev
- run: sudo apt-get install build-essential autoconf libyaml-dev
- run: ./autogen.sh
- run: ./configure --disable-install-doc
- run: make common-srcs


@ -76,7 +76,7 @@ jobs:
run: |
set -x
sudo apt-get update -q || :
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev bison autoconf ruby
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev autoconf ruby
if: ${{ steps.diff.outcome == 'failure' }}
- name: Build


@ -42,7 +42,7 @@ jobs:
run: |
set -x
sudo apt-get update -q || :
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev bison autoconf ruby
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev autoconf ruby
if: ${{ contains(matrix.os, 'ubuntu') }}
- name: Install libraries
run: |


@ -51,7 +51,7 @@ jobs:
run: |
set -x
sudo apt-get update -q || :
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev bison autoconf ruby
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev autoconf ruby
- name: Checkout repository
uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2


@ -59,12 +59,11 @@ jobs:
- name: Install libraries
run: |
brew upgrade
brew install gmp libffi openssl@1.1 zlib autoconf automake libtool readline bison
brew install gmp libffi openssl@1.1 zlib autoconf automake libtool readline
working-directory: src
- name: Set ENV
run: |
echo "MAKEFLAGS=-j$((1 + $(sysctl -n hw.activecpu)))" >> $GITHUB_ENV
echo "PATH="/usr/local/opt/bison/bin:$PATH"" >> $GITHUB_ENV
for lib in openssl@1.1 readline; do
CONFIGURE_ARGS="${CONFIGURE_ARGS:+$CONFIGURE_ARGS }--with-${lib%@*}-dir=$(brew --prefix $lib)"
done


@ -85,7 +85,7 @@ jobs:
mv /c/Windows/System32/libcrypto-1_1-x64.dll /c/Windows/System32/libcrypto-1_1-x64.dll_
mv /c/Windows/System32/libssl-1_1-x64.dll /c/Windows/System32/libssl-1_1-x64.dll_
result=true
for e in gcc.exe ragel.exe make.exe bison.exe libcrypto-1_1-x64.dll libssl-1_1-x64.dll; do
for e in gcc.exe ragel.exe make.exe libcrypto-1_1-x64.dll libssl-1_1-x64.dll; do
echo '##['group']'$'\033[93m'$e$'\033[m'
where $e || result=false
echo '##['endgroup']'
@ -96,7 +96,7 @@ jobs:
run: |
# show version
result=true
for e in gcc ragel make bison "openssl version"; do
for e in gcc ragel make "openssl version"; do
case "$e" in *" "*) ;; *) e="$e --version";; esac
echo '##['group']'$'\033[93m'$e$'\033[m'
$e || result=false


@ -53,7 +53,7 @@ jobs:
libssl-dev libyaml-dev libreadline6-dev \
zlib1g-dev libncurses5-dev libffi-dev \
libclang1-10 \
bison autoconf
autoconf
sudo apt-get install -q -y pkg-config || :
- name: Set up Ruby
uses: ruby/setup-ruby@d2b39ad0b52eca07d23f3aa14fdf2a3fcc1f411c # v1.148.0


@ -65,7 +65,7 @@ jobs:
${arch:+cross}build-essential${arch/:/-} \
libssl-dev${arch} libyaml-dev${arch} libreadline6-dev${arch} \
zlib1g-dev${arch} libncurses5-dev${arch} libffi-dev${arch} \
bison autoconf ruby libcapstone-dev
autoconf ruby libcapstone-dev
sudo apt-get install -q -y pkg-config${arch} || :
- name: git config
run: |


@ -70,7 +70,7 @@ jobs:
${arch:+cross}build-essential${arch/:/-} \
libssl-dev${arch} libyaml-dev${arch} libreadline6-dev${arch} \
zlib1g-dev${arch} libncurses5-dev${arch} libffi-dev${arch} \
bison autoconf ruby
autoconf ruby
sudo apt-get install -q -y pkg-config${arch} || :
- name: git config
run: |


@ -66,7 +66,7 @@ jobs:
run: |
set -ex
sudo apt-get update -q || :
sudo apt-get install --no-install-recommends -q -y ruby bison make autoconf git wget
sudo apt-get install --no-install-recommends -q -y ruby make autoconf git wget
wasi_sdk_deb="wasi-sdk_${WASI_SDK_VERSION_MAJOR}.${WASI_SDK_VERSION_MINOR}_amd64.deb"
wget "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION_MAJOR}/${wasi_sdk_deb}"


@ -79,7 +79,6 @@ jobs:
run: |
iex "& {$(irm get.scoop.sh)} -RunAsAdmin"
Join-Path (Resolve-Path ~).Path "scoop\shims" >> $Env:GITHUB_PATH
scoop install winflexbison
shell: pwsh
- name: git config
run: |


@ -103,7 +103,7 @@ jobs:
run: |
set -x
sudo apt-get update -q || :
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev bison autoconf ruby
sudo apt-get install --no-install-recommends -q -y build-essential libssl-dev libyaml-dev libreadline6-dev zlib1g-dev libncurses5-dev libffi-dev autoconf ruby
- name: Install Rust
if: ${{ matrix.rust_version }}
run: rustup install ${{ matrix.rust_version }} --profile minimal


@ -298,7 +298,8 @@ configure-ext: $(EXTS_MK)
build-ext: $(EXTS_MK)
$(Q)$(MAKE) -f $(EXTS_MK) $(mflags) libdir="$(libdir)" LIBRUBY_EXTS=$(LIBRUBY_EXTS) \
EXTENCS="$(ENCOBJS)" MINIRUBY="$(MINIRUBY)" UPDATE_LIBRARIES=no $(EXTSTATIC)
EXTENCS="$(ENCOBJS)" BASERUBY="$(BASERUBY)" MINIRUBY="$(MINIRUBY)" \
UPDATE_LIBRARIES=no $(EXTSTATIC)
$(Q)$(MAKE) $(EXTS_NOTE)
exts-note: $(EXTS_MK)
@ -1208,8 +1209,8 @@ $(srcdir)/ext/ripper/ripper.c: $(srcdir)/ext/ripper/tools/preproc.rb $(srcdir)/p
$(Q) $(CHDIR) $(@D) && \
$(CAT_DEPEND) depend | \
$(exec) $(MAKE) -f - $(mflags) \
Q=$(Q) ECHO=$(ECHO) RM="$(RM1)" BISON="$(YACC)" top_srcdir=../.. srcdir=. VPATH=../.. \
RUBY="$(BASERUBY)" PATH_SEPARATOR="$(PATH_SEPARATOR)" LANG=C
Q=$(Q) ECHO=$(ECHO) RM="$(RM1)" top_srcdir=../.. srcdir=. VPATH=../.. \
RUBY="$(BASERUBY)" BASERUBY="$(BASERUBY)" PATH_SEPARATOR="$(PATH_SEPARATOR)" LANG=C
$(srcdir)/ext/json/parser/parser.c: $(srcdir)/ext/json/parser/parser.rl $(srcdir)/ext/json/parser/prereq.mk
$(ECHO) generating $@


@ -1,6 +1,7 @@
# -*- mode: makefile-gmake; indent-tabs-mode: t -*-
reconfig config.status: export MAKE:=$(MAKE)
export BASERUBY:=$(BASERUBY)
override gnumake_recursive := $(if $(findstring n,$(firstword $(MFLAGS))),,+)
override mflags := $(filter-out -j%,$(MFLAGS))
MSPECOPT += $(if $(filter -j%,$(MFLAGS)),-j)


@ -15,7 +15,6 @@
If you want to build from the git repository, you will also need:
* autoconf - 2.67 or later
* bison - 3.0 or later
* gperf - 3.1 or later
* Usually unneeded; only if you edit some source files using gperf
* ruby - 2.5 or later


@ -19,7 +19,7 @@ Ruby core development can be done either in Windows `cmd` like:
```
ridk enable ucrt64
pacman -S --needed bison %MINGW_PACKAGE_PREFIX%-openssl %MINGW_PACKAGE_PREFIX%-libyaml
pacman -S --needed %MINGW_PACKAGE_PREFIX%-openssl %MINGW_PACKAGE_PREFIX%-libyaml
cd c:\
mkdir work
@ -38,7 +38,7 @@ or in MSYS2 `bash` like:
ridk enable ucrt64
bash
pacman -S --needed bison $MINGW_PACKAGE_PREFIX-openssl $MINGW_PACKAGE_PREFIX-libyaml
pacman -S --needed $MINGW_PACKAGE_PREFIX-openssl $MINGW_PACKAGE_PREFIX-libyaml
cd /c/
mkdir work
@ -78,7 +78,6 @@ make
* dumpbin
4. If you want to build from GIT source, the following commands are required.
* bison
* patch
* sed
* ruby 2.0 or later
@ -86,7 +85,7 @@ make
You can use [scoop](https://scoop.sh/) to install them like:
```
scoop install git ruby winflexbison sed patch
scoop install git ruby sed patch
```
5. You need to install required libraries using [vcpkg](https://vcpkg.io/) like:


@ -1,6 +1,7 @@
GEN = $(srcdir)/tools/generate.rb
SRC1 = $(top_srcdir)/parse.y
SRC2 = $(srcdir)/eventids2.c
BISON = $(BASERUBY) $(top_srcdir)/tool/lrama/exe/lrama
.SUFFIXES: .y
@ -10,6 +11,7 @@ ripper.o: ripper.c
.y.c:
$(ECHO) compiling compiler $<
$(ECHO) $(BISON)
$(Q) $(BISON) -t -v -oy.tab.c $<
$(Q) sed -e "/^#/s!y\.tab\.c!$@!" -f $(top_srcdir)/tool/ytab.sed y.tab.c > $@
@$(RM) y.tab.c


@ -7,11 +7,11 @@ require 'rbconfig'
def main
yacc = ENV["YACC"] || "bison"
unless find_executable(yacc)
unless File.exist?('ripper.c') or File.exist?("#{$srcdir}/ripper.c")
raise 'missing bison; abort'
end
end
# unless find_executable(yacc)
# unless File.exist?('ripper.c') or File.exist?("#{$srcdir}/ripper.c")
# raise 'missing bison; abort'
# end
# end
$objs = %w(ripper.o)
$distcleanfiles.concat %w(ripper.y ripper.c eventids1.c eventids2table.c)
$cleanfiles.concat %w(ripper.E ripper.output y.output .eventids2-check)


@ -29,7 +29,7 @@ CPP = @CPP@
LD = @LD@
RUSTC = @RUSTC@
CARGO = @CARGO@
YACC = bison
YACC = $(BASERUBY) $(tooldir)/lrama/exe/lrama
PURIFY =
AUTOCONF = autoconf
CONFIGURE = @CONFIGURE@

tool/lrama/exe/lrama Executable file

@ -0,0 +1,7 @@
#!/usr/bin/env ruby
$LOAD_PATH << File.join(__dir__, "../lib")
require "lrama"
Lrama::Command.new.run(ARGV.dup)

tool/lrama/lib/lrama.rb Normal file

@ -0,0 +1,13 @@
require "lrama/bitmap"
require "lrama/command"
require "lrama/context"
require "lrama/digraph"
require "lrama/grammar"
require "lrama/lexer"
require "lrama/output"
require "lrama/parser"
require "lrama/report"
require "lrama/states"
require "lrama/states_reporter"
require "lrama/version"
require "lrama/warning"


@ -0,0 +1,29 @@
module Lrama
module Bitmap
def self.from_array(ary)
bit = 0
ary.each do |int|
bit |= (1 << int)
end
bit
end
def self.to_array(int)
a = []
i = 0
while int > 0 do
if int & 1 == 1
a << i
end
i += 1
int >>= 1
end
a
end
end
end
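
A minimal usage sketch of the bitmap helpers above (assuming tool/lrama/lib is on $LOAD_PATH); they encode a set of small non-negative integers as the bits of a single Integer:

```
require "lrama/bitmap"

bits = Lrama::Bitmap.from_array([0, 2, 5])  # => 37 (0b100101)
Lrama::Bitmap.to_array(bits)                # => [0, 2, 5]
```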


@ -0,0 +1,144 @@
require 'optparse'
module Lrama
class Command
def run(argv)
opt = OptionParser.new
# opt.on('-h') {|v| p v }
opt.on('-V', '--version') {|v| puts Lrama::VERSION ; exit 0 }
# Tuning the Parser
skeleton = "bison/yacc.c"
opt.on('-S', '--skeleton=FILE') {|v| skeleton = v }
opt.on('-t') { } # Do nothing
# Output Files:
header = false
header_file = nil
report = []
report_file = nil
outfile = "y.tab.c"
opt.on('-h', '--header=[FILE]') {|v| header = true; header_file = v }
opt.on('-d') { header = true }
opt.on('-r', '--report=THINGS') {|v| report = v.split(',') }
opt.on('--report-file=FILE') {|v| report_file = v }
opt.on('-v') { } # Do nothing
opt.on('-o', '--output=FILE') {|v| outfile = v }
# Hidden
trace = []
opt.on('--trace=THINGS') {|v| trace = v.split(',') }
# Error Recovery
error_recovery = false
opt.on('-e') {|v| error_recovery = true }
opt.parse!(argv)
trace_opts = validate_trace(trace)
report_opts = validate_report(report)
grammar_file = argv.shift
if !report.empty? && report_file.nil? && grammar_file
report_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".output"
end
if !header_file && header
case
when outfile
header_file = File.dirname(outfile) + "/" + File.basename(outfile, ".*") + ".h"
when grammar_file
header_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".h"
end
end
if !grammar_file
puts "File should be specified\n"
exit 1
end
Report::Duration.enable if trace_opts[:time]
warning = Lrama::Warning.new
y = File.read(grammar_file)
grammar = Lrama::Parser.new(y).parse
states = Lrama::States.new(grammar, warning, trace_state: (trace_opts[:automaton] || trace_opts[:closure]))
states.compute
context = Lrama::Context.new(states)
if report_file
reporter = Lrama::StatesReporter.new(states)
File.open(report_file, "w+") do |f|
reporter.report(f, **report_opts)
end
end
File.open(outfile, "w+") do |f|
Lrama::Output.new(
out: f,
output_file_path: outfile,
template_name: skeleton,
grammar_file_path: grammar_file,
header_file_path: header_file,
context: context,
grammar: grammar,
).render
end
if warning.has_error?
exit 1
end
end
private
def validate_report(report)
bison_list = %w[states itemsets lookaheads solved counterexamples cex all none]
others = %w[verbose]
list = bison_list + others
not_supported = %w[counterexamples cex none]
h = { grammar: true }
report.each do |r|
if list.include?(r) && !not_supported.include?(r)
h[r.to_sym] = true
else
raise "Invalid report option \"#{r}\"."
end
end
if h[:all]
(bison_list - not_supported).each do |r|
h[r.to_sym] = true
end
h.delete(:all)
end
return h
end
def validate_trace(trace)
list = %w[
none locations scan parse automaton bitsets
closure grammar resource sets muscles tools
m4-early m4 skeleton time ielr cex all
]
h = {}
trace.each do |t|
if list.include?(t)
h[t.to_sym] = true
else
raise "Invalid trace option \"#{t}\"."
end
end
return h
end
end
end
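
The tool/lrama/exe/lrama script above simply forwards ARGV to this class, so the ripper Makefile's `$(BISON) -t -v -oy.tab.c $<` invocation (see the ext/ripper hunk earlier) is roughly equivalent to the sketch below; ripper.y is assumed to exist in the working directory, and -t/-v are accepted but currently no-ops:

```
require "lrama"

# Roughly what `lrama -t -v -oy.tab.c ripper.y` does: parse the grammar,
# build the LALR(1) tables, and render the bison/yacc.c skeleton into y.tab.c.
Lrama::Command.new.run(%w[-t -v -oy.tab.c ripper.y])
```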


@ -0,0 +1,511 @@
require "lrama/report"
module Lrama
# This is passed to a template
class Context
include Report::Duration
ErrorActionNumber = -Float::INFINITY
BaseMin = -Float::INFINITY
# TODO: It might be better to pass `states` to Output directly?
attr_reader :states
def initialize(states)
@states = states
@yydefact = nil
@yydefgoto = nil
# Array of array
@_actions = []
compute_tables
end
# enum yytokentype
def yytokentype
@states.terms.reject do |term|
0 < term.token_id && term.token_id < 128
end.map do |term|
[term.id.s_value, term.token_id, term.display_name]
end.unshift(["YYEMPTY", -2, nil])
end
# enum yysymbol_kind_t
def yysymbol_kind_t
@states.symbols.map do |sym|
[sym.enum_name, sym.number, sym.comment]
end.unshift(["YYSYMBOL_YYEMPTY", -2, nil])
end
# State number of final (accepted) state
def yyfinal
@states.states.find do |state|
state.items.find do |item|
item.rule.lhs.id.s_value == "$accept" && item.end_of_rule?
end
end.id
end
def yylast
@yylast
end
# Number of terms
def yyntokens
@states.terms.count
end
# Number of nterms
def yynnts
@states.nterms.count
end
# Number of rules
def yynrules
@states.rules.count
end
# Number of states
def yynstates
@states.states.count
end
# Last token number
def yymaxutok
@states.terms.map(&:token_id).max
end
# YYTRANSLATE
#
# yytranslate is a mapping from token id to symbol number
def yytranslate
# 2 is YYSYMBOL_YYUNDEF
a = Array.new(yymaxutok, 2)
@states.terms.each do |term|
a[term.token_id] = term.number
end
return a
end
# Mapping from rule number to the line number where the rule is defined.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.
def yyrline
a = [0]
@states.rules.each do |rule|
a << rule.lineno
end
return a
end
# Mapping from symbol number to its name
def yytname
@states.symbols.sort_by(&:number).map do |sym|
sym.display_name
end
end
def yypact_ninf
@yypact_ninf
end
def yytable_ninf
@yytable_ninf
end
def yypact
@base[0...yynstates]
end
def yydefact
@yydefact
end
def yypgoto
@base[yynstates..-1]
end
def yydefgoto
@yydefgoto
end
def yytable
@table
end
def yycheck
@check
end
def yystos
@states.states.map do |state|
state.accessing_symbol.number
end
end
# Mapping from rule number to symbol number of LHS.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.
def yyr1
a = [0]
@states.rules.each do |rule|
a << rule.lhs.number
end
return a
end
# Mapping from rule number to length of RHS.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.
def yyr2
a = [0]
@states.rules.each do |rule|
a << rule.rhs.count
end
return a
end
private
# Compute these
#
# See also: "src/tables.c" of Bison.
#
# * yydefact
# * yydefgoto
# * yypact and yypgoto
# * yytable
# * yycheck
# * yypact_ninf
# * yytable_ninf
def compute_tables
report_duration(:compute_yydefact) { compute_yydefact }
report_duration(:compute_yydefgoto) { compute_yydefgoto }
report_duration(:sort_actions) { sort_actions }
# debug_sorted_actions
report_duration(:compute_packed_table) { compute_packed_table }
end
def vectors_count
@states.states.count + @states.nterms.count
end
# In compressed table, rule 0 is appended as an error case
# and reduce is represented as minus number.
def rule_id_to_action_number(rule_id)
(rule_id + 1) * -1
end
# Symbol numbers are assigned to terms first, then to nterms.
# This method calculates sequence_number for nterm.
def nterm_number_to_sequence_number(nterm_number)
nterm_number - @states.terms.count
end
# Vector is states + nterms
def nterm_number_to_vector_number(nterm_number)
@states.states.count + (nterm_number - @states.terms.count)
end
def compute_yydefact
# Default action (shift/reduce/error) for each state.
# Index is state id, value is `rule id + 1` of a default reduction.
@yydefact = Array.new(@states.states.count, 0)
@states.states.each do |state|
# Action number means
#
# * number = 0, default action
# * number = -Float::INFINITY, error by %nonassoc
# * number > 0, shift then move to state "number"
# * number < 0, reduce by "-number" rule. Rule "number" is already added by 1.
actions = Array.new(@states.terms.count, 0)
if state.reduces.map(&:selected_look_ahead).any? {|la| !la.empty? }
# Iterate reduces with reverse order so that first rule is used.
state.reduces.reverse.each do |reduce|
reduce.look_ahead.each do |term|
actions[term.number] = rule_id_to_action_number(reduce.rule.id)
end
end
end
# Shift is selected when S/R conflict exists.
state.selected_term_transitions.each do |shift, next_state|
actions[shift.next_sym.number] = next_state.id
end
state.resolved_conflicts.select do |conflict|
conflict.which == :error
end.each do |conflict|
actions[conflict.symbol.number] = ErrorActionNumber
end
# If there is a default_reduction_rule, replace its entries in
# actions with zero.
if state.default_reduction_rule
actions.map! do |e|
if e == rule_id_to_action_number(state.default_reduction_rule.id)
0
else
e
end
end
end
# If there is no default_reduction_rule, the default behavior is an
# error, so replace ErrorActionNumber with zero.
if !state.default_reduction_rule
actions.map! do |e|
if e == ErrorActionNumber
0
else
e
end
end
end
s = actions.each_with_index.map do |n, i|
[i, n]
end.select do |i, n|
# Remove default_reduction_rule entries
n != 0
end
if s.count != 0
# Entry of @_actions is an array of
#
# * State id
# * Array of tuple, [from, to] where from is term number and to is action.
# * The number of "Array of tuple" used by sort_actions
# * "width" used by sort_actions
@_actions << [state.id, s, s.count, s.last[0] - s.first[0] + 1]
end
@yydefact[state.id] = state.default_reduction_rule ? state.default_reduction_rule.id + 1 : 0
end
end
def compute_yydefgoto
# Default GOTO (nterm transition) for each nterm.
# Index is sequence number of nterm, value is state id
# of a default nterm transition destination.
@yydefgoto = Array.new(@states.nterms.count, 0)
h = {}
# Mapping from nterm to next_states
nterm_to_next_states = {}
terms_count = @states.terms.count
@states.states.each do |state|
state.nterm_transitions.each do |shift, next_state|
key = shift.next_sym
nterm_to_next_states[key] ||= []
nterm_to_next_states[key] << [state, next_state] # [from_state, to_state]
end
end
@states.nterms.each do |nterm|
if !(states = nterm_to_next_states[nterm])
default_goto = 0
not_default_gotos = []
else
default_state = states.map(&:last).group_by {|s| s }.max_by {|_, v| v.count }.first
default_goto = default_state.id
not_default_gotos = []
states.each do |from_state, to_state|
next if to_state.id == default_goto
not_default_gotos << [from_state.id, to_state.id]
end
end
k = nterm_number_to_sequence_number(nterm.number)
@yydefgoto[k] = default_goto
if not_default_gotos.count != 0
v = nterm_number_to_vector_number(nterm.number)
# Entry of @_actions is an array of
#
# * Nterm number as vector number
# * Array of tuple, [from, to] where from is state number and to is state number.
# * The number of "Array of tuple" used by sort_actions
# * "width" used by sort_actions
@_actions << [v, not_default_gotos, not_default_gotos.count, not_default_gotos.last[0] - not_default_gotos.first[0] + 1]
end
end
end
def sort_actions
# Note: this is not the same as the simple sort_by below:
#
# @sorted_actions = @_actions.sort_by do |_, _, count, width|
# [-width, -count]
# end
@sorted_actions = []
@_actions.each do |action|
if @sorted_actions.empty?
@sorted_actions << action
next
end
j = @sorted_actions.count - 1
state_id, froms_and_tos, count, width = action
while (j >= 0) do
case
when @sorted_actions[j][3] < width
j -= 1
when @sorted_actions[j][3] == width && @sorted_actions[j][2] < count
j -= 1
else
break
end
end
@sorted_actions.insert(j + 1, action)
end
end
def debug_sorted_actions
ary = Array.new
@sorted_actions.each do |state_id, froms_and_tos, count, width|
ary[state_id] = [state_id, froms_and_tos, count, width]
end
print sprintf("table_print:\n\n")
print sprintf("order [\n")
vectors_count.times do |i|
print sprintf("%d, ", @sorted_actions[i] ? @sorted_actions[i][0] : 0)
print "\n" if i % 10 == 9
end
print sprintf("]\n\n")
print sprintf("width [\n")
vectors_count.times do |i|
print sprintf("%d, ", ary[i] ? ary[i][3] : 0)
print "\n" if i % 10 == 9
end
print sprintf("]\n\n")
print sprintf("tally [\n")
vectors_count.times do |i|
print sprintf("%d, ", ary[i] ? ary[i][2] : 0)
print "\n" if i % 10 == 9
end
print sprintf("]\n\n")
end
def compute_packed_table
# yypact and yypgoto
@base = Array.new(vectors_count, BaseMin)
# yytable
@table = []
# yycheck
@check = []
# Key is froms_and_tos, value is index position
pushed = {}
userd_res = {}
lowzero = 0
high = 0
@sorted_actions.each do |state_id, froms_and_tos, _, _|
if (res = pushed[froms_and_tos])
@base[state_id] = res
next
end
res = lowzero - froms_and_tos.first[0]
while true do
ok = true
froms_and_tos.each do |from, to|
loc = res + from
if @table[loc]
# If the cell of table is set, can not use the cell.
ok = false
break
end
end
if ok && userd_res[res]
ok = false
end
if ok
break
else
res += 1
end
end
loc = 0
froms_and_tos.each do |from, to|
loc = res + from
@table[loc] = to
@check[loc] = from
end
while (@table[lowzero]) do
lowzero += 1
end
high = loc if high < loc
@base[state_id] = res
pushed[froms_and_tos] = res
userd_res[res] = true
end
@yylast = high
# replace_ninf
@yypact_ninf = (@base.select {|i| i != BaseMin } + [0]).min - 1
@base.map! do |i|
case i
when BaseMin
@yypact_ninf
else
i
end
end
@yytable_ninf = (@table.compact.select {|i| i != ErrorActionNumber } + [0]).min - 1
@table.map! do |i|
case i
when nil
0
when ErrorActionNumber
@yytable_ninf
else
i
end
end
@check.map! do |i|
case i
when nil
-1
else
i
end
end
end
end
end
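
A hedged sketch of how Context is reached from a grammar file, mirroring Command#run above (States and Warning come from files of this commit that are not shown in this excerpt):

```
require "lrama"

grammar = Lrama::Parser.new(File.read("parse.y")).parse   # any Bison-style grammar file
states  = Lrama::States.new(grammar, Lrama::Warning.new, trace_state: false)
states.compute
context = Lrama::Context.new(states)
context.yynstates  # number of LALR(1) states
context.yypact     # packed per-state base offsets into yytable
```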


@ -0,0 +1,53 @@
module Lrama
# The Digraph algorithm from https://dl.acm.org/doi/pdf/10.1145/69622.357187 (p. 625)
class Digraph
def initialize(sets, relation, base_function)
# X in the paper
@sets = sets
# R in the paper
@relation = relation
# F' in the paper
@base_function = base_function
# S in the paper
@stack = []
# N in the paper
@h = Hash.new(0)
# F in the paper
@result = {}
end
def compute
@sets.each do |x|
next if @h[x] != 0
traverse(x)
end
return @result
end
private
def traverse(x)
@stack.push(x)
d = @stack.count
@h[x] = d
@result[x] = @base_function[x] # F x = F' x
@relation[x] && @relation[x].each do |y|
traverse(y) if @h[y] == 0
@h[x] = [@h[x], @h[y]].min
@result[x] |= @result[y] # F x = F x + F y
end
if @h[x] == d
while true do
z = @stack.pop
@h[z] = Float::INFINITY
@result[z] = @result[x] # F (Top of S) = F x
break if z == x
end
end
end
end
end
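
The traversal above computes, for every element, the union of the base sets over everything reachable through the relation (cycles are collapsed via the stack). A minimal sketch with bitmap-encoded sets:

```
require "lrama/digraph"

sets     = [:a, :b, :c]
relation = { a: [:b], b: [:c] }              # :a relates to :b, :b relates to :c
base     = { a: 0b001, b: 0b010, c: 0b100 }  # F' for each element, as bitmaps
Lrama::Digraph.new(sets, relation, base).compute
# => {:a=>7, :b=>6, :c=>4}  (each F(x) is OR-ed along the relation)
```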


@ -0,0 +1,850 @@
require "forwardable"
require "lrama/lexer"
module Lrama
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
# TODO: Change this to display_name
def to_s
l = lhs.id.s_value
r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
"#{l} -> #{r}"
end
# Used by #user_actions
def as_comment
l = lhs.id.s_value
r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
"#{l}: #{r}"
end
def precedence
precedence_sym && precedence_sym.precedence
end
def initial_rule?
id == 0
end
def translated_code
if code
code.translated_code
else
nil
end
end
end
# Symbol is both of nterm and term
# `number` is both for nterm and term
# `token_id` is tokentype for term, internal sequence number for nterm
#
# TODO: Add validation for ASCII code range for Token::Char
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def term?
term
end
def nterm?
!term
end
def eof_symbol?
!!@eof_symbol
end
def error_symbol?
!!@error_symbol
end
def undef_symbol?
!!@undef_symbol
end
def accept_symbol?
!!@accept_symbol
end
def display_name
if alias_name
alias_name
else
id.s_value
end
end
# name for yysymbol_kind_t
#
# See: b4_symbol_kind_base
def enum_name
case
when accept_symbol?
name = "YYACCEPT"
when eof_symbol?
name = "YYEOF"
when term? && id.type == Token::Char
if alias_name
name = number.to_s + alias_name
else
name = number.to_s + id.s_value
end
when term? && id.type == Token::Ident
name = id.s_value
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
name = number.to_s + id.s_value
when nterm?
name = id.s_value
else
raise "Unexpected #{self}"
end
"YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
end
# comment for yysymbol_kind_t
def comment
case
when accept_symbol?
# YYSYMBOL_YYACCEPT
id.s_value
when eof_symbol?
# YYEOF
alias_name
when (term? && 0 < token_id && token_id < 128)
# YYSYMBOL_3_backslash_, YYSYMBOL_14_
alias_name || id.s_value
when id.s_value.include?("$") || id.s_value.include?("@")
# YYSYMBOL_21_1
id.s_value
else
# YYSYMBOL_keyword_class, YYSYMBOL_strings_1
alias_name || id.s_value
end
end
end
Type = Struct.new(:id, :tag, keyword_init: true)
Code = Struct.new(:type, :token_code, keyword_init: true) do
extend Forwardable
def_delegators "token_code", :s_value, :line, :column, :references
# $$, $n, @$, @n is translated to C code
def translated_code
case type
when :user_code
translated_user_code
when :initial_action
translated_initial_action_code
end
end
# * ($1) error
# * ($$) *yyvaluep
# * (@1) error
# * (@$) *yylocationp
def translated_printer_code(tag)
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.number == "$" && ref.type == :dollar # $$
# Omit "<>"
member = tag.s_value[1..-2]
str = "((*yyvaluep).#{member})"
when ref.number == "$" && ref.type == :at # @$
str = "(*yylocationp)"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in %printer."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in %printer."
else
raise "Unexpected. #{code}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
private
# * ($1) yyvsp[i]
# * ($$) yyval
# * (@1) yylsp[i]
# * (@$) yyloc
def translated_user_code
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.number == "$" && ref.type == :dollar # $$
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyval.#{member})"
when ref.number == "$" && ref.type == :at # @$
str = "(yyloc)"
when ref.type == :dollar # $n
i = -ref.position_in_rhs + ref.number
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyvsp[#{i}].#{member})"
when ref.type == :at # @n
i = -ref.position_in_rhs + ref.number
str = "(yylsp[#{i}])"
else
raise "Unexpected. #{code}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
# * ($1) error
# * ($$) yylval
# * (@1) error
# * (@$) yylloc
def translated_initial_action_code
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.number == "$" && ref.type == :dollar # $$
str = "yylval"
when ref.number == "$" && ref.type == :at # @$
str = "yylloc"
when ref.type == :dollar # $n
raise "$#{ref.number} can not be used in initial_action."
when ref.type == :at # @n
raise "@#{ref.number} can not be used in initial_action."
else
raise "Unexpected. #{code}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
end
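
The index arithmetic in translated_user_code above follows Bison's stack layout: for an action preceded by position_in_rhs RHS symbols, $n becomes yyvsp[n - position_in_rhs]. A worked sketch (the .val member name is hypothetical, standing in for the union tag):

```
# For `expr: expr '+' expr { $$ = $1 + $3; }`, position_in_rhs is 3,
# so i = -position_in_rhs + n, as in translated_user_code:
def yyvsp_offset(position_in_rhs, n)
  n - position_in_rhs
end

yyvsp_offset(3, 1)  # => -2  ... `$1` becomes `(yyvsp[-2].val)`
yyvsp_offset(3, 3)  # =>  0  ... `$3` becomes `(yyvsp[0].val)`, `$$` becomes `(yyval.val)`
```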
# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
def tag
if ex_tag
ex_tag
else
referring_symbol.tag
end
end
end
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
include Comparable
def <=>(other)
self.precedence <=> other.precedence
end
end
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
def translated_code(member)
code.translated_printer_code(member)
end
end
Union = Struct.new(:code, :lineno, keyword_init: true) do
def braces_less_code
# Remove braces
code.s_value[1..-2]
end
end
Token = Lrama::Lexer::Token
# Grammar is the result of parsing an input grammar file
class Grammar
# Grammar file information not used by States but by Output
Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true)
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers,
:lex_param, :parse_param, :initial_action,
:symbols, :types,
:rules, :_rules,
:sym_to_rules
def initialize
@printers = []
@symbols = []
@types = []
@_rules = []
@rules = []
@sym_to_rules = {}
@empty_symbol = nil
@eof_symbol = nil
@error_symbol = nil
@undef_symbol = nil
@accept_symbol = nil
@aux = Aux.new
append_special_symbols
end
def add_printer(ident_or_tags:, code:, lineno:)
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
end
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
if replace
sym.id = id
sym.alias_name = alias_name
sym.tag = tag
end
return sym
end
if sym = @symbols.find {|s| s.id == id }
return sym
end
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: true, token_id: token_id, nullable: false
)
@symbols << sym
@terms = nil
return sym
end
def add_nterm(id:, alias_name: nil, tag: nil)
return if @symbols.find {|s| s.id == id }
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: false, token_id: nil, nullable: nil,
)
@symbols << sym
@nterms = nil
return sym
end
def add_type(id:, tag:)
@types << Type.new(id: id, tag: tag)
end
def add_nonassoc(sym, precedence)
set_precedence(sym, Precedence.new(type: :nonassoc, precedence: precedence))
end
def add_left(sym, precedence)
set_precedence(sym, Precedence.new(type: :left, precedence: precedence))
end
def add_right(sym, precedence)
set_precedence(sym, Precedence.new(type: :right, precedence: precedence))
end
def set_precedence(sym, precedence)
raise "" if sym.nterm?
sym.precedence = precedence
end
def set_union(code, lineno)
@union = Union.new(code: code, lineno: lineno)
end
def add_rule(lhs:, rhs:, lineno:)
@_rules << [lhs, rhs, lineno]
end
def build_references(token_code)
token_code.references.map! do |type, number, tag, first_column, last_column|
Reference.new(type: type, number: number, ex_tag: tag, first_column: first_column, last_column: last_column)
end
token_code
end
def build_code(type, token_code)
build_references(token_code)
Code.new(type: type, token_code: token_code)
end
def prologue_first_lineno=(prologue_first_lineno)
@aux.prologue_first_lineno = prologue_first_lineno
end
def prologue=(prologue)
@aux.prologue = prologue
end
def epilogue_first_lineno=(epilogue_first_lineno)
@aux.epilogue_first_lineno = epilogue_first_lineno
end
def epilogue=(epilogue)
@aux.epilogue = epilogue
end
def prepare
normalize_rules
collect_symbols
replace_token_with_symbol
fill_symbol_number
fill_default_precedence
fill_sym_to_rules
fill_nterm_type
fill_symbol_printer
@symbols.sort_by!(&:number)
end
# TODO: More validation methods
def validate!
validate_symbol_number_uniqueness!
end
def compute_nullable
@rules.each do |rule|
case
when rule.rhs.empty?
rule.nullable = true
when rule.rhs.any?(&:term)
rule.nullable = false
else
# noop
end
end
while true do
rs = @rules.select {|e| e.nullable.nil? }
nts = nterms.select {|e| e.nullable.nil? }
rule_count_1 = rs.count
nterm_count_1 = nts.count
rs.each do |rule|
if rule.rhs.all?(&:nullable)
rule.nullable = true
end
end
nts.each do |nterm|
find_rules_by_symbol!(nterm).each do |rule|
if rule.nullable
nterm.nullable = true
end
end
end
rule_count_2 = @rules.count {|e| e.nullable.nil? }
nterm_count_2 = nterms.count {|e| e.nullable.nil? }
if (rule_count_1 == rule_count_2) && (nterm_count_1 == nterm_count_2)
break
end
end
rules.select {|r| r.nullable.nil? }.each do |rule|
rule.nullable = false
end
nterms.select {|r| r.nullable.nil? }.each do |nterm|
nterm.nullable = false
end
end
def find_symbol_by_s_value(s_value)
@symbols.find do |sym|
sym.id.s_value == s_value
end
end
def find_symbol_by_s_value!(s_value)
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
end
def find_symbol_by_id(id)
@symbols.find do |sym|
# TODO: validate uniqueness of Token#s_value and Symbol#alias_name
sym.id == id || sym.alias_name == id.s_value
end
end
def find_symbol_by_id!(id)
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
end
def find_symbol_by_number!(number)
sym = @symbols[number]
raise "Symbol not found: #{number}" unless sym
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
sym
end
def find_rules_by_symbol!(sym)
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
end
def find_rules_by_symbol(sym)
@sym_to_rules[sym.number]
end
def terms_count
terms.count
end
def terms
@terms ||= @symbols.select(&:term?)
end
def nterms_count
nterms.count
end
def nterms
@nterms ||= @symbols.select(&:nterm?)
end
private
def find_nterm_by_id!(id)
nterms.find do |nterm|
nterm.id == id
end || (raise "Nterm not found: #{id}")
end
def append_special_symbols
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
# term.number = -2
# @empty_symbol = term
# YYEOF
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
term.number = 0
term.eof_symbol = true
@eof_symbol = term
# YYerror
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYerror"), alias_name: "error")
term.number = 1
term.error_symbol = true
@error_symbol = term
# YYUNDEF
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
term.number = 2
term.undef_symbol = true
@undef_symbol = term
# $accept
term = add_nterm(id: Token.new(type: Token::Ident, s_value: "$accept"))
term.accept_symbol = true
@accept_symbol = term
end
# 1. Add $accept rule to the top of rules
# 2. Extract precedence and last action
# 3. Extract action in the middle of RHS into new Empty rule
# 4. Append id and extract action then create Rule
#
# Bison 3.8.2 uses different orders for symbol number and rule number
# when a rule has actions in the middle of a rule.
#
# For example,
#
# `program: $@1 top_compstmt`
#
# Rules are ordered like below,
#
# 1 $@1: ε
# 2 program: $@1 top_compstmt
#
# Symbols are ordered like below,
#
# 164 program
# 165 $@1
#
def normalize_rules
# 1. Add $accept rule to the top of rules
accept = find_symbol_by_s_value!("$accept")
eof = find_symbol_by_number!(0)
lineno = @_rules.first ? @_rules.first[2] : 0
@rules << Rule.new(id: @rules.count, lhs: accept, rhs: [@_rules.first[0], eof], code: nil, lineno: lineno)
extracted_action_number = 1 # @n as nterm
@_rules.each do |lhs, rhs, lineno|
a = []
rhs1 = []
code = nil
precedence_sym = nil
# 2. Extract precedence and last action
rhs.reverse.each do |r|
case
when r.is_a?(Symbol) # precedence_sym
precedence_sym = r
when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
code = r
else
rhs1 << r
end
end
rhs1.reverse!
# Bison n'th component is 1-origin
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
if token.type == Token::User_code
token.references.each do |ref|
# Need to keep position_in_rhs for actions in the middle of RHS
ref.position_in_rhs = i - 1
next if ref.type == :at
# $$, $n, @$, @n can be used in any actions
number = ref.number
if number == "$"
# TODO: Should be postponed after middle actions are extracted?
ref.referring_symbol = lhs
else
raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
rhs1[number - 1].referred = true
ref.referring_symbol = rhs1[number - 1]
end
end
end
end
rhs2 = rhs1.map do |token|
if token.type == Token::User_code
prefix = token.referred ? "@" : "$@"
new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
extracted_action_number += 1
a << [new_token, token]
new_token
else
token
end
end
# Extract actions in the middle of RHS
# into new rules.
a.each do |new_token, code|
@rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
end
c = code ? Code.new(type: :user_code, token_code: code) : nil
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
add_nterm(id: lhs)
a.each do |new_token, _|
add_nterm(id: new_token)
end
end
end
# Collect symbols from rules
def collect_symbols
@rules.flat_map(&:rhs).each do |s|
case s
when Token
if s.type == Token::Char
add_term(id: s)
end
when Symbol
# skip
else
raise "Unknown class: #{s}"
end
end
end
# Fill #number and #token_id
def fill_symbol_number
# TODO: why start from 256
token_id = 256
# YYEMPTY = -2
# YYEOF = 0
# YYerror = 1
# YYUNDEF = 2
number = 3
nterm_token_id = 0
used_numbers = {}
@symbols.map(&:number).each do |n|
used_numbers[n] = true
end
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
while used_numbers[number] do
number += 1
end
if sym.number.nil?
sym.number = number
number += 1
end
# If id is Token::Char, it uses ASCII code
if sym.term? && sym.token_id.nil?
if sym.id.type == Token::Char
# Ignore ' on both sides
case sym.id.s_value[1..-2]
when "\\b"
sym.token_id = 8
when "\\f"
sym.token_id = 12
when "\\n"
sym.token_id = 10
when "\\r"
sym.token_id = 13
when "\\t"
sym.token_id = 9
when "\\v"
sym.token_id = 11
when "\""
sym.token_id = 34
when "\'"
sym.token_id = 39
when "\\\\"
sym.token_id = 92
when /\A\\(\d+)\z/
sym.token_id = Integer($1, 8)
when /\A(.)\z/
sym.token_id = $1.bytes.first
else
raise "Unknown Char s_value #{sym}"
end
else
sym.token_id = token_id
token_id += 1
end
end
if sym.nterm? && sym.token_id.nil?
sym.token_id = nterm_token_id
nterm_token_id += 1
end
end
end
def replace_token_with_symbol
@rules.each do |rule|
rule.lhs = token_to_symbol(rule.lhs)
rule.rhs.map! do |t|
token_to_symbol(t)
end
if rule.code
rule.code.references.each do |ref|
next if ref.type == :at
if ref.referring_symbol.type != Token::User_code
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
end
end
end
end
end
def token_to_symbol(token)
case token
when Token
find_symbol_by_id!(token)
when Symbol
token
else
raise "Unknown class: #{token}"
end
end
# Rule inherits precedence from the last term in RHS.
#
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
def fill_default_precedence
@rules.each do |rule|
# Explicitly specified precedence has the highest priority
next if rule.precedence_sym
precedence_sym = nil
rule.rhs.each do |sym|
precedence_sym = sym if sym.term?
end
rule.precedence_sym = precedence_sym
end
end
def fill_sym_to_rules
@rules.each do |rule|
key = rule.lhs.number
@sym_to_rules[key] ||= []
@sym_to_rules[key] << rule
end
end
# Fill nterm's tag defined by %type decl
def fill_nterm_type
@types.each do |type|
nterm = find_nterm_by_id!(type.id)
nterm.tag = type.tag
end
end
def fill_symbol_printer
@symbols.each do |sym|
@printers.each do |printer|
printer.ident_or_tags.each do |ident_or_tag|
case ident_or_tag.type
when Token::Ident
sym.printer = printer if sym.id == ident_or_tag
when Token::Tag
sym.printer = printer if sym.tag == ident_or_tag
else
raise "Unknown token type. #{printer}"
end
end
end
end
end
def validate_symbol_number_uniqueness!
invalid = @symbols.group_by(&:number).select do |number, syms|
syms.count > 1
end
return if invalid.empty?
raise "Symbol number is dupulicated. #{invalid}"
end
end
end
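
A small sketch of the symbol naming used for yysymbol_kind_t (Symbol#enum_name above); the token id 258 and number 3 are arbitrary values for illustration:

```
require "lrama"

sym = Lrama::Symbol.new(
  id: Lrama::Token.new(type: Lrama::Token::Ident, s_value: "tNUMBER"),
  term: true, token_id: 258, number: 3
)
sym.enum_name     # => "YYSYMBOL_tNUMBER"
sym.display_name  # => "tNUMBER"
```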


@ -0,0 +1,349 @@
require "strscan"
require "lrama/report"
module Lrama
# Lexer for parse.y
class Lexer
include Lrama::Report::Duration
# s_value is semantic value
Token = Struct.new(:type, :s_value, keyword_init: true) do
Type = Struct.new(:id, :name, keyword_init: true)
attr_accessor :line, :column, :referred
# For User_code
attr_accessor :references
def to_s
"#{super} line: #{line}, column: #{column}"
end
@i = 0
@types = []
def self.define_type(name)
type = Type.new(id: @i, name: name.to_s)
const_set(name, type)
@types << type
@i += 1
end
# Token types
define_type(:P_expect) # %expect
define_type(:P_define) # %define
define_type(:P_printer) # %printer
define_type(:P_lex_param) # %lex-param
define_type(:P_parse_param) # %parse-param
define_type(:P_initial_action) # %initial-action
define_type(:P_union) # %union
define_type(:P_token) # %token
define_type(:P_type) # %type
define_type(:P_nonassoc) # %nonassoc
define_type(:P_left) # %left
define_type(:P_right) # %right
define_type(:P_prec) # %prec
define_type(:User_code) # { ... }
define_type(:Tag) # <int>
define_type(:Number) # 0
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
define_type(:Ident) # api.pure, tNUMBER
define_type(:Semicolon) # ;
define_type(:Bar) # |
define_type(:String) # "str"
define_type(:Char) # '+'
end
# States
#
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
Initial = 0
Prologue = 1
BisonDeclarations = 2
GrammarRules = 3
Epilogue = 4
# Token types
attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
:bison_declarations_tokens, :grammar_rules_tokens
def initialize(text)
@text = text
@state = Initial
# Array of texts
@prologue = []
@bison_declarations = []
@grammar_rules = []
@epilogue = []
#
@bison_declarations_tokens = []
@grammar_rules_tokens = []
@debug = false
report_duration(:lex) do
lex_text
lex_bison_declarations_tokens
lex_grammar_rules_tokens
end
end
private
def create_token(type, s_value, line, column)
t = Token.new(type: type, s_value: s_value)
t.line = line
t.column = column
return t
end
# TODO: Remove this
def lex_text
@text.each_line.with_index(1) do |string, lineno|
case @state
when Initial
# Skip until "%{"
if string == "%{\n"
@state = Prologue
@prologue << ["", lineno]
next
end
when Prologue
# Between "%{" and "%}"
if string == "%}\n"
@state = BisonDeclarations
@prologue << ["", lineno]
next
end
@prologue << [string, lineno]
when BisonDeclarations
if string == "%%\n"
@state = GrammarRules
next
end
@bison_declarations << [string, lineno]
when GrammarRules
# Between "%%" and "%%"
if string == "%%\n"
@state = Epilogue
next
end
@grammar_rules << [string, lineno]
when Epilogue
@epilogue << [string, lineno]
else
raise "Unknown state: #{@state}"
end
end
end
# See:
# * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
# * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
# * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
def lex_common(lines, tokens)
line = lines.first[1]
column = 0
ss = StringScanner.new(lines.map(&:first).join)
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
column = ss.pos
when ss.scan(/\s+/)
# skip
when ss.scan(/;/)
tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
when ss.scan(/\|/)
tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
when ss.scan(/(\d+)/)
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
when ss.scan(/%printer/)
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
when ss.scan(/%lex-param/)
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
when ss.scan(/%parse-param/)
tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
when ss.scan(/%initial-action/)
tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
when ss.scan(/%union/)
tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
when ss.scan(/%token/)
tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
when ss.scan(/%type/)
tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
when ss.scan(/%nonassoc/)
tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
when ss.scan(/%left/)
tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
when ss.scan(/%right/)
tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
when ss.scan(/%prec/)
tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
when ss.scan(/{/)
token, line = lex_user_code(ss, line, ss.pos - column, lines)
tokens << token
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
token = create_token(Token::String, string, line, ss.pos - column)
tokens << token
when ss.scan(/\/\*/)
# TODO: Need to keep comment?
line = lex_comment(ss, line, lines, "")
when ss.scan(/'(.)'/)
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(.)'/) # '\\', '\t'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(\d+)'/) # '\13'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/%empty/)
# skip
else
l = line - lines.first[1]
split = ss.string.split("\n")
col = ss.pos - split[0...l].join("\n").length
raise "Parse error (unknow token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
end
end
end
def lex_bison_declarations_tokens
lex_common(@bison_declarations, @bison_declarations_tokens)
end
def lex_user_code(ss, line, column, lines)
first_line = line
first_column = column
debug("Enter lex_user_code: #{line}")
brace_count = 1
str = "{"
# Array of [type, $n, tag, first column, last column]
# TODO: Is it better to keep string, like "$$", and use gsub?
references = []
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
str << string
next
when ss.scan(/'/)
string, line = lex_string(ss, "'", line, lines)
str << string
next
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d)+/) # @1
references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/{/)
brace_count += 1
when ss.scan(/}/)
brace_count -= 1
debug("Return lex_user_code: #{line}")
if brace_count == 0
str << ss[0]
user_code = Token.new(type: Token::User_code, s_value: str.freeze)
user_code.line = first_line
user_code.column = first_column
user_code.references = references
return [user_code, line]
end
when ss.scan(/\/\*/)
str << ss[0]
line = lex_comment(ss, line, lines, str)
else
# noop, just consume char
str << ss.getch
next
end
str << ss[0]
end
# Reached the end of input, but braces do not match
l = line - lines.first[1]
raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
def lex_string(ss, terminator, line, lines)
debug("Enter lex_string: #{line}")
str = terminator.dup
while (c = ss.getch) do
str << c
case c
when "\n"
line += 1
when terminator
debug("Return lex_string: #{line}")
return [str, line]
else
# noop
end
end
# Reached the end of input, but the quote is not closed
l = line - lines.first[1]
raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
# TODO: Need to handle // style comment
#
# /* */ style comment
def lex_comment(ss, line, lines, str)
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/\*\//)
return line
else
str << ss.getch
next
end
str << ss[0]
end
# Reached the end of input, but the comment is not terminated
l = line - lines.first[1]
raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
def lex_grammar_rules_tokens
lex_common(@grammar_rules, @grammar_rules_tokens)
end
def debug(msg)
return unless @debug
puts "#{msg}\n"
end
end
end
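
A minimal sketch of what the lexer produces for a toy grammar (the grammar text is made up; assuming tool/lrama/lib is on $LOAD_PATH):

```
require "lrama"

src = <<~GRAMMAR
  %{
  %}
  %token tNUMBER
  %%
  program: tNUMBER { $$ = $1; }
  ;
  %%
GRAMMAR

lexer = Lrama::Lexer.new(src)
lexer.bison_declarations_tokens.map { |t| t.type.name }  # => ["P_token", "Ident"]
lexer.grammar_rules_tokens.map { |t| t.type.name }       # => ["Ident_Colon", "Ident", "User_code", "Semicolon"]
```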


@ -0,0 +1,370 @@
require "erb"
require "forwardable"
require "lrama/report"
module Lrama
class Output
extend Forwardable
include Report::Duration
attr_reader :grammar_file_path, :context, :grammar
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
:yymaxutok, :yypact_ninf, :yytable_ninf
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
@out = out
@output_file_path = output_file_path
@template_name = template_name
@grammar_file_path = grammar_file_path
@header_out = header_out
@header_file_path = header_file_path
@context = context
@grammar = grammar
end
if ERB.instance_method(:initialize).parameters.last.first == :key
def self.erb(input)
ERB.new(input, trim_mode: '-')
end
else
def self.erb(input)
ERB.new(input, nil, '-')
end
end
def eval_template(file, path)
erb = self.class.erb(File.read(file))
erb.filename = file
tmp = erb.result_with_hash(context: @context, output: self)
replace_special_variables(tmp, path)
end
def render
report_duration(:render) do
tmp = eval_template(template_file, @output_file_path)
@out << tmp
if @header_file_path
tmp = eval_template(header_template_file, @header_file_path)
if @header_out
@header_out << tmp
else
File.open(@header_file_path, "w+") do |f|
f << tmp
end
end
end
end
end
# A part of b4_token_enums
def token_enums
str = ""
@context.yytokentype.each do |s_value, token_id, display_name|
s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",")
if display_name
str << sprintf(" %-30s /* %s */\n", s, display_name)
else
str << sprintf(" %s\n", s)
end
end
str
end
# b4_symbol_enum
def symbol_enum
str = ""
last_sym_number = @context.yysymbol_kind_t.last[1]
@context.yysymbol_kind_t.each do |s_value, sym_number, display_name|
s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",")
if display_name
str << sprintf(" %-40s /* %s */\n", s, display_name)
else
str << sprintf(" %s\n", s)
end
end
str
end
def yytranslate
int_array_to_string(@context.yytranslate)
end
def yyrline
int_array_to_string(@context.yyrline)
end
def yytname
string_array_to_string(@context.yytname) + " YY_NULLPTR"
end
# b4_int_type_for
def int_type_for(ary)
min = ary.min
max = ary.max
case
when (-127 <= min && min <= 127) && (-127 <= max && max <= 127)
"yytype_int8"
when (0 <= min && min <= 255) && (0 <= max && max <= 255)
"yytype_uint8"
when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767)
"yytype_int16"
when (0 <= min && min <= 65535) && (0 <= max && max <= 65535)
"yytype_uint16"
else
"int"
end
end
def symbol_actions_for_printer
str = ""
@grammar.symbols.each do |sym|
next unless sym.printer
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
#{sym.printer.translated_code(sym.tag)}
#line [@oline@] [@ofile@]
break;
STR
end
str
end
# b4_user_initial_action
def user_initial_action(comment = "")
return "" unless @grammar.initial_action
<<-STR
#{comment}
#line #{@grammar.initial_action.line} "#{@grammar_file_path}"
#{@grammar.initial_action.translated_code}
STR
end
# b4_user_actions
def user_actions
str = ""
@context.states.rules.each do |rule|
next unless rule.code
rule = rule
code = rule.code
spaces = " " * (code.column - 1)
str << <<-STR
case #{rule.id + 1}: /* #{rule.as_comment} */
#line #{code.line} "#{@grammar_file_path}"
#{spaces}#{rule.translated_code}
#line [@oline@] [@ofile@]
break;
STR
end
str << <<-STR
#line [@oline@] [@ofile@]
STR
str
end
def omit_braces_and_blanks(param)
param[1..-2].strip
end
# b4_parse_param
def parse_param
if @grammar.parse_param
omit_braces_and_blanks(@grammar.parse_param)
else
""
end
end
def lex_param
if @grammar.lex_param
omit_braces_and_blanks(@grammar.lex_param)
else
""
end
end
# b4_user_formals
def user_formals
if @grammar.parse_param
", #{parse_param}"
else
""
end
end
# b4_user_args
def user_args
if @grammar.parse_param
", #{parse_param_name}"
else
""
end
end
def extract_param_name(param)
/\A(.)+([a-zA-Z0-9_]+)\z/.match(param)[2]
end
def parse_param_name
if @grammar.parse_param
extract_param_name(parse_param)
else
""
end
end
def lex_param_name
if @grammar.lex_param
extract_param_name(lex_param)
else
""
end
end
# b4_parse_param_use
def parse_param_use(val, loc)
str = <<-STR
YY_USE (#{val});
YY_USE (#{loc});
STR
if @grammar.parse_param
str << " YY_USE (#{parse_param_name});"
end
str
end
# b4_yylex_formals
def yylex_formals
ary = ["&yylval", "&yylloc"]
if @grammar.lex_param
ary << lex_param_name
end
"(#{ary.join(', ')})"
end
# b4_table_value_equals
def table_value_equals(table, value, literal, symbol)
if literal < table.min || table.max < literal
"0"
else
"((#{value}) == #{symbol})"
end
end
# b4_yyerror_args
def yyerror_args
ary = ["&yylloc"]
if @grammar.parse_param
ary << parse_param_name
end
"#{ary.join(', ')}"
end
def template_basename
File.basename(template_file)
end
def aux
@grammar.aux
end
def int_array_to_string(ary)
last = ary.count - 1
s = ary.each_with_index.each_slice(10).map do |slice|
str = " "
slice.each do |e, i|
str << sprintf("%6d%s", e, (i == last) ? "" : ",")
end
str
end
s.join("\n")
end
def spec_mapped_header_file
@header_file_path
end
def b4_cpp_guard__b4_spec_mapped_header_file
if @header_file_path
"YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED"
else
""
end
end
private
def template_file
File.join(template_dir, @template_name)
end
def header_template_file
File.join(template_dir, "bison/yacc.h")
end
def template_dir
File.expand_path("../../../template", __FILE__)
end
def string_array_to_string(ary)
str = ""
tmp = " "
ary.each do |s|
s = s.gsub('\\', '\\\\\\\\')
s = s.gsub('"', '\\"')
if (tmp + s + " \"\",").length > 75
str << tmp << "\n"
tmp = " \"#{s}\","
else
tmp << " \"#{s}\","
end
end
str << tmp
end
def replace_special_variables(str, ofile)
str.each_line.with_index(1).map do |line, i|
line.gsub!("[@oline@]", (i + 1).to_s)
line.gsub!("[@ofile@]", "\"#{ofile}\"")
line
end.join
end
end
end
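
replace_special_variables above post-processes the rendered template, filling in the [@oline@]/[@ofile@] placeholders; an illustration calling the private helper directly (outside the normal render flow):

```
require "lrama"

out = Lrama::Output.allocate  # skip #initialize; we only exercise the private helper
out.send(:replace_special_variables, "#line [@oline@] [@ofile@]\n", "parse.c")
# => "#line 2 \"parse.c\"\n"  ([@oline@] becomes the following physical line number)
```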


@ -0,0 +1,321 @@
require "lrama/report"
module Lrama
# Parser for parse.y, generates a grammar
class Parser
include Lrama::Report::Duration
T = Lrama::Lexer::Token
class TokenScanner
def initialize(tokens)
@tokens = tokens
@index = 0
end
def current_token
@tokens[@index]
end
def current_type
current_token && current_token.type
end
def next
token = current_token
@index += 1
return token
end
def consume(*token_types)
if token_types.include?(current_type)
token = current_token
self.next
return token
end
return nil
end
def consume!(*token_types)
consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
end
def consume_multi(*token_types)
a = []
while token_types.include?(current_type)
a << current_token
self.next
end
raise "No token is consumed. #{token_types}" if a.empty?
return a
end
def eots?
current_token.nil?
end
end
def initialize(text)
@text = text
end
def parse
report_duration(:parse) do
lexer = Lexer.new(@text)
grammar = Grammar.new
process_prologue(grammar, lexer)
parse_bison_declarations(TokenScanner.new(lexer.bison_declarations_tokens), grammar)
parse_grammar_rules(TokenScanner.new(lexer.grammar_rules_tokens), grammar)
process_epilogue(grammar, lexer)
grammar.prepare
grammar.compute_nullable
grammar.validate!
grammar
end
end
private
def process_prologue(grammar, lexer)
grammar.prologue_first_lineno = lexer.prologue.first[1] if lexer.prologue.first
grammar.prologue = lexer.prologue.map(&:first).join
end
def process_epilogue(grammar, lexer)
grammar.epilogue_first_lineno = lexer.epilogue.first[1] if lexer.epilogue.first
grammar.epilogue = lexer.epilogue.map(&:first).join
end
def parse_bison_declarations(ts, grammar)
precedence_number = 0
while !ts.eots? do
case ts.current_type
when T::P_expect
ts.next
grammar.expect = ts.consume!(T::Number).s_value
when T::P_define
ts.next
# Ignore
ts.consume_multi(T::Ident)
when T::P_printer
lineno = ts.current_token.line
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:printer, code)
ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
when T::P_lex_param
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:lex_param, code)
grammar.lex_param = code.token_code.s_value
when T::P_parse_param
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:parse_param, code)
grammar.parse_param = code.token_code.s_value
when T::P_initial_action
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:initial_action, code)
ts.consume(T::Semicolon)
grammar.initial_action = code
when T::P_union
lineno = ts.current_token.line
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:union, code)
ts.consume(T::Semicolon)
grammar.set_union(code, lineno)
when T::P_token
# %token tag? (ident number? string?)+
#
# * ident can be char, e.g. '\\', '\t', '\13'
# * number is a token_id for term
#
# These are valid token declarations (from CRuby parse.y)
#
# %token END_OF_INPUT 0 "end-of-input"
# %token <id> '\\' "backslash"
# %token tSP "escaped space"
# %token tUPLUS 132 "unary+"
# %token tCOLON3 ":: at EXPR_BEG"
# %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
#
#
# See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
ts.next
opt_tag = ts.consume(T::Tag)
while (id = ts.consume(T::Ident, T::Char)) do
opt_number = ts.consume(T::Number)
opt_string = ts.consume(T::String)
# Can replace 0 (EOF)
grammar.add_term(
id: id,
alias_name: opt_string && opt_string.s_value,
token_id: opt_number && opt_number.s_value,
tag: opt_tag,
replace: true,
)
end
when T::P_type
# %type tag? (ident|char|string)+
#
# See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
ts.next
opt_tag = ts.consume(T::Tag)
while (id = ts.consume(T::Ident, T::Char, T::String)) do
grammar.add_type(
id: id,
tag: opt_tag
)
end
when T::P_nonassoc
# %nonassoc (ident|char|string)+
ts.next
while (id = ts.consume(T::Ident, T::Char, T::String)) do
sym = grammar.add_term(id: id)
grammar.add_nonassoc(sym, precedence_number)
end
precedence_number += 1
when T::P_left
# %left (ident|char|string)+
ts.next
while (id = ts.consume(T::Ident, T::Char, T::String)) do
sym = grammar.add_term(id: id)
grammar.add_left(sym, precedence_number)
end
precedence_number += 1
when T::P_right
# %right (ident|char|string)+
ts.next
while (id = ts.consume(T::Ident, T::Char, T::String)) do
sym = grammar.add_term(id: id)
grammar.add_right(sym, precedence_number)
end
precedence_number += 1
when nil
# end of input
raise "Reach to end of input within declarations"
else
raise "Unexpected token: #{ts.current_token}"
end
end
end
def parse_grammar_rules(ts, grammar)
while !ts.eots? do
parse_grammar_rule(ts, grammar)
end
end
# TODO: Take care of %prec of rule.
# If %prec exists, user code before %prec
# is NOT an action. For example "{ code 3 }" is NOT an action.
#
# keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
def parse_grammar_rule(ts, grammar)
# LHS
lhs = ts.consume!(T::Ident_Colon) # class:
lhs.type = T::Ident
rhs = parse_grammar_rule_rhs(ts, grammar)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)
while true do
case ts.current_type
when T::Bar
# |
bar_lineno = ts.current_token.line
ts.next
rhs = parse_grammar_rule_rhs(ts, grammar)
grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
when T::Semicolon
# ;
ts.next
break
when T::Ident_Colon
# Next lhs can be here because ";" is optional.
# Do not consume next token.
break
when nil
# end of input can be here when ";" is omitted
break
else
raise "Unexpected token: #{ts.current_token}"
end
end
end
def parse_grammar_rule_rhs(ts, grammar)
a = []
prec_seen = false
code_after_prec = false
while true do
# TODO: String can be here
case ts.current_type
when T::Ident
# keyword_class
raise "Ident after %prec" if prec_seen
a << ts.current_token
ts.next
when T::Char
# '!'
raise "Char after %prec" if prec_seen
a << ts.current_token
ts.next
when T::P_prec
# %prec tPLUS
#
# See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
ts.next
prec_seen = true
precedence_id = ts.consume!(T::Ident, T::String, T::Char)
precedence_sym = grammar.find_symbol_by_id!(precedence_id)
a << precedence_sym
when T::User_code
# { code } in the middle of rhs
if prec_seen
raise "Multiple User_code after %prec" if code_after_prec
code_after_prec = true
end
code = ts.current_token
grammar.build_references(code)
a << code
ts.next
when T::Bar
# |
break
when T::Semicolon
# ;
break
when T::Ident_Colon
# Next lhs can be here because ";" is optional.
break
when nil
# end of input can be here when ";" is omitted
break
else
raise "Unexpected token: #{ts.current_token}"
end
end
return a
end
end
end

View file

@ -0,0 +1,47 @@
module Lrama
class Report
module Profile
# 1. Wrap target method with Profile.report_profile like below:
#
# Lrama::Report::Profile.report_profile { method }
#
# 2. Run lrama command, for example
#
# $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
#
# 3. Generate html file
#
# $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
#
def self.report_profile
require "stackprof"
StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do
yield
end
end
end
module Duration
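# Minimal usage sketch (illustrative):
#
# 1. Enable duration reporting:
#
#      Lrama::Report::Duration.enable
#
# 2. Include this module and wrap the code to be measured:
#
#      report_duration(:parse) { do_parse }
#
# The elapsed time is printed only when reporting has been enabled.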
def self.enable
@_report_duration_enabled = true
end
def self.enabled?
!!@_report_duration_enabled
end
def report_duration(method_name)
time1 = Time.now.to_f
result = yield
time2 = Time.now.to_f
if Duration.enabled?
puts sprintf("%s %10.5f s", method_name, time2 - time1)
end
return result
end
end
end
end

View file

@ -0,0 +1,832 @@
require "forwardable"
require "lrama/report"
module Lrama
class State
class Reduce
# https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
attr_reader :item, :look_ahead, :not_selected_symbols
attr_accessor :default_reduction
def initialize(item)
@item = item
@look_ahead = nil
@not_selected_symbols = []
end
def rule
@item.rule
end
def look_ahead=(look_ahead)
@look_ahead = look_ahead.freeze
end
def add_not_selected_symbol(sym)
@not_selected_symbols << sym
end
def selected_look_ahead
if @look_ahead
@look_ahead - @not_selected_symbols
else
[]
end
end
end
class Shift
attr_reader :next_sym, :next_items
attr_accessor :not_selected
def initialize(next_sym, next_items)
@next_sym = next_sym
@next_items = next_items
end
end
# * symbol: The symbol under discussion
# * reduce: The reduce under discussion
# * which: Which action the conflict is resolved to. :shift, :reduce or :error (for nonassociative)
ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
def report_message
s = symbol.display_name
r = reduce.rule.precedence_sym.display_name
case
when which == :shift && same_prec
msg = "resolved as #{which} (%right #{s})"
when which == :shift
msg = "resolved as #{which} (#{r} < #{s})"
when which == :reduce && same_prec
msg = "resolved as #{which} (%left #{s})"
when which == :reduce
msg = "resolved as #{which} (#{s} < #{r})"
when which == :error
msg = "resolved as an #{which} (%nonassoc #{s})"
else
raise "Unknown direction. #{self}"
end
"Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
end
end
Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true)
attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
:default_reduction_rule, :closure, :items
attr_accessor :shifts, :reduces
def initialize(id, accessing_symbol, kernels)
@id = id
@accessing_symbol = accessing_symbol
@kernels = kernels.freeze
@items = @kernels
# Maps a set of items to its next state
# so that the next state can be resolved later
@items_to_state = {}
@conflicts = []
@resolved_conflicts = []
@default_reduction_rule = nil
end
def closure=(closure)
@closure = closure
@items = @kernels + @closure
end
def non_default_reduces
reduces.select do |reduce|
reduce.rule != @default_reduction_rule
end
end
def compute_shifts_reduces
_shifts = {}
reduces = []
items.each do |item|
# TODO: Consider what should be pushed
if item.end_of_rule?
reduces << Reduce.new(item)
else
key = item.next_sym
_shifts[key] ||= []
_shifts[key] << item.new_by_next_position
end
end
# It seems Bison 3.8.2 iterates over transitions in order of symbol number
shifts = _shifts.sort_by do |next_sym, new_items|
next_sym.number
end.map do |next_sym, new_items|
Shift.new(next_sym, new_items.flatten)
end
self.shifts = shifts.freeze
self.reduces = reduces.freeze
end
def set_items_to_state(items, next_state)
@items_to_state[items] = next_state
end
#
def set_look_ahead(rule, look_ahead)
reduce = reduces.find do |r|
r.rule == rule
end
reduce.look_ahead = look_ahead
end
# Returns an array of [shift, next_state] for nonterminal transitions
def nterm_transitions
return @nterm_transitions if @nterm_transitions
@nterm_transitions = []
shifts.each do |shift|
next if shift.next_sym.term?
@nterm_transitions << [shift, @items_to_state[shift.next_items]]
end
@nterm_transitions
end
# Returns an array of [shift, next_state] for terminal transitions
def term_transitions
return @term_transitions if @term_transitions
@term_transitions = []
shifts.each do |shift|
next if shift.next_sym.nterm?
@term_transitions << [shift, @items_to_state[shift.next_items]]
end
@term_transitions
end
def selected_term_transitions
term_transitions.select do |shift, next_state|
!shift.not_selected
end
end
# Move to next state by sym
def transition(sym)
result = nil
if sym.term?
term_transitions.each do |shift, next_state|
term = shift.next_sym
result = next_state if term == sym
end
else
nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
result = next_state if nterm == sym
end
end
raise "Can not transit by #{sym} #{self}" if result.nil?
result
end
def find_reduce_by_item!(item)
reduces.find do |r|
r.item == item
end || (raise "reduce is not found. #{item}, #{state}")
end
def default_reduction_rule=(default_reduction_rule)
@default_reduction_rule = default_reduction_rule
reduces.each do |r|
if r.rule == default_reduction_rule
r.default_reduction = true
end
end
end
def sr_conflicts
@conflicts.select do |conflict|
conflict.type == :shift_reduce
end
end
def rr_conflicts
@conflicts.select do |conflict|
conflict.type == :reduce_reduce
end
end
end
# States is passed to a template file
#
# "Efficient Computation of LALR(1) Look-Ahead Sets"
# https://dl.acm.org/doi/pdf/10.1145/69622.357187
class States
extend Forwardable
include Lrama::Report::Duration
def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
:accept_symbol, :eof_symbol, :find_symbol_by_s_value!
# TODO: Validate position is not over rule rhs
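# An Item is a rule plus a dot position. For example (illustrative), for a
# rule "expr: expr '+' expr" the item at position 1 stands for
# "expr • '+' expr"; next_sym is then '+', end_of_rule? is false, and
# new_by_next_position advances the dot by one symbol.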
Item = Struct.new(:rule, :position, keyword_init: true) do
# Optimization for States#setup_state
def hash
[rule.id, position].hash
end
def rule_id
rule.id
end
def next_sym
rule.rhs[position]
end
def end_of_rule?
rule.rhs.count == position
end
def new_by_next_position
Item.new(rule: rule, position: position + 1)
end
def previous_sym
rule.rhs[position - 1]
end
def display_name
r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
"#{r} (rule #{rule.id})"
end
# Right after position
def display_rest
r = rule.rhs[position..-1].map(&:display_name).join(" ")
". #{r} (rule #{rule.id})"
end
end
attr_reader :states, :reads_relation, :includes_relation, :lookback_relation
def initialize(grammar, warning, trace_state: false)
@grammar = grammar
@warning = warning
@trace_state = trace_state
@states = []
# `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
# where p is a state, A is a nterm, t is a term.
#
# `@direct_read_sets` is a hash whose
# key is [state.id, nterm.token_id],
# value is bitmap of term.
@direct_read_sets = {}
# Reads relation on nonterminal transitions (pair of state and nterm)
# `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
# where p, r are states, A, C are nterms.
#
# `@reads_relation` is a hash whose
# key is [state.id, nterm.token_id],
# value is array of [state.id, nterm.token_id].
@reads_relation = {}
# `@read_sets` is a hash whose
# key is [state.id, nterm.token_id],
# value is bitmap of term.
@read_sets = {}
# `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
# where p, p' are states, A, B are nterms, and β, γ are sequences of symbols.
#
# `@includes_relation` is a hash whose
# key is [state.id, nterm.token_id],
# value is array of [state.id, nterm.token_id].
@includes_relation = {}
# `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
# where p, q are states, A -> ω is a rule, A is a nterm, and ω is a sequence of symbols.
#
# `@lookback_relation` is a hash whose
# key is [state.id, rule.id],
# value is array of [state.id, nterm.token_id].
@lookback_relation = {}
# `@follow_sets` is a hash whose
# key is [state.id, rule.id],
# value is bitmap of term.
@follow_sets = {}
# `LA(q, A -> ω) = {Follow(p, A) | (q, A -> ω) lookback (p, A)}`
#
# `@la` is a hash whose
# key is [state.id, rule.id],
# value is bitmap of term.
@la = {}
end
def compute
# Look Ahead Sets
report_duration(:compute_lr0_states) { compute_lr0_states }
report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
report_duration(:compute_reads_relation) { compute_reads_relation }
report_duration(:compute_read_sets) { compute_read_sets }
report_duration(:compute_includes_relation) { compute_includes_relation }
report_duration(:compute_lookback_relation) { compute_lookback_relation }
report_duration(:compute_follow_sets) { compute_follow_sets }
report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets }
# Conflicts
report_duration(:compute_conflicts) { compute_conflicts }
report_duration(:compute_default_reduction) { compute_default_reduction }
check_conflicts
end
def reporter
StatesReporter.new(self)
end
def states_count
@states.count
end
def direct_read_sets
h = {}
@direct_read_sets.each do |k, v|
h[k] = bitmap_to_terms(v)
end
return h
end
def read_sets
h = {}
@read_sets.each do |k, v|
h[k] = bitmap_to_terms(v)
end
return h
end
def follow_sets
h = {}
@follow_sets.each do |k, v|
h[k] = bitmap_to_terms(v)
end
return h
end
def la
h = {}
@la.each do |k, v|
h[k] = bitmap_to_terms(v)
end
return h
end
private
def sr_conflicts
@states.flat_map(&:sr_conflicts)
end
def rr_conflicts
@states.flat_map(&:rr_conflicts)
end
def initial_attrs
h = {}
attrs.each do |attr|
h[attr.id] = false
end
h
end
def trace_state
if @trace_state
yield STDERR
end
end
def create_state(accessing_symbol, kernels, states_created)
# An item can appear in several states,
# so we need to use `kernels` (not `kernels.first`) as the key.
#
# For example...
#
# %%
# program: '+' strings_1
# | '-' strings_2
# ;
#
# strings_1: string_1
# ;
#
# strings_2: string_1
# | string_2
# ;
#
# string_1: string
# ;
#
# string_2: string '+'
# ;
#
# string: tSTRING
# ;
# %%
#
# For this grammar, there are 2 states
#
# State A
# string_1: string •
#
# State B
# string_1: string •
# string_2: string • '+'
#
return [states_created[kernels], false] if states_created[kernels]
state = State.new(@states.count, accessing_symbol, kernels)
@states << state
states_created[kernels] = state
return [state, true]
end
def setup_state(state)
# closure
closure = []
visited = {}
queued = {}
items = state.kernels.dup
items.each do |item|
queued[item] = true
end
while (item = items.shift) do
visited[item] = true
if (sym = item.next_sym) && sym.nterm?
@grammar.find_rules_by_symbol!(sym).each do |rule|
i = Item.new(rule: rule, position: 0)
next if queued[i]
closure << i
items << i
queued[i] = true
end
end
end
state.closure = closure.sort_by {|i| i.rule.id }
# Trace
trace_state do |out|
out << "Closure: input\n"
state.kernels.each do |item|
out << " #{item.display_rest}\n"
end
out << "\n\n"
out << "Closure: output\n"
state.items.each do |item|
out << " #{item.display_rest}\n"
end
out << "\n\n"
end
# shift & reduce
state.compute_shifts_reduces
end
def enqueue_state(states, state)
# Trace
previous = state.kernels.first.previous_sym
trace_state do |out|
out << sprintf("state_list_append (state = %d, symbol = %d (%s))",
@states.count, previous.number, previous.display_name)
end
states << state
end
def compute_lr0_states
# State queue
states = []
states_created = {}
state, _ = create_state(symbols.first, [Item.new(rule: @grammar.rules.first, position: 0)], states_created)
enqueue_state(states, state)
while (state = states.shift) do
# Trace
#
# Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
# I think it is not correct...
previous = state.kernels.first.previous_sym
trace_state do |out|
out << "Processing state #{state.id} (reached by #{previous.display_name})\n"
end
setup_state(state)
state.shifts.each do |shift|
new_state, created = create_state(shift.next_sym, shift.next_items, states_created)
state.set_items_to_state(shift.next_items, new_state)
enqueue_state(states, new_state) if created
end
end
end
def nterm_transitions
a = []
@states.each do |state|
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
a << [state, nterm, next_state]
end
end
a
end
def compute_direct_read_sets
@states.each do |state|
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
ary = next_state.term_transitions.map do |shift, _|
shift.next_sym.number
end
key = [state.id, nterm.token_id]
@direct_read_sets[key] = Bitmap.from_array(ary)
end
end
end
def compute_reads_relation
@states.each do |state|
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
next_state.nterm_transitions.each do |shift2, _next_state2|
nterm2 = shift2.next_sym
if nterm2.nullable
key = [state.id, nterm.token_id]
@reads_relation[key] ||= []
@reads_relation[key] << [next_state.id, nterm2.token_id]
end
end
end
end
end
def compute_read_sets
sets = nterm_transitions.map do |state, nterm, next_state|
[state.id, nterm.token_id]
end
@read_sets = Digraph.new(sets, @reads_relation, @direct_read_sets).compute
end
# Execute transitions from the given state by the given symbols,
# then return the final state.
def transition(state, symbols)
symbols.each do |sym|
state = state.transition(sym)
end
state
end
def compute_includes_relation
@states.each do |state|
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
@grammar.find_rules_by_symbol!(nterm).each do |rule|
i = rule.rhs.count - 1
while (i > -1) do
sym = rule.rhs[i]
break if sym.term?
state2 = transition(state, rule.rhs[0...i])
# p' = state, B = nterm, p = state2, A = sym
key = [state2.id, sym.token_id]
# TODO: need to omit if state == state2 ?
@includes_relation[key] ||= []
@includes_relation[key] << [state.id, nterm.token_id]
break if !sym.nullable
i -= 1
end
end
end
end
end
def compute_lookback_relation
@states.each do |state|
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
@grammar.find_rules_by_symbol!(nterm).each do |rule|
state2 = transition(state, rule.rhs)
# p = state, A = nterm, q = state2, A -> ω = rule
key = [state2.id, rule.id]
@lookback_relation[key] ||= []
@lookback_relation[key] << [state.id, nterm.token_id]
end
end
end
end
def compute_follow_sets
sets = nterm_transitions.map do |state, nterm, next_state|
[state.id, nterm.token_id]
end
@follow_sets = Digraph.new(sets, @includes_relation, @read_sets).compute
end
def compute_look_ahead_sets
@states.each do |state|
rules.each do |rule|
ary = @lookback_relation[[state.id, rule.id]]
next if !ary
ary.each do |state2_id, nterm_token_id|
# q = state, A -> ω = rule, p = state2, A = nterm
follows = @follow_sets[[state2_id, nterm_token_id]]
next if follows == 0
key = [state.id, rule.id]
@la[key] ||= 0
look_ahead = @la[key] | follows
@la[key] |= look_ahead
# No risk of conflict when
# * the state only has a single reduce
# * the state only has nterm_transitions (GOTO)
next if state.reduces.count == 1 && state.term_transitions.count == 0
state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
end
end
end
end
def bitmap_to_terms(bit)
ary = Bitmap.to_array(bit)
ary.map do |i|
@grammar.find_symbol_by_number!(i)
end
end
def compute_conflicts
compute_shift_reduce_conflicts
compute_reduce_reduce_conflicts
end
def compute_shift_reduce_conflicts
states.each do |state|
state.shifts.each do |shift|
state.reduces.each do |reduce|
sym = shift.next_sym
next unless reduce.look_ahead
next if !reduce.look_ahead.include?(sym)
# Shift/Reduce conflict
shift_prec = sym.precedence
reduce_prec = reduce.item.rule.precedence
# Can resolve only when both have prec
unless shift_prec && reduce_prec
state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce)
next
end
case
when shift_prec < reduce_prec
# Reduce is selected
state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce)
shift.not_selected = true
next
when shift_prec > reduce_prec
# Shift is selected
state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift)
reduce.add_not_selected_symbol(sym)
next
end
# shift_prec == reduce_prec, then check associativity
case sym.precedence.type
when :right
# Shift is selected
state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :shift, same_prec: true)
reduce.add_not_selected_symbol(sym)
next
when :left
# Reduce is selected
state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :reduce, same_prec: true)
shift.not_selected = true
next
when :nonassoc
# Can not resolve
#
# nonassoc creates a "run-time" error, precedence creates a "compile-time" error.
# Therefore, omit both the shift and the reduce.
#
# https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: :error)
shift.not_selected = true
reduce.add_not_selected_symbol(sym)
else
raise "Unknown precedence type. #{sym}"
end
end
end
end
end
def compute_reduce_reduce_conflicts
states.each do |state|
a = []
state.reduces.each do |reduce|
next if reduce.look_ahead.nil?
intersection = a & reduce.look_ahead
a += reduce.look_ahead
if !intersection.empty?
state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce)
end
end
end
end
def compute_default_reduction
states.each do |state|
next if state.reduces.empty?
# Do not set, if conflict exist
next if !state.conflicts.empty?
# Do not set, if shift with `error` exists.
next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)
state.default_reduction_rule = state.reduces.map do |r|
[r.rule, r.rule.id, (r.look_ahead || []).count]
end.sort_by do |rule, rule_id, count|
[-count, rule_id]
end.first.first
end
end
def check_conflicts
sr_count = sr_conflicts.count
rr_count = rr_conflicts.count
if @grammar.expect
expected_sr_conflicts = @grammar.expect
expected_rr_conflicts = 0
if expected_sr_conflicts != sr_count
@warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected")
end
if expected_rr_conflicts != rr_count
@warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected")
end
else
if sr_count != 0
@warning.warn("shift/reduce conflicts: #{sr_count} found")
end
if rr_count != 0
@warning.warn("reduce/reduce conflicts: #{rr_count} found")
end
end
end
end
end

View file

@ -0,0 +1,310 @@
module Lrama
class StatesReporter
include Lrama::Report::Duration
def initialize(states)
@states = states
end
def report(io, **options)
report_duration(:report) do
_report(io, **options)
end
end
private
def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
# TODO: Unused terms
# TODO: Unused rules
report_conflicts(io)
report_grammar(io) if grammar
report_states(io, itemsets, lookaheads, solved, verbose)
end
def report_conflicts(io)
has_conflict = false
@states.states.each do |state|
messages = []
cs = state.conflicts.group_by(&:type)
if cs[:shift_reduce]
messages << "#{cs[:shift_reduce].count} shift/reduce"
end
if cs[:reduce_reduce]
messages << "#{cs[:reduce_reduce].count} reduce/reduce"
end
if !messages.empty?
has_conflict = true
io << "State #{state.id} conflicts: #{messages.join(', ')}\n"
end
end
if has_conflict
io << "\n\n"
end
end
def report_grammar(io)
io << "Grammar\n"
last_lhs = nil
@states.rules.each do |rule|
if rule.rhs.empty?
r = "ε"
else
r = rule.rhs.map(&:display_name).join(" ")
end
if rule.lhs == last_lhs
io << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r)
else
io << "\n"
io << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r)
end
last_lhs = rule.lhs
end
io << "\n\n"
end
def report_states(io, itemsets, lookaheads, solved, verbose)
@states.states.each do |state|
# Report State
io << "State #{state.id}\n\n"
# Report item
last_lhs = nil
list = itemsets ? state.items : state.kernels
list.sort_by {|i| [i.rule_id, i.position] }.each do |item|
rule = item.rule
position = item.position
if rule.rhs.empty?
r = "ε •"
else
r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
end
if rule.lhs == last_lhs
l = " " * rule.lhs.id.s_value.length + "|"
else
l = rule.lhs.id.s_value + ":"
end
la = ""
if lookaheads && item.end_of_rule?
reduce = state.find_reduce_by_item!(item)
look_ahead = reduce.selected_look_ahead
if !look_ahead.empty?
la = " [#{look_ahead.map(&:display_name).join(", ")}]"
end
end
last_lhs = rule.lhs
io << sprintf("%5i %s %s%s\n", rule.id, l, r, la)
end
io << "\n"
# Report shifts
tmp = state.term_transitions.select do |shift, _|
!shift.not_selected
end.map do |shift, next_state|
[shift.next_sym, next_state.id]
end
max_len = tmp.map(&:first).map(&:display_name).map(&:length).max
tmp.each do |term, state_id|
io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n"
end
io << "\n" if !tmp.empty?
# Report error caused by %nonassoc
nl = false
tmp = state.resolved_conflicts.select do |resolved|
resolved.which == :error
end.map do |error|
error.symbol.display_name
end
max_len = tmp.map(&:length).max
tmp.each do |name|
nl = true
io << " #{name.ljust(max_len)} error (nonassociative)\n"
end
io << "\n" if !tmp.empty?
# Report reduces
nl = false
max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0
max_len = [max_len, "$default".length].max if state.default_reduction_rule
ary = []
state.non_default_reduces.each do |reduce|
reduce.look_ahead.each do |term|
ary << [term, reduce]
end
end
ary.sort_by do |term, reduce|
term.number
end.each do |term, reduce|
rule = reduce.item.rule
io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n"
nl = true
end
if r = state.default_reduction_rule
nl = true
s = "$default".ljust(max_len)
if r.initial_rule?
io << " #{s} accept\n"
else
io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n"
end
end
io << "\n" if nl
# Report nonterminal transitions
tmp = []
max_len = 0
state.nterm_transitions.each do |shift, next_state|
nterm = shift.next_sym
tmp << [nterm, next_state.id]
max_len = [max_len, nterm.id.s_value.length].max
end
tmp.uniq!
tmp.sort_by! do |nterm, state_id|
nterm.number
end
tmp.each do |nterm, state_id|
io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n"
end
io << "\n" if !tmp.empty?
if solved
# Report conflict resolutions
state.resolved_conflicts.each do |resolved|
io << " #{resolved.report_message}\n"
end
io << "\n" if !state.resolved_conflicts.empty?
end
if verbose
# Report direct_read_sets
io << " [Direct Read sets]\n"
direct_read_sets = @states.direct_read_sets
@states.nterms.each do |nterm|
terms = direct_read_sets[[state.id, nterm.token_id]]
next if !terms
next if terms.empty?
str = terms.map {|sym| sym.id.s_value }.join(", ")
io << " read #{nterm.id.s_value} shift #{str}\n"
end
io << "\n"
# Report reads_relation
io << " [Reads Relation]\n"
@states.nterms.each do |nterm|
a = @states.reads_relation[[state.id, nterm.token_id]]
next if !a
a.each do |state_id2, nterm_id2|
n = @states.nterms.find {|n| n.token_id == nterm_id2 }
io << " (State #{state_id2}, #{n.id.s_value})\n"
end
end
io << "\n"
# Report read_sets
io << " [Read sets]\n"
read_sets = @states.read_sets
@states.nterms.each do |nterm|
terms = read_sets[[state.id, nterm.token_id]]
next if !terms
next if terms.empty?
terms.each do |sym|
io << " #{sym.id.s_value}\n"
end
end
io << "\n"
# Report includes_relation
io << " [Includes Relation]\n"
@states.nterms.each do |nterm|
a = @states.includes_relation[[state.id, nterm.token_id]]
next if !a
a.each do |state_id2, nterm_id2|
n = @states.nterms.find {|n| n.token_id == nterm_id2 }
io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n"
end
end
io << "\n"
# Report lookback_relation
io << " [Lookback Relation]\n"
@states.rules.each do |rule|
a = @states.lookback_relation[[state.id, rule.id]]
next if !a
a.each do |state_id2, nterm_id2|
n = @states.nterms.find {|n| n.token_id == nterm_id2 }
io << " (Rule: #{rule.to_s}) -> (State #{state_id2}, #{n.id.s_value})\n"
end
end
io << "\n"
# Report follow_sets
io << " [Follow sets]\n"
follow_sets = @states.follow_sets
@states.nterms.each do |nterm|
terms = follow_sets[[state.id, nterm.token_id]]
next if !terms
terms.each do |sym|
io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n"
end
end
io << "\n"
# Report LA
io << " [Look-Ahead Sets]\n"
tmp = []
max_len = 0
@states.rules.each do |rule|
syms = @states.la[[state.id, rule.id]]
next if !syms
tmp << [rule, syms]
max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max
end
tmp.each do |rule, syms|
syms.each do |sym|
io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n"
end
end
io << "\n" if !tmp.empty?
end
# End of Report State
io << "\n"
end
end
end
end

View file

@ -0,0 +1,3 @@
module Lrama
VERSION = "0.4.0".freeze
end

View file

@ -0,0 +1,25 @@
module Lrama
class Warning
attr_reader :errors, :warns
def initialize(out = STDERR)
@out = out
@errors = []
@warns = []
end
def error(message)
@out << message << "\n"
@errors << message
end
def warn(message)
@out << message << "\n"
@warns << message
end
def has_error?
!@errors.empty?
end
end
end

File diff suppressed because it is too large

View file

@ -0,0 +1,112 @@
<%# b4_generated_by -%>
/* A Bison parser, made by GNU Bison 3.8.2. */
<%# b4_copyright -%>
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* As a special exception, you may create a larger work that contains
part or all of the Bison parser skeleton and distribute that work
under terms of your choice, so long as that work isn't itself a
parser generator using the skeleton or a modified version thereof
as a parser skeleton. Alternatively, if you modify or redistribute
the parser skeleton itself, you may (at your option) remove this
special exception, which will cause the skeleton and the resulting
Bison output files to be licensed under the GNU General Public
License without this special exception.
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
<%# b4_disclaimer -%>
/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
<%# b4_shared_declarations -%>
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>

View file

@ -72,7 +72,7 @@ GITURL = URI.parse("https://github.com/ruby/ruby.git")
RUBY_VERSION_PATTERN = /^\#define\s+RUBY_VERSION\s+"([\d.]+)"/
ENV["VPATH"] ||= "include/ruby"
YACC = ENV["YACC"] ||= "bison"
YACC = ENV["YACC"] ||= "#{$tooldir}/lrama/exe/lrama"
ENV["BASERUBY"] ||= "ruby"
ENV["RUBY"] ||= "ruby"
ENV["MV"] ||= "mv"

View file

@ -101,7 +101,7 @@ CC = cl -nologo
CPP = $(CC) -E
!endif
!if !defined(YACC)
YACC = bison
YACC = $(BASERUBY) $(top_srcdir)/tool/lrama/exe/lrama
!endif
AR = lib -nologo
PURIFY =