
Commit 4b83937

Replace Ripper with Prism.lex_compat
1 parent f48d9e3 commit 4b83937

3 files changed: +122 −54 lines

lib/rdoc/parser/ripper_state_lex.rb

Lines changed: 115 additions & 47 deletions
@@ -1,27 +1,59 @@
 # frozen_string_literal: true
-require 'ripper'
+require 'prism'
 
 ##
-# Wrapper for Ripper lex states
+# Wrapper for Prism lex with Ripper-compatible API
 
 class RDoc::Parser::RipperStateLex
-  # :stopdoc:
-
   Token = Struct.new(:line_no, :char_no, :kind, :text, :state)
 
-  EXPR_END = Ripper::EXPR_END
-  EXPR_ENDFN = Ripper::EXPR_ENDFN
-  EXPR_ARG = Ripper::EXPR_ARG
-  EXPR_FNAME = Ripper::EXPR_FNAME
+  # Lexer states from Ripper
+  EXPR_END = 0x2     # 2    - Expression ends
+  EXPR_ENDFN = 0x8   # 8    - Function definition ends
+  EXPR_ARG = 0x10    # 16   - Inside argument list
+  EXPR_FNAME = 0x80  # 128  - Inside function name
+  EXPR_LABEL = 0x400 # 1024 - Label in hash literal
+
+  REDEFINABLE_OPERATORS = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~].freeze
 
-  class InnerStateLex < Ripper::Filter
-    def initialize(code)
-      super(code)
+  # Returns tokens parsed from +code+.
+  def self.parse(code)
+    lex = self.new(code)
+    tokens = []
+    begin
+      while tk = lex.get_squashed_tk
+        tokens.push tk
+      end
+    rescue StopIteration
     end
+    tokens
+  end
 
-    def on_default(event, tok, data)
-      data << Token.new(lineno, column, event, tok, state)
+  # Returns +true+ if lex state will be +END+ after +token+.
+  def self.end?(token)
+    (token[:state] & EXPR_END)
+  end
+
+  # New lexer for +code+.
+  def initialize(code)
+    @buf = []
+    @heredoc_queue = []
+    # Use Prism.lex_compat for Ripper-compatible tokenization
+    lex_result = Prism.lex_compat(code)
+    prism_tokens = lex_result.value.map do |(pos, kind, text, state)|
+      line_no, char_no = pos
+      # Convert Ripper::Lexer::State to integer to avoid Ripper dependency
+      state_int = state.respond_to?(:to_i) ? state.to_i : state
+      Token.new(line_no, char_no, kind, text, state_int)
     end
+
+    # Prism.lex_compat omits :on_sp tokens, so we need to insert them for proper
+    # syntax highlighting and token stream reconstruction
+    tokens_with_spaces = insert_space_tokens(prism_tokens, code)
+
+    # Fix Prism incompatibility: Prism returns :on_ignored_nl after `def foo; end`
+    # but parsers expect :on_nl for proper token collection in single-line methods
+    @tokens = normalize_ignored_nl_for_single_line_methods(tokens_with_spaces)
   end
 
   def get_squashed_tk
@@ -39,7 +71,7 @@ def get_squashed_tk
     when :on_backtick then
       if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0
         tk[:kind] = :on_ident
-        tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+        tk[:state] = EXPR_ARG
       else
         tk = get_string_tk(tk)
       end
@@ -73,7 +105,9 @@ def get_squashed_tk
     tk
   end
 
-  private def get_symbol_tk(tk)
+  private
+
+  def get_symbol_tk(tk)
     is_symbol = true
     symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol)
     if ":'" == tk[:text] or ':"' == tk[:text] or tk[:text].start_with?('%s')
@@ -120,7 +154,7 @@ def get_squashed_tk
     tk
   end
 
-  private def get_string_tk(tk)
+  def get_string_tk(tk)
     string = tk[:text]
     state = nil
     kind = :on_tstring
@@ -147,7 +181,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, kind, string, state)
   end
 
-  private def get_regexp_tk(tk)
+  def get_regexp_tk(tk)
     string = tk[:text]
     state = nil
     loop do
@@ -165,7 +199,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_regexp, string, state)
   end
 
-  private def get_embdoc_tk(tk)
+  def get_embdoc_tk(tk)
     string = tk[:text]
     until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
       string = string + embdoc_tk[:text]
@@ -174,7 +208,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state)
   end
 
-  private def get_heredoc_tk(heredoc_name, indent)
+  def get_heredoc_tk(heredoc_name, indent)
     string = ''
     start_tk = nil
     prev_tk = nil
@@ -193,13 +227,13 @@ def get_squashed_tk
     @buf.unshift heredoc_tk
   end
 
-  private def retrieve_heredoc_info(tk)
+  def retrieve_heredoc_info(tk)
     name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
     indent = tk[:text] =~ /\A<<[-~]/
     [name, indent]
   end
 
-  private def heredoc_end?(name, indent, tk)
+  def heredoc_end?(name, indent, tk)
     result = false
     if :on_heredoc_end == tk[:kind] then
       tk_name = tk[:text].chomp
@@ -211,7 +245,7 @@ def get_squashed_tk
     result
   end
 
-  private def get_words_tk(tk)
+  def get_words_tk(tk)
     string = ''
     start_token = tk[:text]
     start_quote = tk[:text].rstrip[-1]
@@ -249,10 +283,9 @@ def get_squashed_tk
     Token.new(line_no, char_no, :on_dstring, text, state)
   end
 
-  private def get_op_tk(tk)
-    redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
-    if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then
-      tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+  def get_op_tk(tk)
+    if REDEFINABLE_OPERATORS.include?(tk[:text]) and tk[:state] == EXPR_ARG then
+      tk[:state] = EXPR_ARG
       tk[:kind] = :on_ident
     elsif tk[:text] =~ /^[-+]$/ then
       tk_ahead = get_squashed_tk
@@ -272,31 +305,66 @@ def get_squashed_tk
     tk
   end
 
-  # :startdoc:
-
-  # New lexer for +code+.
-  def initialize(code)
-    @buf = []
-    @heredoc_queue = []
-    @inner_lex = InnerStateLex.new(code)
-    @tokens = @inner_lex.parse([])
-  end
-
-  # Returns tokens parsed from +code+.
-  def self.parse(code)
-    lex = self.new(code)
-    tokens = []
-    begin
-      while tk = lex.get_squashed_tk
-        tokens.push tk
+  def normalize_ignored_nl_for_single_line_methods(tokens)
+    tokens.each_cons(2) do |prev_token, token|
+      # Convert :on_ignored_nl to :on_nl when it follows an `end` keyword on the same line
+      # This ensures proper token collection for single-line method definitions
+      if token.kind == :on_ignored_nl &&
+         prev_token.kind == :on_kw && prev_token.text == 'end' &&
+         prev_token.line_no == token.line_no
+        token[:kind] = :on_nl
      end
-    rescue StopIteration
    end
    tokens
  end
 
-  # Returns +true+ if lex state will be +END+ after +token+.
-  def self.end?(token)
-    (token[:state] & EXPR_END)
+  def insert_space_tokens(tokens, code)
+    return tokens if tokens.empty?
+
+    lines = code.lines
+    result = []
+    prev_token = nil
+
+    tokens.each_with_index do |token, i|
+      # Check for leading spaces at the start of a line
+      # (when current token is not on the same line as previous token and doesn't start at column 0)
+      if prev_token && prev_token.line_no < token.line_no && token.char_no > 0
+        # There are leading spaces on this line
+        line_text = lines[token.line_no - 1]
+        if line_text
+          leading_spaces = line_text[0...token.char_no]
+          if leading_spaces && !leading_spaces.empty? && leading_spaces.match?(/\A\s+\z/)
+            space_token = Token.new(token.line_no, 0, :on_sp, leading_spaces, prev_token.state)
+            result << space_token
+          end
+        end
+      end
+
+      result << token
+
+      next_token = tokens[i + 1]
+      current_end_col = token.char_no + token.text.length
+
+      # Insert space tokens for gaps between tokens on the same line
+      if next_token && next_token.line_no == token.line_no && current_end_col < next_token.char_no
+        space_text = lines[token.line_no - 1][current_end_col...next_token.char_no]
+        if space_text && !space_text.empty?
+          space_token = Token.new(token.line_no, current_end_col, :on_sp, space_text, token.state)
+          result << space_token
+        end
+      # Handle backslash-newline line continuations for proper display
+      elsif next_token && next_token.line_no > token.line_no
+        rest_of_line = lines[token.line_no - 1][current_end_col..-1]
+        if rest_of_line&.match?(/\A\s*\\\n?\z/)
+          # Insert space tokens for whitespace and backslash-newline
+          space_token = Token.new(token.line_no, current_end_col, :on_sp, rest_of_line, token.state)
+          result << space_token
+        end
+      end
+
+      prev_token = token
+    end
+
+    result
   end
 end
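
Note (not part of the commit): Prism.lex_compat is designed to return the same shape of output as Ripper.lex, which is what lets the new initializer build its Token structs without requiring ripper. A minimal sketch of that mapping, assuming the prism gem is installed:

require 'prism'

Token = Struct.new(:line_no, :char_no, :kind, :text, :state)

code = "def foo; end\n"
Prism.lex_compat(code).value.each do |(pos, kind, text, state)|
  line_no, char_no = pos
  # Mirror the commit's conversion: lex states respond to #to_i, yielding the
  # integer bitmask that the EXPR_* constants are tested against.
  state_int = state.respond_to?(:to_i) ? state.to_i : state
  p Token.new(line_no, char_no, kind, text, state_int)
end

Each element of value is a Ripper.lex-style tuple of [[line, column], event, text, state], e.g. [[1, 0], :on_kw, "def", ...], which the map block above destructures directly.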

lib/rdoc/parser/ruby.rb

Lines changed: 6 additions & 7 deletions
@@ -17,7 +17,6 @@
   return
 end
 
-require 'ripper'
 require_relative 'ripper_state_lex'
 
 ##
@@ -522,7 +521,7 @@ def get_included_module_with_optional_parens
       when :on_comment, :on_embdoc then
         @read.pop
         if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and
-            (!continue or (tk[:state] & Ripper::EXPR_LABEL) != 0) then
+            (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then
           break if !continue and nest <= 0
         end
       when :on_comma then
@@ -535,7 +534,7 @@ def get_included_module_with_optional_parens
           nest += 1
         when 'if', 'unless', 'while', 'until', 'rescue'
           # postfix if/unless/while/until/rescue must be EXPR_LABEL
-          nest += 1 unless (tk[:state] & Ripper::EXPR_LABEL) != 0
+          nest += 1 unless (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0
         when 'end'
           nest -= 1
           break if nest == 0
@@ -1049,7 +1048,7 @@ def parse_constant_body(container, constant, is_array_or_hash) # :nodoc:
       elsif (:on_kw == tk[:kind] && 'def' == tk[:text]) then
         nest += 1
       elsif (:on_kw == tk[:kind] && %w{do if unless case begin}.include?(tk[:text])) then
-        if (tk[:state] & Ripper::EXPR_LABEL) == 0
+        if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0
           nest += 1
         end
       elsif [:on_rparen, :on_rbrace, :on_rbracket].include?(tk[:kind]) ||
@@ -1668,7 +1667,7 @@ def parse_method_or_yield_parameters(method = nil,
       when :on_comment, :on_embdoc then
         @read.pop
         if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and
-            (!continue or (tk[:state] & Ripper::EXPR_LABEL) != 0) then
+            (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then
           if method && method.block_params.nil? then
             unget_tk tk
             read_documentation_modifiers method, modifiers
@@ -1888,7 +1887,7 @@ def parse_statements(container, single = NORMAL, current_method = nil,
         end
 
       when 'until', 'while' then
-        if (tk[:state] & Ripper::EXPR_LABEL) == 0
+        if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0
           nest += 1
           skip_optional_do_after_expression
         end
@@ -1904,7 +1903,7 @@ def parse_statements(container, single = NORMAL, current_method = nil,
         skip_optional_do_after_expression
 
       when 'case', 'do', 'if', 'unless', 'begin' then
-        if (tk[:state] & Ripper::EXPR_LABEL) == 0
+        if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0
           nest += 1
         end
 
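
Note (not part of the commit): the parser-side edits above only change where EXPR_LABEL comes from; the bitmask tests themselves are untouched. A tiny illustration with made-up state values, using the 0x400 constant added in ripper_state_lex.rb:

EXPR_LABEL = 0x400 # same value as RDoc::Parser::RipperStateLex::EXPR_LABEL
EXPR_ARG   = 0x10

state = EXPR_ARG | EXPR_LABEL    # e.g. the lex state before a postfix `if`/`while`
puts (state & EXPR_LABEL) != 0   # => true, so the parser does not open a new nesting level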

rdoc.gemspec

Lines changed: 1 addition & 0 deletions
@@ -69,4 +69,5 @@ RDoc includes the +rdoc+ and +ri+ tools for generating and displaying documentat
   s.add_dependency 'psych', '>= 4.0.0'
   s.add_dependency 'erb'
   s.add_dependency 'tsort'
+  s.add_dependency 'prism'
 end
