 # frozen_string_literal: true
-require 'ripper'
+require 'prism'

 ##
-# Wrapper for Ripper lex states
+# Wrapper for Prism lex with Ripper-compatible API

 class RDoc::Parser::RipperStateLex
-  # :stopdoc:
-
   Token = Struct.new(:line_no, :char_no, :kind, :text, :state)

-  EXPR_END = Ripper::EXPR_END
-  EXPR_ENDFN = Ripper::EXPR_ENDFN
-  EXPR_ARG = Ripper::EXPR_ARG
-  EXPR_FNAME = Ripper::EXPR_FNAME
+  # Lexer states from Ripper
+  EXPR_END = 0x2      # 2 - Expression ends
+  EXPR_ENDFN = 0x8    # 8 - Function definition ends
+  EXPR_ARG = 0x10     # 16 - Inside argument list
+  EXPR_FNAME = 0x80   # 128 - Inside function name
+  EXPR_LABEL = 0x400  # 1024 - Label in hash literal
+
+  REDEFINABLE_OPERATORS = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~].freeze

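Since the `EXPR_*` constants are now plain integers, every state check in this file is an ordinary bit test. A minimal, self-contained sketch (constants duplicated here for illustration; the values mirror Ripper's):

```ruby
EXPR_ARG   = 0x10
EXPR_FNAME = 0x80
EXPR_LABEL = 0x400

state = EXPR_ARG | EXPR_LABEL            # a lex state can carry several flags at once
(state & (EXPR_ARG | EXPR_LABEL)) != 0   # => true,  at least one of the tested flags is set
(state & EXPR_FNAME) != 0                # => false, EXPR_FNAME is not set
state == EXPR_ARG                        # => false, equality means "exactly this state"
```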
-  class InnerStateLex < Ripper::Filter
-    def initialize(code)
-      super(code)
+  # Returns tokens parsed from +code+.
+  def self.parse(code)
+    lex = self.new(code)
+    tokens = []
+    begin
+      while tk = lex.get_squashed_tk
+        tokens.push tk
+      end
+    rescue StopIteration
     end
+    tokens
+  end

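A typical call into the class method above, assuming this branch of rdoc is on the load path; the token kinds follow Ripper's `on_*` event names, so treat the inline results as approximate:

```ruby
require 'rdoc'
require 'rdoc/parser/ripper_state_lex'

tokens = RDoc::Parser::RipperStateLex.parse("def foo; end\n")
tokens.first(3).map { |tk| [tk.kind, tk.text] }
# => [[:on_kw, "def"], [:on_sp, " "], [:on_ident, "foo"]] (roughly)
```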
-    def on_default(event, tok, data)
-      data << Token.new(lineno, column, event, tok, state)
+  # Returns a non-zero value if the lex state after +token+ includes +EXPR_END+.
+  def self.end?(token)
+    (token[:state] & EXPR_END)
+  end
+
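With integer states, `end?` returns the masked bits rather than a boolean; a small sketch using the `Token` struct defined above, with hand-picked state values:

```ruby
require 'rdoc'
require 'rdoc/parser/ripper_state_lex'

Token = RDoc::Parser::RipperStateLex::Token

tk = Token.new(1, 9, :on_kw, "end", 0x2 | 0x8)   # EXPR_END | EXPR_ENDFN
RDoc::Parser::RipperStateLex.end?(tk)            # => 2, non-zero because EXPR_END is set

tk = Token.new(1, 0, :on_ident, "foo", 0x10)     # EXPR_ARG only
RDoc::Parser::RipperStateLex.end?(tk)            # => 0, EXPR_END is not set
```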
+  # New lexer for +code+.
+  def initialize(code)
+    @buf = []
+    @heredoc_queue = []
+    # Use Prism.lex_compat for Ripper-compatible tokenization
+    lex_result = Prism.lex_compat(code)
+    prism_tokens = lex_result.value.map do |(pos, kind, text, state)|
+      line_no, char_no = pos
+      # Convert Ripper::Lexer::State to integer to avoid Ripper dependency
+      state_int = state.respond_to?(:to_i) ? state.to_i : state
+      Token.new(line_no, char_no, kind, text, state_int)
     end
+
+    # Prism.lex_compat omits :on_sp tokens, so we need to insert them for proper
+    # syntax highlighting and token stream reconstruction
+    tokens_with_spaces = insert_space_tokens(prism_tokens, code)
+
+    # Fix Prism incompatibility: Prism returns :on_ignored_nl after `def foo; end`
+    # but parsers expect :on_nl for proper token collection in single-line methods
+    @tokens = normalize_ignored_nl_for_single_line_methods(tokens_with_spaces)
   end

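For context, `Prism.lex_compat` returns a result whose `value` mirrors `Ripper.lex`: each entry is `[[line, column], event, text, state]`, which is what the block above destructures. A quick probe (the class of the state object can vary between prism versions, which is why the code guards the conversion with `respond_to?(:to_i)`):

```ruby
require 'prism'

pos, event, text, state = Prism.lex_compat("a = 1\n").value.first
pos                                             # => [1, 0]  (line, column)
event                                           # => :on_ident
text                                            # => "a"
state.respond_to?(:to_i) ? state.to_i : state   # integer bit field, as consumed above
```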
   def get_squashed_tk
@@ -39,7 +71,7 @@ def get_squashed_tk
     when :on_backtick then
       if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0
         tk[:kind] = :on_ident
-        tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+        tk[:state] = EXPR_ARG
       else
         tk = get_string_tk(tk)
       end
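The branch above handles a backtick used as a method name (for example ``def `(cmd)``): when the lex state carries EXPR_FNAME or EXPR_ENDFN the token is re-tagged as an identifier and now gets a bare integer state instead of a `Ripper::Lexer::State`. A hedged sketch of the intended effect:

```ruby
require 'rdoc'
require 'rdoc/parser/ripper_state_lex'

tokens = RDoc::Parser::RipperStateLex.parse("def `(cmd); end\n")
backtick = tokens.find { |tk| tk.text == '`' }
backtick.kind   # expected to be :on_ident rather than the start of an xstring
```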
@@ -73,7 +105,9 @@ def get_squashed_tk
     tk
   end

-  private def get_symbol_tk(tk)
+  private
+
+  def get_symbol_tk(tk)
     is_symbol = true
     symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol)
     if ":'" == tk[:text] or ':"' == tk[:text] or tk[:text].start_with?('%s')
@@ -120,7 +154,7 @@ def get_squashed_tk
     tk
   end

-  private def get_string_tk(tk)
+  def get_string_tk(tk)
     string = tk[:text]
     state = nil
     kind = :on_tstring
@@ -147,7 +181,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, kind, string, state)
   end

-  private def get_regexp_tk(tk)
+  def get_regexp_tk(tk)
     string = tk[:text]
     state = nil
     loop do
@@ -165,7 +199,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_regexp, string, state)
   end

-  private def get_embdoc_tk(tk)
+  def get_embdoc_tk(tk)
     string = tk[:text]
     until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
       string = string + embdoc_tk[:text]
@@ -174,7 +208,7 @@ def get_squashed_tk
     Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state)
   end

-  private def get_heredoc_tk(heredoc_name, indent)
+  def get_heredoc_tk(heredoc_name, indent)
     string = ''
     start_tk = nil
     prev_tk = nil
@@ -193,13 +227,13 @@ def get_squashed_tk
     @buf.unshift heredoc_tk
   end

-  private def retrieve_heredoc_info(tk)
+  def retrieve_heredoc_info(tk)
     name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
     indent = tk[:text] =~ /\A<<[-~]/
     [name, indent]
   end

-  private def heredoc_end?(name, indent, tk)
+  def heredoc_end?(name, indent, tk)
     result = false
     if :on_heredoc_end == tk[:kind] then
       tk_name = tk[:text].chomp
@@ -211,7 +245,7 @@ def get_squashed_tk
     result
   end

-  private def get_words_tk(tk)
+  def get_words_tk(tk)
     string = ''
     start_token = tk[:text]
     start_quote = tk[:text].rstrip[-1]
@@ -249,10 +283,9 @@ def get_squashed_tk
     Token.new(line_no, char_no, :on_dstring, text, state)
   end

-  private def get_op_tk(tk)
-    redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
-    if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then
-      tk[:state] = Ripper::Lexer::State.new(EXPR_ARG)
+  def get_op_tk(tk)
+    if REDEFINABLE_OPERATORS.include?(tk[:text]) and tk[:state] == EXPR_ARG then
+      tk[:state] = EXPR_ARG
       tk[:kind] = :on_ident
     elsif tk[:text] =~ /^[-+]$/ then
       tk_ahead = get_squashed_tk
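Same idea for the other redefinable operators: in `def +(other)` the `+` arrives as an operator token whose state is exactly EXPR_ARG, so `get_op_tk` re-tags it as an identifier, now storing the plain integer state. A sketch, with the expected outcome hedged since it depends on the lexer's state bookkeeping:

```ruby
require 'rdoc'
require 'rdoc/parser/ripper_state_lex'

tokens = RDoc::Parser::RipperStateLex.parse("def +(other); end\n")
plus = tokens.find { |tk| tk.text == '+' }
plus.kind   # expected to be :on_ident once the operator is treated as a method name
```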
@@ -272,31 +305,66 @@ def get_squashed_tk
     tk
   end

-  # :startdoc:
-
-  # New lexer for +code+.
-  def initialize(code)
-    @buf = []
-    @heredoc_queue = []
-    @inner_lex = InnerStateLex.new(code)
-    @tokens = @inner_lex.parse([])
-  end
-
-  # Returns tokens parsed from +code+.
-  def self.parse(code)
-    lex = self.new(code)
-    tokens = []
-    begin
-      while tk = lex.get_squashed_tk
-        tokens.push tk
+  def normalize_ignored_nl_for_single_line_methods(tokens)
+    tokens.each_cons(2) do |prev_token, token|
+      # Convert :on_ignored_nl to :on_nl when it follows an `end` keyword on the same line
+      # This ensures proper token collection for single-line method definitions
+      if token.kind == :on_ignored_nl &&
+         prev_token.kind == :on_kw && prev_token.text == 'end' &&
+         prev_token.line_no == token.line_no
+        token[:kind] = :on_nl
       end
-    rescue StopIteration
     end
     tokens
   end

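A self-contained sketch of the rewrite `normalize_ignored_nl_for_single_line_methods` performs, using hand-built tokens (the column and state values here are made up for illustration):

```ruby
Token = Struct.new(:line_no, :char_no, :kind, :text, :state)

tokens = [
  Token.new(1, 0, :on_kw, "def", 0x80),
  Token.new(1, 9, :on_kw, "end", 0x2),
  Token.new(1, 12, :on_ignored_nl, "\n", 0x1),
]

tokens.each_cons(2) do |prev_token, token|
  if token.kind == :on_ignored_nl &&
     prev_token.kind == :on_kw && prev_token.text == 'end' &&
     prev_token.line_no == token.line_no
    token[:kind] = :on_nl
  end
end

tokens.last.kind  # => :on_nl
```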
-  # Returns +true+ if lex state will be +END+ after +token+.
-  def self.end?(token)
-    (token[:state] & EXPR_END)
+  def insert_space_tokens(tokens, code)
+    return tokens if tokens.empty?
+
+    lines = code.lines
+    result = []
+    prev_token = nil
+
+    tokens.each_with_index do |token, i|
+      # Check for leading spaces at the start of a line
+      # (when current token is not on the same line as previous token and doesn't start at column 0)
+      if prev_token && prev_token.line_no < token.line_no && token.char_no > 0
+        # There are leading spaces on this line
+        line_text = lines[token.line_no - 1]
+        if line_text
+          leading_spaces = line_text[0...token.char_no]
+          if leading_spaces && !leading_spaces.empty? && leading_spaces.match?(/\A\s+\z/)
+            space_token = Token.new(token.line_no, 0, :on_sp, leading_spaces, prev_token.state)
+            result << space_token
+          end
+        end
+      end
+
+      result << token
+
+      next_token = tokens[i + 1]
+      current_end_col = token.char_no + token.text.length
+
+      # Insert space tokens for gaps between tokens on the same line
+      if next_token && next_token.line_no == token.line_no && current_end_col < next_token.char_no
+        space_text = lines[token.line_no - 1][current_end_col...next_token.char_no]
+        if space_text && !space_text.empty?
+          space_token = Token.new(token.line_no, current_end_col, :on_sp, space_text, token.state)
+          result << space_token
+        end
+      # Handle backslash-newline line continuations for proper display
+      elsif next_token && next_token.line_no > token.line_no
+        rest_of_line = lines[token.line_no - 1][current_end_col..-1]
+        if rest_of_line&.match?(/\A\s*\\\n?\z/)
+          # Insert space tokens for whitespace and backslash-newline
+          space_token = Token.new(token.line_no, current_end_col, :on_sp, rest_of_line, token.state)
+          result << space_token
+        end
+      end
+
+      prev_token = token
+    end
+
+    result
   end
 end
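One way to check the effect of `insert_space_tokens`: because gaps, leading indentation, and backslash continuations are re-emitted as `:on_sp` tokens, concatenating the token texts should reproduce the original source for straightforward input (assuming this change is applied):

```ruby
require 'rdoc'
require 'rdoc/parser/ripper_state_lex'

src = "def add(a, b)\n  a + b\nend\n"
tokens = RDoc::Parser::RipperStateLex.parse(src)
tokens.map(&:text).join == src   # expected: true
```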