File: processed_source.rb

package info (click to toggle)
ruby-rubocop-ast 1.49.0-2
links: PTS, VCS
area: main
in suites: forky, sid
size: 1,768 kB
sloc: ruby: 17,017; yacc: 90; makefile: 9
file content (411 lines) | stat: -rw-r--r-- 13,223 bytes
# frozen_string_literal: true

require 'digest/sha1'

module RuboCop
  module AST
    # A `Prism` interface's class that provides a fixed `Prism::ParseLexResult` instead of parsing.
    #
    # This class implements the `parse_lex` method to return a preparsed `Prism::ParseLexResult`
    # rather than parsing the source code.  When the parse result is already available externally,
    # such as in Ruby LSP, the Prism parsing process can be bypassed.
    class PrismPreparsed
      def initialize(prism_result)
        unless prism_result.is_a?(Prism::ParseLexResult)
          raise ArgumentError, <<~MESSAGE
            Expected a `Prism::ParseLexResult` object, but received `#{prism_result.class}`.
          MESSAGE
        end

        @prism_result = prism_result
      end

      def parse_lex(_source, **_prism_options)
        @prism_result
      end
    end

    # ProcessedSource contains objects which are generated by Parser
    # and other information such as disabled lines for cops.
    # It also provides a convenient way to access source lines.
    class ProcessedSource # rubocop:disable Metrics/ClassLength
      # @api private
      STRING_SOURCE_NAME = '(string)'

      INVALID_LEVELS = %i[error fatal].freeze
      private_constant :INVALID_LEVELS

      PARSER_ENGINES = %i[default parser_whitequark parser_prism].freeze
      private_constant :PARSER_ENGINES

      attr_reader :path, :buffer, :ast, :comments, :tokens, :diagnostics,
                  :parser_error, :raw_source, :ruby_version, :parser_engine

      def self.from_file(path, ruby_version, parser_engine: :default)
        file = File.read(path, mode: 'rb')
        new(file, ruby_version, path, parser_engine: parser_engine)
      end

      def initialize(
        source, ruby_version, path = nil, parser_engine: :default, prism_result: nil
      )
        parser_engine = normalize_parser_engine(parser_engine, ruby_version)

        # Defaults source encoding to UTF-8, regardless of the encoding it has
        # been read with, which could be non-utf8 depending on the default
        # external encoding.
        (+source).force_encoding(Encoding::UTF_8) unless source.encoding == Encoding::UTF_8

        @raw_source = source
        @path = path
        @diagnostics = []
        @ruby_version = ruby_version
        @parser_engine = parser_engine
        @parser_error = nil

        parse(source, ruby_version, parser_engine, prism_result)
      end

      def ast_with_comments
        return if !ast || !comments

        @ast_with_comments ||= Parser::Source::Comment.associate_by_identity(ast, comments)
      end

      # Returns the source lines, line break characters removed, excluding a
      # possible __END__ and everything that comes after.
      def lines
        @lines ||= begin
          all_lines = @buffer.source_lines
          last_token_line = tokens.any? ? tokens.last.line : all_lines.size
          result = []
          all_lines.each_with_index do |line, ix|
            break if ix >= last_token_line && line == '__END__'

            result << line
          end
          result
        end
      end

      def [](*args)
        lines[*args]
      end

      def valid_syntax?
        return false if @parser_error

        @diagnostics.none? { |d| INVALID_LEVELS.include?(d.level) }
      end

      # Raw source checksum for tracking infinite loops.
      def checksum
        Digest::SHA1.hexdigest(@raw_source)
      end

      # @deprecated Use `comments.each`
      def each_comment(&block)
        comments.each(&block)
      end

      # @deprecated Use `comment_at_line`, `each_comment_in_lines`, or `comments.find`
      def find_comment(&block)
        comments.find(&block)
      end

      # @deprecated Use `tokens.each`
      def each_token(&block)
        tokens.each(&block)
      end

      # @deprecated Use `tokens.find`
      def find_token(&block)
        tokens.find(&block)
      end

      def file_path
        buffer.name
      end

      def blank?
        ast.nil?
      end

      # @return [Comment, nil] the comment at that line, if any.
      def comment_at_line(line)
        comment_index[line]
      end

      # @return [Boolean] if the given line number has a comment.
      def line_with_comment?(line)
        comment_index.include?(line)
      end

      # Enumerates on the comments contained with the given `line_range`
      def each_comment_in_lines(line_range)
        return to_enum(:each_comment_in_lines, line_range) unless block_given?

        line_range.each do |line|
          if (comment = comment_index[line])
            yield comment
          end
        end
      end

      # @return [Boolean] if any of the lines in the given `source_range` has a comment.
      # Consider using `each_comment_in_lines` instead
      def contains_comment?(source_range)
        each_comment_in_lines(source_range.line..source_range.last_line).any?
      end
      # @deprecated use contains_comment?
      alias commented? contains_comment?

      # @deprecated Use `each_comment_in_lines`
      # Should have been called `comments_before_or_at_line`. Doubtful it has of any valid use.
      def comments_before_line(line)
        each_comment_in_lines(0..line).to_a
      end

      def start_with?(string)
        return false if self[0].nil?

        self[0].start_with?(string)
      end

      def preceding_line(token)
        lines[token.line - 2]
      end

      def current_line(token)
        lines[token.line - 1]
      end

      def following_line(token)
        lines[token.line]
      end

      def line_indentation(line_number)
        lines[line_number - 1]
          .match(/^(\s*)/)[1]
          .to_s
          .length
      end

      def tokens_within(range_or_node)
        begin_index = first_token_index(range_or_node)
        end_index = last_token_index(range_or_node)
        sorted_tokens[begin_index..end_index]
      end

      def first_token_of(range_or_node)
        sorted_tokens[first_token_index(range_or_node)]
      end

      def last_token_of(range_or_node)
        sorted_tokens[last_token_index(range_or_node)]
      end

      # The tokens list is always sorted by token position, except for cases when heredoc
      # is passed as a method argument. In this case tokens are interleaved by
      # heredoc contents' tokens.
      def sorted_tokens
        # Use stable sort.
        @sorted_tokens ||= tokens.sort_by.with_index { |token, i| [token.begin_pos, i] }
      end

      private

      def comment_index
        @comment_index ||= {}.tap do |hash|
          comments.each { |c| hash[c.location.line] = c }
        end
      end

      def parse(source, ruby_version, parser_engine, prism_result)
        buffer_name = @path || STRING_SOURCE_NAME
        @buffer = Parser::Source::Buffer.new(buffer_name, 1)

        begin
          @buffer.source = source
        rescue EncodingError, Parser::UnknownEncodingInMagicComment => e
          @parser_error = e
          @ast = nil
          @comments = []
          @tokens = []
          return
        end

        parser = create_parser(ruby_version, parser_engine, prism_result)

        @ast, @comments, @tokens = tokenize(parser)
      end

      def tokenize(parser)
        begin
          ast, comments, tokens = parser.tokenize(@buffer)
          ast ||= nil # force `false` to `nil`, see https://github.com/whitequark/parser/pull/722
        rescue Parser::SyntaxError
          # All errors are in diagnostics. No need to handle exception.
          comments = []
          tokens = []
        end

        ast&.complete!
        tokens.map! { |t| Token.from_parser_token(t) }

        [ast, comments, tokens]
      end

      # rubocop:disable Lint/FloatComparison, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
      def parser_class(ruby_version, parser_engine)
        case parser_engine
        when :parser_whitequark
          case ruby_version
          when 1.9
            require 'parser/ruby19'
            Parser::Ruby19
          when 2.0
            require 'parser/ruby20'
            Parser::Ruby20
          when 2.1
            require 'parser/ruby21'
            Parser::Ruby21
          when 2.2
            require 'parser/ruby22'
            Parser::Ruby22
          when 2.3
            require 'parser/ruby23'
            Parser::Ruby23
          when 2.4
            require 'parser/ruby24'
            Parser::Ruby24
          when 2.5
            require 'parser/ruby25'
            Parser::Ruby25
          when 2.6
            require 'parser/ruby26'
            Parser::Ruby26
          when 2.7
            require 'parser/ruby27'
            Parser::Ruby27
          when 2.8, 3.0
            require 'parser/ruby30'
            Parser::Ruby30
          when 3.1
            require 'parser/ruby31'
            Parser::Ruby31
          when 3.2
            require 'parser/ruby32'
            Parser::Ruby32
          when 3.3
            require 'parser/ruby33'
            Parser::Ruby33
          when 3.4
            require 'parser/ruby34'
            Parser::Ruby34
          else
            raise ArgumentError, 'RuboCop supports target Ruby versions 3.4 and below with ' \
                                 "`parser`. Specified target Ruby version: #{ruby_version.inspect}"
          end
        when :parser_prism
          case ruby_version
          when 3.3
            Prism::Translation::Parser33
          when 3.4
            Prism::Translation::Parser34
          when 3.5, 4.0
            Prism::Translation::Parser40
          when 4.1
            Prism::Translation::Parser41
          else
            raise ArgumentError, 'RuboCop supports target Ruby versions 3.3 and above with Prism. ' \
                                 "Specified target Ruby version: #{ruby_version.inspect}"
          end
        end
      end
      # rubocop:enable Lint/FloatComparison, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength

      def builder_class(parser_engine)
        case parser_engine
        when :parser_whitequark
          RuboCop::AST::Builder
        when :parser_prism
          RuboCop::AST::BuilderPrism
        end
      end

      # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      def create_parser(ruby_version, parser_engine, prism_result)
        builder = builder_class(parser_engine).new

        parser_class = parser_class(ruby_version, parser_engine)

        parser_instance = if parser_engine == :parser_prism && prism_result
                            # NOTE: Since it is intended for use with Ruby LSP, it targets only Prism.
                            # If there is no reuse of a pre-parsed result, such as in Ruby LSP,
                            # regular parsing with Prism occurs, and `else` branch will be executed.
                            prism_reparsed = PrismPreparsed.new(prism_result)
                            parser_class.new(builder, parser: prism_reparsed)
                          else
                            parser_class.new(builder)
                          end

        parser_instance.tap do |parser|
          # On JRuby there's a risk that we hang in tokenize() if we
          # don't set the all errors as fatal flag. The problem is caused by a bug
          # in Racc that is discussed in issue #93 of the whitequark/parser
          # project on GitHub.
          parser.diagnostics.all_errors_are_fatal = (RUBY_ENGINE != 'ruby')
          parser.diagnostics.ignore_warnings = false
          parser.diagnostics.consumer = lambda do |diagnostic|
            @diagnostics << diagnostic
          end
        end
      end
      # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

      def normalize_parser_engine(parser_engine, ruby_version)
        parser_engine = parser_engine.to_sym
        unless PARSER_ENGINES.include?(parser_engine)
          raise ArgumentError, 'The keyword argument `parser_engine` accepts `default`, ' \
                               "`parser_whitequark`, or `parser_prism`, but `#{parser_engine}` " \
                               'was passed.'
        end
        if parser_engine == :default
          default_parser_engine(ruby_version)
        else
          parser_engine
        end
      end

      # The Parser gem does not support Ruby 3.5 or later.
      # It is also not fully compatible with Ruby 3.4 but for
      # now respects using parser for backwards compatibility.
      def default_parser_engine(ruby_version)
        if ruby_version >= 3.4
          :parser_prism
        else
          :parser_whitequark
        end
      end

      def first_token_index(range_or_node)
        begin_pos = source_range(range_or_node).begin_pos
        sorted_tokens.bsearch_index { |token| token.begin_pos >= begin_pos }
      end

      def last_token_index(range_or_node)
        end_pos = source_range(range_or_node).end_pos
        sorted_tokens.bsearch_index { |token| token.end_pos >= end_pos }
      end

      def source_range(range_or_node)
        if range_or_node.respond_to?(:source_range)
          range_or_node.source_range
        else
          range_or_node
        end
      end
    end
  end
end