File: ruby.rb

package info (click to toggle)
ruby-gettext 3.3.3-2
links: PTS, VCS
area: main
in suites: bookworm, bullseye
size: 6,424 kB
sloc: ruby: 9,643; makefile: 8
file content (433 lines) | stat: -rw-r--r-- 11,455 bytes
=begin
  parser/ruby.rb - parser for ruby script

  Copyright (C) 2013-2019  Sutou Kouhei <kou@clear-code.com>
  Copyright (C) 2003-2009  Masao Mutoh
  Copyright (C) 2005       speakillof
  Copyright (C) 2001,2002  Yasushi Shoji, Masao Mutoh

  You may redistribute it and/or modify it under the same
  license terms as Ruby or LGPL.

=end

require "ripper"
require "stringio"

require "gettext/po_entry"

module GetText
  class RubyParser
    class POExtractor < Ripper::Filter
      ID = ["gettext", "_", "N_", "sgettext", "s_"]
      PLURAL_ID = ["ngettext", "n_", "Nn_", "ns_", "nsgettext"]
      MSGCTXT_ID = ["pgettext", "p_"]
      MSGCTXT_PLURAL_ID = ["npgettext", "np_"]

      attr_accessor :use_comment
      attr_accessor :comment_tag
      def initialize(*args)
        super(*args)
        @in_block_arguments = false
        @ignore_next_comma = false
        @context_stack = []
        @need_definition_name = false
        @current_po_entry = nil
        @current_po_entry_nth_attribute = 0
        @use_comment = false
        @comment_tag = nil
        @last_comment = ""
        @reset_comment = false
        @string_mark_stack = []
        @string_stack = []
      end

      def process_on_op(token, po)
        @in_block_arguments = !@in_block_arguments if token == "|"
        po
      end

      def process_on_kw(token, po)
        store_po_entry(po)
        case token
        when "begin", "case", "do", "for"
          @context_stack.push(token)
        when "class", "def", "module"
          @context_stack.push(token)
        when "if", "unless", "until", "while"
          # postfix case
          unless state.allbits?(Ripper::EXPR_LABEL)
            @context_stack.push(token)
          end
        when "end"
          @context_stack.pop
        end
        po
      end

      def process_on_ident(token, po)
        store_po_entry(po)

        return po if @in_block_arguments
        return po if state.allbits?(Ripper::EXPR_ENDFN)

        case token
        when *ID
          @current_po_entry = POEntry.new(:normal)
        when *PLURAL_ID
          @current_po_entry = POEntry.new(:plural)
        when *MSGCTXT_ID
          @current_po_entry = POEntry.new(:msgctxt)
        when *MSGCTXT_PLURAL_ID
          @current_po_entry = POEntry.new(:msgctxt_plural)
        end
        if @current_po_entry
          @current_po_entry.add_comment(@last_comment) unless @last_comment.empty?
          @last_comment = ""
          @current_po_entry.references << "#{filename}:#{lineno}"
          @current_po_entry_nth_attribute = 0
        end
        po
      end

      def process_on_const(token, po)
        case token
        when "N_"," Nn_"
          # TODO: Check the next token is :on_lparen
          process_on_ident(token, po)
        else
          po
        end
      end

      def process_on_comment(token, po)
        @last_comment = "" if @reset_comment
        @reset_comment = false
        if @last_comment.empty?
          content = token.gsub(/\A#\s*/, "").chomp
          if comment_to_be_extracted?(content)
            @last_comment << content
          end
        else
          content = token.gsub(/\A#/, "").chomp
          @last_comment << "\n"
          @last_comment << content
        end
        po
      end

      def process_on_sp(token, po)
        po
      end

      def process_on_tstring_beg(token, po)
        if token.start_with?("%Q")
          @string_mark_stack << "\""
        elsif token.start_with?("%q")
          @string_mark_stack << "'"
        elsif token.start_with?("%")
          @string_mark_stack << "\""
        else
          @string_mark_stack << token
        end
        @string_stack << ""
        po
      end

      def process_on_tstring_content(token, po)
        case @string_mark_stack.last
        when "\"", "`"
          @string_stack.last << token.gsub(/\\./) do |data|
            case data
            when "\\n"
              "\n"
            when "\\t"
              "\t"
            when "\\\\"
              "\\"
            when "\\\""
              "\""
            when "\\\#"
              "#"
            else
              data
            end
          end
        else
          @string_stack.last << token.gsub(/\\./) do |data|
            case data
            when "\\\\"
              "\\"
            when "\\'"
              "'"
            else
              data
            end
          end
        end
        po
      end

      def process_on_tstring_end(token, po)
        @ignore_next_comma = false
        string_mark = @string_mark_stack.pop
        case string_mark
        when "\"", "'"
          last_string = @string_stack.pop
          if @current_po_entry and last_string
            @current_po_entry[@current_po_entry_nth_attribute] =
              (@current_po_entry[@current_po_entry_nth_attribute] || "") +
              last_string
          end
        end
        po
      end

      def process_on_heredoc_beg(token, po)
        if token.end_with?("'")
          @string_mark_stack << "'"
        else
          @string_mark_stack << "\""
        end
        @string_stack << ""
        po
      end

      def process_on_heredoc_end(token, po)
        process_on_tstring_end(token, po)
      end

      def process_on_regexp_beg(token, po)
        @string_mark_stack << "\""
        @string_stack << ""
        po
      end

      def process_on_embexpr_beg(token, po)
        @current_po_entry = nil
        @current_po_entry_nth_attribute = 0
        po
      end

      def process_on_regexp_end(token, po)
        @string_mark_stack.pop
        @string_stack.pop
        po
      end

      def process_on_int(token, po)
        @ignore_next_comma = true
        po
      end

      def process_on_comma(token, po)
        unless @ignore_next_comma
          if @current_po_entry
            @current_po_entry_nth_attribute += 1
          end
        end
        po
      end

      def process_on_rparen(token, po)
        store_po_entry(po)
        po
      end

      def process_on_nl(token, po)
        @reset_comment = true
        po
      end

      def process_on_symbeg(token, po)
        if token.start_with?("%s") or [":'", ":\""].include?(token)
          @string_mark_stack << ":"
          @string_stack << ""
        end
        po
      end

      def process_on_symbols_beg(token, po)
        @string_mark_stack << "\""
        @string_stack << ""
        po
      end

      def process_on_backtick(token, po)
        @string_mark_stack << "`"
        @string_stack << ""
        po
      end

      def process_on_qsymbols_beg(token, po)
        @string_mark_stack << token
        @string_stack << ""
        po
      end

      def process_on_qwords_beg(token, po)
        @string_mark_stack << token
        @string_stack << ""
        po
      end

      def on_default(event, token, po)
        trace(event, token) do
          process_method = "process_#{event}"
          if respond_to?(process_method)
            __send__(process_method, token, po)
          else
            po
          end
        end
      end

      private
      @@debug = ENV["GETTEXT_RUBY_PARSER_DEBUG"]
      def debug?
        @@debug
      end

      def trace(event_name, token)
        pp [event_name, token, state, @context_stack.last] if debug?
        yield
      end

      def store_po_entry(po)
        return if @current_po_entry.nil?
        po << @current_po_entry if @current_po_entry.msgid
        @current_po_entry = nil
        @current_po_entry_nth_attribute = 0
      end

      def comment_to_be_extracted?(comment)
        return false unless @use_comment

        return true if @comment_tag.nil?

        comment.start_with?(@comment_tag)
      end
    end

    class << self
      def target?(file)  # :nodoc:
        true # always true, as the default parser.
      end

      # Parses Ruby script located at `path`.
      #
      # This is a short cut method. It equals to `new(path,
      # options).parse`.
      #
      # @param (see #initialize)
      # @option (see #initialize)
      # @return (see #parse)
      # @see #initialize
      # @see #parse
      def parse(path, options={})
        parser = new(path, options)
        parser.parse
      end
    end

    #
    # @example `:comment_tag` option: String tag
    #   path = "hello.rb"
    #   # content:
    #   #   # TRANSLATORS: This is a comment to translators.
    #   #   _("Hello")
    #   #
    #   #   # This is a comment for programmers.
    #   #   # TRANSLATORS: This is a comment to translators.
    #   #   # This is also a comment to translators.
    #   #   _("World")
    #   #
    #   #   # This is a comment for programmers.
    #   #   # This is also a comment for programmers
    #   #   # because all lines don't start with "TRANSRATORS:".
    #   #   _("Bye")
    #   options = {:comment_tag => "TRANSLATORS:"}
    #   parser = GetText::RubyParser.new(path, options)
    #   parser.parse
    #   # => [
    #   #   POEntry<
    #   #     :msgid => "Hello",
    #   #     :extracted_comment =>
    #   #       "TRANSLATORS: This is a comment to translators.",
    #   #   >,
    #   #   POEntry<
    #   #     :msgid => "World",
    #   #     :extracted_comment =>
    #   #       "TRANSLATORS: This is a comment to translators.\n" +
    #   #       "This is also a comment to translators.",
    #   #   >,
    #   #   POEntry<
    #   #     :msgid => "Bye",
    #   #     :extracted_comment => nil,
    #   #   >,
    #   # ]
    #
    # @example `:comment_tag` option: nil tag
    #   path = "hello.rb"
    #   # content:
    #   #   # This is a comment to translators.
    #   #   # This is also a comment for translators.
    #   #   _("Hello")
    #   options = {:comment_tag => nil}
    #   parser = GetText::RubyParser.new(path, options)
    #   parser.parse
    #   # => [
    #   #   POEntry<
    #   #     :msgid => "Hello",
    #   #     :extracted_comment =>
    #   #       "This is a comment to translators.\n" +
    #   #       " This is also a comment for translators.",
    #   #   >,
    #   # ]
    #
    # @param path [String] Ruby script path to be parsed
    # @param options [Hash] Options
    # @option options [String, nil] :comment_tag The tag to
    #   detect comments to be extracted. The extracted comments are
    #   used to deliver messages to translators from programmers.
    #
    #   If the tag is String and a line in a comment start with the
    #   tag, the line and the following lines are extracted.
    #
    #   If the tag is nil, all comments are extracted.
    def initialize(path, options={})
      @path = path
      @options = options
    end

    # Extracts messages from @path.
    #
    # @return [Array<POEntry>] Extracted messages
    def parse
      source = IO.read(@path)

      encoding = detect_encoding(source) || source.encoding
      source.force_encoding(encoding)

      parse_source(source)
    end

    def detect_encoding(source)
      binary_source = source.dup.force_encoding("ASCII-8BIT")
      if /\A.*coding\s*[=:]\s*([[:alnum:]\-_]+)/ =~ binary_source
        $1.gsub(/-(?:unix|mac|dos)\z/, "")
      else
        nil
      end
    end

    def parse_source(source)
      extractor = POExtractor.new(source, @path)
      if @options.key?(:comment_tag)
        extractor.use_comment = true
        extractor.comment_tag = @options[:comment_tag]
      end
      extractor.parse([])
    end
  end
end