File: apache.rake

package info (click to toggle)
ruby-rouge 4.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,836 kB
  • sloc: ruby: 38,168; sed: 2,071; perl: 152; makefile: 8
file content (122 lines) | stat: -rw-r--r-- 3,484 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

require 'fileutils'
require 'open-uri'
require 'set'

# Base URL of the Apache httpd documentation download area. The task below
# reads this page, and the archive file name found in it is appended to this
# URL to build the download address.
APACHE_DOCS_URI = "https://downloads.apache.org/httpd/docs/"
# Relative path of the generated keyword file written by `builtins:apache`.
APACHE_KEYWORDS_FILE = "./lib/rouge/lexers/apache/keywords.rb"

namespace :builtins do
  # Regenerates the Apache keyword file from the published httpd docs.
  task :apache do
    apache = Rouge::Tasks::Builtins::Apache.new

    # The index page is only used to locate the documentation archive.
    index_html = URI.open(APACHE_DOCS_URI, &:read)
    pages = apache.download_docs(index_html)

    # "directives.html" feeds the keyword extraction; every remaining page
    # feeds the value extraction.
    directive_index = pages.delete("directives.html")
    module_pages = pages.values

    File.write(
      APACHE_KEYWORDS_FILE,
      apache.render_output(
        apache.extract_keywords(directive_index),
        apache.extract_values(module_pages)
      )
    )
  end
end

module Rouge
  module Tasks
    module Builtins
      # Builds the keyword tables for the Apache lexer out of the Apache httpd
      # HTML documentation: download and unpack the docs, pull directive and
      # section names out of the directive index, pull argument values out of
      # the module pages, and render the result as Ruby source.
      class Apache
        # Scratch directory the archive is downloaded to and unpacked in.
        WORK_DIR = "/tmp/rouge"

        # Link to the English documentation archive inside the index page.
        # `[^"]+` keeps the match inside a single href attribute.
        DOCS_LINK = /href="([^"]+\.en)(\.zip)"/

        # One entry of the directive index; the name is wrapped in
        # `&lt;` / `&gt;` when the entry is a section.
        KEYWORD_ITEM = %r{<li><a.*?>(?<open>&lt;)?(?<name>.*?)(?<close>&gt;)?</a></li>}

        # A whole token that looks like a value (at least two characters).
        # Anchored with \A/\z so tokens containing line breaks are rejected.
        VALUE_TOKEN = /\A\w[\w*-]+\z/

        # Downloads the documentation archive linked from the index page,
        # unpacks it and reads the relevant pages from its `mod` directory.
        # Needs the `wget` and `unzip` commands.
        #
        # @param input [String] HTML of the page at APACHE_DOCS_URI
        # @return [Hash{String => String}] file basename => file content
        # @raise [ArgumentError] if the page links to no `*.en.zip` archive
        # @raise [RuntimeError, SystemCallError] if `wget` or `unzip` fails
        #   or cannot be executed
        def download_docs(input)
          link = DOCS_LINK.match(input)
          unless link
            raise ArgumentError,
                  "no English documentation archive (*.en.zip) is linked from the index page"
          end

          name, ext = link.captures
          archive = name + ext
          files = {}

          FileUtils.mkdir_p(WORK_DIR)
          Dir.chdir(WORK_DIR) do
            # Argument lists bypass the shell, so text scraped from the
            # remote page is never interpreted as shell syntax;
            # `exception: true` turns a failed command into an error
            # instead of silently continuing.
            system("wget", "-q", APACHE_DOCS_URI + archive, exception: true)
            system("unzip", "-oq", archive, exception: true)

            Dir.chdir("./#{name}/mod/") do
              Dir.glob("./{directives,core,mod_*,mpm*}.html").each do |f|
                files[File.basename(f)] = File.read(f)
              end
            end
          end

          files
        end

        # Collects the lower-cased entry names of the directive index.
        #
        # @param input [String] HTML of the directive index page
        # @return [Hash{String => Array<String>}] names grouped under
        #   "sections" (entries wrapped in `&lt;`/`&gt;`) and "directives"
        #   (everything else); a group with no entries has no key
        def extract_keywords(input)
          keywords = Hash.new { |h, k| h[k] = [] }

          input.each_line do |line|
            entry = KEYWORD_ITEM.match(line.scrub)
            next unless entry

            name = entry[:name]
            # A link without text matches with an empty name; it is not a keyword.
            next if name.empty?

            key = entry[:open] && entry[:close] ? "sections" : "directives"
            keywords[key].push(name.downcase)
          end

          keywords
        end

        # Collects value-like tokens from the module pages.
        #
        # Tokens come from two places: the `<code>` element following a
        # "Syntax:" label, and the leading text of `<dt>` elements.
        #
        # @param inputs [Array<String>] HTML of the module pages
        # @return [Array<String>] sorted, unique, lower-cased values
        def extract_values(inputs)
          values = Set.new

          inputs.each do |input|
            input = input.scrub

            input.scan(%r[Syntax:.*?<code>(.*?)</code>]m) do |captures|
              # The first token of a syntax line is the directive name itself.
              collect_values(values, captures.first.split(/[\s|]/).drop(1))
            end

            input.scan(%r[<dt>\s*(?:<.+?>\s*)*(.*?)<]m) do |captures|
              collect_values(values, captures.first.split(/[=\[\]]/))
            end
          end

          values.to_a.sort
        end

        # Renders the Ruby source of the keyword file.
        #
        # With a block, each line is yielded in order. Without one, the
        # lines are returned joined by "\n" (no trailing newline).
        #
        # @param keywords [Hash{String => Array<String>}] one class-level
        #   reader is generated per key
        # @param values [Array<String>] content of the generated `values` reader
        # @return [String] the rendered source when no block is given
        def render_output(keywords, values, &b)
          return enum_for(:render_output, keywords, values).to_a.join("\n") unless b

          yield   "# -*- coding: utf-8 -*- #"
          yield   "# frozen_string_literal: true"
          yield   ""
          yield   "# DO NOT EDIT"
          yield   "# This file is automatically generated by `rake builtins:apache`."
          yield   "# See tasks/builtins/apache.rake for more info."
          yield   ""
          yield   "module Rouge"
          yield   "  module Lexers"
          yield   "    class Apache"
          keywords.each do |k,v|
            yield "      def self.#{k}"
            yield "        @#{k} ||= Set.new #{v.inspect}"
            yield "      end"
            yield ""
          end
          yield   "      def self.values"
          yield   "        @values ||= Set.new #{values.inspect}"
          yield   "      end"
          yield   "    end"
          yield   "  end"
          yield   "end"
        end

        private

        # Adds the lower-cased form of every token that is a value as a whole.
        def collect_values(values, tokens)
          tokens.each { |token| values.add(token.downcase) if VALUE_TOKEN.match?(token) }
        end
      end
    end
  end
end