# -*- coding: utf-8 -*- #
# frozen_string_literal: true
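
# Regenerates lib/rouge/lexers/apache/keywords.rb from the official Apache
# httpd documentation. Run with `rake builtins:apache`; it shells out to
# wget and unzip, so both need to be on the PATH.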

require 'open-uri'
require 'set'

APACHE_DOCS_URI = "https://downloads.apache.org/httpd/docs/"
APACHE_KEYWORDS_FILE = "./lib/rouge/lexers/apache/keywords.rb"
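
# The :apache task downloads the current docs archive, separates the directive
# index (directives.html) from the per-module pages, extracts directive and
# section names plus common argument values, and rewrites the keywords file.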
namespace :builtins do
  task :apache do
    generator = Rouge::Tasks::Builtins::Apache.new

    input = URI.open(APACHE_DOCS_URI) { |f| f.read }
    files = generator.download_docs(input)

    # directives.html holds the full directive index; every other downloaded
    # file is a per-module documentation page.
    list = files.delete("directives.html")
    mods = files.values

    keywords = generator.extract_keywords(list)
    values = generator.extract_values(mods)
    output = generator.render_output(keywords, values)

    File.write(APACHE_KEYWORDS_FILE, output)
  end
end

module Rouge
  module Tasks
    module Builtins
      class Apache
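        # Downloads the English docs archive linked from the index page into
        # /tmp/rouge, unpacks it, and returns a Hash mapping each relevant
        # HTML file name (directives.html, core.html, mod_*.html, mpm*.html)
        # to its contents.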
        def download_docs(input)
          files = Hash.new

          name, ext = input.match(/href="(.+\.en)(\.zip)"/) { |m| [m[1], m[2]] }
          raise "no English docs archive found at #{APACHE_DOCS_URI}" unless name

          docs_zip = APACHE_DOCS_URI + name + ext

          system "mkdir -p /tmp/rouge"
          Dir.chdir "/tmp/rouge" do
            system "wget -q #{docs_zip}"
            system "unzip -oq #{name + ext}"

            Dir.chdir "./#{name}/mod/" do
              Dir.glob("./{directives,core,mod_*,mpm*}.html").each do |f|
                files[File.basename(f)] = File.read(f)
              end
            end
          end

          files
        end
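
        # Parses the directive index and returns a Hash with two keys:
        # "sections" for container directives written as &lt;Name&gt; in the
        # HTML (e.g. <Directory>), and "directives" for everything else.
        # All names are downcased.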
        def extract_keywords(input)
          keywords = Hash.new { |h, k| h[k] = Array.new }

          input.each_line do |line|
            if line.scrub =~ %r(<li><a.*?>(&lt;)?(.*?)(&gt;)?</a></li>)
              next unless $2

              key = $1 && $3 ? "sections" : "directives"
              keywords[key].push $2.downcase
            end
          end

          keywords
        end
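
        # Scans the per-module pages for directive "Syntax:" lines and <dt>
        # terms and collects the literal argument values they mention (for
        # example, "KeepAlive On|Off" contributes "on" and "off"). Returns a
        # sorted array of downcased values.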
        def extract_values(inputs)
          values = Set.new

          inputs.each do |input|
            input = input.scrub

            input.scan(%r[Syntax:.*?<code>(.*?)</code>]m) do |m|
              m[0].split(/[\s|]/).
                drop(1).
                filter { |v| v.index(/^\w[\w*-]+$/) }.
                each { |v| values.add(v.downcase) }
            end

            input.scan(%r[<dt>\s*(?:<.+?>\s*)*(.*?)<]m) do |m|
              m[0].split(/[=\[\]]/).
                filter { |v| v.index(/^\w[\w*-]+$/) }.
                each { |v| values.add(v.downcase) }
            end
          end

          values.to_a.sort
        end
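
        # Renders the generated keywords file. When given a block, yields one
        # output line at a time; without one, returns the whole file as a
        # String. The generated file looks roughly like (abbreviated):
        #
        #   module Rouge
        #     module Lexers
        #       class Apache
        #         def self.directives
        #           @directives ||= Set.new [...]
        #         end
        #       end
        #     end
        #   end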
        def render_output(keywords, values, &b)
          return enum_for(:render_output, keywords, values).to_a.join("\n") unless b

          yield "# -*- coding: utf-8 -*- #"
          yield "# frozen_string_literal: true"
          yield ""
          yield "# DO NOT EDIT"
          yield "# This file is automatically generated by `rake builtins:apache`."
          yield "# See tasks/builtins/apache.rake for more info."
          yield ""
          yield "module Rouge"
          yield "  module Lexers"
          yield "    class Apache"

          keywords.each do |k, v|
            yield "      def self.#{k}"
            yield "        @#{k} ||= Set.new #{v.inspect}"
            yield "      end"
            yield ""
          end

          yield "      def self.values"
          yield "        @values ||= Set.new #{values.inspect}"
          yield "      end"
          yield "    end"
          yield "  end"
          yield "end"
        end
      end
    end
  end
end