File: build_entities.rb

package info (click to toggle)
ruby-htmlentities 4.3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 316 kB
  • sloc: ruby: 2,235; makefile: 3
file content (53 lines) | stat: -rw-r--r-- 1,348 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env ruby
require 'open-uri'
require 'uri'

# Maps each supported flavor name (as given on the command line) to the URL
# of the DTD whose referenced entity files are scanned. Frozen so the lookup
# table cannot be modified by accident.
DTD = {
  'html4'   => 'http://www.w3.org/TR/html4/strict.dtd',
  'xhtml1'  => 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
}.freeze

# Resolve the flavor named by the first command-line argument to its DTD URL,
# and stop with a usage message when it is missing or unknown.
flavor = ARGV.first
dtd_uri = DTD[flavor]
abort("Usage: #{$0} <#{DTD.keys.sort.join('|')}>") unless dtd_uri

# Entity name => codepoint, accumulated across every referenced entity file.
entities = {}

# Matches an external parameter entity declaration in the DTD; the third
# capture is the location of the referenced entity file.
entity_file_reference = /<!ENTITY \s+ % \s+ (\w+) \s+ PUBLIC \s+ "(.*?)" \s+ "(.*?)" \s* >/x

# Matches a character entity definition whose value is a numeric character
# reference; captures the entity name and the text between "&#" and ';"'.
entity_definition = /<!ENTITY \s+ (\w+) \s+ (?:CDATA \s+)? "\&\#(.*?);"/x

# URI#read is provided by open-uri. It is used instead of Kernel#open because
# Kernel#open no longer fetches URLs on Ruby 3.0 and later.
dtd = URI.parse(dtd_uri).read

dtd.scan(entity_file_reference) do |_set_name, _public_id, system_id|
  # The entity file location may be relative, so resolve it against the DTD URL.
  entity_uri = URI.parse(dtd_uri).merge(system_id)
  entity_file = entity_uri.to_s
  $stderr.puts("Found reference to entity file at #{entity_file}")
  entities_found = 0

  entity_uri.read.scan(entity_definition) do |name, codepoint|
    entities[name] =
      case codepoint
      when /\A\d/
        # Decimal reference. String#to_i reads only the leading digits.
        codepoint.to_i
      when /\Ax[0-9a-fA-F]/
        # Hexadecimal reference: drop the leading "x" and parse the rest.
        codepoint[1..-1].to_i(16)
      else
        raise "couldn't parse entity definition #{name}"
      end
    entities_found += 1
  end

  $stderr.puts("Found #{entities_found} entities in #{entity_file}")
end

# These two are a special case in the W3C entity file, so the scanned values
# are overridden here. The codepoints are written as Integers because a ?c
# character literal is a one-character String on Ruby 1.9 and later, which
# would be interpolated into the generated table as a bare < or &.
entities['lt']  = 60 # '<'
entities['amp'] = 38 # '&'

# Emit Ruby source on stdout that registers the collected entities under
# HTMLEntities::MAPPINGS[flavor]. Names are ordered case-insensitively, with
# the exact name as a tie-breaker.
ordered_names = entities.keys.sort_by { |key| [key.downcase, key] }
mapping_lines = ordered_names.map { |key| "    '#{key}' => #{entities[key]}" }

generated_source = [
  "class HTMLEntities",
  "  MAPPINGS = {} unless defined? MAPPINGS",
  "  MAPPINGS['#{flavor}'] = {",
  mapping_lines.join(",\n"),
  "  }",
  "end"
].join("\n")

puts generated_source