File: entities.erl

package info (click to toggle)
rabbitmq-server 3.3.5-1.1
  • links: PTS
  • area: main
  • in suites: jessie-kfreebsd
  • size: 12,004 kB
  • sloc: erlang: 78,203; python: 3,187; xml: 2,843; makefile: 903; sh: 831; java: 660; perl: 64; ruby: 63
file content (45 lines) | stat: -rwxr-xr-x 1,425 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env escript
%% -*- mode: erlang -*-
-export([main/1]).

%% @doc Script used to generate mochiweb_charref.erl table.

%% Escript entry point; the argument list is ignored.
%% Downloads the named-character-references page from the hard-coded w3.org
%% URL, parses it with mochiweb_html, collects the entity rows and prints the
%% generated clauses (sorted) on standard output.
main(_Args) ->
    application:start(inets),
    %% Put ./ebin at the front of the code path -- presumably so that
    %% mochiweb_html can be loaded when the script runs from the project root.
    code:add_patha("ebin"),
    Url = "http://www.w3.org/TR/html5/named-character-references.html",
    %% Assertive match: any transport-level failure crashes the script here.
    {ok, {_StatusLine, _Headers, Body}} = httpc:request(Url),
    Tree = mochiweb_html:parse(Body),
    Entities = search(Tree),
    print(lists:sort(Entities)).

%% Write one `entity/1' clause per element of `Entities' to standard output,
%% each terminated by ";\n", followed by the final catch-all clause that maps
%% every other name to `undefined'. Returns `ok'.
print(Entities) ->
    EmitClause = fun(Entity) ->
                         io:put_chars([clause(Entity), ";\n"])
                 end,
    lists:foreach(EmitClause, Entities),
    io:put_chars(["entity(_) -> undefined.\n"]),
    ok.

%% Render the source text of a single `entity/1' clause (without the trailing
%% separator) as an iolist.
%%   Name   - entity name, spliced between the double quotes of the head.
%%   second - non-empty list of hexadecimal code point digits (iodata).
%% An entity with exactly one code point maps to a bare `16#...' integer;
%% an entity with several maps to a list of `16#...' integers.
clause({Name, [Single]}) ->
    ["entity(\"", Name, "\") -> 16#", Single];
clause({Name, [Lead | Others]}) ->
    %% Every code point after the first is prefixed with ", 16#".
    Trailing = lists:map(fun(Hex) -> [", 16#", Hex] end, Others),
    ["entity(\"", Name, "\") -> [16#", Lead,
     Trailing,
     "]"].


%% Collect the `{Title, CodePoints}' tuples found anywhere below `Root',
%% starting the traversal with an empty accumulator.
search(Root) ->
    NothingYet = [],
    search(Root, NothingYet).

%% Depth-first walk over a parsed HTML tree, prepending one
%% `{Title, [HexCodePoint]}' tuple to `Acc' for every entity table row.
%%
%% An entity row is a `tr' element whose FIRST attribute is an `id' starting
%% with "entity-". Its first cell must hold a `code' element containing the
%% entity name followed by ";", and its second cell the "U+XXXX" code points.
%% A row that does not have this shape crashes with `badmatch' on purpose.
%%
%% Because results are prepended, the returned list is in reverse document
%% order.
search({<<"tr">>, [{<<"id">>, <<"entity-", _/binary>>} | _], Children}, Acc) ->
    %% HTML5 charrefs can have more than one code point(!)
    [{<<"td">>, _, [{<<"code">>, _, [TitleSemi]}]},
     {<<"td">>, [], [RawCPs]} | _] = Children,
    %% Strip the trailing ";" from the entity name.
    L = byte_size(TitleSemi) - 1,
    <<Title:L/binary, $;>> = TitleSemi,
    %% With `{capture, all, binary}' each match is [WholeMatch, HexDigits].
    {match, Matches} = re:run(RawCPs, "(?:\\s*U\\+)([a-fA-F0-9]+)",
                              [{capture, all, binary}, global]),
    [{Title, [CP || [_, CP] <- Matches]} | Acc];
%% Non-element nodes carry no entity rows and are skipped. Without these
%% clauses a comment anywhere in the page crashed the walk with
%% `function_clause', and a 3-tuple processing instruction was mistaken for
%% an element. NOTE(review): node shapes taken from mochiweb_html's
%% documented tree format -- confirm against the bundled mochiweb version.
search({comment, _Text}, Acc) ->
    Acc;
search({pi, _Raw}, Acc) ->
    Acc;
search({pi, _Tag, _Attrs}, Acc) ->
    Acc;
%% Any other element: visit its children left to right, threading Acc.
search({_Tag, _Attrs, Children}, Acc) when is_list(Children) ->
    lists:foldl(fun search/2, Acc, Children);
%% Text node.
search(<<_/binary>>, Acc) ->
    Acc.