File: make_entities_inc.py

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (32 lines) | stat: -rw-r--r-- 957 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc

import html.entities

# Python's table of HTML5 named character references.  Keys are entity
# names, values are the replacement text.
entities5 = html.entities.html5

# Keep only the names that end in a semicolon, with the semicolon stripped.
# The table lists a few legacy entities (like auml) twice, once with and
# once without the trailing ';'; dropping the bare form leaves exactly one
# row per name.  Each row is (name, UTF-8 bytes of the replacement text),
# sorted for the binary lookup table.
entities = sorted(
    (name[:-1], text.encode('utf-8'))
    for name, text in entities5.items()
    if name.endswith(';')
)

# Fixed preamble of the generated file, one element per output line.  The
# whitespace inside the struct (a tab, then eight spaces) is reproduced
# exactly so regenerating does not churn the checked-in entities.inc.
_HEADER_LINES = (
    "/* Autogenerated by tools/make_entities_inc.py */",
    "",
    "struct cmark_entity_node {",
    "\tunsigned char *entity;",
    "        unsigned char bytes[8];",
    "};",
    "",
    "#define CMARK_ENTITY_MIN_LENGTH 2",
    "#define CMARK_ENTITY_MAX_LENGTH 32",
)


def _format_entity_node(name, utf8_bytes):
    """Return the C initializer line for one cmark_entity_node.

    name is the entity name without its semicolon; utf8_bytes is the
    replacement text as bytes.  The bytes are written as decimal values
    followed by a terminating 0.
    """
    byte_list = ', '.join(map(str, utf8_bytes))
    return '{{(unsigned char*)"{}", {{{}, 0}}}},'.format(name, byte_list)


def render_entities_inc(entity_table):
    """Return the full text of entities.inc for entity_table.

    entity_table is a sequence of (name, utf8_bytes) pairs, emitted in the
    order given.  The result has no trailing newline; print() supplies it.
    """
    lines = list(_HEADER_LINES)
    lines.append("#define CMARK_NUM_ENTITIES {}".format(len(entity_table)))
    lines.append("")
    lines.append("static const struct cmark_entity_node cmark_entities[] = {")
    lines.extend(_format_entity_node(name, bs) for name, bs in entity_table)
    lines.append("};")
    return "\n".join(lines)


def main():
    """Write the generated C source to standard output."""
    print(render_entities_inc(entities))


if __name__ == "__main__":
    main()