1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
#!/usr/bin/env python3
# create the enterprises.c file from
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# or an offline copy
#
# Copyright 2022 by Moshe Kaplan
# Based on make-sminmpec.pl by Gerald Combs
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
import os
import argparse
import re
import urllib.request
ENTERPRISES_CFILE = os.path.join('epan', 'enterprises.c')
ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"
DECIMAL_PATTERN = r"^(\d+)"
# up to three spaces because of formatting errors in the source
ORGANIZATION_PATTERN = r"^ ?(\S.*)"
FORMERLY_PATTERN = r" \(((formerly|previously) .*)\)"
LOOKUP_FUNCTION = r"""
const char* global_enterprises_lookup(uint32_t value)
{
if (value >= array_length(table)) {
return NULL;
}
return table[value];
}
"""
DUMP_FUNCTION = r"""
void global_enterprises_dump(FILE *fp)
{
for (size_t idx = 0; idx < array_length(table); idx++) {
if (table[idx] != NULL) {
fprintf(fp, "%zu\t%s\n", idx, table[idx]);
}
}
}
"""
# This intermediate format is no longer written to a file - returned as string
def generate_enterprise_entries(file_content):
# We only care about the "Decimal" and "Organization",
# not the contact or email
org_lines = []
last_updated = ""
end_seen = False
for line in file_content.splitlines():
decimal_match = re.match(DECIMAL_PATTERN, line)
if decimal_match:
decimal = decimal_match.group(0)
elif re.match(ORGANIZATION_PATTERN, line):
organization = line.strip()
if organization.lower() == "unassigned":
continue
organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
org_lines += [decimal + "\t" + organization]
elif "last updated" in line.lower():
last_updated = line
elif "end of document" in line.lower():
end_seen = True
if not end_seen:
raise Exception('"End of Document" not found. Truncated source file?')
last_updated_line = "/* " + last_updated + " */\n\n"
output = "\n".join(org_lines) + "\n"
return (output,last_updated_line)
class CFile:
def __init__(self, filename, last_updated_line):
self.filename = filename
self.f = open(filename, 'w')
self.mappings = {}
self.highest_num = 0
# Write file header
self.f.write('/* ' + os.path.basename(self.filename) + '\n')
self.f.write(' *\n')
self.f.write(' * Wireshark - Network traffic analyzer\n')
self.f.write(' * By Gerald Combs <gerald@wireshark.org>\n')
self.f.write(' * Copyright 1998 Gerald Combs\n')
self.f.write(' *\n')
self.f.write(' * Do not edit - this file is automatically generated\n')
self.f.write(' * SPDX-License-Identifier: GPL-2.0-or-later\n')
self.f.write(' */\n\n')
self.f.write(last_updated_line)
# Include header files
self.f.write('#include "config.h"\n\n')
self.f.write('#include <stddef.h>\n')
self.f.write('#include <wsutil/array.h>\n')
self.f.write('#include "enterprises.h"\n')
self.f.write('\n\n')
def __del__(self):
# Write static table
self.f.write('static const char * const table[] =\n')
self.f.write('{\n')
# Largest index
# Entries (read from dict)
for n in range(0, self.highest_num+1):
if n not in self.mappings:
# There are some gaps, write a NULL entry so can lookup by index
line = ' NULL'
else:
line = ' "' + self.mappings[n] + '"'
# Add comma.
if n < self.highest_num:
line += ','
# Add number as aligned comment.
line += ' '*(90-len(line)) + '// ' + str(n)
self.f.write(line+'\n')
# End of array
self.f.write('};\n')
print('Re-generated', self.filename)
# Lookup function
self.f.write(LOOKUP_FUNCTION)
# Dump function
self.f.write(DUMP_FUNCTION)
# Add an individual mapping to the function
def addMapping(self, num, name):
# Handle some escapings
name = name.replace('\\', '\\\\')
name = name.replace('"', '""')
# Record.
self.mappings[num] = name
self.highest_num = num if num>self.highest_num else self.highest_num
def main():
parser = argparse.ArgumentParser(description="Create the {} file.".format(ENTERPRISES_CFILE))
parser.add_argument('--infile')
parser.add_argument('outfile', nargs='?', default=ENTERPRISES_CFILE)
parsed_args = parser.parse_args()
# Read data from file or webpage
if parsed_args.infile:
with open(parsed_args.infile, encoding='utf-8') as fh:
data = fh.read()
else:
with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
if f.status != 200:
raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + f.status)
data = f.read().decode('utf-8').replace(u'\u200e', '')
# Find bits we need and generate enterprise entries
enterprises_content,last_updated_line = generate_enterprise_entries(data)
# Now write to a C file the contents (which is faster than parsing the global file at runtime).
c_file = CFile(parsed_args.outfile, last_updated_line)
mapping_re = re.compile(r'^(\d+)\s+(.*)$')
for line in enterprises_content.splitlines():
match = mapping_re.match(line)
if match:
num, name = match.group(1), match.group(2)
# Strip any comments and/or trailing whitespace
idx = name.find('#')
if idx != -1:
name = name[0:idx]
name = name.rstrip()
# Add
c_file.addMapping(int(num), name)
if __name__ == "__main__":
main()
|