1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
|
#!/usr/bin/env python3
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
'''Update the "manuf" file.
Make-manuf creates a file containing ethernet OUIs and their company
IDs from the databases at IEEE.
'''
import csv
import html
import io
import os
import re
import sys
import urllib.request, urllib.error, urllib.parse
# PyICU is optional. When it is available, shorten() truncates names by
# grapheme cluster instead of by code point.
# Use the grapheme or segments module instead?
try:
    import icu
except ImportError:
    have_icu = False
else:
    have_icu = True
def exit_msg(msg=None, status=1):
    '''Print an optional error message and the module help text, then exit.

    msg: text written to stderr first, followed by a blank line. Nothing
        is written for it when it is None.
    status: exit status passed to sys.exit().

    Raises SystemExit; this function never returns.
    '''
    if msg is not None:
        sys.stderr.write(msg + '\n\n')
    # __doc__ is None when docstrings are stripped (python -OO). Skip the
    # help text then, so the caller's message and exit status are not
    # replaced by a TypeError.
    if __doc__ is not None:
        sys.stderr.write(__doc__ + '\n')
    sys.exit(status)
def open_url(url):
    '''Fetch one registry file and return its contents as text.

    url: a two-element sequence [base_url, file_name].

    When a first command-line argument is present it is treated as a
    directory and file_name is read from there instead of the network.
    Otherwise the two elements are joined with '/' and fetched with a
    'Wireshark make-manuf' User-Agent.

    Returns the body as a str, decoded as UTF-8 with undecodable bytes
    replaced and U+200E (LEFT-TO-RIGHT MARK) removed, suitable for
    csv.reader.

    A failed download is reported through exit_msg(), which exits the
    process. Errors reading a local file propagate to the caller.
    '''
    if len(sys.argv) > 1:
        url_path = os.path.join(sys.argv[1], url[1])
        # Decode and clean exactly like the network path below, so the
        # result does not depend on the platform's default encoding.
        with open(url_path, encoding='UTF-8', errors='replace') as url_fd:
            body = url_fd.read()
        return body.replace('\u200e', '')

    url_path = '/'.join(url)
    req_headers = {'User-Agent': 'Wireshark make-manuf'}
    try:
        req = urllib.request.Request(url_path, headers=req_headers)
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(req) as response:
            body = response.read().decode('UTF-8', 'replace').replace('\u200e', '')
    except urllib.error.HTTPError as e:
        exit_msg(f'Error {e.code} opening {url_path}: {e.reason}')
    except urllib.error.URLError as e:
        exit_msg(f'Error opening {url_path}: {e.reason}')
    except Exception as e:
        exit_msg(f'Error opening {url_path}: {e}')
    return body
# Regex alternation of business types and other filler words. shorten()
# strips these after punctuation has been removed, ignoring case.
# More examples at https://en.wikipedia.org/wiki/Incorporation_(business)
general_terms = '|'.join((
    r' a +s\b',  # A/S and A.S. but not "As" as in "Connect As".
    r' ab\b',  # Also follows "Oy", which is covered below.
    r' ag\b',
    r' b ?v\b',
    r' closed joint stock company\b',
    r' co\b',
    r' company\b',
    r' corp\b',
    r' corporation\b',
    r' corporate\b',
    r' de c ?v\b',  # Follows "S.A.", which is covered separately below.
    r' gmbh\b',
    r' holding\b',
    r' inc\b',
    r' incorporated\b',
    r' jsc\b',
    r' kg\b',
    r' k k\b',  # "K.K." as in "kabushiki kaisha", but not "K+K" as in "K+K Messtechnik".
    r' limited\b',
    r' llc\b',
    r' ltd\b',
    r' n ?v\b',
    r' oao\b',
    r' of\b',
    r' open joint stock company\b',
    r' ooo\b',
    r' oü\b',
    r' oy\b',
    r' oyj\b',
    r' plc\b',
    r' pty\b',
    r' pvt\b',
    r' s ?a ?r ?l\b',
    r' s ?a\b',
    r' s ?p ?a\b',
    r' sp ?k\b',
    r' s ?r ?l\b',
    r' systems\b',
    r'\bthe\b',
    r' zao\b',
    r' z ?o ?o\b',
))
# Chinese company names tend to start with the location. shorten() drops a
# leading word found in this list (compared in lower case). The list is not
# exhaustive.
skip_start = [
    'shengzen', 'shenzhen', 'beijing',
    'shanghai', 'wuhan', 'hangzhou',
    'guangxi', 'guangdong', 'chengdu',
]
# Special cases handled directly: cleaned-up name -> short name to use.
special_case = {
    "Advanced Micro Devices": "AMD",
    # 杭州德澜科技有限公司(HangZhou Delan Technology Co.,Ltd)
    "杭州德澜科技有限公司": "DelanTech",
}
def shorten(manuf):
    '''Convert a long manufacturer name to abbreviated and short names.

    manuf: organization name as it appears in the registry data.

    Returns a two-element list [short, long]:
      short: the name with parenthesized text, punctuation, the terms in
          general_terms, a leading location from skip_start and all
          spaces removed, then truncated to 12 characters (grapheme
          clusters when PyICU is available, code points otherwise).
      long: the whitespace-normalized original, title-cased if it was all
          upper case; None when the short name already equals the
          original, ignoring case.

    Exits the process with status 1 if nothing is left of the name.
    '''
    # Normalize whitespace.
    manuf = ' '.join(manuf.split())
    orig_manuf = manuf
    # Convert all caps to title case
    if manuf.isupper():
        manuf = manuf.title()
    # Remove the contents of parenthesis as ancillary data
    manuf = re.sub(r"\(.*\)", '', manuf)
    # Remove the contents of fullwidth parenthesis (mostly in Asian names).
    # The delimiters are written as escapes (U+FF08 and U+FF09) on purpose:
    # the ASCII look-alike "(.*)" is a capture group that matches the whole
    # string and would delete every name.
    manuf = re.sub(r"\uff08.*\uff09", '', manuf)
    # Remove "a" before removing punctuation ("Aruba, a Hewlett [...]" etc.)
    manuf = manuf.replace(" a ", " ")
    # Remove any punctuation
    # XXX Use string.punctuation? Note that it includes '-' and '*'.
    manuf = re.sub(r"[\"',./:()+-]", ' ', manuf)
    # XXX For some reason including the double angle brackets in the above
    # regex makes it bomb
    manuf = re.sub(r"[«»“”]", ' ', manuf)
    # & isn't needed when standalone
    manuf = manuf.replace(" & ", " ")
    # Remove business types and other general terms ("the", "inc", "plc", etc.)
    plain_manuf = re.sub(general_terms, '', manuf, flags=re.IGNORECASE)
    # ...but make sure we don't remove everything.
    if not all(s == ' ' for s in plain_manuf):
        manuf = plain_manuf

    manuf = manuf.strip()

    # Check for special case
    if manuf in special_case:
        manuf = special_case[manuf]

    # XXX: Some of the entries have Chinese city or other location
    # names written with spaces between each character, like
    # Bei jing, Wu Han, Shen Zhen, etc. We should remove that too.
    split = manuf.split()
    if len(split) > 1 and split[0].lower() in skip_start:
        manuf = ' '.join(split[1:])

    # Remove all spaces
    manuf = re.sub(r'\s+', '', manuf)

    if not manuf:
        sys.stderr.write('Manufacturer "{}" shortened to nothing.\n'.format(orig_manuf))
        sys.exit(1)

    # Truncate names to a reasonable length, say, 12 characters. If
    # the string contains UTF-8, this may be substantially more than
    # 12 bytes. It might also be less than 12 visible characters. Plain
    # Python slices Unicode strings by code point, which is better
    # than raw bytes but not as good as grapheme clusters. PyICU
    # supports grapheme clusters. https://bugs.python.org/issue30717
    #
    # Truncate by code points
    trunc_len = 12

    if have_icu:
        # Truncate by grapheme clusters: use the offset of the last
        # boundary among the first 12 reported by the break iterator.
        bi_ci = icu.BreakIterator.createCharacterInstance(icu.Locale('en_US'))
        bi_ci.setText(manuf)
        bounds = list(bi_ci)
        bounds = bounds[0:trunc_len]
        trunc_len = bounds[-1]

    manuf = manuf[:trunc_len]

    if manuf.lower() == orig_manuf.lower():
        # Original manufacturer name was short and simple.
        return [manuf, None]

    # At least one entry has whitespace in front of a period.
    mixed_manuf = re.sub(r'\s+\.', '.', orig_manuf)
    # If company is all caps, convert to mixed case (so it doesn't look
    # like we're screaming the company name)
    if mixed_manuf.upper() == mixed_manuf:
        mixed_manuf = mixed_manuf.title()

    return [manuf, mixed_manuf]
# Registry kinds. prefix_to_oui() returns MA_L for 24-bit, MA_M for 28-bit
# and MA_S for 36-bit assignments. main() uses these values as dictionary
# keys and writes them verbatim into the generated registry table.
MA_L, MA_M, MA_S = 'MA_L', 'MA_M', 'MA_S'
def prefix_to_oui(prefix, prefix_map):
    '''Convert a hex assignment prefix to its colon-separated form and kind.

    prefix: the assignment as a string of hex digits, four bits per
        character (6, 7 or 9 characters for 24, 28 or 36 bits).
    prefix_map: dict updated in place. For 28- and 36-bit assignments the
        leading 24 bits ("AA:BB:CC") are mapped to the assignment kind.

    Returns a tuple (oui, kind):
      24 bits: ("AA:BB:CC", MA_L)
      28 bits: ("AA:BB:CC:D0:00:00/28", MA_M)
      36 bits: ("AA:BB:CC:DD:E0:00/36", MA_S)

    Raises ValueError for any other prefix length; prefix_map is left
    untouched in that case.
    '''
    def colon_pairs(hex_digits):
        # Group the digits two by two: "0050C2" -> "00:50:C2".
        return ':'.join(hi + lo for hi, lo in zip(hex_digits[0::2], hex_digits[1::2]))

    pfx_len = len(prefix) * 4
    oui24 = colon_pairs(prefix[:6])

    if pfx_len == 24:
        # 24-bit OUI assignment, no mask
        return oui24, MA_L

    # Other lengths which require a mask.
    if pfx_len == 28:
        kind = MA_M
    elif pfx_len == 36:
        kind = MA_S
    else:
        # Fail with a clear message instead of an unbound 'kind' below.
        raise ValueError('Unsupported prefix length of {:d} bits: "{}"'.format(pfx_len, prefix))

    # Pad to a full 48-bit address before formatting.
    oui = colon_pairs(prefix.ljust(12, '0'))
    prefix_map[oui24] = kind

    return '{}/{:d}'.format(oui, pfx_len), kind
def _write_file_header(manuf_fd):
    '''Write the C comment that introduces the generated file.'''
    manuf_fd.write('\n'.join((
        '/*',
        '* This file was generated by running ./tools/make-manuf.py.',
        '*',
        '* SPDX-License-Identifier: GPL-2.0-or-later',
        '*',
        '* The data below has been assembled from the following sources:',
        '*',
        '* The IEEE public OUI listings available from:',
        '* <http://standards-oui.ieee.org/oui/oui.csv>',
        '* <http://standards-oui.ieee.org/cid/cid.csv>',
        '* <http://standards-oui.ieee.org/iab/iab.csv>',
        '* <http://standards-oui.ieee.org/oui28/mam.csv>',
        '* <http://standards-oui.ieee.org/oui36/oui36.csv>',
        '*',
        '*/',
        '',
    )))


def _octet_initializers(oui, count):
    '''Return the first `count` octets of "AA:BB:CC..." as "0xAA, 0xBB, ...".'''
    return ', '.join('0x' + oui[i:i + 2] for i in range(0, 3 * count, 3))


def _write_registry_table(manuf_fd, prefix_map):
    '''Write the table mapping 24-bit prefixes to their registry kind.'''
    manuf_fd.write("static const manuf_registry_t ieee_registry_table[] = {\n")
    for oui in sorted(prefix_map):
        manuf_fd.write(" {{ {{ {} }}, {} }},\n".format(_octet_initializers(oui, 3), prefix_map[oui]))
    manuf_fd.write("};\n")


def _write_manuf_table(manuf_fd, c_type, table_name, entries, octets, name_col):
    '''Write one sorted table of { octets, "short name", "long name" } rows.

    entries maps an OUI string to [short, long]; a false long name falls
    back to the short one. name_col is the column the long name is padded
    to when the row prefix is shorter than that.
    '''
    manuf_fd.write("static const {} {}[] = {{\n".format(c_type, table_name))
    for oui in sorted(entries):
        short = entries[oui][0]
        long = entries[oui][1] or short
        line = ' {{ {{ {} }}, "{}", '.format(_octet_initializers(oui, octets), short)
        line = line.ljust(name_col)
        # Only the long name can still contain double quotes; escape them for C.
        line += '"{}" }},\n'.format(long.replace('"', '\\"'))
        manuf_fd.write(line)
    manuf_fd.write("};\n")


def main():
    '''Download the IEEE registries and regenerate epan/manuf-data.c.

    Each registry is fetched with open_url(), its names are shortened with
    shorten(), and the entries are collected per assignment kind. The run
    is aborted through exit_msg() when a registry, or the total, has fewer
    entries than expected. Otherwise the C tables are written and per
    registry statistics are printed.
    '''
    manuf_path = os.path.join(os.path.dirname(__file__), '..', 'epan', 'manuf-data.c')

    ieee_d = {
        'OUI': { 'url': ["https://standards-oui.ieee.org/oui/", "oui.csv"], 'min_entries': 1000 },
        'CID': { 'url': ["https://standards-oui.ieee.org/cid/", "cid.csv"], 'min_entries': 75 },
        'IAB': { 'url': ["https://standards-oui.ieee.org/iab/", "iab.csv"], 'min_entries': 1000 },
        'OUI28': { 'url': ["https://standards-oui.ieee.org/oui28/", "mam.csv"], 'min_entries': 1000 },
        'OUI36': { 'url': ["https://standards-oui.ieee.org/oui36/", "oui36.csv"], 'min_entries': 1000 },
    }
    oui_d = {
        MA_L: { '00:00:00' : ['00:00:00', 'Officially Xerox, but 0:0:0:0:0:0 is more common'] },
        MA_M: {},
        MA_S: {},
    }

    min_total = 35000 # 35830 as of 2018-09-05
    total_added = 0

    # Add IEEE entries from each of their databases
    ieee_db_l = ['OUI', 'OUI28', 'OUI36', 'CID', 'IAB']

    # map a 24-bit prefix to MA-M/MA-S or none (MA-L by default)
    prefix_map = {}

    for db in ieee_db_l:
        db_info = ieee_d[db]
        db_url = db_info['url']
        db_info['skipped'] = 0
        db_info['added'] = 0
        db_info['total'] = 0
        print('Merging {} data from {}'.format(db, db_url))
        body = open_url(db_url)
        ieee_csv = csv.reader(body.splitlines())

        # Pop the title row. An empty body then falls through to the
        # "Too few entries" report below instead of raising StopIteration.
        next(ieee_csv, None)
        for ieee_row in ieee_csv:
            #Registry,Assignment,Organization Name,Organization Address
            #IAB,0050C2DD6,Transas Marine Limited,Datavagen 37 Askim Vastra Gotaland SE 436 32
            oui, kind = prefix_to_oui(ieee_row[1].upper(), prefix_map)
            manuf = ieee_row[2].strip()
            # The Organization Name field occasionally contains HTML entities. Undo them.
            manuf = html.unescape(manuf)
            # "Watts A\S"
            manuf = manuf.replace('\\', '/')
            if manuf == 'IEEE Registration Authority':
                # These are held for subdivision into MA-M/MA-S
                continue
            # Entries named 'Private' are deliberately kept.
            if oui in oui_d[kind]:
                action = 'Skipping'
                print('{} - {} IEEE "{}" in favor of "{}"'.format(oui, action, manuf, oui_d[kind][oui]))
                db_info['skipped'] += 1
            else:
                oui_d[kind][oui] = shorten(manuf)
                db_info['added'] += 1
            db_info['total'] += 1

        if db_info['total'] < db_info['min_entries']:
            exit_msg("Too few {} entries. Got {}, wanted {}".format(db, db_info['total'], db_info['min_entries']))
        total_added += db_info['total']

    if total_added < min_total:
        exit_msg("Too few total entries ({})".format(total_added))

    try:
        manuf_fd = io.open(manuf_path, 'w', encoding='UTF-8')
    except OSError:
        exit_msg("Couldn't open manuf file for writing ({}) ".format(manuf_path))

    # The with block closes the file even if one of the writes fails.
    with manuf_fd:
        _write_file_header(manuf_fd)

        # Write the prefix map
        _write_registry_table(manuf_fd, prefix_map)
        manuf_fd.write("\n")

        # write the MA-L table
        _write_manuf_table(manuf_fd, 'manuf_oui24_t', 'global_manuf_oui24_table', oui_d[MA_L], 3, 44)
        manuf_fd.write("\n")

        # write the MA-M table
        _write_manuf_table(manuf_fd, 'manuf_oui28_t', 'global_manuf_oui28_table', oui_d[MA_M], 4, 50)
        manuf_fd.write("\n")

        # write the MA-S table
        _write_manuf_table(manuf_fd, 'manuf_oui36_t', 'global_manuf_oui36_table', oui_d[MA_S], 5, 56)

    for db in ieee_d:
        print('{:<20}: {}'.format('IEEE ' + db + ' added', ieee_d[db]['added']))
    print('{:<20}: {}'.format('Total added', total_added))

    print()
    for db in ieee_d:
        print('{:<20}: {}'.format('IEEE ' + db + ' total', ieee_d[db]['total']))

    print()
    for db in ieee_d:
        print('{:<20}: {}'.format('IEEE ' + db + ' skipped', ieee_d[db]['skipped']))
# Regenerate the manuf data only when this file is run as a script, not
# when it is imported.
if __name__ == '__main__':
    main()
|