1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
#!/usr/bin/env python3
#
# make-usb - Creates a file containing vendor and product ids.
# It uses the databases from
# - The USB ID Repository: https://usb-ids.gowdy.us (http://www.linux-usb.org), mirrored at Sourceforge
# - libgphoto2 from gPhoto: https://github.com/gphoto/libgphoto2 (http://gphoto.org), available at GitHub
# to create our file epan/dissectors/usb.c
import re
import sys
import urllib.request, urllib.error, urllib.parse
# Parser states shared by the line-oriented scanners below.
MODE_IDLE = 0
MODE_VENDOR_PRODUCT = 1

# Sanity thresholds: a run that collects fewer entries than this is treated
# as a truncated or malformed download and aborts before writing anything.
MIN_VENDORS = 3400 # 3409 as of 2020-11-15
MIN_PRODUCTS = 20000 # 20361 as of 2020-11-15

USB_IDS_URL = 'https://sourceforge.net/p/linux-usb/repo/HEAD/tree/trunk/htdocs/usb.ids?format=raw'
GPHOTO_LIBRARY_URL = 'https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/library.c'
GPHOTO_MUSIC_PLAYERS_URL = 'https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/music-players.h'
OUTPUT_PATH = 'epan/dissectors/usb.c'

REQ_HEADERS = { 'User-Agent': 'Wireshark make-usb' }

# Markers that delimit the vendor/product section of usb.ids.
USB_IDS_SECTION_BEGIN = "# Vendors, devices and interfaces. Please keep sorted."
USB_IDS_SECTION_END = "# List of known device classes, subclasses and protocols"

# Patterns are compiled once here instead of once per input line.
QUESTION_RUN_RE = re.compile(r"\?+")
VENDOR_LINE_RE = re.compile("^[0-9a-f]{4}")
PRODUCT_LINE_RE = re.compile("^\t[0-9a-f]{4}")
MODELS_TABLE_RE = re.compile(r".*\bmodels\[\]")
TABLE_END_RE = re.compile(r"};")
CAMERA_ENTRY_RE = re.compile(r"\s*{\"(.*):(.*)\",\s*0x([0-9a-fA-F]{4}),\s*0x([0-9a-fA-F]{4}),.*},")
PARENTHESIZED_RE = re.compile(r"\(.*\)")
MUSIC_PLAYER_ENTRY_RE = re.compile(r"\s*{\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),")

HEADER = """/* usb.c
* USB vendor id and product ids
* This file was generated by running python ./tools/make-usb.py
* Don't change it directly.
*
* Copyright 2012, Michal Labedzki for Tieto Corporation
*
* Other values imported from libghoto2/camlibs/ptp2/library.c, music-players.h
*
* Copyright (C) 2001-2005 Mariusz Woloszyn <emsi@ipartners.pl>
* Copyright (C) 2003-2013 Marcus Meissner <marcus@jet.franken.de>
* Copyright (C) 2005 Hubert Figuiere <hfiguiere@teaser.fr>
* Copyright (C) 2009 Axel Waggershauser <awagger@web.de>
* Copyright (C) 2005-2007 Richard A. Low <richard@wentnet.com>
* Copyright (C) 2005-2012 Linus Walleij <triad@df.lth.se>
* Copyright (C) 2007 Ted Bullock
* Copyright (C) 2012 Sony Mobile Communications AB
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
/*
* XXX We should probably parse a USB ID file at program start instead
* of generating this file.
*/
#include "config.h"
#include <epan/packet.h>
"""


def build_escapes():
    """Return a table mapping each byte value (0-255) to its C-literal text.

    Backslash and double quote get a leading backslash.  Printable ASCII
    (0x20-0x7e) and TAB pass through unchanged; TAB must survive because the
    usb.ids product lines are recognised by their leading tab *after*
    escaping.  Every other byte, including DEL (0x7f) and all non-ASCII
    bytes, becomes a three-digit octal escape.
    """
    table = {}
    for byte in range(256):
        if byte in b'\\"':
            table[byte] = '\\%c' % byte
        elif 0x20 <= byte < 0x7f or byte in b'\t':
            table[byte] = chr(byte)
        else:
            table[byte] = '\\%03o' % byte
    return table


# Escape backslashes, quotes, control characters and non-ASCII characters.
escapes = build_escapes()


def escape_line(utf8line):
    """Return *utf8line* as text that is safe inside a C string literal.

    Trailing whitespace is removed, every run of question marks is collapsed
    to a single '?', and the UTF-8 encoding of the result is mapped byte by
    byte through ``escapes``.
    """
    utf8line = QUESTION_RUN_RE.sub("?", utf8line.rstrip())
    return ''.join(escapes[byte] for byte in utf8line.encode('utf8'))


def fetch_lines(url):
    """Download *url* and return its content as a list of text lines.

    The body is decoded as UTF-8 with undecodable bytes replaced.  The HTTP
    response is closed even when reading fails.
    """
    req = urllib.request.Request(url, headers=REQ_HEADERS)
    with urllib.request.urlopen(req) as response:
        return response.read().decode('UTF-8', 'replace').splitlines()


def parse_usb_ids(lines):
    """Extract vendors and products from the lines of a usb.ids file.

    Only the section between the two marker comments is scanned.  A line
    starting with four lower-case hex digits defines a vendor; a line
    starting with one tab and four lower-case hex digits defines a product
    of the most recently seen vendor.

    Returns a ``(vendors, products)`` pair of dicts.  Vendor keys are the
    4-digit id, product keys are the 8-digit vendor+product id, and values
    are the C-escaped names.
    """
    vendors = {}
    products = {}
    mode = MODE_IDLE
    last_vendor = None
    for utf8line in lines:
        # Convert single backslashes to double (escaped) backslashes, escape quotes, etc.
        line = escape_line(utf8line)
        if line == USB_IDS_SECTION_BEGIN:
            mode = MODE_VENDOR_PRODUCT
            continue
        if line == USB_IDS_SECTION_END:
            mode = MODE_IDLE
            continue
        if mode != MODE_VENDOR_PRODUCT:
            continue
        if VENDOR_LINE_RE.match(line):
            last_vendor = line[:4]
            vendors[last_vendor] = line[4:].strip()
        elif PRODUCT_LINE_RE.match(line):
            if last_vendor is None:
                # A product line with no preceding vendor cannot be keyed;
                # skip it rather than fail on an undefined vendor id.
                continue
            line = line.strip()
            products[last_vendor + line[:4]] = line[4:].strip()
    return vendors, products


def parse_gphoto_cameras(lines):
    """Extract products from the ``models[]`` table of libgphoto2's library.c.

    Scanning starts after a line mentioning ``models[]`` and stops at the
    first line beginning with ``};``.  Each ``{"Manufacturer:Model", 0xVVVV,
    0xPPPP, ...},`` entry yields one product; parenthesised remarks are
    dropped from the model name.

    Returns a dict mapping the 8-digit vendor+product id (hex digit case as
    found in the source) to ``"Manufacturer Model"``.
    """
    products = {}
    mode = MODE_IDLE
    for line in lines:
        if mode == MODE_IDLE and MODELS_TABLE_RE.match(line):
            mode = MODE_VENDOR_PRODUCT
            continue
        if mode == MODE_VENDOR_PRODUCT and TABLE_END_RE.match(line):
            mode = MODE_IDLE
        if mode == MODE_IDLE:
            continue
        m = CAMERA_ENTRY_RE.match(line)
        if m is not None:
            manuf = m.group(1).strip()
            model = PARENTHESIZED_RE.sub("", m.group(2)).strip()
            products[m.group(3) + m.group(4)] = ' '.join((manuf, model))
    return products


def parse_gphoto_music_players(lines):
    """Extract products from the lines of libgphoto2's music-players.h.

    Each ``{ "Manufacturer", 0xVVVV, "Model", 0xPPPP, ...`` entry yields one
    product.  Returns a dict mapping the 8-digit vendor+product id to
    ``"Manufacturer Model"``.
    """
    products = {}
    for line in lines:
        m = MUSIC_PLAYER_ENTRY_RE.match(line)
        if m is not None:
            manuf = m.group(1).strip()
            model = m.group(3).strip()
            products[m.group(2) + m.group(4)] = ' '.join((manuf, model))
    return products


def format_value_strings(name, entries):
    """Return the C source of the ``usb_<name>_vals`` value_string table.

    Keys of *entries* are lower-cased first (when two keys differ only in
    case, the one inserted later wins) and the rows are emitted in sorted
    key order, followed by the terminator row and the matching
    ``value_string_ext`` definition.
    """
    lowered = {key.lower(): value for key, value in entries.items()}
    rows = [" { 0x%s, \"%s\" },\n" % (key, lowered[key]) for key in sorted(lowered)]
    opening = "static const value_string usb_%s_vals[] = {\n" % name
    closing = " { 0, NULL }\n};\n"
    ext = "value_string_ext ext_usb_%s_vals = VALUE_STRING_EXT_INIT(usb_%s_vals);\n" % (name, name)
    return opening + ''.join(rows) + closing + ext


def write_output(path, vendors, products):
    """Write the generated C file for *vendors* and *products* to *path*.

    The encoding is pinned to UTF-8 so the result does not depend on the
    locale, and the file is closed even if a write fails.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write(HEADER)
        f.write("\n")
        f.write(format_value_strings("vendors", vendors))
        f.write("\n\n")
        f.write(format_value_strings("products", products))
        f.write("\n")


def main():
    """Download the id databases and regenerate epan/dissectors/usb.c.

    Exits with status 1, writing nothing, when fewer than MIN_VENDORS
    vendors or MIN_PRODUCTS products were collected.
    """
    vendors, products = parse_usb_ids(fetch_lines(USB_IDS_URL))
    # Later sources override earlier ones for the same id.
    products.update(parse_gphoto_cameras(fetch_lines(GPHOTO_LIBRARY_URL)))
    products.update(parse_gphoto_music_players(fetch_lines(GPHOTO_MUSIC_PLAYERS_URL)))

    if len(vendors) < MIN_VENDORS:
        sys.stderr.write("Not enough vendors: %d\n" % len(vendors))
        sys.exit(1)
    if len(products) < MIN_PRODUCTS:
        sys.stderr.write("Not enough products: %d\n" % len(products))
        sys.exit(1)

    write_output(OUTPUT_PATH, vendors, products)
    print("Success!")


if __name__ == "__main__":
    main()
|