#!/usr/bin/env python3
#
# make-usb - Creates a file containing vendor and product ids.
# It use the databases from
# - The USB ID Repository: https://usb-ids.gowdy.us (http://www.linux-usb.org), mirrored at Sourceforge
# - libgphoto2 from gPhoto: https://github.com/gphoto/libgphoto2 (http://gphoto.org), available at GitHub
# to create our file epan/dissectors/usb.c

import re
import sys
import urllib.request, urllib.error, urllib.parse

MODE_IDLE           = 0
MODE_VENDOR_PRODUCT = 1
MIN_VENDORS = 3400 # 3409 as of 2020-11-15
MIN_PRODUCTS = 20000 # 20361 as of 2020-11-15

mode = MODE_IDLE

req_headers = { 'User-Agent': 'Wireshark make-usb' }
req = urllib.request.Request('https://sourceforge.net/p/linux-usb/repo/HEAD/tree/trunk/htdocs/usb.ids?format=raw', headers=req_headers)
response = urllib.request.urlopen(req)
lines = response.read().decode('UTF-8', 'replace').splitlines()

vendors  = dict()
products = dict()
vendors_str="static const value_string usb_vendors_vals[] = {\n"
products_str="static const value_string usb_products_vals[] = {\n"

# Escape backslashes, quotes, control characters and non-ASCII characters.
escapes = {}
for i in range(256):
    if i in b'\\"':
        escapes[i] = '\\%c' % i
    elif i in range(0x20, 0x80) or i in b'\t':
        escapes[i] = chr(i)
    else:
        escapes[i] = '\\%03o' % i

for utf8line in lines:
    # Convert single backslashes to double (escaped) backslashes, escape quotes, etc.
    utf8line = utf8line.rstrip()
    utf8line = re.sub(r"\?+", "?", utf8line)
    line = ''.join(escapes[byte] for byte in utf8line.encode('utf8'))

    if line == "# Vendors, devices and interfaces. Please keep sorted.":
        mode = MODE_VENDOR_PRODUCT
        continue
    elif line == "# List of known device classes, subclasses and protocols":
        mode = MODE_IDLE
        continue

    if mode == MODE_VENDOR_PRODUCT:
        if re.match("^[0-9a-f]{4}", line):
            last_vendor=line[:4]
            vendors[last_vendor] = line[4:].strip()
        elif re.match("^\t[0-9a-f]{4}", line):
            line = line.strip()
            product = "%s%s"%(last_vendor, line[:4])
            products[product] = line[4:].strip()

req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/library.c', headers=req_headers)
response = urllib.request.urlopen(req)
lines = response.read().decode('UTF-8', 'replace').splitlines()

mode = MODE_IDLE

for line in lines:
    if mode == MODE_IDLE and re.match(r".*\bmodels\[\]", line):
        mode = MODE_VENDOR_PRODUCT
        continue

    if mode == MODE_VENDOR_PRODUCT and re.match(r"};", line):
        mode = MODE_IDLE

    if mode == MODE_IDLE:
        continue

    m = re.match(r"\s*{\"(.*):(.*)\",\s*0x([0-9a-fA-F]{4}),\s*0x([0-9a-fA-F]{4}),.*},", line)
    if m is not None:
        manuf = m.group(1).strip()
        model = re.sub(r"\(.*\)", "", m.group(2)).strip()
        product = m.group(3) + m.group(4)
        products[product] = ' '.join((manuf, model))

req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/music-players.h', headers=req_headers)
response = urllib.request.urlopen(req)
lines = response.read().decode('UTF-8', 'replace').splitlines()

for line in lines:
    m = re.match(r"\s*{\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),", line)
    if m is not None:
        manuf = m.group(1).strip()
        model = m.group(3).strip()
        product = m.group(2) + m.group(4)
        products[product] = ' '.join((manuf, model))


if (len(vendors) < MIN_VENDORS):
    sys.stderr.write("Not enough vendors: %d\n" % len(vendors))
    sys.exit(1)

if (len(products) < MIN_PRODUCTS):
    sys.stderr.write("Not enough products: %d\n" % len(products))
    sys.exit(1)

vendors = {k.lower(): v for k, v in vendors.items()}
for v in sorted(vendors):
    vendors_str += "    { 0x%s, \"%s\" },\n"%(v,vendors[v])

vendors_str += """    { 0, NULL }\n};
value_string_ext ext_usb_vendors_vals = VALUE_STRING_EXT_INIT(usb_vendors_vals);
"""

products = {k.lower(): v for k, v in products.items()}
for p in sorted(products):
    products_str += "    { 0x%s, \"%s\" },\n"%(p,products[p])

products_str += """    { 0, NULL }\n};
value_string_ext ext_usb_products_vals = VALUE_STRING_EXT_INIT(usb_products_vals);
"""

header="""/* usb.c
 * USB vendor id and product ids
 * This file was generated by running python ./tools/make-usb.py
 * Don't change it directly.
 *
 * Copyright 2012, Michal Labedzki for Tieto Corporation
 *
 * Other values imported from libghoto2/camlibs/ptp2/library.c, music-players.h
 *
 * Copyright (C) 2001-2005 Mariusz Woloszyn <emsi@ipartners.pl>
 * Copyright (C) 2003-2013 Marcus Meissner <marcus@jet.franken.de>
 * Copyright (C) 2005 Hubert Figuiere <hfiguiere@teaser.fr>
 * Copyright (C) 2009 Axel Waggershauser <awagger@web.de>
 * Copyright (C) 2005-2007 Richard A. Low <richard@wentnet.com>
 * Copyright (C) 2005-2012 Linus Walleij <triad@df.lth.se>
 * Copyright (C) 2007 Ted Bullock
 * Copyright (C) 2012 Sony Mobile Communications AB
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
 * XXX We should probably parse a USB ID file at program start instead
 * of generating this file.
 */

#include "config.h"
#include <epan/packet.h>
"""

f = open('epan/dissectors/usb.c', 'w')
f.write(header)
f.write("\n")
f.write(vendors_str)
f.write("\n\n")
f.write(products_str)
f.write("\n")
f.close()

print("Success!")