1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
|
#!/usr/bin/env python
"""Create a MID csv table from the ITU web page.
Locations with more than one line of MID values will have a dash in
their country name. After creating a draft mid.csv, you will need
to edit those entries.
There are multiple locations listed for a MID (e.g. 306). It is unclear how
to handle those.
"""
import re
import urllib2
from bs4 import BeautifulSoup
# Script body: fetch the ITU MID table and emit two draft files:
#   mid2.csv - one `prefix,"country"` row per MID value
#   dacs.h   - one `AIS_DAC_<mid>_<COUNTRY> = <mid>,` enumerator line per MID
mid_url = 'http://www.itu.int/online/mms/glad/cga_mids.sh?lng=E'

# Read the whole page, then release the connection explicitly instead of
# leaving the response open for the rest of the run.
response = urllib2.urlopen(mid_url)
try:
    data = response.read()
finally:
    response.close()

soup = BeautifulSoup(data)

with open('dacs.h', 'w') as dac_out, open('mid2.csv', 'w') as mid_out:
    mid_out.write("""prefix,country
# prefix is used as DAC for binary messages or as the 1st three of the MMSI
# http://www.itu.int/online/mms/glad/cga_mids.sh?lng=E\n""")

    for tr in soup.find_all('tr'):
        td = tr.find('td')
        if td is None:
            # Row without any <td> cell (e.g. a header row): nothing to parse.
            continue

        text = td.get_text()
        if not re.match(r'^\d{3}', text):
            # First cell does not start with a three digit MID; skip the row.
            continue

        # A single cell may list several comma separated MIDs.
        mid_vals = [int(val) for val in text.split(',')]
        country = td.find_next_sibling().get_text().strip()

        # Build the identifier suffix once per row: spaces become
        # underscores, anything from the first '(' on is dropped, trailing
        # underscores are trimmed and only the part before ' - ' is kept.
        header_country = country.replace(' ', '_').split('(')[0].upper()
        header_country = header_country.rstrip('_')
        header_country = header_country.split('_-_')[0]

        for mid in mid_vals:
            # A write can raise UnicodeEncodeError (presumably for country
            # names with non-ASCII characters).  Such rows are flagged with
            # a "BAD mid" line for manual cleanup instead of aborting.
            try:
                mid_out.write('%s,"%s"\n' % (mid, country))
            except UnicodeEncodeError:
                mid_out.write('BAD mid %s\n' % mid)
                print('BAD mid %s' % mid)

            try:
                dac_out.write(' AIS_DAC_%d_%s = %d,\n' % (mid,
                                                          header_country,
                                                          mid))
            except UnicodeEncodeError:
                dac_out.write('BAD mid %s\n' % mid)

# WARNING: This does not handle the last line of the table.
|