File: mid.py

package info (click to toggle)
python-libais 0.17%2Bgit.20190917.master.e464cf8-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,828 kB
  • sloc: cpp: 56,058; python: 11,979; makefile: 537; sh: 466
file content (63 lines) | stat: -rwxr-xr-x 1,881 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
"""Create a MID csv table from the ITU web page.

Locations with more than one line of MID values will have a dash in
their country name.  After creating the draft table (written to mid2.csv),
you will need to edit those entries by hand.

There are multiple locations listed for a MID (e.g. 306).  It is unclear how
to handle those.
"""

import re
import urllib2
from bs4 import BeautifulSoup

# ITU page listing the Maritime Identification Digits (MID) table.
mid_url = 'http://www.itu.int/online/mms/glad/cga_mids.sh?lng=E'

# Download the page.  The response object is closed explicitly so the
# underlying connection is released even if the read fails.
response = urllib2.urlopen(mid_url)
try:
  data = response.read()
finally:
  response.close()

# NOTE(review): no parser is named here, so bs4 picks whichever one is
# installed; the resulting tree may differ between machines.
soup = BeautifulSoup(data)

with open('dacs.h', 'w') as dac_out, open('mid2.csv', 'w') as mid_out:
  # mid2.csv gets one prefix,country row per MID; dacs.h gets one
  # "AIS_DAC_<mid>_<COUNTRY> = <mid>," line per MID.

  mid_out.write("""prefix,country
# prefix is used as DAC for binary messages or as the 1st three of the MMSI
# http://www.itu.int/online/mms/glad/cga_mids.sh?lng=E\n""")

  for tr in soup.find_all('tr'):
    td = tr.find('td')
    try:
      text = td.get_text()
    except AttributeError:
      # tr.find() returned None: this row has no <td> cell at all.
      continue

    # Only rows whose first cell starts with three digits carry MID values.
    if not re.match(r'^\d{3}', text):
      continue

    # A single cell may list several MIDs separated by commas.
    mid_vals = [int(val) for val in text.split(',')]
    country = td.findNextSibling().get_text().strip()

    for mid in mid_vals:
      try:
        mid_out.write('%s,"%s"\n' % (mid, country))
      except UnicodeEncodeError:
        # The country name could not be encoded for the output file; leave
        # a marker line so the entry can be fixed by hand.
        mid_out.write('BAD mid %s\n' % mid)
        print('BAD mid %s' % mid)

    # Identifier suffix for the header: spaces become underscores, anything
    # from the first '(' onwards is dropped, the name is upper-cased,
    # trailing underscores are stripped and only the part before a '_-_'
    # separator is kept.  It is the same for every MID of this row, so it
    # is computed once.
    header_country = country.replace(' ', '_').split('(')[0].upper()
    header_country = header_country.rstrip('_')
    header_country = header_country.split('_-_')[0]

    for mid in mid_vals:
      try:
        dac_out.write('  AIS_DAC_%d_%s = %d,\n' % (mid,
                                                  header_country,
                                                  mid))
      except UnicodeEncodeError:
        # Same marker as in the csv so the bad entry is easy to find.
        dac_out.write('BAD mid %s\n' % mid)

# WARNING: This does not handle the last line of the table.