File: isbn.py

package info (click to toggle)
python-stdnum 2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,960 kB
  • sloc: python: 10,600; javascript: 6,995; sh: 13; makefile: 10
file content (88 lines) | stat: -rwxr-xr-x 3,195 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3

# update/isbn.py - script to get ISBN prefix data
#
# Copyright (C) 2010-2026 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""This script downloads XML data from the International ISBN Agency
website and provides a compact form of all group prefixes, and registrant
ranges for those prefixes suitable for the numdb module. This data is needed
to correctly split ISBNs into an EAN.UCC prefix, a group prefix, a registrant,
an item number and a check-digit."""

import lxml.etree
import requests


# the location of the ISBN Ranges XML file
download_url = 'https://www.isbn-international.org/export_rangemessage.xml'


# The user agent that will be passed in requests
user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)'


def ranges(group):
    """Yield the truncated ranges for every rule of the group.

    Each ``Rules/Rule`` element found under ``group`` is inspected: the
    text of its ``Length`` element is parsed as an integer and, when that
    is non-zero, the text of its ``Range`` element is split on ``-``, each
    part is cut down to ``Length`` characters and the parts are joined
    with ``-`` again. Rules with a ``Length`` of 0 produce no output.
    """
    for rule in group.findall('./Rules/Rule'):
        length = int(rule.find('./Length').text.strip())
        if not length:
            # a zero length rule does not contribute a range
            continue
        bounds = rule.find('./Range').text.strip().split('-')
        yield '-'.join(bound[:length] for bound in bounds)


def wrap(text):
    """Rewrap the provided comma-separated text into lines.

    This is a generator that yields successive pieces of ``text``. A
    remainder of at most 73 characters is yielded as is. A longer
    remainder is split at the last comma found between positions 20 and
    72, so the yielded piece is at most 72 characters long; the comma at
    the split point is dropped.

    If there is no comma in that window the piece cannot be kept short:
    the split is then made at the first comma at or after position 73, or,
    when there is none at all, the whole remainder is yielded as a single
    over-long line instead of raising ValueError.
    """
    while text:
        i = len(text)
        if i > 73:
            # prefer the last comma that keeps the line short enough
            i = text.rfind(',', 20, 73)
            if i < 0:
                # no usable comma in the window, use the next one instead
                i = text.find(',', 73)
            if i < 0:
                # no comma left to split on, emit the rest in one go
                i = len(text)
        yield text[:i]
        # skip over the comma that the line was split on
        text = text[i + 1:]


if __name__ == '__main__':
    # the header lines are written before the download is attempted
    print('# generated from RangeMessage.xml, downloaded from')
    print('# %s' % download_url)
    request_headers = {'User-Agent': user_agent}
    response = requests.get(download_url, timeout=30, headers=request_headers)
    # abort on HTTP error responses
    response.raise_for_status()

    # turn the raw response body into an element tree
    root = lxml.etree.fromstring(response.content)

    # output the serial number and date found in the document
    serial = root.find('./MessageSerialNumber').text.strip()
    print('# file serial %s' % serial)
    date = root.find('./MessageDate').text.strip()
    print('# file date %s' % date)

    # map each EAN.UCC prefix to the element that holds its rules
    ean_ucc_by_prefix = {
        element.find('./Prefix').text.strip(): element
        for element in root.findall('./EAN.UCCPrefixes/EAN.UCC')}

    # the top-level prefix that was output most recently
    current_top = None
    for group in root.findall('./RegistrationGroups/Group'):
        full_prefix = group.find('./Prefix').text.strip()
        top, prefix = full_prefix.split('-')
        agency = group.find('./Agency').text.strip()
        if top != current_top:
            # first group under this top-level prefix: output the prefix
            # itself followed by its own ranges
            print(top)
            top_ranges = ','.join(ranges(top_groups_element := ean_ucc_by_prefix[top])) \
                if False else ','.join(ranges(ean_ucc_by_prefix[top]))
            for line in wrap(top_ranges):
                print(' %s' % line)
            current_top = top
        print(' %s agency="%s"' % (prefix, agency))
        group_ranges = ','.join(ranges(group))
        for line in wrap(group_ranges):
            print('  %s' % line)