File: check.py

package info (click to toggle)

python-astropy 1.3-8~bpo8%2B2

links: PTS, VCS
area: main
in suites: jessie-backports
size: 44,292 kB
sloc: ansic: 160,360; python: 137,322; sh: 11,493; lex: 7,638; yacc: 4,956; xml: 1,796; makefile: 474; cpp: 364

file content (77 lines) | stat: -rw-r--r-- 2,276 bytes

parent folder | download | duplicates (2)

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
A collection of functions for checking various XML-related strings for
standards compliance.
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from ...extern.six.moves import range, urllib

import re


def check_id(ID):
    """
    Returns `True` if *ID* is a valid XML ID.

    An ID is accepted when it starts with an ASCII letter or an
    underscore and continues with ASCII letters, digits, underscores,
    periods or hyphens.  The empty string is not a valid ID.
    """
    # ``\Z`` anchors at the true end of the string.  ``$`` would also
    # match just before a trailing newline and so accept "abc\n".
    return re.match(r"[A-Za-z_][A-Za-z0-9_.\-]*\Z", ID) is not None


def fix_id(ID):
    """
    Given an arbitrary string, create one that can be used as an xml
    id.  This is rather simplistic at the moment, since it just
    replaces non-valid characters with underscores.

    A string that is already a valid id is returned unchanged, and the
    empty string is returned as the empty string.  Otherwise an
    underscore is prepended when the first character cannot start an
    id, and every remaining character that is not an ASCII letter,
    digit, underscore, period or hyphen is replaced by an underscore.
    """
    # ``\Z`` (not ``$``) so that a string ending in a newline is not
    # mistaken for an already-valid id and returned unmodified.
    if re.match(r"[A-Za-z_][A-Za-z0-9_.\-]*\Z", ID):
        return ID
    if not ID:
        return ''
    corrected = ID
    if re.match(r"[A-Za-z_]", corrected) is None:
        # Keep the offending first character (it is sanitised below as
        # part of the tail) and put a legal start character before it.
        corrected = '_' + corrected
    # The first character is now guaranteed legal; only the tail needs
    # substitution.
    return corrected[0] + re.sub(r"[^A-Za-z0-9_.\-]", "_", corrected[1:])

# Regular-expression fragment describing an XML token: it must not
# start on CR, LF, tab or space, and contains no CR, LF or tab.
# Line feed has to be spelled ``\n``: ``\l`` is not a regular-expression
# escape (``re`` rejects it as a bad escape on Python 3.6+ and treats it
# as a literal "l" on older versions).
# NOTE(review): the trailing ``(?!...)`` is a look-ahead, so it does not
# reject a trailing space; a look-behind may have been intended.
_token_regex = r"(?![\r\n\t ])[^\r\n\t]*(?![\r\n\t ])"


def check_token(token):
    """
    Returns `True` if *token* is a valid XML token, as defined by XML
    Schema Part 2.

    A token contains no carriage return, line feed or tab characters,
    has no leading or trailing spaces, and has no internal run of two
    or more spaces.  The empty string is a valid token.
    """
    if token == '':
        return True
    # One or more whitespace-free words separated by exactly one space.
    # Requiring a word first rejects a leading space, and ``\Z`` (rather
    # than ``$``) rejects a trailing newline.
    return re.match(
        r"[^\r\n\t ]+(?: [^\r\n\t ]+)*\Z", token) is not None


def check_mime_content_type(content_type):
    """
    Returns `True` if *content_type* is a valid MIME content type
    (syntactically at least), as defined by RFC 2045.

    Only the bare ``type/subtype`` form is accepted; a value that
    carries parameters (such as a charset) is rejected.
    """
    # An RFC 2045 token is a run of characters excluding the controls
    # (0x00-0x1f and 0x7f), space, and the tspecials ()<>@,;:\"/[]?=
    # The backslash must be listed explicitly (``\\`` in the class);
    # writing only ``\"`` would just escape the quote and let a
    # backslash through.
    token_regex = r'[^\x00-\x20\x7f()<>@,;:\\"/\[\]?=]+'
    # ``\Z`` instead of ``$`` so a trailing newline is not accepted.
    return re.match(
        r'(?P<type>{0})/(?P<subtype>{0})\Z'.format(token_regex),
        content_type) is not None


def check_anyuri(uri):
    """
    Returns `True` if *uri* is a valid URI as defined in RFC 2396.

    The check is a syntactic approximation: an optional scheme, up to
    two slashes, a run of characters permitted in a URI, and an
    optional ``#`` fragment.  The whole string must fit that shape, and
    it must also be accepted by ``urllib.parse.urlparse``.  The empty
    string (an empty URI reference) is considered valid.
    """
    # Characters allowed in the body and in the fragment: alphanumerics,
    # the RFC 2396 reserved and mark characters, and "%" for escapes.
    uric = r"[0-9a-zA-Z;/?:@&=+$,\.\-_!~*'()%]"
    # Every component is optional, so without the final ``\Z`` the
    # pattern would match (an empty prefix of) any string and the check
    # would never reject anything.
    pattern = (r"(([a-zA-Z][0-9a-zA-Z+\-\.]*:)?/{0,2}" + uric + r"+)?" +
               r"(#" + uric + r"*)?\Z")
    if re.match(pattern, uri) is None:
        return False
    try:
        urllib.parse.urlparse(uri)
    except Exception:
        # Deliberately broad: any failure to parse means "not valid".
        return False
    return True