| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 
# Copyright 2015 Abhijit Menon-Sen <ams@2ndQuadrant.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations
import re
from ansible.errors import AnsibleParserError, AnsibleError
# Components that match a numeric or alphanumeric begin:end or begin:end:step
# range expression inside square brackets.
#
# These are regex *fragments*, not compiled patterns: they are interpolated
# into the larger expressions below with str.format(). They are written in
# verbose syntax, so the whitespace and "#" comments inside the strings only
# disappear once the enclosing pattern is compiled with re.X.

# Decimal range such as [1:10] or [1:10:2]; interpolated into ipv4_component.
numeric_range = r"""
    \[
        (?:[0-9]+:[0-9]+)               # numeric begin:end
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
"""
# Hexadecimal range such as [0:ff] or [0:ff:4] (the step is always decimal);
# interpolated into ipv6_component. Only lowercase a-f is spelled out here, so
# uppercase digits match only because the ipv6 pattern is compiled with re.I.
hexadecimal_range = r"""
    \[
        (?:[0-9a-f]+:[0-9a-f]+)         # hexadecimal begin:end
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
"""
# Either a single-letter range such as [a:f] or a decimal range such as [1:10],
# each with an optional decimal step; interpolated into the hostname label.
alphanumeric_range = r"""
    \[
        (?:
            [a-z]:[a-z]|                # one-char alphabetic range
            [0-9]+:[0-9]+               # ...or a numeric one
        )
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
"""
# Components that match a 16-bit portion of an IPv6 address in hexadecimal
# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation
# (0..255) or an [x:y(:z)] numeric range.
#
# Both strings are passed through str.format(), so the doubled braces (e.g.
# {{1,4}}) are escapes that produce the literal regex quantifier {1,4}; {range}
# is the only substitution field.

# One IPv6 group: one to four hex digits, or a hexadecimal_range expression.
ipv6_component = r"""
    (?:
        [0-9a-f]{{1,4}}|                # 0..ffff
        {range}                         # or a numeric range
    )
""".format(range=hexadecimal_range)
# One IPv4 octet: a decimal value 0..255, or a numeric_range expression. The
# first alternative takes one to three digits, so zero-padded forms such as
# "007" are accepted as well.
ipv4_component = r"""
    (?:
        [01]?[0-9]{{1,2}}|              # 0..199
        2[0-4][0-9]|                    # 200..249
        25[0-5]|                        # 250..255
        {range}                         # or a numeric range
    )
""".format(range=numeric_range)
# A hostname label, e.g. 'foo' in 'foo.example.com'. Consists of word
# characters (\w: letters, digits and the underscore) plus hyphens, or valid
# alphanumeric ranges. The label may not start with a hyphen and may not end
# with a hyphen or an underscore. Because \w itself matches '_', a *leading*
# underscore is accepted (e.g. '_foo'); only the trailing position is guarded,
# by the negative lookbehind on the last line of the fragment.
#
# This is interpolated into the hostname pattern below. We don't try to
# enforce the 63-char length limit.
label = r"""
    (?:[\w]|{range})                    # Starts with an alphanumeric or a range
    (?:[\w_-]|{range})*                 # Then zero or more of the same or [_-]
    (?<![_-])                           # ...as long as it didn't end with [_-]
""".format(range=alphanumeric_range)
# Compiled regular expressions used by parse_address(), keyed by name. The
# first two split a ":port" suffix off the input; the remaining three validate
# what is left as an IPv4 address, an IPv6 address or a hostname. All of them
# are compiled with re.X, so the whitespace and "#" comments inside the
# pattern strings are not part of what is matched.
patterns = {
    # This matches a square-bracketed expression with a port specification. What
    # is inside the square brackets is validated later.
    # Group 1 is the text between the brackets, group 2 the port digits.
    'bracketed_hostport': re.compile(
        r"""^
            \[(.+)\]                    # [host identifier]
            :([0-9]+)                   # :port number
            $
        """, re.X
    ),
    # This matches a bare IPv4 address or hostname (or host pattern including
    # [x:y(:z)] ranges) with a port specification.
    # Group 1 is the host part, group 2 the port digits. The host part cannot
    # contain a colon outside of a bracketed expression, which is why a bare
    # IPv6 address is not split by this pattern.
    'hostport': re.compile(
        r"""^
            ((?:                        # We want to match:
                [^:\[\]]                # (a non-range character
                |                       # ...or...
                \[[^\]]*\]              # a complete bracketed expression)
            )*)                         # repeated as many times as possible
            :([0-9]+)                   # followed by a port number
            $
        """, re.X
    ),
    # This matches an IPv4 address, but also permits range expressions.
    # ({{3}} is a str.format() escape for the regex quantifier {3}.)
    'ipv4': re.compile(
        r"""^
            (?:{i4}\.){{3}}{i4}         # Three parts followed by dots plus one
            $
        """.format(i4=ipv4_component), re.X | re.I
    ),
    # This matches an IPv6 address, but also permits range expressions.
    #
    # This expression looks complex, but it really only spells out the various
    # combinations in which the basic unit of an IPv6 address (0..ffff) can be
    # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such
    # as ::ffff:192.0.2.3.
    #
    # Note that we can't just use ipaddress.ip_address() because we also have to
    # accept ranges in place of each component.
    #
    # {0} is the positional str.format() field filled with ipv6_component, and
    # re.I makes the hexadecimal digits case-insensitive.
    # NOTE(review): the dotted parts of the IPv4-in-IPv6 variants are also
    # matched with {0} (the hexadecimal component), not with ipv4_component.
    'ipv6': re.compile(
        r"""^
            ((?:{0}:){{7}}{0}|          # uncompressed: 1:2:3:4:5:6:7:8
            (?:{0}:){{1,6}}:|           # compressed variants, which are all
            (?:{0}:)(?::{0}){{1,6}}|    # a::b for various lengths of a,b
            (?:{0}:){{2}}(?::{0}){{1,5}}|
            (?:{0}:){{3}}(?::{0}){{1,4}}|
            (?:{0}:){{4}}(?::{0}){{1,3}}|
            (?:{0}:){{5}}(?::{0}){{1,2}}|
            (?:{0}:){{6}}(?::{0})|      # ...all with 2 <= a+b <= 7
            :(?::{0}){{1,6}}|           # ::ffff(:ffff...)
            {0}?::|                     # ffff::, ::
                                        # ipv4-in-ipv6 variants
            (?:0:){{6}}(?:{0}\.){{3}}{0}|
            ::(?:ffff:)?(?:{0}\.){{3}}{0}|
            (?:0:){{5}}ffff:(?:{0}\.){{3}}{0})
            $
        """.format(ipv6_component), re.X | re.I
    ),
    # This matches a hostname or host pattern including [x:y(:z)] ranges.
    #
    # We roughly follow DNS rules here, but also allow ranges (and underscores).
    # In the past, no systematic rules were enforced about inventory hostnames,
    # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded
    # various metacharacters anyway.
    #
    # We don't enforce DNS length restrictions here (63 characters per label,
    # 253 characters total) or make any attempt to process IDNs.
    #
    # re.UNICODE is requested explicitly so that \w in the label fragment
    # matches non-ASCII word characters too.
    'hostname': re.compile(
        r"""^
            {label}                     # We must have at least one label
            (?:\.{label})*              # Followed by zero or more .labels
            $
        """.format(label=label), re.X | re.I | re.UNICODE
    ),
}
def parse_address(address, allow_ranges=False):
    """
    Takes a string and returns a (host, port) tuple. If the port is None, then
    no port was specified; otherwise it is an int.

    The host identifier may be a hostname (qualified or not), an IPv4 address,
    or an IPv6 address. If allow_ranges is True, then any of those may contain
    [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x:z].

    The port number is an optional :NN suffix on an IPv4 address or host name,
    or a mandatory :NN suffix on any square-bracketed expression: IPv6 address,
    IPv4 address, or host name. (This means the only way to specify a port for
    an IPv6 address is to enclose it in square brackets.)

    :arg address: the string to parse.
    :kwarg allow_ranges: whether range expressions are acceptable in the host.
    :returns: a (host, port) tuple.
    :raises AnsibleError: if what remains after removing the port is not a
        valid IPv4 address, IPv6 address or hostname.
    :raises AnsibleParserError: if the host contains a range expression but
        allow_ranges is false.
    """

    # First, we extract the port number if one is specified. Only ONE port
    # suffix is removed: once a pattern has matched we stop, so that the
    # contents of "[...]:NN" are validated as a host identifier as-is instead
    # of being stripped of a second ":NN" (which would silently discard the
    # outer port of an input such as "[host:22]:33").
    port = None
    for matching in ('bracketed_hostport', 'hostport'):
        m = patterns[matching].match(address)
        if m:
            (address, port) = m.groups()
            port = int(port)
            break

    # What we're left with now must be an IPv4 or IPv6 address, possibly with
    # numeric ranges, or a hostname with alphanumeric ranges. The first
    # pattern that accepts it is enough.
    host = None
    for matching in ('ipv4', 'ipv6', 'hostname'):
        if patterns[matching].match(address):
            host = address
            break

    # If it isn't any of the above, we don't understand it.
    if not host:
        raise AnsibleError("Not a valid network hostname: %s" % address)

    # If we get to this point, we know that any included ranges are valid.
    # If the caller is prepared to handle them, all is well.
    # Otherwise we treat it as a parse failure.
    if not allow_ranges and '[' in host:
        raise AnsibleParserError("Detected range in host but was asked to ignore ranges")

    return (host, port)
 |