File: ssh_config.py

package info (click to toggle)
python-scrapli 2023.7.30-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,536 kB
  • sloc: python: 14,459; makefile: 72
file content (532 lines) | stat: -rw-r--r-- 17,910 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
"""scrapli.ssh_config"""
import base64
import hmac
import os
import re
import shlex
import sys
from copy import deepcopy
from pathlib import Path
from typing import Dict, Optional

from scrapli.exceptions import ScrapliTypeError

# `Match` is the type used further down in `isinstance` checks on `re.search` results. On
# interpreters that report 3.8 or newer it is taken straight from the `re` module; anything older
# falls back to the alias exported by `typing`.
if sys.version_info >= (3, 8):
    Match = re.Match
else:
    from typing import Match  # pragma:  no cover

# Names of the `Host` attributes that `SSHConfig._merge_hosts` copies from a less specific Host
# entry into a more specific one whenever the more specific entry has no (truthy) value of its own.
# `hosts` and `hostname` are not listed here, so those two are never inherited from another entry.
HOST_ATTRS = (
    "port",
    "user",
    "address_family",
    "bind_address",
    "connect_timeout",
    "identities_only",
    "identity_file",
    "keyboard_interactive",
    "password_authentication",
    "preferred_authentication",
)


class SSHConfig:
    # cache of already built SSHConfig objects keyed by the path given to `ssh_config_factory`;
    # this is a class attribute on purpose so that it is shared by every instance
    _config_files: Dict[str, "SSHConfig"] = {}

    def __init__(self, ssh_config_file: str) -> None:
        """
        Initialize SSHConfig Object

        Parse OpenSSH config file

        Try to load the following data for all entries in config file:
            Host
            HostName
            Port
            User
            *AddressFamily
            *BindAddress
            *ConnectTimeout
            IdentitiesOnly
            IdentityFile
            *KbdInteractiveAuthentication
            *PasswordAuthentication
            *PreferredAuthentications

        * items are mostly ready to load but are unused in scrapli right now so are not being set
        at this point.

        NOTE: this does *not* accept duplicate "*" entries -- the final "*" entry will overwrite any
        previous "*" entries. In general for system transport this shouldn't matter much because
        scrapli only cares about parsing the config file to see if a key (any key) exists for a
        given host (we care about that because ideally we use "pipes" auth, but this is only an
        option if we have a key to auth with).

        When an empty string is given as the path no file is read; the object then only holds an
        empty default "*" host entry.

        Args:
            ssh_config_file: string path to ssh configuration file

        Returns:
            None

        Raises:
            ScrapliTypeError: if non-string value provided for ssh_config_file

        """
        if not isinstance(ssh_config_file, str):
            raise ScrapliTypeError(f"`ssh_config_file` expected str, got {type(ssh_config_file)}")

        self.ssh_config_file = os.path.expanduser(ssh_config_file)
        # always define the raw config text so that `__bool__` and `__repr__` keep working when no
        # config file path was provided
        self.ssh_config = ""
        self.hosts: Dict[str, "Host"] = {}

        if self.ssh_config_file:
            self.ssh_config = Path(self.ssh_config_file).read_text(encoding="utf-8")
            self.hosts = self._parse() or {}

        # there must always be a catch-all entry; lookups and merges fall back to it
        if "*" not in self.hosts:
            self.hosts["*"] = Host()
            self.hosts["*"].hosts = "*"

        # finally merge all args from less specific hosts into the more specific hosts, preserving
        # the options from the more specific hosts of course
        self._merge_hosts()

    def __str__(self) -> str:
        """
        Magic str method for SSHConfig class

        Args:
            N/A

        Returns:
            str: string representation of object

        Raises:
            N/A

        """
        return "SSHConfig Object"

    def __repr__(self) -> str:
        """
        Magic repr method for SSHConfig class

        The raw config file text is left out of the output.

        Args:
            N/A

        Returns:
            str: repr for class object

        Raises:
            N/A

        """
        class_dict = self.__dict__.copy()
        # `pop` with a default rather than `del` so a missing attribute can never raise here
        class_dict.pop("ssh_config", None)
        return f"SSHConfig {class_dict}"

    def __bool__(self) -> bool:
        """
        Magic bool method; return True if any ssh config file content was loaded

        Args:
            N/A

        Returns:
            bool: True if the loaded config text is non-empty, otherwise False

        Raises:
            N/A

        """
        return bool(self.ssh_config)

    @staticmethod
    def _strip_comments(line: str) -> str:
        """
        Strip out comments from ssh config file lines

        Args:
            line: to strip comments from

        Returns:
            str: rejoined ssh config file line after stripping comments

        Raises:
            N/A

        """
        try:
            tokens = shlex.split(line, comments=True)
        except ValueError:
            # shlex refuses unbalanced quotes; rather than failing the entire config parse because
            # of one odd value, fall back to simply cutting the line at the first "#"
            tokens = line.split("#", 1)[0].split()
        return " ".join(tokens)

    def _search_option(self, pattern: "re.Pattern[str]", host_entry: str) -> Optional[str]:
        """
        Search a single host entry for an option and return its comment-stripped value

        Args:
            pattern: compiled pattern whose first group captures the option value
            host_entry: text of one "Host" block from the ssh config file

        Returns:
            Optional[str]: the captured value with comments stripped, None if the option is absent

        Raises:
            N/A

        """
        found = re.search(pattern=pattern, string=host_entry)
        if not isinstance(found, Match):
            return None
        return self._strip_comments(found.groups()[0])

    def _parse(self) -> Dict[str, "Host"]:
        """
        Parse SSH configuration file

        Args:
            N/A

        Returns:
            discovered_hosts: dict of host objects discovered in ssh config file, keyed by the
                (comment-stripped) content of the "Host" line

        Raises:
            N/A

        """
        # uncomment next line and handle global patterns (stuff before hosts) at some point
        # global_config_pattern = re.compile(r"^.*?\b(?=host)", flags=re.I | re.S)
        # use word boundaries with a positive lookahead to get everything between the word host
        # need to do this as whitespace/formatting is not really a thing in ssh_config file
        # match host\s to ensure we don't pick up hostname and split things there accidentally
        host_pattern = re.compile(r"\bhost.*?\b(?=host\s|\s+$|$)", flags=re.I | re.S)
        host_entries = re.findall(pattern=host_pattern, string=self.ssh_config)

        discovered_hosts: Dict[str, Host] = {}
        if not host_entries:
            return discovered_hosts

        # do we need to add whitespace between match and end of line to ensure we match correctly?
        hosts_pattern = re.compile(r"^\s*host[\s=]+(.*)$", flags=re.I | re.M)
        hostname_pattern = re.compile(r"^\s*hostname[\s=]+([\w.-]*)$", flags=re.I | re.M)
        # require at least one digit: a bare "Port" line must not match, otherwise the int()
        # conversion below would be fed an empty string
        port_pattern = re.compile(r"^\s*port[\s=]+([\d]+)$", flags=re.I | re.M)
        user_pattern = re.compile(r"^\s*user[\s=]+([\w]*)$", flags=re.I | re.M)
        identities_only_pattern = re.compile(
            r"^\s*identitiesonly[\s=]+(yes|no)$", flags=re.I | re.M
        )
        identity_file_pattern = re.compile(
            r"^\s*identityfile[\s=]+([\w.\/\@~-]*)$", flags=re.I | re.M
        )
        # address_family, bind_address, connect_timeout, keyboard_interactive,
        # password_authentication and preferred_authentication are not parsed (yet)

        for host_entry in host_entries:
            host = Host()
            # an entry without a usable "Host" line is stored under the empty string
            host.hosts = self._search_option(hosts_pattern, host_entry) or ""

            hostname = self._search_option(hostname_pattern, host_entry)
            if hostname is not None:
                host.hostname = hostname

            port = self._search_option(port_pattern, host_entry)
            if port is not None:
                host.port = int(port)

            user = self._search_option(user_pattern, host_entry)
            if user is not None:
                host.user = user

            identities_only = self._search_option(identities_only_pattern, host_entry)
            if identities_only is not None:
                host.identities_only = identities_only

            identity_file = self._search_option(identity_file_pattern, host_entry)
            if identity_file is not None:
                host.identity_file = os.path.expanduser(identity_file)

            # later entries with an identical "Host" line replace earlier ones
            discovered_hosts[host.hosts] = host
        return discovered_hosts

    def _merge_hosts(self) -> None:
        """
        Merge less specific host pattern data into a given host

        For every host entry, repeatedly find the best remaining fuzzy match and copy over each
        attribute listed in HOST_ATTRS that the entry itself has no (truthy) value for.

        Args:
            N/A

        Returns:
            None

        Raises:
            N/A

        """
        for host in self.hosts:  # pylint: disable=C0206
            # candidates still available to this host; each round removes the one just merged
            _current_hosts = deepcopy(self.hosts)
            while True:
                fuzzy_match = self._lookup_fuzzy_match(host=host, hosts=_current_hosts)
                for attr in HOST_ATTRS:
                    if not getattr(self.hosts[host], attr):
                        setattr(self.hosts[host], attr, getattr(self.hosts[fuzzy_match], attr))
                try:
                    _current_hosts.pop(fuzzy_match)
                except KeyError:
                    # the returned match is no longer among the remaining candidates, so every
                    # applicable entry (ending with "*") has been merged already and we can bail
                    break

    def _lookup_fuzzy_match(self, host: str, hosts: Optional[Dict[str, "Host"]] = None) -> str:
        """
        Look up fuzzy matched hosts

        Get the best match ssh config Host entry for a given host; this allows for using
        the splat and question-mark operators in ssh config file. The best match is the entry
        whose wildcards had to stand in for the fewest characters; ties go to the entry that comes
        first in the hosts dict.

        Args:
            host: host to lookup in discovered_hosts dict
            hosts: hosts dict to operate on; used for passing in partial dict of hosts while
                performing merge operations

        Returns:
            str: Nearest match (if applicable) host or `*` if none found

        Raises:
            N/A

        """
        # NOTE: an empty dict is falsy, so it falls back to the full set of hosts as well
        hosts = hosts or self.hosts

        best_entry = "*"
        best_chars_replaced: Optional[int] = None
        for host_entry in hosts:
            for host_pattern in host_entry.split():
                # escape *everything* that is special in a regex first (so "+", "[", "(" etc. in a
                # pattern are taken literally and can neither mis-match nor fail to compile), then
                # turn the two ssh wildcards back into capture groups:
                #   asterisk (match 0 or more things) / question mark (match one thing)
                cleaned_host_pattern = (
                    re.escape(host_pattern).replace(r"\*", r"(.*)").replace(r"\?", r"(.)")
                )
                # search case insensitive
                result = re.search(pattern=cleaned_host_pattern, string=host, flags=re.I)
                if result is None:
                    continue

                # count how many chars the wildcard groups had to swallow to get a match
                chars_replaced = sum(end_char - start_char for start_char, end_char in result.regs[1:])

                # strictly fewer replaced chars than the best so far means a new best match
                if best_chars_replaced is None or chars_replaced < best_chars_replaced:
                    best_entry = host_entry
                    best_chars_replaced = chars_replaced
        return best_entry

    def lookup(self, host: str) -> "Host":
        """
        Lookup a given host

        Args:
            host: host to lookup in discovered_hosts dict

        Returns:
            Host: best matched host from parsed ssh config file hosts, "*" if no better match found

        Raises:
            N/A

        """
        # return exact 1:1 match if exists
        if host in self.hosts:
            return self.hosts[host]
        # return match if given host is an exact match for one of the names on a host line
        for host_line, host_entry in self.hosts.items():
            if host in host_line.split():
                return host_entry
        # otherwise need to select the most correct host entry
        fuzzy_match = self._lookup_fuzzy_match(host)
        return self.hosts[fuzzy_match]


class Host:
    def __init__(self) -> None:
        """
        Host Object

        Container for the options of one "Host" entry of an ssh config file; every option starts
        out unset and is filled in by whoever parses the file.
        """
        # content of the "Host" line itself -- may hold several space separated patterns
        self.hosts: str = ""
        self.hostname: Optional[str] = None
        self.port: Optional[int] = None
        # note: unlike the other options an unset user is an empty string rather than None
        self.user: str = ""
        self.address_family: Optional[str] = None
        self.bind_address: Optional[str] = None
        self.connect_timeout: Optional[str] = None
        self.identities_only: Optional[str] = None
        self.identity_file: Optional[str] = None
        self.keyboard_interactive: Optional[str] = None
        self.password_authentication: Optional[str] = None
        self.preferred_authentication: Optional[str] = None

    def __str__(self) -> str:
        """
        Magic str method for Host class

        Args:
            N/A

        Returns:
            str: "Host: " followed by the content of the host line

        Raises:
            N/A

        """
        return "Host: {}".format(self.hosts)

    def __repr__(self) -> str:
        """
        Magic repr method for Host class

        Args:
            N/A

        Returns:
            str: "Host " followed by a dict of all attributes in the order they were assigned

        Raises:
            N/A

        """
        attributes = dict(vars(self))
        return "Host {}".format(attributes)


class SSHKnownHosts:
    def __init__(self, ssh_known_hosts_file: str) -> None:
        """
        Initialize SSHKnownHosts Object

        Parse OpenSSH known hosts file

        Try to load the following data for all entries in known hosts file:
            Host
            Key Type
            Public Key

        When an empty string is given as the path no file is read and no hosts are known.

        Args:
            ssh_known_hosts_file: string path to ssh known hosts file

        Returns:
            None

        Raises:
            TypeError: if non-string value provided for ssh_known_hosts

        """
        if not isinstance(ssh_known_hosts_file, str):
            raise TypeError(
                f"`ssh_known_hosts_file` expected str, got {type(ssh_known_hosts_file)}"
            )

        self.ssh_known_hosts_file = os.path.expanduser(ssh_known_hosts_file)
        # always define the raw file text and the hosts dict, even when there is no file to read
        self.ssh_known_hosts = ""
        self.hosts: Dict[str, Dict[str, str]] = {}

        if self.ssh_known_hosts_file:
            self.ssh_known_hosts = Path(self.ssh_known_hosts_file).read_text(encoding="utf-8")
            self.hosts = self._parse() or {}

    def _parse(self) -> Dict[str, Dict[str, str]]:
        """
        Parse OpenSSH known hosts file

        Only lines made up of exactly three whitespace separated fields (host(s), key type, public
        key) are picked up.

        Args:
            N/A

        Returns:
            known_hosts: dict of host public keys discovered in known hosts file

        Raises:
            N/A

        """
        # match any non whitespace from start of the line... this should cover v4/v6/names
        # skip a space and match any word (also w/ hyphen) to get key type, lastly
        # match any non whitespace to the end of the line to get the public key
        host_pattern = re.compile(r"^\S+\s[\w\-]+\s\S+$", flags=re.I | re.M)
        host_entries = re.findall(pattern=host_pattern, string=self.ssh_known_hosts)

        known_hosts: Dict[str, Dict[str, str]] = {}
        for host_entry in host_entries:
            host, key_type, public_key = host_entry.split()
            # to simplify lookups down the line, split any list of hosts and just create a unique
            # entry per host
            for individual_host in host.split(","):
                known_hosts[individual_host] = {"key_type": key_type, "public_key": public_key}
        return known_hosts

    def lookup(self, host: str) -> Dict[str, str]:
        """
        Lookup a given host's public key

        Plain host names are matched exactly; hashed entries ("|1|salt|hash") are matched by
        computing the HMAC-SHA1 of the host with the entry's salt. Hashed entries that cannot be
        decoded are skipped rather than aborting the lookup.

        Args:
            host: host to lookup in known_hosts dict

        Returns:
            host_public_key: matched host public key from parsed ssh known hosts file,
                empty dict if not found

        Raises:
            N/A

        """
        # return exact 1:1 match if exists
        if host in self.hosts:
            return self.hosts[host]

        # return match if given host is an exact match for a hashed host entry
        raw_host = host.encode(encoding="utf-8")
        for host_id, host_public_key in self.hosts.items():
            if not host_id.startswith("|1|"):
                continue
            try:
                _, _, encoded_salt, encoded_hashed_host = host_id.split("|")
                raw_salt = base64.b64decode(encoded_salt)
                raw_hashed_host = base64.b64decode(encoded_hashed_host)
            except ValueError:
                # wrong number of "|" separated fields or invalid base64 (binascii.Error is a
                # ValueError) -- a single damaged line must not break lookups of other hosts
                continue
            computed_hash = hmac.new(raw_salt, raw_host, "sha1").digest()
            if hmac.compare_digest(computed_hash, raw_hashed_host):
                return host_public_key

        # otherwise return empty dict
        return {}


def ssh_config_factory(ssh_config_file: str) -> SSHConfig:
    """
    Return the one SSHConfig object that belongs to the given ssh config file path

    This is a "singleton per ssh config file path" rather than a true singleton, since a user may
    well use different ssh config files for different things. Objects are kept in the
    `_config_files` dict of the SSHConfig class, keyed by the path exactly as it was passed in; an
    SSHConfig is only instantiated the first time a given path is seen and the stored object is
    handed back on every later call. The only place this should ever be called from is the base
    driver, which has already resolved the ssh config file path, so only fully qualified paths
    are expected here.

    Args:
        ssh_config_file: fully qualified string path to ssh config file

    Returns:
        SSHConfig: instantiated SSHConfig object

    Raises:
        N/A

    """
    cache = SSHConfig._config_files  # pylint: disable=W0212

    # build (and remember) the object only for paths we have not been asked about before
    if ssh_config_file not in cache:
        cache[ssh_config_file] = SSHConfig(ssh_config_file=ssh_config_file)

    return cache[ssh_config_file]