File: anonymize-logs.py

package info (click to toggle)
network-manager 1.54.3-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 71,432 kB
  • sloc: ansic: 483,661; python: 11,632; xml: 8,546; sh: 5,552; perl: 596; cpp: 178; javascript: 130; ruby: 107; makefile: 64; lisp: 22
file content (193 lines) | stat: -rwxr-xr-x 5,947 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python3

from textwrap import wrap
import subprocess
import ipaddress
import argparse
import os
import re


# Domain suffixes whose hostnames must be masked.  It starts as a list and is
# rebound to a "|"-joined regex alternation string by
# init_hostnames_and_domains_sub().
domains = []

# Original hostname -> placeholder ("hostname<N><suffix>").
hosts_sub = {}
# Number used for the next hostname placeholder.
host_next = 0

# Original MAC address -> placeholder MAC address.
macs_sub = {}
# Value of the next placeholder MAC (rendered as 12 hex digits).
mac_next = 0

# Exploded form of the original IP address -> placeholder.
ips_sub = {}
# Next placeholder addresses.  When a placeholder is generated the leading
# "0." / "ffff:" of these is rewritten to "IP4." / "IPv6:".
ip4_next = ipaddress.IPv4Address("0.0.0.0")
ip6_next = ipaddress.IPv6Address("ffff::")


def main(args):
    """Print the log file named by ``args.log_file`` with sensitive data masked.

    Each line is stripped of surrounding whitespace, run through the hostname,
    MAC and IP filters that the command line options leave enabled, and then
    written to stdout.
    """
    autodetect_hostnames = not args.show_hostnames
    # Explicit --domain / --hostname values are masked even with --show-hostnames.
    mask_hostnames = autodetect_hostnames or args.domain or args.hostname

    init_hostnames_and_domains_sub(args)

    with open(args.log_file) as log:
        for raw_line in log:
            text = raw_line.strip()

            if mask_hostnames:
                text = replace_hostnames(text, autodetect_hostnames)

            if not args.show_macs:
                text = replace_macs(text)

            # Skipped only when public IPs are shown and private ones are not hidden.
            if not (args.show_public_ips and not args.hide_private_ips):
                text = replace_ips(text, args.show_public_ips, args.hide_private_ips)

            print(text)


def init_hostnames_and_domains_sub(args):
    """Prepare the domain pattern and the initial hostname substitutions.

    Unless ``args.show_hostnames`` is set, a few common top-level domains are
    masked and the output of the ``hostname`` command (when it can be run) is
    registered with the ".self" suffix.  Domains from ``args.domain`` and
    hostnames from ``args.hostname`` are always registered.

    On return the global ``domains`` is no longer a list but a "|"-joined
    regex alternation string (possibly empty).
    """
    global domains

    if not args.show_hostnames:
        domains.extend(["com", "org", "net", "gov", "es", "it"])

        # Masking the local hostname is best effort: a missing or failing
        # `hostname` binary must not prevent anonymizing the log.
        try:
            r = subprocess.run("hostname", capture_output=True)
        except OSError:
            r = None

        if r is not None and r.returncode == 0:
            own_hostname = r.stdout.decode().strip()
            # An empty name would be a catch-all substitution key.
            if own_hostname:
                add_host_sub(own_hostname, ".self")

    # domains and hostname passed explicitly are replaced even with --show-hostnames
    user_domains = [d.strip(". ") for d in args.domain]
    # The values end up inside a regex: escape them so that "co.uk" only
    # matches a literal dot, and drop empty ones, which would add an empty
    # alternative matching any string that ends in a dot.
    domains.extend(re.escape(d) for d in user_domains if d)
    domains = "|".join(domains)

    for hostname in args.hostname:
        if hostname:
            add_host_sub(hostname)


def add_host_sub(hostname: str, suffix: str = ""):
    """Register a placeholder for ``hostname`` unless it already has one.

    The placeholder is ``hostname<N><suffix>``, N being a running counter.
    When no suffix is given and the hostname ends in one of the configured
    domains, ".ext" is used as the suffix.

    Empty hostnames are ignored: the empty string occurs at every position of
    every line, so it can never be a meaningful substitution key.
    """
    global hosts_sub
    global host_next

    if not hostname:
        return

    # if it's a domain-like hostname (i.e example.com) adds .ext at the end.
    # With no domain configured the pattern would degrade to "ends with a
    # dot", so the check is skipped in that case.
    if suffix == "" and domains and re.search(r"\.({})$".format(domains), hostname):
        suffix = ".ext"

    if hostname not in hosts_sub:
        hosts_sub[hostname] = "hostname{}{}".format(host_next, suffix)
        host_next += 1


def replace_hostnames(line: str, autodetect_from_logs: bool) -> str:
    """Return ``line`` with every known hostname replaced by its placeholder.

    Before substituting, new hostnames are learned from the line itself:

    - when ``autodetect_from_logs`` is true, from the hostname-related log
      messages recognised below;
    - when any domain is configured, from every domain-like string in the
      line.
    """
    global hosts_sub

    # look for known log messages that show hostnames
    if autodetect_from_logs:
        detected = []

        # `[^"]*` instead of `.*`: stop at the closing quote even if the line
        # contains further quoted text.
        match = re.search(r"get-hostname: \"([^\"]*)\"", line)
        if match:
            detected.append(match.group(1))

        match = re.search(r"set hostname to \"([^\"]*)\"", line)
        if match:
            detected.append(match.group(1))

        match = re.search(
            r"hostname changed from (\(none\)|\"[^\"]*\") to (\(none\)|\"[^\"]*\")",
            line,
        )
        if match:
            detected.extend(
                value.strip('"') for value in match.groups() if value != "(none)"
            )

        for name in detected:
            # An empty name (e.g. `get-hostname: ""`) is not a usable key.
            if name:
                add_host_sub(name)

    # look for domain-like strings; all of them, not only the first one,
    # otherwise the remaining ones would be printed unmasked
    if domains:
        for match in re.finditer(r"[\w\-\.]+?\.(" + domains + r")\b", line):
            add_host_sub(match.group(0))

    # Substitute in a single pass, trying longer names first, so that a short
    # hostname cannot clobber part of a longer one ("host" inside
    # "host.example.com") and already inserted placeholders are never
    # rewritten.
    known = sorted((name for name in hosts_sub if name), key=len, reverse=True)
    if known:
        pattern = "|".join(re.escape(name) for name in known)
        line = re.sub(pattern, lambda m: hosts_sub[m.group(0)], line)

    return line


def replace_macs(line: str) -> str:
    """Return ``line`` with every MAC address replaced by a placeholder MAC.

    Placeholders are allocated sequentially (00:00:00:00:00:00,
    00:00:00:00:00:01, ...) and remembered in ``macs_sub``, so the same
    original address always maps to the same placeholder.
    """
    global macs_sub
    global mac_next

    def _placeholder(match):
        global mac_next

        mac = match.group(0)
        if mac not in macs_sub:
            macs_sub[mac] = ":".join(wrap("{:012x}".format(mac_next), width=2))
            mac_next += 1
        return macs_sub[mac]

    # Single pass over the line: placeholders look like MAC addresses
    # themselves, so replacing one address at a time could rewrite a
    # placeholder that was inserted a moment earlier and map two different
    # addresses to the same output.
    return re.sub(r"(?:[0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}", _placeholder, line)


def replace_ips(line: str, show_public: bool, hide_private: bool) -> str:
    """Return ``line`` with IP addresses replaced by placeholders.

    Private addresses are kept unless ``hide_private`` is true and global
    addresses are kept when ``show_public`` is true; anything else that
    parses as an IPv4/IPv6 address is replaced.  Placeholders ("IP4.0.0.0",
    "IPv6::", ...) are allocated sequentially and remembered in ``ips_sub``
    keyed by the exploded address, so equal addresses written differently
    share a placeholder.
    """
    global ips_sub
    global ip4_next
    global ip6_next

    def _placeholder(match):
        global ip4_next
        global ip6_next

        addr_str = match.group(0)
        try:
            addr = ipaddress.ip_address(addr_str)
        except ValueError:  # not IP (e.g. a version number or a timestamp)
            return addr_str

        if (addr.is_private and not hide_private) or (addr.is_global and show_public):
            return addr_str

        if addr.exploded not in ips_sub:
            if isinstance(addr, ipaddress.IPv4Address):
                ips_sub[addr.exploded] = str(ip4_next).replace("0.", "IP4.", 1)
                ip4_next += 1
            else:
                ips_sub[addr.exploded] = str(ip6_next).replace("ffff:", "IPv6:", 1)
                ip6_next += 1

        return ips_sub[addr.exploded]

    # One substitution pass per family (IPv4 first, as before).  Replacing
    # match by match instead of with str.replace() keeps an address that is a
    # substring of another one ("8.8.8.8" in "18.8.8.8") from corrupting it,
    # and never rewrites a placeholder that was already inserted.
    line = re.sub(r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}", _placeholder, line)
    line = re.sub(r"(?:[0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}", _placeholder, line)

    return line


if __name__ == "__main__":
    # Command line interface: every --show-* switch turns one kind of masking
    # off, --hide-private-ips turns an extra one on.
    description = """Anonymize some data from NetworkManager logs.

Note that it only covers some common stuff like MAC and IP addresses or
hostnames.  Do not trust it and manually review that the log doesn't contain
sensitive data before sharing it.

Changing IP address can make that problems related to routing are impossible to
analyze. Because of that, private IPs which are normally not sensitive are not
hidden by default, and if the problem is related to routing you might need to
use the --show-public-ips option"""

    args_parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=description,
        epilog="Options of the type --show-* disable masking that type of data.",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Plain on/off switches.
    for short_flag, long_flag in (
        ("-H", "--show-hostnames"),
        ("-m", "--show-macs"),
        ("-g", "--show-public-ips"),
        ("-p", "--hide-private-ips"),
    ):
        args_parser.add_argument(short_flag, long_flag, action="store_true")

    # Repeatable options, each collecting its values into a list.
    for short_flag, long_flag, help_text in (
        (
            "-d",
            "--domain",
            'additional domains to hide, like ".xyz", can be passed more than once',
        ),
        (
            "-n",
            "--hostname",
            "additional hostnames to hide, can be passed more than once",
        ),
    ):
        args_parser.add_argument(
            short_flag, long_flag, action="append", default=[], help=help_text
        )

    # Optional positional: the log to read.
    args_parser.add_argument(
        "log_file", nargs="?", default="/dev/stdin", help="Log file (by default, stdin)"
    )

    args = args_parser.parse_args()
    main(args)