File: mat2

package info (click to toggle)
mat2 0.8.0-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 6,236 kB
  • sloc: python: 2,733; makefile: 7
file content (186 lines) | stat: -rwxr-xr-x 6,360 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python3

import os
from typing import Tuple, Generator, List, Union
import sys
import mimetypes
import argparse
import logging
import unicodedata

try:
    from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
    from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as e:
    print(e)
    sys.exit(1)

# Version of MAT2; shown by `--version` and in the dependency report
__version__ = '0.8.0'

# Make pyflakes happy: referencing these typing names keeps the import from
# being reported as unused (presumably they only appear in type comments)
assert Tuple
assert Union

# Root logger setup: every message is prefixed with its level name, and only
# WARNING and above are emitted by default
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)


def __check_file(filename: str, mode: int=os.R_OK) -> bool:
    """Tell whether `filename` is an existing regular file accessible with `mode`.

    A diagnostic line is printed on stdout for the first check that fails.

    :param filename: path of the file to check
    :param mode: bitmask of `os.R_OK`/`os.W_OK` flags handed to `os.access`
    :return: True if every check passed, False otherwise
    """
    if not os.path.exists(filename):
        print("[-] %s doesn't exist." % filename)
        return False
    if not os.path.isfile(filename):
        print("[-] %s is not a regular file." % filename)
        return False
    if not os.access(filename, mode):
        # Only mention the permissions that were actually requested, so that
        # a read-only check doesn't complain about the file being unwritable.
        wanted = []  # List[str]
        if mode & os.R_OK:
            wanted.append('readable')
        if mode & os.W_OK:
            wanted.append('writeable')
        print("[-] %s is not %s." % (filename, ' and '.join(wanted) or 'accessible'))
        return False
    return True


def create_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser of MAT2.

    `--show` and `--lightweight` are mutually exclusive. The value given to
    `--unknown-members` is restricted to the values of `UnknownMemberPolicy`,
    so that a typo is reported as a usage error by argparse instead of
    surfacing later as an exception when the policy is instantiated.

    :return: the configured parser
    """
    policies = [p.value for p in UnknownMemberPolicy]

    parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
    parser.add_argument('files', nargs='*', help='the files to process')
    parser.add_argument('-v', '--version', action='version',
                        version='MAT2 %s' % __version__)
    parser.add_argument('-l', '--list', action='store_true',
                        help='list all supported fileformats')
    parser.add_argument('--check-dependencies', action='store_true',
                        help='check if MAT2 has all the dependencies it needs')
    parser.add_argument('-V', '--verbose', action='store_true',
                        help='show more verbose status information')
    parser.add_argument('--unknown-members', metavar='policy', default='abort',
                        choices=policies,
                        help='how to handle unknown members of archive-style files (policy should' +
                        ' be one of: %s)' % ', '.join(policies))

    info = parser.add_mutually_exclusive_group()
    info.add_argument('-s', '--show', action='store_true',
                      help='list harmful metadata detectable by MAT2 without removing them')
    info.add_argument('-L', '--lightweight', action='store_true',
                      help='remove SOME metadata')
    return parser


def show_meta(filename: str):
    """Print the metadata found in `filename`.

    Nothing more is printed if the file fails `__check_file`; a file for
    which no parser is available is reported as unsupported.
    """
    if __check_file(filename):
        parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
        if parser is not None:
            __print_meta(filename, parser.get_meta())
        else:
            print("[-] %s's format (%s) is not supported" % (filename, mimetype))


def __print_meta(filename: str, metadata: dict, depth: int=1):
    """Print `metadata` as an indented listing, sorted by key.

    Values that are themselves dicts are printed recursively, one level
    deeper, with their key used as the heading.

    :param filename: name shown in the heading line
    :param metadata: mapping of metadata names to values (or nested dicts)
    :param depth: nesting level, driving the indentation and heading marker
    """
    indent = " " * (depth * 2)

    if not metadata:
        print(indent + "No metadata found")
        return

    print("[%s] Metadata for %s:" % ('+' * depth, filename))

    for key, value in sorted(metadata.items()):
        if isinstance(value, dict):
            __print_meta(key, value, depth + 1)
            continue

        # Strip control characters from the value.
        # The whole 'C' family is dropped rather than just 'Cc', to stay
        # on the safe side.
        # https://www.unicode.org/reports/tr44/#GC_Values_Table
        try:
            kept = [ch for ch in value if not unicodedata.category(ch).startswith('C')]
            value = ''.join(kept)
        except TypeError:
            pass  # value isn't an iterable of characters: print it as is

        try:  # FIXME this is ugly.
            print(indent + "  %s: %s" % (key, value))
        except UnicodeEncodeError:
            print(indent + "  %s: harmful content" % key)


def clean_meta(filename: str, is_lightweight: bool, policy: UnknownMemberPolicy) -> bool:
    """Remove the metadata of `filename`.

    :param filename: path of the file to clean; must be readable and writeable
    :param is_lightweight: stored on the parser as `lightweight_cleaning`
    :param policy: stored on the parser as `unknown_member_policy`
    :return: the result of the parser's `remove_all()`, or False if the file
             failed the access check, has no parser, or `remove_all()`
             raised a RuntimeError
    """
    usable = __check_file(filename, os.R_OK|os.W_OK)
    if not usable:
        return False

    parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
    if parser is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return False

    parser.unknown_member_policy = policy
    parser.lightweight_cleaning = is_lightweight

    try:
        outcome = parser.remove_all()
    except RuntimeError as err:
        print("[-] %s can't be cleaned: %s" % (filename, err))
        return False
    return outcome



def show_parsers():
    """Print the supported mimetypes along with their file extensions.

    Extensions listed in `UNSUPPORTED_EXTENSIONS` are left out, and a
    mimetype left without any extension isn't shown at all. Both the lines
    and the extensions within a line are sorted, so the output is the same
    from one run to the next.
    """
    print('[+] Supported formats:')
    formats = set()  # Set[str]
    for parser in parser_factory._get_parsers():  # type: ignore
        for mtype in parser.mimetypes:
            # Sorted, since joining a set directly would give an ordering
            # that depends on string hashing and changes between runs
            extensions = sorted({
                extension
                for extension in mimetypes.guess_all_extensions(mtype)
                if extension not in UNSUPPORTED_EXTENSIONS
            })
            if not extensions:
                # we're not supporting a single extension in the current
                # mimetype, so there is no point in showing the mimetype at all
                continue
            formats.add('  - %s (%s)' % (mtype, ', '.join(extensions)))
    print('\n'.join(sorted(formats)))


def __get_files_recursively(files: List[str]) -> Generator[str, None, None]:
    """Yield every path from `files` that passes `__check_file`.

    Directories are walked with `os.walk` and each file found below them is
    checked individually; other entries are checked as they are.
    """
    for entry in files:
        if not os.path.isdir(entry):
            if __check_file(entry):
                yield entry
            continue

        for root, _, names in os.walk(entry):
            for name in names:
                candidate = os.path.join(root, name)
                if __check_file(candidate):
                    yield candidate

def main() -> int:
    """Command-line entry point of MAT2.

    Without any file argument, this lists the supported formats (`--list`),
    reports the dependencies (`--check-dependencies`), or prints the help.
    With files, it either shows their metadata (`--show`) or cleans them.

    :return: 0 on success, -1 if at least one file couldn't be cleaned
    """
    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    if args.verbose:
        # The root logger already got its handler from the module-level
        # logging.basicConfig() call, and basicConfig() does nothing once
        # handlers exist: the level has to be set on the logger itself.
        logging.getLogger().setLevel(logging.INFO)

    if not args.files:
        if args.list:
            show_parsers()
            return 0
        elif args.check_dependencies:
            print("Dependencies required for MAT2 %s:" % __version__)
            for key, value in sorted(check_dependencies().items()):
                print('- %s: %s' % (key, 'yes' if value else 'no'))
        else:
            arg_parser.print_help()
        return 0

    elif args.show:
        for f in __get_files_recursively(args.files):
            show_meta(f)
        return 0

    else:
        policy = UnknownMemberPolicy(args.unknown_members)
        if policy == UnknownMemberPolicy.KEEP:
            logging.warning('Keeping unknown member files may leak metadata in the resulting file!')

        # Keep going after a failure, so that every file gets a chance to be
        # cleaned; the failure is only reflected in the return value.
        no_failure = True
        for f in __get_files_recursively(args.files):
            if clean_meta(f, args.lightweight, policy) is False:
                no_failure = False
        return 0 if no_failure is True else -1


# Script entry point: main()'s return value is handed to sys.exit()
if __name__ == '__main__':
    sys.exit(main())