File: generate_profile.py

package info (click to toggle)
scap-security-guide 0.1.76-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 110,644 kB
  • sloc: xml: 241,883; sh: 73,777; python: 32,527; makefile: 27
file content (290 lines) | stat: -rwxr-xr-x 8,796 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/usr/bin/env python3

# Get input file
# Determine format (CSV or something else like OSCAL)
# Parse input into a data structure (dictionary)

import abc
import argparse
import os
import re

import json
import yaml
from pycompliance import pycompliance

import pandas

DESCRIPTION = '''
A tool for converting benchmarks to profiles.
'''


class LiteralUnicode(str):
    """Marker subclass of ``str`` that adds no behavior of its own.

    It exists so that a dedicated YAML representer can be registered for it,
    letting callers opt individual strings into that representation by
    wrapping them in this type.
    """


def literal_unicode_representer(dumper, data):
    """Represent *data* as a YAML string scalar using the ``|`` style.

    Trailing whitespace is removed from every line of *data* before the
    scalar is handed to ``dumper.represent_scalar``.
    """
    # NOTE(rhmdnd): pyyaml will not honor the scalar style requested below if
    # any line of the text ends with whitespace (e.g., 'some text ' instead of
    # 'some text'); see https://github.com/yaml/pyyaml/issues/121. Stripping
    # the end of each line and re-joining with newlines works around that.
    stripped_lines = (line.rstrip() for line in data.splitlines())
    return dumper.represent_scalar(
        'tag:yaml.org,2002:str', '\n'.join(stripped_lines), style='|')


# Have yaml.dump() serialize LiteralUnicode values through
# literal_unicode_representer (which requests the '|' scalar style).
yaml.add_representer(LiteralUnicode, literal_unicode_representer)


def setup_argument_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this tool.

    The parser takes a mandatory ``-i/--input-file`` option followed by one
    of two subcommands:

    * ``list`` -- dispatches to ``list_controls``.
    * ``generate`` -- dispatches to ``generate_control``; accepts optional
      ``-c/--control`` and ``-s/--section`` IDs.

    Each subcommand stores its handler in ``func`` on the parsed namespace.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument('-i', '--input-file', required=True,
                        help='Benchmark file to read')

    # The subcommand is mandatory: the handler is only stored in ``func`` once
    # a subcommand is chosen, so an invocation without one would otherwise
    # fail later with an AttributeError instead of a usage message. ``dest``
    # gives argparse a name to use when reporting the missing subcommand.
    subparsers = parser.add_subparsers(dest='command', required=True)
    list_parser = subparsers.add_parser('list', help='List controls within a benchmark')
    list_parser.set_defaults(func=list_controls)
    generate_parser = subparsers.add_parser('generate', help='Generate a control from benchmark')
    generate_parser.add_argument('-c', '--control', help='Control ID to generate')
    generate_parser.add_argument('-s', '--section',
                                 help='Section ID to generate, including containing controls')
    generate_parser.set_defaults(func=generate_control)

    return parser


class Parser(abc.ABC):
    """Abstract interface that every benchmark parser must implement.

    All four methods, including ``__init__``, are abstract, so a concrete
    subclass has to override each of them before it can be instantiated.
    """

    @abc.abstractmethod
    def __init__(self) -> None:
        """Initialize the parser; must be overridden by subclasses."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_name(self):
        """Return the name of the benchmark; must be overridden."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_version(self):
        """Return the version of the benchmark; must be overridden."""
        raise NotImplementedError

    @abc.abstractmethod
    def parse(self):
        """Parse the input and return the result; must be overridden."""
        raise NotImplementedError


class XLSXParser(Parser):
    """Parser for benchmarks stored in an ``.xlsx`` spreadsheet.

    The benchmark name and version are derived from the file name, while the
    sections and controls are read from the 'Combined Profiles' sheet.
    """

    def __init__(self, input_file: str):
        """Remember the path of the spreadsheet to parse."""
        self.input_file = input_file
        self.file_format = ".xlsx"

    def parse(self) -> pycompliance.Benchmark:
        """Read the spreadsheet and build a ``pycompliance.Benchmark``.

        A row that has a section number but no recommendation number is
        added as a section; a row that has both is added as a control.

        Returns:
            The populated benchmark, with ``version`` set from the file name.

        Raises:
            ValueError: If no version can be found in the file name.
        """
        cols = [
            'section #',
            'recommendation #',
            'profile',
            'title',
            'assessment status',
            'description',
            'remediation procedure',
            'rationale statement',
            'audit procedure']

        benchmark_name = self.get_name()
        benchmark_version = self.get_version()
        b = pycompliance.Benchmark(benchmark_name)
        b.version = benchmark_version
        df = pandas.read_excel(
            self.input_file, sheet_name='Combined Profiles', usecols=cols)
        # The JSON round trip is kept from the original implementation; per
        # pandas' to_json behavior, empty cells come back as None.
        d = json.loads(df.to_json(orient='split'))
        columns = d['columns']

        for values in d['data']:
            # Look cells up by column name rather than by position, so the
            # mapping does not depend on the order of columns in the sheet.
            row = dict(zip(columns, values))

            # A whole-number section may be read as a float (e.g. 10.0).
            # Remove only that exact suffix: str.rstrip('.0') treats its
            # argument as a set of characters and would turn '10.0' into '1'.
            section = str(row['section #']).removesuffix('.0')
            control = row['recommendation #']
            title = row['title']
            description = row['description']

            if section and not control:
                s = pycompliance.Section(section)
                s.title = title
                s.description = description
                b.add_section(s)
            elif section and control:
                c = pycompliance.Control(control)
                c.title = title
                c.level = row['profile']
                c.description = description
                c.remediation = row['remediation procedure']
                c.rationale = row['rationale statement']
                c.audit = row['audit procedure']
                c.assessment = row['assessment status']
                b.add_control(c)
        return b

    def get_name(self) -> str:
        """Return the benchmark name taken from the file name.

        Underscores in the file name (without directory and extension) are
        treated as spaces; the name consists of every word that precedes the
        first word starting with the version string.

        Raises:
            ValueError: If no version can be found in the file name.
        """
        stem = os.path.splitext(os.path.basename(self.input_file))[0]
        # Determine the version once instead of on every loop iteration.
        version = self.get_version()
        words = []
        for word in stem.replace("_", " ").split():
            if word.startswith(version):
                break
            words.append(word)
        return ' '.join(words)

    def get_version(self) -> str:
        """Return the version token (e.g. ``v1.4.0``) found in the file name.

        Only the file name itself is searched, so a directory component such
        as ``v2/`` cannot be mistaken for the version. The match stops at the
        next underscore or whitespace so that it lines up with the words
        ``get_name`` splits the file name into.

        Raises:
            ValueError: If the file name contains no ``v<digit>`` token.
        """
        stem = os.path.splitext(os.path.basename(self.input_file))[0]
        m = re.search(r"v\d[^\s_]*", stem)
        if m:
            return m.group()
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working.
        raise ValueError("Unable to determine version from file name")


class Generator:
    """Shared helpers for turning a benchmark into YAML-ready structures."""

    def __init__(self, benchmark: pycompliance.Benchmark) -> None:
        """Store the benchmark the generator works on."""
        self.benchmark = benchmark

    def placeholder(self, key=None):
        """Return *key*, or the string 'PLACEHOLDER' when *key* is None."""
        return 'PLACEHOLDER' if key is None else key

    @staticmethod
    def _level_id(node):
        """Return the normalized level of *node*, or None if it has none.

        Normalization lower-cases the level and replaces spaces with
        underscores. A node without a ``level`` attribute, or whose level is
        None (e.g. parsed from an empty cell), yields None instead of
        raising.
        """
        level = getattr(node, 'level', None)
        if level is None:
            return None
        return level.replace(' ', '_').lower()

    def _get_controls(self, section=None) -> list[dict]:
        """Return the generated control tree(s).

        When *section* is given, the list holds only the tree generated for
        it; otherwise it holds one tree per direct child of the benchmark.
        """
        if section:
            return [self._generate(section)]
        return [self._generate(child) for child in self.benchmark.children]

    def _get_levels(self) -> list[dict]:
        """Return one entry per distinct level, in order of first appearance.

        Each entry has the normalized level as ``id`` and a placeholder for
        ``inherits_from``.
        """
        levels = []
        for node in self.benchmark.traverse(self.benchmark):
            level = self._level_id(node)
            if level is not None and level not in levels:
                levels.append(level)
        return [
            {'id': level, 'inherits_from': self.placeholder()}
            for level in levels
        ]

    def _generate(self, node: pycompliance.Node) -> dict:
        """Recursively convert *node* and its children into a dictionary.

        The result always has ``id``, ``title``, ``status`` and an empty
        ``rules`` list; ``levels`` is added when the node has a level and
        ``controls`` when it has children.
        """
        d = {
            'id': node.id,
            'title': node.title,
            'status': self.placeholder(key='pending'),
            'rules': []
        }
        level = self._level_id(node)
        if level is not None:
            d['levels'] = level
        if node.children:
            # A distinct loop name avoids shadowing the ``node`` parameter.
            d['controls'] = [self._generate(child) for child in node.children]
        return d


class RuleGenerator(Generator):
    """Generator that prints a rule skeleton for a single control."""

    def __init__(self, benchmark: pycompliance.Benchmark):
        super().__init__(benchmark)

    def generate(self, control: pycompliance.Control):
        """Print a YAML rule skeleton built from *control*.

        The control's description and remediation are combined into the
        rule description; title, rationale and audit text are copied over
        and the remaining fields are filled with placeholders. Nothing is
        printed when *control* is not a ``pycompliance.Control``.
        """
        if not isinstance(control, pycompliance.Control):
            return
        # Concatenating LiteralUnicode values with ``+`` produces a plain
        # ``str``, which would not be handled by the representer registered
        # for LiteralUnicode. Build the combined text first and wrap the
        # result so the description is represented like the other fields.
        description = LiteralUnicode(
            f'{control.description}\n{control.remediation}')
        output = {
            'documentation_complete': False,
            'title': LiteralUnicode(control.title),
            'description': description,
            'rationale': LiteralUnicode(control.rationale),
            'severity': self.placeholder(),
            'references': self.placeholder(),
            'ocil': LiteralUnicode(control.audit),
            'ocil_clause': self.placeholder(),
            'warnings': self.placeholder(),
            'template': self.placeholder(),
        }
        # An infinite width keeps yaml from wrapping long lines.
        print(yaml.dump(output, sort_keys=False, width=float("inf")))


class SectionGenerator(Generator):
    """Generator that prints only the ``controls`` tree as YAML."""

    def generate(self, section=None):
        """Print a YAML mapping with a single ``controls`` key.

        The value is whatever ``_get_controls`` returns for *section*.
        """
        document = {'controls': self._get_controls(section=section)}
        rendered = yaml.dump(document, sort_keys=False, width=float("inf"))
        print(rendered)


class ProfileGenerator(Generator):
    """Generator that prints a complete policy document as YAML."""

    def generate(self, section=None):
        """Print the policy header, levels and controls as YAML.

        The benchmark name is used for both ``policy`` and ``title``; the
        version is emitted without leading 'v' characters. ``id`` is a
        placeholder and ``source`` defaults to an example URL.
        """
        benchmark = self.benchmark
        document = dict(
            policy=benchmark.name,
            title=benchmark.name,
            id=self.placeholder(),
            version=benchmark.version.lstrip('v'),
            source=self.placeholder(key="https://example.com/benchmark"),
            levels=self._get_levels(),
            controls=self._get_controls(section=section),
        )
        rendered = yaml.dump(document, sort_keys=False)
        print(rendered)


def get_parser(input_file) -> Parser:
    """Return a parser suited to *input_file*, chosen by file name.

    Only names ending in 'xlsx' are supported; anything else raises an
    ``Exception``.
    """
    if not input_file.endswith('xlsx'):
        raise Exception("Unable to parse format")
    return XLSXParser(input_file)


def list_controls(args):
    """Print the ID of every control in the benchmark, one per line.

    *args* is the parsed command line; only ``args.input_file`` is read.
    """
    benchmark = get_parser(args.input_file).parse()
    control_ids = (
        node.id
        for node in benchmark.traverse(benchmark)
        if isinstance(node, pycompliance.Control)
    )
    for control_id in control_ids:
        print(control_id)


def generate_control(args):
    """Print generated YAML for a control, a section, or the whole profile.

    Both ``args.control`` and ``args.section`` are looked up in the parsed
    benchmark. A found control takes precedence and is rendered as a rule;
    otherwise a found section is rendered as a control tree; when neither
    lookup yields anything, the complete profile is rendered.
    """
    benchmark = get_parser(args.input_file).parse()

    found_control = benchmark.find(args.control)
    found_section = benchmark.find(args.section)

    if found_control:
        RuleGenerator(benchmark).generate(found_control)
        return
    if found_section:
        SectionGenerator(benchmark).generate(found_section)
        return
    ProfileGenerator(benchmark).generate()


def main():
    """Parse the command line and run the handler of the chosen subcommand."""
    arg_parser = setup_argument_parser()
    args = arg_parser.parse_args()
    args.func(args)


# Run the CLI only when this file is executed as a script, so that importing
# the module does not parse sys.argv or produce output.
if __name__ == '__main__':
    main()