File: check_pcre2_compatibility.py

package info (click to toggle)
scap-security-guide 0.1.76-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 110,644 kB
  • sloc: xml: 241,883; sh: 73,777; python: 32,527; makefile: 27
file content (64 lines) | stat: -rw-r--r-- 1,804 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python3

import argparse
import pcre2
import xml.etree.ElementTree as ET
from typing import Generator, List

from ssg.constants import PREFIX_TO_NS


def parse_args() -> argparse.Namespace:
    """Parse the command line of this script.

    Returns:
        The parsed namespace. Its ``data_stream`` attribute holds the single
        required positional argument.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("data_stream")
    parsed = cli.parse_args()
    return parsed


def find_all_patern_match(root: ET.Element, section_xpath: str) -> Generator:
    """Yield the text of each ``operation='pattern match'`` element in a section.

    The section is the first element under ``root`` that matches
    ``section_xpath`` (resolved with the ``PREFIX_TO_NS`` namespace map).
    For every direct child of that section, all of the child's descendants
    carrying the attribute ``operation="pattern match"`` are inspected.

    Args:
        root: Element to search for the section in.
        section_xpath: Path of the section, relative to ``root``.

    Yields:
        The ``text`` of each matching element; elements whose text is
        ``None`` are skipped. Nothing is yielded when the section is absent.
    """
    container = root.find(section_xpath, PREFIX_TO_NS)
    if container is not None:
        for entry in container:
            hits = entry.findall(".//*[@operation='pattern match']")
            yield from (hit.text for hit in hits if hit.text is not None)


def extract_all_regexes_from_oval(root: ET.Element) -> list:
    """Collect the pattern-match texts of an OVAL definitions element.

    Both the ``./oval-def:objects`` and the ``./oval-def:states`` sections
    of ``root`` are scanned, in that order, using ``find_all_patern_match``.

    Args:
        root: Element whose objects and states sections are scanned.

    Returns:
        A list with every text found; duplicates are kept.
    """
    section_xpaths = ("./oval-def:objects", "./oval-def:states")
    return [
        regex
        for section_xpath in section_xpaths
        for regex in find_all_patern_match(root, section_xpath)
    ]


def extract_all_regexes_from_data_stream(ds_file_path: str) -> set:
    """Gather the distinct pattern-match texts found in a data stream file.

    The file is parsed as XML and every element matching
    ``./ds:component/oval-def:oval_definitions`` below the document root is
    passed to ``extract_all_regexes_from_oval``.

    Args:
        ds_file_path: Path of the XML file to parse.

    Returns:
        The set of all texts found across the matched elements.
    """
    ds_root = ET.parse(ds_file_path).getroot()
    oval_xpath = "./ds:component/oval-def:oval_definitions"
    unique_regexes = set()
    for oval_definitions in ds_root.findall(oval_xpath, PREFIX_TO_NS):
        unique_regexes.update(extract_all_regexes_from_oval(oval_definitions))
    return unique_regexes


def check_pcre2_compatibility(regexes: set) -> bool:
    """Check that every given regular expression compiles with ``pcre2``.

    Every pattern is attempted, so all failures are reported in a single
    run instead of stopping at the first one. Each failure is printed to
    standard output together with the compiler's error message.

    Args:
        regexes: Regular expression strings to compile.

    Returns:
        ``True`` when every pattern compiled (including when ``regexes`` is
        empty), ``False`` when at least one raised a compile error.
    """
    result = True
    # Iteration order of a set of strings is not stable across interpreter
    # runs (string hashes are randomized), so sort to keep the report
    # reproducible and diff-friendly.
    for regex in sorted(regexes):
        try:
            pcre2.compile(regex)
        except pcre2.exceptions.CompileError as ecp:
            result = False
            print(f"Regular expression {regex} is invalid: {ecp}")
    return result


def main():
    """Run the check on the data stream named on the command line.

    Raises:
        SystemExit: With status 1 when at least one regular expression
            fails to compile.
    """
    args = parse_args()
    regexes = extract_all_regexes_from_data_stream(args.data_stream)
    if not check_pcre2_compatibility(regexes):
        # The bare `exit` helper is injected by the `site` module for
        # interactive sessions and may be missing (e.g. `python -S`);
        # raising SystemExit yields the same exit status without it.
        raise SystemExit(1)


# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()