File: filter_sarif.py

package info (click to toggle)
avogadrolibs 1.101.0-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 78,500 kB
  • sloc: cpp: 139,792; ansic: 2,212; python: 1,435; perl: 321; sh: 90; makefile: 46
file content (143 lines) | stat: -rw-r--r-- 4,817 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# From https://github.com/zbazztian/filter-sarif/blob/master/filter_sarif.py
# Some modifications by Geoffrey Hutchison

import argparse
import json
import re
from globber import match


def match_path_and_rule(path, rule, patterns):
    """Decide whether the (path, rule) pair is included by *patterns*.

    *patterns* is an iterable of ``(sign, file_pattern, rule_pattern)``
    tuples. Every pattern is tried in order; whenever both the rule pattern
    matches *rule* and the file pattern matches *path*, that pattern's sign
    becomes the current verdict, so the last matching pattern wins.

    Returns True when no pattern matches at all.
    """
    verdict = True
    for include, path_glob, rule_glob in patterns:
        # The rule is tested first; the path is only tested if the rule matched.
        if not match(rule_glob, rule):
            continue
        if match(path_glob, path):
            verdict = include
    return verdict


def parse_pattern(line):
    """Parse one filter pattern into ``(sign, file_pattern, rule_pattern)``.

    Pattern syntax, as implemented below:

    * An optional leading ``-`` makes the pattern an exclusion (sign False);
      an optional leading ``+`` (or no prefix) makes it an inclusion
      (sign True). The prefix character is not part of the file pattern.
    * A single unescaped ``:`` separates the file pattern from the rule
      pattern. When the rule pattern is missing or empty it defaults to
      ``"**"``.
    * A backslash escapes ``+``, ``-``, ``:`` and ``\\`` so they are taken
      literally. A backslash in front of any other character (or at the end
      of the pattern) is kept as-is.

    Raises:
        ValueError: if the pattern contains more than one unescaped ``:``.
    """
    sep_char = ":"
    esc_char = "\\"
    escapable = ("+", "-", esc_char, sep_char)

    # inclusion or exclusion pattern?
    sign = True
    uline = line
    if line.startswith("-"):
        sign = False
        uline = line[1:]
    elif line.startswith("+"):
        uline = line[1:]

    # Collect characters in lists and join once at the end instead of growing
    # strings character by character.
    file_chars = []
    rule_chars = []
    target = file_chars
    seen_separator = False

    length = len(uline)
    i = 0
    while i < length:
        char = uline[i]
        i += 1

        if char == sep_char:
            if seen_separator:
                raise ValueError(
                    'Invalid pattern: "' + line + '" Contains more than one separator!'
                )
            seen_separator = True
            # Everything after the separator belongs to the rule pattern.
            target = rule_chars
            continue

        # Consume the escaped character only when it is one we know how to
        # escape; otherwise the backslash itself is kept literally.
        if char == esc_char and i < length and uline[i] in escapable:
            char = uline[i]
            i += 1

        target.append(char)

    file_pattern = "".join(file_chars)
    rule_pattern = "".join(rule_chars) or "**"

    return sign, file_pattern, rule_pattern


def _resolve_rule_id(run, result):
    """Return the rule id that applies to *result*, or None if unknown.

    The ``ruleId`` property of a SARIF result is optional. When it is absent
    the id is looked up, in order, from ``result["rule"]["id"]`` and from the
    driver's rule list (``run["tool"]["driver"]["rules"]``) using the
    result's ``ruleIndex`` / ``rule.index``.
    """
    rule_id = result.get("ruleId")
    if rule_id is not None:
        return rule_id

    rule_ref = result.get("rule") or {}
    if rule_ref.get("id") is not None:
        return rule_ref["id"]

    # An explicit toolComponent reference means the index does not address
    # the driver's rule list, so it is not resolved here.
    if "toolComponent" in rule_ref:
        return None

    index = result.get("ruleIndex", rule_ref.get("index", -1))
    rules = ((run.get("tool") or {}).get("driver") or {}).get("rules") or []
    if isinstance(index, int) and 0 <= index < len(rules):
        return rules[index].get("id")
    return None


def filter_sarif(args):
    """Filter the results of a SARIF file according to the given patterns.

    *args* must provide the attributes ``input`` and ``output`` (file paths),
    ``patterns`` (list of pattern strings), ``split_lines`` (bool) and
    ``indent`` (int or None, passed to ``json.dump``).

    For each result that has locations, every location whose artifact URI and
    rule id are rejected by the patterns is removed; a result whose locations
    are all removed is dropped. Results without locations, locations without
    a URI, and results whose rule id cannot be determined are kept, because
    there is nothing to match against.

    ``args.patterns`` is not modified, so the same namespace can be reused.
    """
    raw_patterns = args.patterns
    if args.split_lines:
        raw_patterns = [
            piece
            for pattern in raw_patterns
            for piece in re.split(r"\r?\n", pattern)
        ]

    # Empty strings (e.g. blank lines after splitting) are not patterns.
    patterns = [parse_pattern(pattern) for pattern in raw_patterns if pattern]

    print("Given patterns:")
    for sign, file_pattern, rule_pattern in patterns:
        sign_text = "positive" if sign else "negative"
        print(f"files: {file_pattern}    rules: {rule_pattern} ({sign_text})")

    with open(args.input, "r", encoding="UTF-8") as file:
        sarif = json.load(file)

    for run in sarif.get("runs") or []:
        results = run.get("results")
        if not results:
            continue

        kept_results = []
        for result in results:
            locations = result.get("locations")
            if not locations:
                # locations array doesn't exist or is empty, so we can't match
                # on anything; therefore, we include the result in the output
                kept_results.append(result)
                continue

            # The rule id is a property of the result, not of the location,
            # so it is resolved once per result.
            rule_id = _resolve_rule_id(run, result)

            kept_locations = []
            for location in locations:
                # TODO: The uri field is optional. We might have to fetch the
                #  actual uri from "artifacts" via "index"
                # (https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md)
                physical = location.get("physicalLocation") or {}
                uri = (physical.get("artifactLocation") or {}).get("uri")
                if (
                    uri is None
                    or rule_id is None
                    or match_path_and_rule(uri, rule_id, patterns)
                ):
                    kept_locations.append(location)

            result["locations"] = kept_locations
            if kept_locations:
                kept_results.append(result)

        run["results"] = kept_results

    with open(args.output, "w", encoding="UTF-8") as file:
        json.dump(sarif, file, indent=args.indent)


def main(argv=None):
    """Command-line entry point: parse arguments and run the SARIF filter.

    *argv* is the list of command-line arguments to parse (without the
    program name). When it is None, argparse reads ``sys.argv[1:]``, which is
    the behaviour of a plain ``main()`` call.

    argparse exits the process (``SystemExit``) on invalid arguments or
    ``--help``.
    """
    parser = argparse.ArgumentParser(prog="filter-sarif")
    parser.add_argument("--input", help="Input SARIF file", required=True)
    parser.add_argument("--output", help="Output SARIF file", required=True)
    parser.add_argument(
        "--split-lines",
        default=False,
        action="store_true",
        help="Split given patterns on newlines.",
    )
    parser.add_argument(
        "--indent", default=None, type=int, help="Indentation level for JSON output."
    )
    parser.add_argument("patterns", help="Inclusion and exclusion patterns.", nargs="+")

    args = parser.parse_args(argv)
    filter_sarif(args)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()