File: prune_seed_json.py

package info (click to toggle)
privacybadger 2025.12.9-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 12,700 kB
  • sloc: javascript: 56,159; python: 2,225; sh: 403; makefile: 57; xml: 6
file content (49 lines) | stat: -rwxr-xr-x 1,323 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python3

import json
import sys

from collections import OrderedDict


def prune_action_map(data):
    """Drop unneeded entries from ``data['action_map']``.

    An entry is retained when its domain also appears as a key of
    ``data['snitch_map']`` (a base domain), or when the entry carries a
    ``'dnt'`` key, or when its ``'heuristicAction'`` equals
    ``"cookieblock"``. Everything else is discarded.

    The ``'action_map'`` value of ``data`` is replaced with a new plain
    dict (original iteration order of the surviving entries is kept), and
    the same ``data`` object is returned.
    """

    def is_worth_keeping(name, entry):
        # base domains are always kept
        if name in data['snitch_map']:
            return True
        # subdomains are kept only if DNT-compliant or cookieblocked
        return 'dnt' in entry or entry['heuristicAction'] == "cookieblock"

    data['action_map'] = {
        name: entry
        for name, entry in data['action_map'].items()
        if is_worth_keeping(name, entry)
    }

    return data


if __name__ == "__main__":
    # exactly one command-line argument is expected: the seed file path
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} BADGER_SEED_DATA.json")
        sys.exit(1)

    seed_path = sys.argv[1]

    # open for update so the same handle is read from and then rewritten
    with open(seed_path, 'r+', encoding='utf-8') as handle:
        # parse the seed data into OrderedDicts, then prune its action map
        pruned = prune_action_map(
            json.load(handle, object_pairs_hook=OrderedDict)
        )

        # rewind and empty the file before writing the pruned data back
        handle.seek(0)
        handle.truncate(0)
        # this should match how data gets written out by Badger Sett
        json.dump(
            pruned,
            handle,
            indent=2,
            sort_keys=True,
            separators=(',', ': '),
        )