# Checks the inline scripts of an HTML report against a whitelist of
# Content Security Policy hashes.
import argparse
import base64
import hashlib
import logging
import re
from io import open
from bs4 import BeautifulSoup
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Command-line interface: both --report and --whitelist are mandatory.
parser = argparse.ArgumentParser(
    description=(
        "Checks inline scripts in report.html against a "
        "whitelist of hashes (Content Security Policy)"
    ),
)
parser.add_argument("--report", required=True, help="Report file (.html)")
parser.add_argument("--whitelist", required=True, help="Whitelist to compare against")

# Arguments are parsed as soon as this module is executed; argparse prints a
# usage error and exits when a required option is missing.
args = parser.parse_args()
def is_executable(script):
    """Return whether a ``<script>`` element would be run as JavaScript.

    The decision is based solely on the element's ``type`` attribute:

    * no ``type`` attribute, or an empty one, counts as JavaScript;
    * otherwise the value is compared, case-insensitively and with
      surrounding ASCII whitespace removed, against the known JavaScript
      MIME types and ``module``.

    Any other type (for example ``application/json`` or a template type) is
    treated as a non-executable data block.

    Args:
        script: Parsed element exposing its attributes through an ``attrs``
            mapping (the caller passes tags selected by BeautifulSoup).

    Returns:
        bool: True if the script is considered executable, False otherwise.
    """
    executable_types = {
        "text/javascript",
        "application/javascript",
        # The comma after "module" matters: without it Python concatenates the
        # two adjacent literals into the single bogus entry
        # "moduletext/ecmascript", and neither real type is recognised.
        "module",
        "text/ecmascript",
        "application/ecmascript",
    }
    script_type = script.attrs.get("type", "text/javascript")
    if script_type == "":
        # An empty type attribute is handled like a missing one.
        return True
    # Browsers match the type case-insensitively after trimming whitespace, so
    # normalise before the lookup to avoid skipping scripts that would run.
    normalized_type = script_type.strip(" \t\n\f\r").lower()
    return normalized_type in executable_types
def get_hash(script):
    """Return the ``sha256-<base64 digest>`` token for a script's text.

    Args:
        script: Script source as a text string; it is encoded as UTF-8
            before hashing.

    Returns:
        str: ``"sha256-"`` followed by the base64-encoded SHA-256 digest.
    """
    raw_digest = hashlib.sha256(script.encode("utf-8")).digest()
    encoded_digest = base64.b64encode(raw_digest).decode()
    return "sha256-" + encoded_digest
# Read the whitelist, then blank out everything from a "#" to the end of its
# line so that comment text can never satisfy the hash lookup below.
with open(args.whitelist, "r", encoding="utf-8") as whitelist_file:
    whitelist_with_comments = whitelist_file.read()
whitelist = re.sub(r"#.*", "", whitelist_with_comments)

# Read the report inside a context manager so the handle is closed promptly.
with open(args.report, "r", encoding="utf-8") as report_file:
    html_report = report_file.read()
soup = BeautifulSoup(html_report, features="html.parser")

# Text content of every <script> element that is_executable() accepts.
scripts = [script.get_text() for script in soup.select("script") if is_executable(script)]

# NOTE: `whitelist` is one string, so this is a substring test of each hash
# against the whole comment-stripped whitelist text.
missing_scripts = [script for script in scripts if get_hash(script) not in whitelist]

if missing_scripts:
    logger.warning("The following scripts are missing from {}".format(args.whitelist))
    for script in missing_scripts:
        # Named script_hash rather than hash to avoid shadowing the builtin.
        script_hash = get_hash(script)
        # One-line preview: newlines removed, capped at 80 characters.
        snippet = script.replace("\n", "")[0:80]
        logger.warning(" '{}' # {}".format(script_hash, snippet))
    # Exit with status 1 to signal failure. SystemExit is raised directly
    # because the exit() helper is injected by the site module and is not
    # available in every interpreter configuration (e.g. `python -S`).
    raise SystemExit(1)
|