1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
|
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_str
from compressor.parser import ParserBase
class BeautifulSoupParser(ParserBase):
    """Parser backend built on BeautifulSoup 4 using the "html.parser" builder.

    The parsed tree is stored on ``self.soup``; the remaining methods query
    that tree or describe individual elements taken from it.
    """

    def __init__(self, content):
        """Parse the content into a BeautifulSoup tree stored on ``self.soup``.

        Args:
            content: Markup handed to ``ParserBase.__init__``; the tree is
                built from ``self.content`` afterwards (presumably set by the
                base class -- verify against ``ParserBase``).

        Raises:
            ImproperlyConfigured: If an ``ImportError`` is raised while
                importing or instantiating BeautifulSoup.
        """
        super().__init__(content)
        try:
            # Imported lazily so that a missing bs4 only fails when this
            # parser is actually instantiated.
            from bs4 import BeautifulSoup

            self.soup = BeautifulSoup(self.content, "html.parser")
        except ImportError as err:
            # Chain explicitly so the original import failure stays visible
            # as the cause in the traceback.
            raise ImproperlyConfigured(
                "Error while importing BeautifulSoup: %s" % err
            ) from err

    def css_elems(self):
        """Return the result of ``find_all`` for ``link`` and ``style`` tags."""
        # NOTE(review): a dict is passed as the tag-name filter here; the
        # form documented by bs4 is a list of names -- confirm before changing.
        return self.soup.find_all({"link": True, "style": True})

    def js_elems(self):
        """Return the result of ``find_all`` for ``script`` tags."""
        return self.soup.find_all("script")

    def elem_attribs(self, elem):
        """Return a new dict of ``elem.attrs`` with list values space-joined.

        bs4 returns multi-valued attributes as lists instead of one string, see
        http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
        Such values are flattened back into a single space-separated string;
        every other value is passed through untouched.
        """
        attrs = dict(elem.attrs)
        # isinstance() rather than an exact type check, so that list
        # subclasses used for multi-valued attributes are joined as well.
        return {
            key: " ".join(value) if isinstance(value, list) else value
            for key, value in attrs.items()
        }

    def elem_content(self, elem):
        """Return ``elem.string``."""
        return elem.string

    def elem_name(self, elem):
        """Return ``elem.name``."""
        return elem.name

    def elem_str(self, elem):
        """Return ``elem`` converted to a string with ``smart_str``.

        For ``link`` elements every ``"/>"`` in the result is replaced by
        ``">"``.
        """
        elem_as_string = smart_str(elem)
        if elem.name == "link":
            # This makes testcases happy
            elem_as_string = elem_as_string.replace("/>", ">")
        return elem_as_string
|