From e6fdf7573a4e6134187b8ed2f086ee2a6f68526b Mon Sep 17 00:00:00 2001
From: Daniele Tricoli <eriol@mornie.org>
Date: Fri, 23 Feb 2018 03:20:14 +0100
Subject: Switch to bs4
Patch-Name: switch-to-bs4.patch
---
pyth/plugins/xhtml/reader.py | 17 ++++++++---------
tests/test_writelatex.py | 1 -
tests/test_writepdf.py | 6 +++---
3 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/pyth/plugins/xhtml/reader.py b/pyth/plugins/xhtml/reader.py
index 775bf58..ed4221e 100644
--- a/pyth/plugins/xhtml/reader.py
+++ b/pyth/plugins/xhtml/reader.py
@@ -2,7 +2,8 @@
Read documents from xhtml
"""
-import BeautifulSoup
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString
from pyth import document
from pyth.format import PythReader
@@ -23,10 +24,8 @@ class XHTMLReader(PythReader):
self.link_callback = link_callback
def go(self):
- soup = BeautifulSoup.BeautifulSoup(self.source,
- convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
- fromEncoding=self.encoding,
- smartQuotesTo=None)
+ soup = BeautifulSoup(self.source,
+ from_encoding=self.encoding)
# Make sure the document content doesn't use multi-lines
soup = self.format(soup)
doc = document.Document()
@@ -56,12 +55,12 @@ class XHTMLReader(PythReader):
text = unicode(node)
lines = [x.strip() for x in text.splitlines()]
text = ' '.join(lines)
- node.replaceWith(BeautifulSoup.BeautifulSoup(text))
- soup = BeautifulSoup.BeautifulSoup(unicode(soup))
+ node.replaceWith(BeautifulSoup(text))
+ soup = BeautifulSoup(unicode(soup))
# replace all <br/> tag by newline character
for node in soup.findAll('br'):
node.replaceWith("\n")
- soup = BeautifulSoup.BeautifulSoup(unicode(soup))
+ soup = BeautifulSoup(unicode(soup))
return soup
def is_bold(self, node):
@@ -141,7 +140,7 @@ class XHTMLReader(PythReader):
Process a BeautifulSoup node and fill its elements into a pyth
base object.
"""
- if isinstance(node, BeautifulSoup.NavigableString):
+ if isinstance(node, NavigableString):
text = self.process_text(node)
if text:
obj.append(text)
diff --git a/tests/test_writelatex.py b/tests/test_writelatex.py
index 58a6ce8..8f2b90c 100644
--- a/tests/test_writelatex.py
+++ b/tests/test_writelatex.py
@@ -7,7 +7,6 @@ import subprocess
import tempfile
import os
import sys
-import BeautifulSoup
from pyth.plugins.latex.writer import LatexWriter
from pyth.plugins.python.reader import *
diff --git a/tests/test_writepdf.py b/tests/test_writepdf.py
index 9cddad7..8cc3258 100644
--- a/tests/test_writepdf.py
+++ b/tests/test_writepdf.py
@@ -7,7 +7,7 @@ import subprocess
import tempfile
import os
import sys
-import BeautifulSoup
+from bs4 import BeautifulSoup
from pyth.plugins.pdf.writer import PDFWriter
from pyth.plugins.python.reader import *
@@ -64,7 +64,7 @@ class TestWritePDF(unittest.TestCase):
doc = PythonReader.read([P[T(BOLD)[u"bold text"]]])
pdf = PDFWriter.write(doc).getvalue()
html = self.pdf_to_html(pdf)
- soup = BeautifulSoup.BeautifulSoup(html)
+ soup = BeautifulSoup(html)
node = soup.find("b")
assert node
assert node.string == "bold text"
@@ -73,7 +73,7 @@ class TestWritePDF(unittest.TestCase):
doc = PythonReader.read([P[T(ITALIC)[u"italic text"]]])
pdf = PDFWriter.write(doc).getvalue()
html = self.pdf_to_html(pdf)
- soup = BeautifulSoup.BeautifulSoup(html)
+ soup = BeautifulSoup(html)
node = soup.find("i")
assert node
assert node.string == "italic text"