1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
package org.jsoup.helper;
import org.jsoup.Jsoup;
import org.jsoup.integration.ParseTest;
import org.jsoup.nodes.Element;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import java.io.File;
import java.io.IOException;
import static org.jsoup.TextUtil.LE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link W3CDom}, which converts jsoup documents into
 * {@code org.w3c.dom} documents and serializes them back to text.
 */
public class W3CDomTest {

    /**
     * Converts a small, deliberately unterminated HTML snippet to a W3C document
     * and checks the exact text produced by {@link W3CDom#asString}.
     */
    @Test
    public void simpleConversion() {
        String html = "<html><head><title>W3c</title></head><body><p class='one' id=12>Text</p><!-- comment --><invalid>What<script>alert('!')";
        W3CDom converter = new W3CDom();
        Document w3cDoc = converter.fromJsoup(Jsoup.parse(html));

        // The input leaves <invalid> and <script> unclosed; the expected output has both closed.
        String expected =
            "<html>" + LE +
            "<head>" + LE +
            "<META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" + LE +
            "<title>W3c</title>" + LE +
            "</head>" + LE +
            "<body>" + LE +
            "<p class=\"one\" id=\"12\">Text</p>" + LE +
            "<!-- comment -->" + LE +
            "<invalid>What<script>alert('!')</script>" + LE +
            "</invalid>" + LE +
            "</body>" + LE +
            "</html>" + LE;

        assertEquals(expected, converter.asString(w3cDoc));
    }

    /**
     * Converts the google-ipod fixture page and checks that the root element carries
     * no namespace and that the serialized output still mentions "ipod".
     * Currently disabled via {@code @Ignore}.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    @org.junit.Ignore
    public void convertsGoogle() throws IOException {
        File fixture = ParseTest.getFile("/htmltests/google-ipod.html");
        org.jsoup.nodes.Document parsed = Jsoup.parse(fixture, "UTF8");
        W3CDom converter = new W3CDom();
        Document w3cDoc = converter.fromJsoup(parsed);

        Node root = w3cDoc.getChildNodes().item(0);
        assertEquals(null, root.getNamespaceURI());
        assertEquals("html", root.getLocalName());
        assertEquals("html", root.getNodeName());

        String serialized = converter.asString(w3cDoc);
        assertTrue(serialized.contains("ipod"));
    }

    /**
     * Converts the namespaces.xhtml fixture and checks namespace URI, local name and
     * qualified node name for the root element and for two prefixed elements.
     *
     * @throws IOException if the fixture file cannot be read
     */
    @Test
    public void namespacePreservation() throws IOException {
        File fixture = ParseTest.getFile("/htmltests/namespaces.xhtml");
        org.jsoup.nodes.Document parsed = Jsoup.parse(fixture, "UTF-8");
        W3CDom converter = new W3CDom();
        Document w3cDoc = converter.fromJsoup(parsed);

        Node root = w3cDoc.getChildNodes().item(0);
        assertEquals("http://www.w3.org/1999/xhtml", root.getNamespaceURI());
        assertEquals("html", root.getLocalName());
        assertEquals("html", root.getNodeName());

        // Child indexes below depend on the fixture's layout (presumably counting
        // whitespace text nodes as children) -- confirm against namespaces.xhtml.
        Node container = root.getChildNodes().item(2);
        Node epubTitle = container.getChildNodes().item(3);
        assertEquals("http://www.idpf.org/2007/ops", epubTitle.getNamespaceURI());
        assertEquals("title", epubTitle.getLocalName());
        assertEquals("epub:title", epubTitle.getNodeName());

        // Step over one intervening sibling to reach the x:section element.
        Node between = epubTitle.getNextSibling();
        Node xSection = between.getNextSibling();
        assertEquals("urn:test", xSection.getNamespaceURI());
        assertEquals("section", xSection.getLocalName());
        assertEquals("x:section", xSection.getNodeName());
    }

    /**
     * Parses a body tag with stray quote characters in its attribute list, confirms
     * jsoup stored them as attribute keys, then runs the W3C conversion on the document.
     * Nothing is asserted about the converted result; the conversion call itself is
     * what is exercised.
     */
    @Test
    public void handlesInvalidAttributeNames() {
        String html = "<html><head></head><body style=\"color: red\" \" name\"></body></html>";
        org.jsoup.nodes.Document parsed = Jsoup.parse(html);

        Element body = parsed.select("body").first();
        // A lone '"' really is an attribute key here: correct per the HTML5 spec,
        // but not something the W3C XML DOM accepts as a name.
        assertTrue(body.hasAttr("\""));
        assertTrue(body.hasAttr("name\""));

        new W3CDom().fromJsoup(parsed);
    }
}
|