@Test public void testClone() { Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>"); Element p = doc.select("p").get(1); Element clone = p.clone(); assertNull(clone.parent()); // should be orphaned assertEquals(0, clone.siblingIndex); assertEquals(1, p.siblingIndex); assertNotNull(p.parent()); clone.append("<span>Three"); assertEquals("<p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(clone.outerHtml())); assertEquals("<div><p>One</p><p><span>Two</span></p></div>", TextUtil.stripNewlines(doc.body().html())); // not modified doc.body().appendChild(clone); // adopt assertNotNull(clone.parent()); assertEquals("<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(doc.body().html())); }
@Ignore @Test public void testSupplyParserToConnection() throws IOException { String xmlUrl = "http://direct.infohound.net/tools/jsoup-xml-test.xml"; // parse with both xml and html parser, ensure different Document xmlDoc = Jsoup.connect(xmlUrl).parser(Parser.xmlParser()).get(); Document htmlDoc = Jsoup.connect(xmlUrl).parser(Parser.htmlParser()).get(); Document autoXmlDoc = Jsoup.connect(xmlUrl).get(); // check connection auto detects xml, uses xml parser assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(xmlDoc.html())); assertFalse(htmlDoc.equals(xmlDoc)); assertEquals(xmlDoc, autoXmlDoc); assertEquals(1, htmlDoc.select("head").size()); // html parser normalises assertEquals(0, xmlDoc.select("head").size()); // xml parser does not assertEquals(0, autoXmlDoc.select("head").size()); // xml parser does not }
/**
 * Exercises the text accessors of {@code TextNode}: reading text, replacing it via
 * {@code text(String)}, and replacing it through the {@code "text"} attribute.
 * <p>
 * {@code text()} is compared against the raw character data, while the serialized
 * markup from {@code html()} is compared with the ampersand written as the
 * {@code &amp;} entity, because HTML output escapes ampersands in text content.
 */
@Test
public void testTextBean() {
    Document doc = Jsoup.parse("<p>One <span>two &</span> three &</p>");
    Element p = doc.select("p").first();

    Element span = doc.select("span").first();
    assertEquals("two &", span.text());
    TextNode spanText = (TextNode) span.childNode(0);
    assertEquals("two &", spanText.text());

    // third child of <p> is the trailing text node after the span
    TextNode tn = (TextNode) p.childNode(2);
    assertEquals(" three &", tn.text());

    tn.text(" POW!");
    // serialized output carries the span's ampersand as an entity
    assertEquals("One <span>two &amp;</span> POW!", TextUtil.stripNewlines(p.html()));

    tn.attr("text", "kablam &");
    assertEquals("kablam &", tn.text());
    assertEquals("One <span>two &amp;</span>kablam &amp;", TextUtil.stripNewlines(p.html()));
}
/**
 * Locates the paragraph whose own text matches {@code XXX}, removes every element its
 * parent reports at a greater index, and asserts that only the marker and the
 * paragraphs before it remain.
 */
@Test
public void testRemoveAfterIndex() {
    Document parsed = Jsoup.parse(
            "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>",
            "");
    Element body = parsed.select("body").first();
    Elements matches = body.select("p:matchesOwn(XXX)");
    Element marker = matches.first();

    Elements trailing = marker.parent().getElementsByIndexGreaterThan(marker.elementSiblingIndex());
    for (Element following : trailing) {
        following.remove();
    }

    String remaining = TextUtil.stripNewlines(body.outerHtml());
    assertEquals("<body><div><p>before1</p><p>before2</p><p>XXX</p></div></body>", remaining);
}
/**
 * Locates the paragraph whose own text matches {@code XXX}, removes every element its
 * parent reports at a lesser index, and asserts that only the marker and the
 * paragraphs after it remain.
 */
@Test
public void testRemoveBeforeIndex() {
    Document parsed = Jsoup.parse(
            "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>",
            "");
    Element body = parsed.select("body").first();
    Elements matches = body.select("p:matchesOwn(XXX)");
    Element marker = matches.first();

    Elements leading = marker.parent().getElementsByIndexLessThan(marker.elementSiblingIndex());
    for (Element preceding : leading) {
        preceding.remove();
    }

    String remaining = TextUtil.stripNewlines(body.outerHtml());
    assertEquals("<body><div><p>XXX</p><p>after1</p><p>after2</p></div></body>", remaining);
}
/**
 * Cleans a fragment with the basic whitelist and asserts the output: the div wrapper is
 * gone, the {@code javascript:} href is dropped, the upper-case link is normalised, and
 * both anchors carry {@code rel="nofollow"}.
 */
@Test
public void basicBehaviourTest() {
    String dirty = "<div><p><a href='javascript:sendAllMoney()'>Dodgy</a> <A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
    Whitelist rules = Whitelist.basic();

    String cleaned = Jsoup.clean(dirty, rules);

    assertEquals("<p><a rel=\"nofollow\">Dodgy</a> <a href=\"http://nice.com\" rel=\"nofollow\">Nice</a></p><blockquote>Hello</blockquote>",
            TextUtil.stripNewlines(cleaned));
}
/**
 * Cleans a fragment with the basic whitelist after removing the enforced {@code rel}
 * attribute on {@code a}, and asserts the resulting anchor has no {@code rel}.
 */
@Test
public void testRemoveEnforcedAttributes() {
    String dirty = "<div><p><A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
    Whitelist withoutRel = Whitelist.basic().removeEnforcedAttribute("a", "rel");

    String cleaned = Jsoup.clean(dirty, withoutRel);

    assertEquals("<p><a href=\"http://nice.com\">Nice</a></p><blockquote>Hello</blockquote>",
            TextUtil.stripNewlines(cleaned));
}
@Test public void discardsNakedTds() { // jsoup used to make this into an implicit table; but browsers make it into a text run String h = "<td>Hello<td><p>There<p>now"; Document doc = Jsoup.parse(h); assertEquals("Hello<p>There</p><p>now</p>", TextUtil.stripNewlines(doc.body().html())); // <tbody> is introduced if no implicitly creating table, but allows tr to be directly under table }
@Test public void handlesBlocksInDefinitions() { // per the spec, dt and dd are inline, but in practise are block String h = "<dl><dt><div id=1>Term</div></dt><dd><div id=2>Def</div></dd></dl>"; Document doc = Jsoup.parse(h); assertEquals("dt", doc.select("#1").first().parent().tagName()); assertEquals("dd", doc.select("#2").first().parent().tagName()); assertEquals("<dl><dt><div id=\"1\">Term</div></dt><dd><div id=\"2\">Def</div></dd></dl>", TextUtil.stripNewlines(doc.body().html())); }
@Test public void handlesFrames() { String h = "<html><head><script></script><noscript></noscript></head><frameset><frame src=foo></frame><frame src=foo></frameset></html>"; Document doc = Jsoup.parse(h); assertEquals("<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\"><frame src=\"foo\"></frameset></html>", TextUtil.stripNewlines(doc.html())); // no body auto vivification }
@Test public void handlesUnexpectedMarkupInTables() { // whatwg - tests markers in active formatting (if they didn't work, would get in in table) // also tests foster parenting String h = "<table><b><tr><td>aaa</td></tr>bbb</table>ccc"; Document doc = Jsoup.parse(h); assertEquals("<b></b><b>bbb</b><table><tbody><tr><td>aaa</td></tr></tbody></table><b>ccc</b>", TextUtil.stripNewlines(doc.body().html())); }
@Test public void associatedFormControlsWithDisjointForms() { // form gets closed, isn't parent of controls String html = "<table><tr><form><input type=hidden id=1><td><input type=text id=2></td><tr></table>"; Document doc = Jsoup.parse(html); Element el = doc.select("form").first(); assertTrue("Is form element", el instanceof FormElement); FormElement form = (FormElement) el; Elements controls = form.elements(); assertEquals(2, controls.size()); assertEquals("1", controls.get(0).id()); assertEquals("2", controls.get(1).id()); assertEquals("<table><tbody><tr><form></form><input type=\"hidden\" id=\"1\"><td><input type=\"text\" id=\"2\"></td></tr><tr></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html())); }
/**
 * Parses a small XML snippet with {@code XmlTreeBuilder} and asserts that the markup
 * round-trips with quoted attributes, and that a relative {@code href} resolves
 * against the base URI passed to the parser.
 */
@Test
public void testSimpleXmlParse() {
    String xml = "<doc id=2 href='/bar'>Foo <br /><link>One</link><link>Two</link></doc>";
    XmlTreeBuilder tb = new XmlTreeBuilder();
    Document doc = tb.parse(xml, "http://foo.com/");

    assertEquals("<doc id=\"2\" href=\"/bar\">Foo <br /><link>One</link><link>Two</link></doc>",
            TextUtil.stripNewlines(doc.html()));
    // expected value first, so a failure message reports the right sides
    assertEquals("http://foo.com/bar", doc.getElementById("2").absUrl("href"));
}
@Test public void testPopToClose() { // test: </val> closes Two, </bar> ignored String xml = "<doc><val>One<val>Two</val></bar>Three</doc>"; XmlTreeBuilder tb = new XmlTreeBuilder(); Document doc = tb.parse(xml, "http://foo.com/"); assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", TextUtil.stripNewlines(doc.html())); }
/**
 * Parses XML that starts with a doctype and a comment and asserts that the serialized
 * output is identical to the input.
 */
@Test
public void testCommentAndDocType() {
    String source = "<!DOCTYPE html><!-- a comment -->One <qux />Two";
    XmlTreeBuilder builder = new XmlTreeBuilder();

    Document parsed = builder.parse(source, "http://foo.com/");

    String serialized = TextUtil.stripNewlines(parsed.html());
    assertEquals("<!DOCTYPE html><!-- a comment -->One <qux />Two", serialized);
}
/**
 * Passes the XML parser to {@code Jsoup.parse(String, String, Parser)} and asserts the
 * serialized result for input containing an unmatched end tag.
 */
@Test
public void testSupplyParserToJsoupClass() {
    String source = "<doc><val>One<val>Two</val></bar>Three</doc>";
    Parser xmlParser = Parser.xmlParser();

    Document parsed = Jsoup.parse(source, "http://foo.com/", xmlParser);

    String serialized = TextUtil.stripNewlines(parsed.html());
    assertEquals("<doc><val>One<val>Two</val>Three</val></doc>", serialized);
}
/**
 * Parses the {@code /htmltests/xml-test.xml} classpath resource from an input stream
 * with the XML parser and asserts the serialized result.
 * <p>
 * The stream is closed in a {@code finally} block so the file handle is released even
 * when parsing or the assertion fails.
 *
 * @throws IOException if the resource cannot be opened, read, or closed
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 */
@Test
public void testSupplyParserToDataStream() throws IOException, URISyntaxException {
    File xmlFile = new File(XmlTreeBuilder.class.getResource("/htmltests/xml-test.xml").toURI());
    InputStream inStream = new FileInputStream(xmlFile);
    try {
        // null charset: no explicit encoding is supplied to the parser
        Document doc = Jsoup.parse(inStream, null, "http://foo.com", Parser.xmlParser());
        assertEquals("<doc><val>One<val>Two</val>Three</val></doc>",
                TextUtil.stripNewlines(doc.html()));
    } finally {
        inStream.close();
    }
}
/**
 * Parses the {@code /htmltests/xml-charset.xml} classpath resource without supplying a
 * charset and asserts that the document reports {@code ISO-8859-1} and serializes the
 * XML declaration and the accented content as expected.
 * <p>
 * The stream is closed in a {@code finally} block so the file handle is released even
 * when parsing or an assertion fails.
 *
 * @throws IOException if the resource cannot be opened, read, or closed
 * @throws URISyntaxException if the resource URL cannot be converted to a URI
 */
@Test
public void testDetectCharsetEncodingDeclaration() throws IOException, URISyntaxException {
    File xmlFile = new File(XmlTreeBuilder.class.getResource("/htmltests/xml-charset.xml").toURI());
    InputStream inStream = new FileInputStream(xmlFile);
    try {
        // null charset: the encoding has to come from the document itself
        Document doc = Jsoup.parse(inStream, null, "http://example.com/", Parser.xmlParser());
        assertEquals("ISO-8859-1", doc.charset().name());
        assertEquals("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?> <data>äöåéü</data>",
                TextUtil.stripNewlines(doc.html()));
    } finally {
        inStream.close();
    }
}
/**
 * Prepends and appends markup to every selected paragraph, then replaces their inner
 * HTML, asserting the second paragraph's outer HTML after each step.
 */
@Test
public void setHtml() {
    Document parsed = Jsoup.parse("<p>One</p><p>Two</p><p>Three</p>");
    Elements paragraphs = parsed.select("p");

    Elements afterPrepend = paragraphs.prepend("<b>Bold</b>");
    afterPrepend.append("<i>Ital</i>");
    String decorated = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><b>Bold</b>Two<i>Ital</i></p>", decorated);

    paragraphs.html("<span>Gone</span>");
    String replaced = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><span>Gone</span></p>", replaced);
}
/**
 * Wraps every selected paragraph in a {@code div} and asserts the resulting body markup.
 */
@Test
public void wrapDiv() {
    String markup = "<p><b>This</b> is <b>jsoup</b>.</p> <p>How do you like it?</p>";
    Document parsed = Jsoup.parse(markup);

    Elements paragraphs = parsed.select("p");
    paragraphs.wrap("<div></div>");

    String bodyHtml = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<div><p><b>This</b> is <b>jsoup</b>.</p></div> <div><p>How do you like it?</p></div>", bodyHtml);
}
/**
 * Clones a whole document, edits the copy's title and first paragraph, and asserts
 * that the copy reflects the edits while the source document's markup is unchanged.
 */
@Test
public void testClone() {
    Document original = Jsoup.parse("<title>Hello</title> <p>One<p>Two");
    Document copy = original.clone();

    String freshCopy = TextUtil.stripNewlines(copy.html());
    assertEquals("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", freshCopy);

    copy.title("Hello there");
    Element firstPara = copy.select("p").first();
    firstPara.text("One more").attr("id", "1");

    String editedCopy = TextUtil.stripNewlines(copy.html());
    assertEquals("<html><head><title>Hello there</title> </head><body><p id=\"1\">One more</p><p>Two</p></body></html>", editedCopy);

    // the source document must be unaffected by edits to the copy
    String untouched = TextUtil.stripNewlines(original.html());
    assertEquals("<html><head><title>Hello</title> </head><body><p>One</p><p>Two</p></body></html>", untouched);
}
/**
 * Clones a document that begins with a doctype and asserts that the copy serializes
 * identically to the source, including the doctype.
 */
@Test
public void testClonesDeclarations() {
    Document original = Jsoup.parse("<!DOCTYPE html><html><head><title>Doctype test");
    Document copy = original.clone();

    String originalHtml = original.html();
    String copyHtml = copy.html();
    assertEquals(originalHtml, copyHtml);

    assertEquals("<!doctype html><html><head><title>Doctype test</title></head><body></body></html>",
            TextUtil.stripNewlines(copy.html()));
}
@Test public void testAddNewElement() { Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); Element div = doc.getElementById("1"); div.appendElement("p").text("there"); div.appendElement("P").attr("class", "second").text("now"); assertEquals("<html><head></head><body><div id=\"1\"><p>Hello</p><p>there</p><p class=\"second\">now</p></div></body></html>", TextUtil.stripNewlines(doc.html())); // check sibling index (with short circuit on reindexChildren): Elements ps = doc.select("p"); for (int i = 0; i < ps.size(); i++) { assertEquals(i, ps.get(i).siblingIndex); } }
/**
 * Appends a row's markup to the {@code tbody} of a parsed table and asserts that the
 * new row follows the existing one.
 */
@Test
public void testAppendRowToTable() {
    Document parsed = Jsoup.parse("<table><tr><td>1</td></tr></table>");
    Element tbody = parsed.select("tbody").first();

    tbody.append("<tr><td>2</td></tr>");

    String bodyHtml = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<table><tbody><tr><td>1</td></tr><tr><td>2</td></tr></tbody></table>", bodyHtml);
}
@Test public void testPrependRowToTable() { Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>"); Element table = doc.select("tbody").first(); table.prepend("<tr><td>2</td></tr>"); assertEquals("<table><tbody><tr><td>2</td></tr><tr><td>1</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html())); // check sibling index (reindexChildren): Elements ps = doc.select("tr"); for (int i = 0; i < ps.size(); i++) { assertEquals(i, ps.get(i).siblingIndex); } }
/**
 * Prepends plain text containing {@code &} and {@code >} to a div and asserts both views
 * of the result: {@code text()} is compared against the raw characters, while
 * {@code html()} is compared against the entity-escaped form ({@code &amp;} and
 * {@code &gt;}), since serialized text content is escaped.
 */
@Test
public void testPrependText() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");

    div.prependText("there & now > ");

    assertEquals("there & now > Hello", div.text());
    // the markup view carries the special characters as entities
    assertEquals("there &amp; now &gt; <p>Hello</p>", TextUtil.stripNewlines(div.html()));
}
@Test public void testAddNewHtml() { Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); Element div = doc.getElementById("1"); div.append("<p>there</p><p>now</p>"); assertEquals("<p>Hello</p><p>there</p><p>now</p>", TextUtil.stripNewlines(div.html())); // check sibling index (no reindexChildren): Elements ps = doc.select("p"); for (int i = 0; i < ps.size(); i++) { assertEquals(i, ps.get(i).siblingIndex); } }
@Test public void testPrependNewHtml() { Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); Element div = doc.getElementById("1"); div.prepend("<p>there</p><p>now</p>"); assertEquals("<p>there</p><p>now</p><p>Hello</p>", TextUtil.stripNewlines(div.html())); // check sibling index (reindexChildren): Elements ps = doc.select("p"); for (int i = 0; i < ps.size(); i++) { assertEquals(i, ps.get(i).siblingIndex); } }
/**
 * Wraps a paragraph twice, first in a simple div and then in markup with nested and
 * sibling elements, asserting the body markup after each step and that {@code wrap}
 * returns the wrapped element itself.
 */
@Test
public void testWrap() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p>There</p></div>");
    Element p = doc.select("p").first();

    p.wrap("<div class='head'></div>");
    assertEquals("<div><div class=\"head\"><p>Hello</p></div><p>There</p></div>",
            TextUtil.stripNewlines(doc.body().html()));

    Element ret = p.wrap("<div><div class=foo></div><p>What?</p></div>");
    assertEquals("<div><div class=\"head\"><div><div class=\"foo\"><p>Hello</p></div><p>What?</p></div></div><p>There</p></div>",
            TextUtil.stripNewlines(doc.body().html()));

    // expected value first: the element that was wrapped is what wrap() should return
    assertEquals(p, ret);
}
/**
 * Inserts markup before the first paragraph and then before the last paragraph,
 * asserting the body markup after each insertion.
 */
@Test
public void before() {
    Document parsed = Jsoup.parse("<div><p>Hello</p><p>There</p></div>");

    Element firstPara = parsed.select("p").first();
    firstPara.before("<div>one</div><div>two</div>");
    String afterFirstInsert = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<div><div>one</div><div>two</div><p>Hello</p><p>There</p></div>", afterFirstInsert);

    Element lastPara = parsed.select("p").last();
    lastPara.before("<p>Three</p><!-- four -->");
    String afterSecondInsert = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<div><div>one</div><div>two</div><p>Hello</p><p>Three</p><!-- four --><p>There</p></div>", afterSecondInsert);
}
/**
 * Inserts markup after the first paragraph and then after the last paragraph,
 * asserting the body markup after each insertion.
 */
@Test
public void after() {
    Document parsed = Jsoup.parse("<div><p>Hello</p><p>There</p></div>");

    Element firstPara = parsed.select("p").first();
    firstPara.after("<div>one</div><div>two</div>");
    String afterFirstInsert = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<div><p>Hello</p><div>one</div><div>two</div><p>There</p></div>", afterFirstInsert);

    Element lastPara = parsed.select("p").last();
    lastPara.after("<p>Three</p><!-- four -->");
    String afterSecondInsert = TextUtil.stripNewlines(parsed.body().html());
    assertEquals("<div><p>Hello</p><div>one</div><div>two</div><p>There</p><p>Three</p><!-- four --></div>", afterSecondInsert);
}
@Test public void insertChildrenAsCopy() { Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>"); Element div1 = doc.select("div").get(0); Element div2 = doc.select("div").get(1); Elements ps = doc.select("p").clone(); ps.first().text("One cloned"); div2.insertChildren(-1, ps); assertEquals(4, div1.childNodeSize()); // not moved -- cloned assertEquals(2, div2.childNodeSize()); assertEquals("<div id=\"1\">Text <p>One</p> Text <p>Two</p></div><div id=\"2\"><p>One cloned</p><p>Two</p></div>", TextUtil.stripNewlines(doc.body().html())); }
@Test public void testSplitAnEmbolden() { Document doc = Jsoup.parse("<div>Hello there</div>"); Element div = doc.select("div").first(); TextNode tn = (TextNode) div.childNode(0); TextNode tail = tn.splitText(6); tail.wrap("<b></b>"); assertEquals("Hello <b>there</b>", TextUtil.stripNewlines(div.html())); // not great that we get \n<b>there there... must correct }
/**
 * Removes the first child node of a paragraph and asserts the paragraph's remaining
 * text and inner HTML.
 */
@Test
public void testRemove() {
    Document parsed = Jsoup.parse("<p>One <span>two</span> three</p>");
    Element paragraph = parsed.select("p").first();

    Node leadingText = paragraph.childNode(0);
    leadingText.remove();

    assertEquals("two three", paragraph.text());
    String innerHtml = TextUtil.stripNewlines(paragraph.html());
    assertEquals("<span>two</span> three", innerHtml);
}
/**
 * Unwraps a span and asserts that its children take its place in the parent div, and
 * that {@code unwrap()} returns the span's former first child (the {@code "Two "} text
 * node), which is now parented by the div.
 */
@Test
public void unwrap() {
    Document doc = Jsoup.parse("<div>One <span>Two <b>Three</b></span> Four</div>");
    Element span = doc.select("span").first();
    // captured before unwrapping so it can be compared with the returned node
    Node twoText = span.childNode(0);

    Node node = span.unwrap();

    assertEquals("<div>One Two <b>Three</b> Four</div>", TextUtil.stripNewlines(doc.body().html()));
    assertTrue(node instanceof TextNode);
    assertEquals("Two ", ((TextNode) node).text());
    // expected value first in both comparisons, so failure messages read correctly
    assertEquals(twoText, node);
    assertEquals(doc.select("div").first(), node.parent());
}
/**
 * Unwraps an empty span and asserts that the span disappears from the markup and that
 * {@code unwrap()} returns {@code null} when there was no child to return.
 */
@Test
public void unwrapNoChildren() {
    Document doc = Jsoup.parse("<div>One <span></span> Two</div>");
    Element span = doc.select("span").first();

    Node node = span.unwrap();

    assertEquals("<div>One Two</div>", TextUtil.stripNewlines(doc.body().html()));
    // assertNull reports the unexpected value on failure, unlike assertTrue(node == null)
    assertNull(node);
}