/**
 * Converts MediaWiki markup to HTML and then reduces that HTML to its text
 * content.
 *
 * <p>The markup is rendered through a {@code MarkupParser} configured with a
 * {@code MediaWikiDialect}; the resulting HTML is fed to a Swing
 * {@link ParserDelegator}, and every text run reported by the parser is
 * appended to the result followed by a single space.
 *
 * @param markup the wiki markup to convert
 * @return the concatenated text runs, each followed by one space
 * @throws IOException if the HTML parser reports an I/O failure
 */
private String cleanWikiTest(String markup) throws IOException {
    // Stage 1: wiki markup -> HTML.
    StringWriter htmlSink = new StringWriter();
    HtmlDocumentBuilder htmlBuilder = new HtmlDocumentBuilder(htmlSink);
    // NOTE(review): presumably suppresses the surrounding document wrapper
    // so only a fragment is emitted -- confirm against HtmlDocumentBuilder.
    htmlBuilder.setEmitAsDocument(false);

    MarkupParser wikiParser = new MarkupParser(new MediaWikiDialect());
    wikiParser.setBuilder(htmlBuilder);
    wikiParser.parse(markup);

    // Stage 2: HTML -> text. Only text callbacks are collected; tags are ignored.
    final StringBuilder plainText = new StringBuilder();
    HTMLEditorKit.ParserCallback textCollector = new HTMLEditorKit.ParserCallback() {
        @Override
        public void handleText(char[] data, int pos) {
            plainText.append(data).append(' ');
        }
    };
    StringReader htmlSource = new StringReader(htmlSink.toString());
    new ParserDelegator().parse(htmlSource, textCollector, false);

    return plainText.toString();
}
public void processFileHierarchy(String urlString) { try { System.out.println(urlString); URL url = new URL(urlString); InputStream in = url.openStream(); InputStreamReader reader = new InputStreamReader(in); // ParserCallback class to handle the href tags LinkHandler callback = new LinkHandler(); // key step to enable recursion callback.setCurrentPath(urlString); // parse the HTML document new ParserDelegator().parse(reader, callback, false); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code insertAfterEnd} is called on the first child of the body element
 * with {@code "<a>link</a>"}: an end-tag spec followed by a content spec
 * carrying the text "link".
 */
public void testInsertAfterEnd_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setEditable(false);

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.insertAfterEnd(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(2, specs.length);
    checkEndTagSpec(specs[0]);

    ElementSpec contentSpec = specs[1];
    assertSpec(contentSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet contentAttrs = contentSpec.getAttributes();
    assertEquals(2, contentAttrs.getAttributeCount());
    checkAttributes(contentAttrs, StyleConstants.NameAttribute, Tag.CONTENT);
}
/**
 * Verifies that each {@code insertAfterEnd} call delivers exactly one
 * INSERT document event, first when inserting after the body's first child
 * and then when inserting after the body itself.
 */
public void testInsertAfterEnd_Events() throws Exception {
    // The element handles are obtained before addElement() runs; this order
    // is kept exactly as in the original test.
    Element root = htmlDoc.getDefaultRootElement();
    Element body = root.getElement(0);
    Element paragraph = body.getElement(0);
    addElement();
    htmlDoc.setParser(new ParserDelegator());

    DocumentController events = new DocumentController();
    htmlDoc.addDocumentListener(events);
    final String html = "<a>link</a><b>bold</b>";

    // Insert after the paragraph: the event is checked against the body.
    htmlDoc.insertAfterEnd(paragraph, html);
    assertEquals(1, events.getNumEvents());
    checkEvent(body, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 4);

    // Insert after the body: the event is checked against the root.
    events.reset();
    htmlDoc.insertAfterEnd(body, html);
    assertEquals(1, events.getNumEvents());
    checkEvent(root, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 12);
}
/**
 * Verifies the failure behavior of {@code insertAfterEnd}: the call made
 * before this test installs a parser must throw
 * {@link IllegalStateException}, while a call with a {@code null} element
 * after the parser is installed must complete without throwing.
 */
public void testInsertAfterEnd_Exceptions() throws Exception {
    addElements();
    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    Element leaf = paragraph.getElement(0);

    try {
        htmlDoc.insertAfterEnd(leaf, "<a>link</a>");
        fail("IllegalStateException should be thrown");
    } catch (IllegalStateException expected) {
        // This is the outcome the test requires.
    }

    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.insertAfterEnd(null, "<a>link</a>");
}
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code insertAfterStart} is called on the first child of the body element
 * with {@code "<a>link</a>"}: a single content spec carrying the text "link".
 */
public void testInsertAfterStart_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setEditable(false);

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.insertAfterStart(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(1, specs.length);

    ElementSpec contentSpec = specs[0];
    assertSpec(contentSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet contentAttrs = contentSpec.getAttributes();
    assertEquals(2, contentAttrs.getAttributeCount());
    checkAttributes(contentAttrs, StyleConstants.NameAttribute, Tag.CONTENT);
}
/**
 * Verifies that each {@code insertAfterStart} call delivers exactly one
 * INSERT document event, first for the body's first child and then for the
 * body itself. The expected values for the second event differ depending
 * on {@code isHarmony()}.
 */
public void testInsertAfterStart_Events() throws Exception {
    // The element handles are obtained before addElement() runs; this order
    // is kept exactly as in the original test.
    Element root = htmlDoc.getDefaultRootElement();
    Element body = root.getElement(0);
    Element paragraph = body.getElement(0);
    addElement();
    htmlDoc.setParser(new ParserDelegator());

    DocumentController events = new DocumentController();
    htmlDoc.addDocumentListener(events);
    final String html = "<a>link</a><b>bold</b>";

    htmlDoc.insertAfterStart(paragraph, html);
    assertEquals(1, events.getNumEvents());
    checkEvent(paragraph, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 0);

    events.reset();
    htmlDoc.insertAfterStart(body, html);
    assertEquals(1, events.getNumEvents());
    if (isHarmony()) {
        checkEvent(body, events.getEvent(0), DocumentEvent.EventType.INSERT, 2, 8, 0);
    } else {
        checkEvent(body, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 0);
    }
}
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code insertBeforeEnd} is called on the first child of the body element
 * with {@code "<a>link</a>"}: a single content spec carrying the text "link".
 */
public void testInsertBeforeEnd_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setEditable(false);

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.insertBeforeEnd(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(1, specs.length);

    ElementSpec contentSpec = specs[0];
    assertSpec(contentSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet contentAttrs = contentSpec.getAttributes();
    assertEquals(2, contentAttrs.getAttributeCount());
    checkAttributes(contentAttrs, StyleConstants.NameAttribute, Tag.CONTENT);
}
/**
 * Verifies that each {@code insertBeforeEnd} call delivers exactly one
 * INSERT document event, first for the body's first child and then for the
 * body itself.
 */
public void testInsertBeforeEnd_Events() throws Exception {
    // The element handles are obtained before addElement() runs; this order
    // is kept exactly as in the original test.
    Element root = htmlDoc.getDefaultRootElement();
    Element body = root.getElement(0);
    Element paragraph = body.getElement(0);
    addElement();
    htmlDoc.setParser(new ParserDelegator());

    DocumentController events = new DocumentController();
    htmlDoc.addDocumentListener(events);
    final String html = "<a>link</a><b>bold</b>";

    htmlDoc.insertBeforeEnd(paragraph, html);
    assertEquals(1, events.getNumEvents());
    checkEvent(paragraph, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 4);

    events.reset();
    htmlDoc.insertBeforeEnd(body, html);
    assertEquals(1, events.getNumEvents());
    checkEvent(body, events.getEvent(0), DocumentEvent.EventType.INSERT, 2, 8, 13);
}
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code insertBeforeStart} is called on the first child of the body element
 * with {@code "<a>link</a>"}: an end-tag spec followed by a content spec
 * carrying the text "link".
 */
public void testInsertBeforeStart_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setEditable(false);

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.insertBeforeStart(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(2, specs.length);
    checkEndTagSpec(specs[0]);

    ElementSpec contentSpec = specs[1];
    assertSpec(contentSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet contentAttrs = contentSpec.getAttributes();
    assertEquals(2, contentAttrs.getAttributeCount());
    checkAttributes(contentAttrs, StyleConstants.NameAttribute, Tag.CONTENT);
}
/**
 * Verifies that each {@code insertBeforeStart} call delivers exactly one
 * INSERT document event, first when inserting before the second-level
 * branch and then before the first-level branch. The expected values
 * differ depending on {@code isHarmony()}.
 */
public void testInsertBeforeStart_Events() throws Exception {
    // The element handles are obtained before addElement() runs; this order
    // is kept exactly as in the original test.
    Element root = htmlDoc.getDefaultRootElement();
    Element outerBranch = root.getElement(0);
    Element innerBranch = outerBranch.getElement(0);
    addElement();
    htmlDoc.setParser(new ParserDelegator());

    DocumentController events = new DocumentController();
    htmlDoc.addDocumentListener(events);
    final String html = "<a>link</a><b>bold</b>";

    // Insert before the inner branch: the event is checked against its parent.
    htmlDoc.insertBeforeStart(innerBranch, html);
    assertEquals(1, events.getNumEvents());
    if (isHarmony()) {
        checkEvent(outerBranch, events.getEvent(0), DocumentEvent.EventType.INSERT, 2, 8, 0);
    } else {
        checkEvent(outerBranch, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 0);
    }

    // Insert before the outer branch: the event is checked against the root.
    events.reset();
    htmlDoc.insertBeforeStart(outerBranch, html);
    assertEquals(1, events.getNumEvents());
    if (isHarmony()) {
        checkEvent(root, events.getEvent(0), DocumentEvent.EventType.INSERT, 2, 8, 0);
    } else {
        checkEvent(root, events.getEvent(0), DocumentEvent.EventType.INSERT, 3, 8, 0);
    }
}
/**
 * Verifies the failure behavior of {@code insertBeforeStart}: the call made
 * before this test installs a parser must throw
 * {@link IllegalStateException}, while a call with a {@code null} element
 * after the parser is installed must complete without throwing.
 */
public void testInsertBeforeStart_Exceptions() throws Exception {
    addElements();
    Element firstLevel = htmlDoc.getDefaultRootElement().getElement(0);
    Element secondLevel = firstLevel.getElement(0);
    Element thirdLevel = secondLevel.getElement(0);

    try {
        htmlDoc.insertBeforeStart(thirdLevel, "<a>link</a>");
        fail("IllegalStateException should be thrown");
    } catch (IllegalStateException expected) {
        // This is the outcome the test requires.
    }

    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.insertBeforeStart(null, "<a>link</a>");
}
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code setInnerHTML} is called on the first child of the body element
 * with {@code "<a>link</a>"}: a content spec for "link", a content spec
 * for a newline, and two end-tag specs.
 */
public void testSetInnerHTML_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setEditable(false);

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.setInnerHTML(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(4, specs.length);

    // Spec 0: the inserted text.
    ElementSpec linkSpec = specs[0];
    assertSpec(linkSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet linkAttrs = linkSpec.getAttributes();
    assertEquals(2, linkAttrs.getAttributeCount());
    checkAttributes(linkAttrs, StyleConstants.NameAttribute, Tag.CONTENT);

    // Spec 1: a single newline; specs 2 and 3: end tags.
    assertSpec(specs[1], ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, new char[]{'\n'});
    checkEndTagSpec(specs[2]);
    checkEndTagSpec(specs[3]);
}
/**
 * Verifies that each {@code setInnerHTML} call delivers exactly two
 * document events, an INSERT followed by a REMOVE, first for the
 * second-level branch and then for the first-level branch.
 */
public void testSetInnerHTML_Events() throws Exception {
    addElements();
    Element root = htmlDoc.getDefaultRootElement();
    Element outerBranch = root.getElement(0);
    final Element innerBranch = outerBranch.getElement(0);
    // Not used below; the lookup is kept so the test performs the same calls
    // as the original.
    Element unusedLeaf = innerBranch.getElement(0);
    htmlDoc.setParser(new ParserDelegator());

    DocumentController events = new DocumentController();
    htmlDoc.addDocumentListener(events);
    final String html = "<a>link</a><b>bold</b>";

    htmlDoc.setInnerHTML(innerBranch, html);
    assertEquals(2, events.getNumEvents());
    checkEvent(innerBranch, events.getEvent(0), DocumentEvent.EventType.INSERT, 4, 9, 0);
    checkEvent(innerBranch, events.getEvent(1), DocumentEvent.EventType.REMOVE, 4, 14, 8);

    events.reset();
    htmlDoc.setInnerHTML(outerBranch, html);
    assertEquals(2, events.getNumEvents());
    checkEvent(outerBranch, events.getEvent(0), DocumentEvent.EventType.INSERT, 4, 9, 0);
    checkEvent(outerBranch, events.getEvent(1), DocumentEvent.EventType.REMOVE, 2, 10, 8);
}
/**
 * Verifies the document text and child-element counts after
 * {@code setInnerHTML} is applied in turn to the second-level branch, the
 * first-level branch and the root. After every call the document text must
 * be "linkbold" and the target element must have three children.
 */
public void testSetInnerHTML_Structure() throws Exception {
    addElements();
    Element root = htmlDoc.getDefaultRootElement();
    Element outerBranch = root.getElement(0);
    Element innerBranch = outerBranch.getElement(0);
    // Not used below; the lookup is kept so the test performs the same calls
    // as the original.
    Element unusedLeaf = innerBranch.getElement(0);
    htmlDoc.setParser(new ParserDelegator());

    final String html = "<a>link</a><b>bold</b>";
    final String expectedText = "linkbold";

    assertEquals(4, innerBranch.getElementCount());

    htmlDoc.setInnerHTML(innerBranch, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, innerBranch.getElementCount());

    htmlDoc.setInnerHTML(outerBranch, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, outerBranch.getElementCount());

    htmlDoc.setInnerHTML(root, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, root.getElementCount());
}
/**
 * Verifies the element specs recorded by the insert marker when
 * {@code setOuterHTML} is called on the first child of the body element
 * with {@code "<a>link</a>"}: an end-tag spec, a content spec for "link",
 * a content spec for a newline, and a final end-tag spec.
 */
public void testSetOuterHTML_Specs2() throws Exception {
    htmlDoc.setParser(new ParserDelegator());

    Element body = htmlDoc.getDefaultRootElement().getElement(0);
    Element paragraph = body.getElement(0);
    htmlDoc.setOuterHTML(paragraph, "<a>link</a>");

    Marker marker = htmlDoc.getInsertMarker();
    assertEquals(Integer.valueOf(0), getInsertInfo(marker).get(1));
    ElementSpec[] specs = (ElementSpec[]) getInsertInfo(marker).get(0);
    marker.reset();

    assertEquals(4, specs.length);
    checkEndTagSpec(specs[0]);

    // Spec 1: the inserted text.
    ElementSpec linkSpec = specs[1];
    assertSpec(linkSpec, ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, "link".toCharArray());
    AttributeSet linkAttrs = linkSpec.getAttributes();
    assertEquals(2, linkAttrs.getAttributeCount());
    checkAttributes(linkAttrs, StyleConstants.NameAttribute, Tag.CONTENT);

    // Spec 2: a single newline; spec 3: end tag.
    assertSpec(specs[2], ElementSpec.ContentType, ElementSpec.OriginateDirection,
            0, new char[]{'\n'});
    checkEndTagSpec(specs[3]);
}
/**
 * Verifies the document text and child-element counts after
 * {@code setOuterHTML} is applied in turn to the second-level branch, the
 * first-level branch and the root. After every call the document text must
 * be "linkbold" and the checked parent must have three children.
 */
public void testSetOuterHTML_Structure() throws Exception {
    addElements();
    Element root = htmlDoc.getDefaultRootElement();
    Element outerBranch = root.getElement(0);
    Element innerBranch = outerBranch.getElement(0);
    htmlDoc.setParser(new ParserDelegator());

    final String html = "<a>link</a><b>bold</b>";
    final String expectedText = "linkbold";

    assertEquals(1, outerBranch.getElementCount());

    htmlDoc.setOuterHTML(innerBranch, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, outerBranch.getElementCount());

    htmlDoc.setOuterHTML(outerBranch, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, root.getElementCount());

    // After replacing the root, the count is read from a freshly fetched
    // default root element rather than the earlier handle.
    htmlDoc.setOuterHTML(root, html);
    assertEquals(expectedText, htmlDoc.getText(0, htmlDoc.getLength()));
    assertEquals(3, htmlDoc.getDefaultRootElement().getElementCount());
}
/**
 * Verifies the failure behavior of {@code setOuterHTML}: the call made
 * before this test installs a parser must throw
 * {@link IllegalStateException}, while a call with a {@code null} element
 * after the parser is installed must complete without throwing.
 */
public void testSetOuterHTML_Exceptions() throws Exception {
    addElements();
    Element firstLevel = htmlDoc.getDefaultRootElement().getElement(0);
    Element secondLevel = firstLevel.getElement(0);
    Element thirdLevel = secondLevel.getElement(0);

    try {
        htmlDoc.setOuterHTML(thirdLevel, "<a>link</a>");
        fail("IllegalStateException should be thrown");
    } catch (IllegalStateException expected) {
        // This is the outcome the test requires.
    }

    htmlDoc.setParser(new ParserDelegator());
    htmlDoc.setOuterHTML(null, "<a>link</a>");
}
/**
 * Parses the given HTML with this object as the parser callback and returns
 * the contents of {@code out} afterwards, optionally comparing it with an
 * expected trace.
 *
 * <p>{@code out} is cleared before parsing. NOTE(review): it is presumably
 * filled by this object's callback methods during the parse -- those methods
 * are not visible here.
 *
 * <p>On a mismatch the whole diagnostic (input, expected, returned) is
 * written to {@code System.err}. Previously the "returned" line went to
 * {@code System.out}, so the report could be split or reordered when the two
 * streams were buffered or redirected separately.
 *
 * @param html  the HTML text to parse
 * @param trace the expected contents of {@code out}, or {@code null} to skip
 *              the comparison
 * @return the contents of {@code out} after parsing
 * @throws Exception if {@code trace} is non-null and differs from the
 *                   result, or if parsing fails
 */
public String verify(String html, String trace) throws Exception {
    out.setLength(0);

    HTMLEditorKit.ParserCallback callback = this;
    ParserDelegator delegator = new ParserDelegator();
    // Third argument is ignoreCharSet = true.
    delegator.parse(new StringReader(html), callback, true);

    String actual = out.toString();
    if (trace != null && !actual.equals(trace)) {
        System.err.println("Unable to parse '" + html + "':");
        System.err.println(" expected: '" + trace + "',");
        System.err.println(" returned: '" + actual + "'.");
        throw new Exception("'" + html + "' -> '" + actual + "' expected '" + trace + "'");
    }
    return actual;
}