/**
 * Returns the wrapped child nodes of this node, building the list on first access and
 * returning the same cached list afterwards.
 *
 * <p>Each entry of {@code target.childNodes()} is wrapped according to its runtime type:
 * a {@code Comment} becomes an {@code HTMLCommentImpl}, an {@code Element} becomes an
 * {@code HTMLElementImpl}, and anything else becomes an {@code HTMLNodeImpl}. Every wrapper
 * receives this object as its parent.
 *
 * @return the cached list of wrappers, in the order returned by {@code target.childNodes()}
 */
@Override
public List<HTMLNode> getChildren() {
    if (children == null) {
        // The field is assigned before wrapping starts, exactly as the cache was populated before.
        children = new ArrayList<HTMLNode>();
        for (Node child : target.childNodes()) {
            final HTMLNode wrapped;
            if (child instanceof Comment) {
                wrapped = new HTMLCommentImpl((Comment) child, this);
            } else if (child instanceof Element) {
                wrapped = new HTMLElementImpl<Element>((Element) child, this);
            } else {
                wrapped = new HTMLNodeImpl<Node>(child, this);
            }
            children.add(wrapped);
        }
    }
    return children;
}
private void print2(String baseLocation) throws IOException, TransformerException, ParserConfigurationException { Document document = Jsoup.connect(baseLocation).get(); Elements content = document.getElementsByAttributeValue("class", "entry-content"); String title = null; ArrayList<String> list = new ArrayList<>(); for (Element div : content) { List<Node> nodes = div.childNodes(); for (Node node : nodes) { if (node instanceof Element) { if (((Element) node).tagName().equals("h3")) { writeFile(title, list); list.clear(); System.out.println("Title: " + node.childNode(0)); title = node.childNode(0).toString(); } else if (((Element) node).tagName().equals("table")) { //print table Elements tr = ((Element) node).getElementsByTag("tr"); for (Element element : tr) { Elements td = element.getElementsByTag("td"); for (Element value : td) { if (value.childNodeSize() > 0) { if (!(value.childNode(0) instanceof Comment)) { // System.out.println("Emoticon: " + value.childNode(0) + " " + value.childNode(0).getClass().getSimpleName()); list.add(value.childNode(0).toString()); } } } } } } } } }
/**
 * Tests whether every child node of {@code element} is a {@code Comment},
 * {@code XmlDeclaration} or {@code DocumentType}.
 *
 * @param root not used by this check
 * @param element the element whose child nodes are inspected
 * @return {@code true} if the element has no child nodes or only child nodes of the three
 *         types above; {@code false} as soon as any other node type is found
 */
@Override
public boolean matches(Element root, Element element) {
    for (Node child : element.childNodes()) {
        boolean ignorable = child instanceof Comment
                || child instanceof XmlDeclaration
                || child instanceof DocumentType;
        if (!ignorable) {
            return false;
        }
    }
    return true;
}
/**
 * Reports whether {@code element} contains nothing except comments, XML declarations and
 * document-type nodes among its child nodes.
 *
 * @param root not consulted by this check
 * @param element the element whose child nodes are examined in order
 * @return {@code false} on the first child that is none of {@code Comment},
 *         {@code XmlDeclaration} or {@code DocumentType}; {@code true} otherwise, including
 *         when there are no child nodes
 */
@Override
public boolean matches(Element root, Element element) {
    for (Node current : element.childNodes()) {
        if (!(current instanceof Comment)
                && !(current instanceof XmlDeclaration)
                && !(current instanceof DocumentType)) {
            return false;
        }
    }
    return true;
}
@Test public void parsesComments() { String html = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>"; Document doc = Jsoup.parse(html); Element body = doc.body(); Comment comment = (Comment) body.childNode(1); // comment should not be sub of img, as it's an empty tag assertEquals(" <table><tr><td></table> ", comment.getData()); Element p = body.child(1); TextNode text = (TextNode) p.childNode(0); assertEquals("Hello", text.getWholeText()); }
/**
 * Reports whether the wrapped {@code node} is a {@code Comment}.
 *
 * @return {@code true} if {@code node} is an instance of {@code Comment}, otherwise
 *         {@code false} (including when {@code node} is {@code null})
 */
@Override
public boolean isComment() {
    return node instanceof Comment;
}
/**
 * Replaces a text, comment or data node with an empty {@code TextNode} that carries the
 * original node's base URI; any other node type is left untouched.
 *
 * @param node the node being visited
 */
@Override
public void visit(Node node) {
    boolean strippable = node instanceof TextNode
            || node instanceof Comment
            || node instanceof DataNode;
    if (!strippable) {
        return;
    }
    TextNode blank = new TextNode(StringUtils.EMPTY, node.baseUri());
    node.replaceWith(blank);
}
/**
 * Builds a {@code Comment} node from the token's data and passes it to {@code insertNode}.
 *
 * @param commentToken the comment token whose data becomes the node's content
 */
void insert(Token.Comment commentToken) {
    insertNode(new Comment(commentToken.getData()));
}
/**
 * Creates a wrapper around a {@code Comment} node by forwarding both arguments to the
 * superclass constructor.
 *
 * @param target the comment node being wrapped
 * @param parent the element passed to the superclass as this wrapper's parent
 */
HTMLCommentImpl(Comment target, HTMLElement parent) { super(target, parent); }