public static void main(String[] args) { String d = "<span><div>test</div></span>"; Document doc = Jsoup.parse(d); Element div = doc.select("div").first(); // <div></div> div.html("<p>lorem ipsum</p>"); // <div><p>lorem ipsum</p></div> div.prepend("<p>First</p>"); div.append("<p>Last</p>"); // now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div> div.appendElement(d); Element span = doc.select("span").first(); // <span>One</span> span.wrap("<li><a href='http://example.com/'></a></li>"); // now: <li><a href="http://example.com"><span>One</span></a></li> System.out.println(doc.html()); String s = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false)); System.out.println(s); }
/** * Jsoup.parse(in, charsetName, baseUri) */ @Override public Document handle( InputStream input) throws IOException{ //获取Jsoup参数 String charsetName = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME ); String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI,""); //使用Jsoup将html转换成Document对象 Document doc = Jsoup.parse(input, charsetName, baseUri); OutputSettings outputSettings = new OutputSettings(); outputSettings.prettyPrint(false); /* outputSettings.syntax(syntax) outputSettings.charset(charset) outputSettings*/ doc.outputSettings(outputSettings); //返回Document对象 return doc; }
private String getJavadocCommentAsText(IMember member) { try (Reader reader = JavadocContentAccess.getHTMLContentReader(member, true, true)) { if (reader == null) { return null; } String javadocAsHtml = CharStreams.toString(reader); String javadocAsString = Jsoup.clean(javadocAsHtml, "", Whitelist.none(), new OutputSettings().prettyPrint(false)); // trim lines try (BufferedReader bufferedReader = new BufferedReader(new StringReader(javadocAsString))) { return bufferedReader.lines().map(line->line.trim()).collect(Collectors.joining("\n")); } } catch (JavaModelException | IOException e) { return null; } }
@Test public void testHtmlAndXmlSyntax() { String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'><>&"<foo />bar"; Document doc = Jsoup.parse(h); doc.outputSettings().syntax(Syntax.html); assertEquals("<!doctype html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async checked src=\"&<>"\"><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); assertEquals("<!DOCTYPE html>\n" + "<html>\n" + " <head></head>\n" + " <body>\n" + " <img async=\"\" checked=\"checked\" src=\"&<>"\" /><>&\"\n" + " <foo />bar\n" + " </body>\n" + "</html>", doc.html()); }
/**
 * Checks that writing a document into an appendable ({@code html(StringWriter)})
 * with pretty-printing disabled reproduces the input markup exactly.
 */
@Test
public void testHtmlAppendable() {
    final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";

    final Document document = Jsoup.parse(htmlContent);
    document.outputSettings(new OutputSettings().prettyPrint(false));

    final String rendered = document.html(new StringWriter()).toString();
    assertEquals(htmlContent, rendered);
}
public static void main( String[] args ) { // load html from file Document doc = loadHtmlFromFile("index.html", "utf-8"); if(doc == null) { LogUtils.d(CLS_NAME, "main", "Document is null"); return; } /* BEFORE modification */ System.out.println("===== BEFORE ====="); System.out.println(doc.html()); /* add meta charset utf-8 */ Element tagMetaCharset = new Element(Tag.valueOf("meta"), ""); tagMetaCharset.attr("charset", "utf-8"); // add meta tag to head doc.head().appendChild(tagMetaCharset); /* create tag <p> for description */ Element tagPDescription = new Element(Tag.valueOf("p"), ""); tagPDescription.text("It is a very powerful HTML parser! I love it so much..."); // add to body doc.body().appendChild(tagPDescription); /* create tag <p> for author */ tagPDescription.before("<p>Author: Johnathan Hedley</p>"); /* add attribute to tag <p> author */ Element tagPAuthor = doc.body().select("p:contains(Author)").first(); tagPAuthor.attr("align", "center"); /* body add class */ doc.body().addClass("content"); doc.body().addClass("content"); doc.body().addClass("content2"); /* output the final HTML */ OutputSettings settings = new OutputSettings(); settings.indentAmount(8); doc.outputSettings(settings); System.out.println("===== AFTER ====="); System.out.println(doc.html()); }
/**
 * Cleans the input with jsoup using the whitelist selected for the given
 * policy, with pretty-printing disabled on the output.
 *
 * @param input   markup to sanitize
 * @param baseUri base URI handed to {@code Jsoup.clean}
 * @param policy  policy used to pick the whitelist via {@code getWhitelist}
 * @return the cleaned markup
 */
@Override
public String sanitize(String input, String baseUri, UserInputSanitizerPolicy policy) {
    return Jsoup.clean(
            input,
            baseUri,
            getWhitelist(policy),
            new OutputSettings().prettyPrint(false));
}
@Override protected boolean apply(DataProfil profil) { List<DataTexte> dataTextes = getAllElements(DataTexte.class, profil); for(DataTexte dataTexte : dataTextes) { org.jsoup.nodes.Document doc = Jsoup.parse(dataTexte.getContenuHTML()); Elements spanTP = doc.select("span."+JLabelTP.JLABEL_TP); for(Element spanElt : spanTP) { String spanID = spanElt.attr("id"); BufferedImage oldImage = dataTexte.putImage(spanID, null); int width = oldImage.getWidth(), height = oldImage.getHeight(); // Get a DOMImplementation. DOMImplementation domImpl = GenericDOMImplementation.getDOMImplementation(); // Create an instance of org.w3c.dom.Document. String svgNS = "http://www.w3.org/2000/svg"; Document document = domImpl.createDocument(svgNS, "svg", null); // Create an instance of the SVG Generator. SVGGraphics2D svgGenerator = new SVGGraphics2D(document); svgGenerator.setSVGCanvasSize(new Dimension(width, height)); svgGenerator.drawImage(oldImage, 0, 0, null); Element svgElement; try (StringWriter w = new StringWriter()) { svgGenerator.stream(w,true); svgElement = Jsoup.parse(w.toString()).outputSettings(new OutputSettings().prettyPrint(false)).select("svg").first(); } catch (IOException ex) { Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, null, ex); svgElement = Jsoup.parse("<svg></svg>").select("svg").first(); } Element imgElt = spanElt.select("img").first(); String id = imgElt.attr("id"), title = imgElt.attr("title"); svgElement.attr("id", id).attr("width",width+"").attr("height",height+"").attr("title", title); String svg = svgElement.outerHtml(); spanElt.html(svg); } dataTexte.setContenuHTML(doc.html()); } return true; }
private List<Mail> getMails(Message messages[]) throws MessagingException, IOException { List<Mail> mails = new ArrayList<Mail>(); for (int i = 0; i < messages.length; i++) { Message msg = messages[i]; Address[] fromAddress = msg.getFrom(); Mail mail = new Mail(); mail.setId(msg.getMessageNumber()); mail.setFrom(fromAddress[0].toString()); mail.setSubject(msg.getSubject()); mail.getToList().addAll(Arrays.asList(parseAddresses(msg.getRecipients(RecipientType.TO)))); mail.getCcList().addAll(Arrays.asList(parseAddresses(msg.getRecipients(RecipientType.CC)))); mail.setSendDate(msg.getSentDate().toString()); mail.setFlags(getFlags(msg)); String messageContent = ""; Object msgContent = msg.getContent(); /* Check if content is pure text/html or in parts */ if (msgContent instanceof Multipart) { Multipart multipart = (Multipart) msgContent; for (int j = 0; j < multipart.getCount(); j++) { BodyPart bodyPart = multipart.getBodyPart(j); String disposition = bodyPart.getDisposition(); if (disposition != null && (disposition.equals(BodyPart.ATTACHMENT))) { mail.getAttachments().add(getAttachmentDescription(bodyPart)); } else if (disposition != null && (disposition.equals(BodyPart.INLINE))) { if (bodyPart.isMimeType("text/*")) { messageContent += (String) bodyPart.getContent(); } } else { messageContent += bodyPart.getContent().toString(); } } } else { messageContent = messages[i].getContent().toString(); } // escape javascript // Document doc = Jsoup.parse(messageContent); // doc.removeAttr("script"); String prettyPrintedBodyFragment = Jsoup.clean(messageContent, "", Whitelist.none().addTags("br", "p"), new OutputSettings().prettyPrint(true)); // get plain text with preserved line breaks by disabled prettyPrint mail.setMessage(Jsoup.clean(prettyPrintedBodyFragment, "", Whitelist.none(), new OutputSettings().prettyPrint(false))); mails.add(mail); } return mails; }
/**
 * Builds the document-data view for the page identified by the parameters.
 *
 * <p>When the document element exists, the menu bar and a header are created
 * and, if content data is found, the first entry's content is shown (cleaned
 * with the simple-text whitelist) together with a word cloud when the word
 * count response carries a map. The panel content is set and a page event is
 * created only in that case.
 *
 * @param parameters view parameters from which the page id is derived
 * @param menuBar    menu bar to populate
 * @param panel      panel that receives the built content
 * @return the panel content layout, populated or not
 */
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
    final VerticalLayout content = createPanelContent();
    final String pageId = getPageId(parameters);

    final DataContainer<DocumentElement, String> elementContainer =
            getApplicationManager().getDataContainer(DocumentElement.class);
    final DataContainer<DocumentContentData, String> contentContainer =
            getApplicationManager().getDataContainer(DocumentContentData.class);
    // The result of this lookup is not used; the call itself is kept unchanged.
    getApplicationManager().getDataContainer(CommitteeProposalComponentData.class);

    if (elementContainer.load(pageId) == null) {
        return content;
    }

    getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);
    LabelFactory.createHeader2Label(content, DOCUMENT_DATA);

    final List<DocumentContentData> contentList = contentContainer.getAllBy(DocumentContentData_.id, pageId);
    if (!contentList.isEmpty()) {
        final Panel formPanel = new Panel();
        formPanel.setSizeFull();
        content.addComponent(formPanel);

        final FormLayout form = new FormLayout();
        formPanel.setContent(form);

        // Only the first content entry is rendered.
        final String rawContent = contentList.get(0).getContent();
        final String cleanContent = Jsoup.clean(rawContent, "", Whitelist.simpleText(),
                new OutputSettings().indentAmount(4));
        form.addComponent(new Label(cleanContent, ContentMode.HTML));

        final DocumentWordCountRequest wordCountRequest = new DocumentWordCountRequest();
        wordCountRequest.setDocumentId(pageId);
        wordCountRequest.setMaxResults(MAX_RESULTS);
        wordCountRequest.setSessionId(RequestContextHolder.currentRequestAttributes().getSessionId());

        final DocumentWordCountResponse wordCountResponse =
                (DocumentWordCountResponse) getApplicationManager().service(wordCountRequest);
        if (wordCountResponse.getWordCountMap() != null) {
            final Label wordCloud = new Label(
                    createWordCloud(wordCountResponse.getWordCountMap()), ContentMode.HTML);
            form.addComponent(wordCloud);
        }

        content.setExpandRatio(formPanel, ContentRatio.GRID);
    }

    panel.setContent(content);
    getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW,
            ApplicationEventGroup.USER, NAME, parameters, pageId);
    return content;
}