Java 类org.jsoup.nodes.TextNode 实例源码

项目:eclipse.jdt.ls    文件:HtmlToPlainText.java   
/**
 * Emits the plain-text prefix (or the text itself) for a node when the
 * traversal first reaches it.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
@Override
public void head(Node node, int depth) {
    String tag = node.nodeName();

    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (tag.equals("ul")) {
        // Entering a list: one more nesting level.
        listNesting++;
        return;
    }
    if (tag.equals("li")) {
        append("\n ");
        // Two extra spaces per nesting level beyond the first.
        int level = 1;
        while (level < listNesting) {
            append("  ");
            level++;
        }
        // Top-level items get "* ", nested items get "- ".
        append(listNesting == 1 ? "* " : "- ");
        return;
    }
    if (tag.equals("dt")) {
        append("  ");
        return;
    }
    if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:case-html-data-gather    文件:HTMLDataGather.java   
/**
 * Returns the text that follows the "五笔:" label span inside the given
 * element, with all non-word characters removed.
 *
 * @param tagContentEL element searched for {@code span.diczx7} labels
 * @return the cleaned text, or {@code null} when no label is followed by a text node
 */
private String gatherWuBi(Element tagContentEL) {
    for (Element label : tagContentEL.select("span.diczx7")) {
        if (!label.text().equals("五笔:")) {
            continue;
        }
        // The value is the sibling text node right after the label span.
        Node following = label.nextSibling();
        if (!(following instanceof TextNode)) {
            continue;
        }
        // Strip special (non-word) characters.
        return ((TextNode) following).text().replaceAll("\\W", "");
    }
    return null;
}
项目:YuiHatano    文件:ShadowResources.java   
/**
 * Builds a map from array-resource name to the texts of its {@code <item>}
 * elements, for every element with the given tag in the values XML document.
 *
 * <p>An item whose first child is a text node contributes that node's text.
 * Any other item (empty, or starting with a nested element) contributes
 * {@code item.text()} instead, so the list keeps one entry per item and
 * never fails on an empty {@code <item/>}.
 *
 * @param tag tag name of the array elements to collect
 * @return map of the {@code name} attribute to the list of item texts
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();

    Document document = getValuesXmlDocument();

    for (Element element : document.getElementsByTag(tag)) {
        List<String> itemsText = new ArrayList<>();

        for (Element item : element.getElementsByTag("item")) {
            String text;
            if (item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode) {
                text = ((TextNode) item.childNode(0)).text();
            } else {
                // No leading text node: avoid the IndexOutOfBounds/ClassCast failure
                // and fall back to the element's combined text ("" when empty).
                text = item.text();
            }
            itemsText.add(text);
        }

        map.put(element.attr("name"), itemsText);
    }
    return map;
}
项目:YuiHatano    文件:ShadowResources.java   
/**
 * Builds the resource name to value map for the {@code <string>} elements
 * of the values XML document.
 *
 * <p>Only elements whose first child node is a text node are included; the
 * value is that text node's text.
 *
 * @return map of the {@code name} attribute to the string value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();

    for (Element stringElement : getValuesXmlDocument().getElementsByTag("string")) {
        String resourceName = stringElement.attr("name");

        boolean startsWithText = stringElement.childNodeSize() > 0
                && stringElement.childNode(0) instanceof TextNode;
        if (!startsWithText) {
            continue;
        }

        TextNode leadingText = (TextNode) stringElement.childNode(0);
        nameToValue.put(resourceName, leadingText.text());
    }

    return nameToValue;
}
项目:KBUnitTest    文件:ShadowResources.java   
/**
 * Builds a map from array-resource name to the texts of its {@code <item>}
 * elements, for every element with the given tag in the values XML document.
 *
 * <p>An item whose first child is a text node contributes that node's text.
 * Any other item (empty, or starting with a nested element) contributes
 * {@code item.text()} instead, so the list keeps one entry per item and
 * never fails on an empty {@code <item/>}.
 *
 * @param tag tag name of the array elements to collect
 * @return map of the {@code name} attribute to the list of item texts
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();

    Document document = getValuesXmlDocument();

    for (Element element : document.getElementsByTag(tag)) {
        List<String> itemsText = new ArrayList<>();

        for (Element item : element.getElementsByTag("item")) {
            String text;
            if (item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode) {
                text = ((TextNode) item.childNode(0)).text();
            } else {
                // No leading text node: avoid the IndexOutOfBounds/ClassCast failure
                // and fall back to the element's combined text ("" when empty).
                text = item.text();
            }
            itemsText.add(text);
        }

        map.put(element.attr("name"), itemsText);
    }
    return map;
}
项目:KBUnitTest    文件:ShadowResources.java   
/**
 * Builds the resource name to value map for the {@code <string>} elements
 * of the values XML document.
 *
 * <p>Only elements whose first child node is a text node are included; the
 * value is that text node's text.
 *
 * @return map of the {@code name} attribute to the string value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();

    for (Element stringElement : getValuesXmlDocument().getElementsByTag("string")) {
        String resourceName = stringElement.attr("name");

        boolean startsWithText = stringElement.childNodeSize() > 0
                && stringElement.childNode(0) instanceof TextNode;
        if (!startsWithText) {
            continue;
        }

        TextNode leadingText = (TextNode) stringElement.childNode(0);
        nameToValue.put(resourceName, leadingText.text());
    }

    return nameToValue;
}
项目:Xndroid    文件:OutputFormatter.java   
/**
 * Recursively appends the text of {@code e}'s children to {@code accum},
 * skipping every child for which {@code unlikely(child)} is true.
 *
 * @param e      element whose children are visited
 * @param accum  buffer receiving the text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }

        Element nested = (Element) child;
        // A block element needs a separating space unless the buffer is
        // empty or already ends in whitespace.
        boolean blockNeedsSpace = accum.length() > 0 && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // A <br> always contributes a space when the block rule did not.
        if (blockNeedsSpace || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:solr-cmd-utils    文件:HtmlJsoupFilter.java   
/**
 * For each element matched by {@code selector}, concatenates the text nodes
 * of the element and all its descendants (each followed by a space) and adds
 * the trimmed result to the document under {@code fieldName}.
 *
 * @param selector  CSS selector evaluated against the jsoup document
 * @param fieldName name of the field that receives each value
 */
public void mapAllElements(String selector, String fieldName) {
    for (Element matched : jsoupDocument.select(selector)) {
        StringBuilder joined = new StringBuilder();
        for (Element descendant : matched.getAllElements()) {
            for (TextNode textNode : descendant.textNodes()) {
                joined.append(textNode.text()).append(" ");
            }
        }
        document.addField(fieldName, joined.toString().trim());
    }
}
项目:awplab-core    文件:JsoupSession.java   
/**
 * Returns the first child of {@code parent} if it is an element or a text
 * node with non-blank text (non-breaking spaces count as blank); otherwise
 * delegates to {@code getNextNonEmptyNode} starting from that first child.
 *
 * @param parent element to inspect, may be {@code null}
 * @return the selected node, or {@code null} when {@code parent} is
 *         {@code null} or has no children
 */
public Node getFirstNonEmptyNodeChild(Element parent) {
    if (parent == null || parent.childNodeSize() == 0) {
        return null;
    }

    Node first = parent.childNode(0);
    if (first instanceof Element) {
        return first;
    }

    if (first instanceof TextNode) {
        // Treat non-breaking spaces as ordinary whitespace before trimming.
        String visible = ((TextNode) first).text().replaceAll("\u00A0", " ").trim();
        if (visible.length() > 0) {
            return first;
        }
    }

    return getNextNonEmptyNode(first);
}
项目:RenewPass    文件:HtmlElements.java   
/**
 * Returns the wrapper for a jsoup node, creating and caching it on first use.
 *
 * @param node the jsoup node to wrap
 * @return the cached wrapper, or a new {@code HtmlElement}, {@code HtmlTextNode}
 *         or plain {@code HtmlNode} depending on the node's type
 */
public HtmlNode getHtmlNode(org.jsoup.nodes.Node node) {
    if (elementCache.containsKey(node)) {
        return elementCache.get(node);
    }

    final HtmlNode wrapper;
    if (node instanceof Element) {
        wrapper = new HtmlElement(page, (Element) node);
    } else if (node instanceof TextNode) {
        wrapper = new HtmlTextNode(page, (TextNode) node);
    } else {
        wrapper = new HtmlNode(page, node);
    }

    elementCache.put(node, wrapper);
    return wrapper;
}
项目:sigir2016-collection-for-focused-retrieval    文件:Paragraph.java   
/**
 * Rebuilds {@code rawText} from the text nodes of this paragraph.
 *
 * <p>For each text node the tag name is set to the node's path, the
 * whitespace-normalized and trimmed text is appended, and the untrimmed
 * length is added to {@code charsCountInLinks} when the node is a link.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node current : this) {
        if (!(current instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(current));

        String nodeRawText = ((TextNode) current).text();
        collected.append(Utils.normalizeWhitespace(nodeRawText).trim());

        if (NodeHelper.isLink(current)) {
            charsCountInLinks += nodeRawText.length();
        }
    }

    rawText = collected.toString();
}
项目:sigir2016-collection-for-focused-retrieval    文件:Paragraph.java   
/**
 * Builds a dot-separated path of node names from the {@code html} ancestor
 * down to the given node, e.g. {@code "html.body.p."}.
 *
 * <p>Text nodes, and nodes for which {@code NodeHelper.isInnerText} is true,
 * are replaced by their parent before their name is recorded. The walk stops
 * at the {@code html} node or when there is no further parent, so a detached
 * node yields the partial path collected so far instead of failing.
 *
 * @param n node to start from, may be {@code null}
 * @return the path, each name followed by a dot; empty when nothing was visited
 */
public String getPath(Node n)
{
    String nodePath = "";
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            // Ran off the top of a detached subtree: nothing more to record.
            break;
        }

        String parentNodeName = n.nodeName();
        nodePath = parentNodeName + "." + nodePath;

        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }

    return nodePath;
}
项目:dkpro-c4corpus    文件:Paragraph.java   
/**
 * Rebuilds {@code rawText} from the text nodes of this paragraph.
 *
 * <p>For each text node the tag name is set to the node's path, the text is
 * passed through {@code Utils.normalizeBreaks}, trimmed and appended, and the
 * untrimmed length is added to {@code charsCountInLinks} when the node is a link.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node current : this) {
        if (!(current instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(current));

        String nodeRawText = ((TextNode) current).text();
        collected.append(Utils.normalizeBreaks(nodeRawText).trim());

        if (NodeHelper.isLink(current)) {
            charsCountInLinks += nodeRawText.length();
        }
    }

    rawText = collected.toString();
}
项目:dkpro-c4corpus    文件:Paragraph.java   
/**
 * Builds a dot-separated path of node names from the {@code html} ancestor
 * down to the given node, e.g. {@code "html.body.p."}.
 *
 * <p>Text nodes, and nodes for which {@code NodeHelper.isInnerText} is true,
 * are replaced by their parent before their name is recorded. The walk stops
 * at the {@code html} node or when there is no further parent, so a detached
 * node yields the partial path collected so far instead of failing.
 *
 * @param n node to start from, may be {@code null}
 * @return the path, each name followed by a dot; empty when nothing was visited
 */
public String getPath(Node n)
{
    String nodePath = "";
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            // Ran off the top of a detached subtree: nothing more to record.
            break;
        }

        String parentNodeName = n.nodeName();
        nodePath = parentNodeName + "." + nodePath;

        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }

    return nodePath;
}
项目:JumpGo    文件:OutputFormatter.java   
/**
 * Recursively appends the text of {@code e}'s children to {@code accum},
 * skipping every child for which {@code unlikely(child)} is true.
 *
 * @param e      element whose children are visited
 * @param accum  buffer receiving the text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }

        Element nested = (Element) child;
        // A block element needs a separating space unless the buffer is
        // empty or already ends in whitespace.
        boolean blockNeedsSpace = accum.length() > 0 && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // A <br> always contributes a space when the block rule did not.
        if (blockNeedsSpace || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:DeeBrowser    文件:OutputFormatter.java   
/**
 * Recursively appends the text of {@code e}'s children to {@code accum},
 * skipping every child for which {@code unlikely(child)} is true.
 *
 * @param e      element whose children are visited
 * @param accum  buffer receiving the text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }

        Element nested = (Element) child;
        // A block element needs a separating space unless the buffer is
        // empty or already ends in whitespace.
        boolean blockNeedsSpace = accum.length() > 0 && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // A <br> always contributes a space when the block rule did not.
        if (blockNeedsSpace || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:common    文件:HtmlToPlainText.java   
/**
 * Appends the plain-text output for a node when the traversal enters it:
 * the text of a text node, a bullet for {@code li}, an indent for
 * {@code dt}, or a newline for paragraph, heading and table-row tags.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String tag = node.nodeName();

    final String emitted;
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        emitted = ((TextNode) node).text();
    } else if (tag.equals("li")) {
        emitted = "\n * ";
    } else if (tag.equals("dt")) {
        emitted = "  ";
    } else if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        emitted = "\n";
    } else {
        // Every other tag produces no output on entry.
        return;
    }
    append(emitted);
}
项目:gestock    文件:HtmlToPlainText.java   
/**
 * Appends the plain-text output for a node when the traversal enters it:
 * the text of a text node, a bullet for {@code li}, an indent for
 * {@code dt}, or a newline for paragraph, heading and table-row tags.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String tag = node.nodeName();

    final String emitted;
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        emitted = ((TextNode) node).text();
    } else if (tag.equals("li")) {
        emitted = "\n * ";
    } else if (tag.equals("dt")) {
        emitted = "  ";
    } else if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        emitted = "\n";
    } else {
        // Every other tag produces no output on entry.
        return;
    }
    append(emitted);
}
项目:zongtui-webcrawler    文件:ElementOperator.java   
/**
 * Extracts text from the direct text-node children of an element.
 *
 * <p>When {@code group} is 0 the texts of all direct text nodes are
 * concatenated. Otherwise the text of the {@code group}-th text node
 * (1-based) is returned, or an empty string if there are fewer text nodes.
 *
 * @param element element whose direct children are scanned
 * @return the selected text
 */
@Override
public String operate(Element element) {
    StringBuilder allText = new StringBuilder();
    int seen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode textChild = (TextNode) child;
        if (group == 0) {
            allText.append(textChild.text());
            continue;
        }
        seen++;
        if (seen == group) {
            return textChild.text();
        }
    }
    return allText.toString();
}
项目:KomiReader    文件:KomicaScraper.java   
/**
 * Extracts date, trip ID and post number from a text node and stores them
 * on the given post.
 * Ex: " 15/02/14(六)07:14:32 ID:F.OqpZFA No.6135732"
 *
 * <p>The date is interpreted in the {@code Asia/Taipei} time zone with a
 * two-digit year offset by 2000. The calendar is cleared before the fields
 * are set, so milliseconds are always zero and equal timestamps produce
 * equal calendars. When the text does not match, the post is left unchanged.
 *
 * @param post post to fill in
 * @param node text node holding the ID string
 * @return the same {@code post} instance
 */
private Post extractIDString(Post post, TextNode node) {
    Pattern r = Pattern.compile("(\\d{2})/(\\d{2})/(\\d{2}).+?(\\d{2}):(\\d{2}):(\\d{2}) ID:([\\./0-9A-Za-z]+?) No\\.(\\d+)");
    Matcher m = r.matcher(node.text());
    if (m.find()) {
        int year = Integer.parseInt(m.group(1)) + 2000;
        int month = Integer.parseInt(m.group(2)) - 1; // Calendar months are 0-based
        int day = Integer.parseInt(m.group(3));
        int hours = Integer.parseInt(m.group(4));
        int minutes = Integer.parseInt(m.group(5));
        int seconds = Integer.parseInt(m.group(6));

        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("Asia/Taipei"));
        // getInstance() starts at "now"; clear it so no stray milliseconds remain.
        cal.clear();
        cal.set(year, month, day, hours, minutes, seconds);
        post.date = cal;

        post.tripId = m.group(7);
        post.no = m.group(8);
    }

    return post;
}
项目:FxcnBeta    文件:DetailsPresenter.java   
/**
 * Walks the children of {@code node}, buffering text in {@code sb} and
 * handing buffered text and images to the view in document order.
 *
 * <p>Text nodes are appended to the buffer. An {@code img} child first
 * flushes any buffered text to the view, then adds its {@code src} link to
 * the view and to {@code mImageUrls}. Every other child is processed
 * recursively. If this node is a {@code p} and the buffer grew, two newlines
 * are appended.
 *
 * @param sb   text buffer shared across the recursion
 * @param node node whose children are processed
 */
private void addView(StringBuilder sb, Node node) {
    int lengthBefore = sb.length();
    for (Node child : node.childNodes()) {
        String childName = child.nodeName();

        if ("img".equals(childName)) {
            if (sb.length() > 0) {
                // Strip the trailing useless characters (original note: the last two newlines).
                removeLastUselessChars(sb);
                if (sb.length() > 0) {
                    mView.addTextToContent(sb.toString());
                    sb.setLength(0);
                }
                // The buffer was flushed, so growth is now measured from zero.
                lengthBefore = 0;
            }
            String link = child.attributes().get("src");
            mView.addImageToContent(link);
            mImageUrls.add(link);
            continue;
        }

        if ("#text".equals(childName)) {
            sb.append(((TextNode) child).text());
            continue;
        }

        addView(sb, child);
    }

    boolean grew = sb.length() - lengthBefore > 0;
    if (grew && "p".equals(node.nodeName())) {
        sb.append("\n\n");
    }
}
项目:jodtemplate    文件:HtmlStylizer.java   
/**
 * Converts a jsoup element and its children into a list of output elements.
 *
 * <p>A {@code br} tag becomes a single break element. Otherwise child
 * elements are converted recursively and text nodes become text elements;
 * the result is wrapped as list elements for {@code li}, as one paragraph
 * element for {@code p}, and returned as-is for any other tag.
 *
 * @param element the jsoup element to convert
 * @param arPr    run-properties element passed to text element creation
 * @param apPr    paragraph-properties element passed to list/paragraph creation
 * @param slide   slide passed to text element creation
 * @return the converted elements
 * @throws IOException declared by this method; propagated from the helpers it calls
 */
private List<Element> process(final org.jsoup.nodes.Element element, final Element arPr, final Element apPr,
        final Slide slide) throws IOException {

    final String tagName = element.tagName();
    if (BR_TAG.equals(tagName)) {
        return Arrays.asList(new Element(PPTXDocument.BR_ELEMENT, getDrawingmlNamespace()));
    }

    final List<org.jsoup.nodes.Element> tags = getAllTags(element);

    final List<Element> converted = new ArrayList<>();
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            converted.add(createTextElement(tags, arPr, (TextNode) child, slide));
        } else if (child instanceof org.jsoup.nodes.Element) {
            converted.addAll(process((org.jsoup.nodes.Element) child, arPr, apPr, slide));
        }
    }

    if (LI_TAG.equals(tagName)) {
        return createListElements(tags, converted, apPr, element);
    }
    if (P_TAG.equals(tagName)) {
        return Arrays.asList(createParagraphElement(converted, apPr));
    }
    return converted;
}
项目:FanFictionReader    文件:HtmlParser.java   
/**
 * Recursively converts an element and its children, creating spans as
 * required.
 *
 * <p>The start tag is handled first, then each child in order (elements
 * recursively, text nodes via {@code characters} with their whole text),
 * and finally the end tag.
 *
 * @param element The element to convert
 */
public void convert(Element element) {
    // Open the span for this element.
    handleStartTag(element);

    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            // Text goes into the current span.
            characters(((TextNode) child).getWholeText());
        } else if (child instanceof Element) {
            // Nested elements are converted recursively.
            convert((Element) child);
        }
    }

    // Close the span.
    handleEndTag(element);
}
项目:JumpGo    文件:OutputFormatter.java   
/**
 * Recursively appends the text of {@code e}'s children to {@code accum},
 * skipping every child for which {@code unlikely(child)} is true.
 *
 * @param e      element whose children are visited
 * @param accum  buffer receiving the text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }

        Element nested = (Element) child;
        // A block element needs a separating space unless the buffer is
        // empty or already ends in whitespace.
        boolean blockNeedsSpace = accum.length() > 0 && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // A <br> always contributes a space when the block rule did not.
        if (blockNeedsSpace || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:play1-maven-plugin    文件:JSoupSeleneseParser.java   
/**
 * Builds the value of a table cell from its direct children: text nodes
 * contribute their text and {@code br} elements contribute the literal
 * {@code "<br />"}; every other child is ignored.
 *
 * @param tdNode the cell element
 * @return the concatenated value
 */
private String getTableDataValue( Element tdNode )
{
    StringBuilder value = new StringBuilder();
    for ( Node child : tdNode.childNodes() )
    {
        if ( child instanceof TextNode )
        {
            value.append( ( (TextNode) child ).text() );
            continue;
        }
        boolean isLineBreak = child instanceof Element && "br".equals( ( (Element) child ).tagName() );
        if ( isLineBreak )
        {
            value.append( "<br />" );
        }
    }
    return value.toString();
}
项目:ContentExtractor    文件:ContentExtractor.java   
/**
 * Registers a text node with non-blank text: records it in {@code tNodeList},
 * stores its xpath in {@code xpathMap}, and adds a {@code CountInfo} for it
 * to the list kept in {@code countMap} under that xpath.
 *
 * @param tNode the text node to register; ignored when its trimmed text is empty
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }

    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo info = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // First node seen for this xpath: start a new list.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(info);
}
项目:astor    文件:HtmlToPlainText.java   
/**
 * Appends the plain-text output for a node when the traversal enters it:
 * the text of a text node, a bullet for {@code li}, an indent for
 * {@code dt}, or a newline for paragraph, heading and table-row tags.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String tag = node.nodeName();

    final String emitted;
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        emitted = ((TextNode) node).text();
    } else if (tag.equals("li")) {
        emitted = "\n * ";
    } else if (tag.equals("dt")) {
        emitted = "  ";
    } else if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        emitted = "\n";
    } else {
        // Every other tag produces no output on entry.
        return;
    }
    append(emitted);
}
项目:astor    文件:Cleaner.java   
/**
 * Copies a source node into the destination tree if the whitelist allows it,
 * counting everything that is dropped in {@code numDiscarded}.
 *
 * @param source the node being entered in the source tree
 * @param depth  traversal depth of the node (not used here)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;

        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // Not a safe tag, so don't add it. The root is not counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }

        // Safe tag: clone it with only its safe attributes and descend into the copy.
        ElementMeta meta = createSafeElement(sourceEl);
        Element safeCopy = meta.el;
        destination.appendChild(safeCopy);

        numDiscarded += meta.numAttribsDiscarded;
        destination = safeCopy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // Anything else (comments, xml proc instructions, etc) is dropped.
    numDiscarded++;
}
项目:astor    文件:Cleaner.java   
/**
 * Copies a source node into the destination tree if the whitelist allows it,
 * counting everything that is dropped in {@code numDiscarded}.
 *
 * @param source the node being entered in the source tree
 * @param depth  traversal depth of the node (not used here)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;

        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // Not a safe tag, so don't add it. The root is not counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }

        // Safe tag: clone it with only its safe attributes and descend into the copy.
        ElementMeta meta = createSafeElement(sourceEl);
        Element safeCopy = meta.el;
        destination.appendChild(safeCopy);

        numDiscarded += meta.numAttribsDiscarded;
        destination = safeCopy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // Anything else (comments, xml proc instructions, etc) is dropped.
    numDiscarded++;
}
项目:astor    文件:HtmlToPlainText.java   
/**
 * Appends the plain-text output for a node when the traversal enters it:
 * the text of a text node, a bullet for {@code li}, an indent for
 * {@code dt}, or a newline for paragraph, heading and table-row tags.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String tag = node.nodeName();

    final String emitted;
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        emitted = ((TextNode) node).text();
    } else if (tag.equals("li")) {
        emitted = "\n * ";
    } else if (tag.equals("dt")) {
        emitted = "  ";
    } else if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        emitted = "\n";
    } else {
        // Every other tag produces no output on entry.
        return;
    }
    append(emitted);
}
项目:WordCount    文件:ContentExtractor.java   
/**
 * Registers a text node with non-blank text: records it in {@code tNodeList},
 * stores its xpath in {@code xpathMap}, and adds a {@code CountInfo} for it
 * to the list kept in {@code countMap} under that xpath.
 *
 * @param tNode the text node to register; ignored when its trimmed text is empty
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }

    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo info = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // First node seen for this xpath: start a new list.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(info);
}
项目:Vaadin-SignatureField    文件:DeclarativeTestBaseBase.java   
/**
 * Produces predictable HTML for an element: attributes are written in
 * alphabetical order with single-quoted values, close tags are always
 * included, and the text of text nodes is trimmed.
 *
 * @param producedElem element to serialize
 * @param sb           buffer the markup is appended to
 * @return the full content of {@code sb} after appending
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    ArrayList<String> sortedNames = new ArrayList<String>();
    for (Attribute attribute : producedElem.attributes().asList()) {
        sortedNames.add(attribute.getKey());
    }
    Collections.sort(sortedNames);

    final String tag = producedElem.tagName();

    // Opening tag with attributes in sorted order.
    sb.append("<").append(tag);
    for (String key : sortedNames) {
        sb.append(" ").append(key).append("='").append(producedElem.attr(key)).append("'");
    }
    sb.append(">");

    // Children: nested elements recursively, text trimmed, everything else skipped.
    for (Node child : producedElem.childNodes()) {
        if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        } else if (child instanceof Element) {
            elementToHtml((Element) child, sb);
        }
    }

    sb.append("</").append(tag).append(">");
    return sb.toString();
}
项目:vertretungsplan-server    文件:UntisCommonParser.java   
/**
 * Parses a "Nachrichten zum Tag" (messages of the day) table from an Untis
 * substitution plan.
 *
 * <p>Every row that does not contain the text "Nachrichten zum Tag"
 * becomes one message: the decoded HTML of its {@code td} cells, joined by
 * newlines. Rows without any {@code td} cell are skipped.
 *
 * @param table
 *            the <code>table</code> element of the HTML document to parse
 * @param data
 *            data from the school (from <code>Schule.getData()</code>);
 *            not read by this method
 * @param tag
 *            the {@link VertretungsplanTag} in which the messages are
 *            stored
 */
protected void parseNachrichten(Element table, JSONObject data,
        VertretungsplanTag tag) {
    Elements zeilen = table
            .select("tr:not(:contains(Nachrichten zum Tag))");
    for (Element zeile : zeilen) {
        Elements spalten = zeile.select("td");
        if (spalten.isEmpty()) {
            // No cells (e.g. a header-only row): previously this crashed in substring(1).
            continue;
        }

        StringBuilder info = new StringBuilder();
        boolean first = true;
        for (Element spalte : spalten) {
            // Separator goes between cells, even when a cell is empty.
            if (!first) {
                info.append("\n");
            }
            first = false;
            info.append(TextNode.createFromEncoded(spalte.html(), null)
                    .getWholeText());
        }
        tag.getNachrichten().add(info.toString());
    }
}
项目:jinjava    文件:TruncateHtmlFilter.java   
/**
 * Truncates text nodes so that the total text length seen so far does not
 * exceed {@code maxTextLen}.
 *
 * <p>Once the limit is reached, further text nodes are emptied. The node
 * that crosses the limit is cut (at a word boundary unless {@code killwords}
 * is set) and {@code ending} is appended. Non-text nodes are ignored.
 *
 * @param node  the node being entered
 * @param depth traversal depth of the node (not used here)
 */
@Override
public void head(Node node, int depth) {
  if (!(node instanceof TextNode)) {
    return;
  }

  TextNode textNode = (TextNode) node;
  String content = textNode.text();

  // Limit already reached: drop this node's text entirely.
  if (textLen >= maxTextLen) {
    textNode.text("");
    return;
  }

  // Whole node still fits: just account for its length.
  int lengthWithNode = textLen + content.length();
  if (lengthWithNode <= maxTextLen) {
    textLen = lengthWithNode;
    return;
  }

  // This node crosses the limit: keep only the part that fits.
  int cut = maxTextLen - textLen;
  if (!killwords) {
    cut = Functions.movePointerToJustBeforeLastWord(cut, content) - 1;
  }
  textNode.text(content.substring(0, cut) + ending);
  textLen = maxTextLen;
}
项目:NewsWebsiteTextExtractor    文件:SpanStrategy.java   
/**
 * Extracts text from the {@code span} elements of a document.
 *
 * <p>After the initial cleanse, each span has its single-space text nodes
 * removed. Spans whose children contain consecutive {@code br} elements have
 * every {@code br} replaced by a "LINEBREAK" marker, and their text is kept
 * when the span is not link-only and has a promising identifier. Markers are
 * turned into CRLF in the returned text.
 *
 * @param doc document to clean; it is modified in place
 * @return the collected text
 */
public String cleanHtml(Document doc) {
    StringBuilder collected = new StringBuilder();
    baseCleaner.intitialCleanse(doc);

    for (Element span : doc.getElementsByTag("span")) {
        baseCleaner.removeSingleSpaceTextNodes(span);
        if (!baseCleaner.nodesContainConsecutiveBR(span.childNodes())) {
            continue;
        }

        // Mark each line break so it survives Element.text().
        for (Element br : span.getElementsByTag("br")) {
            br.replaceWith(new TextNode("LINEBREAK", null));
        }

        boolean keep = !baseCleaner.elementOnlyContainLink(span)
                && baseCleaner.elementHasPromisingIdentifier(span);
        if (keep) {
            collected.append(span.text());
        }
    }

    return collected.toString().replaceAll("LINEBREAK", "\r\n");
}
项目:NewsWebsiteTextExtractor    文件:DivStrategy.java   
/**
 * Extracts text from the {@code div} elements of a document.
 *
 * <p>After the initial cleanse, each div has its single-space text nodes
 * removed. Divs whose children contain consecutive {@code br} elements have
 * every {@code br} replaced by a "LINEBREAK" marker, and their text is kept
 * when the div is not link-only and has a promising identifier. Markers are
 * turned into CRLF in the returned text.
 *
 * @param doc document to clean; it is modified in place
 * @return the collected text
 */
public String cleanHtml(Document doc) {
    StringBuilder collected = new StringBuilder();
    baseCleaner.intitialCleanse(doc);

    for (Element div : doc.getElementsByTag("div")) {
        baseCleaner.removeSingleSpaceTextNodes(div);
        if (!baseCleaner.nodesContainConsecutiveBR(div.childNodes())) {
            continue;
        }

        // Mark each line break so it survives Element.text().
        for (Element br : div.getElementsByTag("br")) {
            br.replaceWith(new TextNode("LINEBREAK", null));
        }

        boolean keep = !baseCleaner.elementOnlyContainLink(div)
                && baseCleaner.elementHasPromisingIdentifier(div);
        if (keep) {
            collected.append(div.text());
        }
    }

    return collected.toString().replaceAll("LINEBREAK", "\r\n");
}
项目:ViTA    文件:PartsAndChaptersReviser.java   
/**
 * Creates new Epublines from the direct children of an element, starting a
 * new line at every HTML line break.
 *
 * <p>Text of {@code #text} children is accumulated; each {@code br} child
 * emits the accumulated text as one Epubline (even when empty) and resets
 * it. Text left over after the last child is emitted as a final line. The
 * text is read from the child node itself, so it cannot get out of step
 * with the child list.
 *
 * @param chapter        list the new Epublines are added to
 * @param chapterElement element whose direct children are scanned
 * @param mode           mode passed to every created Epubline
 */
private void addEpubline(List<Epubline> chapter, Element chapterElement, String mode) {
  StringBuilder pending = new StringBuilder();
  for (Node node : chapterElement.childNodes()) {
    String nodeName = node.nodeName().trim();
    if ("#text".equals(nodeName) && node instanceof TextNode) {
      // text node -> add text
      pending.append(((TextNode) node).text());
    } else if ("br".equals(nodeName)) {
      // break -> make a new line
      chapter.add(new Epubline(mode, pending.toString(), ""));
      pending.setLength(0);
    }
  }
  if (pending.length() > 0) {
    chapter.add(new Epubline(mode, pending.toString(), ""));
  }
}
项目:Html2Rtf    文件:Html2RtfParser.java   
/**
 * Maps the given HTML node to its RTF counterpart. When the RTF element has
 * sub-elements they are supplied in {@code childs}.
 *
 * @param node
 *            the current HTML node for which an RTF element should be
 *            created
 * @param childs
 *            the RTF child elements, if any
 * @return the text of a text node; {@code childs} itself for a {@code p}
 *         element; the registered handler's result for any other element;
 *         {@code null} for other node types
 * @throws RuntimeException
 *             if an element has no registered handler
 */
private Object getRtfTNode(Node node, ElementContainer childs) {
    final String name = node.nodeName().toLowerCase();

    if (node instanceof TextNode) {
        return ((TextNode) node).text();
    }
    if (!(node instanceof Element)) {
        return null;
    }

    // Paragraphs are transparent: their children are passed through unchanged.
    if (name.equals("p")) {
        return childs;
    }

    NodeHandler<ElementContainer, Object> handler = handlers.get(name);

    //TODO better use a NoOpHandler that ignores the tag and log it?
    if (handler == null) {
        throw new RuntimeException("WTF? Don't know this tag: " + name);
    }

    return handler.handle(childs);
}
项目:Html2Rtf    文件:Rtf2Html.java   
/**
 * Creates a converter with an empty entry map, an empty HTML body to append
 * to, and the element parsers for font, bold, italic and underline.
 */
public Rtf2Html() {
    entries = new LinkedHashMap<TextNode, Element>();

    // Start from an empty document and keep a handle on its body.
    body = Jsoup.parse("<body></body>").body();

    // Set up the handlers.
    parserItems = new ArrayList<RtfElementParser>();
    parserItems.add(new FontElementParser("font"));
    parserItems.add(new BooleanElementParser("b", StyleConstants.Bold));
    parserItems.add(new BooleanElementParser("i", StyleConstants.Italic));
    parserItems.add(new BooleanElementParser("u", StyleConstants.Underline));
}
项目:Html2Rtf    文件:Rtf2Html.java   
/**
 * Appends the text covered by {@code element} to the HTML body as a new
 * text node and remembers which element it came from in {@code entries}.
 *
 * <p>If the text cannot be read from the document, the stack trace is
 * printed and nothing is added. Whitespace-only text is not filtered out.
 *
 * @param element  element whose start/end offsets delimit the text
 * @param document document the text is read from
 */
private void addToMap(Element element, Document document) {
    final int start = element.getStartOffset();
    final int end = element.getEndOffset();

    final String text;
    try {
        text = document.getText(start, end - start);
    } catch (BadLocationException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        return;
    }

    org.jsoup.nodes.TextNode textNode = new org.jsoup.nodes.TextNode(text, "");
    body.appendChild(textNode);
    entries.put(textNode, element);
}