Java 类org.jsoup.nodes.TextNode 实例源码
项目:eclipse.jdt.ls
文件:HtmlToPlainText.java
/**
 * Appends the plain-text contribution of {@code node} to the output.
 * <p>
 * Text nodes contribute their text; {@code ul} bumps the list nesting counter;
 * {@code li} starts a new, nesting-indented bullet line; {@code dt} adds a space;
 * paragraphs, headings h1-h5 and table rows start a new line.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
@Override
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("ul")) {
        listNesting++;
        return;
    }
    if (name.equals("li")) {
        append("\n ");
        // One extra indent unit per nesting level beyond the first.
        for (int remaining = listNesting - 1; remaining > 0; remaining--) {
            append(" ");
        }
        // Top-level items get "*", nested items get "-".
        append(listNesting == 1 ? "* " : "- ");
        return;
    }
    if (name.equals("dt")) {
        append(" ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:case-html-data-gather
文件:HTMLDataGather.java
/**
 * Looks for a {@code span.diczx7} whose text is exactly "五笔:" and returns the text
 * that immediately follows it, with every non-word character removed.
 *
 * @param tagContentEL element to search in
 * @return the cleaned text of the first matching label's following text node,
 *         or {@code null} when no such label/text pair exists
 */
private String gatherWuBi(Element tagContentEL) {
    for (Element label : tagContentEL.select("span.diczx7")) {
        if (!label.text().equals("五笔:")) {
            continue;
        }
        // The value is the text node that directly follows the label span.
        Node following = label.nextSibling();
        if (!(following instanceof TextNode)) {
            continue;
        }
        // Strip special (non-word) characters.
        return ((TextNode) following).text().replaceAll("\\W", "");
    }
    return null;
}
项目:YuiHatano
文件:ShadowResources.java
/**
 * Collects, for every element with the given tag in the values XML document,
 * the texts of its {@code <item>} descendants.
 *
 * @param tag tag name of the array elements to look up
 * @return map from each element's {@code name} attribute to its item texts,
 *         in the order the items are returned by the document
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element element : document.getElementsByTag(tag)) {
        List<String> itemsText = new ArrayList<>();
        for (Element item : element.getElementsByTag("item")) {
            String text;
            if (item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode) {
                text = ((TextNode) item.childNode(0)).text();
            } else {
                // An empty <item/> has no children and an item may begin with markup,
                // so the first child is not guaranteed to be a TextNode. Fall back to
                // the element's own text instead of failing on the cast/index.
                text = item.text();
            }
            itemsText.add(text);
        }
        map.put(element.attr("name"), itemsText);
    }
    return map;
}
项目:YuiHatano
文件:ShadowResources.java
/**
 * Builds the resource name-to-value map for strings.xml.
 * <p>
 * Only {@code <string>} elements whose first child is a text node are included;
 * the value is that text node's text.
 *
 * @return map from each {@code name} attribute to its string value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> map = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element element : document.getElementsByTag("string")) {
        String name = element.attr("name");
        if (element.childNodeSize() == 0) {
            continue;
        }
        Node first = element.childNode(0);
        if (first instanceof TextNode) {
            map.put(name, ((TextNode) first).text());
        }
    }
    return map;
}
项目:KBUnitTest
文件:ShadowResources.java
/**
 * Collects, for every element with the given tag in the values XML document,
 * the texts of its {@code <item>} descendants.
 *
 * @param tag tag name of the array elements to look up
 * @return map from each element's {@code name} attribute to its item texts,
 *         in the order the items are returned by the document
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element element : document.getElementsByTag(tag)) {
        List<String> itemsText = new ArrayList<>();
        for (Element item : element.getElementsByTag("item")) {
            String text;
            if (item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode) {
                text = ((TextNode) item.childNode(0)).text();
            } else {
                // An empty <item/> has no children and an item may begin with markup,
                // so the first child is not guaranteed to be a TextNode. Fall back to
                // the element's own text instead of failing on the cast/index.
                text = item.text();
            }
            itemsText.add(text);
        }
        map.put(element.attr("name"), itemsText);
    }
    return map;
}
项目:KBUnitTest
文件:ShadowResources.java
/**
 * Builds the resource name-to-value map for strings.xml.
 * <p>
 * Only {@code <string>} elements whose first child is a text node are included;
 * the value is that text node's text.
 *
 * @return map from each {@code name} attribute to its string value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> map = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element element : document.getElementsByTag("string")) {
        String name = element.attr("name");
        if (element.childNodeSize() == 0) {
            continue;
        }
        Node first = element.childNode(0);
        if (first instanceof TextNode) {
            map.put(name, ((TextNode) first).text());
        }
    }
    return map;
}
项目:Xndroid
文件:OutputFormatter.java
/**
 * Recursively appends the text found under {@code e} to {@code accum}, ignoring
 * every child for which {@code unlikely(child)} is true.
 * <p>
 * A single space is inserted before descending into a block element when the
 * buffer is non-empty and does not already end in whitespace, or otherwise
 * before a {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  buffer receiving the text
 * @param indent current recursion depth (only passed along, incremented per level)
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsSeparator =
                accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:solr-cmd-utils
文件:HtmlJsoupFilter.java
/**
 * For each element matching {@code selector}, joins the texts of all text nodes
 * of the element and its descendants (each followed by a space), trims the
 * result and adds it to the document under {@code fieldName}.
 *
 * @param selector  selector evaluated against the jsoup document
 * @param fieldName name of the field that receives one value per matched element
 */
public void mapAllElements(String selector, String fieldName) {
    for (Element matched : jsoupDocument.select(selector)) {
        StringBuilder joined = new StringBuilder();
        for (Element descendant : matched.getAllElements()) {
            for (TextNode textNode : descendant.textNodes()) {
                joined.append(textNode.text()).append(" ");
            }
        }
        document.addField(fieldName, joined.toString().trim());
    }
}
项目:awplab-core
文件:JsoupSession.java
/**
 * Returns the first child of {@code parent} if it is an element or a text node
 * with visible content; otherwise delegates to {@code getNextNonEmptyNode}
 * starting from that first child.
 *
 * @param parent element to inspect, may be {@code null}
 * @return the selected node, or {@code null} when {@code parent} is {@code null}
 *         or has no children
 */
public Node getFirstNonEmptyNodeChild(Element parent) {
    if (parent == null || parent.childNodeSize() == 0) {
        return null;
    }
    Node first = parent.childNode(0);
    if (first instanceof Element) {
        return first;
    }
    if (first instanceof TextNode) {
        // Treat non-breaking spaces like ordinary spaces so they trim away.
        String visible = ((TextNode) first).text().replaceAll("\u00A0", " ").trim();
        if (visible.length() > 0) {
            return first;
        }
    }
    return getNextNonEmptyNode(first);
}
项目:RenewPass
文件:HtmlElements.java
/**
 * Returns the wrapper for a jsoup node, creating and caching it on first use.
 * Elements are wrapped as {@code HtmlElement}, text nodes as {@code HtmlTextNode},
 * everything else as a plain {@code HtmlNode}.
 *
 * @param node the jsoup node to wrap
 * @return the cached or newly created wrapper
 */
public HtmlNode getHtmlNode(org.jsoup.nodes.Node node) {
    if (elementCache.containsKey(node)) {
        return elementCache.get(node);
    }
    final HtmlNode wrapper;
    if (node instanceof Element) {
        wrapper = new HtmlElement(page, (Element) node);
    } else if (node instanceof TextNode) {
        wrapper = new HtmlTextNode(page, (TextNode) node);
    } else {
        wrapper = new HtmlNode(page, node);
    }
    elementCache.put(node, wrapper);
    return wrapper;
}
项目:sigir2016-collection-for-focused-retrieval
文件:Paragraph.java
/**
 * Rebuilds {@code rawText} from the text nodes of this paragraph.
 * <p>
 * For every text node the tag name is set to the node's path, the
 * whitespace-normalized and trimmed text is appended, and the untrimmed length
 * is added to {@code charsCountInLinks} when the node is a link.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node n : this) {
        if (!(n instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(n));
        String nodeRawText = ((TextNode) n).text();
        collected.append(Utils.normalizeWhitespace(nodeRawText).trim());
        if (NodeHelper.isLink(n)) {
            charsCountInLinks += nodeRawText.length();
        }
    }
    rawText = collected.toString();
}
项目:sigir2016-collection-for-focused-retrieval
文件:Paragraph.java
/**
 * Builds a dot-terminated path of node names from the {@code html} ancestor down
 * to the given node, e.g. {@code html.body.p.}. Text nodes and nodes that
 * {@code NodeHelper.isInnerText} accepts are skipped in favour of their parent.
 *
 * @param n node to start from, may be {@code null}
 * @return the path; empty when {@code n} is {@code null}. When the tree has no
 *         {@code html} ancestor the path starts at the topmost node reached.
 */
public String getPath(Node n)
{
    String nodePath = "";
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        // parent() returns null for a detached node, so re-check before every use.
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            break;
        }
        String parentNodeName = n.nodeName();
        nodePath = parentNodeName + "." + nodePath;
        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }
    return nodePath;
}
项目:dkpro-c4corpus
文件:Paragraph.java
/**
 * Rebuilds {@code rawText} from the text nodes of this paragraph.
 * <p>
 * For every text node the tag name is set to the node's path, the
 * break-normalized and trimmed text is appended, and the untrimmed length is
 * added to {@code charsCountInLinks} when the node is a link.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node n : this) {
        if (!(n instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(n));
        String nodeRawText = ((TextNode) n).text();
        collected.append(Utils.normalizeBreaks(nodeRawText).trim());
        if (NodeHelper.isLink(n)) {
            charsCountInLinks += nodeRawText.length();
        }
    }
    rawText = collected.toString();
}
项目:dkpro-c4corpus
文件:Paragraph.java
/**
 * Builds a dot-terminated path of node names from the {@code html} ancestor down
 * to the given node, e.g. {@code html.body.p.}. Text nodes and nodes that
 * {@code NodeHelper.isInnerText} accepts are skipped in favour of their parent.
 *
 * @param n node to start from, may be {@code null}
 * @return the path; empty when {@code n} is {@code null}. When the tree has no
 *         {@code html} ancestor the path starts at the topmost node reached.
 */
public String getPath(Node n)
{
    String nodePath = "";
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        // parent() returns null for a detached node, so re-check before every use.
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            break;
        }
        String parentNodeName = n.nodeName();
        nodePath = parentNodeName + "." + nodePath;
        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }
    return nodePath;
}
项目:JumpGo
文件:OutputFormatter.java
/**
 * Recursively appends the text found under {@code e} to {@code accum}, ignoring
 * every child for which {@code unlikely(child)} is true.
 * <p>
 * A single space is inserted before descending into a block element when the
 * buffer is non-empty and does not already end in whitespace, or otherwise
 * before a {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  buffer receiving the text
 * @param indent current recursion depth (only passed along, incremented per level)
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsSeparator =
                accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:DeeBrowser
文件:OutputFormatter.java
/**
 * Recursively appends the text found under {@code e} to {@code accum}, ignoring
 * every child for which {@code unlikely(child)} is true.
 * <p>
 * A single space is inserted before descending into a block element when the
 * buffer is non-empty and does not already end in whitespace, or otherwise
 * before a {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  buffer receiving the text
 * @param indent current recursion depth (only passed along, incremented per level)
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsSeparator =
                accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:common
文件:HtmlToPlainText.java
/**
 * Appends the plain-text contribution of {@code node}: the text of a text node,
 * a bullet line for {@code li}, a space for {@code dt}, and a line break for
 * paragraphs, headings h1-h5 and table rows.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("li")) {
        append("\n * ");
        return;
    }
    if (name.equals("dt")) {
        append(" ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:gestock
文件:HtmlToPlainText.java
/**
 * Appends the plain-text contribution of {@code node}: the text of a text node,
 * a bullet line for {@code li}, a space for {@code dt}, and a line break for
 * paragraphs, headings h1-h5 and table rows.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("li")) {
        append("\n * ");
        return;
    }
    if (name.equals("dt")) {
        append(" ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:zongtui-webcrawler
文件:ElementOperator.java
/**
 * Extracts text from the direct text-node children of {@code element}.
 * <p>
 * When {@code group} is 0 the texts of all direct text nodes are concatenated;
 * otherwise the text of the {@code group}-th direct text node (1-based) is returned.
 *
 * @param element element whose direct children are examined
 * @return the selected text; an empty string when {@code group} is non-zero and
 *         there is no text node at that position
 */
@Override
public String operate(Element element) {
    int seen = 0;
    StringBuilder all = new StringBuilder();
    for (Node node : element.childNodes()) {
        if (!(node instanceof TextNode)) {
            continue;
        }
        TextNode textNode = (TextNode) node;
        if (group == 0) {
            all.append(textNode.text());
            continue;
        }
        seen++;
        if (seen == group) {
            return textNode.text();
        }
    }
    return all.toString();
}
项目:KomiReader
文件:KomicaScraper.java
/**
 * Extracts date, ID and post number from a text node and stores them on the post.
 * Ex: " 15/02/14(六)07:14:32 ID:F.OqpZFA No.6135732"
 * <p>
 * The two-digit year is interpreted as 20xx and the timestamp is built in the
 * Asia/Taipei time zone. When the text does not match, the post is left untouched.
 *
 * @param post post to populate ({@code date}, {@code tripId}, {@code no})
 * @param node text node holding the header line
 * @return the same {@code post} instance
 */
private Post extractIDString(Post post, TextNode node) {
    Pattern r = Pattern.compile("(\\d{2})/(\\d{2})/(\\d{2}).+?(\\d{2}):(\\d{2}):(\\d{2}) ID:([\\./0-9A-Za-z]+?) No\\.(\\d+)");
    Matcher m = r.matcher(node.text());
    if (m.find()) {
        int year = Integer.parseInt(m.group(1)) + 2000;
        int month = Integer.parseInt(m.group(2)) - 1; // Calendar months are zero-based
        int day = Integer.parseInt(m.group(3));
        int hours = Integer.parseInt(m.group(4));
        int minutes = Integer.parseInt(m.group(5));
        int seconds = Integer.parseInt(m.group(6));
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("Asia/Taipei"));
        cal.set(year, month, day, hours, minutes, seconds);
        // set(y, m, d, h, mi, s) keeps the other fields, so MILLISECOND would still hold
        // the "now" value from getInstance(); zero it so equal timestamps compare equal.
        cal.set(Calendar.MILLISECOND, 0);
        post.date = cal;
        post.tripId = m.group(7);
        post.no = m.group(8);
    }
    return post;
}
项目:FxcnBeta
文件:DetailsPresenter.java
/**
 * Walks the children of {@code node}, accumulating text in {@code sb} and
 * handing text and images to the view in document order.
 * <p>
 * When an {@code img} is met, any pending text is cleaned up and flushed to the
 * view before the image is added. Text nodes are appended to {@code sb}; every
 * other node is processed recursively. If this call added text and
 * {@code node} is a {@code p}, two newlines are appended.
 *
 * @param sb   buffer of text not yet handed to the view
 * @param node node whose children are processed
 */
private void addView(StringBuilder sb, Node node) {
    int lengthAtStart = sb.length();
    for (Node child : node.childNodes()) {
        String childName = child.nodeName();
        if ("#text".equals(childName)) {
            sb.append(((TextNode) child).text());
            continue;
        }
        if (!"img".equals(childName)) {
            addView(sb, child);
            continue;
        }
        // Image: flush whatever text precedes it first.
        if (sb.length() > 0) {
            removeLastUselessChars(sb); // per the original note: drops the trailing two newline characters
            if (sb.length() > 0) {
                mView.addTextToContent(sb.toString());
                sb.setLength(0);
            }
            // The buffer was (possibly) emptied, so measure new text from zero.
            lengthAtStart = 0;
        }
        String link = child.attributes().get("src");
        mView.addImageToContent(link);
        mImageUrls.add(link);
    }
    if (sb.length() - lengthAtStart > 0 && "p".equals(node.nodeName())) {
        sb.append("\n\n");
    }
}
项目:jodtemplate
文件:HtmlStylizer.java
/**
 * Converts an HTML element and its subtree into output elements.
 * <p>
 * A {@code br} becomes a single break element. Otherwise child elements are
 * processed recursively and child text nodes become text elements; the result
 * is wrapped as list elements for {@code li}, as one paragraph element for
 * {@code p}, and returned as-is for any other tag.
 *
 * @param element HTML element to convert
 * @param arPr    passed through to text element creation
 * @param apPr    passed through to list/paragraph element creation
 * @param slide   passed through to text element creation and recursion
 * @return the elements produced for {@code element}
 * @throws IOException declared by this method; origin not visible in this block
 */
private List<Element> process(final org.jsoup.nodes.Element element, final Element arPr, final Element apPr,
        final Slide slide) throws IOException {
    final String tagName = element.tagName();
    if (BR_TAG.equals(tagName)) {
        return Arrays.asList(new Element(PPTXDocument.BR_ELEMENT, getDrawingmlNamespace()));
    }

    final List<org.jsoup.nodes.Element> tags = getAllTags(element);
    final List<Element> converted = new ArrayList<>();
    for (Node child : element.childNodes()) {
        if (child instanceof org.jsoup.nodes.Element) {
            converted.addAll(process((org.jsoup.nodes.Element) child, arPr, apPr, slide));
        } else if (child instanceof TextNode) {
            converted.add(createTextElement(tags, arPr, (TextNode) child, slide));
        }
    }

    if (LI_TAG.equals(tagName)) {
        return createListElements(tags, converted, apPr, element);
    }
    if (P_TAG.equals(tagName)) {
        return Arrays.asList(createParagraphElement(converted, apPr));
    }
    return converted;
}
项目:FanFictionReader
文件:HtmlParser.java
/**
 * Recursively converts an element and its children, creating spans as required:
 * the start tag is handled first, then every child in order (elements
 * recursively, text nodes via {@code characters} with their whole text), and
 * finally the end tag.
 *
 * @param element The element to convert
 */
public void convert(Element element) {
    // Open the span for this element.
    handleStartTag(element);

    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            // Text goes into the currently open span.
            characters(((TextNode) child).getWholeText());
        } else if (child instanceof Element) {
            // Nested elements are converted the same way.
            convert((Element) child);
        }
    }

    // Close the span.
    handleEndTag(element);
}
项目:JumpGo
文件:OutputFormatter.java
/**
 * Recursively appends the text found under {@code e} to {@code accum}, ignoring
 * every child for which {@code unlikely(child)} is true.
 * <p>
 * A single space is inserted before descending into a block element when the
 * buffer is non-empty and does not already end in whitespace, or otherwise
 * before a {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  buffer receiving the text
 * @param indent current recursion depth (only passed along, incremented per level)
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsSeparator =
                accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:play1-maven-plugin
文件:JSoupSeleneseParser.java
/**
 * Builds the value of a table cell from its direct children: text nodes
 * contribute their text and {@code br} elements contribute the literal
 * {@code <br />}. All other children are ignored.
 *
 * @param tdNode the table cell element
 * @return the assembled cell value
 */
private String getTableDataValue( Element tdNode )
{
    // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder buf = new StringBuilder();
    for ( Node tdChild : tdNode.childNodes() )
    {
        if ( tdChild instanceof TextNode )
        {
            buf.append( ( (TextNode) tdChild ).text() );
        }
        else if ( tdChild instanceof Element )
        {
            Element tdChildElement = (Element) tdChild;
            if ( "br".equals( tdChildElement.tagName() ) )
            {
                buf.append( "<br />" );
            }
        }
    }
    return buf.toString();
}
项目:ContentExtractor
文件:ContentExtractor.java
/**
 * Registers a non-blank text node: records it in the node list, stores its
 * xpath, and appends a {@code CountInfo} for it to the list kept for that xpath.
 * Nodes whose trimmed text is empty are ignored.
 *
 * @param tNode the text node to register
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }
    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo countInfo = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // First node seen for this xpath: create its list.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(countInfo);
}
项目:astor
文件:HtmlToPlainText.java
/**
 * Appends the plain-text contribution of {@code node}: the text of a text node,
 * a bullet line for {@code li}, a space for {@code dt}, and a line break for
 * paragraphs, headings h1-h5 and table rows.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("li")) {
        append("\n * ");
        return;
    }
    if (name.equals("dt")) {
        append(" ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:astor
文件:Cleaner.java
/**
 * Copies a source node into the destination tree if it is allowed.
 * <p>
 * Safe elements are re-created via {@code createSafeElement}, appended, and
 * become the new destination; unsafe elements are counted as discarded unless
 * they are the root. Text nodes are copied with their whole text. Data nodes
 * are copied only when their parent's tag is safe. Everything else is counted
 * as discarded.
 *
 * @param source the node being visited in the source tree
 * @param depth  depth of the node (not used here)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;
        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // Not a safe tag, so don't add it; the root is not counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }
        // Safe: create the safe copy (with its safe attributes) and descend into it.
        ElementMeta meta = createSafeElement(sourceEl);
        Element safeCopy = meta.el;
        destination.appendChild(safeCopy);
        numDiscarded += meta.numAttribsDiscarded;
        destination = safeCopy;
        return;
    }
    if (source instanceof TextNode) {
        TextNode sourceText = (TextNode) source;
        destination.appendChild(new TextNode(sourceText.getWholeText()));
        return;
    }
    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        DataNode sourceData = (DataNode) source;
        destination.appendChild(new DataNode(sourceData.getWholeData()));
        return;
    }
    // Comments, XML processing instructions, etc. are dropped.
    numDiscarded++;
}
项目:astor
文件:Cleaner.java
/**
 * Copies a source node into the destination tree if it is allowed.
 * <p>
 * Safe elements are re-created via {@code createSafeElement}, appended, and
 * become the new destination; unsafe elements are counted as discarded unless
 * they are the root. Text nodes are copied with their whole text. Data nodes
 * are copied only when their parent's tag is safe. Everything else is counted
 * as discarded.
 *
 * @param source the node being visited in the source tree
 * @param depth  depth of the node (not used here)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;
        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // Not a safe tag, so don't add it; the root is not counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }
        // Safe: create the safe copy (with its safe attributes) and descend into it.
        ElementMeta meta = createSafeElement(sourceEl);
        Element safeCopy = meta.el;
        destination.appendChild(safeCopy);
        numDiscarded += meta.numAttribsDiscarded;
        destination = safeCopy;
        return;
    }
    if (source instanceof TextNode) {
        TextNode sourceText = (TextNode) source;
        destination.appendChild(new TextNode(sourceText.getWholeText()));
        return;
    }
    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        DataNode sourceData = (DataNode) source;
        destination.appendChild(new DataNode(sourceData.getWholeData()));
        return;
    }
    // Comments, XML processing instructions, etc. are dropped.
    numDiscarded++;
}
项目:astor
文件:HtmlToPlainText.java
/**
 * Appends the plain-text contribution of {@code node}: the text of a text node,
 * a bullet line for {@code li}, a space for {@code dt}, and a line break for
 * paragraphs, headings h1-h5 and table rows.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (name.equals("li")) {
        append("\n * ");
        return;
    }
    if (name.equals("dt")) {
        append(" ");
        return;
    }
    if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
项目:WordCount
文件:ContentExtractor.java
/**
 * Registers a non-blank text node: records it in the node list, stores its
 * xpath, and appends a {@code CountInfo} for it to the list kept for that xpath.
 * Nodes whose trimmed text is empty are ignored.
 *
 * @param tNode the text node to register
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }
    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo countInfo = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // First node seen for this xpath: create its list.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(countInfo);
}
项目:Vaadin-SignatureField
文件:DeclarativeTestBaseBase.java
/**
 * Produces predictable HTML for an element: attributes are written in
 * alphabetical order with single-quoted values, child text is trimmed, and a
 * close tag is always emitted.
 *
 * @param producedElem element to serialize
 * @param sb           buffer the markup is appended to
 * @return the full content of {@code sb} after this element has been appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    String tag = producedElem.tagName();

    ArrayList<String> sortedNames = new ArrayList<String>();
    for (Attribute attribute : producedElem.attributes().asList()) {
        sortedNames.add(attribute.getKey());
    }
    Collections.sort(sortedNames);

    sb.append("<").append(tag);
    for (String attrName : sortedNames) {
        sb.append(" ").append(attrName).append("=").append("\'")
                .append(producedElem.attr(attrName)).append("\'");
    }
    sb.append(">");

    for (Node child : producedElem.childNodes()) {
        if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        } else if (child instanceof Element) {
            elementToHtml((Element) child, sb);
        }
    }

    sb.append("</").append(tag).append(">");
    return sb.toString();
}
项目:vertretungsplan-server
文件:UntisCommonParser.java
/**
 * Parses a "Nachrichten zum Tag" (messages of the day) table from Untis
 * substitution plans.
 * <p>
 * Every row that does not contain the heading text yields one message made of
 * the decoded contents of its {@code td} cells joined with newlines. Rows
 * without any {@code td} cell are skipped.
 *
 * @param table
 *            the <code>table</code> element of the HTML document that is to
 *            be parsed
 * @param data
 *            data from the school (from <code>Schule.getData()</code>); not
 *            used by this method
 * @param tag
 *            the {@link VertretungsplanTag} in which the messages are to be
 *            stored
 */
protected void parseNachrichten(Element table, JSONObject data,
        VertretungsplanTag tag) {
    Elements zeilen = table
            .select("tr:not(:contains(Nachrichten zum Tag))");
    for (Element zeile : zeilen) {
        Elements spalten = zeile.select("td");
        if (spalten.isEmpty()) {
            // e.g. a row that only has header cells: there is nothing to record,
            // and stripping a leading separator from "" would throw.
            continue;
        }
        StringBuilder info = new StringBuilder();
        boolean first = true;
        for (Element spalte : spalten) {
            if (!first) {
                info.append('\n');
            }
            first = false;
            info.append(TextNode.createFromEncoded(spalte.html(), null)
                    .getWholeText());
        }
        tag.getNachrichten().add(info.toString());
    }
}
项目:jinjava
文件:TruncateHtmlFilter.java
/**
 * Truncates text nodes so that the accumulated text length does not exceed
 * {@code maxTextLen}.
 * <p>
 * Once the limit has been reached, later text nodes are emptied. The node that
 * crosses the limit is cut (at a position adjusted by
 * {@code Functions.movePointerToJustBeforeLastWord} unless {@code killwords} is
 * set) and {@code ending} is appended. Nodes that fit just add to the running
 * length. Non-text nodes are ignored.
 *
 * @param node  the node being visited
 * @param depth depth of the node (not used here)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    TextNode text = (TextNode) node;
    String textContent = text.text();

    if (textLen >= maxTextLen) {
        // Limit already reached: drop this text entirely.
        text.text("");
        return;
    }
    if (textLen + textContent.length() <= maxTextLen) {
        // Fits completely.
        textLen += textContent.length();
        return;
    }

    // This node crosses the limit: keep only the part that still fits.
    int cut = maxTextLen - textLen;
    if (!killwords) {
        cut = Functions.movePointerToJustBeforeLastWord(cut, textContent) - 1;
    }
    text.text(textContent.substring(0, cut) + ending);
    textLen = maxTextLen;
}
项目:NewsWebsiteTextExtractor
文件:SpanStrategy.java
/**
 * Extracts text from the {@code span} elements of the document.
 * <p>
 * After the initial cleanse, each span has its single-space text nodes removed.
 * For spans whose children contain consecutive {@code br}s, every {@code br}
 * inside is replaced by a "LINEBREAK" marker, and the span's text is collected
 * if the span does not only contain a link and has a promising identifier.
 * Markers are finally turned into CRLF line breaks.
 *
 * @param doc document to clean; it is modified in place
 * @return the collected text
 */
public String cleanHtml(Document doc) {
    StringBuilder collected = new StringBuilder();
    baseCleaner.intitialCleanse(doc);
    for (Element span : doc.getElementsByTag("span")) {
        baseCleaner.removeSingleSpaceTextNodes(span);
        if (!baseCleaner.nodesContainConsecutiveBR(span.childNodes())) {
            continue;
        }
        for (Element br : span.getElementsByTag("br")) {
            br.replaceWith(new TextNode("LINEBREAK", null));
        }
        if (!baseCleaner.elementOnlyContainLink(span) && baseCleaner.elementHasPromisingIdentifier(span)) {
            collected.append(span.text());
        }
    }
    return collected.toString().replaceAll("LINEBREAK", "\r\n");
}
项目:NewsWebsiteTextExtractor
文件:DivStrategy.java
/**
 * Extracts text from the {@code div} elements of the document.
 * <p>
 * After the initial cleanse, each div has its single-space text nodes removed.
 * For divs whose children contain consecutive {@code br}s, every {@code br}
 * inside is replaced by a "LINEBREAK" marker, and the div's text is collected
 * if the div does not only contain a link and has a promising identifier.
 * Markers are finally turned into CRLF line breaks.
 *
 * @param doc document to clean; it is modified in place
 * @return the collected text
 */
public String cleanHtml(Document doc) {
    StringBuilder collected = new StringBuilder();
    baseCleaner.intitialCleanse(doc);
    for (Element div : doc.getElementsByTag("div")) {
        baseCleaner.removeSingleSpaceTextNodes(div);
        if (!baseCleaner.nodesContainConsecutiveBR(div.childNodes())) {
            continue;
        }
        for (Element br : div.getElementsByTag("br")) {
            br.replaceWith(new TextNode("LINEBREAK", null));
        }
        if (!baseCleaner.elementOnlyContainLink(div) && baseCleaner.elementHasPromisingIdentifier(div)) {
            collected.append(div.text());
        }
    }
    return collected.toString().replaceAll("LINEBREAK", "\r\n");
}
项目:ViTA
文件:PartsAndChaptersReviser.java
/**
 * Creates new Epublines from the direct children of an element, starting a new
 * line at every HTML line break.
 * <p>
 * Text of consecutive {@code #text} children is concatenated; each {@code br}
 * flushes the text gathered so far (possibly empty) as one Epubline. Text left
 * over after the last {@code br} is added as a final line if it is non-empty.
 *
 * @param chapter        list the new Epublines are appended to
 * @param chapterElement element whose direct children are read
 * @param mode           mode passed to every created Epubline
 */
private void addEpubline(List<Epubline> chapter, Element chapterElement, String mode) {
    StringBuilder pending = new StringBuilder();
    for (Node node : chapterElement.childNodes()) {
        String nodeName = node.nodeName().trim();
        if ("#text".equals(nodeName) && node instanceof TextNode) {
            // Read the text from the node itself. A separate index into textNodes()
            // drifts out of step as soon as a child is a TextNode that is not named
            // "#text" (e.g. a CDATA section in newer jsoup versions).
            pending.append(((TextNode) node).text());
        } else if ("br".equals(nodeName)) {
            // break -> make a new line
            chapter.add(new Epubline(mode, pending.toString(), ""));
            pending.setLength(0);
        }
    }
    if (pending.length() > 0) {
        chapter.add(new Epubline(mode, pending.toString(), ""));
    }
}
项目:Html2Rtf
文件:Html2RtfParser.java
/**
 * Maps the given HTML node to its RTF counterpart. When the RTF element has
 * sub-elements they are supplied in {@code childs}.
 * <p>
 * Text nodes map to their text, {@code p} elements map to {@code childs}
 * itself, and every other element is delegated to the handler registered for
 * its lower-cased tag name.
 *
 * @param node
 *            the current HTML node for which an RTF element should be
 *            created
 * @param childs
 *            the RTF child elements, if any
 * @return the RTF object for the node, or {@code null} when the node is neither
 *         a text node nor an element
 * @throws RuntimeException
 *             if no handler is registered for an element's tag
 */
private Object getRtfTNode(Node node, ElementContainer childs) {
    final String name = node.nodeName().toLowerCase();
    if (node instanceof TextNode) {
        return ((TextNode) node).text();
    }
    if (!(node instanceof Element)) {
        return null;
    }
    if (name.equals("p")) {
        return childs;
    }
    NodeHandler<ElementContainer, Object> handler = handlers.get(name);
    //TODO better use a NoOpHandler that ignores the tag and log it?
    if (handler == null) {
        throw new RuntimeException("WTF? Don't know this tag: " + name);
    }
    return handler.handle(childs);
}
项目:Html2Rtf
文件:Rtf2Html.java
/**
 * Creates a converter with an empty entry map, an empty HTML body to append
 * to, and the element parsers for font, bold, italic and underline.
 */
public Rtf2Html() {
    entries = new LinkedHashMap<TextNode, Element>();
    parserItems = new ArrayList<RtfElementParser>();

    // Parse a minimal document to obtain the body element output is appended to.
    body = Jsoup.parse("<body></body>").body();

    // Set up the handlers.
    parserItems.add(new FontElementParser("font"));
    parserItems.add(new BooleanElementParser("b", StyleConstants.Bold));
    parserItems.add(new BooleanElementParser("i", StyleConstants.Italic));
    parserItems.add(new BooleanElementParser("u", StyleConstants.Underline));
}
项目:Html2Rtf
文件:Rtf2Html.java
/**
 * Reads the text covered by {@code element} from {@code document}, appends it
 * to the HTML body as a new text node and remembers which element it came from.
 * If the element's offsets are not valid for the document, the stack trace is
 * printed and nothing is added.
 *
 * @param element  element whose start/end offsets delimit the text
 * @param document document the text is read from
 */
private void addToMap(Element element, Document document) {
    int start = element.getStartOffset();
    int end = element.getEndOffset();
    final String text;
    try {
        text = document.getText(start, end - start);
    } catch (BadLocationException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        return;
    }
    // Whitespace-only text is kept on purpose (the original left the blank check disabled).
    org.jsoup.nodes.TextNode created = new org.jsoup.nodes.TextNode(text, "");
    body.appendChild(created);
    entries.put(created, element);
}