@Override public void head(Node node, int depth) { String name = node.nodeName(); if (node instanceof TextNode) { append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. } else if (name.equals("ul")) { listNesting++; } else if (name.equals("li")) { append("\n "); for (int i = 1; i < listNesting; i++) { append(" "); } if (listNesting == 1) { append("* "); } else { append("- "); } } else if (name.equals("dt")) { append(" "); } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) { append("\n"); } }
boolean inSelectScope(String targetName) { for (int pos = stack.size() -1; pos >= 0; pos--) { Element el = stack.get(pos); String elName = el.nodeName(); if (elName.equals(targetName)) return true; if (!StringUtil.in(elName, TagSearchSelectScope)) // all elements except return false; } Validate.fail("Should not be reachable"); return false; }
/**
 * Sends an HTTP GET request to the endpoint URL and stores the CSRF token extracted
 * from the response body in {@link #csrfToken}.
 *
 * @throws IOException
 *             if executing the request or handling the response fails. An
 *             {@link UnknownHostException} whose message equals the first group
 *             captured from the endpoint URL by
 *             {@code HACPreferenceConstants.HOST_REGEXP_PATTERN} is rethrown with a
 *             formatted message and the original exception attached as its cause.
 * @throws ClientProtocolException
 *             propagated from the underlying HTTP client calls
 * @throws AuthenticationException
 *             if the token extracted from the response is blank
 */
protected void fetchCsrfTokenFromHac() throws ClientProtocolException, IOException, AuthenticationException {
	final HttpGet getRequest = new HttpGet(getEndpointUrl());

	try {
		final HttpResponse response = httpClient.execute(getRequest, getContext());
		final String responseString = new BasicResponseHandler().handleResponse(response);
		csrfToken = getCsrfToken(responseString);

		if (StringUtil.isBlank(csrfToken)) {
			throw new AuthenticationException(ErrorMessage.CSRF_TOKEN_CANNOT_BE_OBTAINED);
		}
	} catch (UnknownHostException error) {
		final String errorMessage = error.getMessage();
		final Matcher matcher = HACPreferenceConstants.HOST_REGEXP_PATTERN.matcher(getEndpointUrl());

		if (matcher.find() && matcher.group(1).equals(errorMessage)) {
			final UnknownHostException detailed = new UnknownHostException(
					String.format(ErrorMessage.UNKNOWN_HOST_EXCEPTION_MESSAGE_FORMAT, matcher.group(1)));
			// UnknownHostException has no cause-taking constructor; keep the original
			// exception (and its stack trace) reachable for diagnostics.
			detailed.initCause(error);
			throw detailed;
		}
		throw error;
	}
}
/**
 * Parses the JSON response of a script execution and prints it to the console.
 * <p>
 * When the response carries a non-blank stack trace, the stack trace is printed as an
 * error followed by the script output. Otherwise the execution result and the output
 * are printed under their respective labels.
 * </p>
 *
 * @param jsonResult
 *            script execution response in JSON format
 */
protected void displayScriptExecutionResult(final String jsonResult) {
	final JSONObject response = new JSONObject(jsonResult);
	final String scriptOutput = response.getString(ScriptExecution.Response.OUPUT_KEY);
	final String stackTraceText = response.getString(ScriptExecution.Response.STACK_TRACE_KEY);
	final String resultText = response.getString(ScriptExecution.Response.RESULT_KEY);

	final boolean failed = !StringUtil.isBlank(stackTraceText);
	if (failed) {
		ConsoleUtils.printError(stackTraceText);
		ConsoleUtils.printMessage(scriptOutput);
		return;
	}

	ConsoleUtils.printMessage(RESULT_LABEL);
	ConsoleUtils.printMessage(resultText);
	ConsoleUtils.printLine();
	ConsoleUtils.printMessage(OUTPUT_LABEL);
	ConsoleUtils.printMessage(scriptOutput);
}
public void head(Node node, int depth) { String name = node.nodeName(); if (node instanceof TextNode) append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. else if (name.equals("li")) append("\n * "); else if (name.equals("dt")) append(" "); else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) append("\n"); }
/** * Get a CSS selector that will uniquely select this element. * <p> * If the element has an ID, returns #id; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element (tag.class.class:nth-child(n)). * </p> * * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) return "#" + id(); // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag String tagName = tagName().replace(':', '|'); StringBuilder selector = new StringBuilder(tagName); String classes = StringUtil.join(classNames(), "."); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return selector.toString(); selector.insert(0, " > "); if (parent().select(selector.toString()).size() > 1) selector.append(String.format( ":nth-child(%d)", elementSiblingIndex() + 1)); return parent().cssSelector() + selector.toString(); }
@Test public void fetchHandlesXml() throws IOException { // should auto-detect xml and use XML parser, unless explicitly requested the html parser String xmlUrl = "http://direct.infohound.net/tools/parse-xml.xml"; Connection con = Jsoup.connect(xmlUrl); Document doc = con.get(); Connection.Request req = con.request(); assertTrue(req.parser().getTreeBuilder() instanceof XmlTreeBuilder); assertEquals("<xml> <link> one </link> <table> Two </table> </xml>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
@Test public void handlesInvalidDoctypes() { // would previously throw invalid name exception on empty doctype Document doc = Jsoup.parse("<!DOCTYPE>"); assertEquals( "<!doctype> <html> <head></head> <body></body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); doc = Jsoup.parse("<!DOCTYPE><html><p>Foo</p></html>"); assertEquals( "<!doctype> <html> <head></head> <body> <p>Foo</p> </body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); doc = Jsoup.parse("<!DOCTYPE \u0000>"); assertEquals( "<!doctype �> <html> <head></head> <body></body> </html>", StringUtil.normaliseWhitespace(doc.outerHtml())); }
/**
 * Called when leaving a node; appends a trailing newline for line-ending tags and the
 * absolute link target, in angle brackets, after an anchor.
 *
 * @param node  the node being left
 * @param depth the depth of the node (not used by this method)
 */
public void tail(Node node, int depth) {
    String tag = node.nodeName();

    boolean endsLine = StringUtil.in(tag, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5");
    if (endsLine) {
        append("\n");
        return;
    }
    if (tag.equals("a")) {
        String target = node.absUrl("href");
        append(String.format(" <%s>", target));
    }
}
private void append(String text) { if (text.startsWith("\n")) width = 0; // reset counter if starts with a newline. only from formats above, not in natural text if (text.equals(" ") && (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) return; // don't accumulate long runs of empty spaces if (text.length() + width > maxWidth) { // won't fit, needs to wrap String words[] = text.split("\\s+"); for (int i = 0; i < words.length; i++) { String word = words[i]; boolean last = i == words.length - 1; if (!last) // insert a space if not the last word word = word + " "; if (word.length() + width > maxWidth) { // wrap and reset counter accum.append("\n").append(word); width = word.length(); } else { accum.append(word); width += word.length(); } } } else { // fits as is, without need to wrap text accum.append(text); width += text.length(); } }
/** * Get a CSS selector that will uniquely select this element. * <p> * If the element has an ID, returns #id; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element (tag.class.class:nth-child(n)). * </p> * * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) return "#" + id(); StringBuilder selector = new StringBuilder(tagName()); String classes = StringUtil.join(classNames(), "."); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return selector.toString(); selector.insert(0, " > "); if (parent().select(selector.toString()).size() > 1) selector.append(String.format( ":nth-child(%d)", elementSiblingIndex() + 1)); return parent().cssSelector() + selector.toString(); }
/**
 * Strips one pair of matching surrounding quotes ({@code "} or {@code '}) from a string.
 * <p>
 * A string that does not start with a quote character is returned unchanged. A string
 * that starts with a quote must be at least two characters long and end with the same
 * quote character; otherwise the {@code Validate.isTrue} check fails. This includes the
 * single-character inputs {@code "} and {@code '}, which previously slipped past the
 * check and failed in {@code substring} instead.
 * </p>
 *
 * @param str the string to trim; must be non-null and non-empty
 * @return {@code str} without its surrounding quotes, or {@code str} itself if unquoted
 */
public static String trimQuotes(String str) {
    Validate.isTrue(str != null && str.length() > 0);
    String quote = str.substring(0, 1);
    if (StringUtil.in(quote, "\"", "'")) {
        // A lone quote character "ends with" itself, so also require a distinct closing
        // quote before slicing off both ends.
        Validate.isTrue(str.length() > 1 && str.endsWith(quote), "Quote" + " for " + str + " is incomplete!");
        str = str.substring(1, str.length() - 1);
    }
    return str;
}
/**
 * Shows the definition of the reviewed word in {@code mTvWordDefiniton}.
 * <p>
 * When the "collins" applet status is 1 or 2 and an English definition is available,
 * every {@code <vocab>...</vocab>} span in it is replaced by the placeholder returned
 * from {@code getBlank} and the trimmed result is shown. Otherwise the trimmed Chinese
 * definition is shown (an empty string if it is {@code null}).
 * </p>
 *
 * @param paramReviewData the review item whose vocabulary definition is displayed
 */
protected void showDefinition(ReviewData paramReviewData) {
    VocabularyData vocabulary = paramReviewData.getVocabulary();

    // NOTE(review): statuses 1 and 2 are treated as "collins available" — confirm their meaning.
    int collinsStatus = AppletUtil.getAppletStatus(mActivity, "collins");
    if ((collinsStatus == 2) || (collinsStatus == 1)) {
        String enDefn = vocabulary.getEnDefn();
        if (!StringUtil.isBlank(enDefn)) {
            String blank = getBlank(this.mTvWordDefiniton, vocabulary.getContent());
            // Replace the matches directly instead of feeding the matched text back in as
            // a regex: the word may contain regex metacharacters, and the placeholder may
            // contain '$' or '\', which a replacement template would interpret.
            String masked = Pattern.compile("<vocab>(.*?)</vocab>")
                    .matcher(enDefn)
                    .replaceAll(Matcher.quoteReplacement(blank));
            mTvWordDefiniton.setText(masked.trim());
            return;
        }
    }

    // Fallback shared by "collins unavailable" and "no English definition".
    String cnDefinition = vocabulary.getCnDefinition();
    mTvWordDefiniton.setText(cnDefinition == null ? "" : cnDefinition.trim());
}
/**
 * Shows the definition of the reviewed word in {@code mTvWordDefiniton}.
 * <p>
 * When {@code mIsEnableCollins} is set and an English definition is available, every
 * {@code <vocab>...</vocab>} span in it is replaced by the placeholder returned from
 * {@code getBlank} and the trimmed result is shown. Otherwise the Chinese definition is
 * shown, passed through {@code StringUtils.trimToEmpty}.
 * </p>
 *
 * @param paramReviewData the review item whose vocabulary definition is displayed
 */
protected void showDefinition(ReviewData paramReviewData) {
    VocabularyData vocabulary = paramReviewData.getVocabulary();

    if (this.mIsEnableCollins) {
        String enDefn = vocabulary.getEnDefn();
        if (!StringUtil.isBlank(enDefn)) {
            String blank = getBlank(this.mTvWordDefiniton, vocabulary.getContent());
            // Replace the matches directly instead of feeding the matched text back in as
            // a regex: the word may contain regex metacharacters, and the placeholder may
            // contain '$' or '\', which a replacement template would interpret.
            String masked = Pattern.compile("<vocab>(.*?)</vocab>")
                    .matcher(enDefn)
                    .replaceAll(Matcher.quoteReplacement(blank));
            this.mTvWordDefiniton.setText(masked.trim());
            return;
        }
    }

    // Fallback shared by "collins disabled" and "no English definition".
    this.mTvWordDefiniton.setText(StringUtils.trimToEmpty(vocabulary.getCnDefinition()));
}
/**
 * Pops elements off the top of the stack until an element with one of the given names
 * has been removed. That matching element is removed as well.
 *
 * @param elNames node names that end the popping
 */
void popStackToClose(String... elNames) {
    Iterator<Element> fromTop = stack.descendingIterator();
    while (fromTop.hasNext()) {
        Element popped = fromTop.next();
        boolean isTarget = StringUtil.in(popped.nodeName(), elNames);
        // Every visited element is removed; a match only decides when to stop.
        fromTop.remove();
        if (isTarget) {
            return;
        }
    }
}
@Override public Optional<BibEntry> performSearchById(String identifier) throws FetcherException { if (StringUtil.isBlank(identifier)) { return Optional.empty(); } this.ensureThatIsbnIsValid(identifier); IsbnViaEbookDeFetcher isbnViaEbookDeFetcher = new IsbnViaEbookDeFetcher(importFormatPreferences); Optional<BibEntry> bibEntry = isbnViaEbookDeFetcher.performSearchById(identifier); // nothing found at ebook.de, try chimbori.com if (!bibEntry.isPresent()) { LOGGER.debug("No entry found at ebook.de try chimbori.com"); IsbnViaChimboriFetcher isbnViaChimboriFetcher = new IsbnViaChimboriFetcher(importFormatPreferences); bibEntry = isbnViaChimboriFetcher.performSearchById(identifier); } return bibEntry; }
/**
 * Removes elements from the top of the stack until an element named {@code html} or
 * one of the given names is reached. That element is left on the stack.
 *
 * @param nodeNames node names that stop the clearing
 */
private void clearStackToContext(String... nodeNames) {
    Iterator<Element> fromTop = stack.descendingIterator();
    while (fromTop.hasNext()) {
        String currentName = fromTop.next().nodeName();
        boolean reachedContext = StringUtil.in(currentName, nodeNames) || currentName.equals("html");
        if (reachedContext) {
            return;
        }
        fromTop.remove();
    }
}
/**
 * Consumes the whole reader and returns its content with character references
 * replaced by the code points they resolve to.
 *
 * @param inAttribute passed through to {@code consumeCharacterReference}
 *                    (presumably whether the text is an attribute value — confirm)
 * @return unescaped string from reader
 */
String unescapeEntities(boolean inAttribute) {
    StringBuilder unescaped = StringUtil.stringBuilder();
    while (!reader.isEmpty()) {
        // Copy everything up to the next ampersand verbatim.
        unescaped.append(reader.consumeTo('&'));
        if (!reader.matches('&')) {
            continue;
        }

        reader.consume();
        int[] codePoints = consumeCharacterReference(null, inAttribute);
        if (codePoints == null || codePoints.length == 0) {
            // Not a recognised reference: keep the ampersand as literal text.
            unescaped.append('&');
            continue;
        }

        unescaped.appendCodePoint(codePoints[0]);
        if (codePoints.length == 2) {
            unescaped.appendCodePoint(codePoints[1]);
        }
    }
    return unescaped.toString();
}
/**
 * Walks the element stack from the top down to decide whether one of the target
 * elements is in scope.
 *
 * @param targetNames node names being searched for
 * @param baseTypes   node names that end the search with a negative result
 * @param extraTypes  further node names that end the search; may be {@code null}
 * @return {@code true} if a target is met before any base or extra type, otherwise
 *         {@code false}
 */
private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
    int index = stack.size() - 1;
    while (index >= 0) {
        Element candidate = stack.get(index);
        String candidateName = candidate.nodeName();

        if (StringUtil.in(candidateName, targetNames)) {
            return true;
        }
        boolean hitsBoundary = StringUtil.in(candidateName, baseTypes)
                || (extraTypes != null && StringUtil.in(candidateName, extraTypes));
        if (hitsBoundary) {
            return false;
        }
        index--;
    }
    // The walk is expected to terminate inside the loop.
    Validate.fail("Should not be reachable");
    return false;
}
/**
 * Queues a request for every valid URL in the list, resolving each against
 * {@code currentUrl}. The whole batch is added while holding the lock on
 * {@code targetRequests}.
 *
 * @param requests     the URLs to queue; entries rejected by {@code isValidUrl} are skipped
 * @param responseType the response type attached to every created request
 */
public void addTargetRequests(List<String> requests, ResponseType responseType) {
    synchronized (targetRequests) {
        for (String candidate : requests) {
            if (!isValidUrl(candidate)) {
                continue;
            }
            Request request = new Request(StringUtil.resolve(currentUrl, candidate), responseType);
            targetRequests.add(request);
        }
    }
}
/**
 * Queues a single request for the URL, resolved against {@code currentUrl}, if
 * {@code isValidUrl} accepts it.
 *
 * @param url          the URL to queue
 * @param responseType the response type attached to the created request
 */
public void addTargetRequest(String url, ResponseType responseType) {
    if (!isValidUrl(url)) {
        return;
    }
    synchronized (targetRequests) {
        Request request = new Request(StringUtil.resolve(currentUrl, url), responseType);
        targetRequests.add(request);
    }
}
public void head(Node node, int i) { String name = node.nodeName(); if (node instanceof TextNode) accum.append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. else if (name.equals("li")) accum.append("\n * "); else if (name.equals("dt")) accum.append(" "); else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) accum.append("\n"); }
/**
 * Called when leaving a node; writes a trailing newline into {@code accum} for
 * line-ending tags.
 *
 * @param node  the node being left
 * @param depth the depth of the node (not used by this method)
 */
public void tail(Node node, int depth) {
    String tag = node.nodeName();
    boolean endsLine = StringUtil.in(tag, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5");
    if (!endsLine) {
        return;
    }
    accum.append("\n");
}
/**
 * Collects the link targets of all {@code a} elements selected from the given element.
 * <p>
 * For an anchor with a non-blank base URI the {@code abs:href} attribute is read;
 * otherwise the plain {@code href} attribute is used.
 * </p>
 *
 * @param element the element to search for anchors
 * @return one entry per selected anchor, in selection order
 */
@Override
public List<String> selectList(Element element) {
    Elements anchors = element.select("a");
    List<String> hrefs = new ArrayList<String>(anchors.size());
    for (Element anchor : anchors) {
        boolean hasBaseUri = !StringUtil.isBlank(anchor.baseUri());
        String attributeKey = hasBaseUri ? "abs:href" : "href";
        hrefs.add(anchor.attr(attributeKey));
    }
    return hrefs;
}
/**
 * Decides whether a node counts as empty.
 * <p>
 * A text node is empty when its text is blank. An element is empty when its tag does
 * not report {@code isEmpty()} and {@code hasEmptyChidren} holds for it. {@code null}
 * and every other node type are never empty.
 * </p>
 *
 * @param node the node to inspect; may be {@code null}
 * @return {@code true} if the node is considered empty
 */
public static boolean isEmptyElement(Node node) {
    if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        return StringUtil.isBlank(textNode.text());
    }
    // null fails instanceof too, so it is rejected here along with other node types.
    if (!(node instanceof Element)) {
        return false;
    }
    Element element = (Element) node;
    if (element.tag().isEmpty()) {
        return false;
    }
    return hasEmptyChidren(node);
}