/**
 * Looks through the {@code <script>} elements of the given page for a data
 * node that starts with {@code window.espn.scoreboardData}, parses the value
 * assigned there as JSON into an {@link EspnJson} and adds its teams to
 * {@code teamsList}.
 *
 * @param document parsed page to scan; nothing happens when it is {@code null}
 */
private void appendGames(Document document) {
    if (document == null) {
        return;
    }

    Elements scripts = document.getElementsByTag("script");
    Pattern scoreboardPattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");

    for (Element script : scripts) {
        for (DataNode dataNode : script.dataNodes()) {
            String scriptBody = dataNode.getWholeData();
            if (!scriptBody.startsWith("window.espn.scoreboardData")) {
                continue;
            }

            // matches() requires the whole script body to fit the pattern.
            Matcher scoreboardMatcher = scoreboardPattern.matcher(scriptBody);
            if (!scoreboardMatcher.matches()) {
                continue;
            }

            String json = scoreboardMatcher.group(1);
            EspnJson parsed = new Gson().fromJson(json, EspnJson.class);
            teamsList.putAll(parsed.getTeams());
        }
    }
}
/**
 * Fetches the ESPN WNBA scoreboard page over the network and checks that the
 * JSON assigned to {@code window.espn.scoreboardData} in one of its scripts
 * parses into an {@link EspnJson} whose teams are not empty.
 *
 * <p>The test fails when no script data node matches the scoreboard pattern,
 * so a changed page layout cannot make it pass without asserting anything.
 *
 * @throws Exception if the page cannot be downloaded
 */
@Test
public void leagueStatusCheck() throws Exception {
    Document doc = Jsoup.connect("http://www.espn.com/wnba/scoreboard/_/group/50")
            .timeout(60 * 1000)
            .maxBodySize(0)
            .get();

    Elements scriptElements = doc.getElementsByTag("script");
    Pattern pattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");

    // Tracks whether any script actually carried parsable scoreboard data.
    boolean scoreboardFound = false;

    for (Element element : scriptElements) {
        for (DataNode node : element.dataNodes()) {
            String data = node.getWholeData();
            if (!data.startsWith("window.espn.scoreboardData")) {
                continue;
            }
            Matcher matcher = pattern.matcher(data);
            if (matcher.matches()) {
                scoreboardFound = true;
                EspnJson espnJson = new Gson().fromJson(matcher.group(1), EspnJson.class);
                System.out.println(espnJson.getTeams());
                assertEquals(false, espnJson.getTeams().isEmpty());
            }
        }
    }

    assertEquals(true, scoreboardFound);
}
/**
 * Downloads the page at {@code href}, scans the data of every
 * {@code <script>} element (with line feeds removed) against a
 * {@code value":"..."} pattern, and prints one comma-separated line per match:
 * {@code teamCity}, the capitalized {@code teamName} and the upper-cased
 * captured value.
 *
 * <p>If the download fails, the stack trace is printed and the method returns
 * without scanning anything.
 */
@Override
public void run() {
    Document parsedDocument;
    try {
        parsedDocument = Jsoup.connect(href).timeout(600 * 1000).get();
    } catch (IOException e) {
        e.printStackTrace();
        // No document was fetched; stop here rather than dereference null below.
        return;
    }

    Elements scriptElements = parsedDocument.getElementsByTag("script");
    Pattern pattern = Pattern.compile(".*value\":\"(.*)\"\\},\\{\"name.*");

    for (Element element : scriptElements) {
        for (DataNode node : element.dataNodes()) {
            // Line feeds are stripped so that '.' in the pattern can span the whole script.
            Matcher matcher = pattern.matcher(node.getWholeData().replaceAll("\n", ""));
            if (matcher.matches()) {
                System.out.println(teamCity + "," + StringUtils.capitalize(teamName) + "," + matcher.group(1).toUpperCase());
            }
        }
    }

    // NOTE(review): sets the interrupt flag of the executing thread once the scan is done;
    // presumably a completion signal for whoever runs this task - confirm it is still needed.
    Thread.currentThread().interrupt();
}
public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs ElementMeta meta = createSafeElement(sourceEl); Element destChild = meta.el; destination.appendChild(destChild); numDiscarded += meta.numAttribsDiscarded; destination = destChild; } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded. numDiscarded++; } } else if (source instanceof TextNode) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); } else { // else, we don't care about comments, xml proc instructions, etc numDiscarded++; } }
/**
 * Visitor callback for the start of a source node. While {@code skipChildren}
 * is set nothing is copied. Otherwise safe elements are copied with all of
 * their attributes, text nodes are copied as-is, and data nodes are copied
 * only when their parent passes {@code isSafeTag}.
 *
 * @param source node being visited in the source tree
 * @param depth  depth of the node; not used here
 */
public void head(Node source, int depth) {
    if (skipChildren) {
        return;
    }

    if (source instanceof Element) {
        Element sourceElement = (Element) source;
        if (isSafeTag(sourceElement)) {
            String tagName = sourceElement.tagName();
            Attributes copiedAttributes = sourceElement.attributes().clone();
            Element copy = new Element(Tag.valueOf(tagName), sourceElement.baseUri(), copiedAttributes);
            destination.appendChild(copy);
            // Subsequent nodes are appended beneath the element just copied.
            destination = copy;
        } else if (source != root) {
            // An unsafe, non-root element switches copying off.
            skipChildren = true;
        }
        return;
    }

    if (source instanceof TextNode) {
        TextNode text = (TextNode) source;
        destination.appendChild(new TextNode(text.getWholeText(), source.baseUri()));
        return;
    }

    if (source instanceof DataNode && isSafeTag(source.parent())) {
        DataNode data = (DataNode) source;
        destination.appendChild(new DataNode(data.getWholeData(), source.baseUri()));
    }
}
/**
 * Looks through the {@code <script>} elements of the scoreboard page for a
 * data node that starts with {@code window.espn.scoreboardData}, parses the
 * value assigned there as JSON into an {@link EspnJson} and copies its status
 * entries into the supplied map.
 *
 * @param scoreBoardDocument parsed scoreboard page to scan
 * @param hashMap            map that receives the entries of {@code getStatus()}
 */
private void appendGames(Document scoreBoardDocument, HashMap<Status, List<Competitor>> hashMap) {
    Elements scripts = scoreBoardDocument.getElementsByTag("script");
    Pattern scoreboardPattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");

    for (Element script : scripts) {
        for (DataNode dataNode : script.dataNodes()) {
            String scriptBody = dataNode.getWholeData();
            if (!scriptBody.startsWith("window.espn.scoreboardData")) {
                continue;
            }

            // matches() requires the whole script body to fit the pattern.
            Matcher scoreboardMatcher = scoreboardPattern.matcher(scriptBody);
            if (!scoreboardMatcher.matches()) {
                continue;
            }

            String json = scoreboardMatcher.group(1);
            EspnJson parsed = new Gson().fromJson(json, EspnJson.class);
            hashMap.putAll(parsed.getStatus());
        }
    }
}
/**
 * Inserts the outcome of the matcher's current match in front of {@code node}:
 * first the input text the matcher passed over before this match (as a
 * {@link TextNode}, only when there is any), then {@code replacement} as a
 * {@link DataNode}.
 *
 * @param matcher     matcher positioned on a successful match
 * @param node        node in front of which the new nodes are inserted
 * @param replacement content of the data node that stands in for the match
 */
public static void appendReplacement(Matcher matcher, Node node, String replacement) {
    // An empty replacement makes the matcher emit only the text preceding the match.
    StringBuffer skipped = new StringBuffer();
    matcher.appendReplacement(skipped, "");

    String baseUri = node.baseUri();
    if (skipped.length() > 0) {
        String leadingText = skipped.toString();
        node.before(new TextNode(leadingText, baseUri));
    }
    node.before(new DataNode(replacement, baseUri));
}
/**
 * Extracts a location string from the script content of an HTML page.
 *
 * <p>Only the data of the last script data node in the document is scanned,
 * line by line:
 * <ul>
 *   <li>a line containing {@code strWZxxxxxx} and {@code |} yields the text
 *       between the first {@code |} and the next double quote;</li>
 *   <li>a line containing {@code strMsg} yields the first double-quoted value
 *       and ends the scan.</li>
 * </ul>
 * A non-empty {@code strMsg} value wins; otherwise the {@code strWZxxxxxx}
 * value is returned. Lines that lack the expected delimiters are skipped.
 *
 * @param text HTML source to inspect, may be {@code null}
 * @return the extracted location, or {@code null} when {@code text} is
 *         {@code null}, the page has no script data, or nothing was found
 */
public static String getLocationFromText(String text) {
    if (text == null) {
        return null;
    }

    Document doc = Jsoup.parse(text);
    Elements scriptTags = doc.getElementsByTag("script");

    // Each iteration overwrites nodeStr, so only the last data node is kept.
    String nodeStr = null;
    for (Element tag : scriptTags) {
        for (DataNode node : tag.dataNodes()) {
            nodeStr = node.getWholeData();
            System.out.println("node:" + node);
        }
    }
    if (nodeStr == null) {
        // No script data at all: there is nothing to split and scan.
        return null;
    }

    String strWZxxxxxx = null;
    String strMsg = null;
    for (String str : nodeStr.split("\n")) {
        System.out.println("str:" + str);
        if (str.contains("strWZxxxxxx") && str.contains("|")) {
            // split() drops trailing empty strings, so a line ending in '|' has no second part.
            String[] pipeParts = str.split("\\|");
            if (pipeParts.length > 1) {
                String[] quoteParts = pipeParts[1].split("\"");
                if (quoteParts.length > 0) {
                    strWZxxxxxx = quoteParts[0];
                    System.out.println(strWZxxxxxx);
                }
            }
        } else if (str.contains("strMsg")) {
            String[] quoteParts = str.split("\"");
            if (quoteParts.length > 1) {
                strMsg = quoteParts[1];
                System.out.println(strMsg);
                break;
            }
        }
    }

    String location;
    if (strMsg != null && !TextUtils.isEmpty(strMsg)) {
        location = strMsg;
    } else {
        location = strWZxxxxxx;
    }
    return location;
}
/**
 * Swaps every stylesheet link selected by {@code CSS_LINKS_SELECTOR} for a
 * style element that carries the matching CSS content, replacing each link in
 * place so the inclusion order is preserved. Links whose {@code SKIP_INLINE}
 * attribute equals {@code TRUE_VALUE} are left untouched.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private void internStyles(Document doc, List<ExternalCss> cssContents) {
    for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
        boolean skipInline = TRUE_VALUE.equals(link.attr(SKIP_INLINE));
        if (skipInline) {
            continue;
        }

        String cssPath = link.attr(HREF_ATTR);
        Element styleElement = new Element(Tag.valueOf(STYLE_TAG), "");
        DataNode cssNode = new DataNode(getCss(cssContents, cssPath), "");
        styleElement.appendChild(cssNode);
        link.replaceWith(styleElement);
    }
}
/**
 * Visitor callback for the start of a node; applies renaming according to the
 * node kind.
 * <ul>
 *   <li>Polymer element tags get their attributes-attribute value renamed.</li>
 *   <li>A {@code script} tag only sets {@code insideScriptElement}.</li>
 *   <li>Any other element has its annotated event attributes and all attribute
 *       values renamed.</li>
 *   <li>Text nodes have their text run through the databinding-aware string
 *       renaming.</li>
 *   <li>Data nodes seen while {@code insideScriptElement} is set have their
 *       JavaScript renamed via {@code JsRenamer}; when parsing fails the error
 *       is printed to {@code System.err} and the script is written back
 *       unchanged.</li>
 * </ul>
 *
 * @param node  node being visited
 * @param depth depth of the node; not used here
 */
@Override
public void head(Node node, int depth) {
    if (node instanceof Element) {
        Element element = (Element) node;
        String tagName = element.tag().getName();
        if (tagName.equals(HtmlTags.POLYMER_ELEMENT.getName())) {
            renameAttributesAttributeValue(element);
        } else if (tagName.equals("script")) {
            insideScriptElement = true;
        } else {
            renameAllAnnotatedEventAttributes(element);
            renameAllAttributeValues(element);
        }
        return;
    }

    if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        String renamedText = renameStringWithDatabindingDirectives(textNode.getWholeText());
        textNode.text(renamedText);
        return;
    }

    if (!insideScriptElement || !(node instanceof DataNode)) {
        return;
    }

    DataNode scriptData = (DataNode) node;
    String script = scriptData.getWholeData();
    try {
        script = JsRenamer.renameProperties(renameMap, script);
    } catch (JavaScriptParsingException e) {
        System.err.println(e);
    }
    scriptData.setWholeData(script);
}
/**
 * Replaces a text, comment or data node with an empty text node that keeps
 * the original node's base URI. Every other kind of node is left alone.
 *
 * @param node node being visited
 */
@Override
public void visit(Node node) {
    boolean carriesContent = node instanceof TextNode
            || node instanceof Comment
            || node instanceof DataNode;
    if (!carriesContent) {
        return;
    }

    TextNode blank = new TextNode(StringUtils.EMPTY, node.baseUri());
    node.replaceWith(blank);
}
@Override public void modifyBootstrapPage(BootstrapPageResponse response) { Document document = response.getDocument(); // Add the widgetsetUrl parameter to the bootstrap parameters. // This is overridden to avoid adding the naive random query // parameter (used by core to avoid caching of js file). final VaadinService service = response.getSession().getService(); final VaadinRequest request = response.getRequest(); final String staticFilePath = service .getStaticFileLocation(request); // VAADIN folder location final String vaadinDir = staticFilePath + "/VAADIN/"; // Figure out widgetset final UICreateEvent event = new UICreateEvent(request, response.getUiClass()); String widgetset = response.getUIProvider().getWidgetset(event); if (widgetset == null) { widgetset = request.getService() .getConfiguredWidgetset(request); } // Url for the widgetset final String widgetsetUrl = String.format( "%swidgetsets/%s/%s.nocache.js", vaadinDir, widgetset, widgetset); // Update the bootstrap page Element scriptTag = document.getElementsByTag("script").last(); String script = scriptTag.html(); scriptTag.html(""); script = script.replace("});", ",\"widgetsetUrl\":\"" + widgetsetUrl + "\",\"offlineEnabled\":" + isOfflineModeEnabled() + "});"); scriptTag.appendChild(new DataNode(script, scriptTag.baseUri())); if (isCacheManifestEnabled()) { // Add cache manifest attribute to html tag document.getElementsByTag("html").attr( "manifest", vaadinDir + "widgetsets/" + widgetset + "/" + generateManifestFileName(response)); } }