Java 类org.jsoup.nodes.Element 实例源码

项目:OpenEyesReading-android    文件:HttpApiImpl.java   
/**
 * Parses the daily-recommendation entries out of a page.
 *
 * <p>Only the first element carrying the {@code tuijian} class is inspected; every
 * {@code box} element inside it that contains an {@code info} element becomes one entry.
 *
 * @param str HTML source of the page
 * @return the parsed entries in document order; empty when the page has no
 *         {@code tuijian} element
 */
public List<IHistoryDailyPicks> takeDailyPick(String str) {
    List<IHistoryDailyPicks> dailyPicksList = new ArrayList<>();
    Document document = Jsoup.parse(str);

    Elements sections = document.getElementsByClass("tuijian");
    if (sections.isEmpty()) {
        // Page layout without a recommendation section: nothing to parse.
        return dailyPicksList;
    }

    for (Element box : sections.get(0).getElementsByClass("box")) {
        Elements infos = box.getElementsByClass("info");
        if (infos.isEmpty()) {
            // A box without an info element carries none of the fields read below.
            continue;
        }
        Element info = infos.get(0);
        Elements links = info.getElementsByTag("a");

        IHistoryDailyPicks dailyPicks = new IHistoryDailyPicks();
        dailyPicks.setTitle(links.text());// title
        String time = info.getElementsByClass("time").text().trim();
        // The last character of the time text is dropped; an empty text is kept as-is
        // instead of calling substring(0, -1).
        dailyPicks.setTime(time.isEmpty() ? time : time.substring(0, time.length() - 1));// time
        dailyPicks.setDiscuss(info.getElementsByClass("pinglun").text());// Discuss
        dailyPicks.setDescribe(box.getElementsByClass("info1").text());// Describe
        dailyPicks.setHref(AppUtils.Constants.URL_ILISHI + links.attr("href"));// Href
        dailyPicks.setImgHref(AppUtils.Constants.URL_ILISHI + box.getElementsByTag("img").attr("src"));// imgHref
        dailyPicksList.add(dailyPicks);
    }
    return dailyPicksList;
}
项目:NewPipeExtractor    文件:SoundcloudParsingHelper.java   
/**
 * Returns the SoundCloud client id, fetching it on first use.
 *
 * <p>A non-empty value already stored in the static {@code clientId} field is returned
 * directly. Otherwise the SoundCloud front page is downloaded, the app script it
 * references is downloaded as well, and the id is extracted from that script with a
 * regular expression and stored in the field.
 *
 * @return the client id
 * @throws IOException if a download fails or the front page does not reference the
 *                     expected app script
 */
public static String clientId() throws ReCaptchaException, IOException, RegexException {
    if (clientId != null && !clientId.isEmpty()) return clientId;

    Downloader dl = NewPipe.getDownloader();

    String response = dl.download("https://soundcloud.com");
    Document doc = Jsoup.parse(response);

    // TODO: Find a less heavy way to get the client_id
    // Currently we are downloading a 1MB file (!) just to get the client_id,
    // youtube-dl don't have a way too, they are just hardcoding and updating it when it becomes invalid.
    // The embed mode has a way to get it, but we still have to download a heavy file (~800KB).
    Element jsElement = doc.select("script[src^=https://a-v2.sndcdn.com/assets/app]").first();
    if (jsElement == null) {
        // first() yields null when nothing matched; fail with context instead of an NPE.
        throw new IOException("Could not find the app script on https://soundcloud.com");
    }
    String js = dl.download(jsElement.attr("src"));

    return clientId = Parser.matchGroup1(",client_id:\"(.*?)\"", js);
}
项目:wulkanowy    文件:TeachersInfo.java   
/**
 * Reads the school page and collects the class name, the class teachers and the
 * subject/teacher table.
 *
 * <p>The first paragraph directly under {@code .mainContainer} supplies the class name
 * (text after the first {@code ": "} of its first {@code ", "}-separated part) and the
 * class teachers (the {@code ", "}-separated text after {@code "Wychowawcy:"}). Each
 * table row contributes one subject: the second cell is the name, the third cell the
 * {@code ", "}-separated teachers.
 *
 * @return the collected data
 * @throws IOException declared for the page fetch
 */
public TeachersData getTeachersData() throws IOException {
    Document page = snp.getSnPPageDocument(SCHOOL_PAGE_URL);
    Elements tableRows = page.select(".mainContainer > table tbody tr");
    String header = page.select(".mainContainer > p").first().text();

    List<Subject> subjects = new ArrayList<>();
    for (Element row : tableRows) {
        Elements cells = row.select("td");
        Subject subject = new Subject()
                .setName(cells.get(1).text())
                .setTeachers(cells.get(2).text().split(", "));
        subjects.add(subject);
    }

    String className = header.split(", ")[0].split(": ")[1].trim();
    String[] classTeachers = header.split("Wychowawcy:")[1].trim().split(", ");

    return new TeachersData()
            .setClassName(className)
            .setClassTeacher(classTeachers)
            .setSubjects(subjects);
}
项目:OpenHub    文件:TopicsPresenter.java   
/**
 * Builds a topic for every element whose class list matches
 * {@code "col-12 col-sm-6 col-md-4 mb-4"}.
 *
 * <p>For each such element the id is the part of the first link's {@code href} after
 * the last {@code '/'}, the name and description are the first text nodes of the first
 * and second {@code a > p} elements, and the image is the {@code src} of the first
 * {@code a > img} (or {@code null} when there is none).
 *
 * @param doc the parsed page
 * @return the topics in document order
 */
private ArrayList<Topic> getTopTopics(Document doc) throws Exception{
    ArrayList<Topic> topics = new ArrayList<>();
    for (Element card : doc.getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")) {
        Element link = card.select("a").first();
        Element picture = card.select("a > img").first();
        Elements paragraphs = card.select("a > p");
        Element titleParagraph = paragraphs.get(0);
        Element descParagraph = paragraphs.get(1);

        String href = link.attr("href");
        String id = href.substring(href.lastIndexOf("/") + 1);
        String name = titleParagraph.textNodes().get(0).text();
        String description = descParagraph.textNodes().get(0).text();
        String image = (picture != null) ? picture.attr("src") : null;

        topics.add(new Topic()
                .setId(id)
                .setName(name)
                .setDesc(description)
                .setImage(image));
    }
    return topics;
}
项目:LushX    文件:PandaCrawler.java   
/**
 * Extracts live-stream entries from the page and stores them through
 * {@code redisSourceManager}.
 *
 * <p>Collection stops as soon as the list holds more than 48 entries, i.e. at most 49
 * entries are saved.
 *
 * @param document the parsed listing page
 */
private void savePandaLivesToRedis(Document document) {
    List<Video> liveVideos = new ArrayList<>();
    for (Element item : document.select("li.video-list-item.video-no-tag")) {
        String nickname = item.select("div.video-info span.video-nickname").text();
        // Strip the scheme so the stored image URL is protocol-relative.
        String cover = item.select("img.video-img").attr("data-original").replace("http:", "");
        String link = PANDA + item.attr("data-id");

        Video live = new Video();
        live.setTitle(nickname);
        live.setImage(cover);
        live.setValue(link);
        liveVideos.add(live);

        if (liveVideos.size() > 48) {
            break;
        }
    }
    redisSourceManager.saveVideos(redisSourceManager.VIDEO_PREFIx_HOME_LIVE_KEY + "_" + TAG, liveVideos);
}
项目:NewPipeExtractor    文件:YoutubeStreamExtractor.java   
/**
 * Collects the related videos listed in the {@code watch-related} list of the page.
 *
 * <p>List entries that contain no link whose class includes {@code content-link} are
 * left out. When the list itself is missing, an empty collector is returned.
 *
 * @return the collector holding the related videos
 * @throws ParsingException wrapping any exception raised while collecting
 */
@Override
public StreamInfoItemCollector getRelatedVideos() throws IOException, ExtractionException {
    assertPageFetched();
    try {
        StreamInfoItemCollector related = new StreamInfoItemCollector(getServiceId());
        Element relatedList = doc.select("ul[id=\"watch-related\"]").first();
        if (relatedList == null) {
            return related;
        }
        for (Element entry : relatedList.children()) {
            // first check if we have a playlist. If so leave them out
            boolean hasContentLink = entry.select("a[class*=\"content-link\"]").first() != null;
            if (hasContentLink) {
                related.commit(extractVideoPreviewInfo(entry));
            }
        }
        return related;
    } catch (Exception e) {
        throw new ParsingException("Could not get related videos", e);
    }
}
项目:Blackboard    文件:HtmlUtils.java   
/**
 * Recursively removes leaf elements whose tag is not one of
 * {@code br, a, p, h1, h2, h3, h4, span}.
 *
 * <p>Children that have element children of their own are descended into instead of
 * being examined themselves.
 *
 * @param element root of the subtree to clean; modified in place
 */
private static void removeUselessElements(Element element) {
    for (Element child : element.children()) {
        if (!child.children().isEmpty()) {
            removeUselessElements(child);
            continue;
        }

        switch (child.tagName()) {
            case "br":
            case "a":
            case "p":
            case "span":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
                // Allowed leaf tag: keep it.
                break;
            default:
                Element parent = child.parent();
                child.remove();
                // child has no element children on this path, so this inserts an empty list.
                parent.insertChildren(0, child.children());
                break;
        }
    }
}
项目:jspider    文件:JsoupParser.java   
/**
 * Extracts a trimmed string from the first element of {@code elements}.
 *
 * <p>The expression's flags are checked in the order text, val, attr, outerHtml,
 * ownText, html; the first one set decides which accessor is used. When none is set
 * the element's text is used.
 *
 * @param elements        candidate elements; may be {@code null} or empty
 * @param jsoupExpression describes which accessor to apply
 * @return the trimmed value, or {@code null} when there is no element
 */
private Object getValueText(Elements elements, JsoupExpression jsoupExpression) {
    if (elements == null || elements.isEmpty()) {
        return null;
    }
    Element first = elements.get(0);

    String raw;
    if (jsoupExpression.isTextMethod()) {
        raw = first.text();
    } else if (jsoupExpression.isValMethod()) {
        raw = first.val();
    } else if (jsoupExpression.isAttrMethod()) {
        raw = first.attr(jsoupExpression.getParameter());
    } else if (jsoupExpression.isOuterHtmlMethod()) {
        raw = first.outerHtml();
    } else if (jsoupExpression.isOwnTextMethod()) {
        raw = first.ownText();
    } else if (jsoupExpression.isHtmlMethod()) {
        raw = first.html();
    } else {
        raw = first.text();
    }
    return StringUtils.trim(raw);
}
项目:Android-Scrapper    文件:ExampleUnitTest.java   
/**
 * Fetches the ESPN WNBA scoreboard page, extracts the JSON assigned to
 * {@code window.espn.scoreboardData} from its inline scripts and checks that the
 * decoded object lists at least one team.
 *
 * <p>The test fails when no inline script yields scoreboard data, so it cannot pass
 * without having checked anything.
 */
@Test
public void leagueStatusCheck() throws Exception {
    Document doc = Jsoup.connect("http://www.espn.com/wnba/scoreboard/_/group/50")
            .timeout(60 * 1000)
            .maxBodySize(0)
            .get();
    Elements scriptElements = doc.getElementsByTag("script");
    Pattern pattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");
    Gson gson = new Gson();
    boolean scoreboardFound = false;
    for (Element element : scriptElements) {
        for (DataNode node : element.dataNodes()) {
            if (node.getWholeData().startsWith("window.espn.scoreboardData")) {
                Matcher matcher = pattern.matcher(node.getWholeData());
                if (matcher.matches()) {
                    EspnJson espnJson = gson.fromJson(matcher.group(1), EspnJson.class);
                    System.out.println(espnJson.getTeams());
                    assertEquals(false, espnJson.getTeams().isEmpty());
                    scoreboardFound = true;
                }
            }
        }
    }
    // Without this the test would succeed even if the page contained no scoreboard script.
    assertEquals(true, scoreboardFound);
}
项目:Android-Scrapper    文件:LeagueBase.java   
/**
 * Builds a game from one HTML block by feeding each of its {@code td} cells to the
 * info parsers.
 *
 * <p>The first cell goes to {@code createGameInfo}; every later cell goes to
 * {@code createBidInfo}, with the flag set only for the third cell. Before its text is
 * taken, each cell's HTML has its {@code <br>} tags replaced by the literal
 * {@code br2n}.
 *
 * @param currentHtmlBlock element containing the cells of one game
 * @return the populated game
 */
private Game constructGameFromHtmlBlock(Element currentHtmlBlock) {
    Game game = DefaultFactory.Game.constructDefault();
    game.setScoreType(getScoreType());
    game.setLeagueType(this);

    Elements columns = currentHtmlBlock.select("td");
    for (int position = 0; position < columns.size(); position++) {
        String markedHtml = columns.get(position).html().replaceAll("(?i)<br[^>]*>", "br2n");
        String columnText = Jsoup.parse(markedHtml).text();
        if (position == 0) {
            createGameInfo(columnText, game);
        } else {
            createBidInfo(columnText, game, position == 2);
        }
    }

    game.setVIBid();
    game.createID();
    return game;
}
项目:KBUnitTest    文件:ShadowResources.java   
/**
 * Builds the resource-name to value mapping from the {@code string} tags of the values
 * XML document.
 *
 * <p>A tag is included only when its first child node is a text node; that node's text
 * becomes the value.
 *
 * @return map from the {@code name} attribute to the text value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();

    for (Element stringTag : getValuesXmlDocument().getElementsByTag("string")) {
        boolean startsWithText =
                stringTag.childNodeSize() > 0 && stringTag.childNode(0) instanceof TextNode;
        if (!startsWithText) {
            continue;
        }
        TextNode value = (TextNode) stringTag.childNode(0);
        nameToValue.put(stringTag.attr("name"), value.text());
    }

    return nameToValue;
}
项目:Mod-Tools    文件:RemoteModParser.java   
/**
 * Produces a stripped-down HTML string for an element without touching the original.
 *
 * <p>Works on a clone: removes {@code img, script, style, link, canvas} elements,
 * drops the {@code style, class, target, id, src} attributes, and collapses every run
 * of two or more whitespace characters in the resulting inner HTML into one space.
 *
 * @param element the element to clean
 * @return the cleaned inner HTML of the clone
 */
protected String clean(Element element) {
    Element copy = element.clone();

    for (String tagName : "img,script,style,link,canvas".split(",")) {
        for (Element unwanted : copy.getElementsByTag(tagName)) {
            unwanted.remove();
        }
    }

    for (String attribute : "style,class,target,id,src".split(",")) {
        for (Element holder : copy.getElementsByAttribute(attribute)) {
            holder.removeAttr(attribute);
        }
    }

    String html = copy.html();
    return html.replaceAll("\\s{2,}", " ");
}
项目:ripme    文件:FlickrRipper.java   
/**
 * Logs in to Flickr.
 *
 * <p>Loads the sign-in page, copies all hidden form inputs, adds the login fields
 * (login and password are Base64-decoded from literals), and posts them to the action
 * URL of the first {@code method=post} form, sending the cookies of the first
 * response.
 *
 * @return Cookies for logged-in session
 * @throws IOException if either request fails
 */
@SuppressWarnings("unused")
private Map<String,String> signinToFlickr() throws IOException {
    Response signinResponse = Jsoup.connect("http://www.flickr.com/signin/")
                        .userAgent(USER_AGENT)
                        .followRedirects(true)
                        .method(Method.GET)
                        .execute();
    Document signinPage = signinResponse.parse();

    Map<String,String> formFields = new HashMap<>();
    for (Element hidden : signinPage.select("input[type=hidden]")) {
        formFields.put(hidden.attr("name"), hidden.attr("value"));
    }
    formFields.put("passwd_raw", "");
    formFields.put(".save", "");
    formFields.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
    formFields.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));

    String postUrl = signinPage.select("form[method=post]").get(0).attr("action");
    Response loginResponse = Jsoup.connect(postUrl)
                .cookies(signinResponse.cookies())
                .data(formFields)
                .method(Method.POST)
                .execute();
    return loginResponse.cookies();
}
项目:xxl-crawler    文件:XxlCrawlerTest.java   
/**
 * Runs a small crawl over the configured blog URL with three threads and prints every
 * parsed page.
 *
 * @param args unused
 */
public static void main(String[] args) {
    PageParser<PageVo> printingParser = new PageParser<PageVo>() {
        @Override
        public void parse(Document html, Element pageVoElement, PageVo pageVo) {
            // Print the page's base URI followed by the populated PageVo.
            System.out.println(html.baseUri() + ":" + pageVo.toString());
        }
    };

    XxlCrawler crawler = new XxlCrawler.Builder()
            .setUrls("https://my.oschina.net/xuxueli/blog")
            .setWhiteUrlRegexs("https://my\\.oschina\\.net/xuxueli/blog/\\d+")
            .setThreadCount(3)
            .setPageParser(printingParser)
            .build();

    System.out.println("start");
    crawler.start(true);
    System.out.println("end");
}
项目:PicKing    文件:Meizi4493.java   
/**
 * Parses a detail page (decoded as {@code gb2312}) into a single picture entry.
 *
 * <p>The entry gets the {@code src} of the first {@code div.picsbox p img}, the text of
 * {@code .picmainer h1} as title and the texts of all {@code .pleft a} links as tags;
 * each is set only when the selector matched something. The entry is added even when
 * nothing matched.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map to fill and return
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} set
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "gb2312"));
    PicInfo picture = new PicInfo();

    Elements images = page.select("div.picsbox p img");
    if (!images.isEmpty()) {
        picture.setPicUrl(images.get(0).attr("src"));
    }

    Elements heading = page.select(".picmainer h1");
    if (!heading.isEmpty()) {
        picture.setTitle(heading.text());
    }

    Elements tagLinks = page.select(".pleft a");
    if (!tagLinks.isEmpty()) {
        List<String> tagNames = new ArrayList<>();
        for (Element tagLink : tagLinks) {
            tagNames.add(tagLink.text());
        }
        picture.setTags(tagNames);
    }

    List<PicInfo> pictures = new ArrayList<>();
    pictures.add(picture);
    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:StubbornJava    文件:ThemeForestScraper.java   
/**
 * Downloads the popular-themes page and converts the search matches embedded in its
 * {@code window.INITIAL_STATE=} script into themes.
 *
 * @return one theme per entry under {@code searchPage.results.matches}
 * @throws IllegalStateException if the page contains no script starting with
 *                               {@code window.INITIAL_STATE=}
 */
public static List<HtmlCssTheme> popularThemes() {
    final String statePrefix = "window.INITIAL_STATE=";

    HttpUrl url = HttpUrl.parse(POPULAR_THEMES_URL);
    Request request = new Request.Builder().url(url).get().build();
    String html = Retry.retryUntilSuccessfulWithBackoff(
        () -> client.newCall(request).execute()
    );

    Elements elements = Jsoup.parse(html).select("script");
    // Fail with a descriptive error rather than dereferencing null when the page
    // no longer embeds its state in a script.
    Element script = Seq.seq(elements)
                        .filter(e -> e.html().startsWith(statePrefix))
                        .findFirst()
                        .orElseThrow(() -> new IllegalStateException(
                            "No script starting with " + statePrefix + " found at " + POPULAR_THEMES_URL));
    String rawJson = script.html().substring(statePrefix.length());
    JsonNode node = Json.serializer().nodeFromJson(rawJson);
    return Seq.seq(node.path("searchPage").path("results").path("matches"))
              .map(ThemeForestScraper::themeFromElement)
              .toList();
}
项目:ripme    文件:ImagebamRipper.java   
/**
 * Loads the page at {@code url}, finds the first {@code .image-container img} and
 * queues its {@code src} for download.
 *
 * <p>Logs a warning and returns when no image is found. When the
 * {@code download.save_order} setting is on (default {@code true}), the file name
 * prefix is the zero-padded three-digit {@code index} followed by an underscore.
 * I/O errors are logged, not propagated.
 */
private void fetchImage() {
    try {
        Elements candidates = Http.url(url).get().select(".image-container img");
        if (candidates.isEmpty()) {
            logger.warn("Image not found at " + this.url);
            return;
        }

        String source = candidates.first().attr("src");
        logger.info("Found URL " + source);

        // Provide prefix and let the AbstractRipper "guess" the filename
        String prefix = Utils.getConfigBoolean("download.save_order", true)
                ? String.format("%03d_", index)
                : "";
        addURLToDownload(new URL(source), prefix);
    } catch (IOException e) {
        logger.error("[!] Exception while loading/parsing " + this.url, e);
    }
}
项目:LiteReader    文件:StaffDetailSummaryViewModel.java   
/**
 * Fetches a page and returns the summary text found in the second child element of
 * the element with id {@code intro}.
 *
 * <p>When that element has exactly two child nodes, the text of its first
 * {@code hidden}-class element is used; otherwise the element's own text is used.
 *
 * @param url address of the page to fetch
 * @return the summary, or an empty string when anything goes wrong (the stack trace
 *         is printed)
 */
private String extractContent(String url) {
    try {
        Element intro = Jsoup.connect(url).get().getElementById("intro").child(1);
        if (intro.childNodeSize() != 2) {
            return intro.text();
        }
        return intro.getElementsByClass("hidden").get(0).text();
    } catch (Exception e) {
        e.printStackTrace();
        return "";
    }
}
项目:ChatExchange-old    文件:ChatFragment.java   
/**
 * Parses the chat HTML and, for every matched container, locates the link whose
 * {@code class} attribute is exactly {@code signature}.
 *
 * <p>The located link is not used further yet, so the method currently has no
 * observable effect.
 *
 * @param url  not used by this implementation
 * @param html chat markup to parse
 */
private void processMessageViews(URL url, String html)
{
    Document document = Jsoup.parse(html);
    // NOTE(review): "user-container" is a tag selector as written; if the markup uses
    // it as a class name this should be ".user-container" — confirm.
    Elements elements = document.select("user-container");

    for (Element e : elements)
    {
        Elements link = e.select("a");
        // Starts as null: the previous placeholder, new Element(""), is rejected by
        // jsoup's tag-name validation and would throw on the first container.
        Element signature = null;

        for (Element e1 : link)
        {
            if (e1.hasAttr("class") && e1.attr("class").equals("signature"))
            {
                signature = e1;
                break;
            }
        }

    }
}
项目:BackOffice    文件:TestDataController.java   
/**
 * Extracts test-data rows from the HTML table stored under
 * {@code body.storage.value} of the content's JSON representation.
 *
 * <p>For every table row, the cells whose {@code colspan} differs from
 * {@code countColumns} are considered. A row is kept when it has at least five such
 * cells and the first one is not empty; its first five cell texts are stored under
 * {@code dKey}, {@code dValue}, {@code dComment}, {@code dHltValue} and
 * {@code dDevValue}. Rows with fewer cells are skipped instead of failing with an
 * index error.
 *
 * @param content page content whose string form is JSON
 * @return one map per kept row, in document order
 */
private List<Map<String, String>> getParsedData(Content content){
    final int requiredCells = 5;

    List<Map<String,String >> testData = new ArrayList<>();
    JSONObject jsonObject = new JSONObject(content.toString());
    JSONObject body = (JSONObject) jsonObject.get("body");
    JSONObject storage = (JSONObject) body.get("storage");
    Document doc = Jsoup.parse(storage.get("value").toString());
    Elements tRows = doc.select("tr");
    LOG.info("Парсим данные страницы");
    for (Element row : tRows){
        Elements tds = row.select("td[colspan!="+countColumns+"]");
        // All five cells are read below, so require them up front.
        if (tds.size() >= requiredCells && !tds.get(0).text().equals("")){
            Map<String,String> rowData = new HashMap<>();
            rowData.put("dKey",tds.get(0).text());
            rowData.put("dValue",tds.get(1).text());
            rowData.put("dComment",tds.get(2).text());
            rowData.put("dHltValue",tds.get(3).text());
            rowData.put("dDevValue",tds.get(4).text());
            testData.add(rowData);
        }
        LOG.debug(row.text());
    }
    return testData;
}
项目:zhkuas_ssm_maven    文件:CourseBasicalInfoAnalysiser.java   
/**
 * Turns the options of the course selector found in the HTML into courses.
 *
 * <p>Options with an empty {@code value} attribute are skipped. When the selector is
 * not found, the result is empty.
 *
 * @param html page source
 * @return list of {@code Course} objects in option order
 */
@Override
public List doAnalysis(String html) {
    List<Course> courses = new ArrayList<Course>();
    Element selector = HTMLUtil.getSelectorByName(html, Constants.HTML_ELEMENT_NAME.SELECT_NAME_COURSE.getValue());
    if (selector == null) {
        return courses;
    }

    for (Element option : selector.children()) {
        String number = option.attr("value");
        if (number.isEmpty()) {
            continue;
        }
        String label = option.text();
        Course course = new Course();
        course.setName(HTMLUtil.cutName(label));
        course.setNo(number);
        course.setNameNo(HTMLUtil.cutNameNo(label));
        courses.add(course);
    }
    return courses;
}
项目:PicKing    文件:XiuMM.java   
/**
 * Parses an album page (decoded as {@code utf-8}) into one picture entry per
 * {@code .gallary_item .pic_box img}.
 *
 * <p>Each picture URL is {@code baseUrl} followed by the image's {@code src}. All
 * entries share the album title, which is the text of the first
 * {@code div.album_desc div.inline} or an empty string when there is none.
 *
 * @param baseUrl    prefix for the picture URLs
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map to fill and return
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} set
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements titleBlocks = page.select("div.album_desc div.inline");
    String albumTitle = titleBlocks.isEmpty() ? "" : titleBlocks.get(0).text();

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select(".gallary_item .pic_box img")) {
        String pictureUrl = baseUrl + image.attr("src");
        pictures.add(new PicInfo(pictureUrl).setTitle(albumTitle));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:NFLFantasyAnalyzer    文件:WebScraper.java   
/**
 * Finds the team's row in the second table of {@code rbPointsAllowedURL} and stores
 * its rank and average on the team.
 *
 * <p>Rows are scanned from the third one on; the rank is the row's 1-based position in
 * that scan. The first row whose first cell contains the team name wins. The average
 * is read from cell 18 when the third cell contains {@code "*"}, otherwise from
 * cell 19. Nothing is set when no row matches.
 *
 * @param team the team to look up and update
 */
public void populatePointsGivenToRB(Team team) {
    Elements rows = rbPointsAllowedURL.select("table").get(1).select("tr");

    for (int i = 2; i < rows.size(); i++) {
        Elements cols = rows.get(i).select("td");
        int rank = i - 1;

        if (!cols.get(0).text().contains(team.getName())) {
            continue;
        }

        int averageColumn = cols.get(2).text().contains("*") ? 18 : 19;
        team.setFpToRBRank(rank);
        team.setFpToRBAvg(Double.parseDouble(cols.get(averageColumn).text()));
        return;
    }
}
项目:JsoupSample    文件:YingTaoJsoupManager.java   
/**
 * Builds a model for every {@code div.r} element of {@code document} except the
 * last one.
 *
 * <p>The title is the text of the links carrying a {@code class} attribute; the URL is
 * the {@code href} of the first link without the {@code link} class.
 *
 * @return the models in document order
 */
public List<MagneticModel> getList() {
    List<MagneticModel> models = new ArrayList<>();
    Elements blocks = document.select("div.r");
    int lastIndex = blocks.size() - 1;
    // The final block is deliberately left out.
    for (int i = 0; i < lastIndex; i++) {
        Element block = blocks.get(i);
        MagneticModel model = new MagneticModel();
        model.title = block.select("a[class]").text();
        model.url = block.select("a:not(.link)").attr("href");
        models.add(model);
    }
    return models;
}
项目:Xndroid    文件:OutputFormatter.java   
/**
 * Appends the text of an element's subtree to {@code accum}, skipping every child
 * node for which {@code unlikely} returns true.
 *
 * <p>Before descending into a child element, one space is appended when the element
 * is a block element and {@code accum} is non-empty without trailing whitespace, or
 * otherwise when the element is a {@code br}.
 *
 * @param e      element whose child nodes are processed
 * @param accum  receives the text
 * @param indent recursion depth; only passed along
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node node : e.childNodes()) {
        if (unlikely(node)) {
            continue;
        }

        if (node instanceof TextNode) {
            accum.append(((TextNode) node).text());
            continue;
        }

        if (!(node instanceof Element)) {
            continue;
        }

        Element nested = (Element) node;
        boolean blockNeedsSeparator =
                accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:calendar-component    文件:Calendar.java   
/**
 * Writes this component's settings to the design element, after the superclass has
 * written its own.
 *
 * <p>Attributes written: {@code time-format} ({@code 12h} or {@code 24h}) when a time
 * format is set, {@code start-date} and {@code end-date} when the respective date is
 * set, and {@code time-zone} when the zone differs from the system default.
 *
 * @param design        element receiving the attributes
 * @param designContext passed to the superclass
 */
@Override
public void writeDesign(Element design, DesignContext designContext) {
    super.writeDesign(design, designContext);

    if (currentTimeFormat != null) {
        String timeFormat = (currentTimeFormat == TimeFormat.Format12H) ? "12h" : "24h";
        design.attr("time-format", timeFormat);
    }

    if (startDate != null) {
        String formattedStart = DATE_FORMAT.format(getStartDate());
        design.attr("start-date", formattedStart);
    }

    if (endDate != null) {
        String formattedEnd = DATE_FORMAT.format(getEndDate());
        design.attr("end-date", formattedEnd);
    }

    boolean usesSystemZone = getZoneId().equals(ZoneId.systemDefault());
    if (!usesSystemZone) {
        design.attr("time-zone", getZoneId().getId());
    }
}
项目:ripme    文件:PahealRipper.java   
/**
 * Collects the absolute {@code href} of every {@code .shm-thumb.thumb>a} link that
 * does not have the {@code shm-thumb-link} class.
 *
 * @param page the parsed listing page
 * @return the URLs in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    Elements links = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
    int count = links.size();
    List<String> urls = new ArrayList<>(count);

    for (int i = 0; i < count; i++) {
        urls.add(links.get(i).absUrl("href"));
    }

    return urls;
}
项目:SeleniumTest    文件:Functions.java   
/**
 * Returns the inner HTML of every element in the context.
 *
 * @param context the elements to read; may be {@code null}
 * @return one node per element, wrapping its inner HTML; empty when the context is
 *         {@code null} or empty
 */
public List<JXNode> html(Elements context){
    List<JXNode> nodes = new LinkedList<JXNode>();
    if (context == null || context.isEmpty()) {
        return nodes;
    }
    for (Element element : context) {
        nodes.add(JXNode.t(element.html()));
    }
    return nodes;
}
项目:Xndroid    文件:ArticleTextExtractor.java   
/**
 * Scores an element from its class name, id, inline style and {@code itemprop}.
 *
 * <p>Adjustments: {@code POSITIVE} match on class +35, on id +45; {@code UNLIKELY}
 * match on class or id -20 each; {@code NEGATIVE} match on class or id -50 each;
 * {@code NEGATIVE_STYLE} match on a non-empty style -50; {@code POSITIVE} match on a
 * non-empty itemprop +100.
 *
 * @param e the element to score
 * @return the summed weight
 */
private int calcWeight(Element e) {
    String className = e.className();
    String id = e.id();

    int weight = 0;
    weight += POSITIVE.matcher(className).find() ? 35 : 0;
    weight += POSITIVE.matcher(id).find() ? 45 : 0;
    weight -= UNLIKELY.matcher(className).find() ? 20 : 0;
    weight -= UNLIKELY.matcher(id).find() ? 20 : 0;
    weight -= NEGATIVE.matcher(className).find() ? 50 : 0;
    weight -= NEGATIVE.matcher(id).find() ? 50 : 0;

    String style = e.attr("style");
    boolean hasStyle = style != null && !style.isEmpty();
    if (hasStyle && NEGATIVE_STYLE.matcher(style).find()) {
        weight -= 50;
    }

    String itemprop = e.attr("itemprop");
    boolean hasItemprop = itemprop != null && !itemprop.isEmpty();
    if (hasItemprop && POSITIVE.matcher(itemprop).find()) {
        weight += 100;
    }

    return weight;
}
项目:ripme    文件:NhentaiRipper.java   
/**
 * Builds one URL per {@code .gallerythumb} element by prefixing its {@code href} with
 * {@code https://nhentai.net}.
 *
 * @param page the parsed gallery page
 * @return the URLs in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> urls = new ArrayList<>();
    for (Element thumb : page.select(".gallerythumb")) {
        urls.add("https://nhentai.net" + thumb.attr("href"));
    }
    return urls;
}
项目:ZhihuQuestionsSpider    文件:KuaidailiProxySite.java   
/**
 * Parses the proxy table of a page into proxies.
 *
 * <p>For every row under {@code div#list table tbody}, the first child's text is the
 * IP and the second child's text is parsed as the port.
 *
 * @param content HTML source of the page
 * @return one proxy per table row, in document order
 */
@Override
public List<Proxy> parseProxys(String content) {
    Elements rows = Jsoup.parse(content).select("div#list table tbody tr");
    List<Proxy> proxies = new ArrayList<>();
    for (Element row : rows) {
        Elements cells = row.children();
        String ip = cells.get(0).text().trim();
        Integer port = Integer.parseInt(cells.get(1).text());
        proxies.add(new Proxy(ip, port));
    }
    return proxies;
}
项目:OpenEyesReading-android    文件:HttpApiImpl.java   
/**
 * Parses the recommended-reading entries out of a page.
 *
 * <p>Only the first element carrying the {@code oldpic} class is inspected; each of
 * its {@code li} elements becomes one entry built from the contained image and link.
 *
 * @param str HTML source of the page
 * @return the parsed entries in document order; empty when the page has no
 *         {@code oldpic} element
 */
public List<IHistoryOldPhoto> takeProposeRead(String str) {
    List<IHistoryOldPhoto> proposeReads = new ArrayList<>();
    Document document = Jsoup.parse(str);

    Elements sections = document.getElementsByClass("oldpic");
    if (sections.isEmpty()) {
        // Page layout without the section: nothing to parse.
        return proposeReads;
    }

    for (Element item : sections.get(0).getElementsByTag("li")) {
        Elements images = item.getElementsByTag("img");

        IHistoryOldPhoto proposeRead = new IHistoryOldPhoto();
        proposeRead.setTitle(images.attr("title"));//title
        proposeRead.setHref(AppUtils.Constants.URL_ILISHI + item.getElementsByTag("a").attr("href"));//href
        proposeRead.setImgHref(AppUtils.Constants.URL_ILISHI + images.attr("src"));//imgHref
        proposeReads.add(proposeRead);
    }
    return proposeReads;
}
项目:NetDiscovery    文件:CssSelector.java   
/**
 * Returns the first element matching {@code selectorText} below the given element.
 *
 * @param element the element to search in
 * @return the first match, or {@code null} when {@code Preconditions.isNotBlank}
 *         rejects the match list
 */
@Override
public Element selectElement(Element element) {
    Elements matches = element.select(selectorText);
    return Preconditions.isNotBlank(matches) ? matches.get(0) : null;
}
项目:eadlsync    文件:YStatementSeItemHelper.java   
/**
 * Fetches the document addressed by the item's id and returns its body.
 *
 * @param item the item whose id is used as the URL
 * @return the body element, or {@code null} when the fetch fails with an
 *         {@link IOException} (logged at debug level together with the cause)
 */
private static Element getSeItemContentBody(SeItem item) {
    try {
        Document doc = Jsoup.connect(item.getId().toString()).get();
        return doc.body();
    } catch (IOException e) {
        // Pass the exception as the last argument so the cause is not lost.
        LOG.debug("Failed to parse se-item '{}'", item.getName(), e);
        return null;
    }
}
项目:case-html-data-gather    文件:HTMLDataGather.java   
/**
 * Groups the paragraphs of the content element into pronunciation entries.
 *
 * <p>A paragraph whose first child is a {@code span} with class {@code dicpy} starts a
 * new entry with that span's text as pronunciation. A paragraph whose first child is
 * an {@code em} adds a meaning to the current entry: the concatenated text of all
 * nodes following the {@code em}. Meanings that appear before any pronunciation has
 * been seen are skipped, since there is no entry to attach them to.
 *
 * @param contentEL element containing the paragraphs
 * @return the entries in document order
 */
private List<DuYinDM> gatherDuyins(Element contentEL)throws Exception{
    Elements elements=contentEL.select("p");
    DuYinDM dm=null;
    List<DuYinDM> results=new ArrayList<DuYinDM>(3);
    for (Element p : elements) {
        if(p.children().isEmpty())continue;
        Element firstChild=p.child(0);
        if("span".equals(firstChild.tagName())){
            if(firstChild.hasClass("dicpy")){
                // A new pronunciation closes the entry collected so far.
                if(dm!=null){
                    results.add(dm);
                }
                dm=new DuYinDM();
                String duyin=firstChild.text();
                dm.setDuyin(duyin);
            }
        }else   if("em".equals(firstChild.tagName())){
            if(dm==null){
                // No pronunciation seen yet: avoid dereferencing null.
                continue;
            }
            StringBuilder ziyi=new StringBuilder();
            Node next=firstChild.nextSibling();
            while(next!=null){
                if(next instanceof TextNode){
                    ziyi.append(((TextNode) next).text());
                }else if(next instanceof Element){
                    ziyi.append(((Element) next).text());
                }
                next=next.nextSibling();
            }
            dm.addZiyi(ziyi.toString());
        }
    }
    if(dm!=null){
        results.add(dm);
    }
    return results;
}
项目:ripme    文件:Hentai2readRipper.java   
/**
 * Derives full-size image URLs from the thumbnail images of the page.
 *
 * <p>Each thumbnail {@code src} is prefixed with {@code https:}, the host
 * {@code hentaicdn.com} is replaced by {@code static.hentaicdn.com}, and the
 * substrings {@code thumbnails/} and {@code tmb} are removed.
 *
 * @param doc the parsed page
 * @return the image URLs in document order
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> urls = new ArrayList<String>();
    Elements thumbnails = doc.select("div.block-content > div > div.img-container > a > img.img-responsive");
    for (Element thumbnail : thumbnails) {
        String fullSize = ("https:" + thumbnail.attr("src"))
                .replace("hentaicdn.com", "static.hentaicdn.com")
                .replace("thumbnails/", "")
                .replace("tmb", "");
        urls.add(fullSize);
    }
    return urls;
}
项目:SeleniumTest    文件:AxisSelector.java   
/**
 * Collects all element siblings that follow the given node (the
 * {@code following-sibling} axis).
 *
 * @param e the starting element; it is not included in the result
 * @return the following sibling elements in document order
 */
public Elements followingSibling(Element e){
    Elements siblings = new Elements();
    for (Element next = e.nextElementSibling(); next != null; next = next.nextElementSibling()) {
        siblings.add(next);
    }
    return siblings;
}
项目:jsoup-annotations    文件:JsoupProcessor.java   
/**
 * Returns the first element matching a query inside a container.
 *
 * @param container the element to search in
 * @param query     the selector
 * @return the first match
 * @throws ElementNotFoundException when nothing matches
 */
private static Element element(Element container, String query) {
    // first() yields null exactly when the selection is empty.
    Element firstMatch = container.select(query).first();
    if (firstMatch == null) {
        throw new ElementNotFoundException(query);
    }
    return firstMatch;
}
项目:sipsoup    文件:CacheCSSFunction.java   
/**
 * Tells whether any ancestor of {@code element}, up to and including {@code root},
 * matches {@code evaluator}.
 *
 * <p>The walk stops at {@code root}, or at the top of the tree when {@code element} is
 * not located under {@code root}, so a detached or foreign element yields
 * {@code false} instead of a null dereference.
 *
 * @param root    the element the search is rooted at
 * @param element the element whose ancestors are tested
 * @return {@code true} when a matching ancestor exists; {@code false} otherwise, and
 *         always {@code false} when {@code element} is {@code root} itself
 */
public boolean matches(Element root, Element element) {
    if (root == element)
        return false;

    for (Element ancestor = element.parent(); ancestor != null; ancestor = ancestor.parent()) {
        if (evaluator.matches(root, ancestor))
            return true;
        if (ancestor == root)
            break;
    }
    return false;
}
项目:ScriptSpider    文件:ContentExtractor.java   
/**
 * Collects the body text of a {@code div} by walking its subtree depth-first in
 * document order.
 *
 * <p>Nested {@code div}s are skipped entirely. A {@code p} without any {@code a}
 * inside contributes its text followed by a newline, unless its class name is
 * {@code pictext}. A {@code td} without any {@code div} inside contributes its text
 * followed by a newline. Matching {@code p} and all {@code td} elements are not
 * descended into; every other element is.
 *
 * @param div the root of the walk
 * @return the collected text
 */
private static String GetDivContent(Element div) {
    StringBuilder text = new StringBuilder();
    // Explicit stack for the depth-first walk; children are pushed in reverse so they
    // are popped in document order.
    Stack<Element> pending = new Stack<Element>();
    pending.push(div);

    while (!pending.empty()) {
        Element current = pending.pop();
        String tag = current.tagName();

        // Filter out divs nested inside the root div.
        if (current != div && tag.equals("div")) {
            continue;
        }

        // Body text held in a p tag that contains no a tag.
        if (tag.equals("p") && current.getElementsByTag("a").isEmpty()) {
            if (!current.className().equals("pictext")) {
                text.append(current.text());
                text.append("\n");
            }
            continue;
        }

        // Body text held in a td tag that contains no div.
        if (tag.equals("td")) {
            if (current.getElementsByTag("div").isEmpty()) {
                text.append(current.text());
                text.append("\n");
            }
            continue;
        }

        // Queue the child elements for visiting.
        Elements children = current.children();
        for (int i = children.size() - 1; i >= 0; i--) {
            pending.push(children.get(i));
        }
    }

    return text.toString();
}