Java 类org.jsoup.nodes.Element 实例源码
项目:OpenEyesReading-android
文件:HttpApiImpl.java
/**
 * Daily picks: parses the recommendation section of a page.
 *
 * <p>Takes the first element with class {@code tuijian} and builds one
 * {@link IHistoryDailyPicks} for each nested element with class {@code box}.
 * Boxes that have no {@code info} child are skipped.
 *
 * @param str raw HTML of the page
 * @return the parsed entries in document order; empty when the page has no
 *         {@code tuijian} section
 */
public List<IHistoryDailyPicks> takeDailyPick(String str) {
    List<IHistoryDailyPicks> dailyPicksList = new ArrayList<>();
    Elements sections = Jsoup.parse(str).getElementsByClass("tuijian");
    if (sections.isEmpty()) {
        // Nothing to parse; avoids IndexOutOfBoundsException from get(0).
        return dailyPicksList;
    }
    for (Element box : sections.get(0).getElementsByClass("box")) {
        Elements infoBlocks = box.getElementsByClass("info");
        if (infoBlocks.isEmpty()) {
            continue;
        }
        Element info = infoBlocks.get(0);
        Elements titleLinks = info.getElementsByTag("a");
        IHistoryDailyPicks dailyPicks = new IHistoryDailyPicks();
        dailyPicks.setTitle(titleLinks.text());// title
        String time = info.getElementsByClass("time").text().trim();
        // The last character of the time text is dropped; an empty text is kept as-is
        // instead of failing in substring(0, -1).
        dailyPicks.setTime(time.isEmpty() ? time : time.substring(0, time.length() - 1));// time
        dailyPicks.setDiscuss(info.getElementsByClass("pinglun").text());// Discuss
        dailyPicks.setDescribe(box.getElementsByClass("info1").text());// Describe
        dailyPicks.setHref(AppUtils.Constants.URL_ILISHI + titleLinks.attr("href"));// Href
        dailyPicks.setImgHref(AppUtils.Constants.URL_ILISHI + box.getElementsByTag("img").attr("src"));// imgHref
        dailyPicksList.add(dailyPicks);
    }
    return dailyPicksList;
}
项目:NewPipeExtractor
文件:SoundcloudParsingHelper.java
/**
 * Returns the SoundCloud client id, reusing the value stored in the static
 * {@code clientId} field when it is already non-empty.
 *
 * <p>Otherwise downloads the SoundCloud home page, follows the first
 * {@code <script>} whose {@code src} starts with the app asset prefix, and
 * extracts the id from that script with a regular expression. The result is
 * stored in the static field before being returned.
 */
public static String clientId() throws ReCaptchaException, IOException, RegexException {
    final boolean alreadyKnown = clientId != null && !clientId.isEmpty();
    if (alreadyKnown) {
        return clientId;
    }

    Downloader downloader = NewPipe.getDownloader();
    Document homePage = Jsoup.parse(downloader.download("https://soundcloud.com"));

    // TODO: Find a less heavy way to get the client_id
    // Currently we are downloading a 1MB file (!) just to get the client_id,
    // youtube-dl don't have a way too, they are just hardcoding and updating it when it becomes invalid.
    // The embed mode has a way to get it, but we still have to download a heavy file (~800KB).
    Element appScript = homePage.select("script[src^=https://a-v2.sndcdn.com/assets/app]").first();
    String scriptSource = downloader.download(appScript.attr("src"));

    clientId = Parser.matchGroup1(",client_id:\"(.*?)\"", scriptSource);
    return clientId;
}
项目:wulkanowy
文件:TeachersInfo.java
/**
 * Reads the school page and converts it into a {@link TeachersData}.
 *
 * <p>Each table row under {@code .mainContainer} becomes a {@link Subject}
 * (second cell: name, third cell: comma-separated teachers). The class name
 * and class teachers are cut out of the first paragraph under
 * {@code .mainContainer}.
 *
 * @throws IOException declared for the page fetch
 */
public TeachersData getTeachersData() throws IOException {
    Document page = snp.getSnPPageDocument(SCHOOL_PAGE_URL);
    Elements subjectRows = page.select(".mainContainer > table tbody tr");
    String header = page.select(".mainContainer > p").first().text();

    List<Subject> subjectList = new ArrayList<>();
    for (Element row : subjectRows) {
        Elements cells = row.select("td");
        String subjectName = cells.get(1).text();
        String[] teachers = cells.get(2).text().split(", ");
        subjectList.add(new Subject().setName(subjectName).setTeachers(teachers));
    }

    // Class name: text after ": " in the part before the first ", ".
    String className = header.split(", ")[0].split(": ")[1].trim();
    // Class teachers: comma-separated list following "Wychowawcy:".
    String[] classTeachers = header.split("Wychowawcy:")[1].trim().split(", ");

    return new TeachersData()
            .setClassName(className)
            .setClassTeacher(classTeachers)
            .setSubjects(subjectList);
}
项目:OpenHub
文件:TopicsPresenter.java
/**
 * Builds one {@link Topic} per card element found in the document.
 *
 * <p>For each card: the id is the last path segment of the first link's
 * {@code href}, the name and description are the first text nodes of the
 * first and second {@code a > p} elements, and the image is the {@code src}
 * of {@code a > img} (or {@code null} when the card has no image).
 *
 * @param doc parsed topics page
 * @return topics in document order
 */
private ArrayList<Topic> getTopTopics(Document doc) throws Exception{
    ArrayList<Topic> collected = new ArrayList<>();
    for (Element card : doc.getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")) {
        Element link = card.select("a").first();
        Element picture = card.select("a > img").first();
        Elements paragraphs = card.select("a > p");
        Element titleParagraph = paragraphs.get(0);
        Element descParagraph = paragraphs.get(1);

        String href = link.attr("href");
        String id = href.substring(href.lastIndexOf("/") + 1);
        String name = titleParagraph.textNodes().get(0).text();
        String desc = descParagraph.textNodes().get(0).text();

        String image = null;
        if (picture != null) {
            image = picture.attr("src");
        }

        collected.add(new Topic()
                .setId(id)
                .setName(name)
                .setDesc(desc)
                .setImage(image));
    }
    return collected;
}
项目:LushX
文件:PandaCrawler.java
/**
 * Extracts live-stream entries from the page and stores them through
 * {@code redisSourceManager}.
 *
 * <p>Collection stops as soon as the list holds more than 48 entries.
 *
 * @param document parsed listing page
 */
private void savePandaLivesToRedis(Document document) {
    List<Video> lives = new ArrayList<>();
    for (Element item : document.select("li.video-list-item.video-no-tag")) {
        Video video = new Video();
        video.setTitle(item.select("div.video-info span.video-nickname").text());
        // Strip the scheme so the stored image URL is protocol-relative.
        video.setImage(item.select("img.video-img").attr("data-original").replace("http:", ""));
        video.setValue(PANDA + item.attr("data-id"));
        lives.add(video);
        if (lives.size() > 48) {
            break;
        }
    }
    redisSourceManager.saveVideos(redisSourceManager.VIDEO_PREFIx_HOME_LIVE_KEY + "_" + TAG, lives);
}
项目:NewPipeExtractor
文件:YoutubeStreamExtractor.java
/**
 * Collects the related-video entries listed under {@code ul#watch-related}.
 *
 * <p>Only list items that contain a link whose class contains
 * {@code content-link} are committed; other items are left out.
 *
 * @return the collector, empty when the list element is absent
 * @throws ParsingException wrapping any exception raised while extracting
 */
@Override
public StreamInfoItemCollector getRelatedVideos() throws IOException, ExtractionException {
    assertPageFetched();
    try {
        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
        Element relatedList = doc.select("ul[id=\"watch-related\"]").first();
        if (relatedList == null) {
            return collector;
        }
        for (Element entry : relatedList.children()) {
            // first check if we have a playlist. If so leave them out
            boolean hasContentLink = entry.select("a[class*=\"content-link\"]").first() != null;
            if (!hasContentLink) {
                continue;
            }
            collector.commit(extractVideoPreviewInfo(entry));
        }
        return collector;
    } catch (Exception e) {
        throw new ParsingException("Could not get related videos", e);
    }
}
项目:Blackboard
文件:HtmlUtils.java
/**
 * Recursively walks the element tree and removes leaf elements whose tag is
 * not one of {@code br, a, p, h1, h2, h3, h4, span}.
 *
 * <p>Elements that have element children are never removed themselves; the
 * method only recurses into them.
 *
 * @param element root of the subtree to prune in place
 */
private static void removeUselessElements(Element element) {
    for (Element child : element.children()) {
        if (child.children().size() > 0) {
            removeUselessElements(child);
            continue;
        }

        boolean keep;
        switch (child.tagName()) {
            case "br":
            case "a":
            case "p":
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "span":
                keep = true;
                break;
            default:
                keep = false;
                break;
        }
        if (keep) {
            continue;
        }

        Element parent = child.parent();
        child.remove();
        // child has no element children on this path, so this collection is empty.
        parent.insertChildren(0, child.children());
    }
}
项目:jspider
文件:JsoupParser.java
/**
 * Extracts a trimmed string from the first element of {@code elements},
 * using the accessor selected by {@code jsoupExpression}.
 *
 * <p>The expression flags are checked in this order: text, val, attr,
 * outerHtml, ownText, html. When none applies the element text is used.
 *
 * @return the trimmed value, or {@code null} when {@code elements} is null or empty
 */
private Object getValueText(Elements elements, JsoupExpression jsoupExpression) {
    if (elements == null || elements.isEmpty()) {
        return null;
    }

    Element first = elements.get(0);
    String raw;
    if (jsoupExpression.isTextMethod()) {
        raw = first.text();
    } else if (jsoupExpression.isValMethod()) {
        raw = first.val();
    } else if (jsoupExpression.isAttrMethod()) {
        raw = first.attr(jsoupExpression.getParameter());
    } else if (jsoupExpression.isOuterHtmlMethod()) {
        raw = first.outerHtml();
    } else if (jsoupExpression.isOwnTextMethod()) {
        raw = first.ownText();
    } else if (jsoupExpression.isHtmlMethod()) {
        raw = first.html();
    } else {
        raw = first.text();
    }
    return StringUtils.trim(raw);
}
项目:Android-Scrapper
文件:ExampleUnitTest.java
/**
 * Fetches the ESPN WNBA scoreboard page, extracts the JSON assigned to
 * {@code window.espn.scoreboardData} from its script tags, and checks that
 * the decoded team list is not empty.
 *
 * <p>Note: no assertion runs when no script data node matches the pattern,
 * so the test passes in that case.
 */
@Test
public void leagueStatusCheck() throws Exception {
    Document doc = Jsoup.connect("http://www.espn.com/wnba/scoreboard/_/group/50")
            .timeout(60 * 1000)
            .maxBodySize(0)
            .get();
    Elements scriptElements = doc.getElementsByTag("script");
    Pattern pattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");
    // One Gson instance is enough; the original created two and used only the second.
    Gson gson = new Gson();
    for (Element element : scriptElements) {
        for (DataNode node : element.dataNodes()) {
            String data = node.getWholeData();
            if (!data.startsWith("window.espn.scoreboardData")) {
                continue;
            }
            Matcher matcher = pattern.matcher(data);
            if (!matcher.matches()) {
                continue;
            }
            EspnJson espnJson = gson.fromJson(matcher.group(1), EspnJson.class);
            System.out.println(espnJson.getTeams());
            assertEquals(false, espnJson.getTeams().isEmpty());
        }
    }
}
项目:Android-Scrapper
文件:LeagueBase.java
/**
 * Builds a {@link Game} from one HTML block by reading its {@code td} cells.
 *
 * <p>The first cell feeds {@code createGameInfo}; every later cell feeds
 * {@code createBidInfo}, with the boolean flag set only for the cell at
 * index 2. Before text extraction every {@code <br>} tag in a cell is
 * replaced by the literal marker {@code br2n}.
 *
 * @param currentHtmlBlock element containing the game's table cells
 * @return the populated game
 */
private Game constructGameFromHtmlBlock(Element currentHtmlBlock) {
    Game game = DefaultFactory.Game.constructDefault();
    game.setScoreType(getScoreType());
    game.setLeagueType(this);

    Elements columns = currentHtmlBlock.select("td");
    for (int index = 0; index < columns.size(); index++) {
        String markedHtml = columns.get(index).html().replaceAll("(?i)<br[^>]*>", "br2n");
        String columnText = Jsoup.parse(markedHtml).text();
        if (index == 0) {
            createGameInfo(columnText, game);
        } else {
            createBidInfo(columnText, game, index == 2);
        }
    }

    game.setVIBid();
    game.createID();
    return game;
}
项目:KBUnitTest
文件:ShadowResources.java
/**
 * Builds the resource name to value map from the {@code string} elements of
 * the values XML document.
 *
 * <p>An element contributes an entry only when its first child node is a
 * text node; the text of that node is the value.
 *
 * @return map from the {@code name} attribute to the string value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element stringElement : document.getElementsByTag("string")) {
        String name = stringElement.attr("name");
        if (stringElement.childNodeSize() <= 0) {
            continue;
        }
        if (!(stringElement.childNode(0) instanceof TextNode)) {
            continue;
        }
        TextNode firstText = (TextNode) stringElement.childNode(0);
        nameToValue.put(name, firstText.text());
    }
    return nameToValue;
}
项目:Mod-Tools
文件:RemoteModParser.java
/**
 * Returns the inner HTML of a copy of {@code element} with unwanted tags and
 * attributes removed and runs of two or more whitespace characters collapsed
 * to a single space. The element passed in is not modified.
 *
 * @param element element to clean
 * @return cleaned inner HTML
 */
protected String clean(Element element) {
    Element copy = element.clone();

    String[] tagsToDrop = "img,script,style,link,canvas".split(",");
    for (String tag : tagsToDrop) {
        for (Element unwanted : copy.getElementsByTag(tag)) {
            unwanted.remove();
        }
    }

    String[] attributesToDrop = "style,class,target,id,src".split(",");
    for (String attribute : attributesToDrop) {
        for (Element carrier : copy.getElementsByAttribute(attribute)) {
            carrier.removeAttr(attribute);
        }
    }

    String html = copy.html();
    return html.replaceAll("\\s{2,}", " ");
}
项目:ripme
文件:FlickrRipper.java
/**
 * Logs in to Flickr.
 *
 * <p>Loads the sign-in page, copies every hidden input into the form data,
 * adds the login fields, and posts the result to the action URL of the first
 * {@code method=post} form, sending the cookies from the first response.
 *
 * <p>NOTE(review): the login and password values are Base64 literals embedded
 * in the source and decoded at runtime.
 *
 * @return cookies of the response to the login POST
 * @throws IOException if either HTTP request fails
 */
@SuppressWarnings("unused")
private Map<String,String> signinToFlickr() throws IOException {
    Response signinPage = Jsoup.connect("http://www.flickr.com/signin/")
            .userAgent(USER_AGENT)
            .followRedirects(true)
            .method(Method.GET)
            .execute();
    Document signinDoc = signinPage.parse();

    Map<String,String> formFields = new HashMap<>();
    for (Element hidden : signinDoc.select("input[type=hidden]")) {
        formFields.put(hidden.attr("name"), hidden.attr("value"));
    }
    formFields.put("passwd_raw", "");
    formFields.put(".save", "");
    formFields.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
    formFields.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));

    String target = signinDoc.select("form[method=post]").get(0).attr("action");
    Response loginResult = Jsoup.connect(target)
            .cookies(signinPage.cookies())
            .data(formFields)
            .method(Method.POST)
            .execute();
    return loginResult.cookies();
}
项目:xxl-crawler
文件:XxlCrawlerTest.java
/**
 * Demo entry point: crawls the configured blog with three threads and prints
 * each page's base URI together with its parsed {@code PageVo}.
 */
public static void main(String[] args) {
    PageParser<PageVo> printingParser = new PageParser<PageVo>() {
        @Override
        public void parse(Document html, Element pageVoElement, PageVo pageVo) {
            // Print the page URL followed by the populated PageVo.
            String pageUrl = html.baseUri();
            System.out.println(pageUrl + ":" + pageVo.toString());
        }
    };

    XxlCrawler crawler = new XxlCrawler.Builder()
            .setUrls("https://my.oschina.net/xuxueli/blog")
            .setWhiteUrlRegexs("https://my\\.oschina\\.net/xuxueli/blog/\\d+")
            .setThreadCount(3)
            .setPageParser(printingParser)
            .build();

    System.out.println("start");
    crawler.start(true);
    System.out.println("end");
}
项目:PicKing
文件:Meizi4493.java
/**
 * Parses a detail page (decoded as gb2312) into a single {@link PicInfo}
 * holding the first picture URL, the title and the tag list, each set only
 * when the page contains matching elements.
 *
 * @return {@code resultMap}, with {@code CURRENT_URL} and {@code RESULT} filled in
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<PicInfo> picList = new ArrayList<>();
    Document page = Jsoup.parse(new String(result, "gb2312"));
    PicInfo info = new PicInfo();

    Elements pictures = page.select("div.picsbox p img");
    if (pictures.size() > 0) {
        info.setPicUrl(pictures.get(0).attr("src"));
    }

    Elements heading = page.select(".picmainer h1");
    if (heading.size() > 0) {
        info.setTitle(heading.text());
    }

    Elements tagLinks = page.select(".pleft a");
    if (tagLinks.size() > 0) {
        List<String> tagNames = new ArrayList<>();
        for (Element tagLink : tagLinks) {
            tagNames.add(tagLink.text());
        }
        info.setTags(tagNames);
    }

    picList.add(info);
    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, picList);
    return resultMap;
}
项目:StubbornJava
文件:ThemeForestScraper.java
/**
 * Downloads the popular-themes page and converts the search results embedded
 * in its {@code window.INITIAL_STATE=} script into {@link HtmlCssTheme}s.
 *
 * @return themes found under {@code searchPage.results.matches}
 * @throws IllegalStateException if the page contains no script starting with
 *         {@code window.INITIAL_STATE=}
 */
public static List<HtmlCssTheme> popularThemes() {
    HttpUrl url = HttpUrl.parse(POPULAR_THEMES_URL);
    Request request = new Request.Builder().url(url).get().build();
    String html = Retry.retryUntilSuccessfulWithBackoff(
        () -> client.newCall(request).execute()
    );

    final String statePrefix = "window.INITIAL_STATE=";
    Elements elements = Jsoup.parse(html).select("script");
    // Fail with a descriptive error instead of a NullPointerException when the
    // state script is missing.
    Element script = Seq.seq(elements)
        .filter(e -> e.html().startsWith(statePrefix))
        .findFirst()
        .orElseThrow(() -> new IllegalStateException(
            "No script starting with " + statePrefix + " found at " + POPULAR_THEMES_URL));

    String rawJson = script.html().substring(statePrefix.length());
    JsonNode node = Json.serializer().nodeFromJson(rawJson);
    return Seq.seq(node.path("searchPage").path("results").path("matches"))
        .map(ThemeForestScraper::themeFromElement)
        .toList();
}
项目:ripme
文件:ImagebamRipper.java
/**
 * Loads the page at {@code url}, takes the first image inside
 * {@code .image-container} and queues its {@code src} for download.
 *
 * <p>Logs a warning and returns when no image is found. An
 * {@link IOException} during loading or URL construction is logged, not
 * rethrown.
 */
private void fetchImage() {
    try {
        Document page = Http.url(url).get();
        // Find image
        Elements candidates = page.select(".image-container img");
        if (candidates.isEmpty()) {
            logger.warn("Image not found at " + this.url);
            return;
        }
        String imageSource = candidates.first().attr("src");
        logger.info("Found URL " + imageSource);
        // Provide prefix and let the AbstractRipper "guess" the filename
        String prefix = Utils.getConfigBoolean("download.save_order", true)
                ? String.format("%03d_", index)
                : "";
        addURLToDownload(new URL(imageSource), prefix);
    } catch (IOException e) {
        logger.error("[!] Exception while loading/parsing " + this.url, e);
    }
}
项目:LiteReader
文件:StaffDetailSummaryViewModel.java
/**
 * Fetches {@code url} and returns the summary text found in the second child
 * of the element with id {@code intro}.
 *
 * <p>When that child has exactly two child nodes, the text of its first
 * {@code hidden}-class descendant is used; otherwise its own text is used.
 * Any exception is printed and results in an empty string.
 *
 * @param url page address
 * @return the summary, or {@code ""} on failure
 */
private String extractContent(String url) {
    try {
        Element intro = Jsoup.connect(url).get().getElementById("intro").child(1);
        if (intro.childNodeSize() != 2) {
            return intro.text();
        }
        return intro.getElementsByClass("hidden").get(0).text();
    } catch (Exception e) {
        e.printStackTrace();
        return "";
    }
}
项目:ChatExchange-old
文件:ChatFragment.java
/**
 * Parses {@code html} and, for each selected container, looks for the first
 * link whose {@code class} attribute is exactly {@code signature}.
 *
 * <p>NOTE(review): the selector {@code user-container} has no {@code .} or
 * {@code #} prefix, so it selects by tag name; confirm that is intended.
 * NOTE(review): the link that is found is not used after the loop in the
 * code visible here, and {@code url} is not read.
 */
private void processMessageViews(URL url, String html)
{
    Elements containers = Jsoup.parse(html).select("user-container");
    for (Element container : containers)
    {
        Elements links = container.select("a");
        Element signature = new Element("");
        for (Element candidate : links)
        {
            boolean isSignature = candidate.hasAttr("class")
                    && candidate.attr("class").equals("signature");
            if (isSignature)
            {
                signature = candidate;
                break;
            }
        }
    }
}
项目:BackOffice
文件:TestDataController.java
/**
 * Converts the HTML table stored at {@code body.storage.value} of the JSON
 * content into a list of row maps.
 *
 * <p>For each {@code tr}, cells whose {@code colspan} differs from
 * {@code countColumns} are selected. A row is kept when it has at least one
 * such cell and the first cell's text is non-empty; its first five cells are
 * stored under the keys dKey, dValue, dComment, dHltValue and dDevValue.
 *
 * @param content response content holding the JSON document
 * @return one map per accepted table row, in document order
 */
private List<Map<String, String>> getParsedData(Content content){
    List<Map<String,String >> parsedRows = new ArrayList<>();
    JSONObject root = new JSONObject(content.toString());
    JSONObject body = (JSONObject) root.get("body");
    JSONObject storage = (JSONObject) body.get("storage");
    Document table = Jsoup.parse(storage.get("value").toString());
    Elements rows = table.select("tr");
    LOG.info("Парсим данные страницы");

    String[] keys = {"dKey", "dValue", "dComment", "dHltValue", "dDevValue"};
    for (Element row : rows){
        Elements cells = row.select("td[colspan!="+countColumns+"]");
        boolean hasKey = cells.size()!=0 && !cells.get(0).text().equals("");
        if (hasKey){
            HashMap<String,String> rowData = new HashMap<>();
            for (int i = 0; i < keys.length; i++){
                rowData.put(keys[i], cells.get(i).text());
            }
            parsedRows.add(rowData);
        }
        LOG.debug(row.text());
    }
    return parsedRows;
}
项目:zhkuas_ssm_maven
文件:CourseBasicalInfoAnalysiser.java
/**
 * Builds the course list from the options of the course {@code select}
 * element located by {@code HTMLUtil.getSelectorByName}.
 *
 * <p>Options with an empty {@code value} attribute are skipped.
 *
 * @param html page source
 * @return the courses; empty when the select element is not found
 */
@Override
public List doAnalysis(String html) {
    List<Course> courses = new ArrayList<Course>();
    Element select = HTMLUtil.getSelectorByName(html, Constants.HTML_ELEMENT_NAME.SELECT_NAME_COURSE.getValue());
    if (select == null) {
        return courses;
    }
    for (Element option : select.children()) {
        if (option.attr("value").equals("")) {
            continue;
        }
        Course course = new Course();
        course.setName(HTMLUtil.cutName(option.text()));
        course.setNo(option.attr("value"));
        course.setNameNo(HTMLUtil.cutNameNo(option.text()));
        courses.add(course);
    }
    return courses;
}
项目:PicKing
文件:XiuMM.java
/**
 * Parses a gallery page (decoded as utf-8) into one {@link PicInfo} per
 * gallery image. Each picture URL is {@code baseUrl} followed by the image's
 * {@code src}, and every entry carries the album title (empty when the page
 * has none).
 *
 * @return {@code resultMap}, with {@code CURRENT_URL} and {@code RESULT} filled in
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<PicInfo> pictures = new ArrayList<>();
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements titleBlocks = page.select("div.album_desc div.inline");
    String albumTitle = titleBlocks.size() > 0 ? titleBlocks.get(0).text() : "";

    for (Element image : page.select(".gallary_item .pic_box img")) {
        String pictureUrl = baseUrl + image.attr("src");
        pictures.add(new PicInfo(pictureUrl).setTitle(albumTitle));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:NFLFantasyAnalyzer
文件:WebScraper.java
/**
 * Finds the team's row in the second table of {@code rbPointsAllowedURL} and
 * stores its rank and average on {@code team}.
 *
 * <p>Rows are scanned from index 2; the rank is the 1-based position within
 * the scanned rows. The average is read from column 18 when the third cell
 * contains {@code *}, otherwise from column 19. Scanning stops at the first
 * row whose first cell contains the team name.
 *
 * @param team team to look up and update
 */
public void populatePointsGivenToRB(Team team) {
    Elements rows = rbPointsAllowedURL.select("table").get(1).select("tr");
    for (int i = 2; i < rows.size(); i++) {
        Elements cols = rows.get(i).select("td");
        int rank = i - 1;
        if (!cols.get(0).text().contains(team.getName())) {
            continue;
        }
        int averageColumn = cols.get(2).text().contains("*") ? 18 : 19;
        team.setFpToRBRank(rank);
        team.setFpToRBAvg(Double.parseDouble(cols.get(averageColumn).text()));
        break;
    }
}
项目:JsoupSample
文件:YingTaoJsoupManager.java
/**
 * Builds one {@link MagneticModel} for every {@code div.r} element except the
 * last one. The title is the text of the links that have a {@code class}
 * attribute; the URL is the {@code href} of the links without class
 * {@code link}.
 *
 * @return the models in document order
 */
public List<MagneticModel> getList() {
    List<MagneticModel> models = new ArrayList<>();
    Elements blocks = document.select("div.r");
    // The final div.r is deliberately left out.
    int lastIndex = blocks.size() - 1;
    for (int i = 0; i < lastIndex; i++) {
        Element block = blocks.get(i);
        MagneticModel model = new MagneticModel();
        model.title = block.select("a[class]").text();
        model.url = block.select("a:not(.link)").attr("href");
        models.add(model);
    }
    return models;
}
项目:Xndroid
文件:OutputFormatter.java
/**
 * Appends the text of {@code e}'s subtree to {@code accum}, skipping every
 * child node for which {@code unlikely} returns true.
 *
 * <p>A single space is inserted before descending into a block element when
 * the buffer is non-empty and does not already end in whitespace, or before
 * a {@code br} element.
 *
 * @param e      element whose children are visited
 * @param accum  buffer receiving the text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsSeparator = accum.length() > 0
                && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
项目:calendar-component
文件:Calendar.java
/**
 * Writes the calendar's state to the design element after the superclass has
 * written its own.
 *
 * <p>Attributes written: {@code time-format} (when a format is set),
 * {@code start-date} and {@code end-date} (when set), and {@code time-zone}
 * (only when the zone differs from the system default).
 */
@Override
public void writeDesign(Element design, DesignContext designContext) {
    super.writeDesign(design, designContext);

    if (currentTimeFormat != null) {
        String formatName;
        if (currentTimeFormat == TimeFormat.Format12H) {
            formatName = "12h";
        } else {
            formatName = "24h";
        }
        design.attr("time-format", formatName);
    }

    if (startDate != null) {
        String formattedStart = DATE_FORMAT.format(getStartDate());
        design.attr("start-date", formattedStart);
    }

    if (endDate != null) {
        String formattedEnd = DATE_FORMAT.format(getEndDate());
        design.attr("end-date", formattedEnd);
    }

    boolean usesSystemZone = getZoneId().equals(ZoneId.systemDefault());
    if (!usesSystemZone) {
        design.attr("time-zone", getZoneId().getId());
    }
}
项目:ripme
文件:PahealRipper.java
/**
 * Returns the absolute {@code href} of every thumbnail link on the page,
 * excluding links with class {@code shm-thumb-link}.
 */
@Override
public List<String> getURLsFromPage(Document page) {
    Elements thumbLinks = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
    List<String> urls = new ArrayList<>(thumbLinks.size());
    for (int i = 0; i < thumbLinks.size(); i++) {
        urls.add(thumbLinks.get(i).absUrl("href"));
    }
    return urls;
}
项目:SeleniumTest
文件:Functions.java
/**
 * Returns the inner HTML of every node in the context.
 *
 * @param context elements to read; may be {@code null}
 * @return one {@link JXNode} per element, empty when {@code context} is null or empty
 */
public List<JXNode> html(Elements context){
    List<JXNode> nodes = new LinkedList<JXNode>();
    if (context == null || context.size() <= 0){
        return nodes;
    }
    for (Element current : context){
        nodes.add(JXNode.t(current.html()));
    }
    return nodes;
}
项目:Xndroid
文件:ArticleTextExtractor.java
/**
 * Scores an element from its class name, id, {@code style} and
 * {@code itemprop} attributes.
 *
 * <p>Adjustments: POSITIVE class +35, POSITIVE id +45, UNLIKELY class or id
 * -20 each, NEGATIVE class or id -50 each, NEGATIVE_STYLE style -50,
 * POSITIVE itemprop +100.
 *
 * @param e element to score
 * @return the summed weight
 */
private int calcWeight(Element e) {
    String className = e.className();
    String id = e.id();
    int weight = 0;

    if (POSITIVE.matcher(className).find()) {
        weight += 35;
    }
    if (POSITIVE.matcher(id).find()) {
        weight += 45;
    }

    if (UNLIKELY.matcher(className).find()) {
        weight -= 20;
    }
    if (UNLIKELY.matcher(id).find()) {
        weight -= 20;
    }

    if (NEGATIVE.matcher(className).find()) {
        weight -= 50;
    }
    if (NEGATIVE.matcher(id).find()) {
        weight -= 50;
    }

    String style = e.attr("style");
    boolean hasStyle = style != null && !style.isEmpty();
    if (hasStyle && NEGATIVE_STYLE.matcher(style).find()) {
        weight -= 50;
    }

    String itemprop = e.attr("itemprop");
    boolean hasItemprop = itemprop != null && !itemprop.isEmpty();
    if (hasItemprop && POSITIVE.matcher(itemprop).find()) {
        weight += 100;
    }
    return weight;
}
项目:ripme
文件:NhentaiRipper.java
/**
 * Returns one URL per {@code .gallerythumb} element, formed by prefixing the
 * element's {@code href} with the site root.
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> urls = new ArrayList<>();
    for (Element thumb : page.select(".gallerythumb")) {
        urls.add("https://nhentai.net" + thumb.attr("href"));
    }
    return urls;
}
项目:ZhihuQuestionsSpider
文件:KuaidailiProxySite.java
/**
 * Parses the proxy table ({@code div#list table tbody tr}) into a list of
 * {@link Proxy} objects, reading the IP from the first cell and the port from
 * the second.
 *
 * <p>Rows with fewer than two cells, or whose port cell is not an integer,
 * are skipped so that one malformed row does not abort the whole page.
 *
 * @param content HTML of the proxy listing page
 * @return proxies from all well-formed rows, in document order
 */
@Override
public List<Proxy> parseProxys(String content) {
    Document doc = Jsoup.parse(content);
    Elements rows = doc.select("div#list table tbody tr");
    List<Proxy> proxyList = new ArrayList<>(rows.size());
    for (Element tr : rows) {
        Elements tds = tr.children();
        if (tds.size() < 2) {
            continue;
        }
        String ip = tds.get(0).text().trim();
        Integer port;
        try {
            // Trim like the IP cell; surrounding whitespace would break parseInt.
            port = Integer.parseInt(tds.get(1).text().trim());
        } catch (NumberFormatException ignored) {
            // Not a data row (for example a header or placeholder); skip it.
            continue;
        }
        proxyList.add(new Proxy(ip, port));
    }
    return proxyList;
}
项目:OpenEyesReading-android
文件:HttpApiImpl.java
/**
 * Recommended reading: parses the list items of the first element with class
 * {@code oldpic}.
 *
 * <p>Each {@code li} yields one {@link IHistoryOldPhoto} whose title comes
 * from the image's {@code title} attribute and whose link and image URLs are
 * prefixed with {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str raw HTML of the page
 * @return the parsed entries in document order; empty when the page has no
 *         {@code oldpic} section
 */
public List<IHistoryOldPhoto> takeProposeRead(String str) {
    List<IHistoryOldPhoto> proposeReads = new ArrayList<>();
    Elements sections = Jsoup.parse(str).getElementsByClass("oldpic");
    if (sections.isEmpty()) {
        // Nothing to parse; avoids IndexOutOfBoundsException from get(0).
        return proposeReads;
    }
    for (Element item : sections.get(0).getElementsByTag("li")) {
        Elements images = item.getElementsByTag("img");
        IHistoryOldPhoto proposeRead = new IHistoryOldPhoto();
        proposeRead.setTitle(images.attr("title"));//title
        proposeRead.setHref(AppUtils.Constants.URL_ILISHI + item.getElementsByTag("a").attr("href"));//href
        proposeRead.setImgHref(AppUtils.Constants.URL_ILISHI + images.attr("src"));//imgHref
        proposeReads.add(proposeRead);
    }
    return proposeReads;
}
项目:NetDiscovery
文件:CssSelector.java
/**
 * Applies {@code selectorText} to the element and returns the first match.
 *
 * @return the first selected element, or {@code null} when
 *         {@code Preconditions.isNotBlank} rejects the selection
 */
@Override
public Element selectElement(Element element) {
    Elements matches = element.select(selectorText);
    return Preconditions.isNotBlank(matches) ? matches.get(0) : null;
}
项目:eadlsync
文件:YStatementSeItemHelper.java
/**
 * Fetches the document addressed by the item's id and returns its body.
 *
 * @param item se-item whose id is used as the URL to load
 * @return the document body, or {@code null} when the fetch fails with an
 *         {@link IOException}
 */
private static Element getSeItemContentBody(SeItem item) {
    try {
        return Jsoup.connect(item.getId().toString()).get().body();
    } catch (IOException e) {
        // Pass the exception as the last argument so the cause is logged too.
        LOG.debug("Failed to parse se-item '{}'", item.getName(), e);
        return null;
    }
}
项目:case-html-data-gather
文件:HTMLDataGather.java
/**
 * Groups the paragraphs of {@code contentEL} into reading entries.
 *
 * <p>A paragraph whose first child is a {@code span} with class
 * {@code dicpy} starts a new {@link DuYinDM} with that span's text as the
 * reading. A paragraph whose first child is an {@code em} adds one meaning
 * to the current entry, built from the text of all siblings after the
 * {@code em}. Meaning paragraphs that appear before any reading are skipped
 * because there is no entry to attach them to.
 *
 * @param contentEL element containing the {@code p} paragraphs
 * @return the collected entries in document order
 */
private List<DuYinDM> gatherDuyins(Element contentEL)throws Exception{
    Elements elements=contentEL.select("p");
    DuYinDM dm=null;
    List<DuYinDM> results=new ArrayList<DuYinDM>(3);
    for (Element p : elements) {
        if(p.children().isEmpty())continue;
        Element firstChild=p.child(0);
        if("span".equals(firstChild.tagName())){
            if(firstChild.hasClass("dicpy")){
                // Flush the previous entry before starting the next one.
                if(dm!=null){
                    results.add(dm);
                }
                dm=new DuYinDM();
                dm.setDuyin(firstChild.text());
            }
        }else if("em".equals(firstChild.tagName())){
            if(dm==null){
                // No reading seen yet; previously this caused a NullPointerException.
                continue;
            }
            StringBuilder ziyi=new StringBuilder();
            Node next=firstChild.nextSibling();
            while(next!=null){
                if(next instanceof TextNode){
                    ziyi.append(((TextNode) next).text());
                }else if(next instanceof Element){
                    ziyi.append(((Element) next).text());
                }
                next=next.nextSibling();
            }
            dm.addZiyi(ziyi.toString());
        }
    }
    if(dm!=null){
        results.add(dm);
    }
    return results;
}
项目:ripme
文件:Hentai2readRipper.java
/**
 * Derives full-size image URLs from the thumbnail images on the page.
 *
 * <p>Each {@code src} is prefixed with {@code https:}, moved to the
 * {@code static.} host, and stripped of the {@code thumbnails/} path segment
 * and every {@code tmb} occurrence.
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> urls = new ArrayList<String>();
    Elements thumbnails = doc.select("div.block-content > div > div.img-container > a > img.img-responsive");
    for (Element thumbnail : thumbnails) {
        String fullSizeUrl = ("https:" + thumbnail.attr("src"))
                .replace("hentaicdn.com", "static.hentaicdn.com")
                .replace("thumbnails/", "")
                .replace("tmb", "");
        urls.add(fullSizeUrl);
    }
    return urls;
}
项目:SeleniumTest
文件:AxisSelector.java
/**
 * Returns all element siblings that follow the given node
 * (the {@code following-sibling} axis).
 *
 * @param e starting element
 * @return the following sibling elements in document order; empty when there are none
 */
public Elements followingSibling(Element e){
    Elements siblings = new Elements();
    for (Element next = e.nextElementSibling(); next != null; next = next.nextElementSibling()){
        siblings.add(next);
    }
    return siblings;
}
项目:jsoup-annotations
文件:JsoupProcessor.java
/**
 * Returns the first element matching {@code query} inside {@code container}.
 *
 * @throws ElementNotFoundException when the query selects nothing
 */
private static Element element(Element container, String query) {
    Elements matches = container.select(query);
    if (matches.isEmpty()) {
        throw new ElementNotFoundException(query);
    }
    return matches.first();
}
项目:sipsoup
文件:CacheCSSFunction.java
/**
 * Tests whether any ancestor of {@code element}, up to and including
 * {@code root}, satisfies {@code evaluator}.
 *
 * <p>The walk ends at {@code root}, or when the parent chain runs out, which
 * happens for a detached element or one that is not inside {@code root}.
 * That case now yields {@code false} instead of a NullPointerException.
 *
 * @param root    upper bound of the ancestor walk
 * @param element element whose ancestors are tested
 * @return {@code true} if a matching ancestor exists; {@code false} otherwise,
 *         including when {@code element} is {@code root} itself
 */
public boolean matches(Element root, Element element) {
    if (root == element) {
        return false;
    }
    for (Element ancestor = element.parent(); ancestor != null; ancestor = ancestor.parent()) {
        if (evaluator.matches(root, ancestor)) {
            return true;
        }
        if (ancestor == root) {
            break;
        }
    }
    return false;
}
项目:ScriptSpider
文件:ContentExtractor.java
/**
 * Collects the body text of a {@code div} by walking its element subtree
 * depth-first in document order.
 *
 * <p>Nested {@code div}s are ignored. Text is taken from {@code p} elements
 * that contain no {@code a} tag (except those whose class is exactly
 * {@code pictext}) and from {@code td} elements that contain no {@code div}.
 * Each collected text is followed by a newline.
 *
 * @param div root element to extract from
 * @return the concatenated text
 */
private static String GetDivContent(Element div) {
    StringBuilder text = new StringBuilder();
    // Explicit stack so that content is emitted in the order the tags appear.
    Stack<Element> pending = new Stack<Element>();
    pending.push(div);
    while (!pending.empty()) {
        Element current = pending.pop();
        String tag = current.tagName();

        // Skip any div nested inside the root div.
        if (current != div && tag.equals("div")) {
            continue;
        }

        // Body text inside a p tag, as long as the p contains no a tag.
        if (tag.equals("p") && current.getElementsByTag("a").size() == 0) {
            if (current.className().equals("pictext")) {
                continue;
            }
            text.append(current.text());
            text.append("\n");
            continue;
        }

        // Body text inside a td tag.
        if (tag.equals("td")) {
            if (current.getElementsByTag("div").size() != 0) {
                continue;
            }
            text.append(current.text());
            text.append("\n");
            continue;
        }

        // Push children in reverse so the first child is popped first.
        Elements children = current.children();
        for (int i = children.size() - 1; i >= 0; i--) {
            pending.push(children.get(i));
        }
    }
    return text.toString();
}