Java 类org.jsoup.Jsoup 实例源码

项目:PicKing    文件:MiniTokyo.java   
/**
 * Parses a listing page into album entries.
 *
 * <p>Each anchor matched by {@code ul.scans li a:has(img)} produces one {@code AlbumInfo}: its album URL
 * is the anchor's {@code href}, its picture URL is the {@code src} of the first nested {@code img}.
 *
 * @param baseUrl    not read by this implementation
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the results
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} filled in
 * @throws UnsupportedEncodingException declared because the charset is passed by name
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Document page = Jsoup.parse(html);

    List<AlbumInfo> albums = new ArrayList<>();
    for (Element anchor : page.select("ul.scans li a:has(img)")) {
        AlbumInfo album = new AlbumInfo();
        album.setAlbumUrl(anchor.attr("href"));

        Elements images = anchor.select("img");
        if (!images.isEmpty()) {
            album.setPicUrl(images.get(0).attr("src"));
        }
        albums.add(album);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
项目:PicKing    文件:Aitaotu.java   
/**
 * Extracts the pictures of a detail page together with the page title and time text.
 *
 * <p>The title is the text of the first {@code #photos h1} match and the time is the text of the first
 * {@code .tsmaincont-desc span} match; both default to the empty string when nothing matches.
 * One {@code PicInfo} is produced per {@code #big-pic img} element.
 *
 * @param baseUrl    not read by this implementation
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the results
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} filled in
 * @throws UnsupportedEncodingException declared because the charset is passed by name
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Element heading = page.select("#photos h1").first();
    String headingText = (heading == null) ? "" : heading.text();

    Element timeSpan = page.select(".tsmaincont-desc span").first();
    String timeText = (timeSpan == null) ? "" : timeSpan.text();

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select("#big-pic img")) {
        PicInfo picture = new PicInfo(image.attr("src"));
        pictures.add(picture.setTitle(headingText).setTime(timeText));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:Shadbot    文件:AudioEventListener.java   
/**
 * Reacts to a playback failure: reports it, logs it and moves on to the next track.
 *
 * <p>The first three consecutive errors are announced in the guild's channel; on exactly the third
 * error an extra notice says that further errors will be ignored. Every error is logged. If the
 * scheduler has no next track, the guild music session is ended.
 *
 * @param player the player that raised the exception (not read here)
 * @param track  the failing track (not read here)
 * @param err    the failure; its message is stripped of "Watch on YouTube" and of HTML markup
 */
@Override
public void onTrackException(AudioPlayer player, AudioTrack track, FriendlyException err) {
    errorCount++;

    String withoutYoutubeHint = StringUtils.remove(err.getMessage(), "Watch on YouTube");
    String errMessage = Jsoup.parse(withoutYoutubeHint).text().trim();

    if(errorCount <= 3) {
        String notice = String.format(Emoji.RED_CROSS + " Sorry, %s. I'll try to play the next available song.",
                errMessage.toLowerCase());
        BotUtils.sendMessage(notice, guildMusic.getChannel());

        if(errorCount == 3) {
            BotUtils.sendMessage(Emoji.RED_FLAG + " Too many errors in a row, I will ignore them until finding a music that can be played.",
                    guildMusic.getChannel());
            LogUtils.infof("{Guild ID: %d} Too many errors in a row. They will be ignored until music can be played.",
                    guildMusic.getChannel().getGuild().getLongID());
        }
    }

    String ignoredTag = (errorCount > 3) ? "(Ignored) " : "";
    LogUtils.infof("{Guild ID: %d} %sTrack exception: %s",
            guildMusic.getChannel().getGuild().getLongID(), ignoredTag, errMessage);

    boolean hasNextTrack = guildMusic.getScheduler().nextTrack();
    if(!hasNextTrack) {
        guildMusic.end();
    }
}
项目:Slide-RSS    文件:ReorderFeeds.java   
/**
 * Downloads a page and looks up the RSS feed it advertises.
 *
 * @param strings the first element is the site address; "http://" is prepended when it does not
 *                already start with "http"
 * @return the absolute {@code href} of the first {@code link[type=application/rss+xml]} element,
 *         or {@code null} when the page has none or the download fails
 */
@Override
protected String doInBackground(String... strings) {
    String urlBase = strings[0];
    try {
        String url = urlBase.startsWith("http") ? urlBase : "http://" + urlBase;
        Elements feedLinks = Jsoup.connect(url).get().select("link[type=application/rss+xml]");
        if (feedLinks.isEmpty()) {
            return null;
        }
        return feedLinks.get(0).attr("abs:href");
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
项目:doubanbook    文件:TestSpider.java   
/**
 * Fetches and parses the page at {@code url}, sending browser-like request headers.
 *
 * @param url address of the page to download
 * @return the parsed document, or {@code null} when the request or parsing fails for any reason
 */
public Document getDocument(String url) {
    try {
        return Jsoup.connect(url)
                // userAgent() sets the header value only; the value previously started with the
                // literal text "User-Agent:", which was sent as part of the agent string.
                .userAgent(
                        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36")
                .header("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                .header("scheme", "https").header("version", "HTTP/1.1")
                .header("accept-encoding", "gzip, deflate, sdch").header("accept-language", "zh-CN,zh;q=0.8")
                // .header("cookie", "bid=\"Q5KWZL7y8g7\";")
                .header("cache-control", "max-age=0").get();
    } catch (Exception ignored) {
        // Deliberate best effort: any failure (I/O, bad URL, HTTP status) is reported as null.
        return null;
    }
}
项目:Android-Scrapper    文件:EspnScoreboardParser.java   
/**
 * Downloads the scoreboard pages of group {@code i} for yesterday, today and tomorrow, in that order,
 * into {@code documentDefault}, {@code document} and {@code documentTomorrow}.
 *
 * <p>An {@link IOException} aborts the remaining downloads and is only printed.
 *
 * @param i group number inserted into the scoreboard URL
 */
private void init(int i) {
    final int timeoutMillis = 60 * 1000;
    try {
        String yesterdayUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/" + i + "/date/" + DateUtils.getDatePlus("yyyyMMdd", -1);
        this.documentDefault = Jsoup.connect(yesterdayUrl).timeout(timeoutMillis).maxBodySize(0).get();

        String todayUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/" + i + "/date/" + DateUtils.getDatePlus("yyyyMMdd", 0);
        this.document = Jsoup.connect(todayUrl).timeout(timeoutMillis).maxBodySize(0).get();

        String tomorrowUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/" + i + "/date/" + DateUtils.getDatePlus("yyyyMMdd", 1);
        this.documentTomorrow = Jsoup.connect(tomorrowUrl).timeout(timeoutMillis).maxBodySize(0).get();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
项目:PicKing    文件:Aitaotu.java   
/**
 * Collects every {@code #big-pic img} of a detail page as a {@code PicInfo}, each tagged with the
 * page title (first {@code #photos h1}) and time text (first {@code .tsmaincont-desc span}).
 * Title and time fall back to the empty string when their selector matches nothing.
 *
 * @param baseUrl    not read by this implementation
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the results
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} filled in
 * @throws UnsupportedEncodingException declared because the charset is passed by name
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Document parsed = Jsoup.parse(html);

    Elements titleMatches = parsed.select("#photos h1");
    String pageTitle = titleMatches.isEmpty() ? "" : titleMatches.get(0).text();

    Elements timeMatches = parsed.select(".tsmaincont-desc span");
    String pageTime = timeMatches.isEmpty() ? "" : timeMatches.get(0).text();

    List<PicInfo> found = new ArrayList<>();
    Elements bigPictures = parsed.select("#big-pic img");
    for (Element img : bigPictures) {
        String source = img.attr("src");
        found.add(new PicInfo(source).setTitle(pageTitle).setTime(pageTime));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, found);
    return resultMap;
}
项目:bboxapi-voicemail    文件:VoiceMailApi.java   
/**
 * Loads the voicemail service page with the stored cookies and reads the user's details from
 * its form inputs ({@code pseudo}, {@code voip_num}, {@code login}, {@code email}, {@code uid}).
 *
 * @return a {@code UserInfo} whose fields hold the {@code value} attribute of the first matching
 *         input, or "" when an input is absent; if the request fails the fields are left unset
 */
private UserInfo getUserInfo() {
    UserInfo info = new UserInfo();
    try {
        Document page = Jsoup.connect(VOICEMAIL_SERVICE_URI).cookies(loadCookies()).execute().parse();

        Elements pseudoInputs = page.select("input[name=pseudo]");
        Elements phoneInputs = page.select("input[name=voip_num]");
        Elements loginInputs = page.select("input[name=login]");
        Elements emailInputs = page.select("input[name=email]");
        Elements uidInputs = page.select("input[name=uid]");

        info.setPseudo(pseudoInputs.isEmpty() ? "" : pseudoInputs.get(0).attr("value"));
        info.setPhoneNumber(phoneInputs.isEmpty() ? "" : phoneInputs.get(0).attr("value"));
        info.setLogin(loginInputs.isEmpty() ? "" : loginInputs.get(0).attr("value"));
        info.setEmail(emailInputs.isEmpty() ? "" : emailInputs.get(0).attr("value"));
        info.setUid(uidInputs.isEmpty() ? "" : uidInputs.get(0).attr("value"));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return info;
}
项目:ovh-java-sdk    文件:ApiOvhUtils.java   
/**
 * Creates a new OVH application by posting to https://eu.api.ovh.com/createApp/ and writes the
 * Application Key and Application Secret found in the response to the log at warn level.
 *
 * @param nic      value sent as the {@code nic} form field
 * @param password value sent as the {@code password} form field
 * @throws IOException if the POST request fails
 */
public void createApplication(String nic, String password) throws IOException {
    Document page = Jsoup.connect("https://eu.api.ovh.com/createApp/")
            .data("nic", nic)
            .data("password", password)
            .data("applicationName", "One Shoot Token")
            .data("applicationDescription", "One Shoot Token")
            .post();

    // Each match yields a label ("Key" or "Secret") in group 1 and its value in group 2.
    Matcher matcher = Pattern.compile(" Application (\\w+)<pre><name>([^<]+)</name></pre>")
            .matcher(page.toString());

    String key = null;
    String secret = null;
    while (matcher.find()) {
        String label = matcher.group(1);
        String value = matcher.group(2);
        if ("Key".equals(label)) {
            key = value;
        } else if ("Secret".equals(label)) {
            secret = value;
        }
    }
    log.warn("Key:{} Secret:{}", key, secret);
}
项目:ProxyPool    文件:Ip66ProxyListPageParser.java   
/**
 * Parses a proxy list page into {@code Proxy} entries.
 *
 * <p>Rows are taken from {@code table tr:gt(1)}; the first cell is read as the IP and the second
 * as the port. Rows that lack either cell, or whose port is not an integer, are skipped instead
 * of aborting the whole parse.
 *
 * @param content HTML of the page; {@code null} or empty yields an empty list
 * @return the proxies found, never {@code null}
 */
@Override
public List<Proxy> parse(String content) {
    List<Proxy> proxyList = new ArrayList<>();
    if (content == null || content.equals("")) {
        return proxyList;
    }
    Document document = Jsoup.parse(content);
    Elements rows = document.select("table tr:gt(1)");
    for (Element row : rows) {
        Element ipCell = row.select("td:eq(0)").first();
        Element portCell = row.select("td:eq(1)").first();
        if (ipCell == null || portCell == null) {
            // Not a data row (e.g. header or spacer): first() returns null when nothing matches.
            continue;
        }
        try {
            proxyList.add(new Proxy(ipCell.text(), Integer.valueOf(portCell.text()), Constant.TIME_INTERVAL));
        } catch (NumberFormatException ignored) {
            // Malformed port text: skip this row and keep the remaining ones.
        }
    }
    return proxyList;
}
项目:ZhihuQuestionsSpider    文件:ParseRegularUtil.java   
/**
 * Handles one JSON page of the topic listing: queues a "newest" request for every topic in the
 * {@code msg} array, then queues the request for the following listing page.
 *
 * <p>When {@code msg} is missing or empty the result is marked as skipped and nothing is queued.
 * Array entries that do not contain a {@code div.item} with an {@code a[target]} link are skipped.
 *
 * @param page   fetched page; its content is JSON and its request carries the {@code topic_id}
 *               and {@code offset} attachments
 * @param result receives the follow-up requests
 */
public static void parseZhihuTopics1(Page page, Result result) {
    JSONObject object = JSON.parseObject(page.getContent());
    JSONArray array = (object == null) ? null : object.getJSONArray("msg");
    if (array == null || array.size() == 0) {
        result.setSkip(true);
        return;
    }
    for (int i = 0; i < array.size(); i++) {
        Document doc = Jsoup.parseBodyFragment(array.getString(i));
        // first() returns null when nothing matches, so each step is checked before use.
        Element item = doc.body().select("div.item").first();
        Element a = (item == null) ? null : item.select("a[target]").first();
        if (a == null) {
            continue;
        }
        String href = "https://www.zhihu.com" + a.attr("href") + "/newest";
        result.addRequest(new Request(href, HttpMethod.GET));
    }

    // The next listing page starts 20 entries after the current offset.
    Integer nextOffset = Integer.valueOf(((Integer) page.getRequest().getAddch("offset")) + 20);

    Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
    JSONObject object1 = new JSONObject();
    object1.put("topic_id", page.getRequest().getAddch("topic_id"));
    object1.put("offset", nextOffset);
    object1.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");
    request.addParame("method", "next")
            .addParame("params", object1.toJSONString())
            .addAttach("offset", nextOffset)
            .addAttach("topic_id", page.getRequest().getAddch("topic_id"));
    result.addRequest(request);
}
项目:PicKing    文件:Mntu92.java   
/**
 * Builds a single {@code PicInfo} describing a detail page.
 *
 * <p>The picture URL is {@code baseUrl} plus the {@code src} of the last {@code #bigpic img};
 * the title is the combined text of the {@code #entry h1} matches; the tags are the texts of
 * the {@code .postinfo a} links. Title and tags are only set when their selector matches.
 *
 * @param baseUrl    prefix prepended to the image {@code src}
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the results
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} (a one-element list) set
 * @throws UnsupportedEncodingException declared because the charset is passed by name
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));
    PicInfo info = new PicInfo();

    // Each match overwrites the previous one, so the last image wins.
    for (Element image : page.select("#bigpic img")) {
        info.setPicUrl(baseUrl + image.attr("src"));
    }

    Elements headings = page.select("#entry h1");
    if (!headings.isEmpty()) {
        info.setTitle(headings.text());
    }

    Elements tagLinks = page.select(".postinfo a");
    if (!tagLinks.isEmpty()) {
        List<String> tagNames = new ArrayList<>();
        for (Element link : tagLinks) {
            tagNames.add(link.text());
        }
        info.setTags(tagNames);
    }

    List<PicInfo> pictures = new ArrayList<>();
    pictures.add(info);

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:BackOffice    文件:TestDataController.java   
/**
 * Extracts the test-data table from a page whose HTML is stored at {@code body.storage.value}
 * of the content's JSON representation.
 *
 * <p>For every table row, the cells whose {@code colspan} differs from {@code countColumns} are
 * read positionally as key, value, comment, HLT value and DEV value. Rows with an empty first
 * cell or with fewer than five such cells are skipped.
 *
 * @param content source whose {@code toString()} is the JSON document
 * @return one map per accepted row, with keys {@code dKey}, {@code dValue}, {@code dComment},
 *         {@code dHltValue} and {@code dDevValue}
 */
private List<Map<String, String>> getParsedData(Content content){
    List<Map<String,String >> testData = new ArrayList<>();
    JSONObject jsonObject = new JSONObject(content.toString());
    JSONObject body = (JSONObject) jsonObject.get("body");
    JSONObject storage = (JSONObject) body.get("storage");
    Document doc = Jsoup.parse(storage.get("value").toString());
    Elements tRows = doc.select("tr");
    LOG.info("Парсим данные страницы");
    for (Element row : tRows){
        Elements tds = row.select("td[colspan!="+countColumns+"]");
        // Five cells are read below; shorter rows used to fail with IndexOutOfBoundsException.
        if (tds.size() >= 5 && !tds.get(0).text().isEmpty()){
            HashMap<String,String> rowData = new HashMap<>();
            rowData.put("dKey",tds.get(0).text());
            rowData.put("dValue",tds.get(1).text());
            rowData.put("dComment",tds.get(2).text());
            rowData.put("dHltValue",tds.get(3).text());
            rowData.put("dDevValue",tds.get(4).text());
            testData.add(rowData);
        }
        LOG.debug(row.text());
    }
    return testData;
}
项目:android-apps    文件:JsoupUtil.java   
/**
 * Downloads {@code url} on a newly started thread and hands the document body to the callback.
 * An {@link IOException} is printed and the callback is not invoked.
 *
 * @param url      page to download, requested with the PC user agent
 * @param callback receives the {@code body} element of the downloaded document
 */
public static void fetchBody(final String url, final Callback callback) {
  Runnable download = new Runnable() {
    @Override
    public void run() {
      Document page;
      try {
        page = Jsoup.connect(url).userAgent(FormatUtil.USER_AGENT_PC).get();
      } catch (IOException e) {
        e.printStackTrace();
        return;
      }
      callback.output(page.body());
    }
  };
  new Thread(download).start();
}
项目:MyAnimeViewer    文件:Parser.java   
/**
 * Extracts the video file URL from a SafeUpload embed page.
 *
 * <p>Example page: http://www.safeupload.org/getembed/f93fb4096e0875979215c0307dd53ff5
 *
 * <p>The second-to-last {@code div + script} element of the body is searched for
 * {@code 'file': '...'}; the quoted value is the result.
 *
 * @param url address of the embed page
 * @return the extracted URL, or "" when the page has fewer than two matching scripts, the script
 *         is empty, no {@code 'file'} entry is found, or the download fails
 */
public String getSafeUploadVideo(String url) {
    String lVideoUrl = "";
    try {
        Document docdata = Jsoup.connect(url).userAgent(mUserAgent)
                .referrer("http://www.google.com")
                .timeout(Parser.getParseTimeOut()).get();
        Elements eles = docdata.select("body").select("div + script");
        // The script at index size-2 is read, so at least two matches are required; with a
        // single match the index would be -1.
        if (eles.size() >= 2) {
            String script = eles.get(eles.size() - 2).html();
            if (!TextUtils.isEmpty(script)) {
                final Matcher matcher = Pattern.compile("'file': '(.+?)'").matcher(script);
                // group(1) is only valid after a successful find().
                if (matcher.find()) {
                    lVideoUrl = matcher.group(1);
                }
            }
        }
    } catch (IOException e) {
        WriteLog.appendLog(Log.getStackTraceString(e));
    }
    return lVideoUrl;
}
项目:9AnimeAndroid    文件:NineAnimeApi.java   
/**
 * Searches 9anime for {@code query} and returns the results of every result page.
 *
 * <p>The query is URL-encoded (spaces become {@code +}), so characters such as {@code &} or
 * {@code #} no longer corrupt the request. The page count is read from {@code span.total} when a
 * {@code div.paging} block exists; if that text is missing or not a number, one page is assumed.
 *
 * @param query free-text search terms
 * @return the animes parsed from all result pages, in page order
 * @throws IOException if a page cannot be downloaded
 */
public List<Anime> search(String query) throws IOException, InterruptedException, ExecutionException, JSONException {
    String url = "https://9anime.to/search?keyword=" + java.net.URLEncoder.encode(query, "UTF-8");
    Document doc = Jsoup.connect(url).get();
    int totalPages = 1;

    List<Anime> animes = new ArrayList<>();

    if (doc.select("div.paging").size() > 0 && !doc.select("span.total").isEmpty()) {
        try {
            totalPages = Math.max(Integer.parseInt(doc.select("span.total").first().text().trim()), totalPages);
        } catch (NumberFormatException ignored) {
            // Unreadable page total: fall back to the single page already downloaded.
        }
    }

    animes.addAll(parseSearchPage(doc));

    // Page 1 is already parsed; fetch the remaining pages.
    for (int page = 2; page <= totalPages; ++page) {
        animes.addAll(parseSearchPage(Jsoup.connect(url + "&page=" + page).get()));
    }

    return animes;
}
项目:GoSCELE    文件:BaseProvider.java   
/**
 * Executes the configured request once and evaluates each CSS selector in {@code params}
 * against the response, publishing progress after every selector.
 *
 * <p>The response body is parsed a single time and reused for all selectors. Progress is
 * reported as a percentage from 0 to 100 (the earlier integer division reported 0 until the
 * last selector).
 *
 * @param params CSS selectors; empty or {@code null} entries are skipped but still count
 *               towards progress
 * @return the selection for every non-empty selector, in order; empty if the request fails
 */
@Override
@Deprecated
protected List<Elements> doInBackground(String... params) {
    List<Elements> results = new ArrayList<>();
    int idx = 0;
    try {
        Connection.Response response = Jsoup.connect(url())
                .data(data())
                .method(method())
                .cookies(cookies())
                .execute();
        cookies = response.cookies();
        // Parsed lazily, at most once: re-parsing per selector is wasteful and some jsoup
        // versions refuse to parse the same response body twice.
        org.jsoup.nodes.Document document = null;
        for (String param : params) {
            if (!TextUtils.isEmpty(param)) {
                if (document == null) {
                    document = response.parse();
                }
                results.add(document.select(param));
            }
            // Multiply before dividing so intermediate progress is not truncated to zero.
            publishProgress((++idx * 100) / params.length);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return results;
}
项目:PicKing    文件:Aitaotu.java   
/**
 * Collects every {@code #big-pic img} of a detail page as a {@code PicInfo} carrying the page
 * title (first {@code #photos h1}, or "" when absent) and the tag texts of all {@code .fbl a}
 * links. All pictures share the same tag list instance.
 *
 * @param baseUrl    not read by this implementation
 * @param currentUrl stored in the result map under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives the results
 * @return {@code resultMap} with {@code CURRENT_URL} and {@code RESULT} filled in
 * @throws UnsupportedEncodingException declared because the charset is passed by name
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements headings = page.select("#photos h1");
    String pageTitle = headings.isEmpty() ? "" : headings.get(0).text();

    List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select(".fbl a")) {
        tagNames.add(tagLink.text());
    }

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select("#big-pic img")) {
        pictures.add(new PicInfo()
                .setTags(tagNames)
                .setTitle(pageTitle)
                .setPicUrl(image.attr("src")));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
项目:quiz_helper    文件:Mnemonic.java   
/**
 * Looks up {@code key} at {@code wordUrl} and turns every {@code .span9} element of the result
 * page into a {@code Definition}.
 *
 * @param key word appended to {@code wordUrl}
 * @return one definition per {@code .span9} element; an empty list when the request fails, in
 *         which case the stack trace is also shown in a toast
 */
public List<Definition> wordLookup(String key) {
    Document page;
    try {
        page = Jsoup.connect(wordUrl + key)
                .userAgent("Mozilla")
                .timeout(5000)
                .get();
    } catch (IOException ioe) {
        Toast.makeText(MyApplication.getContext(), Log.getStackTraceString(ioe), Toast.LENGTH_SHORT).show();
        return new ArrayList<Definition>();
    }

    List<Definition> definitions = new ArrayList<>();
    for (Element memo : page.select(".span9")) {
        String memoText = memo.text();
        HashMap<String, String> fields = new HashMap<>();
        fields.put(EXP_ELE[0], memoText);
        definitions.add(new Definition(fields, memoText));
    }
    return definitions;
}
项目:EducationalAdministrationSystem    文件:HttpUtils.java   
/**
 * Sends the course-info GET request with the given cookies and query parameters.
 *
 * @param cookiesMap cookies to attach to the request
 * @param id         value of the {@code id} parameter
 * @param name       value of the {@code name} parameter
 * @return the executed response, or {@code null} when the request fails with an I/O error
 */
public static Response getCourseInfo(Map<String, String> cookiesMap, String id, String name) {
    try {
        Connection con = Jsoup.connect(Constant.CoursePraise.COURSE_URL);
        con.ignoreContentType(true);
        for (Map.Entry<String, String> cookie : cookiesMap.entrySet()) {
            con = con.cookie(cookie.getKey(), cookie.getValue());
        }

        con = con.method(Connection.Method.GET)
                .data("id", id)
                .data("name", name)
                .data("pId", "")
                .data("level", "0")
                .data(Constant.CoursePraise.COURSE_URL_OTHER_PARAM, "zTreeAsyncTest")
                .data(Constant.CoursePraise.COURSE_URL_, "1507812989512")
                .timeout(10000);
        return con.execute();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
项目:Android-Scrapper    文件:EspnScoreboardParser.java   
/**
 * Downloads the group-50 scoreboard pages for yesterday, today and tomorrow, in that order,
 * into {@code documentDefault}, {@code document} and {@code documentTomorrow}.
 *
 * <p>An {@link IOException} aborts the remaining downloads and is only printed.
 */
private void init() {
    final int timeoutMillis = 60 * 1000;
    try {
        String yesterdayUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/50/date/" + DateUtils.getDatePlus("yyyyMMdd", -1);
        this.documentDefault = Jsoup.connect(yesterdayUrl).timeout(timeoutMillis).maxBodySize(0).get();

        String todayUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/50/date/" + DateUtils.getDatePlus("yyyyMMdd", 0);
        this.document = Jsoup.connect(todayUrl).timeout(timeoutMillis).maxBodySize(0).get();

        String tomorrowUrl = league.getBaseScoreUrl() + "/scoreboard/_/group/50/date/" + DateUtils.getDatePlus("yyyyMMdd", 1);
        this.documentTomorrow = Jsoup.connect(tomorrowUrl).timeout(timeoutMillis).maxBodySize(0).get();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
项目:ZhihuQuestionsSpider    文件:ParseRegularUtil.java   
/**
 * Reads the topic category list of the page and queues, for each {@code li}, a POST request for
 * the first listing page (offset 0) of that category.
 *
 * @param page   fetched page whose content is HTML
 * @param result receives one request per list item; the item's {@code data-id} is sent as
 *               {@code topic_id} and also attached to the request
 */
public static void parseZhihuTopics(Page page, Result result){
    Document doc = Jsoup.parse(page.getContent());
    Element wrapper = doc.body().select("div.zg-wrap.zu-main.clearfix").first();
    Element categoryList = wrapper.select("ul.zm-topic-cat-main.clearfix").first();

    for (Element item : categoryList.getElementsByTag("li")) {
        int topicId = Integer.parseInt(item.attr("data-id"));

        JSONObject params = new JSONObject();
        params.put("topic_id", topicId);
        params.put("offset", 0);
        params.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");

        Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
        request.addParame("method", "next")
                .addParame("params", params.toJSONString())
                .addAttach("offset", Integer.valueOf(0))
                .addAttach("topic_id", topicId);
        result.addRequest(request);
    }
}
项目:Linda-AI    文件:Act.java   
/**
 * Looks up on the Italian Wikipedia the term that {@code dico} encloses in single quotes and
 * shows the text of the article's paragraphs through the GUI.
 *
 * <p>If {@code dico} does not contain two single quotes, the user is reminded to quote the
 * term. Previously the term was extracted before the {@code try} block, so the
 * {@link StringIndexOutOfBoundsException} handler never saw that failure and it escaped.
 *
 * @param dico the user's sentence, expected to contain the search term between single quotes
 */
static void Wikipedia(String dico) {
    final String quoteReminder = "Ricorda che, perché io cerchi informazioni riguardo a qualcosa, occorre che tu la definisca fra due virgolette!";

    int openQuote = dico.indexOf("'");
    int closeQuote = dico.lastIndexOf("'");
    // Exactly the condition under which substring(openQuote + 1, closeQuote) would throw:
    // no quote at all, or a single quote only.
    if (closeQuote <= openQuote) {
        new GUI().giveResponse(quoteReminder);
        return;
    }
    String cercowikipedia = dico.substring(openQuote + 1, closeQuote);

    try {
        Document significatowikipedia = Jsoup.connect("https://it.wikipedia.org/wiki/" + cercowikipedia.replace(" ", "_")).userAgent("Mozilla").get();
        String divs = significatowikipedia.select("p").text();
        if (!divs.equals("")) {
            new GUI().giveResponse("La ricerca di " + cercowikipedia + " su wikipedia ha restituito il seguente risultato:" + '\n' + divs);
        } else {
            new GUI().giveResponse("Mi dispiace, non ho trovato informazioni su " + cercowikipedia + " su Wikipedia...");
        }
    } catch (HttpStatusException e) {
        new GUI().giveResponse("Mi dispiace, Wikipedia sembra non avere una voce per '" + cercowikipedia +"'...");
    } catch (java.io.IOException f) {
        f.printStackTrace();
    } catch (StringIndexOutOfBoundsException g) {
        // Kept so callers see the same reaction as before should this ever be raised in here.
        new GUI().giveResponse(quoteReminder);
    }
}
项目:UnicesuLabs    文件:PaginaLab.java   
/**
 * Parses the stored {@code html} and registers one {@code Lab} per reservation table found
 * inside each {@code table[class=bloco]}.
 *
 * <p>The block name is the HTML of the first {@code th} of the block's first row; the lab name
 * is the HTML of the {@code td} cells of the reservation table's first row. The current
 * reservation table is stored in {@code labo} before {@code aula(..)} is called.
 */
public static void getPaginaLabs(){
    Document doc = Jsoup.parse(html.toString());
    Elements blocos = doc.select("table[class=bloco]");

    for (Element bloco : blocos) {
        String nBloco = bloco.select("tr").get(0).select("th").get(0).html();

        // The attribute selector was missing its closing bracket ("table[class=tableReserva").
        Elements labs = bloco.select("table[class=tableReserva]");
        for (Element lab : labs) {
            labo = lab;
            String nLab = labo.select("tr").get(0).select("td").html();
            Lab.array.add(new Lab(nBloco, nLab, aula(1), aula(2)));
            Log.d("nLAB","-->"+nBloco +" - "+ nLab +" - "+ aula(1) +" - "+ aula(2));
        }
    }
}
项目:NTPaprEng    文件:AdvSearchedWebPage.java   
/**
 * Extracts every link of interest from this page.
 *
 * @return the sibling links (only when this is the first page) followed by the paper links of
 *         the current page
 */
@Override
public List<? extends Link> extractAll() {
    System.out.println("Links parsing: url=" + getUrl() + " type=AdvSearched");

    final Document dom = Jsoup.parse(getText());

    // Collection of all links.
    final List<Link> collected = new ArrayList<>();
    if (isFirstPage()) {
        collected.addAll(getSiblingLinks(dom));
    }

    // Paper links of the current page.
    collected.addAll(getPaperLinks(parsePaperLinks(dom)));
    return collected;
}
项目:HouseSearch    文件:HouseController.java   
/**
 * Computes the total number of result pages and returns it to the front end.
 *
 * <p>The city's home page is requested first to resolve its final URL (stored in
 * {@code newUrl}); the first listing page is then downloaded and its {@code listsum} count is
 * divided into pages of 20 entries, rounding up.
 *
 * @param cityCode city sub-domain of 58.com
 * @param minPrice lowest price
 * @param maxPrice highest price
 * @param area     text appended verbatim to the listing URL after the price range
 * @param subway   text appended verbatim to the listing URL after {@code area}
 * @return the number of pages, or 0 when a request fails or the count cannot be read
 */
@ResponseBody
@RequestMapping(value = "/GetTotalPages", method = RequestMethod.POST)
public int GetTotalPages(String cityCode, int minPrice, int maxPrice, String area, String subway) {
    // Build the URL.
    String oldUrl = "http://" + cityCode + ".58.com";
    Connection conn = Jsoup.connect(oldUrl);
    int pages = 0;
    try {
        Response response = conn.method(Method.GET).execute();
        newUrl = response.url().toString() + "/pinpaigongyu/pn/";
        String nowUrl = newUrl + "1/?minprice=" + minPrice + "_" + maxPrice + area + subway;
        Document doc = Jsoup.connect(nowUrl).get();
        String listsumText = doc.getElementsByClass("listsum").select("em").text().trim();
        int listsum = Integer.parseInt(listsumText);
        pages = listsum % 20 == 0 ? listsum / 20 : listsum / 20 + 1;  // compute the page count
    } catch (IOException | NumberFormatException ignored) {
        // Best effort: a failed request or a missing/non-numeric count is reported as 0 pages
        // instead of surfacing as a server error.
    }
    return pages;
}
项目:Babler    文件:YouTubeCaptionsScraper.java   
/**
 * Fetches the captions/transcript of a video and saves their concatenated text.
 *
 * @param videoID id of the video whose captions are fetched
 * @param lang    language the captions should be in; converted with
 *                {@code LanguageCode.convertIso2toIso1} before use
 * @throws IOException if the HTTP request fails
 */
public void getAndSaveTranscript(String videoID, String lang) throws IOException {

    lang = LanguageCode.convertIso2toIso1(lang);

    String url = captionEndPoint+"lang="+lang+"&v="+videoID;
    GetMethod get = new GetMethod(url);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Return the connection to the client's connection manager even when the request fails.
        get.releaseConnection();
    }

    // Parse the XML and join the text of every <text> element.
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());
    StringBuilder allCaps = new StringBuilder();
    for (Element e : doc.select("text")) {
        allCaps.append(e.text());
    }

    FileSaver file = new FileSaver(allCaps.toString(), lang, "youtube_caps", url, videoID);
    file.save(logDb);

}
项目:NewPipeExtractor    文件:YoutubePlaylistExtractor.java   
/**
 * Downloads the JSON behind {@code nextStreamsUrl} and prepares the next batch of streams.
 *
 * <p>The {@code content_html} fragment is wrapped in a table body and parsed into
 * {@code nextStreamsAjax}. {@code nextStreamsUrl} is then replaced by the URL found in
 * {@code load_more_widget_html}, or by "" when that fragment is empty.
 *
 * @param downloader used to fetch {@code nextStreamsUrl}
 * @throws ParsingException if the downloaded data is not valid JSON
 */
private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
    String ajaxDataRaw = downloader.download(nextStreamsUrl);
    try {
        JsonObject ajaxData = JsonParser.object().from(ajaxDataRaw);

        String wrappedRows = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
        nextStreamsAjax = Jsoup.parse(wrappedRows, nextStreamsUrl);

        String loadMoreHtml = ajaxData.getString("load_more_widget_html");
        if (loadMoreHtml.isEmpty()) {
            nextStreamsUrl = "";
        } else {
            nextStreamsUrl = getNextStreamsUrlFrom(Jsoup.parse(loadMoreHtml, nextStreamsUrl));
        }
    } catch (JsonParserException e) {
        throw new ParsingException("Could not parse json data for next streams", e);
    }
}
项目:NewPipeExtractor    文件:SoundcloudPlaylistUrlIdHandler.java   
/**
 * Resolves a playlist URL to the canonical URL the page itself declares.
 *
 * @param complexUrl URL to resolve; first checked against {@code URL_PATTERN}
 * @return the {@code content} attribute of the page's first {@code meta[property=og:url]} element
 * @throws ParsingException if the check, the download or the lookup fails
 */
@Override
public String cleanUrl(String complexUrl) throws ParsingException {
    Utils.checkUrl(URL_PATTERN, complexUrl);

    try {
        String html = NewPipe.getDownloader().download(complexUrl);
        Element canonical = Jsoup.parse(html).select("meta[property=og:url]").first();
        return canonical.attr("content");
    } catch (Exception e) {
        throw new ParsingException(e.getMessage(), e);
    }
}
项目:newblog    文件:LibraryUtil.java   
/**
 * Converts the borrowing-record table of the page returned by {@code getHTML()} into beans.
 *
 * <p>Rows come from the first {@code tbody} of the first {@code table.jieyue-table}; cells 1, 2
 * and 3 of each row are read as title, author and book index.
 *
 * @return one {@code Myreading} per table row, in document order
 */
public static List<Myreading> htmltoJavaBean() {
    Elements tables = Jsoup.parse(getHTML()).select("table.jieyue-table");
    Element tableBody = tables.get(0).select("tbody").get(0);

    List<Myreading> records = new ArrayList<>();
    for (Element row : tableBody.select("tr")) {
        Elements cells = row.select("td");

        Myreading record = new Myreading();
        record.setTitle(cells.get(1).text());
        record.setAuthor(cells.get(2).text());
        record.setBookindex(cells.get(3).text());
        records.add(record);

        logger.info("借阅记录抓取成功" + record.getTitle());
    }
    return records;
}
项目:LushX    文件:JsoupUtils.java   
/**
 * Downloads {@code url} with the PC user agent, ignoring the response content type.
 *
 * @param url address to download
 * @return the parsed document
 * @throws LushXException if the request fails with an I/O error
 */
public static Document getDocWithPC(String url) {
    try {
        return Jsoup.connect(url).userAgent(UA_PC).timeout(TIME_OUT).ignoreContentType(true).get();
    } catch (IOException e) {
        // Pass the exception to the logger so the underlying cause is not lost.
        logger.error("网址请求失败:" + url, e);
        throw new LushXException("网址请求失败:" + url);
    }
}
项目:sipsoup    文件:JsoupParseTest.java   
/**
 * Small experiment printing what jsoup produces for plain text, for a validity check against an
 * empty whitelist, and for a {@code null} input.
 *
 * @param args not used
 */
public static void main(String[] args) {
    Document plainTextDoc = Jsoup.parse("test");
    System.out.println(plainTextDoc);

    // Original author's note: not useful.
    boolean plainTextIsValid = Jsoup.isValid("test", Whitelist.none());
    System.out.println(plainTextIsValid);

    Document fromNull = Jsoup.parse(null);
    System.out.println(fromNull);
}
项目:matrix-appservice-email    文件:GmailClientFormatter.java   
/**
 * Extracts the message content of a Gmail e-mail and sanitises it.
 *
 * <p>The content is the first {@code div[dir='ltr']} of the body. Trailing {@code br} child
 * elements are removed before the remaining HTML is cleaned with the basic whitelist.
 *
 * @param content HTML of the e-mail
 * @return the cleaned HTML, or "" when no content div is found
 */
@Override
protected String formatHtml(String content) {
    Element contentDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (contentDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // last() yields null once no child elements remain.
    Element trailing = contentDiv.children().last();
    while (trailing != null && trailing.is("br")) {
        trailing.remove();
        trailing = contentDiv.children().last();
    }

    return Jsoup.clean(contentDiv.html(), Whitelist.basic());
}
项目:FacetExtract    文件:test111111111.java   
/**
 * Downloads the Wikipedia article on array data structures and prints its text.
 * A failed download is reported and nothing is printed; previously the {@code null} document
 * was dereferenced afterwards.
 *
 * @param args not used
 */
public static void main(String[] args) {
    try {
        Document doc = Jsoup.connect("https://en.wikipedia.org/wiki/Array_data_structure").get();
        System.out.println(doc.text());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
项目:crawling-framework    文件:CyberscoopExtractorTest.java   
/**
 * Checks that, for a source without a title selector, exactly one title match is reported and
 * that it is "META:og:title".
 */
@Test
public void testTitleExtraction000() throws Exception {
    String articleUrl = "https://www.cyberscoop.com/u-s-oil-gas-companies-still-trying-catch-cybersecurity-experts-say/";
    String articleHtml = loadArticle("cyberscoop1");
    // The parsed document is not inspected; the call is kept from the original test.
    Document parsed = Jsoup.parse(articleHtml, articleUrl);

    HttpArticleParseResult extracted = ArticleExtractor.extractArticleWithDetails(
            articleHtml, articleUrl, cyberscoopSourceWithoutTitleSelector(), null);

    assertEquals(1, extracted.getTitleMatches().size());
    assertEquals("META:og:title", extracted.getTitleMatches().get(0));
}
项目:CNode-OAuth-Login-Android    文件:CNodeOAuthLoginView.java   
/**
 * Loads the CNode settings page with the given cookie on the executor and extracts the access
 * token from it, then reports the outcome via the handler.
 *
 * <p>On success the finish layout is shown and {@code loginCallback.onLoginSuccess} receives the
 * token. On any failure the error layout is shown. That now includes a page that lacks the
 * expected elements (for example when the cookie is not accepted): the resulting unchecked
 * exception used to escape the {@link IOException} handler, so the error layout was never shown.
 *
 * @param cookie value sent as the {@code Cookie} request header
 */
private void startGetAccessTokenAsyncTask(final String cookie) {
    showLoadingLayout();
    executorService.execute(new Runnable() {

        @Override
        public void run() {
            final String accessToken;
            try {
                Document document = Jsoup.connect("https://cnodejs.org/setting").header("Cookie", cookie).get();
                accessToken = document.getElementById("content").getElementsByClass("panel").get(2).child(1).child(0).text().replace("字符串:", "").replace(" ", "").trim();
            } catch (IOException | RuntimeException e) {
                // RuntimeException covers a missing "content" element or too few panels/children.
                handler.post(new Runnable() {

                    @Override
                    public void run() {
                        showErrorLayout();
                    }

                });
                return;
            }
            handler.post(new Runnable() {

                @Override
                public void run() {
                    showFinishLayout();
                    if (loginCallback != null) {
                        loginCallback.onLoginSuccess(accessToken);
                    }
                }

            });
        }

    });
}
项目:UpdogFarmer    文件:SteamWebHandler.java   
/**
 * Casts a vote for a randomly chosen nomination on the Steam Awards page.
 *
 * @return {@code true} when the vote request was posted; {@code false} when the page has no
 *         {@code div.vote_nominations} container, the container has no nominations, or a
 *         request fails
 */
public boolean autoVote() {
    final String url = STEAM_STORE + "SteamAwards/?l=english";
    try {
        final Document doc = Jsoup.connect(url)
                .referrer(url)
                .followRedirects(true)
                .cookies(generateWebCookies())
                .get();
        final Element container = doc.select("div.vote_nominations").first();
        if (container == null) {
            return false;
        }
        final String voteId = container.attr("data-voteid");
        final Elements voteNominations = container.select("div.vote_nomination");
        // select() returns an empty list rather than null, and Random.nextInt(0) throws, so
        // emptiness is the case to guard against.
        if (voteNominations.isEmpty()) {
            return false;
        }
        final Element choice = voteNominations.get(new Random().nextInt(voteNominations.size()));
        final String appId = choice.attr("data-vote-appid");
        // The response document is not needed; only the POST itself matters.
        Jsoup.connect(STEAM_STORE + "salevote")
                .referrer(STEAM_STORE)
                .cookies(generateWebCookies())
                .data("sessionid", sessionId)
                .data("voteid", voteId)
                .data("appid", appId)
                .post();
        return true;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return false;
}
项目:case-html-data-gather    文件:HTMLDataGather.java   
/**
 * Downloads {@code url} with the configured timeout and parses it into a {@code HanZiDM}.
 *
 * @param url page to download
 * @return the parsed model, never {@code null}
 * @throws RuntimeException if {@code parse} returns {@code null}
 * @throws Exception        if the download fails
 */
public HanZiDM from(String url) throws Exception {
    Document page = Jsoup.connect(url).timeout(timeout).get();
    HanZiDM parsed = this.parse(page);
    if (parsed != null) {
        return parsed;
    }
    throw new RuntimeException();
}
项目:PicKing    文件:Yesky.java   
/**
 * Determines the URL of the page that follows the current detail page.
 *
 * @param baseUrl    not read by this implementation
 * @param currentUrl URL of the page being parsed
 * @param result     raw page bytes, decoded as gb2312
 * @return "" when the last {@code .l_effect_bottom li a} link points to {@code currentUrl} or
 *         when no {@code .l_effect_img_mid a} link exists; otherwise the {@code href} of the
 *         first {@code .l_effect_img_mid a} link
 * @throws UnsupportedEncodingException if the gb2312 charset is not supported
 */
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "gb2312"));

    Elements pagerLinks = page.select(".l_effect_bottom li a");
    boolean lastLinkIsCurrent = !pagerLinks.isEmpty()
            && pagerLinks.get(pagerLinks.size() - 1).attr("href").equals(currentUrl);
    if (lastLinkIsCurrent) {
        return "";
    }

    Elements nextLinks = page.select(".l_effect_img_mid a");
    return nextLinks.isEmpty() ? "" : nextLinks.get(0).attr("href");
}
项目:NFLFantasyAnalyzer    文件:WebScraper.java   
/**
 * Downloads the page at {@code recAndPlayerURL} and stores the parsed document in the
 * {@code recAndPlayerURL} field.
 *
 * @param recAndPlayerURL address of the page to download
 * @throws IOException if the download fails; the original exception is attached as the cause
 */
public void initializeRecordAndTopPlayerURL(String recAndPlayerURL) throws IOException{
    try {
        this.recAndPlayerURL = Jsoup.connect(recAndPlayerURL).get();
    } catch(IOException cause) {
        String message = "Error initializing RecordAndTopPlayerURL";
        throw new IOException(message, cause);
    }
}