Python selenium.webdriver 模块,PhantomJS() 实例源码

我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用selenium.webdriver.PhantomJS()

项目:sparphantor    作者:antitree    | 项目源码 | 文件源码
def __init__(self, queue, DEBUG=config.DEBUG, reset=False, socksport=None):
        """Worker thread that fetches URLs from *queue* through a Tor SOCKS proxy.

        Args:
            queue: multithreading queue of URLs to crawl.
            DEBUG: when truthy, enable INFO-level logging (PhantomJS is chatty).
            reset: whether to check if a url has already been collected.
            socksport: local SOCKS port of the Tor proxy; falls back to
                config.SOCKS_PORT when falsy.
        """
        if not socksport:
            socksport = config.SOCKS_PORT
        ## TODO add checks that a socks proxy is even open
        ## TODO add Tor checks to make sure circuits are operating
        threading.Thread.__init__(self)
        self.reset = reset  # Whether to check if a url has been collected
        self.queue = queue  # Multithreading queue of urls
        # PhantomJS service args: route all traffic through the local Tor SOCKS proxy
        self.proxysettings = [
            '--proxy=127.0.0.1:%s' % socksport,
            '--proxy-type=socks5',
        ]
        #self.proxysettings = [] # DEBUG
        #self.ignore_ssl = ['--ignore-ssl-errors=true', '--ssl-protocols=any']
        self.ignore_ssl = []  # SSL-error flags disabled by default (see commented line above)
        self.service_args = self.proxysettings + self.ignore_ssl

        self.failcount = 0    # Counts failures
        self.donecount = 0    # Counts successes
        self.tor = tor.tor()  # Manages Tor via control port

        if DEBUG:  # PhantomJS sends a lot of data if debug set to DEBUG
            logging.basicConfig(level=logging.INFO)
项目:SerpScrap    作者:ecoron    | 项目源码 | 文件源码
def _get_webdriver(self):
        """Return a webdriver instance and set it up
        with the according profile/ proxies.
        Chrome is quite fast, but not as stealthy as PhantomJS.
        Returns:
            The appropriate webdriver mode according to self.browser_type.
            If no webdriver mode could be found, return False.
        """
        if self.browser_type == 'chrome':
            return self._get_Chrome()
        elif self.browser_type == 'firefox':
            return self._get_Firefox()
        elif self.browser_type == 'phantomjs':
            return self._get_PhantomJS()

        return False
项目:landchina-spider    作者:sundiontheway    | 项目源码 | 文件源码
def process_request(self, request, spider):
        """Scrapy downloader middleware: render requests flagged with meta['PhantomJS'].

        Uses the shared self.driver to fetch the page with a randomized
        User-Agent, returning an HtmlResponse; non-flagged requests fall
        through to the default downloader (returns None).
        """
        if 'PhantomJS' in request.meta:  # fixed: dict.has_key is Python-2-only
            log.debug('PhantomJS Requesting: %s' % request.url)
            try:
                ua = UserAgent().random
            except Exception:  # fixed: bare except hid real errors (incl. KeyboardInterrupt)
                # fall back to a static UA when the fake-useragent DB is unavailable
                ua = 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11'

            webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = ua

            try:
                self.driver.get(request.url)
                content = self.driver.page_source.encode('utf-8')
                url = self.driver.current_url.encode('utf-8')
            except Exception:
                return HtmlResponse(request.url, encoding='utf-8', status=503, body='')

            # an empty document means PhantomJS failed to render anything useful
            if content == '<html><head></head><body></body></html>':
                return HtmlResponse(request.url, encoding='utf-8', status=503, body='')
            else:
                return HtmlResponse(url, encoding='utf-8', status=200, body=content)

        else:
            log.debug('Common Requesting: %s' % request.url)
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def main(number):
    """Fetch bilibili video page av<number> with PhantomJS and pass it to getInfo.

    Best-effort: any failure is swallowed, but the PhantomJS process is
    always released.
    """
    url = 'http://www.bilibili.com/video/av' + str(number) + '/'
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False  # skip images for speed
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        driver.get(url)
        content = driver.page_source
        soup = BeautifulSoup(content, 'lxml')
        getInfo(soup)
    except Exception:
        pass  # best-effort: skip videos that fail to load or parse
    finally:
        # fixed: the original called close() + quit() inside try and quit()
        # again in finally; a single quit() here releases the process exactly once
        driver.quit()
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Fetch bilibili pages av<start>..av<stop> and feed each one to getInfo.

    Best-effort: the first failure silently ends the loop (original behavior),
    but each PhantomJS process is now always released.
    """
    # hoisted: the capabilities are identical for every page
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False  # skip images for speed
    try:
        for number in range(start, stop + 1):
            url = 'http://www.bilibili.com/video/av' + str(number) + '/'
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # fixed: the driver leaked whenever get()/page_source raised
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception:
        pass
项目:snippets    作者:Electsys-Partner    | 项目源码 | 文件源码
def give_me_the_page(n, user_name, password, broswer, pt = None):
    """Open the electsys.sjtu.edu.cn login page in the requested browser.

    Creates a driver when *pt* is not supplied (Chrome/Safari by name,
    PhantomJS otherwise), then inlines the captcha image into a canvas
    data-URL so it can be read from the DOM.
    """
    if not pt:
        factories = {'Chrome': webdriver.Chrome, 'Safari': webdriver.Safari}
        make_driver = factories.get(broswer, webdriver.PhantomJS)
        pt = make_driver()
    pt.get('http://electsys.sjtu.edu.cn/edu/login.aspx')
    time.sleep(1)
    pt.execute_script("""var img=document.getElementById('form-input').getElementsByTagName('div')[2].getElementsByTagName('img')[0];
        var d=document.createElement('CANVAS');
        var cxt=d.getContext('2d');
        d.width=img.width;
        d.height=img.height;
        cxt.drawImage(img,0,0);
        img.src=d.toDataURL('png');""")
项目:core-python    作者:yidao620c    | 项目源码 | 文件源码
def _click_page(total_posts, pool_size, group_index):
    """Click through one group of posts on the xncoding.com archives page.

    The posts are split into groups of *pool_size*; this worker visits the
    posts of group *group_index*, repeating the whole pass TRY_COUNT times.
    """
    _log.info('?{}?: starting...'.format(group_index + 1))
    # nothing to do when this group's slice starts past the last post
    if group_index > 0 and total_posts < pool_size * group_index:
        return
    # launch a headless PhantomJS session (original comment garbled in source encoding)
    _driver = webdriver.PhantomJS()
    _driver.get('https://www.xncoding.com/archives/')

    global TRY_COUNT
    for k in range(1, TRY_COUNT + 1):
        # _log.info('?{}?: ?{}???...'.format(group_index + 1, k))
        for i in range(pool_size * group_index, min(pool_size * (group_index + 1), total_posts)):
            # XPath selects the (i+1)-th post-title link on the archive page
            l_xpath = '(//article/header/h1[@class="post-title"]/a[@class="post-title-link"])[{}]'.format(i + 1)
            ele = WebDriverWait(_driver, 2).until(
                EC.presence_of_element_located((By.XPATH, l_xpath))
            )
            ele.click()
            # wait for the post body so we know the article page actually loaded
            WebDriverWait(_driver, 5).until(
                EC.presence_of_element_located((By.XPATH, '//div[@class="post-body"]'))
            )
            _driver.back()

    _log.info('?{}?: finished.'.format(group_index + 1))
    _driver.close()
项目:core-python    作者:yidao620c    | 项目源码 | 文件源码
def just_click():
    """Visit the xncoding.com archives page and click into every post once.

    Counts the post-title links, then for each one waits until it is
    clickable, opens it, waits for the post body, and navigates back.
    """
    _driver = webdriver.PhantomJS()
    _driver.get('https://www.xncoding.com/archives/')
    link_xpath = '//article/header/h1[@class="post-title"]/a[@class="post-title-link"]'
    posts_count = len(_driver.find_elements_by_xpath(link_xpath))
    for idx in range(1, posts_count + 1):
        target_xpath = '({})[{}]'.format(link_xpath, idx)
        ele = WebDriverWait(_driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, target_xpath))
        )
        _log.info('???{}???'.format(idx))
        ele.click()
        WebDriverWait(_driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//div[@class="post-body"]'))
        )
        _driver.back()
项目:bilibili-selenium-project    作者:umiharasorano    | 项目源码 | 文件源码
def start_PhantomJS():
    """Create a PhantomJS driver with a random User-Agent and custom headers.

    Reads candidate User-Agents from Base_Data\\Ualist.txt, picks one at
    random, injects it (and the other headers) into the PhantomJS
    capabilities, and returns (driver, ua_list).
    """
    # fixed: the original iterated an open() without closing it and then
    # opened/closed a *second* handle; a with-block closes the one we read
    with open('Base_Data\\Ualist.txt') as ua_file:
        uaList = [line[:-1] for line in ua_file]  # strip trailing newline
    i = random.choice(uaList)
    headers = {
        'Accept':'*/*',
        'Accept-Language':'zh-CN,zh;q=1',
        'User-Agent': i,
        'Connection': 'keep-alive'
    }
    service_args = [
        #'--proxy=127.0.0.1:9999',
        #'--proxy-type=http',
        '--ignore-ssl-errors=true',
        ]
    # push every header into the PhantomJS capabilities before driver creation
    for key,value in headers.items():
        webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = value
    webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = i
    dr = webdriver.PhantomJS(executable_path=r'C:\\Users\\sorano\\Desktop\\???????\\Asuna Sword\\bin\\phantomjs.exe',service_args=service_args)
    return dr,uaList
项目:Jarvis    作者:sukeesh    | 项目源码 | 文件源码
def find_hackathon(self):
    """Scrape hackerearth.com/challenges and print the upcoming hackathons.

    Renders the page with PhantomJS (the listing is JavaScript-driven) and
    parses the 'upcoming' section with BeautifulSoup.
    """
    print('--- Fetching hackathons--- \n')
    # NOTE(review): the driver is never quit; the PhantomJS process leaks
    driver = webdriver.PhantomJS()
    driver.get('https://www.hackerearth.com/challenges/')
    res = driver.page_source
    soup = BeautifulSoup(res, 'lxml')
    upcoming = soup.find('div', {'class': 'upcoming challenge-list'})

    if upcoming is not None:

        all_hackathons = upcoming.find_all('div', {'class': 'challenge-content'})

        # print one numbered entry (name, type, schedule) per hackathon
        for i, hackathon in enumerate(all_hackathons, 1):
            challenge_type = hackathon.find('div', {'class': 'challenge-type'}).text.replace("\n", " ").strip()
            challenge_name = hackathon.find('div', {'class': 'challenge-name'}).text.replace("\n", " ").strip()
            date_time = hackathon.find('div', {'class': 'challenge-list-meta challenge-card-wrapper'}).text.replace("\n", " ").strip()
            print("[{}] {}\n{}\n{}\n\n".format(str(i), challenge_name, challenge_type, date_time))
    else:
        print("No hackathon data found.")
项目:MIT-Hodor    作者:kalbhor    | 项目源码 | 文件源码
def login(rollno, password):
    """Log into slcm.manipal.edu with PhantomJS.

    Returns the live driver on success, or None when the credentials are
    rejected (detected by the login form still being present).
    """
    driver = webdriver.PhantomJS()
    driver.get("http://slcm.manipal.edu/loginForm.aspx")
    user_field = driver.find_element_by_id("txtUserid")
    pass_field = driver.find_element_by_id("txtpassword")

    user_field.send_keys(rollno)
    pass_field.send_keys(password)
    sleep(0.5)
    driver.find_element_by_css_selector('#btnLogin').click()
    sleep(1)

    try:
        driver.find_element_by_id("txtUserid")
    except Exception:  # fixed: bare except also swallowed KeyboardInterrupt etc.
        # login form is gone -> we are logged in
        return driver

    # fixed: release the browser on failure instead of leaking the process
    driver.quit()
    return None
项目:isar    作者:ilbers    | 项目源码 | 文件源码
def create_selenium_driver(browser='chrome'):
    """Create a selenium driver for *browser*.

    The TOASTER_TESTS_BROWSER environment variable, when set, overrides the
    argument. Raises RuntimeError for unknown browser names.
    """
    # env var (if non-empty) wins over the explicit argument
    browser = os.environ.get('TOASTER_TESTS_BROWSER') or browser

    if browser == 'chrome':
        return webdriver.Chrome(
            service_args=["--verbose", "--log-path=selenium.log"]
        )
    if browser == 'firefox':
        return webdriver.Firefox()
    if browser == 'marionette':
        capabilities = DesiredCapabilities.FIREFOX
        capabilities['marionette'] = True
        return webdriver.Firefox(capabilities=capabilities)
    if browser == 'ie':
        return webdriver.Ie()
    if browser == 'phantomjs':
        return webdriver.PhantomJS()

    msg = 'Selenium driver for browser %s is not available' % browser
    raise RuntimeError(msg)
项目:isp-data-pollution    作者:essandess    | 项目源码 | 文件源码
def add_url_links(self,links,url=''):
        """Add up to max_links_per_page http(s) links, in random order, to the crawler.

        Args:
            links: iterable of candidate URLs scraped from the current page.
            url: URL of the page the links came from (used for progress output).
        """
        k = 0
        # random sort key shuffles the candidates so the kept subset is unbiased
        for link in sorted(links,key=lambda k: random.random()):
            lp = uprs.urlparse(link)
            if (lp.scheme == 'http' or lp.scheme == 'https') and not self.blacklisted(link):
                if self.add_link(link): k += 1
                if k > self.max_links_per_page: break
        if self.verbose or self.debug:
            current_url = url  # default
            try:
                # wrap the property read in the short-timeout decorator, since
                # reading .current_url can hang or raise (see note below)
                @self.phantomjs_short_timeout
                def phantomjs_current_url(): return self.driver.current_url
                current_url = phantomjs_current_url()
                # the current_url method breaks on a lot of sites, e.g.
                # python3 -c 'from selenium import webdriver; driver = webdriver.PhantomJS(); driver.get("https://github.com"); print(driver.title); print(driver.current_url); driver.quit()'
            except Exception as e:
                if self.debug: print('.current_url exception:\n{}'.format(e))
        if self.debug:
            print("{}: {:d} links added, {:d} total, {:.1f} bits domain entropy".format(current_url,k,self.link_count(),self.domain_entropy()))
        elif self.verbose:
            self.print_progress(current_url,num_links=k)
项目:cabu    作者:thylong    | 项目源码 | 文件源码
def load_driver(config, vdisplay=None):
    """Initialize a weddriver selected in config with given config.

    Args:
        config (dict): The configuration loaded previously in Cabu.

    Returns:
        webdriver (selenium.webdriver): An instance of selenium webdriver or None.
    """

    if config['DRIVER_NAME'] == 'Firefox':
        driver = load_firefox(config)
    elif config['DRIVER_NAME'] == 'Chrome':
        driver = load_chrome(config)
    elif config['DRIVER_NAME'] == 'PhantomJS':
        driver = load_phantomjs(config)
    elif not config.get('DRIVER_NAME'):
        return None
    else:
        raise DriverException(vdisplay, 'Driver unrecognized.')

    driver.set_page_load_timeout(config['DRIVER_PAGE_TIMEOUT'])
    driver.set_window_size(config['DRIVER_WINDOWS_WIDTH'], config['DRIVER_WINDOWS_HEIGHT'])

    return driver
项目:voamos    作者:miguelsc    | 项目源码 | 文件源码
def init_driver(self):
        """Create the selenium driver named by self.driver_name.

        Idempotent: returns immediately once self.is_initialized is set.
        Raises Exception for unsupported driver names.
        """
        # NOTE(review): the driver is stored in a module-level global, not on self
        global driver

        if self.is_initialized:
            return

        if self.driver_name == 'chrome':
            driver = webdriver.Chrome(executable_path=self.driver_path)
        elif self.driver_name == 'phantomjs':
            driver = webdriver.PhantomJS(executable_path=self.driver_path)
        elif self.driver_name == 'firefox':
            driver = webdriver.Firefox(executable_path=self.driver_path)
        else:
            raise Exception(
                'Driver "{}" is not supported'.format(self.driver_name))

        self.is_initialized = True
        driver.set_window_size(self.width, self.height)
        driver.implicitly_wait(5)  # wait up to 5s when locating elements
项目:spoon    作者:Jiramew    | 项目源码 | 文件源码
def get_webdriver(self):
        """Build a PhantomJS driver from self.webdriver_config.

        Applies the optional HTTP proxy and User-Agent header from the
        config, enables screenshots, and sets the page-load timeout.
        """
        cfg = self.webdriver_config

        service_args = []
        if cfg.proxy:
            service_args += [
                "--proxy=" + cfg.proxy,
                "--proxy-type=http",
                "--ignore-ssl-errors=true"
            ]

        dcapability = dict(DesiredCapabilities.PHANTOMJS)
        if cfg.header:
            user_agent = cfg.header['User-Agent']
            dcapability["phantomjs.page.settings.userAgent"] = user_agent
            dcapability["phantomjs.page.customHeaders.User-Agent"] = user_agent
        dcapability["takesScreenshot"] = True

        driver = webdriver.PhantomJS(cfg.phantomjs_path,
                                     service_args=service_args,
                                     desired_capabilities=dcapability)
        driver.set_page_load_timeout(cfg.timeout)
        return driver
项目:EasyCrawler    作者:playwolf719    | 项目源码 | 文件源码
def process_request(self, request, spider):
        """Scrapy downloader middleware (Python 2): render a listing page with PhantomJS.

        Fetches self.pc_index_url with the page number from request.meta["page"],
        then returns the '#sf-item-list-data' element's HTML as an HtmlResponse.
        """
        try:
            driver = webdriver.PhantomJS()  # headless browser for the JS-built listing
            # driver = webdriver.Firefox()
            print "---"+str(request.meta["page"])+"-----js url start-------"
            print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            driver.get(self.pc_index_url+"&page="+str(request.meta["page"]) )
            # time.sleep(1)
            tmp=driver.find_element_by_id('sf-item-list-data').get_attribute("innerHTML")
            print "---"+str(request.meta["page"])+"-----js url end-------"
            print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            body = tmp
            return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)
        except Exception,e:
            # NOTE(review): the driver is never quit, and a failure returns None,
            # letting the request fall through to the default downloader
            print "-------------------"
            print e.__doc__
            print e.message
            print "-------------------"
项目:scrapy-training    作者:scrapinghub    | 项目源码 | 文件源码
def scrape():
    """Print every quote from the JS-paginated quotes.toscrape.com demo.

    Renders each page with PhantomJS, extracts text/author/tags with parsel,
    and clicks 'next' until the button disappears.
    """
    driver = webdriver.PhantomJS()
    driver.get('http://quotes.toscrape.com/js-onclick')
    while True:
        page = parsel.Selector(text=driver.page_source)
        for quote in page.css('div.quote'):
            item = {
                'text': quote.css('span.text::text').extract_first(),
                'author': quote.css('span small::text').extract_first(),
                'tags': quote.css('div.tags a.tag::text').extract(),
            }
            print(item)
        try:
            driver.find_element_by_css_selector('li.next > a').click()
        except NoSuchElementException:
            break  # no next button: last page reached
项目:internet-content-detection    作者:liubo0621    | 项目源码 | 文件源码
def get_html_by_webdirver(url, proxies = ''):
    """Render *url* with PhantomJS (optionally through an HTTP proxy).

    Returns the page source, or None on failure or when the page exceeds
    1 MiB (oversized pages are treated as garbage).
    """
    html = None
    driver = None
    try:
        driver = webdriver.PhantomJS()

        if proxies:
            proxy=webdriver.Proxy()
            proxy.proxy_type=ProxyType.MANUAL
            proxy.http_proxy= proxies  #'220.248.229.45:3128'
            # push the proxy into the PhantomJS capabilities and restart the session
            proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
            driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        driver.get(url)
        html = driver.page_source
        # driver.save_screenshot('1.png')
    except Exception as e:
        log.error(e)
    finally:
        # fixed: close() was only reached on success, leaking the process on errors
        if driver is not None:
            driver.quit()
    # fixed: replaced the fragile `cond and a or b` idiom with a conditional expression
    return html if html and len(html) < 1024 * 1024 else None
项目:pelisalacarta-ce    作者:pelisalacarta-ce    | 项目源码 | 文件源码
def _unshorten_linkbucks(self, uri):
        """Resolve a linkbucks short link by rendering it with PhantomJS.

        Returns (resolved_url, 200) on success, or (original_uri, error_message)
        when the skip link cannot be extracted or the browser fails.
        """
        try:
            with closing(PhantomJS(
                    service_log_path=os.path.dirname(os.path.realpath(__file__)) + '/ghostdriver.log')) as browser:
                browser.get(uri)

                # wait 5 seconds for the countdown script to reveal the skip link
                time.sleep(5)

                page_source = browser.page_source

                matches = re.findall(r'skiplink(.*?)\>', page_source)
                # fixed: findall returns a list, never None -- the old `is not None`
                # check let an empty result reach matches[0] and raise IndexError
                if matches:
                    link = re.sub(r'\shref\=|\"', '', matches[0])
                    if link == '':
                        return uri, 'Failed to extract link.'
                    return link, 200
                else:
                    return uri, 'Failed to extract link.'

        except Exception as e:
            return uri, str(e)
项目:crestify    作者:crestify    | 项目源码 | 文件源码
def fulltext_extract(bookmark):
    """Render a bookmark's page with PhantomJS and store its visible text.

    Saves the whitespace-normalized, utf-8-encoded body text onto the
    Bookmark row and commits.
    """
    browser = webdriver.PhantomJS(service_args=[
        "--ignore-ssl-errors=true",
        "--ssl-protocol=tlsv1",
        "--load-images=no"])
    fulltext_bookmark = Bookmark.query.get(bookmark.id)
    try:
        browser.get(fulltext_bookmark.main_url)
        body = browser.find_element_by_tag_name('body')
        bodytext = body.text
    finally:
        # fixed: quit even when rendering raises (the process used to leak)
        browser.quit()
    soup = BeautifulSoup4(bodytext)
    full_text = soup.text
    full_text = " ".join(full_text.split())  # collapse all whitespace runs
    full_text = full_text.replace('\n', '')
    full_text = full_text.encode('utf-8')
    fulltext_bookmark.full_text = full_text
    db.session.commit()
项目:decoration-design-crawler    作者:imflyn    | 项目源码 | 文件源码
def phantomjs_opened(self):
        """Open a PhantomJS driver routed through a random proxy from the pool.

        Returns the driver with a 120s page-load timeout applied.
        """
        proxy = proxy_pool.random_choice_proxy()
        capabilities = DesiredCapabilities.PHANTOMJS.copy()
        # same proxy endpoint for ftp/ssl/http traffic
        capabilities['proxy'] = dict(
            proxyType='MANUAL',
            ftpProxy=proxy,
            sslProxy=proxy,
            httpProxy=proxy,
            noProxy=None,
        )
        # capabilities['phantomjs.cli.args'] = [
        #   '--proxy-auth=' + evar.get('WONDERPROXY_USER') + ':' + evar.get('WONDERPROXY_PASS')
        # ]
        driver = webdriver.PhantomJS(desired_capabilities=capabilities)
        driver.set_page_load_timeout(120)
        return driver
项目:Product_Discovery_Automation    作者:nro111    | 项目源码 | 文件源码
def getBestSellers(self):
        """Collect Amazon best sellers across every category and sub-category.

        Walks category -> sub-category with Best_Seller_Scraper and returns
        the accumulated list of best sellers.
        """
        best_Seller_Scraper = Best_Seller_Scraper()
        print("Just assigned best_Seller_Scraper = Best_Seller_Scraper.Best_Seller_Scraper")
        driver = webdriver.PhantomJS("/phantomjs-2.1.1-windows/bin/phantomjs.exe")
        print("Just assigned         driver = webdriver.PhantomJS()")

        bestSellers = []

        try:
            #Navigate to Amazon's best seller list
            #Scrape all the Best Seller categories from Amazon and return them as an array
            bestSellerCategories = best_Seller_Scraper.getAmazonBestSellerCategories(driver)
            print("got best seller categories")
            #Loop through each of the categories and pass them into the getSubCategories method
            for bestSellerCategory in bestSellerCategories:
                bestSellerSubCategories = best_Seller_Scraper.getSubCategories(bestSellerCategory, driver)
                #Loop through each of the subCategories and pass them into the getBestSeller method
                for bestSellerSubCategory in bestSellerSubCategories:
                    # fixed: accumulate results -- the old assignment overwrote
                    # everything gathered so far on every iteration
                    bestSellers.extend(best_Seller_Scraper.getBestSellers(bestSellerSubCategory, driver))
        finally:
            driver.quit()  # fixed: release the PhantomJS process
        #Return the bestSellers array after it has members added to it
        return bestSellers
项目:scrapy_tutorials    作者:happyAnger6    | 项目源码 | 文件源码
def phantomjs_process(self,request):
        """Render request.url with PhantomJS and parse it via parse_one_news.

        Returns the parse result, or [] when rendering fails.
        """
        url = request.url
        driver = None
        try:
            driver = webdriver.PhantomJS(executable_path="/usr/bin/phantomjs")
            driver.get(url)
            body = driver.page_source
            response = HtmlResponse(url,body=body.encode('UTF-8'),request=request)
        except Exception as e:
            # fixed: the old call passed (e, url) without %s placeholders,
            # which the logging module cannot format
            self.logger.error("phantomjs error: %s %s", e, url)
            return []
        finally:
            if driver is not None:
                driver.quit()  # fixed: release the PhantomJS process
        # note: the unused inner helpers do_counts/do_item were removed
        return self.parse_one_news(response)
项目:taobao    作者:chifeng111    | 项目源码 | 文件源码
def get_page(key_words):
    """Search world.taobao.com for *key_words* and snapshot 100 result pages.

    Returns a list of gbk-encoded page_source strings: the first results
    page plus 99 'next page' clicks.
    """
    html = []
    b = webdriver.PhantomJS(executable_path="phantomjs.exe")
    #b = webdriver.Firefox()
    b.get("https://world.taobao.com/")
    time.sleep(3)  # let the homepage finish loading
    b.find_element_by_id('q').send_keys(key_words)
    # click the search button (absolute XPath; brittle if the layout changes)
    b.find_element_by_xpath('/html/body/div[1]/div[2]/div/div/div/div[2]/div[1]/div[2]/form/div[1]/button').click()
    time.sleep(3)
    b.execute_script("window.scrollTo(0, document.body.scrollHeight);")  # trigger lazy loading
    b.maximize_window()
    html.append(b.page_source.encode('gbk', 'ignore'))
    for i in range(99):
        # click the 'next page' arrow (see XPaths noted below this function)
        b.find_element_by_xpath('/html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[last()]/span').click()
        page = str(i+1)
        time.sleep(5)
        b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        html.append(b.page_source.encode('gbk', 'ignore'))
        print("?????%s?" %page)  # progress message (garbled in source encoding)
    b.close()
    return html

#/html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[last()]/span
#/html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[7]/span
项目:Spider    作者:poluo    | 项目源码 | 文件源码
def grasp_main():
    """Load hrefs from result11.json and fetch each detail page via a process pool.

    NOTE(review): a single PhantomJS driver is passed to multiprocessing
    workers -- webdriver instances are not process-safe/picklable; confirm
    this actually works as intended.
    """
    count = 11

    driver=webdriver.PhantomJS()
    while count:
        with open("result{0}.json".format(count),'r')  as fobj:
            data_list = json.load(fobj)
        print(len(data_list))
        count = count -1
        pool= multiprocessing.Pool()
        for data in data_list:
            pool.apply_async(get_detail_info, args=(driver,data['href'],))
        pool.close()
        pool.join()
        # NOTE(review): unconditional break -- only result11.json is processed
        # and the sleep below is unreachable
        break
        time.sleep(20)
    driver.quit()
项目:dust_repos    作者:taozhijiang    | 项目源码 | 文件源码
def request_body(url):
    """Render *url* and return the concatenated text of its article divs.

    Looks for the 'fd_article_ws ' container; returns a placeholder string
    when it is missing, and exits the process when it has no div/p children.
    """
    ret = ""
    browser = webdriver.PhantomJS()
    try:
        browser.get(url)
        content = browser.page_source
    finally:
        browser.quit()  # fixed: the browser process was never released
    soup = BeautifulSoup(content, 'lxml')
    bodys = soup.find('div', attrs={"class":"fd_article_ws "})
    if not bodys:
        print("Error1:" + url)
        return "??????"

    body = bodys.findAll('div')
    if not body:
        body = bodys.findAll('p')
        if not body:
            print("Error2:" + url)
            sys.exit()

    for item in body:
        if item and item.text:
            ret += item.text.strip() + "\n"

    return ret
项目:dust_repos    作者:taozhijiang    | 项目源码 | 文件源码
def request_body(url):
    """Render *url* and return the concatenated text of its paper-content divs.

    Looks for the 'paper_content' container; exits the process when the
    container or its div/p children are missing.
    """
    ret = ""

    browser = webdriver.PhantomJS()
    try:
        browser.get(url)
        content = browser.page_source
    finally:
        browser.quit()  # fixed: the browser process was never released
    soup = BeautifulSoup(content, 'lxml')
    bodys = soup.find('div', attrs={"class":"paper_content"})
    if not bodys:
        print("Error1:" + url)
        sys.exit()
    body = bodys.findAll('div')
    if not body:
        body = bodys.findAll('p')
        if not body:
            print("Error2:" + url)
            sys.exit()

    for item in body:
        if item and item.text:
            ret += item.text.strip() + "\n"

    return ret
项目:tianyancha    作者:Range0122    | 项目源码 | 文件源码
def process_request(self, request, spider):
        """Scrapy middleware: render tianyancha company pages with PhantomJS.

        Only handles URLs whose 27th character is 'c'; other requests fall
        through to the default downloader (returns None).
        """
        # NOTE(review): position 26 appears to distinguish company pages -- confirm
        if request.url[26] == 'c':
            ua = random.choice(self.user_agent_list)
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = ua
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(executable_path='E:\Webdriver\phantomjs-2.1.1-windows\\bin\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(request.url)
                # crude anti-bot delay before touching the page
                time.sleep(random.randint(15, 22))
                try:
                    detail = driver.find_element_by_xpath('//a[@ng-click="showDetail = btnOnClick(showDetail)"]')
                    detail.click()
                except Exception:  # fixed: bare except also swallowed KeyboardInterrupt
                    pass  # detail toggle absent on some pages; best effort
                body = driver.page_source
                url = driver.current_url
            finally:
                driver.quit()  # fixed: quit even when get()/sleep raises
            return HtmlResponse(url=url, body=body, request=request, encoding='utf-8')
项目:pyEbaySniper    作者:braph    | 项目源码 | 文件源码
def setup_vars():
    """Register every configuration variable used by the ebay sniper."""
    # account credentials
    reg_variable('USER',     'User for ebay')
    reg_variable('PASSWORD', 'Password for ebay')
    # selenium backend, restricted to the three supported drivers
    reg_variable('DRIVER',   'Driver to use with selenium', 'PhantomJS',
        validate=lambda v: v in ('Chrome', 'Firefox', 'PhantomJS')
    )
    # setlocale itself validates the value (raises locale.Error on bad input)
    reg_variable('LOCALE',   'Localization for numerics and monetary stuff',
        validate=lambda v: locale.setlocale(locale.LC_ALL, v)
    )
    reg_variable('BID_AHEAD_SECONDS', 'How many seconds before the actually specified time the bid should be placed',
        value=3, type=int
    )
    reg_variable('HISTORY',   'History file',
        os.path.expanduser("~/.ebay_hist")
    )
    #reg_variable('COOKIE_FILE', 'File for cookies. (Optional)',
    #    os.path.expandvars('/tmp/ebay-$USER-cookie')
    #)

    reg_variable('DEBUG', 'Print stacktraces and write ghostdriver.log', type=bool, value=0)
    # URLs and regexes describing the ebay.de login flow
    reg_variable('LOGIN_URL', 'URL for ebay login page', 'https://signin.ebay.de/ws/eBayISAPI.dll?SignIn')
    reg_variable('LOGIN_URL_RE', 'RegEx to check if URL is a login page', 'https://signin.ebay.de')
    reg_variable('LOGIN_FIELD_PASS_RE', 'RegEx to find password input field in login page', 'passwor')
    reg_variable('LOGIN_FIELD_USER_RE', 'RegEx to find user input field in login page', 'e-mail')
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_english_to_english_page_source():
    """Check via page_source that the flag stays English after selecting English.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["ENGLISH"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        source = driver.page_source
        assert_in(TEST_ID["ENGLISH"], source)
        assert_not_in(TEST_ID["GERMAN"], source)
    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_english_to_german_page_source():
    """Check via page_source that the flag switches from English to German.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["GERMAN"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        source = driver.page_source
        assert_in(TEST_ID["GERMAN"], source)
        assert_not_in(TEST_ID["ENGLISH"], source)
    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_german_to_german_page_source():
    """Check via page_source that the flag stays German after selecting German.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["GERMAN"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        source = driver.page_source
        assert_in(TEST_ID["GERMAN"], source)
        assert_not_in(TEST_ID["ENGLISH"], source)
    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_german_to_english_page_source():
    """Check via page_source that the flag switches from German to English.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["ENGLISH"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        source = driver.page_source
        assert_in(TEST_ID["ENGLISH"], source)
        assert_not_in(TEST_ID["GERMAN"], source)
    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_english_to_german_cookies():
    """Check via cookies that the language switches from English to German.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    The language value always sits in the last cookie of the list.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["GERMAN"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        cookies = driver.get_cookies()
        language_value = cookies[-1].get("value")

        if language_value is None:
            raise Exception("Cookie language value is empty")
        assert_in(LANGUAGE["GERMAN"], language_value)
        assert_not_in(LANGUAGE["ENGLISH"], language_value)

    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_german_to_german_cookies():
    """Check via cookies that the language stays German after selecting German.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    The language value always sits in the last cookie of the list.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["GERMAN"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        cookies = driver.get_cookies()
        language_value = cookies[-1].get("value")

        if language_value is None:
            raise Exception("Cookie language value is empty")
        assert_in(LANGUAGE["GERMAN"], language_value)
        assert_not_in(LANGUAGE["ENGLISH"], language_value)

    finally:
        driver.close()
项目:dbas    作者:hhucn    | 项目源码 | 文件源码
def test_german_to_english_cookies():
    """Check via cookies that the language switches from German to English.

    service_args: --ignore-ssl-errors prevents the SSL v3 error.
    The language value always sits in the last cookie of the list.
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    for target in (ROOT + PATH + LANGUAGE["ENGLISH"], ROOT):
        driver.get(target)
    driver.refresh()

    try:
        cookies = driver.get_cookies()
        language_value = cookies[-1].get("value")

        if language_value is None:
            raise Exception("Cookie language value is empty")
        assert_in(LANGUAGE["ENGLISH"], language_value)
        assert_not_in(LANGUAGE["GERMAN"], language_value)

    finally:
        driver.close()
项目:rstviewer    作者:arne-cl    | 项目源码 | 文件源码
def rs3topng(rs3_filepath, png_filepath=None):
    """Convert a RS3 file into a PNG image of the RST tree.

    If no output filename is given, the PNG image is returned
    as a bytes string (which is useful for embedding).

    :param rs3_filepath: path to the input .rs3 file
    :param png_filepath: optional output path; when given the PNG is
        written there and nothing is returned
    :raises ImportError: if selenium is not installed
    :raises WebDriverException: if phantomjs is not installed
    """
    try:
        from selenium import webdriver
        from selenium.common.exceptions import WebDriverException
    except ImportError:
        raise ImportError(
            'Please install selenium: pip install selenium')

    html_str = rs3tohtml(rs3_filepath)

    temp = tempfile.NamedTemporaryFile(suffix='.html', delete=False)
    temp.write(html_str.encode('utf8'))
    temp.close()

    try:
        driver = webdriver.PhantomJS()
    except WebDriverException as err:
        os.unlink(temp.name)  # don't leak the temp file when phantomjs is missing
        raise WebDriverException(
           'Please install phantomjs: http://phantomjs.org/\n' + err.msg)

    try:
        driver.get(temp.name)
    finally:
        os.unlink(temp.name)

    try:
        png_str = driver.get_screenshot_as_png()
    finally:
        driver.quit()  # terminate the phantomjs process (was never quit before)

    if png_filepath:
        # get_screenshot_as_png() returns bytes, so the file must be opened
        # in binary mode -- 'w' raises TypeError on Python 3.
        with open(png_filepath, 'wb') as png_file:
            png_file.write(png_str)
    else:
        return png_str
项目:NetEaseMusicCrawler    作者:yaochao    | 项目源码 | 文件源码
def __init__(self, settings):
        """Set up a bounded pool of PhantomJS drivers for the download handler."""
        # Extra keyword arguments forwarded to webdriver.PhantomJS().
        self.options = settings.get('PHANTOMJS_OPTIONS', {})
        # Maximum number of PhantomJS instances allowed to run at once (default 10).
        pool_size = settings.get('PHANTOMJS_MAXRUN', 10)
        self.sem = defer.DeferredSemaphore(pool_size)
        # LIFO queue: hand out the most recently returned (warm) driver first.
        self.queue = Queue.LifoQueue(maxsize=pool_size)
        # Tear the pool down when the spider closes.
        SignalManager(dispatcher.Any).connect(receiver=self._close, signal=signals.spider_closed)
项目:NetEaseMusicCrawler    作者:yaochao    | 项目源码 | 文件源码
def _wait_request(self, request, spider):
        """Fetch request.url with a pooled PhantomJS driver and defer until ajax completes.

        :param request: scrapy Request whose url is loaded
        :param spider: spider the response belongs to
        :return: a Deferred firing with the rendered response
        """
        try:
            # Reuse an idle driver from the pool if one is available.
            driver = self.queue.get_nowait()
        except Queue.Empty:
            # Was a bare `except:`, which hid every other error; only an
            # empty pool should trigger creating a fresh driver.
            driver = webdriver.PhantomJS(**self.options)
        driver.get(request.url)

        # wait until ajax completed
        dfd = threads.deferToThread(self._wait_and_switch, driver)
        dfd.addCallback(self._response, driver, spider)
        return dfd
项目:ComicSpider    作者:QuantumLiu    | 项目源码 | 文件源码
def get_pages(self):
        '''
        Get all pages' urls of this chapter using selenium and PhantomJS.

        return:
            a list of tuples (page_num, page_url); empty list on failure
        '''
        r_slt = r'onchange="select_page\(\)">([\s\S]*?)</select>'
        r_p = r'<option value="(.*?)".*?>?(\d*?)?<'
        driver = None  # guard: construction below can fail, so check before quit()
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            # Disable image loading to speed up the page fetch.
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.get(self.chapter_url)
            text = driver.page_source
            st = re.findall(r_slt, text)[0]
            self.pages = [(int(p[-1]), p[0]) for p in re.findall(r_p, st)]
        except KeyboardInterrupt:
            raise
        except Exception:
            traceback.print_exc()
            self.pages = []
        finally:
            if driver is not None:
                driver.quit()
        # The return must NOT live in the finally block: a `return` there
        # silently swallows the re-raised KeyboardInterrupt above.
        print('Got {l} pages in chapter {ch}'.format(l=len(self.pages), ch=self.chapter_title))
        return self.pages
项目:taobao-data2    作者:songyubin530    | 项目源码 | 文件源码
def get_taobao_cate():
    """Scrape shop-search category names from Taobao and append them to cate_list."""
    url = 'https://shopsearch.taobao.com/search?app=shopsearch'
    driver = webdriver.PhantomJS(executable_path="d:\\phantomjs.exe")
    try:
        driver.get(url)
        driver.implicitly_wait(3)
        page = driver.page_source
    finally:
        driver.quit()  # always release the PhantomJS process (was never quit before)
    soup = BeautifulSoup(page, 'lxml')
    cate_name = re.findall(r"q=(.*?)&amp;tracelog=shopsearchnoqcat", str(soup))
    for c in cate_name:
        # Category names arrive percent-encoded in gb2312.
        cname = urllib.parse.unquote(c, encoding='gb2312')
        # NOTE(review): stores the still-encoded value; the decoded cname is only
        # printed -- confirm whether cate_list is meant to hold decoded names.
        cate_list.append(c)
        print(cname)
    print(cate_list)
项目:SerpScrap    作者:ecoron    | 项目源码 | 文件源码
def _get_PhantomJS(self):
        """Create a PhantomJS webdriver on self.webdriver.

        Configures an optional proxy (with authentication) and a random
        desktop user agent. Returns True on success, False otherwise.
        """
        try:
            service_args = []

            # Forward proxy settings to the PhantomJS process, if any.
            if self.proxy:
                service_args.append(
                    '--proxy={}:{}'.format(self.proxy.host, self.proxy.port))
                service_args.append(
                    '--proxy-type={}'.format(self.proxy.proto))
                if self.proxy.username and self.proxy.password:
                    service_args.append(
                        '--proxy-auth={}:{}'.format(
                            self.proxy.username,
                            self.proxy.password
                        )
                    )

            ua = random_user_agent(mobile=False)
            logger.info('useragent: {}'.format(ua))

            capabilities = dict(DesiredCapabilities.PHANTOMJS)
            capabilities["phantomjs.page.settings.userAgent"] = ua

            try:
                self.webdriver = webdriver.PhantomJS(
                    executable_path=self.config['executable_path'],
                    service_args=service_args,
                    desired_capabilities=capabilities
                )
                return True
            except (ConnectionError, ConnectionRefusedError, ConnectionResetError) as err:
                logger.error(err)
                return False
        except WebDriverException as e:
            logger.error(e)
        return False
项目:Wolf_of_Investopedia    作者:theriley106    | 项目源码 | 文件源码
def __init__(self):
        # Desktop Chrome UA string, kept for any plain HTTP requests made elsewhere.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
        self.driver = webdriver.PhantomJS()
        self.driver.get('http://www.investopedia.com/markets/stocks/tsla/')
        self.driver.save_screenshot('screen.png') # save a screenshot to disk
        # Scan the browser's HAR network log for the first request to the
        # xignite quote service; its query string carries the auth token and user id.
        # NOTE(review): get_log('har') is GhostDriver/PhantomJS-specific -- confirm support.
        networkActivity = str(re.findall('https:\/\/superquotes\.xignite\.com\/((.*?))"', str(self.driver.get_log('har')))[0])
        # Token is the text between "&_token=" and the next "&".
        self.Token = str(networkActivity.partition("&_token=")[2]).partition('&')[0]
        # UserID: digits after "&_token_userid=" (each \d+ group must be followed
        # by a non-digit, so a trailing bare digit run would be dropped).
        self.UserID = ''.join(re.findall('(\d+)\D', str(networkActivity.partition("&_token_userid=")[2].partition(' ')[0])))
项目:Tktr    作者:Intuity    | 项目源码 | 文件源码
def make_browser(cls):
        """Start a headless browser for the test class, falling back to Firefox.

        Sets cls.browser, loads the local test server root, and builds cls.app.
        Raises if neither PhantomJS nor Firefox can be started.
        """
        # Build a selenium browser
        try:
            cls.browser = webdriver.PhantomJS()
        except Exception:
            try:
                # Fall back to Firefox
                cls.browser = webdriver.Firefox()
            except Exception:
                # Was a bare `except:`, which would also trap SystemExit
                # and KeyboardInterrupt.
                raise Exception("Could not start a Firefox or PhantomJS instance!")
        cls.browser.get("http://127.0.0.1:%i/" % cls.port_num)
        # Setup to support routing
        cls.app = cls._make_app()
项目:integration    作者:mendersoftware    | 项目源码 | 文件源码
def phantom_driver():
    """Return a PhantomJS driver that ignores SSL errors and disables web security."""
    args = ["--ignore-ssl-errors=true", "--web-security=false"]
    return webdriver.PhantomJS(service_args=args)
项目:onionstack    作者:ntddk    | 项目源码 | 文件源码
def get_title_with_screenshot(url):
    """Fetch the title of a .onion service and save a screenshot as <url>.png.

    :param url: onion address without scheme or ".onion" suffix
    :return: the page title
    """
    driver = webdriver.PhantomJS(service_args = service_args, desired_capabilities = dcap)
    try:
        driver.set_window_size(1024, 512)
        driver.get('http://' + url + '.onion') # 'http://' is required.
        driver.save_screenshot(url + '.png')
        title = driver.title
    finally:
        # quit() (not close()) terminates the PhantomJS process; without the
        # try/finally a failed get() leaked the driver entirely.
        driver.quit()
    return title
项目:landchina-spider    作者:sundiontheway    | 项目源码 | 文件源码
def __init__(self):
        # Headless PhantomJS: skip image downloads and enable the on-disk cache.
        args = ['--load-images=false', '--disk-cache=true']
        self.driver = webdriver.PhantomJS(service_args=args)
项目:MediumBot    作者:MattFlood7    | 项目源码 | 文件源码
def Launch():
    """
    Launch the Medium bot and ask the user what browser they want to use.
    """

    if 'chrome' not in DRIVER.lower() and 'firefox' not in DRIVER.lower() and 'phantomjs' not in DRIVER.lower():

        # Browser choice
        print 'Choose your browser:'
        print '[1] Chrome'
        print '[2] Firefox/Iceweasel'
        print '[3] PhantomJS'

        while True:
            try:
                browserChoice = int(raw_input('Choice? '))
            except ValueError:
                print 'Invalid choice.',
            else:
                if browserChoice not in [1,2,3]:
                    print 'Invalid choice.',
                else:
                    break

        StartBrowser(browserChoice)

    elif 'chrome' in DRIVER.lower():
        StartBrowser(1)

    elif 'firefox' in DRIVER.lower():
        StartBrowser(2)

    elif 'phantomjs' in DRIVER.lower():
        StartBrowser(3)