The following 49 code examples, extracted from open-source Python projects, illustrate how to use selenium.webdriver.PhantomJS().
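Before the examples, here is a minimal sketch of the pattern most of them share (construct the driver, load a page, read page_source, then quit). It is not taken from any of the projects below and assumes a PhantomJS binary is available on the PATH.

from selenium import webdriver

# Minimal sketch (not one of the 49 examples); assumes the phantomjs binary is on PATH.
driver = webdriver.PhantomJS()
try:
    driver.get('https://example.com')
    html = driver.page_source   # rendered HTML after JavaScript has run
    print(driver.title)
finally:
    driver.quit()               # always shut the headless browser down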
def __init__(self, queue, DEBUG=config.DEBUG, reset=False, socksport=None):
    if not socksport:
        socksport = config.SOCKS_PORT
    ## TODO add checks that a socks proxy is even open
    ## TODO add Tor checks to make sure circuits are operating
    threading.Thread.__init__(self)
    self.reset = reset          # Whether to check if a url has been collected
    self.queue = queue          # Multithreading queue of urls
    self.proxysettings = [
        '--proxy=127.0.0.1:%s' % socksport,
        '--proxy-type=socks5',
    ]
    #self.proxysettings = []  # DEBUG
    #self.ignore_ssl = ['--ignore-ssl-errors=true', '--ssl-protocols=any']
    self.ignore_ssl = []
    self.service_args = self.proxysettings + self.ignore_ssl
    self.failcount = 0          # Counts failures
    self.donecount = 0          # Counts successes
    self.tor = tor.tor()        # Manages Tor via control port
    if DEBUG:  # PhantomJS sends a lot of data if debug set to DEBUG
        logging.basicConfig(level=logging.INFO)
def _get_webdriver(self):
    """Return a webdriver instance and set it up with the according
    profile/proxies. Chrome is quite fast, but not as stealthy as PhantomJS.

    Returns:
        The appropriate webdriver mode according to self.browser_type.
        If no webdriver mode could be found, return False.
    """
    if self.browser_type == 'chrome':
        return self._get_Chrome()
    elif self.browser_type == 'firefox':
        return self._get_Firefox()
    elif self.browser_type == 'phantomjs':
        return self._get_PhantomJS()
    return False
def process_request(self, request, spider):
    if request.meta.has_key('PhantomJS'):
        log.debug('PhantomJS Requesting: %s' % request.url)
        ua = None
        try:
            ua = UserAgent().random
        except:
            ua = 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11'
        webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = ua
        try:
            self.driver.get(request.url)
            content = self.driver.page_source.encode('utf-8')
            url = self.driver.current_url.encode('utf-8')
        except:
            return HtmlResponse(request.url, encoding='utf-8', status=503, body='')
        if content == '<html><head></head><body></body></html>':
            return HtmlResponse(request.url, encoding='utf-8', status=503, body='')
        else:
            return HtmlResponse(url, encoding='utf-8', status=200, body=content)
    else:
        log.debug('Common Requesting: %s' % request.url)
def main(number):
    url = 'http://www.bilibili.com/video/av' + str(number) + '/'
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False
    # phantomjs.exe is located in G:\Anaconda3\phantomjs\bin
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        driver.get(url)
        # time.sleep(random.uniform(1, 5))
        content = driver.page_source  # grab the rendered page source
        driver.close()
        driver.quit()
        soup = BeautifulSoup(content, 'lxml')
        getInfo(soup)
    except Exception:
        pass
    finally:
        if driver:
            driver.quit()
def getSoup(start, stop):
    try:
        for number in range(start, stop + 1):
            url = 'http://www.bilibili.com/video/av' + str(number) + '/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # phantomjs.exe is located in G:\Anaconda3\phantomjs\bin
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            driver.get(url)
            # time.sleep(1)  # wait for the page to finish loading
            content = driver.page_source  # grab the rendered page source
            driver.close()
            driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception:
        pass
def give_me_the_page(n, user_name, password, broswer, pt=None):
    if not pt:
        if broswer == 'Chrome':
            pt = webdriver.Chrome()
        elif broswer == 'Safari':
            pt = webdriver.Safari()
        else:
            pt = webdriver.PhantomJS()
    pt.get('http://electsys.sjtu.edu.cn/edu/login.aspx')
    time.sleep(1)
    pt.execute_script("""var img=document.getElementById('form-input').getElementsByTagName('div')[2].getElementsByTagName('img')[0];
        var d=document.createElement('CANVAS');
        var cxt=d.getContext('2d');
        d.width=img.width;
        d.height=img.height;
        cxt.drawImage(img,0,0);
        img.src=d.toDataURL('png');""")
def _click_page(total_posts, pool_size, group_index):
    _log.info('Group {}: starting...'.format(group_index + 1))
    if group_index > 0 and total_posts < pool_size * group_index:
        return
    # create a headless PhantomJS browser
    _driver = webdriver.PhantomJS()
    _driver.get('https://www.xncoding.com/archives/')
    global TRY_COUNT
    for k in range(1, TRY_COUNT + 1):
        # _log.info('Group {}: round {}...'.format(group_index + 1, k))
        for i in range(pool_size * group_index, min(pool_size * (group_index + 1), total_posts)):
            l_xpath = '(//article/header/h1[@class="post-title"]/a[@class="post-title-link"])[{}]'.format(i + 1)
            ele = WebDriverWait(_driver, 2).until(
                EC.presence_of_element_located((By.XPATH, l_xpath))
            )
            ele.click()
            WebDriverWait(_driver, 5).until(
                EC.presence_of_element_located((By.XPATH, '//div[@class="post-body"]'))
            )
            _driver.back()
    _log.info('Group {}: finished.'.format(group_index + 1))
    _driver.close()
def just_click():
    # create a headless PhantomJS browser
    _driver = webdriver.PhantomJS()
    _driver.get('https://www.xncoding.com/archives/')
    # driver.maximize_window()
    posts_count = len(_driver.find_elements_by_xpath(
        '//article/header/h1[@class="post-title"]/a[@class="post-title-link"]'))
    for cc in range(1, posts_count + 1):
        l_xpath = '(//article/header/h1[@class="post-title"]/a[@class="post-title-link"])[{}]'.format(cc)
        ele = WebDriverWait(_driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, l_xpath))
        )
        _log.info('Clicking post {}'.format(cc))
        ele.click()
        WebDriverWait(_driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//div[@class="post-body"]'))
        )
        _driver.back()
def start_PhantomJS():
    uaList = []
    for line in open('Base_Data\\Ualist.txt'):
        uaList.append(line[:-1])
    open('Base_Data\\Ualist.txt').close()
    i = random.choice(uaList)
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=1',
        'User-Agent': i,
        'Connection': 'keep-alive'
    }
    service_args = [
        #'--proxy=127.0.0.1:9999',
        #'--proxy-type=http',
        '--ignore-ssl-errors=true',
    ]
    for key, value in headers.items():
        webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.customHeaders.{}'.format(key)] = value
    webdriver.DesiredCapabilities.PHANTOMJS['phantomjs.page.settings.userAgent'] = i
    dr = webdriver.PhantomJS(executable_path=r'C:\\Users\\sorano\\Desktop\\???????\\Asuna Sword\\bin\\phantomjs.exe',
                             service_args=service_args)
    return dr, uaList
def find_hackathon(self):
    print('--- Fetching hackathons --- \n')
    driver = webdriver.PhantomJS()
    driver.get('https://www.hackerearth.com/challenges/')
    res = driver.page_source
    soup = BeautifulSoup(res, 'lxml')
    upcoming = soup.find('div', {'class': 'upcoming challenge-list'})
    if upcoming is not None:
        all_hackathons = upcoming.find_all('div', {'class': 'challenge-content'})
        for i, hackathon in enumerate(all_hackathons, 1):
            challenge_type = hackathon.find('div', {'class': 'challenge-type'}).text.replace("\n", " ").strip()
            challenge_name = hackathon.find('div', {'class': 'challenge-name'}).text.replace("\n", " ").strip()
            date_time = hackathon.find('div', {'class': 'challenge-list-meta challenge-card-wrapper'}).text.replace("\n", " ").strip()
            print("[{}] {}\n{}\n{}\n\n".format(str(i), challenge_name, challenge_type, date_time))
    else:
        print("No hackathon data found.")
def login(rollno, password):
    driver = webdriver.PhantomJS()
    driver.get("http://slcm.manipal.edu/loginForm.aspx")
    user_field = driver.find_element_by_id("txtUserid")
    pass_field = driver.find_element_by_id("txtpassword")
    user_field.send_keys(rollno)
    pass_field.send_keys(password)
    sleep(0.5)
    driver.find_element_by_css_selector('#btnLogin').click()
    sleep(1)
    try:
        driver.find_element_by_id("txtUserid")
        return None
    except:
        pass
    return driver
def create_selenium_driver(browser='chrome'):
    # set default browser string based on env (if available)
    env_browser = os.environ.get('TOASTER_TESTS_BROWSER')
    if env_browser:
        browser = env_browser

    if browser == 'chrome':
        return webdriver.Chrome(
            service_args=["--verbose", "--log-path=selenium.log"]
        )
    elif browser == 'firefox':
        return webdriver.Firefox()
    elif browser == 'marionette':
        capabilities = DesiredCapabilities.FIREFOX
        capabilities['marionette'] = True
        return webdriver.Firefox(capabilities=capabilities)
    elif browser == 'ie':
        return webdriver.Ie()
    elif browser == 'phantomjs':
        return webdriver.PhantomJS()
    else:
        msg = 'Selenium driver for browser %s is not available' % browser
        raise RuntimeError(msg)
def add_url_links(self, links, url=''):
    k = 0
    for link in sorted(links, key=lambda k: random.random()):
        lp = uprs.urlparse(link)
        if (lp.scheme == 'http' or lp.scheme == 'https') and not self.blacklisted(link):
            if self.add_link(link):
                k += 1
                if k > self.max_links_per_page:
                    break
    if self.verbose or self.debug:
        current_url = url  # default
        try:
            @self.phantomjs_short_timeout
            def phantomjs_current_url():
                return self.driver.current_url
            current_url = phantomjs_current_url()
            # the current_url method breaks on a lot of sites, e.g.
            # python3 -c 'from selenium import webdriver; driver = webdriver.PhantomJS(); driver.get("https://github.com"); print(driver.title); print(driver.current_url); driver.quit()'
        except Exception as e:
            if self.debug:
                print('.current_url exception:\n{}'.format(e))
        if self.debug:
            print("{}: {:d} links added, {:d} total, {:.1f} bits domain entropy".format(
                current_url, k, self.link_count(), self.domain_entropy()))
        elif self.verbose:
            self.print_progress(current_url, num_links=k)
def load_driver(config, vdisplay=None):
    """Initialize the webdriver selected in config with the given config.

    Args:
        config (dict): The configuration loaded previously in Cabu.

    Returns:
        webdriver (selenium.webdriver): An instance of selenium webdriver or None.
    """
    if config['DRIVER_NAME'] == 'Firefox':
        driver = load_firefox(config)
    elif config['DRIVER_NAME'] == 'Chrome':
        driver = load_chrome(config)
    elif config['DRIVER_NAME'] == 'PhantomJS':
        driver = load_phantomjs(config)
    elif not config.get('DRIVER_NAME'):
        return None
    else:
        raise DriverException(vdisplay, 'Driver unrecognized.')

    driver.set_page_load_timeout(config['DRIVER_PAGE_TIMEOUT'])
    driver.set_window_size(config['DRIVER_WINDOWS_WIDTH'], config['DRIVER_WINDOWS_HEIGHT'])
    return driver
def init_driver(self):
    global driver
    if self.is_initialized:
        return
    if self.driver_name == 'chrome':
        driver = webdriver.Chrome(executable_path=self.driver_path)
    elif self.driver_name == 'phantomjs':
        driver = webdriver.PhantomJS(executable_path=self.driver_path)
    elif self.driver_name == 'firefox':
        driver = webdriver.Firefox(executable_path=self.driver_path)
    else:
        raise Exception(
            'Driver "{}" is not supported'.format(self.driver_name))
    self.is_initialized = True
    driver.set_window_size(self.width, self.height)
    driver.implicitly_wait(5)
def get_webdriver(self):
    service_args = []
    if self.webdriver_config.proxy:
        service_args.extend([
            "--proxy=" + self.webdriver_config.proxy,
            "--proxy-type=http",
            "--ignore-ssl-errors=true"
        ])
    dcapability = dict(DesiredCapabilities.PHANTOMJS)
    if self.webdriver_config.header:
        dcapability["phantomjs.page.settings.userAgent"] = self.webdriver_config.header['User-Agent']
        dcapability["phantomjs.page.customHeaders.User-Agent"] = self.webdriver_config.header['User-Agent']
    dcapability["takesScreenshot"] = True
    driver = webdriver.PhantomJS(self.webdriver_config.phantomjs_path,
                                 service_args=service_args,
                                 desired_capabilities=dcapability)
    driver.set_page_load_timeout(self.webdriver_config.timeout)
    return driver
def process_request(self, request, spider):
    try:
        driver = webdriver.PhantomJS()
        # driver = webdriver.Firefox()
        print "---" + str(request.meta["page"]) + "-----js url start-------"
        print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        driver.get(self.pc_index_url + "&page=" + str(request.meta["page"]))
        # time.sleep(1)
        tmp = driver.find_element_by_id('sf-item-list-data').get_attribute("innerHTML")
        print "---" + str(request.meta["page"]) + "-----js url end-------"
        print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        body = tmp
        return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)
    except Exception, e:
        print "-------------------"
        print e.__doc__
        print e.message
        print "-------------------"
def scrape():
    driver = webdriver.PhantomJS()
    driver.get('http://quotes.toscrape.com/js-onclick')
    while True:
        sel = parsel.Selector(text=driver.page_source)
        for quote in sel.css('div.quote'):
            print({
                'text': quote.css('span.text::text').extract_first(),
                'author': quote.css('span small::text').extract_first(),
                'tags': quote.css('div.tags a.tag::text').extract(),
            })
        try:
            next_button = driver.find_element_by_css_selector('li.next > a')
            next_button.click()
        except NoSuchElementException:
            break
def get_html_by_webdirver(url, proxies=''):
    html = None
    try:
        driver = webdriver.PhantomJS()
        if proxies:
            proxy = webdriver.Proxy()
            proxy.proxy_type = ProxyType.MANUAL
            proxy.http_proxy = proxies  # e.g. '220.248.229.45:3128'
            # write the proxy settings into webdriver.DesiredCapabilities.PHANTOMJS
            proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
            driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
        driver.get(url)
        html = driver.page_source
        # driver.save_screenshot('1.png')  # take a screenshot
        driver.close()
    except Exception as e:
        log.error(e)
    return html and len(html) < 1024 * 1024 and html or None
def _unshorten_linkbucks(self, uri):
    try:
        with closing(PhantomJS(
                service_log_path=os.path.dirname(os.path.realpath(__file__)) + '/ghostdriver.log')) as browser:
            browser.get(uri)
            # wait 5 seconds
            time.sleep(5)
            page_source = browser.page_source
            link = re.findall(r'skiplink(.*?)\>', page_source)
            if link is not None:
                link = re.sub(r'\shref\=|\"', '', link[0])
                if link == '':
                    return uri, 'Failed to extract link.'
                return link, 200
            else:
                return uri, 'Failed to extract link.'
    except Exception as e:
        return uri, str(e)
def fulltext_extract(bookmark):
    browser = webdriver.PhantomJS(service_args=[
        "--ignore-ssl-errors=true",
        "--ssl-protocol=tlsv1",
        "--load-images=no"])
    fulltext_bookmark = Bookmark.query.get(bookmark.id)
    browser.get(fulltext_bookmark.main_url)
    body = browser.find_element_by_tag_name('body')
    bodytext = body.text
    soup = BeautifulSoup4(bodytext)
    full_text = soup.text
    full_text = " ".join(full_text.split())
    full_text = full_text.replace('\n', '')
    full_text = full_text.encode('utf-8')
    fulltext_bookmark.full_text = full_text
    db.session.commit()
    browser.quit()
def phantomjs_opened(self):
    capabilities = DesiredCapabilities.PHANTOMJS.copy()
    proxy = proxy_pool.random_choice_proxy()
    capabilities['proxy'] = {
        'proxyType': 'MANUAL',
        'ftpProxy': proxy,
        'sslProxy': proxy,
        'httpProxy': proxy,
        'noProxy': None
    }
    # capabilities['phantomjs.cli.args'] = [
    #     '--proxy-auth=' + evar.get('WONDERPROXY_USER') + ':' + evar.get('WONDERPROXY_PASS')
    # ]
    driver = webdriver.PhantomJS(desired_capabilities=capabilities)
    driver.set_page_load_timeout(120)
    return driver
def getBestSellers(self):
    best_Seller_Scraper = Best_Seller_Scraper()
    print("Just assigned best_Seller_Scraper = Best_Seller_Scraper.Best_Seller_Scraper")
    driver = webdriver.PhantomJS("/phantomjs-2.1.1-windows/bin/phantomjs.exe")
    print("Just assigned driver = webdriver.PhantomJS()")
    bestSellers = []
    # Navigate to Amazon's best seller list
    # Scrape all the Best Seller categories from Amazon and return them as an array
    bestSellerCategories = best_Seller_Scraper.getAmazonBestSellerCategories(driver)
    print("got best seller categories")
    # Loop through each of the categories and pass them into the getSubCategories method
    for bestSellerCategory in bestSellerCategories:
        bestSellerSubCategories = best_Seller_Scraper.getSubCategories(bestSellerCategory, driver)
        # Loop through each of the subCategories and pass them into the getBestSeller method
        for bestSellerSubCategory in bestSellerSubCategories:
            bestSellers = best_Seller_Scraper.getBestSellers(bestSellerSubCategory, driver)
    # Return the bestSellers array after it has members added to it
    return bestSellers
def phantomjs_process(self, request):
    def do_counts(str_counts):
        try:
            counts = str_counts.replace(',', '')
            return counts
        except:
            return 0

    def do_item(item):
        if item and isinstance(item, list):
            return item[0]
        return item

    try:
        url = request.url
        driver = webdriver.PhantomJS(executable_path="/usr/bin/phantomjs")
        driver.get(request.url)
        body = driver.page_source
        response = HtmlResponse(url, body=body.encode('UTF-8'), request=request)
    except Exception as e:
        self.logger.error("phantomjs error:", e, url)
        return []
    return self.parse_one_news(response)
def get_page(key_words):
    html = []
    b = webdriver.PhantomJS(executable_path="phantomjs.exe")
    # b = webdriver.Firefox()
    b.get("https://world.taobao.com/")
    time.sleep(3)
    b.find_element_by_id('q').send_keys(key_words)
    b.find_element_by_xpath('/html/body/div[1]/div[2]/div/div/div/div[2]/div[1]/div[2]/form/div[1]/button').click()
    time.sleep(3)
    b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    b.maximize_window()
    html.append(b.page_source.encode('gbk', 'ignore'))
    for i in range(99):
        b.find_element_by_xpath('/html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[last()]/span').click()
        page = str(i + 1)
        time.sleep(5)
        b.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        html.append(b.page_source.encode('gbk', 'ignore'))
        print("Fetched page %s" % page)
    b.close()
    return html

# /html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[last()]/span
# /html/body/div[5]/div[4]/div/div[1]/div[1]/div[4]/div/div/a[7]/span
def grasp_main():
    count = 11
    driver = webdriver.PhantomJS()
    while count:
        with open("result{0}.json".format(count), 'r') as fobj:
            data_list = json.load(fobj)
        print(len(data_list))
        count = count - 1
        pool = multiprocessing.Pool()
        for data in data_list:
            pool.apply_async(get_detail_info, args=(driver, data['href'],))
        pool.close()
        pool.join()
        break
        time.sleep(20)
    driver.quit()
def request_body(url):
    ret = ""
    browser = webdriver.PhantomJS()
    response = browser.get(url)
    content = browser.page_source
    soup = BeautifulSoup(content, 'lxml')
    bodys = soup.find('div', attrs={"class": "fd_article_ws "})
    if not bodys:
        print("Error1:" + url)
        return "Failed to extract the article body"
    body = bodys.findAll('div')
    if not body:
        body = bodys.findAll('p')
    if not body:
        print("Error2:" + url)
        sys.exit()
    for item in body:
        if item and item.text:
            ret += item.text.strip() + "\n"
    return ret
def request_body(url):
    ret = ""
    browser = webdriver.PhantomJS()
    response = browser.get(url)
    content = browser.page_source
    soup = BeautifulSoup(content, 'lxml')
    bodys = soup.find('div', attrs={"class": "paper_content"})
    if not bodys:
        print("Error1:" + url)
        sys.exit()
    body = bodys.findAll('div')
    if not body:
        body = bodys.findAll('p')
    if not body:
        print("Error2:" + url)
        sys.exit()
    for item in body:
        if item and item.text:
            ret += item.text.strip() + "\n"
    return ret
def process_request(self, request, spider):
    if request.url[26] == 'c':
        ua = random.choice(self.user_agent_list)
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = ua
        dcap["phantomjs.page.settings.loadImages"] = False
        driver = webdriver.PhantomJS(executable_path='E:\\Webdriver\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe',
                                     desired_capabilities=dcap)
        driver.get(request.url)
        sleep_time = random.randint(15, 22)
        time.sleep(sleep_time)
        try:
            detail = driver.find_element_by_xpath('//a[@ng-click="showDetail = btnOnClick(showDetail)"]')
            detail.click()
        except:
            pass
        body = driver.page_source
        url = driver.current_url
        driver.quit()
        return HtmlResponse(url=url, body=body, request=request, encoding='utf-8')
def setup_vars():
    reg_variable('USER', 'User for ebay')
    reg_variable('PASSWORD', 'Password for ebay')
    reg_variable('DRIVER', 'Driver to use with selenium', 'PhantomJS',
        validate=lambda v: v in ('Chrome', 'Firefox', 'PhantomJS')
    )
    reg_variable('LOCALE', 'Localization for numerics and monetary stuff',
        validate=lambda v: locale.setlocale(locale.LC_ALL, v)
    )
    reg_variable('BID_AHEAD_SECONDS',
        'How many seconds before the actually specified time the bid should be placed',
        value=3, type=int
    )
    reg_variable('HISTORY', 'History file', os.path.expanduser("~/.ebay_hist"))
    # reg_variable('COOKIE_FILE', 'File for cookies. (Optional)',
    #     os.path.expandvars('/tmp/ebay-$USER-cookie')
    # )
    reg_variable('DEBUG', 'Print stacktraces and write ghostdriver.log', type=bool, value=0)
    reg_variable('LOGIN_URL', 'URL for ebay login page', 'https://signin.ebay.de/ws/eBayISAPI.dll?SignIn')
    reg_variable('LOGIN_URL_RE', 'RegEx to check if URL is a login page', 'https://signin.ebay.de')
    reg_variable('LOGIN_FIELD_PASS_RE', 'RegEx to find password input field in login page', 'passwor')
    reg_variable('LOGIN_FIELD_USER_RE', 'RegEx to find user input field in login page', 'e-mail')
def test_english_to_english_page_source():
    """
    service_args: to prevent ssl v3 error
    :return: Test in the page_source if the national flag changed from english to english
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["ENGLISH"])
    driver.get(ROOT)
    driver.refresh()
    try:
        html_content = driver.page_source
        assert_in(TEST_ID["ENGLISH"], html_content)
        assert_not_in(TEST_ID["GERMAN"], html_content)
    finally:
        driver.close()
def test_english_to_german_page_source():
    """
    service_args: to prevent ssl v3 error
    :return: Test in the page_source if the national flag changed from english to german
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["GERMAN"])
    driver.get(ROOT)
    driver.refresh()
    try:
        html_content = driver.page_source
        assert_in(TEST_ID["GERMAN"], html_content)
        assert_not_in(TEST_ID["ENGLISH"], html_content)
    finally:
        driver.close()
def test_german_to_german_page_source():
    """
    service_args: to prevent ssl v3 error
    :return: Test in the page_source if the national flag changed from german to german
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["GERMAN"])
    driver.get(ROOT)
    driver.refresh()
    try:
        html_content = driver.page_source
        assert_in(TEST_ID["GERMAN"], html_content)
        assert_not_in(TEST_ID["ENGLISH"], html_content)
    finally:
        driver.close()
def test_german_to_english_page_source():
    """
    service_args: to prevent ssl v3 error
    :return: Test in the page_source if the national flag changed from german to english
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["ENGLISH"])
    driver.get(ROOT)
    driver.refresh()
    try:
        html_content = driver.page_source
        assert_in(TEST_ID["ENGLISH"], html_content)
        assert_not_in(TEST_ID["GERMAN"], html_content)
    finally:
        driver.close()
def test_english_to_german_cookies():
    """
    service_args: to prevent ssl v3 error
    cookies[len(cookies) - 1].get("value"): because the value of the language
    is always a dictionary at the last place of cookies.
    :return: Test in the cookies if the language changed from english to german
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["GERMAN"])
    driver.get(ROOT)
    driver.refresh()
    try:
        cookies = driver.get_cookies()
        language_value = cookies[len(cookies) - 1].get("value")
        if language_value is not None:
            assert_in(LANGUAGE["GERMAN"], language_value)
            assert_not_in(LANGUAGE["ENGLISH"], language_value)
        else:
            raise Exception("Cookie language value is empty")
    finally:
        driver.close()
def test_german_to_german_cookies():
    """
    service_args: to prevent ssl v3 error
    cookies[len(cookies) - 1].get("value"): because the value of the language
    is always a dictionary at the last place of cookies.
    :return: Test in the cookies if the language changed from german to german
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["GERMAN"])
    driver.get(ROOT)
    driver.refresh()
    try:
        cookies = driver.get_cookies()
        language_value = cookies[len(cookies) - 1].get("value")
        if language_value is not None:
            assert_in(LANGUAGE["GERMAN"], language_value)
            assert_not_in(LANGUAGE["ENGLISH"], language_value)
        else:
            raise Exception("Cookie language value is empty")
    finally:
        driver.close()
def test_german_to_english_cookies():
    """
    service_args: to prevent ssl v3 error
    cookies[len(cookies) - 1].get("value"): because the value of the language
    is always a dictionary at the last place of cookies.
    :return: Test in the cookies if the language changed from german to english
    """
    driver = webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true"])
    driver.get(ROOT + PATH + LANGUAGE["ENGLISH"])
    driver.get(ROOT)
    driver.refresh()
    try:
        cookies = driver.get_cookies()
        language_value = cookies[len(cookies) - 1].get("value")
        if language_value is not None:
            assert_in(LANGUAGE["ENGLISH"], language_value)
            assert_not_in(LANGUAGE["GERMAN"], language_value)
        else:
            raise Exception("Cookie language value is empty")
    finally:
        driver.close()
def rs3topng(rs3_filepath, png_filepath=None):
    """Convert a RS3 file into a PNG image of the RST tree.

    If no output filename is given, the PNG image is returned
    as a string (which is useful for embedding).
    """
    try:
        from selenium import webdriver
        from selenium.common.exceptions import WebDriverException
    except ImportError:
        raise ImportError(
            'Please install selenium: pip install selenium')

    html_str = rs3tohtml(rs3_filepath)

    temp = tempfile.NamedTemporaryFile(suffix='.html', delete=False)
    temp.write(html_str.encode('utf8'))
    temp.close()

    try:
        driver = webdriver.PhantomJS()
    except WebDriverException as err:
        raise WebDriverException(
            'Please install phantomjs: http://phantomjs.org/\n' + err.msg)

    driver.get(temp.name)
    os.unlink(temp.name)

    png_str = driver.get_screenshot_as_png()
    if png_filepath:
        with open(png_filepath, 'w') as png_file:
            png_file.write(png_str)
    else:
        return png_str
def __init__(self, settings):
    self.options = settings.get('PHANTOMJS_OPTIONS', {})  # driver options
    max_run = settings.get('PHANTOMJS_MAXRUN', 10)  # maximum number of concurrent PhantomJS instances, default 10
    self.sem = defer.DeferredSemaphore(max_run)
    self.queue = Queue.LifoQueue(maxsize=max_run)  # LifoQueue: last in, first out
    SignalManager(dispatcher.Any).connect(receiver=self._close, signal=signals.spider_closed)
def _wait_request(self, request, spider):
    try:
        driver = self.queue.get_nowait()
    except:
        driver = webdriver.PhantomJS(**self.options)
    driver.get(request.url)
    # wait until ajax completed
    dfd = threads.deferToThread(self._wait_and_switch, driver)
    dfd.addCallback(self._response, driver, spider)
    return dfd
def get_pages(self):
    '''
    Get all pages' urls in the chapter using selenium and PhantomJS.
    return: a list of tuple (page_num, page_url)
    '''
    r_slt = r'onchange="select_page\(\)">([\s\S]*?)</select>'
    r_p = r'<option value="(.*?)".*?>?(\d*?)?<'
    try:
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        # skip image loading to speed up page rendering
        dcap["phantomjs.page.settings.loadImages"] = False
        driver = webdriver.PhantomJS(desired_capabilities=dcap)
        driver.get(self.chapter_url)
        text = driver.page_source
        st = re.findall(r_slt, text)[0]
        self.pages = [(int(p[-1]), p[0]) for p in re.findall(r_p, st)]
    except Exception:
        traceback.print_exc()
        self.pages = []
    except KeyboardInterrupt:
        raise KeyboardInterrupt
    finally:
        driver.quit()
    print('Got {l} pages in chapter {ch}'.format(l=len(self.pages), ch=self.chapter_title))
    return self.pages
def get_taobao_cate():
    url = 'https://shopsearch.taobao.com/search?app=shopsearch'
    driver = webdriver.PhantomJS(executable_path="d:\\phantomjs.exe")
    driver.get(url)
    driver.implicitly_wait(3)
    page = driver.page_source
    soup = BeautifulSoup(page, 'lxml')
    cate_name = re.findall(r"q=(.*?)&tracelog=shopsearchnoqcat", str(soup))
    for c in cate_name:
        cname = urllib.parse.unquote(c, encoding='gb2312')
        cate_list.append(c)
        print(cname)
    print(cate_list)
def _get_PhantomJS(self):
    try:
        service_args = []

        if self.proxy:
            service_args.extend([
                '--proxy={}:{}'.format(self.proxy.host, self.proxy.port),
                '--proxy-type={}'.format(self.proxy.proto),
            ])

            if self.proxy.username and self.proxy.password:
                service_args.append(
                    '--proxy-auth={}:{}'.format(
                        self.proxy.username,
                        self.proxy.password
                    )
                )

        useragent = random_user_agent(mobile=False)
        logger.info('useragent: {}'.format(useragent))
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = useragent

        try:
            self.webdriver = webdriver.PhantomJS(
                executable_path=self.config['executable_path'],
                service_args=service_args,
                desired_capabilities=dcap
            )
            return True
        except (ConnectionError, ConnectionRefusedError, ConnectionResetError) as err:
            logger.error(err)
            return False
    except WebDriverException as e:
        logger.error(e)
        return False
def __init__(self):
    self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    self.driver = webdriver.PhantomJS()
    self.driver.get('http://www.investopedia.com/markets/stocks/tsla/')
    self.driver.save_screenshot('screen.png')  # save a screenshot to disk
    networkActivity = str(re.findall('https:\/\/superquotes\.xignite\.com\/((.*?))"', str(self.driver.get_log('har')))[0])
    self.Token = str(networkActivity.partition("&_token=")[2]).partition('&')[0]
    self.UserID = ''.join(re.findall('(\d+)\D', str(networkActivity.partition("&_token_userid=")[2].partition(' ')[0])))
def make_browser(cls):
    # Build a selenium browser
    try:
        cls.browser = webdriver.PhantomJS()
    except Exception:
        try:
            # Fall back to Firefox
            cls.browser = webdriver.Firefox()
        except:
            raise Exception("Could not start a Firefox or PhantomJS instance!")

    cls.browser.get("http://127.0.0.1:%i/" % cls.port_num)

    # Setup to support routing
    cls.app = cls._make_app()
def phantom_driver():
    return webdriver.PhantomJS(service_args=["--ignore-ssl-errors=true", "--web-security=false"])
def get_title_with_screenshot(url):
    driver = webdriver.PhantomJS(service_args=service_args, desired_capabilities=dcap)
    driver.set_window_size(1024, 512)
    driver.get('http://' + url + '.onion')  # 'http://' is required.
    driver.save_screenshot(url + '.png')
    title = driver.title
    driver.close()
    return title
def __init__(self):
    self.driver = webdriver.PhantomJS(service_args=['--load-images=false', '--disk-cache=true'])
def Launch():
    """
    Launch the Medium bot and ask the user what browser they want to use.
    """
    if 'chrome' not in DRIVER.lower() and 'firefox' not in DRIVER.lower() and 'phantomjs' not in DRIVER.lower():
        # Browser choice
        print 'Choose your browser:'
        print '[1] Chrome'
        print '[2] Firefox/Iceweasel'
        print '[3] PhantomJS'
        while True:
            try:
                browserChoice = int(raw_input('Choice? '))
            except ValueError:
                print 'Invalid choice.',
            else:
                if browserChoice not in [1, 2, 3]:
                    print 'Invalid choice.',
                else:
                    break
        StartBrowser(browserChoice)
    elif 'chrome' in DRIVER.lower():
        StartBrowser(1)
    elif 'firefox' in DRIVER.lower():
        StartBrowser(2)
    elif 'phantomjs' in DRIVER.lower():
        StartBrowser(3)