我们从Python开源项目中，提取了以下9个代码示例，用于说明如何使用selenium.webdriver.Proxy()。
def get_html_by_webdirver(url, proxies=''):
    """Fetch the rendered HTML of ``url`` using a PhantomJS driver.

    Args:
        url: Address of the page to load.
        proxies: Optional HTTP proxy given as ``'host:port'``. When empty,
            no proxy is configured.

    Returns:
        The page source when it was fetched, is non-empty and has fewer than
        ``1024 * 1024`` characters; otherwise ``None``. Any exception raised
        while driving the browser is logged with ``log.error`` and results
        in ``None``.
    """
    html = None
    driver = None
    try:
        driver = webdriver.PhantomJS()
        if proxies:
            proxy = webdriver.Proxy()
            proxy.proxy_type = ProxyType.MANUAL
            proxy.http_proxy = proxies  # e.g. '220.248.229.45:3128'
            # Add the proxy to a *copy* of the PhantomJS capabilities so the
            # setting does not leak into the shared module-level dict (and
            # from there into every driver created later).
            capabilities = dict(webdriver.DesiredCapabilities.PHANTOMJS)
            proxy.add_to_capabilities(capabilities)
            # A new session is needed for the changed capabilities to apply.
            driver.start_session(capabilities)
        driver.get(url)
        html = driver.page_source
        # driver.save_screenshot('1.png')  # optional debugging screenshot
    except Exception as e:
        log.error(e)
    finally:
        # Always release the browser, even when loading the page failed;
        # quit() also terminates the PhantomJS process, unlike close().
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                log.error(e)

    if html and len(html) < 1024 * 1024:
        return html
    return None
def process_request(self, request, spider):
    """Render requests of the "gsxt" spider in Chrome and return the result.

    Args:
        request: The request being processed; its ``url`` is loaded in the
            browser.
        spider: The spider issuing the request. Only the spider named
            ``"gsxt"`` is handled here.

    Returns:
        An ``HtmlResponse`` built from the browser's current URL and page
        source for the "gsxt" spider, or ``None`` for any other spider.
    """
    if spider.name != "gsxt":
        return None

    # Alternative kept from the original (PhantomJS instead of Chrome):
    # driver = webdriver.PhantomJS(r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
    driver = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")
    try:
        # Disabled in the original: route the session through an IP proxy.
        # proxy = webdriver.Proxy()
        # proxy.proxy_type = ProxyType.MANUAL
        # req = requests.get("http://datazhiyuan.com:60001/plain", timeout=10)
        # print("Get an IP proxy:", req.text)
        # if req.text:
        #     proxy.http_proxy = req.text  # "1.9.171.51:800"
        # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        driver.get(request.url)
        # Presumably gives the page time to finish loading (the original
        # comment is mis-encoded; it mentions http://roll.news.qq.com/).
        time.sleep(2)
        # Scroll the document down by setting scrollTop to 10000.
        js = "var q=document.documentElement.scrollTop=10000"
        driver.execute_script(js)
        time.sleep(3)
        body = driver.page_source
        # NOTE(review): the "??" prefix looks like a mis-encoded non-ASCII
        # label; it is runtime output and is left unchanged.
        print("??" + request.url)
        return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)
    finally:
        # A driver is created for every request, so it must be shut down
        # here; otherwise each request leaves a browser process behind.
        driver.quit()
def reflashProxy(caps, driver, pIPs):
    """Restart ``driver``'s session with a random proxy and User-Agent.

    Args:
        caps: Capabilities dict. It is updated in place with the chosen
            User-Agent and the proxy settings.
        driver: WebDriver whose session is restarted through
            ``start_session(caps)``.
        pIPs: Sequence of proxies; each entry is indexed as
            ``entry[0]`` (IP) and ``entry[1]`` (port). When it holds fewer
            than ``minPIPCount`` entries it is replaced by the result of
            ``getAvailableIPs()``.

    Returns:
        A tuple ``(pIPs, pIp, randomPIpIndex)``: the (possibly refreshed)
        proxy list, the chosen IP, and the index of the chosen proxy.

    Raises:
        IndexError: If no proxy is available to choose from.
    """
    if len(pIPs) < minPIPCount:
        # Too few proxies left: fetch a fresh list.
        pIPs = getAvailableIPs()
    if not pIPs:
        raise IndexError("no proxy IPs available")

    # randrange excludes the upper bound. The previous
    # random.randint(0, len(pIPs)) could return len(pIPs) and fail with an
    # IndexError on the lookup below.
    randomPIpIndex = random.randrange(len(pIPs))
    pipObj = pIPs[randomPIpIndex]
    pIp = pipObj[0]
    pPort = pipObj[1]

    ua = random.choice(USER_AGENTS)
    caps["phantomjs.page.settings.userAgent"] = ua

    proxy = webdriver.Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = pIp + ':' + str(pPort)
    # Write the proxy into the capabilities, then start a new session so the
    # changed capabilities are used.
    proxy.add_to_capabilities(caps)
    driver.start_session(caps)
    return pIPs, pIp, randomPIpIndex
def start_browser():
    """Create a PhantomJS browser with a random User-Agent header.

    The User-Agent is picked from ``user_agent_list`` and stored in the
    shared ``webdriver.DesiredCapabilities.PHANTOMJS`` dict. A session is
    then started with those capabilities, and both the implicit wait and the
    page-load timeout are set to 120.

    Returns:
        The configured PhantomJS driver.
    """
    capabilities = webdriver.DesiredCapabilities.PHANTOMJS
    header_key = 'phantomjs.page.customHeaders.{}'.format('User-Agent')
    capabilities[header_key] = random.choice(user_agent_list)

    browser = webdriver.PhantomJS()

    # Disabled in the original: send traffic through a local proxy.
    # proxy = webdriver.Proxy()
    # proxy.proxy_type = ProxyType.MANUAL
    # proxy.http_proxy = '127.0.0.1:56923'
    # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)

    browser.start_session(capabilities)

    wait_limit = 120
    browser.implicitly_wait(wait_limit)
    browser.set_page_load_timeout(wait_limit)
    return browser
def _print_session_info(browser):
    """Print the session id, page source and cookies of ``browser``."""
    print('session_id: ', browser.session_id)
    print('page_source: ', browser.page_source)
    print('cookie: ', browser.get_cookies())
    print("----"*10, "\n")


def main():
    """Demonstrate switching a running browser between proxy settings.

    Loads http://ipecho.net/plain three times and prints the session info
    after each load: first with the initial session, then after restarting
    the session with a manual HTTP proxy fetched from a local proxy service,
    and finally after restarting it with a direct (no proxy) configuration.
    """
    # Alternatives kept from the original:
    # browser = webdriver.PhantomJS()  # Be OK in command line, but not in PyCharm.
    # browser = webdriver.PhantomJS(r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
    browser = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")
    try:
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)

        # Restart the session with a manual HTTP proxy. The session info is
        # printed again so the effect of the restart can be compared.
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.MANUAL
        # req = requests.get("http://datazhiyuan.com:60001/plain", timeout=10)
        req = requests.get("http://localhost:60001/plain", timeout=10)
        print("Get an IP proxy:", req.text)
        if req.text:
            proxy.http_proxy = req.text  # e.g. '1.9.171.51:800'
        # Use a copy so the shared DesiredCapabilities.PHANTOMJS dict is not
        # modified for every driver created afterwards.
        proxied_caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        proxy.add_to_capabilities(proxied_caps)
        browser.start_session(proxied_caps)
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)

        # Restart once more with a direct connection (no proxy).
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        direct_caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        proxy.add_to_capabilities(direct_caps)
        browser.start_session(direct_caps)
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)
    finally:
        # Shut the browser down even when one of the steps above fails.
        browser.quit()
def get_driver_phantomjs():
    """Build a PhantomJS driver with a random User-Agent and timeouts.

    A private copy of ``DesiredCapabilities.PHANTOMJS`` is used, so the
    shared capabilities dict is not modified. The page-load and script
    timeouts are both set to ``TIMEOUT``.

    References:
        PhantomJS:
        1. Setting the PhantomJS User-Agent:
           http://smilejay.com/2013/12/set-user-agent-for-phantomjs/
        2. Selenium 2 - Setting user agent for IE and Chrome:
           http://stackoverflow.com/questions/6940477/selenium-2-setting-user-agent-for-ie-and-chrome

    Returns:
        The configured PhantomJS driver.
    """
    capabilities = dict(DesiredCapabilities.PHANTOMJS)

    # Pick a User-Agent; only apply it when the choice is non-empty.
    user_agent = random.choice(RotateUserAgentMiddleware.user_agent_list)
    if user_agent:
        print("Current User-Agent is:", user_agent)
        capabilities["phantomjs.page.settings.userAgent"] = user_agent

    phantomjs_path = r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs"
    driver = webdriver.PhantomJS(
        executable_path=phantomjs_path,
        desired_capabilities=capabilities,
    )

    # Disabled in the original: restart the session behind an IP proxy.
    # proxy = webdriver.Proxy()
    # proxy.proxy_type = ProxyType.MANUAL
    # ip_proxy = get_proxy()
    # if ip_proxy:
    #     proxy.http_proxy = ip_proxy
    # # proxy.add_to_capabilities(DesiredCapabilities.PHANTOMJS)
    # # driver.start_session(DesiredCapabilities.PHANTOMJS)
    # proxy.add_to_capabilities(dcap)
    # driver.start_session(dcap)

    # Apply the same limit to page loads and to script execution.
    driver.set_page_load_timeout(TIMEOUT)
    driver.set_script_timeout(TIMEOUT)
    return driver
def get_driver_chrome(self):
    """Create a Chrome driver on a virtual display, optionally proxied.

    The proxy returned by ``get_proxy()`` is stored in ``self.proxies`` under
    both ``"http"`` and ``"https"``. When it is non-empty it is also passed
    to Chrome through ``--proxy-server``. The page-load and script timeouts
    are set to ``self.TIMEOUT``.

    Returns:
        The configured Chrome driver.

    Raises:
        Exception: Whatever driver creation or timeout configuration raises.
            The virtual display (and the driver, if it was created) is shut
            down before the exception propagates.
    """
    options = webdriver.ChromeOptions()
    proxy = get_proxy()
    # NOTE(review): the original (mis-encoded) note appears to say that both
    # "http" and "https" must be set, otherwise only http is proxied.
    self.proxies["http"] = proxy
    self.proxies["https"] = proxy
    if proxy:
        options.add_argument('--proxy-server=' + proxy)

    display = Display(visible=0, size=(800, 800))
    display.start()

    driver = None
    try:
        driver = webdriver.Chrome(
            executable_path=r"/home/lxw/Software/chromedriver_selenium/chromedriver",
            chrome_options=options,
        )

        # Disabled in the original ("PhantomJS: Not working. why?"):
        # driver = webdriver.PhantomJS(executable_path=r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
        # proxy = webdriver.Proxy()
        # proxy.proxy_type = ProxyType.MANUAL
        # proxy_str = get_proxy()
        # if proxy_str:
        #     proxy.http_proxy = proxy_str
        # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        driver.set_page_load_timeout(self.TIMEOUT)
        driver.set_script_timeout(self.TIMEOUT)
    except Exception:
        # Do not leave the browser or the virtual display running when the
        # driver could not be created or configured.
        if driver is not None:
            driver.quit()
        display.stop()
        raise
    return driver
def getNewBrowserArgs():
    """Pick a random proxy and User-Agent for a new PhantomJS browser.

    The proxy is chosen from ``getAvailableIPs()`` and the User-Agent from
    ``USER_AGENTS``. The User-Agent is written into the shared
    ``webdriver.DesiredCapabilities.PHANTOMJS`` dict, and that same dict is
    returned.

    Returns:
        A tuple ``(service_args, caps)``: the PhantomJS command-line
        arguments selecting the HTTP proxy, and the capabilities dict.
    """
    # The original also carried a disabled variant that configured the proxy
    # through webdriver.Proxy() and restarted an existing driver's session.
    proxy_pool = getAvailableIPs()
    chosen_proxy = random.choice(proxy_pool)
    host = chosen_proxy[0]
    port = chosen_proxy[1]

    caps = webdriver.DesiredCapabilities.PHANTOMJS
    caps["phantomjs.page.settings.userAgent"] = random.choice(USER_AGENTS)

    proxy_address = host + ':' + str(port)
    service_args = ['--proxy=' + proxy_address, '--proxy-type=http']
    return service_args, caps
def test_browser(browser, stats, binary, urls, per_tab_pause,
                 settle_wait_time, proxy, process_count):
    """Run the multi-tab test for one browser.

    Args:
        browser: Browser name: ``'Chrome'``, ``'Firefox'``, ``'Safari'``,
            ``'IE'`` or ``'Edge'``.
        stats: Statistics object passed to the test classes; its
            ``print_stats()`` is called in the Edge branch.
        binary: Path to the browser binary.
        urls: URLs to open (used for Chrome and Firefox).
        per_tab_pause: Forwarded to the test class as ``per_tab_pause``.
        settle_wait_time: Forwarded to the test class as ``settle_wait_time``.
        proxy: Optional HTTP proxy; applied for Chrome and Firefox.
        process_count: Iterable of content-process counts; Firefox is tested
            once per entry.

    Raises:
        Exception: If ``browser`` is not one of the handled names.
    """
    test_options = {
        'per_tab_pause': per_tab_pause,
        'settle_wait_time': settle_wait_time
    }

    if browser == 'Chrome':
        options = webdriver.chrome.options.Options()
        options.binary_location = binary
        caps = options.to_capabilities()

        if proxy:
            webdriver_proxy = webdriver.Proxy()
            webdriver_proxy.http_proxy = proxy
            webdriver_proxy.add_to_capabilities(caps)

        driver = webdriver.Chrome(desired_capabilities=caps)
        try:
            test = MultiTabTest(driver, stats, **test_options)
            test.open_urls(urls)
        finally:
            # Shut Chrome down even if the test raises.
            driver.quit()
    elif browser == 'Firefox':
        for count in process_count:
            # Call form prints the same in Python 2 and is valid in Python 3.
            print("FIREFOX WITH %d CONTENT PROCESSES" % count)
            test = FirefoxMultiTabTest(binary, stats, proxy=proxy, process_count=count, **test_options)
            test.open_urls(urls)
    elif browser in ('Safari', 'IE'):
        # Currently this is a manual test, sorry.
        manual_test = os.path.abspath(os.path.join(
            os.path.dirname(__file__), 'comp_analysis_manual_test.htm'))
        test = ManualMultiTabTest(binary, stats, **test_options)
        prefix = 'file://' if browser == 'IE' else ''
        test.open_urls([prefix + manual_test])
    elif browser == 'Edge':
        # Currently this is even more manual than IE and Safari. Edge won't
        # let us provide a path to launch.
        print("Open up explorer, find 'atsy/example/comp_analysis_manual_test.htm'")
        print("Right-click, 'Open with' -> 'Microsoft Edge'")
        print("Run the test, press enter when it's done.")
        import sys
        # Block until the operator signals that the manual run has finished.
        sys.stdin.read(1)
        stats.print_stats()
    else:
        raise Exception("Unhandled browser: %s" % browser)