Python selenium.webdriver 模块,Proxy() 实例源码
我们从Python开源项目中,提取了以下9个代码示例,用于说明如何使用selenium.webdriver.Proxy()。
def get_html_by_webdirver(url, proxies=''):
    """Load *url* in PhantomJS and return the rendered page source.

    Args:
        url: Address to open in the browser.
        proxies: Optional HTTP proxy as ``host:port``. When empty (the
            default) the browser session is used without a proxy.

    Returns:
        The page source when it is non-empty and shorter than
        ``1024 * 1024`` characters, otherwise ``None``. Any exception raised
        while driving the browser is logged with ``log.error`` and also
        results in ``None``.
    """
    html = None
    driver = None
    try:
        driver = webdriver.PhantomJS()
        if proxies:
            proxy = webdriver.Proxy()
            proxy.proxy_type = ProxyType.MANUAL
            proxy.http_proxy = proxies  # e.g. '220.248.229.45:3128'
            # Add the proxy to a private copy of the PhantomJS capabilities so
            # it does not stick to the shared DesiredCapabilities.PHANTOMJS
            # dict and silently apply to later calls made without a proxy.
            caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
            proxy.add_to_capabilities(caps)
            # Restart the session so the proxy settings take effect.
            driver.start_session(caps)
        driver.get(url)
        html = driver.page_source
    except Exception as e:
        log.error(e)
    finally:
        # Always shut the browser down, including when loading failed;
        # otherwise every failed fetch leaves a PhantomJS process behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                log.error(e)

    if html and len(html) < 1024 * 1024:
        return html
    return None
def process_request(self, request, spider):
    """Render requests of the "gsxt" spider in Chrome and return the page.

    Args:
        request: Request being processed; ``request.url`` is opened in the
            browser.
        spider: Spider that issued the request. Only a spider whose ``name``
            is ``"gsxt"`` is handled here.

    Returns:
        An ``HtmlResponse`` built from the browser's current URL and page
        source for the "gsxt" spider; ``None`` for every other spider.
    """
    if spider.name != "gsxt":
        return None

    # driver = webdriver.PhantomJS(r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")  # OK
    driver = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")  # OK
    try:
        # Disabled experiment: fetch an HTTP proxy and restart the session
        # with it.
        # proxy = webdriver.Proxy()
        # proxy.proxy_type = ProxyType.MANUAL
        # req = requests.get("http://datazhiyuan.com:60001/plain", timeout=10)
        # print("Get an IP proxy:", req.text)
        # if req.text:
        #     proxy.http_proxy = req.text  # "1.9.171.51:800"
        # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        driver.get(request.url)
        time.sleep(2)
        # Scroll the document down by setting scrollTop to 10000, then wait
        # before reading the page source.
        js = "var q=document.documentElement.scrollTop=10000"
        driver.execute_script(js)
        time.sleep(3)
        # Read everything needed from the browser before it is shut down.
        body = driver.page_source
        current_url = driver.current_url
    finally:
        # One browser is started per request; quit it so Chrome processes do
        # not pile up, even when loading the page raised.
        driver.quit()

    print("??" + request.url)
    return HtmlResponse(current_url, body=body, encoding='utf-8', request=request)
def reflashProxy(caps, driver, pIPs):
    """Restart *driver*'s session with a randomly chosen proxy and user agent.

    Args:
        caps: Capabilities dict. It is updated in place with the chosen user
            agent and proxy settings and then passed to
            ``driver.start_session``.
        driver: Webdriver whose session is restarted.
        pIPs: Indexable collection of proxy entries where item ``0`` is the
            IP and item ``1`` the port. It is replaced by the result of
            ``getAvailableIPs()`` when it holds fewer than ``minPIPCount``
            entries.

    Returns:
        A tuple ``(pIPs, pIp, randomPIpIndex)``: the (possibly refreshed)
        proxy list, the chosen IP and the index of the chosen entry.

    Raises:
        IndexError: If no proxy entries are available.
    """
    if len(pIPs) < minPIPCount:
        # Too few proxies left: fetch a fresh list.
        pIPs = getAvailableIPs()

    if not pIPs:
        raise IndexError("no proxy IPs available")

    # randrange excludes the upper bound. The previous
    # random.randint(0, len(pIPs)) could return len(pIPs) and fail with an
    # IndexError on roughly one call in len(pIPs) + 1.
    randomPIpIndex = random.randrange(len(pIPs))
    pipObj = pIPs[randomPIpIndex]
    pIp = pipObj[0]
    pPort = pipObj[1]

    ua = random.choice(USER_AGENTS)
    caps["phantomjs.page.settings.userAgent"] = ua

    proxy = webdriver.Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = pIp + ':' + str(pPort)
    # Add the proxy settings to the capabilities and start a new session
    # with them.
    proxy.add_to_capabilities(caps)
    driver.start_session(caps)
    return pIPs, pIp, randomPIpIndex
def start_browser():
    """Start a PhantomJS browser that sends a randomly chosen User-Agent.

    The User-Agent is stored as a custom header in the shared
    ``webdriver.DesiredCapabilities.PHANTOMJS`` dict, a session is started
    with those capabilities, and both the implicit wait and the page-load
    timeout are set to 120.

    Returns:
        The configured PhantomJS webdriver.
    """
    phantom_caps = webdriver.DesiredCapabilities.PHANTOMJS
    header_key = 'phantomjs.page.customHeaders.{}'.format('User-Agent')
    phantom_caps[header_key] = random.choice(user_agent_list)

    driver = webdriver.PhantomJS()
    # Disabled: route the browser through a local HTTP proxy.
    # proxy = webdriver.Proxy()
    # proxy.proxy_type = ProxyType.MANUAL
    # proxy.http_proxy = '127.0.0.1:56923'
    # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
    driver.start_session(phantom_caps)

    wait_limit = 120
    driver.implicitly_wait(wait_limit)
    driver.set_page_load_timeout(wait_limit)
    return driver
def main():
    """Show how restarting a session changes the proxy a browser uses.

    Loads http://ipecho.net/plain three times and prints the session id,
    page source and cookies after each load: first with the browser as
    started, then after restarting the session with a manual HTTP proxy
    obtained from http://localhost:60001/plain, and finally after restarting
    it with a direct (no proxy) configuration.
    """
    # browser = webdriver.PhantomJS()  # Be OK in command line, but not in PyCharm.
    # browser = webdriver.PhantomJS(r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
    browser = webdriver.Chrome(r"/home/lxw/Software/chromedirver_selenium/chromedriver")  # OK
    try:
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)

        # Restart the session with a manual HTTP proxy.
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.MANUAL
        # req = requests.get("http://datazhiyuan.com:60001/plain", timeout=10)
        req = requests.get("http://localhost:60001/plain", timeout=10)
        print("Get an IP proxy:", req.text)
        if req.text:
            proxy.http_proxy = req.text  # '1.9.171.51:800'
        # NOTE(review): PhantomJS capabilities are used with a Chrome driver
        # here, as in the original example -- confirm this is intended.
        # A copy is used so the shared DesiredCapabilities.PHANTOMJS dict is
        # not modified for the rest of the process.
        caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        proxy.add_to_capabilities(caps)
        browser.start_session(caps)
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)

        # Restart the session once more with a direct connection.
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        proxy.add_to_capabilities(caps)
        browser.start_session(caps)
        browser.get("http://ipecho.net/plain")
        _print_session_info(browser)
    finally:
        # Shut the browser down even if one of the steps above failed.
        browser.quit()


def _print_session_info(browser):
    """Print session id, page source and cookies of *browser*, then a rule."""
    print('session_id: ', browser.session_id)
    print('page_source: ', browser.page_source)
    print('cookie: ', browser.get_cookies())
    print("----"*10, "\n")
def get_driver_phantomjs(
        executable_path=r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs",
        timeout=None):
    """Create a PhantomJS driver with a random User-Agent and timeouts set.

    Args:
        executable_path: Path to the PhantomJS binary. Defaults to the
            location that was previously hard-coded.
        timeout: Value applied to both the page-load and the script timeout.
            ``None`` (the default) uses the module-level ``TIMEOUT``.

    Returns:
        The configured PhantomJS webdriver.

    References:
        PhantomJS:
        1. [Setting the user agent for PhantomJS](http://smilejay.com/2013/12/set-user-agent-for-phantomjs/)
        2. [Selenium 2 - Setting user agent for IE and Chrome](http://stackoverflow.com/questions/6940477/selenium-2-setting-user-agent-for-ie-and-chrome)
    """
    if timeout is None:
        timeout = TIMEOUT

    # Work on a copy so the shared DesiredCapabilities.PHANTOMJS is untouched.
    dcap = dict(DesiredCapabilities.PHANTOMJS)

    # Setting User-Agent
    ua = random.choice(RotateUserAgentMiddleware.user_agent_list)
    if ua:
        print("Current User-Agent is:", ua)
        dcap["phantomjs.page.settings.userAgent"] = ua

    driver = webdriver.PhantomJS(executable_path=executable_path, desired_capabilities=dcap)

    # Disabled: setting an IP proxy by restarting the session.
    # proxy = webdriver.Proxy()
    # proxy.proxy_type = ProxyType.MANUAL
    # ip_proxy = get_proxy()
    # if ip_proxy:
    #     proxy.http_proxy = ip_proxy
    # # proxy.add_to_capabilities(DesiredCapabilities.PHANTOMJS)
    # # driver.start_session(DesiredCapabilities.PHANTOMJS)
    # proxy.add_to_capabilities(dcap)
    # driver.start_session(dcap)

    try:
        driver.set_page_load_timeout(timeout)
        driver.set_script_timeout(timeout)
    except Exception:
        # Do not leave a PhantomJS process behind if configuration fails.
        driver.quit()
        raise
    return driver
def get_driver_chrome(self):
    """Create a Chrome driver running in a virtual display, optionally proxied.

    The proxy returned by ``get_proxy()`` is stored in ``self.proxies`` under
    both ``"http"`` and ``"https"`` and, when truthy, passed to Chrome through
    ``--proxy-server``. The page-load and script timeouts are set to
    ``self.TIMEOUT``.

    Returns:
        The configured Chrome webdriver.

    Raises:
        Exception: Whatever starting or configuring Chrome raises; the
            display (and the driver, if it was created) is shut down first.
    """
    # chromedriver
    options = webdriver.ChromeOptions()
    proxy = get_proxy()
    # NOTE: the same proxy is recorded for both the "http" and "https" keys.
    self.proxies["http"] = proxy
    self.proxies["https"] = proxy
    if proxy:
        options.add_argument('--proxy-server=' + proxy)

    # NOTE(review): Display looks like pyvirtualdisplay.Display -- confirm;
    # the cleanup below relies on its stop() method.
    display = Display(visible=0, size=(800, 800))
    display.start()

    driver = None
    try:
        driver = webdriver.Chrome(executable_path=r"/home/lxw/Software/chromedriver_selenium/chromedriver", chrome_options=options)

        # Disabled PhantomJS variant (noted as "Not working" by the author).
        # driver = webdriver.PhantomJS(executable_path=r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")
        # proxy = webdriver.Proxy()
        # proxy.proxy_type = ProxyType.MANUAL
        # proxy_str = get_proxy()
        # if proxy_str:
        #     proxy.http_proxy = proxy_str
        # proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
        # driver.start_session(webdriver.DesiredCapabilities.PHANTOMJS)

        driver.set_page_load_timeout(self.TIMEOUT)
        driver.set_script_timeout(self.TIMEOUT)
    except Exception:
        # The caller never receives the driver on failure, so release the
        # browser and the virtual display here instead of leaking them.
        if driver is not None:
            driver.quit()
        display.stop()
        raise
    return driver
def getNewBrowserArgs():
    """Pick a random proxy and user agent for starting a new PhantomJS.

    A proxy entry is chosen at random from ``getAvailableIPs()`` (item ``0``
    is used as the IP, item ``1`` as the port) and a user agent at random
    from ``USER_AGENTS``.

    Returns:
        A tuple ``(service_args, caps)``: ``service_args`` is the list
        ``['--proxy=<ip>:<port>', '--proxy-type=http']`` and ``caps`` is a
        copy of ``webdriver.DesiredCapabilities.PHANTOMJS`` with
        ``phantomjs.page.settings.userAgent`` set.
    """
    pIPs = getAvailableIPs()
    pipObj = random.choice(pIPs)
    pIp = pipObj[0]
    pPort = pipObj[1]

    # Copy the shared capabilities: mutating and returning the class-level
    # dict made every returned ``caps`` the same object, so a later call
    # changed the user agent of capabilities handed out earlier.
    caps = dict(webdriver.DesiredCapabilities.PHANTOMJS)
    ua = random.choice(USER_AGENTS)
    caps["phantomjs.page.settings.userAgent"] = ua

    service_args = [
        '--proxy=' + pIp + ':' + str(pPort),
        '--proxy-type=http',
    ]
    return service_args, caps
def test_browser(browser, stats, binary, urls,
per_tab_pause, settle_wait_time,
proxy, process_count):
test_options = {
'per_tab_pause': per_tab_pause,
'settle_wait_time': settle_wait_time
}
if browser == 'Chrome':
options = webdriver.chrome.options.Options()
options.binary_location = binary
caps = options.to_capabilities()
if proxy:
webdriver_proxy = webdriver.Proxy()
webdriver_proxy.http_proxy = proxy
webdriver_proxy.add_to_capabilities(caps)
driver = webdriver.Chrome(desired_capabilities=caps)
test = MultiTabTest(driver, stats, **test_options)
test.open_urls(urls)
driver.quit()
elif browser == 'Firefox':
for count in process_count:
print "FIREFOX WITH %d CONTENT PROCESSES" % count
test = FirefoxMultiTabTest(binary, stats, proxy=proxy, process_count=count, **test_options)
test.open_urls(urls)
elif browser in ('Safari', 'IE'):
# Currently this is a manual test, sorry.
manual_test = os.path.abspath(os.path.join(
os.path.dirname(__file__), 'comp_analysis_manual_test.htm'))
test = ManualMultiTabTest(binary, stats, **test_options)
prefix = 'file://' if browser == 'IE' else ''
test.open_urls([prefix + manual_test])
elif browser == 'Edge':
# Currently this is even more manual than IE and Safari. Edge won't
# let us provide a path to launch.
print "Open up explorer, find 'atsy/example/comp_analysis_manual_test.htm'"
print "Right-click, 'Open with' -> 'Microsoft Edge'"
print "Run the test, press enter when it's done."
import sys
sys.stdin.read(1)
stats.print_stats()
else:
raise Exception("Unhandled browser: %s" % browser)