Python selenium.webdriver.common.desired_capabilities.DesiredCapabilities 模块,PHANTOMJS 实例源码

我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用selenium.webdriver.common.desired_capabilities.DesiredCapabilities.PHANTOMJS

项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def main(number):
    """Fetch the bilibili video page ``av<number>`` with PhantomJS and parse it.

    The page is loaded with image loading disabled and a desktop Firefox
    user agent.  The rendered HTML is parsed with BeautifulSoup (lxml) and
    handed to ``getInfo``; its return value is discarded.

    Scraping is best-effort: any failure is reported on stdout and
    swallowed, so this function does not raise for a bad page.

    :param number: video id appended to the ``/video/av`` URL.
    """
    url = 'http://www.bilibili.com/video/av' + str(number) + '/'
    # Copy the defaults so the shared DesiredCapabilities dict is not mutated.
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False
    # NOTE: machine-specific location of the PhantomJS binary.
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        try:
            driver.get(url)
            content = driver.page_source
        finally:
            # Shut the browser down exactly once, before the (slower) parsing.
            driver.quit()
        soup = BeautifulSoup(content, 'lxml')
        getInfo(soup)
    except Exception as exc:
        print("get page error: %s" % exc)
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Scrape bilibili video pages ``av<start>`` .. ``av<stop>`` (inclusive).

    Each page is rendered by a fresh PhantomJS instance (images disabled,
    desktop Firefox user agent), parsed with BeautifulSoup (lxml) and passed
    to ``getInfo``.

    The first failure stops the whole range; it is reported on stdout and
    not re-raised.

    :param start: first video id.
    :param stop: last video id (inclusive).
    """
    try:
        for number in range(start, stop + 1):
            url = 'http://www.bilibili.com/video/av'+str(number)+'/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE: machine-specific location of the PhantomJS binary.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always terminate the PhantomJS process, even when loading fails.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception as exc:
        print("get page error: %s" % exc)
项目:spoon    作者:Jiramew    | 项目源码 | 文件源码
def get_webdriver(self):
        """Create a PhantomJS driver configured from ``self.webdriver_config``.

        Reads ``proxy``, ``header``, ``phantomjs_path`` and ``timeout`` from
        the config object.  When a proxy is set it is passed as an HTTP proxy
        and SSL errors are ignored.  When headers are set, their
        ``User-Agent`` value is applied both as the page user agent and as a
        custom header.

        :return: the new driver, with its page-load timeout already applied.
        """
        cfg = self.webdriver_config

        phantom_args = []
        if cfg.proxy:
            phantom_args += [
                "--proxy=" + cfg.proxy,
                "--proxy-type=http",
                "--ignore-ssl-errors=true",
            ]

        caps = dict(DesiredCapabilities.PHANTOMJS)
        if cfg.header:
            user_agent = cfg.header['User-Agent']
            for capability in ("phantomjs.page.settings.userAgent",
                               "phantomjs.page.customHeaders.User-Agent"):
                caps[capability] = user_agent
        caps["takesScreenshot"] = True

        browser = webdriver.PhantomJS(cfg.phantomjs_path,
                                      service_args=phantom_args,
                                      desired_capabilities=caps)
        browser.set_page_load_timeout(cfg.timeout)
        return browser
项目:tianyancha    作者:Range0122    | 项目源码 | 文件源码
def process_request(self, request, spider):
        """Render selected requests with PhantomJS and return the resulting page.

        Only URLs whose character at index 26 is ``'c'`` are handled; for any
        other URL (including ones shorter than 27 characters) ``None`` is
        returned.
        NOTE(review): this looks like a Scrapy downloader-middleware hook, and
        the ``'c'`` test presumably selects one URL family - confirm.

        The page is loaded with a random user agent from
        ``self.user_agent_list`` and images disabled, then given 15-22 seconds
        to settle before an optional "show detail" link is clicked.

        :param request: request object exposing ``url``.
        :param spider: unused.
        :return: ``HtmlResponse`` built from the rendered page, or ``None``.
        """
        # Slicing instead of indexing avoids IndexError on short URLs.
        if request.url[26:27] != 'c':
            return None

        ua = random.choice(self.user_agent_list)
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = ua
        dcap["phantomjs.page.settings.loadImages"] = False
        # Raw string: same path as before, without invalid escape sequences.
        # NOTE: machine-specific location of the PhantomJS binary.
        driver = webdriver.PhantomJS(executable_path=r'E:\Webdriver\phantomjs-2.1.1-windows\bin\phantomjs.exe',
                                     desired_capabilities=dcap)
        try:
            driver.get(request.url)
            sleep_time = random.randint(15, 22)
            time.sleep(sleep_time)
            try:
                detail = driver.find_element_by_xpath('//a[@ng-click="showDetail = btnOnClick(showDetail)"]')
                detail.click()
            except Exception:
                # The detail link is optional; carry on with the page as loaded.
                pass
            body = driver.page_source
            url = driver.current_url
        finally:
            # Never leave a PhantomJS process behind, even if loading failed.
            driver.quit()
        return HtmlResponse(url=url, body=body, request=request, encoding='utf-8')
项目:mindl-legacy    作者:MinoMino    | 项目源码 | 文件源码
def __init__(self, url):
        """Store the target URL and start a PhantomJS session for it.

        The session uses the module-level ``USER_AGENT``, relaxed SSL and
        web-security flags, two pre-set cookies for ``.ebookjapan.jp`` and a
        1120x550 window.  A 60-second ``WebDriverWait`` is kept in
        ``self.wait``.

        :param url: address stored in ``self.url``.
        """
        self.url = url
        self.book_name = "N/A"
        self.book_volume = None

        # Generic user agent instead of the PhantomJS default.
        capabilities = dict(DC.PHANTOMJS)
        capabilities["phantomjs.page.settings.userAgent"] = USER_AGENT

        phantom_flags = [
            "--ignore-ssl-errors=true",
            "--ssl-protocol=any",
            "--web-security=false",
            "--ssl-protocol=TLSv1",
        ]
        self.d = webdriver.PhantomJS(desired_capabilities=capabilities,
                                     service_args=phantom_flags)

        # Cookies that make the site think the ToS was accepted earlier.
        tos_cookies = (
            ("tachiyomi_auto_reader", "Browser"),
            ("tachiyomi_user_policy", "on"),
        )
        for cookie_name, cookie_value in tos_cookies:
            self.d.add_cookie({"name": cookie_name, "value": cookie_value, "domain": ".ebookjapan.jp", "path": "/"})

        self.d.set_window_size(1120, 550)
        # General-purpose waiter for later page interactions.
        self.wait = WebDriverWait(self.d, 60)
项目:ComicSpider    作者:QuantumLiu    | 项目源码 | 文件源码
def get_pages(self):
        '''
        Get all pages' urls of the chapter using selenium and PhantomJS.

        Loads ``self.chapter_url`` with image loading disabled, extracts the
        page ``<select>`` element from the rendered source and parses its
        ``<option>`` entries.  On any ordinary error the traceback is printed
        and ``self.pages`` is set to an empty list.  ``KeyboardInterrupt`` is
        propagated to the caller.

        return:
            a list of tuple (page_num,page_url)
        '''
        r_slt=r'onchange="select_page\(\)">([\s\S]*?)</select>'
        r_p=r'<option value="(.*?)".*?>?(\d*?)?<'
        # Bound up front so the cleanup below is safe even if the driver
        # could not be created.
        driver = None
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            # Page text is all that is needed, so skip image downloads.
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.get(self.chapter_url)
            text=driver.page_source
            st=re.findall(r_slt,text)[0]
            self.pages = [(int(p[-1]),p[0]) for p in re.findall(r_p,st)]
        except Exception:
            traceback.print_exc()
            self.pages = []
        finally:
            if driver is not None:
                driver.quit()
        # Report and return outside ``finally``: a ``return`` inside it would
        # silently discard KeyboardInterrupt and other in-flight exceptions.
        print('Got {l} pages in chapter {ch}'.format(l=len(self.pages),ch=self.chapter_title))
        return self.pages
项目:SerpScrap    作者:ecoron    | 项目源码 | 文件源码
def _get_PhantomJS(self):
        """Start a PhantomJS driver and store it in ``self.webdriver``.

        Proxy host, port and protocol (plus credentials when both username
        and password are set) are taken from ``self.proxy``.  The user agent
        comes from ``random_user_agent(mobile=False)`` and is logged.

        :return: ``True`` when the driver was created; ``False`` when a
            connection error or ``WebDriverException`` was logged instead.
        """
        try:
            phantom_args = []
            proxy = self.proxy
            if proxy:
                phantom_args.append('--proxy={}:{}'.format(proxy.host, proxy.port))
                phantom_args.append('--proxy-type={}'.format(proxy.proto))
                if proxy.username and proxy.password:
                    phantom_args.append(
                        '--proxy-auth={}:{}'.format(proxy.username, proxy.password)
                    )

            useragent = random_user_agent(mobile=False)
            logger.info('useragent: {}'.format(useragent))

            capabilities = dict(DesiredCapabilities.PHANTOMJS)
            capabilities["phantomjs.page.settings.userAgent"] = useragent

            try:
                self.webdriver = webdriver.PhantomJS(
                    executable_path=self.config['executable_path'],
                    service_args=phantom_args,
                    desired_capabilities=capabilities,
                )
            except (ConnectionError, ConnectionRefusedError, ConnectionResetError) as err:
                logger.error(err)
                return False
            return True
        except WebDriverException as e:
            logger.error(e)
        return False
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Scrape bilibili user-space pages for ids ``start`` .. ``stop`` (inclusive).

    For each id the space page is rendered with PhantomJS (images disabled,
    desktop Firefox user agent), ``getInfo`` extracts the user name from the
    parsed page, ``GetFansUid.GetFansUid`` supplies the fan ids and fan
    count, and the record is printed and stored with ``saveData``.

    A failing id is reported with "get page error" and skipped; processing
    continues with the next id.  Earlier versions retried recursively with an
    ever-growing ``stop``, which never terminated when every page failed.

    :param start: first user id.
    :param stop: last user id (inclusive).
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False

    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # quit() (not just close()) so the PhantomJS process exits.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the numeric id doubles as the uid

            get_fans_uid = GetFansUid.GetFansUid(number)
            fansuid, fansnumber = get_fans_uid.get_uids()
            print(uid, username, fansnumber)

            saveData(uid, username, fansnumber, fansuid)
        except Exception:
            print("get page error")


# ????
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def main(number):
    """Scrape one bilibili user-space page and store the user's fan data.

    The page for user ``number`` is rendered with PhantomJS (images disabled,
    desktop Firefox user agent).  ``getInfo`` extracts the user name from the
    parsed page, ``GetFansUid`` supplies the fan ids and fan count, and the
    record is stored with ``saveData``.

    Scraping is best-effort: any failure is reported on stdout and
    swallowed, so this function does not raise for a bad page.

    :param number: numeric user id; also stored as the uid.
    """
    url = 'http://space.bilibili.com/' + str(number) + '/#!/'
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False
    # NOTE: machine-specific location of the PhantomJS binary.
    driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                 desired_capabilities=dcap)
    try:
        try:
            driver.get(url)
            content = driver.page_source
        finally:
            # Shut the browser down exactly once, before the slower follow-up work.
            driver.quit()
        soup = BeautifulSoup(content, 'lxml')
        username = getInfo(soup)
        uid = number  # the numeric id doubles as the uid
        get_fans_uid = GetFansUid(number)
        fansuid, fansnumber = get_fans_uid.get_uids()

        saveData(uid, username, fansnumber, fansuid)
    except Exception as exc:
        print("get page error: %s" % exc)
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Scrape bilibili user-space pages for ids ``start`` .. ``stop`` (inclusive).

    Each page is rendered by a fresh PhantomJS instance (images disabled,
    desktop Firefox user agent), parsed with BeautifulSoup (lxml) and passed
    to ``getInfo``.

    The first failure stops the whole range; it is reported on stdout and
    not re-raised.

    :param start: first user id.
    :param stop: last user id (inclusive).
    """
    try:
        for number in range(start, stop+1):
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
            )
            dcap["phantomjs.page.settings.loadImages"] = False
            # NOTE: machine-specific location of the PhantomJS binary.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always terminate the PhantomJS process, even when loading fails.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            getInfo(soup)
    except Exception as exc:
        print("get page error: %s" % exc)


# ????
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Scrape bilibili user-space pages for ids ``start`` .. ``stop`` (inclusive).

    For each id the space page is rendered with PhantomJS (images disabled,
    desktop Firefox user agent), ``getInfo`` extracts the user name from the
    parsed page, ``GetFansUid`` supplies the fan ids and fan count, and the
    record is stored with ``saveData``.

    A failing id is reported with "get page error" and skipped; processing
    continues with the next id.  Earlier versions retried recursively with an
    ever-growing ``stop``, which never terminated when every page failed.

    :param start: first user id.
    :param stop: last user id (inclusive).
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False

    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            # NOTE: machine-specific location of the PhantomJS binary.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always terminate the PhantomJS process, even when loading fails.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the numeric id doubles as the uid
            get_fans_uid = GetFansUid(number)
            fansuid, fansnumber = get_fans_uid.get_uids()

            saveData(uid, username, fansnumber, fansuid)
        except Exception:
            print("get page error")


# ????
项目:danmu-bilibili    作者:saberxxy    | 项目源码 | 文件源码
def getSoup(start, stop):
    """Scrape bilibili user-space pages for ids ``start`` .. ``stop`` (inclusive).

    For each id the space page is rendered with PhantomJS (images disabled,
    desktop Firefox user agent), ``getInfo`` extracts the user name from the
    parsed page, ``GetFollowUid`` supplies a list of ids and a count, and the
    record is stored with ``saveData``.
    NOTE(review): judging by the helper's name these are the accounts the
    user follows - confirm.

    A failing id is reported with "get page error" and skipped; processing
    continues with the next id.  Earlier versions retried recursively with an
    ever-growing ``stop``, which never terminated when every page failed.

    :param start: first user id.
    :param stop: last user id (inclusive).
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"
    )
    dcap["phantomjs.page.settings.loadImages"] = False

    for number in range(start, stop+1):
        try:
            url = 'http://space.bilibili.com/'+str(number)+'/#!/'
            # NOTE: machine-specific location of the PhantomJS binary.
            driver = webdriver.PhantomJS(executable_path='G:\\Anaconda3\\phantomjs\\bin\\phantomjs.exe',
                                         desired_capabilities=dcap)
            try:
                driver.get(url)
                content = driver.page_source
            finally:
                # Always terminate the PhantomJS process, even when loading fails.
                driver.quit()
            soup = BeautifulSoup(content, 'lxml')
            username = getInfo(soup)
            uid = number  # the numeric id doubles as the uid
            get_gz_uid = GetFollowUid(number)
            gzsuid, gznumber = get_gz_uid.get_uids()

            saveData(uid, username, gznumber, gzsuid)
        except Exception:
            print("get page error")


# ????
项目:devsecops-example-helloworld    作者:boozallen    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch a PhantomJS / Ghostdriver service and attach a driver to it.

        The service is started first; the remote driver is then initialised
        against the service URL.  If that initialisation fails, the driver is
        quit before the error is re-raised.

        :Args:
         - executable_path - path to the PhantomJS executable; the default
           relies on it being found on $PATH.
         - port - port for the service; 0 lets a free port be chosen.
         - desired_capabilities - dictionary of non-browser-specific
           capabilities, such as "proxy" or "loggingPref".
         - service_args - list of command line arguments for PhantomJS.
         - service_log_path - path the PhantomJS service logs to.
        """
        ghostdriver = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service = ghostdriver
        ghostdriver.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=ghostdriver.service_url,
                desired_capabilities=desired_capabilities)
        except:
            # Clean up on any failure (including interrupts), then re-raise.
            self.quit()
            raise

        self._is_remote = False
项目:wagtailannotatedimage    作者:takeflight    | 项目源码 | 文件源码
def setUpClass(cls):
        """Start one shared PhantomJS driver for the whole test class.

        The driver uses the default PhantomJS capabilities, a 1920x1080
        window and a 10-second implicit wait, and is stored on ``cls.driver``.
        """
        super(TestEditHandler, cls).setUpClass()
        browser = webdriver.PhantomJS(
            desired_capabilities=DesiredCapabilities.PHANTOMJS)
        cls.driver = browser

        browser.set_window_size(1920, 1080)
        browser.implicitly_wait(10)
项目:open-source-feeds    作者:mhfowler    | 项目源码 | 文件源码
def initialize_driver(self, driver=None):
        """Create or adopt the webdriver and store it in ``self.driver``.

        Selection order:
          1. ``self.command_executor`` set -> remote driver with Chrome options.
          2. ``self.which_driver == 'phantomjs'`` -> local PhantomJS with a
             fixed user agent and a 1400x1000 window.
          3. ``self.which_driver == 'chrome'`` -> local Chrome.
          4. otherwise -> the ``driver`` argument is used as given.

        Chrome options always disable notifications and add
        ``--proxy-server`` when ``self.proxy`` is set.  The chosen driver gets
        a 240-second page-load timeout.

        :param driver: pre-built driver, used only in case 4.
        """
        def build_chrome_options():
            opts = Options()
            opts.add_argument("--disable-notifications")
            if self.proxy:
                opts.add_argument('--proxy-server=%s' % self.proxy)
            return opts

        if self.command_executor:
            remote_caps = build_chrome_options().to_capabilities()
            self.driver = webdriver.Remote(
                command_executor=self.command_executor,
                desired_capabilities=remote_caps
            )
        elif self.which_driver == 'phantomjs':
            phantom_caps = dict(DesiredCapabilities.PHANTOMJS)
            phantom_caps["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
                "(KHTML, like Gecko) Chrome/15.0.87"
            )
            phantom = webdriver.PhantomJS(desired_capabilities=phantom_caps)
            phantom.set_window_size(1400, 1000)
            self.driver = phantom
        elif self.which_driver == 'chrome':
            self.driver = webdriver.Chrome(chrome_options=build_chrome_options())
        else:
            # Fall back to whatever driver the caller handed in.
            self.driver = driver

        self.driver.set_page_load_timeout(time_to_wait=240)
项目:flasky    作者:RoseOu    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Start the PhantomJS / Ghostdriver service, then open a driver on it.

        Should creating the driver session fail, the driver is quit and the
        original exception is re-raised.

        :Args:
         - executable_path - location of the PhantomJS executable; with the
           default value it is assumed to be on $PATH.
         - port - service port; when 0, a free port will be found.
         - desired_capabilities - dictionary with non-browser-specific
           capabilities only, such as "proxy" or "loggingPref".
         - service_args - list of command line arguments passed to PhantomJS.
         - service_log_path - file the PhantomJS service writes its log to.
        """
        service_options = {
            "port": port,
            "service_args": service_args,
            "log_path": service_log_path,
        }
        self.service = Service(executable_path, **service_options)
        self.service.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=self.service.service_url,
                desired_capabilities=desired_capabilities)
        except:
            # Clean up on any failure (including interrupts), then re-raise.
            self.quit()
            raise

        self._is_remote = False
项目:isp-data-pollution    作者:essandess    | 项目源码 | 文件源码
def open_driver(self):
        """(Re)create the PhantomJS driver in ``self.driver``.

        Any existing driver is shut down via ``self.quit_driver()`` first.  A
        new driver is built only if ``self.driver`` is then missing or is not
        a PhantomJS ``WebDriver``.  The new driver uses ``self.user_agent``,
        ``self.proxy``, ``self.phantomjs_binary_path`` and ``self.timeout``
        (applied as implicit-wait, page-load and script timeout).

        Background on the settings used:
          http://engineering.shapesecurity.com/2015/01/detecting-phantomjs-based-visitors.html
          https://coderwall.com/p/9jgaeq/set-phantomjs-user-agent-string
          http://phantomjs.org/api/webpage/property/settings.html
          http://stackoverflow.com/questions/23390974/phantomjs-keeping-cache
        """
        self.quit_driver()
        if hasattr(self, 'driver') and isinstance(self.driver,webdriver.phantomjs.webdriver.WebDriver):
            return

        caps = dict(DesiredCapabilities.PHANTOMJS)
        caps.update([
            ('phantomjs.page.settings.userAgent', self.user_agent),
            ('phantomjs.page.settings.loadImages', 'false'),
            ('phantomjs.page.settings.clearMemoryCaches', 'true'),
            # Resource timeout in milliseconds, never below two seconds.
            ('phantomjs.page.settings.resourceTimeout', max(2000,int(self.timeout * 1000))),
            ('acceptSslCerts', True),
            ('applicationCacheEnabled', True),
            ('handlesAlerts', False),
            ('phantomjs.page.customHeaders', { 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate, sdch' }),
        ])

        cli_flags = ['--disk-cache=false','--ignore-ssl-errors=false','--ssl-protocol=TLSv1.2']
        if self.proxy is not None:
            cli_flags.insert(0, '--proxy={}'.format(self.proxy))

        # Pass the binary path positionally only when one is configured.
        binary = () if self.phantomjs_binary_path is None else (self.phantomjs_binary_path,)
        browser = webdriver.PhantomJS(*binary, desired_capabilities=caps, service_args=cli_flags)

        browser.set_window_size(1296,1018)   # Tor browser size on Linux
        for apply_timeout in (browser.implicitly_wait,
                              browser.set_page_load_timeout,
                              browser.set_script_timeout):
            apply_timeout(self.timeout)
        self.driver = browser
项目:leetcode    作者:thomasyimgit    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Bring up a PhantomJS / Ghostdriver service and connect a driver to it.

        The service is started before the remote driver is initialised; an
        ``Exception`` during that initialisation triggers ``quit()`` and is
        then re-raised.

        :Args:
         - executable_path - path to the executable; the default assumes it
           is available on $PATH.
         - port - port the service should use; 0 means a free port is found.
         - desired_capabilities - dictionary object with non-browser-specific
           capabilities only, such as "proxy" or "loggingPref".
         - service_args - list of command line arguments for PhantomJS.
         - service_log_path - path for the PhantomJS service log.
        """
        phantom_service = Service(executable_path, port=port,
                                  service_args=service_args,
                                  log_path=service_log_path)
        self.service = phantom_service
        phantom_service.start()

        try:
            RemoteWebDriver.__init__(self,
                                     command_executor=phantom_service.service_url,
                                     desired_capabilities=desired_capabilities)
        except Exception:
            self.quit()
            raise

        self._is_remote = False
项目:cabu    作者:thylong    | 项目源码 | 文件源码
def load_phantomjs(config):
    """Start PhantomJS webdriver with the given configuration.

    SSL errors are ignored, any SSL protocol is accepted and web security is
    disabled.  If ``HTTPS_PROXY`` or ``HTTP_PROXY`` is set in the environment
    (``HTTPS_PROXY`` wins when both are non-empty), it is passed to PhantomJS
    as an HTTP proxy with any leading ``http://`` / ``https://`` scheme
    removed.  A non-empty ``config['HEADERS']`` lets ``Headers`` extend the
    desired capabilities.  The service log goes to ``os.devnull``.

    Args:
        config (dict): The configuration loaded previously in Cabu.

    Returns:
        webdriver (selenium.webdriver): An instance of phantomJS webdriver.

    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    service_args = [
        '--ignore-ssl-errors=true',
        '--ssl-protocol=any',
        '--web-security=false'
    ]

    # ``or`` (rather than a .get() default) also falls back to HTTP_PROXY
    # when HTTPS_PROXY exists but is empty.
    proxy_address = os.environ.get('HTTPS_PROXY') or os.environ.get('HTTP_PROXY')
    if proxy_address:
        # Strip the scheme when present; a bare ``host:port`` value or an
        # ``https://`` URL used to crash here with AttributeError.
        scheme_match = re.search(r'https?://(.*)$', proxy_address)
        proxy_ip = scheme_match.group(1) if scheme_match else proxy_address
        service_args.append('--proxy=%s' % proxy_ip)
        service_args.append('--proxy-type=http')

    if 'HEADERS' in config and config['HEADERS']:
        dcap = Headers(config).set_headers(dcap)

    return webdriver.PhantomJS(
        desired_capabilities=dcap,
        service_args=service_args,
        service_log_path=os.path.devnull
    )
项目:cabu    作者:thylong    | 项目源码 | 文件源码
def test_phantomjs_headers_loading(self):
        """``set_headers`` must add the configured User-Agent custom header."""
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        headers = Headers(self.config).set_headers(dcap)
        # assertEqual: the ``assertEquals`` alias was removed in Python 3.12.
        self.assertEqual(
            headers['phantomjs.page.customHeaders.User-Agent'],
            'Mozilla/6.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36'
            ' (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36'
        )
项目:cabu    作者:thylong    | 项目源码 | 文件源码
def test_chrome_headers_loading(self):
        """With ``DRIVER_NAME`` set to Chrome, loading headers must raise."""
        self.app.config['DRIVER_NAME'] = 'Chrome'
        capabilities = dict(DesiredCapabilities.PHANTOMJS)

        def load_headers():
            # Construction and the call both happen inside the checked callable.
            Headers(self.config).set_headers(capabilities)

        self.assertRaises(Exception, load_headers)
项目:selenium-image-crawler    作者:scirag    | 项目源码 | 文件源码
def create_selenium_driver(self):
        """Return a PhantomJS driver with a desktop Chrome user agent.

        The browser window is set to 1024x768 before the driver is returned.
        """
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities.update({
            "phantomjs.page.settings.userAgent":
                "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
        })
        browser = webdriver.PhantomJS(desired_capabilities=capabilities)
        browser.set_window_size(1024, 768)
        return browser
项目:weiboCrawler    作者:hjydzh    | 项目源码 | 文件源码
def get_browser():
    """Return a PhantomJS browser with browser-like request headers.

    The custom headers and user agent are set on a private copy of the
    default capabilities.  Previously the headers were written to the shared
    ``DesiredCapabilities.PHANTOMJS`` dict *after* the copy had been taken,
    so the first browser was created without them and every later user of
    the shared dict inherited them.

    The window is set to 1920x1080 before the browser is returned.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap['phantomjs.page.customHeaders.Accept-Language'] = 'zh-CN,zh;q=0.8'
    dcap['phantomjs.page.customHeaders.Connection'] = 'keep-alive'
    dcap['phantomjs.page.customHeaders.Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    dcap['phantomjs.page.customHeaders.Accept-Encoding'] = 'gzip, deflate, sdch'
    dcap['phantomjs.page.customHeaders.Cache-Control'] = 'max-age=0'
    dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36")
    # NOTE: machine-specific location of the PhantomJS binary.
    phantomjs_path = "G:\\programeSoftwares\\python2.7\\Scripts\\phantomjs.exe"
    browser = webdriver.PhantomJS(desired_capabilities=dcap,executable_path=phantomjs_path)
    browser.set_window_size(1920, 1080)
    return browser
项目:ShuoshuoMonitor    作者:aploium    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Create a PhantomJS / Ghostdriver instance.

        Starts the backing service, then initialises the remote driver
        against the service URL.  On any failure of that second step the
        driver is quit and the error is re-raised.

        :Args:
         - executable_path - path to the executable; if the default is used
           the executable is assumed to be on $PATH.
         - port - port the service should run on; if left as 0, a free port
           will be found.
         - desired_capabilities - dictionary object with non-browser-specific
           capabilities only, such as "proxy" or "loggingPref".
         - service_args - list of command line arguments to pass to PhantomJS.
         - service_log_path - path for the PhantomJS service to log to.
        """
        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        service_url = None
        try:
            service_url = self.service.service_url
            RemoteWebDriver.__init__(
                self,
                command_executor=service_url,
                desired_capabilities=desired_capabilities)
        except:
            # Clean up on any failure (including interrupts), then re-raise.
            self.quit()
            raise

        self._is_remote = False
项目:jtyd_python_spider    作者:xtuyaowu    | 项目源码 | 文件源码
def visit(self, url, xpath=None, timeout=60, retry=1, load_images=False, **kwargs):
        """Open ``url`` in a fresh PhantomJS browser and return the page source.

        Any browser held in ``self.browser`` is quit first.  The new browser
        is kept in ``self.browser`` for later use.  Loading is attempted up to
        ``retry + 1`` times; failures are printed, not raised.

        :param url: address to load.
        :param xpath: optional element to wait for (via an implicit wait of
            ``timeout``) after loading.
        :param timeout: implicit-wait value used for the ``xpath`` lookup.
        :param retry: number of additional load attempts after the first.
        :param load_images: when false, PhantomJS is started with
            ``--load-images=false``.
        :param kwargs: accepted and ignored.
        :return: page source, or ``None`` if the browser could not be started
            or the page is the empty PhantomJS document.
        """
        if self.browser:
            self.browser.quit()
        # Work on a private copy: updating DesiredCapabilities.PHANTOMJS in
        # place would leak this user agent into every later PhantomJS session.
        desired_capabilities = dict(DesiredCapabilities.PHANTOMJS)
        desired_capabilities['phantomjs.page.settings.userAgent'] = self.ua if self.ua else 'Mozilla/5.0 (Windows NT 6.1; rv:42.0) Gecko/20100101 Firefox/42.0'
        service_args = list()
        if not load_images:
            service_args += ['--load-images=false']
        if self.proxy:
            service_args += ['--proxy=%s' % self.proxy]
        try:
            browser = webdriver.PhantomJS(service_args=service_args if service_args else None,
                                          desired_capabilities=desired_capabilities)
        except Exception as e:
            print(str(e))
            return None
        count = 0
        while (retry + 1) > count:
            count += 1
            try:
                browser.get(url)
                break
            except Exception as e:
                print(str(e))
        if xpath:
            browser.implicitly_wait(timeout)
            try:
                browser.find_element_by_xpath(xpath)
            except Exception as e:
                # The element is only waited for; its absence is not fatal.
                print(str(e))
        self.browser = browser
        result = browser.page_source
        return result if result != '<html><head></head><body></body></html>' else None
项目:jtyd_python_spider    作者:xtuyaowu    | 项目源码 | 文件源码
def get(self, url, xpath, timeout, retry, service_args, desired_capabilities):
        """Load ``url`` in a throw-away PhantomJS browser and return page + cookies.

        Loading is attempted up to ``retry + 1`` times.  The browser is always
        quit before returning.  Errors are printed, never raised.

        :param url: address to load.
        :param xpath: optional element to wait for (via an implicit wait of
            ``timeout``) after loading.
        :param timeout: implicit-wait value used for the ``xpath`` lookup.
        :param retry: number of additional load attempts after the first.
        :param service_args: JSON-encoded list of PhantomJS command line
            arguments, or a falsy value for none.
        :param desired_capabilities: JSON-encoded dict of extra capabilities,
            or a falsy value for none.
        :return: JSON string with ``cookies`` and ``text`` keys, or ``''`` on
            error or when the page is the empty PhantomJS document.
        """
        browser = None
        try:
            result = dict()
            # Merge into a private copy: updating DesiredCapabilities.PHANTOMJS
            # in place would leak these settings into every later session.
            capabilities = dict(DesiredCapabilities.PHANTOMJS)
            if desired_capabilities:
                capabilities.update(json.loads(desired_capabilities))
            browser = webdriver.PhantomJS(service_args=json.loads(service_args) if service_args else None,
                                          desired_capabilities=capabilities)
            count = 0
            while (retry + 1) > count:
                count += 1
                try:
                    browser.get(url)
                    break
                except Exception as e:
                    print(str(e))
            if xpath:
                browser.implicitly_wait(timeout)
                try:
                    browser.find_element_by_xpath(xpath)
                except Exception as e:
                    # The element is only waited for; its absence is not fatal.
                    print(str(e))
            text = browser.page_source
            if text == '<html><head></head><body></body></html>':
                return ''
            result['cookies'] = browser.get_cookies()
            result['text'] = text.encode('utf-8')
            return json.dumps(result)
        except Exception as e:
            print(str(e))
            return ''
        finally:
            # Single cleanup point for every exit path above.
            if browser:
                try:
                    browser.quit()
                except Exception as e:
                    print(str(e))
项目:wechat_spider    作者:CoolWell    | 项目源码 | 文件源码
def download_articles_ph(self, url):
        '''
        Download a page with PhantomJS.

        The browser uses the module-level ``UA`` user agent, does not load
        images and has a 30-second page-load timeout.  After loading, one
        second is allowed for the page to settle.  Failures are printed
        (timestamp, url, error) and result in ``None``.

        :param url: address of the page to download
        :return: rendered page source, or None if ``url`` is None or the
            download failed
        '''
        if url is None:
            return None
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            UA
        )
        dcap["takesScreenshot"] = (False)
        try:
            driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no'])
        except Exception as e:
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None
        try:
            driver.set_page_load_timeout(30)
            driver.get(url)
            time.sleep(1)
            html = driver.page_source
            return html
        except Exception as e:
            # ``Exception`` rather than a bare except, so Ctrl-C still works;
            # the error itself is printed like in the start-up handler above.
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None
        finally:
            driver.quit()
项目:wechat_spider    作者:CoolWell    | 项目源码 | 文件源码
def maintain_cookies_ph(self):
        """Collect five cookie sets from http://weixin.sogou.com/.

        Each set comes from a separate PhantomJS session (module-level ``UA``
        user agent, images not loaded) that is quit as soon as its cookies
        have been read, also when loading fails.

        :return: list with one ``get_cookies()`` result per session.
        """
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = UA
        cookie = []
        for _ in range(5):
            driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no', ])
            try:
                driver.get("http://weixin.sogou.com/")
                cookie.append(driver.get_cookies())
            finally:
                # Never leave a PhantomJS process behind if the page fails to load.
                driver.quit()
        return cookie
项目:Snapper    作者:dxa4481    | 项目源码 | 文件源码
def host_worker(hostQueue, fileQueue, timeout, user_agent, verbose):
    """Drain ``hostQueue`` and screenshot every host with one PhantomJS driver.

    A host without an ``http://`` or ``https://`` prefix is tried with both
    schemes; a host that already has one is tried as given. For every URL
    that ``host_reachable`` accepts and ``save_image`` captures, a
    ``{url: filename}`` dict is put on ``fileQueue``.

    :param hostQueue: queue of host strings to process
    :param fileQueue: queue receiving ``{url: png_path}`` results
    :param timeout: passed to ``host_reachable`` and used as page-load timeout
    :param user_agent: user-agent string for PhantomJS
    :param verbose: when true, print progress and failures
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = user_agent
    dcap["accept_untrusted_certs"] = True
    driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true'], desired_capabilities=dcap) # or add to your PATH
    try:
        driver.set_window_size(1024, 768) # optional
        driver.set_page_load_timeout(timeout)
        while not hostQueue.empty():
            host = hostQueue.get()
            if host.startswith(("http://", "https://")):
                targets = [host]
            else:
                targets = ["http://" + host, "https://" + host]
            for target in targets:
                filename = os.path.join("output", "images", str(uuid4()) + ".png")
                if verbose:
                    print("Fetching %s" % target)
                if host_reachable(target, timeout) and save_image(target, filename, driver):
                    fileQueue.put({target: filename})
                elif verbose:
                    print("%s is unreachable or timed out" % target)
    finally:
        # The original never stopped the driver, leaving a PhantomJS process
        # behind for every worker.
        driver.quit()
项目:trackship    作者:nabeelio    | 项目源码 | 文件源码
def __init__(self, user_agent=None, cookies_file=None):
        """
        Set up the PhantomJS selenium driver for this instance.

        :param user_agent: optional user-agent string; applied to the page
                           settings only when truthy
        :param cookies_file: stored on the instance as ``cookies_file``
        :return:
        """
        self.conf = config
        self.user_agent = user_agent
        self.cookies_file = cookies_file

        # Setting names: http://phantomjs.org/api/webpage/property/settings.html
        page_settings = {
            'phantomjs.page.settings.loadImages': False,
            'phantomjs.page.settings.webSecurityEnabled': False,
            'phantomjs.page.settings.localToRemoteUrlAccessEnabled': True,
        }
        if user_agent:
            page_settings['phantomjs.page.settings.userAgent'] = user_agent

        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities.update(page_settings)

        # The binary location comes from the [general] section of the config.
        phantomjs_binary = self.conf['general']['phantomjs']
        self.driver = webdriver.PhantomJS(
            desired_capabilities=capabilities,
            executable_path=phantomjs_binary,
        )

        self.load_cookies()

        browser = self.driver
        browser.implicitly_wait(30)
        browser.set_window_size(1024, 768)
项目:DIS_MeituanReptile    作者:myvary    | 项目源码 | 文件源码
def handle_phantomjs(self):
        '''
        Build a PhantomJS driver that uses the proxy described in abuyun.conf.

        The file ``..//..//abuyun.conf`` is read as ``key=value`` lines (spaces
        are stripped); it must define ``proxyHost``, ``proxyPort``,
        ``proxyUser`` and ``proxyPass``.

        :return: driver
        :raises KeyError: when one of the four proxy keys is missing
        '''
        conf = {}
        with open("..//..//abuyun.conf") as conf_file:
            for line in conf_file:
                entry = line.replace(' ', '').replace('\n', '')
                if '=' not in entry:
                    # Blank or malformed line: nothing to record.
                    continue
                # Split on the first '=' only so values containing '='
                # (e.g. passwords) are kept whole.
                key, value = entry.split("=", 1)
                conf[key] = value
        print('??')
        # Proxy server address.
        proxyHost = conf["proxyHost"]
        proxyPort = conf["proxyPort"]
        # Proxy credentials.
        proxyUser = conf["proxyUser"]
        proxyPass = conf["proxyPass"]
        service_args = [
            "--proxy-type=http",
            "--proxy=%(host)s:%(port)s" % {
                "host": proxyHost,
                "port": proxyPort,
            },
            "--proxy-auth=%(user)s:%(pass)s" % {
                "user": proxyUser,
                "pass": proxyPass,
            },
        ]
        phantomjs_path = r"phantomjs"
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        # User agent supplied by self.rad_ua() (presumably a random pick -
        # confirm against rad_ua).
        ua = self.rad_ua()
        dcap["phantomjs.page.settings.userAgent"] = ua
        driver = webdriver.PhantomJS(desired_capabilities=dcap, executable_path=phantomjs_path, service_args=service_args)
        return driver
项目:amazon_order_history_scraper    作者:drewctate    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Launch the PhantomJS / Ghostdriver service and open a session on it.

        The service is started first; the remote driver is then initialised
        against the service URL.

        :Args:
         - executable_path - path to the PhantomJS executable; the default
           relies on it being found on $PATH
         - port - port for the service; 0 lets a free port be chosen
         - desired_capabilities: dictionary of non-browser specific
           capabilities, such as "proxy" or "loggingPref"
         - service_args : list of command line arguments for PhantomJS
         - service_log_path: file the phantomjs service logs to
        """
        ghostdriver = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service = ghostdriver
        ghostdriver.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=ghostdriver.service_url,
                desired_capabilities=desired_capabilities)
        except:
            # Tear everything down before passing the failure on.
            self.quit()
            raise

        self._is_remote = False
项目:webapp    作者:superchilli    | 项目源码 | 文件源码
def __init__(self, executable_path="phantomjs",
                 port=0, desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None, service_log_path=None):
        """
        Start a PhantomJS / Ghostdriver service, then attach a driver to it.

        :Args:
         - executable_path - location of the executable; with the default the
           executable is expected to be on the $PATH
         - port - service port; when left as 0 a free port is picked
         - desired_capabilities: dictionary with non-browser specific
           capabilities only, such as "proxy" or "loggingPref"
         - service_args : list of command line arguments handed to PhantomJS
         - service_log_path: path the phantomjs service writes its log to
        """
        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path,
        )
        self.service.start()

        try:
            session_options = {
                'command_executor': self.service.service_url,
                'desired_capabilities': desired_capabilities,
            }
            RemoteWebDriver.__init__(self, **session_options)
        except:
            # Clean up, then re-raise the original error unchanged.
            self.quit()
            raise

        self._is_remote = False
项目:fintech_spider    作者:hee0624    | 项目源码 | 文件源码
def get_driver_phantomjs(executable_path=r"/home/lxw/Downloads/phantomjs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs"):
    """
    Create a PhantomJS driver with a randomly chosen User-Agent.

    The User-Agent is picked from ``RotateUserAgentMiddleware.user_agent_list``
    and the module-level ``TIMEOUT`` is applied to both page loads and scripts.

    :param executable_path: location of the PhantomJS binary; defaults to the
        path that was previously hard-coded here
    :return: the configured driver

    References:
    PhantomJS:
    1. [Setting the PhantomJS User-Agent](http://smilejay.com/2013/12/set-user-agent-for-phantomjs/)
    2. [Selenium 2 - Setting user agent for IE and Chrome](http://stackoverflow.com/questions/6940477/selenium-2-setting-user-agent-for-ie-and-chrome)
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)

    # Setting User-Agent
    ua = random.choice(RotateUserAgentMiddleware.user_agent_list)
    if ua:
        print("Current User-Agent is:", ua)
        dcap["phantomjs.page.settings.userAgent"] = ua

    driver = webdriver.PhantomJS(executable_path=executable_path, desired_capabilities=dcap)

    # Setting IP proxies (disabled). Kept for reference; it restarts the
    # session with the proxy added to the capabilities:
    #   proxy = webdriver.Proxy()
    #   proxy.proxy_type = ProxyType.MANUAL
    #   ip_proxy = get_proxy()
    #   if ip_proxy:
    #       proxy.http_proxy = ip_proxy
    #   proxy.add_to_capabilities(dcap)
    #   driver.start_session(dcap)

    # Timeouts for page loads and for asynchronous scripts.
    driver.set_page_load_timeout(TIMEOUT)
    driver.set_script_timeout(TIMEOUT)

    return driver
项目:SneakerBotTutorials    作者:theriley106    | 项目源码 | 文件源码
def createHeadlessBrowser(proxy=None, XResolution=1024, YResolution=768):
    """Create a PhantomJS driver with a fixed desktop Chrome user agent.

    :param proxy: optional proxy address handed to PhantomJS as ``--proxy``;
        with ``None`` the driver starts without any extra service arguments
    :param XResolution: window width passed to ``set_window_size``
    :param YResolution: window height passed to ``set_window_size``
    :return: the driver, with a 20 second page-load timeout
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36')

    driver_kwargs = {'desired_capabilities': dcap}
    if proxy is not None:
        # Proxy and relaxed SSL/web-security switches are only sent together.
        driver_kwargs['service_args'] = [
            '--proxy={}'.format(proxy),
            '--proxy-type=https',
            '--ignore-ssl-errors=true',
            '--ssl-protocol=any',
            '--web-security=false',
        ]
    driver = webdriver.PhantomJS(**driver_kwargs)
    driver.set_window_size(XResolution, YResolution)
    driver.set_page_load_timeout(20)
    return driver
项目:sparphantor    作者:antitree    | 项目源码 | 文件源码
def _init_browser(self):
        ''' Set up the selenium PhantomJS browser.

        A user agent is picked at random from a built-in list and the
        PhantomJS binary is picked at random from ``config.PHANTOM_PATH``;
        ``self.service_args`` is passed through to PhantomJS.

        Returns the browser object (page-load timeout 45 seconds), or
        None if the browser could not be created.'''
        # User Agent
        uas = [
            "Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
            ]
        ua = random.choice(uas)

        ## PhantomJS Binary files
        phantoms = config.PHANTOM_PATH
        phantompath = random.choice(phantoms)

        # Custom user agent
        dc = dict(DesiredCapabilities.PHANTOMJS)
        dc["phantomjs.page.settings.userAgent"] = ua

        try:
            browser = webdriver.PhantomJS(
                phantompath,
                service_args=self.service_args,
                desired_capabilities=dc
            )
        except WebDriverException as err:
            logging.error("Could not create browser. Check path")
            logging.error(err)
            return None
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            logging.error("Major problem with webdriver. "
                          "Could be related to performance. "
                          "Decrease the number of threads.")
            return None
        browser.set_page_load_timeout(45)

        return browser
项目:wechat_spider    作者:CoolWell    | 项目源码 | 文件源码
def download(self, link, name, url):
        """
        Load ``link`` in PhantomJS and return its HTML.

        A random entry of ``self.agents`` is used as user agent and a random
        entry of ``self.cookie`` as Cookie header. If the loaded page contains
        an element with class ``page_verify``, ``self.ocr4wechat(link)`` is
        called, the method sleeps five seconds and ``name`` is appended to
        ``list_error.txt``. ``name`` is also appended there when the driver
        cannot be started or loading fails.

        :param link: address that is actually loaded
        :param name: identifier written to ``list_error.txt`` on failure
        :param url: only printed in error reports
        :return: ``(link, html)`` on success, otherwise ``None``
        """
        def record_failure():
            # Append the name to the shared error list.
            with open(r'list_error.txt', 'a') as f:
                f.write(name.encode('utf-8'))
                f.write('\n')

        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = random.choice(self.agents)
        dcap["takesScreenshot"] = False
        dcap["phantomjs.page.customHeaders.Cookie"] = random.choice(self.cookie)
        try:
            driver1 = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--load-images=no', ])
        except Exception as e:
            record_failure()
            print(datetime.datetime.now())
            print(url)
            print(e)
            return None

        try:
            driver1.set_page_load_timeout(20)
            driver1.get(link)
            try:
                driver1.find_element_by_class_name('page_verify')
                needs_verify = True
            except Exception:
                # Lookup failed: treat the page as not requiring verification.
                # (Was a bare ``except:`` that also caught KeyboardInterrupt.)
                needs_verify = False

            if not needs_verify:
                return link, driver1.page_source

            print('page needs verify, stop the program')
            print('the last weixinNUM is %s\n' % name)
            self.ocr4wechat(link)
            time.sleep(5)
            record_failure()
            return None
        except Exception as e:
            record_failure()
            print(url)
            print(datetime.datetime.now())
            print(e)
            return None
        finally:
            driver1.quit()
项目:DIS_MeituanReptile    作者:myvary    | 项目源码 | 文件源码
def downloader_html_ph(url, up_num):  ##??PhantomJS??????
    '''
    url        :??????url
    up_num     :?????
    '''
    # print driver.service
    print '????????!    URL?', url, '    ?????:', up_num
    conf = {}
    for line in fileinput.input("..//..//abuyun.conf"):
        lines = line.replace(' ', '').replace('\n', '').split("=")
        conf[lines[0]] = lines[1]
    # ?????
    proxyHost = conf["proxyHost"]
    proxyPort = conf["proxyPort"]
    # ???????????
    proxyUser = conf["proxyUser"]
    proxyPass = conf["proxyPass"]
    service_args = [
        "--proxy-type=http",
        "--proxy=%(host)s:%(port)s" % {
            "host": proxyHost,
            "port": proxyPort,
        },
        "--proxy-auth=%(user)s:%(pass)s" % {
            "user": proxyUser,
            "pass": proxyPass,
        },
    ]
    phantomjs_path = r"phantomjs"
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    # ?????UA??????????
    ua = rad_ua()  ##?????UA
    dcap["phantomjs.page.settings.userAgent"] = ua
    # ,service_args=service_args ?????
    driver = webdriver.PhantomJS(desired_capabilities=dcap, executable_path=phantomjs_path)
    driver.get(url)
    time.sleep(2)
    ##???????????????
    dian = ''
    print '?????',
    for i in range(up_num):
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        ##??????????????
        time.sleep(2)
        dian = dian + '.'
        print '.',

    print driver.current_url, '?????????????'

    data = driver.page_source.encode("utf-8")
    # ??????
    html_parser = HTMLParser.HTMLParser()
    data = html_parser.unescape(data)
    return data
项目:biweeklybudget    作者:jantman    | 项目源码 | 文件源码
def get_browser(self, browser_name):
        """Return a local webdriver instance for ``browser_name``.

        Accepted names are 'firefox', 'chrome', 'chrome-headless' and
        'phantomjs'; any other value ends the program via SystemExit. The
        requested name is remembered in ``self._browser_name``.
        """
        self._browser_name = browser_name
        if browser_name not in ('firefox', 'chrome', 'chrome-headless', 'phantomjs'):
            raise SystemExit(
                "ERROR: browser type must be one of 'firefox', 'chrome', "
                "'chrome-headless' or 'phantomjs', not '{b}'".format(
                    b=browser_name
                )
            )

        if browser_name == 'firefox':
            logger.debug("getting Firefox browser (local)")
            if 'DISPLAY' not in os.environ:
                logger.debug("exporting DISPLAY=:0")
                os.environ['DISPLAY'] = ":0"
            browser = webdriver.Firefox()
        elif browser_name == 'phantomjs':
            logger.debug("getting PhantomJS browser (local)")
            capabilities = dict(DesiredCapabilities.PHANTOMJS)
            capabilities["phantomjs.page.settings.userAgent"] = self.user_agent
            phantom_args = [
                '--cookies-file={c}'.format(c=self._cookie_file),
                '--ssl-protocol=any',
                '--ignore-ssl-errors=true',
                '--web-security=false'
            ]
            browser = webdriver.PhantomJS(
                desired_capabilities=capabilities, service_args=phantom_args
            )
            browser.set_window_size(1024, 768)
        else:
            # Both Chrome flavours share the window size and implicit wait.
            if browser_name == 'chrome':
                logger.debug("getting Chrome browser (local)")
                browser = webdriver.Chrome()
            else:
                logger.debug('getting Chrome browser (local) with --headless')
                chrome_options = Options()
                chrome_options.add_argument("--headless")
                browser = webdriver.Chrome(chrome_options=chrome_options)
            browser.set_window_size(1920, 1080)
            browser.implicitly_wait(2)

        logger.debug("returning browser")
        return browser
项目:scrapyweixi    作者:Felix-P-Code    | 项目源码 | 文件源码
def selenium_request(url, isscreen=False):
    """Load ``url`` in PhantomJS and return the page source.

    With ``isscreen`` true, a screenshot is saved under
    ``<cwd>/xici/validateimg/``, the region (260, 12, 396, 70) is cropped out
    and sent to the ``RClient`` OCR service, the returned code is typed into
    the element with id ``input``, the element with id ``bt`` is clicked and
    the page is refreshed before the source is read.

    :param url: address to load
    :param isscreen: whether to run the screenshot/OCR step described above
    :return: ``driver.page_source`` after the final wait
    """
    osurl = '%s/xici/validateimg/' % os.path.dirname(os.path.abspath("scrapy.cfg"))

    ua_list = [
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
        "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36"
    ]

    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.resourceTimeout"] = 15
    dcap["phantomjs.page.settings.loadImages"] = True
    dcap["phantomjs.page.settings.userAgent"] = choice(ua_list)
    driver = webdriver.PhantomJS(executable_path='/Users/felixchan/Tool/phantomjs',desired_capabilities=dcap)
    try:
        driver.get(url)
        if isscreen:
            stamp = int(time.time())
            imgURL = '%s%s.png' % (osurl, stamp)
            uploadimg = '%s%s_2.png' % (osurl, stamp)
            driver.save_screenshot(imgURL)
            time.sleep(1)

            ocr = RClient(VALIDATE['username'], VALIDATE['password'], VALIDATE['soft_id'], VALIDATE['soft_key'])
            # Crop box (left, top, right, bottom) of the region sent to OCR.
            crop_box = (260, 12, 396, 70)

            im = Image.open(imgURL)
            im = im.crop(crop_box)
            im.save(uploadimg)
            # Read the cropped image and release the handle right away.
            with open(uploadimg, 'rb') as cropped:
                ims = cropped.read()
            post_result = ocr.create(uploadimg, ims, 3040)
            varidate_code = post_result['Result']
            print(post_result)

            elem = driver.find_element_by_id('input')
            elem.send_keys(varidate_code)
            driver.find_element_by_id('bt').click()
            driver.refresh()

        driver.implicitly_wait(2)
        time.sleep(1)
        true_page = driver.page_source
    finally:
        # quit() (not close()) so the PhantomJS process is stopped, and do it
        # even when a step above fails.
        driver.quit()
    return true_page
项目:fintech_spider    作者:hee0624    | 项目源码 | 文件源码
def __init__(self,
                 url="http://www.gsxt.gov.cn/index.html",
                 #url="http://sh.gsxt.gov.cn/notice",
                 #search_text = u"????",
                 search_text = u"????????????",
                 input_id='keyword',
                 search_element_id='btn_query',
                 gt_element_class_name='gt_box',
                 gt_slider_knob_name='gt_slider_knob',
                 result_numbers_xpath='/html/body/div[2]/div[3]/div[1]/span',
                 result_list_verify_id=None,
                 result_list_verify_class=None,
                 is_gap_every_broad=True):

        """
        Store the page/locator settings and start a Chrome driver.

        Every argument is saved unchanged on the instance under the same
        name; none of them is used further inside this constructor.
        Judging by the names (confirm against the methods that use them):
        url: page to open
        search_text: text to search for
        input_id: id of the search input element
        search_element_id: id of the search button element
        gt_element_class_name: class name of the captcha box element
        gt_slider_knob_name: class name of the captcha slider knob element
        result_numbers_xpath: xpath of the element holding the result count
        result_list_verify_id: id used to verify that the result list loaded, or
        result_list_verify_class: class used for the same verification
        is_gap_every_broad: flag about the captcha gap; semantics not visible here
        """
        self.url, self.search_text = url, search_text
        self.input_id, self.search_element_id = input_id, search_element_id
        self.gt_element_class_name, self.gt_slider_knob_name = (
            gt_element_class_name, gt_slider_knob_name)
        self.result_numbers_xpath = result_numbers_xpath
        self.result_list_verify_id, self.result_list_verify_class = (
            result_list_verify_id, result_list_verify_class)
        self.is_gap_every_broad = is_gap_every_broad

        # Capabilities are only consumed by the disabled PhantomJS line below.
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap.update({
            "phantomjs.page.settings.userAgent":
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36",
        })

        #self.driver = webdriver.PhantomJS(desired_capabilities=dcap)
        self.driver = webdriver.Chrome("/home/hee/driver/chromedriver")

        #self.driver.maximize_window()
        # Random pause of two to three seconds after the browser starts.
        pause = random.uniform(2.0, 3.0)
        time.sleep(pause)
项目:fintech_spider    作者:hee0624    | 项目源码 | 文件源码
def __init__(self,
                 url="http://www.gsxt.gov.cn/index.html",
                 #url="http://sh.gsxt.gov.cn/notice",
                 #search_text = u"????",
                 search_text = u"????????????",
                 input_id='keyword',
                 search_element_id='btn_query',
                 gt_element_class_name='gt_box',
                 gt_slider_knob_name='gt_slider_knob',
                 result_numbers_xpath='/html/body/div[2]/div[3]/div[1]/span',
                 result_list_verify_id=None,
                 result_list_verify_class=None,
                 is_gap_every_broad=True):

        """
        Record the page/locator settings and launch a Chrome driver.

        Each argument is copied onto the instance under its own name and is
        not interpreted here. Judging by the names (confirm against the
        methods that use them):
        url: page to open
        search_text: text to search for
        input_id: id of the search input element
        search_element_id: id of the search button element
        gt_element_class_name: class name of the captcha box element
        gt_slider_knob_name: class name of the captcha slider knob element
        result_numbers_xpath: xpath of the element holding the result count
        result_list_verify_id: id used to verify that the result list loaded, or
        result_list_verify_class: class used for the same verification
        is_gap_every_broad: flag about the captcha gap; semantics not visible here
        """
        settings = (
            ('url', url),
            ('search_text', search_text),
            ('input_id', input_id),
            ('search_element_id', search_element_id),
            ('gt_element_class_name', gt_element_class_name),
            ('gt_slider_knob_name', gt_slider_knob_name),
            ('result_numbers_xpath', result_numbers_xpath),
            ('result_list_verify_id', result_list_verify_id),
            ('result_list_verify_class', result_list_verify_class),
            ('is_gap_every_broad', is_gap_every_broad),
        )
        for attribute, value in settings:
            setattr(self, attribute, value)

        # Capabilities are only consumed by the disabled PhantomJS line below.
        user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36"
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = user_agent

        #self.driver = webdriver.PhantomJS(desired_capabilities=dcap)
        # self.driver = webdriver.Chrome("/home/hee/driver/chromedriver") # hee
        chromedriver_path = r"/home/lxw/Software/chromedirver_selenium/chromedriver"    # lxw
        self.driver = webdriver.Chrome(chromedriver_path)

        #self.driver.maximize_window()
        # Random pause of two to three seconds after the browser starts.
        time.sleep(random.uniform(2.0, 3.0))
项目:fintech_spider    作者:hee0624    | 项目源码 | 文件源码
def __init__(self,
                 url="http://www.gsxt.gov.cn/index.html",
                 #url="http://sh.gsxt.gov.cn/notice",
                 #search_text = u"????",
                 search_text = u"????????????",
                 input_id='keyword',
                 search_element_id='btn_query',
                 gt_element_class_name='gt_box',
                 gt_slider_knob_name='gt_slider_knob',
                 result_numbers_xpath='/html/body/div[2]/div[3]/div[1]/span',
                 result_list_verify_id=None,
                 result_list_verify_class=None,
                 is_gap_every_broad=True):

        """
        Keep the page/locator settings on the instance and open Chrome.

        All arguments are stored as attributes of the same name without any
        processing. Judging by the names (confirm against the methods that
        use them):
        url: page to open
        search_text: text to search for
        input_id: id of the search input element
        search_element_id: id of the search button element
        gt_element_class_name: class name of the captcha box element
        gt_slider_knob_name: class name of the captcha slider knob element
        result_numbers_xpath: xpath of the element holding the result count
        result_list_verify_id: id used to verify that the result list loaded, or
        result_list_verify_class: class used for the same verification
        is_gap_every_broad: flag about the captcha gap; semantics not visible here
        """
        # Target page and search form.
        self.url = url
        self.search_text = search_text
        self.input_id = input_id
        self.search_element_id = search_element_id

        # Captcha widget locators and behaviour flag.
        self.gt_element_class_name = gt_element_class_name
        self.gt_slider_knob_name = gt_slider_knob_name
        self.is_gap_every_broad = is_gap_every_broad

        # Result page locators.
        self.result_numbers_xpath = result_numbers_xpath
        self.result_list_verify_id = result_list_verify_id
        self.result_list_verify_class = result_list_verify_class

        # Capabilities are only consumed by the disabled PhantomJS line below.
        phantom_caps = dict(DesiredCapabilities.PHANTOMJS)
        phantom_caps["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36"
        )

        #self.driver = webdriver.PhantomJS(desired_capabilities=phantom_caps)
        chromedriver_path = "/home/hee/driver/chromedriver" # hee
        # chromedriver_path = r"/home/lxw/Software/chromedirver_selenium/chromedriver"    # lxw
        self.driver = webdriver.Chrome(chromedriver_path)

        #self.driver.maximize_window()
        # Random pause of two to three seconds after the browser starts.
        delay = random.uniform(2.0, 3.0)
        time.sleep(delay)
项目:findtrip    作者:fankcoder    | 项目源码 | 文件源码
def findTrip():
    """Scrape the Ctrip XMN-BJS flight list for 2016-04-18 using Firefox.

    Rows that contain a ``train_flight_tit`` element are skipped. For every
    other row a dict is collected with the keys ``company``, ``flight_time``,
    ``airports`` and ``price``; each value is the raw list of text nodes
    returned by the XPath query (``flight_time`` and ``airports`` are
    ``[from, to]`` pairs of such lists).

    :return: list of the collected dicts (also printed)
    """
    url = "http://flights.ctrip.com/booking/XMN-BJS-day-1.html?DDate1=2016-04-18"
    ua_list = [
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
            "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36"
            ]

    # Capabilities for the PhantomJS alternative, which is currently disabled
    # in favour of Firefox.
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.resourceTimeout"] = 15
    dcap["phantomjs.page.settings.loadImages"] = False
    dcap["phantomjs.page.settings.userAgent"] = choice(ua_list)
    #driver = webdriver.PhantomJS(executable_path=u'/home/icgoo/pywork/spider/phantomjs',desired_capabilities=dcap)
    #driver = webdriver.PhantomJS(executable_path=u'/home/fank/pywork/spider/phantomjs',desired_capabilities=dcap)
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        driver.implicitly_wait(3)
        time.sleep(5)
        page = driver.page_source
    finally:
        # quit() ends the browser session even when loading fails.
        driver.quit()
    html = etree.HTML(page)

    fligint_div = "//div[@id='J_flightlist2']/div"
    items = html.xpath(fligint_div)
    detail = []
    # XPath positions are 1-based.
    for position in range(1, len(items) + 1):
        flight_tr = fligint_div + '[' + str(position) + ']' + '//tr'
        if html.xpath(flight_tr + "//div[@class='train_flight_tit']"):
            # Train entry: previously fell through to the append below and
            # either raised NameError or duplicated the previous flight.
            continue

        company = html.xpath(flight_tr + "//div[@class='info-flight J_flight_no']//text()")
        flight_time_from = html.xpath(flight_tr + "//td[@class='right']/div[1]//text()")
        flight_time_to = html.xpath(flight_tr + "//td[@class='left']/div[1]//text()")
        airports_from = html.xpath(flight_tr + "//td[@class='right']/div[2]//text()")
        airports_to = html.xpath(flight_tr + "//td[@class='left']/div[2]//text()")
        price = html.xpath(flight_tr + "[1]//td[@class='price middle ']/span//text()")

        detail.append(
                dict(
                    company=company,
                    flight_time=[flight_time_from, flight_time_to],
                    airports=[airports_from, airports_to],
                    price=price
                    ))
    print(detail)
    return detail