Python urllib.request 模块,HTTPRedirectHandler() 实例源码

我们从Python开源项目中,提取了以下5个代码示例,用于说明如何使用urllib.request.HTTPRedirectHandler()

项目:twittershade    作者:nicolavic98    | 项目源码 | 文件源码
def follow_redirects(link, sites= None):
    """Follow directs for the link as long as the redirects are on the given
    sites and return the resolved link."""
    def follow(url):
        return sites == None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        def __init__(self):
            self.last_url = None
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            r.get_method = lambda : 'HEAD'
            return r

    if not follow(link):
        return link
    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda : 'HEAD'
    try:
        with contextlib.closing(opener.open(req,timeout=1)) as site:
            return site.url
    except:
        return redirect_handler.last_url if redirect_handler.last_url else link
项目:TwiBot    作者:ShruthiChari    | 项目源码 | 文件源码
def follow_redirects(link, sites= None):
    """Follow directs for the link as long as the redirects are on the given
    sites and return the resolved link."""
    def follow(url):
        return sites == None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        def __init__(self):
            self.last_url = None
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            r.get_method = lambda : 'HEAD'
            return r

    if not follow(link):
        return link
    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda : 'HEAD'
    try:
        with contextlib.closing(opener.open(req,timeout=1)) as site:
            return site.url
    except:
        return redirect_handler.last_url if redirect_handler.last_url else link
项目:TwiBot    作者:ShruthiChari    | 项目源码 | 文件源码
def follow_redirects(link, sites= None):
    """Follow directs for the link as long as the redirects are on the given
    sites and return the resolved link."""
    def follow(url):
        return sites == None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        def __init__(self):
            self.last_url = None
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            r.get_method = lambda : 'HEAD'
            return r

    if not follow(link):
        return link
    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda : 'HEAD'
    try:
        with contextlib.closing(opener.open(req,timeout=1)) as site:
            return site.url
    except:
        return redirect_handler.last_url if redirect_handler.last_url else link
项目:DevOps    作者:YoLoveLife    | 项目源码 | 文件源码
def RedirectHandlerFactory(follow_redirects=None, validate_certs=True):
    """This is a class factory that closes over the value of
    ``follow_redirects`` so that the RedirectHandler class has access to
    that value without having to use globals, and potentially cause problems
    where ``open_url`` or ``fetch_url`` are used multiple times in a module.
    """

    class RedirectHandler(urllib_request.HTTPRedirectHandler):
        """This is an implementation of a RedirectHandler to match the
        functionality provided by httplib2. It will utilize the value of
        ``follow_redirects`` that is passed into ``RedirectHandlerFactory``
        to determine how redirects should be handled in urllib2.
        """

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            handler = maybe_add_ssl_handler(newurl, validate_certs)
            if handler:
                urllib_request._opener.add_handler(handler)

            if follow_redirects == 'urllib2':
                return urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)
            elif follow_redirects in ['no', 'none', False]:
                raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp)

            do_redirect = False
            if follow_redirects in ['all', 'yes', True]:
                do_redirect = (code >= 300 and code < 400)

            elif follow_redirects == 'safe':
                m = req.get_method()
                do_redirect = (code >= 300 and code < 400 and m in ('GET', 'HEAD'))

            if do_redirect:
                # be conciliant with URIs containing a space
                newurl = newurl.replace(' ', '%20')
                newheaders = dict((k,v) for k,v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type")
                                 )
                return urllib_request.Request(newurl,
                               headers=newheaders,
                               origin_req_host=req.get_origin_req_host(),
                               unverifiable=True)
            else:
                raise urllib_error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

    return RedirectHandler
项目:ansible-provider-docs    作者:alibaba    | 项目源码 | 文件源码
def RedirectHandlerFactory(follow_redirects=None, validate_certs=True):
    """This is a class factory that closes over the value of
    ``follow_redirects`` so that the RedirectHandler class has access to
    that value without having to use globals, and potentially cause problems
    where ``open_url`` or ``fetch_url`` are used multiple times in a module.
    """

    class RedirectHandler(urllib_request.HTTPRedirectHandler):
        """This is an implementation of a RedirectHandler to match the
        functionality provided by httplib2. It will utilize the value of
        ``follow_redirects`` that is passed into ``RedirectHandlerFactory``
        to determine how redirects should be handled in urllib2.
        """

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            handler = maybe_add_ssl_handler(newurl, validate_certs)
            if handler:
                urllib_request._opener.add_handler(handler)

            if follow_redirects == 'urllib2':
                return urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)
            elif follow_redirects in ['no', 'none', False]:
                raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp)

            do_redirect = False
            if follow_redirects in ['all', 'yes', True]:
                do_redirect = (code >= 300 and code < 400)

            elif follow_redirects == 'safe':
                m = req.get_method()
                do_redirect = (code >= 300 and code < 400 and m in ('GET', 'HEAD'))

            if do_redirect:
                # be conciliant with URIs containing a space
                newurl = newurl.replace(' ', '%20')
                newheaders = dict((k, v) for k, v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type"))
                try:
                    # Python 2-3.3
                    origin_req_host = req.get_origin_req_host()
                except AttributeError:
                    # Python 3.4+
                    origin_req_host = req.origin_req_host
                return urllib_request.Request(newurl,
                                              headers=newheaders,
                                              origin_req_host=origin_req_host,
                                              unverifiable=True)
            else:
                raise urllib_error.HTTPError(req.get_full_url(), code, msg, hdrs, fp)

    return RedirectHandler