我们从Python开源项目中,提取了以下5个代码示例,用于说明如何使用urllib.request.HTTPRedirectHandler()。
def follow_redirects(link, sites= None): """Follow directs for the link as long as the redirects are on the given sites and return the resolved link.""" def follow(url): return sites == None or urlparse.urlparse(url).hostname in sites class RedirectHandler(urllib2.HTTPRedirectHandler): def __init__(self): self.last_url = None def redirect_request(self, req, fp, code, msg, hdrs, newurl): self.last_url = newurl if not follow(newurl): return None r = urllib2.HTTPRedirectHandler.redirect_request( self, req, fp, code, msg, hdrs, newurl) r.get_method = lambda : 'HEAD' return r if not follow(link): return link redirect_handler = RedirectHandler() opener = urllib2.build_opener(redirect_handler) req = urllib2.Request(link) req.get_method = lambda : 'HEAD' try: with contextlib.closing(opener.open(req,timeout=1)) as site: return site.url except: return redirect_handler.last_url if redirect_handler.last_url else link
def RedirectHandlerFactory(follow_redirects=None, validate_certs=True): """This is a class factory that closes over the value of ``follow_redirects`` so that the RedirectHandler class has access to that value without having to use globals, and potentially cause problems where ``open_url`` or ``fetch_url`` are used multiple times in a module. """ class RedirectHandler(urllib_request.HTTPRedirectHandler): """This is an implementation of a RedirectHandler to match the functionality provided by httplib2. It will utilize the value of ``follow_redirects`` that is passed into ``RedirectHandlerFactory`` to determine how redirects should be handled in urllib2. """ def redirect_request(self, req, fp, code, msg, hdrs, newurl): handler = maybe_add_ssl_handler(newurl, validate_certs) if handler: urllib_request._opener.add_handler(handler) if follow_redirects == 'urllib2': return urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl) elif follow_redirects in ['no', 'none', False]: raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp) do_redirect = False if follow_redirects in ['all', 'yes', True]: do_redirect = (code >= 300 and code < 400) elif follow_redirects == 'safe': m = req.get_method() do_redirect = (code >= 300 and code < 400 and m in ('GET', 'HEAD')) if do_redirect: # be conciliant with URIs containing a space newurl = newurl.replace(' ', '%20') newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type") ) return urllib_request.Request(newurl, headers=newheaders, origin_req_host=req.get_origin_req_host(), unverifiable=True) else: raise urllib_error.HTTPError(req.get_full_url(), code, msg, hdrs, fp) return RedirectHandler
def RedirectHandlerFactory(follow_redirects=None, validate_certs=True): """This is a class factory that closes over the value of ``follow_redirects`` so that the RedirectHandler class has access to that value without having to use globals, and potentially cause problems where ``open_url`` or ``fetch_url`` are used multiple times in a module. """ class RedirectHandler(urllib_request.HTTPRedirectHandler): """This is an implementation of a RedirectHandler to match the functionality provided by httplib2. It will utilize the value of ``follow_redirects`` that is passed into ``RedirectHandlerFactory`` to determine how redirects should be handled in urllib2. """ def redirect_request(self, req, fp, code, msg, hdrs, newurl): handler = maybe_add_ssl_handler(newurl, validate_certs) if handler: urllib_request._opener.add_handler(handler) if follow_redirects == 'urllib2': return urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl) elif follow_redirects in ['no', 'none', False]: raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp) do_redirect = False if follow_redirects in ['all', 'yes', True]: do_redirect = (code >= 300 and code < 400) elif follow_redirects == 'safe': m = req.get_method() do_redirect = (code >= 300 and code < 400 and m in ('GET', 'HEAD')) if do_redirect: # be conciliant with URIs containing a space newurl = newurl.replace(' ', '%20') newheaders = dict((k, v) for k, v in req.headers.items() if k.lower() not in ("content-length", "content-type")) try: # Python 2-3.3 origin_req_host = req.get_origin_req_host() except AttributeError: # Python 3.4+ origin_req_host = req.origin_req_host return urllib_request.Request(newurl, headers=newheaders, origin_req_host=origin_req_host, unverifiable=True) else: raise urllib_error.HTTPError(req.get_full_url(), code, msg, hdrs, fp) return RedirectHandler