Java 类org.apache.http.params.HttpProtocolParamBean 实例源码
项目:Crawler4j
文件:PageFetcher.java
/**
 * Creates a page fetcher whose HTTP client is configured from {@code config}.
 *
 * <p>The constructor:
 * <ul>
 *   <li>builds the client parameters (HTTP/1.1, UTF-8 content charset, no Expect-Continue,
 *       browser-compatible cookie policy, user agent and timeouts taken from {@code config},
 *       {@code http.protocol.handle-redirects} set to {@code false});</li>
 *   <li>registers the {@code http} scheme on port 80 and, when
 *       {@code config.isIncludeHttpsPages()} is true, the {@code https} scheme on port 443;</li>
 *   <li>creates the pooling connection manager with the total / per-route limits from
 *       {@code config};</li>
 *   <li>configures the proxy (and proxy credentials, when a proxy user name is set) if
 *       {@code config.getProxyHost()} is non-null;</li>
 *   <li>installs a response interceptor that wraps gzip-encoded bodies in a
 *       {@code GzipDecompressingEntity};</li>
 *   <li>creates (if needed) and starts the idle-connection monitor thread.</li>
 * </ul>
 *
 * @param config crawl configuration supplying user agent, timeouts, connection limits,
 *               HTTPS and proxy settings
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    HttpParams params = new BasicHttpParams();
    HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
    paramsBean.setVersion(HttpVersion.HTTP_1_1);
    paramsBean.setContentCharset("UTF-8");
    paramsBean.setUseExpectContinue(false);

    params.setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY);
    params.setParameter(CoreProtocolPNames.USER_AGENT, config.getUserAgentString());
    params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout());
    params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout());

    // Redirects are not followed by the client itself.
    params.setBooleanParameter("http.protocol.handle-redirects", false);

    SchemeRegistry schemeRegistry = new SchemeRegistry();
    schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
    if (config.isIncludeHttpsPages()) {
        schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
    }

    connectionManager = new PoolingClientConnectionManager(schemeRegistry);
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());

    httpClient = new DefaultHttpClient(connectionManager, params);

    if (config.getProxyHost() != null) {
        // Credentials are only registered when a proxy user name is configured.
        if (config.getProxyUsername() != null) {
            httpClient.getCredentialsProvider().setCredentials(
                    new AuthScope(config.getProxyHost(), config.getProxyPort()),
                    new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword()));
        }
        HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort());
        httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
    }

    // Transparently decompress gzip-encoded response bodies.
    httpClient.addResponseInterceptor(new HttpResponseInterceptor() {
        @Override
        public void process(final HttpResponse response, final HttpContext context)
                throws HttpException, IOException {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // A response may carry no body (and therefore no entity); nothing to decode.
                return;
            }
            Header contentEncoding = entity.getContentEncoding();
            if (contentEncoding == null) {
                return;
            }
            for (HeaderElement codec : contentEncoding.getElements()) {
                if (codec.getName().equalsIgnoreCase("gzip")) {
                    response.setEntity(new GzipDecompressingEntity(entity));
                    return;
                }
            }
        }
    });

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    // NOTE(review): start() also runs when the field was already non-null on entry. If the
    // field is shared between instances and the monitor is a java.lang.Thread, a second
    // start() would throw IllegalThreadStateException -- confirm against the field declaration.
    connectionMonitorThread.start();
}
项目:sisob-data-extractor
文件:PageFetcher.java
/**
 * Lazily initialises the shared HTTP client, its connection manager and the idle-connection
 * monitor, then starts the monitor.
 *
 * <p>All initialisation happens only while {@code connectionMonitorThread} is {@code null}.
 * The monitor is started in that same branch, so calling this method again after a successful
 * first call is a no-op. Previously {@code start()} was invoked on every call, i.e. on the
 * already-started monitor; for a {@code java.lang.Thread} that throws
 * {@code IllegalThreadStateException} (presumably {@code IdleConnectionMonitorThread} is a
 * {@code Thread} -- confirm against its declaration).
 *
 * <p>Settings are read through {@code Configurations} with these keys and fallbacks:
 * {@code fetcher.user_agent}, {@code fetcher.socket_timeout} (20000),
 * {@code fetcher.connection_timeout} (30000), {@code fetcher.max_connections_per_host} (100),
 * {@code fetcher.max_total_connections} (100) and {@code fetcher.crawl_https} (false).
 */
public synchronized static void startConnectionMonitorThread()
{
    if (connectionMonitorThread != null)
    {
        // Already initialised (and started) by an earlier call.
        return;
    }

    HttpParams params = new BasicHttpParams();
    HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
    paramsBean.setVersion(HttpVersion.HTTP_1_1);
    paramsBean.setContentCharset("UTF-8");
    paramsBean.setUseExpectContinue(false);

    params.setParameter("http.useragent", Configurations.getStringProperty("fetcher.user_agent",
            "crawler4j (http://code.google.com/p/crawler4j/)"));
    params.setIntParameter("http.socket.timeout", Configurations.getIntProperty("fetcher.socket_timeout", 20000));
    params.setIntParameter("http.connection.timeout",
            Configurations.getIntProperty("fetcher.connection_timeout", 30000));

    // Redirects are not followed by the client itself.
    params.setBooleanParameter("http.protocol.handle-redirects", false);

    ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
    connPerRouteBean.setDefaultMaxPerRoute(Configurations.getIntProperty("fetcher.max_connections_per_host", 100));
    ConnManagerParams.setMaxConnectionsPerRoute(params, connPerRouteBean);
    ConnManagerParams.setMaxTotalConnections(params,
            Configurations.getIntProperty("fetcher.max_total_connections", 100));

    SchemeRegistry schemeRegistry = new SchemeRegistry();
    schemeRegistry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
    if (Configurations.getBooleanProperty("fetcher.crawl_https", false)) {
        schemeRegistry.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));
    }

    connectionManager = new ThreadSafeClientConnManager(params, schemeRegistry);
    httpclient = new DefaultHttpClient(connectionManager, params);

    connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    connectionMonitorThread.start();
}