/**
 * Make sure that this instance is initialized. This is particularly required to use HDFS {@link URL}s.
 */
public void ensureInitialized() {
    if (this.isInitialized) return;

    // Add a handler for HDFS URLs to java.net.URL
    LoggerFactory.getLogger(HadoopFileSystem.class).info("Adding handler for HDFS URLs.");
    try {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    } catch (Throwable t) {
        LoggerFactory.getLogger(HadoopFileSystem.class).error("Could not set URL stream handler factory.", t);
    } finally {
        this.isInitialized = true;
    }
}
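For context, a minimal usage sketch of what this registration enables (not from the original source): once ensureInitialized() has installed the FsUrlStreamHandlerFactory, plain java.net.URL calls can read hdfs:// locations. The namenode host/port and file path below are placeholders.

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

// Usage sketch (assumptions: placeholder namenode address and path; the
// HadoopFileSystem instance is the one exposing ensureInitialized() above).
static void readViaHdfsUrl(HadoopFileSystem hadoopFileSystem) throws IOException {
    hadoopFileSystem.ensureInitialized();

    // With the factory registered, java.net.URL understands the hdfs:// scheme;
    // without it, the URL constructor throws "MalformedURLException: unknown protocol: hdfs".
    try (InputStream in = new URL("hdfs://namenode:8020/user/alice/data.txt").openStream()) {
        byte[] buffer = new byte[4096];
        int read = in.read(buffer);
        System.out.println("Read " + read + " bytes through the hdfs:// URL");
    }
}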
protected void initHdfs() {
    try {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    } catch (Throwable e) {
        // ignore, as it is most likely already set
        LOG.debug("Cannot set URLStreamHandlerFactory due to " + e.getMessage()
                + ". This exception will be ignored.", e);
    }
}
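The swallowed Throwable here is almost always the Error the JDK raises when a stream handler factory has already been installed: URL.setURLStreamHandlerFactory may succeed only once per JVM. A small sketch of that behaviour (placeholder code, not from the original source):

import java.net.URL;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

public static void main(String[] args) {
    // First registration in this JVM succeeds.
    URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());

    // Any later attempt fails with java.lang.Error (typically "factory already defined"),
    // which is the case the catch block in initHdfs() above absorbs.
    try {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    } catch (Error alreadyDefined) {
        System.out.println("Factory already registered: " + alreadyDefined.getMessage());
    }
}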
public static void setHdfsURLStreamHandlerFactory()
        throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException {
    // Look up the private static "factory" field on java.net.URL via reflection
    Field factoryField = URL.class.getDeclaredField("factory");
    factoryField.setAccessible(true);
    URLStreamHandlerFactory urlStreamHandlerFactory = (URLStreamHandlerFactory) factoryField.get(null);

    if (urlStreamHandlerFactory == null) {
        // No factory registered yet: use the public API
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    } else {
        // A factory is already registered: overwrite the field directly, since
        // URL.setURLStreamHandlerFactory() may only be called once per JVM
        try {
            factoryField.set(null, new FsUrlStreamHandlerFactory());
        } catch (IllegalAccessException e1) {
            LOGGER.error("Could not access URLStreamHandler factory field on URL class: {}", e1.getMessage());
            throw new RuntimeException("Could not access URLStreamHandler factory field on URL class", e1);
        }
    }
}
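A hedged sketch of how such a reflective setter is typically invoked once at startup (the enclosing static initializer and error handling are assumptions, not part of the original). Note that on JDK 16 and later, writing to the private java.net.URL.factory field is normally blocked by strong encapsulation unless the JVM is started with --add-opens java.base/java.net=ALL-UNNAMED.

// Invocation sketch (assumption): run once before any hdfs:// URL is constructed.
// NoSuchFieldException and IllegalAccessException are subclasses of
// ReflectiveOperationException; SecurityException and IllegalArgumentException are unchecked.
static {
    try {
        setHdfsURLStreamHandlerFactory();
    } catch (ReflectiveOperationException e) {
        throw new ExceptionInInitializerError(e);
    }
}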
/**
 * Loads the {@link FsUrlStreamHandlerFactory}.
 *
 * @param conf the configuration to use
 * @throws IOException if something goes wrong
 */
public static void loadFsUrlStreamHandler(final Configuration conf) throws IOException {
    // Here to avoid https://issues.apache.org/jira/browse/HADOOP-9041
    FileSystem.get(conf);

    // Hook up the HDFS URL scheme handler
    // noinspection ErrorNotRethrown
    try {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    } catch (final Error e) {
        // This can happen if the handler has already been loaded, so ignore
        System.err.println("The HDFS URL scheme handler has already been loaded");
    }
}
/**
 * Test opening and reading from an InputStream through an hdfs:// URL.
 * <p>
 * First generate a file with some content through the FileSystem API, then
 * try to open and read the file through the URL stream API.
 *
 * @throws IOException
 */
public void testDfsUrls() throws IOException {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
    FileSystem fs = cluster.getFileSystem();

    // Set up our own factory.
    // setURLStreamHandlerFactory can be called at most once in the JVM, so the new
    // URLStreamHandler is valid for all test cases in TestStreamHandler.
    FsUrlStreamHandlerFactory factory = new org.apache.hadoop.fs.FsUrlStreamHandlerFactory();
    java.net.URL.setURLStreamHandlerFactory(factory);

    Path filePath = new Path("/thefile");

    try {
        byte[] fileContent = new byte[1024];
        for (int i = 0; i < fileContent.length; ++i)
            fileContent[i] = (byte) i;

        // First create the file through the FileSystem API
        OutputStream os = fs.create(filePath);
        os.write(fileContent);
        os.close();

        // Second, open and read the file content through the URL API
        URI uri = fs.getUri();
        URL fileURL = new URL(uri.getScheme(), uri.getHost(), uri.getPort(), filePath.toString());

        InputStream is = fileURL.openStream();
        assertNotNull(is);

        byte[] bytes = new byte[4096];
        assertEquals(1024, is.read(bytes));
        is.close();

        for (int i = 0; i < fileContent.length; ++i)
            assertEquals(fileContent[i], bytes[i]);

        // Cleanup: delete the file
        fs.delete(filePath, false);
    } finally {
        fs.close();
        cluster.shutdown();
    }
}
@Override
public void afterPropertiesSet() throws Exception {
    internalConfig = createConfiguration(configuration);
    internalConfig.setClassLoader(beanClassLoader);

    if (resources != null) {
        for (Resource resource : resources) {
            internalConfig.addResource(resource.getURL());
        }
    }

    ConfigurationUtils.addProperties(internalConfig, properties);

    // For the property keys below we can't use constants from the Hadoop packages,
    // because we need to be able to compile against different Hadoop versions.

    // set hdfs / fs URI last to override all other properties
    if (StringUtils.hasText(fsUri)) {
        internalConfig.set("fs.default.name", fsUri.trim());
        internalConfig.set("fs.defaultFS", fsUri.trim());
    }

    if (StringUtils.hasText(jtUri)) {
        internalConfig.set("mapred.job.tracker", jtUri.trim());
    }

    if (StringUtils.hasText(rmUri)) {
        internalConfig.set("yarn.resourcemanager.address", rmUri.trim());
    }

    if (initialize) {
        // touching the config forces it to load its resources now
        internalConfig.size();
    }

    postProcessConfiguration(internalConfig);

    if (registerJvmUrl) {
        try {
            // force UGI init to prevent infinite loop - see SHDP-92
            UserGroupInformation.setConfiguration(internalConfig);
            URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory(getObject()));
            log.info("Registered HDFS URL stream handler");
        } catch (Error err) {
            log.warn("Cannot register Hadoop URL stream handler - one is already registered");
        }
    }
}
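For orientation, a hedged sketch of driving this factory bean programmatically. The setter names below (setFileSystemUri, setRegisterUrlHandler) are assumptions inferred from the fsUri and registerJvmUrl fields used above, not verified API; in a typical Spring for Apache Hadoop setup the same switch is exposed declaratively on the configuration element, and Spring calls afterPropertiesSet() itself.

// Wiring sketch only (assumed setter names, placeholder namenode URI).
static Configuration buildHadoopConfiguration() throws Exception {
    ConfigurationFactoryBean factoryBean = new ConfigurationFactoryBean();
    factoryBean.setBeanClassLoader(Thread.currentThread().getContextClassLoader());
    factoryBean.setFileSystemUri("hdfs://namenode:8020"); // assumed setter for the fsUri field
    factoryBean.setRegisterUrlHandler(true);              // assumed setter for the registerJvmUrl flag
    factoryBean.afterPropertiesSet();

    // Once the URL handler is registered, hdfs:// locations resolve through
    // plain java.net.URL as well as through the FileSystem API.
    return factoryBean.getObject();
}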
/**
 * Test opening and reading from an InputStream through an hdfs:// URL.
 * <p>
 * First generate a file with some content through the FileSystem API, then
 * try to open and read the file through the URL stream API.
 *
 * @throws IOException
 */
public void testDfsUrls() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    FileSystem fs = cluster.getFileSystem();

    // Set up our own factory.
    // setURLStreamHandlerFactory can be called at most once in the JVM, so the new
    // URLStreamHandler is valid for all test cases in TestStreamHandler.
    FsUrlStreamHandlerFactory factory = new org.apache.hadoop.fs.FsUrlStreamHandlerFactory();
    java.net.URL.setURLStreamHandlerFactory(factory);

    Path filePath = new Path("/thefile");

    try {
        byte[] fileContent = new byte[1024];
        for (int i = 0; i < fileContent.length; ++i)
            fileContent[i] = (byte) i;

        // First create the file through the FileSystem API
        OutputStream os = fs.create(filePath);
        os.write(fileContent);
        os.close();

        // Second, open and read the file content through the URL API
        URI uri = fs.getUri();
        URL fileURL = new URL(uri.getScheme(), uri.getHost(), uri.getPort(), filePath.toString());

        InputStream is = fileURL.openStream();
        assertNotNull(is);

        byte[] bytes = new byte[4096];
        assertEquals(1024, is.read(bytes));
        is.close();

        for (int i = 0; i < fileContent.length; ++i)
            assertEquals(fileContent[i], bytes[i]);

        // Cleanup: delete the file
        fs.delete(filePath, false);
    } finally {
        fs.close();
        cluster.shutdown();
    }
}