@Test
public void testNativeCodeLoaded() {
  if (requireTestJni() == false) {
    LOG.info("TestNativeCodeLoader: libhadoop.so testing is not required.");
    return;
  }
  if (!NativeCodeLoader.isNativeCodeLoaded()) {
    fail("TestNativeCodeLoader: libhadoop.so testing was required, but " +
        "libhadoop.so was not loaded.");
  }
  assertFalse(NativeCodeLoader.getLibraryName().isEmpty());
  // Library names depend on the platform and build environment,
  // so just check that names are available.
  assertFalse(ZlibFactory.getLibraryName().isEmpty());
  if (NativeCodeLoader.buildSupportsSnappy()) {
    assertFalse(SnappyCodec.getLibraryName().isEmpty());
  }
  if (NativeCodeLoader.buildSupportsOpenssl()) {
    assertFalse(OpensslCipher.getLibraryName().isEmpty());
  }
  assertFalse(Lz4Codec.getLibraryName().isEmpty());
  LOG.info("TestNativeCodeLoader: libhadoop.so is loaded.");
}
/**
 * Sets the task classes, with related settings, on the given job as requested.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set the mapper and input format classes.
 * @param setCombiner Option to set the combiner class.
 * @param setReducer Option to set the reducer and output format classes.
 * @param outputCompression Option to enable block-compressed SequenceFile output using Snappy.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner,
    boolean setReducer, boolean outputCompression) {
  if (setMapper) {
    job.setMapperClass(HadoopWordCount2Mapper.class);
    job.setInputFormatClass(TextInputFormat.class);
  }

  if (setCombiner)
    job.setCombinerClass(HadoopWordCount2Combiner.class);

  if (setReducer) {
    job.setReducerClass(HadoopWordCount2Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
  }

  if (outputCompression) {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
  }
}
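A minimal usage sketch of the helper above, written as if called from the same (unshown) utility class; the job name and input/output paths are hypothetical, not taken from the original code:

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word-count-snappy"); // hypothetical job name

// wire mapper, combiner, reducer, and Snappy block-compressed SequenceFile output
setTasksClasses(job, true, true, true, true);

FileInputFormat.addInputPath(job, new Path("/tmp/wordcount/in"));    // illustrative path
FileOutputFormat.setOutputPath(job, new Path("/tmp/wordcount/out")); // illustrative path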
@Test
public void testGetCodecString() {
  Configuration configuration = dfsServer.getConf();
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));

  configuration.setBoolean(FileOutputFormat.COMPRESS, false);
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));

  configuration.setBoolean(FileOutputFormat.COMPRESS, true);
  assertEquals(new DefaultCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));

  configuration.set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
  assertEquals(new SnappyCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));

  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.BLOCK.toString());
  assertEquals(new SnappyCodec().getDefaultExtension().substring(1),
      MrUtil.getCodecString(configuration));

  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.NONE.toString());
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));

  configuration.set(FileOutputFormat.COMPRESS_TYPE, CompressionType.BLOCK.toString());
  configuration.setBoolean(FileOutputFormat.COMPRESS, false);
  assertEquals(MrUtil.CODEC_NONE, MrUtil.getCodecString(configuration));
}
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: CreateSequenceFile <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(CreateSequenceFile.class);
  job.setJobName("Create Sequence File");

  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
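The method above is a Tool.run() implementation; a conventional entry point (not shown in the original snippet) would hand control to ToolRunner, roughly:

public static void main(String[] args) throws Exception {
  // ToolRunner parses the generic Hadoop options and then invokes run() above
  int exitCode = ToolRunner.run(new Configuration(), new CreateSequenceFile(), args);
  System.exit(exitCode);
}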
protected JobConf configStage1() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.set("recursive_diagmult", "" + recursive_diagmult);
  conf.setJobName("data-piqid.pegasus.ConCmptBlock_pass1");

  conf.setMapperClass(MapStage1.class);
  conf.setReducerClass(RedStage1.class);

  FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
  FileOutputFormat.setOutputPath(conf, tempbm_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
protected JobConf configStage2() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.setJobName("data-piqid.pegasus.ConCmptBlock_pass2");

  conf.setMapperClass(MapStage2.class);
  conf.setReducerClass(RedStage2.class);

  FileInputFormat.setInputPaths(conf, tempbm_path);
  FileOutputFormat.setOutputPath(conf, nextbm_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
protected JobConf configStage4() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.setJobName("data-piqid.pegasus.ConCmptBlock_pass4");

  conf.setMapperClass(MapStage4.class);

  FileInputFormat.setInputPaths(conf, curbm_path);
  FileOutputFormat.setOutputPath(conf, curbm_unfold_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  conf.setNumReduceTasks(0); // This is essential for map-only tasks.

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
protected JobConf configStage5() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.setJobName("data-piqid.pegasus.ConCmptBlock_pass5");

  conf.setMapperClass(MapStage5.class);
  conf.setReducerClass(RedStage5.class);
  conf.setCombinerClass(RedStage5.class);

  FileInputFormat.setInputPaths(conf, curbm_path);
  FileOutputFormat.setOutputPath(conf, summaryout_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(LongWritable.class);

  return conf;
}
protected JobConf configStage1() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptIVGen.class);
  conf.set("number_nodes", "" + number_nodes);
  conf.setJobName("data-piqid.pegasus.ConCmptIVGen_Stage1");

  conf.setMapperClass(MapStage1.class);
  conf.setReducerClass(RedStage1.class);

  FileInputFormat.setInputPaths(conf, input_path);
  FileOutputFormat.setOutputPath(conf, output_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  conf.setNumReduceTasks(number_reducers);

  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  return conf;
}
@Test
public void testCsvBlurDriverTest3() throws Exception {
  Configuration configurationSetup = new Configuration();
  ControllerPool controllerPool = new CsvBlurDriver.ControllerPool() {
    @Override
    public Iface getClient(String controllerConnectionStr) {
      return getMockIface();
    }
  };
  AtomicReference<Callable<Void>> ref = new AtomicReference<Callable<Void>>();
  Job job = CsvBlurDriver.setupJob(configurationSetup, controllerPool, ref, "-c", "host:40010",
      "-d", "family1", "col1", "col2", "-d", "family2", "col3", "col4", "-t", "table1",
      "-i", _path1.toString(), "-i", _path2.toString(), "-S", "-C", "1000000", "2000000", "-p", "SNAPPY");
  assertNotNull(job);

  Configuration configuration = job.getConfiguration();
  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(configuration);
  assertEquals(tableDescriptor.getName(), "table1");

  Collection<String> inputs = configuration.getStringCollection("mapred.input.dir");
  assertEquals(2, inputs.size());

  Map<String, List<String>> familyAndColumnNameMap = CsvBlurMapper.getFamilyAndColumnNameMap(configuration);
  assertEquals(2, familyAndColumnNameMap.size());

  assertEquals("true", configuration.get(CsvBlurDriver.MAPRED_COMPRESS_MAP_OUTPUT));
  assertEquals(SnappyCodec.class.getName(), configuration.get(CsvBlurDriver.MAPRED_MAP_OUTPUT_COMPRESSION_CODEC));
}
/**
 * Gets an InputStream that uses the snappy codec and wraps the supplied base input stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public InputStream getSnappyInputStream( int bufferSize, InputStream in ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createInputStream( in );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
/**
 * Gets an OutputStream that uses the snappy codec and wraps the supplied base output stream.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or an error occurs during reflection
 */
public OutputStream getSnappyOutputStream( int bufferSize, OutputStream out ) throws Exception {
  if ( !isHadoopSnappyAvailable() ) {
    throw new Exception( "Hadoop-snappy does not seem to be available" );
  }

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    SnappyCodec c = new SnappyCodec();
    Configuration newConf = new Configuration();
    newConf.set( IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, "" + bufferSize );
    c.setConf( newConf );
    return c.createOutputStream( out );
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
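A hedged round-trip sketch using the two helpers above, written as if from inside the same class; the 64 KB buffer size and the file path are illustrative only:

byte[] payload = "compressed with hadoop-snappy".getBytes(StandardCharsets.UTF_8);

// write the payload through the Snappy codec
try (OutputStream out = getSnappyOutputStream(64 * 1024, new FileOutputStream("/tmp/sample.snappy"))) {
  out.write(payload);
}

// read it back through the matching Snappy input stream
byte[] buffer = new byte[payload.length];
try (InputStream in = getSnappyInputStream(64 * 1024, new FileInputStream("/tmp/sample.snappy"))) {
  int read = in.read(buffer); // a single read suffices for this tiny payload
}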
public InputStream openInputStream(Path inputFilePath) throws IOException {
  FileSystem inputFs = inputFilePath.getFileSystem(this.conf);
  InputStream inputStream = inputFs.open(inputFilePath);

  if (!this.outputCodec.equalsIgnoreCase("keep")) {
    String suffix = Utils.getSuffix(inputFilePath.getName());
    if (suffix.equalsIgnoreCase("gz"))
      return new GZIPInputStream(inputStream);
    if (suffix.equalsIgnoreCase("snappy")) {
      SnappyCodec codec = new SnappyCodec();
      codec.setConf(getConf());
      return codec.createInputStream(inputStream);
    }
    // if ((suffix.equalsIgnoreCase("lzop")) || (suffix.equalsIgnoreCase("lzo"))) {
    //   LzopCodec codec = new LzopCodec();
    //   codec.setConf(getConf());
    //   return codec.createInputStream(inputStream);
    // }
  }
  return inputStream;
}
public OutputStream openOutputStream(Path outputFilePath) throws IOException {
  FileSystem outputFs = outputFilePath.getFileSystem(this.conf);
  OutputStream outputStream = outputFs.create(outputFilePath, this.reporter);

  if ((this.outputCodec.equalsIgnoreCase("gzip")) || (this.outputCodec.equalsIgnoreCase("gz")))
    return new GZIPOutputStream(outputStream);
  // if (this.outputCodec.equalsIgnoreCase("lzo")) {
  //   LzopCodec codec = new LzopCodec();
  //   codec.setConf(getConf());
  //   return codec.createOutputStream(outputStream);
  // }
  if (this.outputCodec.equalsIgnoreCase("snappy")) {
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(getConf());
    return codec.createOutputStream(outputStream);
  }
  return outputStream;
}
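Taken together, the two methods above suggest a simple re-encoding loop. A sketch under the assumption that this.outputCodec is set to "snappy"; the paths are hypothetical:

Path src = new Path("/input/part-00000.gz");      // decompressed by openInputStream (suffix "gz")
Path dst = new Path("/output/part-00000.snappy"); // compressed by openOutputStream ("snappy")

try (InputStream in = openInputStream(src);
     OutputStream out = openOutputStream(dst)) {
  // org.apache.hadoop.io.IOUtils: copy with a 4 KB buffer; try-with-resources closes both streams
  IOUtils.copyBytes(in, out, 4096, false);
}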
@Test
public void testSnappyDirectBlockCompression() {
  int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 };
  assumeTrue(SnappyCodec.isNativeCodeLoaded());
  try {
    for (int i = 0; i < size.length; i++) {
      compressDecompressLoop(size[i]);
    }
  } catch (IOException ex) {
    fail("testSnappyDirectBlockCompression ex !!!" + ex);
  }
}
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf()); // set from the command line
  job.setJarByClass(Phase2ExactMatchDeDuplication.class);
  job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

  // mapper
  job.setMapperClass(ExactMatchDetectionMapper.class);

  // we will compress the mapper's output (use fast Snappy compressor)
  job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
  job.getConfiguration().setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

  // reducer
  job.setReducerClass(UniqueWarcWriterReducer.class);
  // no combiner, as the output classes in mapper and reducer are different!

  // input-output is warc
  job.setInputFormatClass(WARCInputFormat.class);
  job.setOutputFormatClass(WARCOutputFormat.class);

  // mapper output data
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(WARCWritable.class);

  // set output compression to GZip
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf()); // set from the command line
  job.setJarByClass(Phase1FullJob.class);
  job.setJobName(Phase1FullJob.class.getName());

  // mapper
  job.setMapperClass(MapperClass.class);

  // we will compress the mapper's output (use fast Snappy compressor)
  job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
  job.getConfiguration().setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

  // reducer
  job.setReducerClass(SimpleWarcWriterReducer.class);

  // input-output is warc
  job.setInputFormatClass(WARCInputFormat.class);
  job.setOutputFormatClass(WARCOutputFormat.class);

  // mapper output data
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(WARCWritable.class);

  // set output compression to GZip
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
public SnappyFilterStreamContext(OutputStream outputStream) throws IOException {
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  // createOutputStream already declares IOException, so no catch-and-rethrow is needed
  filterStream = new SnappyFilterStream(
      codec.createOutputStream(outputStream, new SnappyCompressor(bufferSize)));
}
private boolean checkNativeSnappy() {
  try {
    SnappyCodec.checkNativeCodeLoaded();
  } catch (UnsatisfiedLinkError u) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  } catch (RuntimeException e) {
    LOG.error("WARNING: Skipping Snappy compression test since native libraries were not found.");
    return true;
  }
  return false;
}
@Test
public void testSnappyCompressionSimple() throws IOException {
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream =
      new FilterStreamCodec.SnappyFilterStream(codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;
  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // 16 bytes per iteration, 16 MB in total
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));
  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();
  assertEquals(testStr, new String(recovered));
}
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  JobConf job = new JobConf(conf);
  job.setJarByClass(BloomFilterCreator.class);

  job.set(AvroJob.OUTPUT_SCHEMA, AvroBytesRecord.SCHEMA.toString());
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setInputFormat(KeyValueTextInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(BloomFilter.class);

  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(BloomFilter.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  return JobClient.runJob(job).isSuccessful() ? 0 : 1;
}
protected JobConf configStage1(String out_prefix) throws Exception {
  final JobConf conf = new JobConf(getConf(), MatvecPrep.class);
  conf.set("block_size", "" + block_size);
  conf.set("matrix_row", "" + number_nodes);
  conf.set("out_prefix", "" + out_prefix);
  conf.set("makesym", "" + makesym);
  conf.setJobName("data-piqid.pegasus.MatvecPrep_Stage1");

  conf.setMapperClass(MapStage1.class);
  conf.setReducerClass(RedStage1.class);

  FileSystem fs = FileSystem.get(getConf());
  fs.delete(output_path, true);

  FileInputFormat.setInputPaths(conf, edge_path);
  FileOutputFormat.setOutputPath(conf, output_path);
  FileOutputFormat.setCompressOutput(conf, true);
  FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);

  int num_reduce_tasks = nreducer;
  conf.setNumReduceTasks(num_reduce_tasks);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputValueClass(Text.class);

  return conf;
}
/**
 * Maps a Hadoop compression codec to the compression kind used by ORC.
 *
 * @param codec the Hadoop compression codec configured for the output
 * @return the corresponding ORC CompressionKind, or NONE if the codec is not recognized
 */
private CompressionKind resolveCompression(CompressionCodec codec) {
  if (codec instanceof Lz4Codec)
    return CompressionKind.LZ4;
  else if (codec instanceof SnappyCodec)
    return CompressionKind.SNAPPY;
  else if (codec instanceof ZlibCodec)
    return CompressionKind.ZLIB;
  else
    return CompressionKind.NONE;
}
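A hypothetical call site for the helper above, resolving a configured output codec before an ORC writer is created; ReflectionUtils is the standard Hadoop factory, and conf is assumed to be the job Configuration:

// instantiate the configured codec and map it onto ORC's enum
CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);
CompressionKind kind = resolveCompression(codec); // -> CompressionKind.SNAPPY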
/**
 * Tests whether hadoop-snappy (not to be confused with other java-based snappy implementations
 * such as jsnappy or snappy-java) plus the native snappy libraries are available.
 *
 * @return true if hadoop-snappy is available on the classpath
 */
public boolean isHadoopSnappyAvailable() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( getClass().getClassLoader() );
  try {
    return SnappyCodec.isNativeCodeLoaded();
  } catch ( Throwable t ) {
    return false;
  } finally {
    Thread.currentThread().setContextClassLoader( cl );
  }
}
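A sketch of how the availability check above might guard a job configuration, assuming conf is the job's Hadoop Configuration; map output is switched to Snappy only when the native library is actually loadable:

if (isHadoopSnappyAvailable()) {
  conf.setBoolean("mapreduce.map.output.compress", true);
  conf.set("mapreduce.map.output.compress.codec", SnappyCodec.class.getName());
}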
@Override
public void open() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  logger.debug("Opening data file with pathTmp:{} (final path will be path:{})", pathTmp, path);

  try {
    CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
    if (enableCompression) {
      if (SnappyCodec.isNativeCodeLoaded()) {
        codecName = CompressionCodecName.SNAPPY;
      } else {
        logger.warn("Compression enabled, but Snappy native code not loaded. "
            + "Parquet file will not be compressed.");
      }
    }
    avroParquetWriter = new AvroParquetWriter<E>(fileSystem.makeQualified(pathTmp), schema,
        codecName, DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE);
  } catch (IOException e) {
    throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
  }

  state = ReaderWriterState.OPEN;
}
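For completeness, a hedged sketch of reading records back from the file written above using parquet-avro; the builder-style AvroParquetReader API is assumed to be available in the Parquet version on the classpath:

try (ParquetReader<GenericRecord> reader =
         AvroParquetReader.<GenericRecord>builder(fileSystem.makeQualified(path)).build()) {
  GenericRecord record;
  while ((record = reader.read()) != null) {
    // process each record read back from the (possibly Snappy-compressed) Parquet file
  }
}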