Example source code for the Java class org.apache.hadoop.io.compress.SplittableCompressionCodec
Project: hadoopoffice
File: MapReduceExcelOutputIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopoffice
File: MapReduceExcelInputIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopoffice
File: HadoopFileReader.java
public InputStream openFile(Path path) throws IOException {
CompressionCodec codec=compressionCodecs.getCodec(path);
FSDataInputStream fileIn=fs.open(path);
// check if compressed
if (codec==null) { // uncompressed
LOG.debug("Reading from an uncompressed file \""+path+"\"");
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
LOG.debug("Reading from a compressed file \""+path+"\" with splittable compression codec");
long end = fs.getFileStatus(path).getLen();
return ((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
} else {
LOG.debug("Reading from a compressed file \""+path+"\" with non-splittable compression codec");
return codec.createInputStream(fileIn,decompressor);
}
}
}
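The openDecompressors list in the examples above collects the pooled decompressors so they can be handed back when the reader is closed. A minimal sketch of such a close() method, assuming the same field names as above (the actual HadoopFileReader implementation may differ):

// Sketch only: return every decompressor acquired in openFile() to the codec pool
public void close() {
    for (Decompressor decompressor : openDecompressors) {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
        }
    }
    openDecompressors.clear();
}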
Project: hadoopcryptoledger
File: SparkBitcoinBlockCounterSparkMasterIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(conf).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopcryptoledger
File: MapReduceBitcoinTransactionIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopcryptoledger
File: MapReduceEthereumBlockIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopcryptoledger
File: Spark2BitcoinBlockCounterSparkMasterIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(conf).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: hadoopcryptoledger
File: MapReduceBitcoinBlockIntegrationTest.java
private InputStream openFile(Path path) throws IOException {
CompressionCodec codec=new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
FSDataInputStream fileIn=dfsCluster.getFileSystem().open(path);
// check if compressed
if (codec==null) { // uncompressed
return fileIn;
} else { // compressed
Decompressor decompressor = CodecPool.getDecompressor(codec);
this.openDecompressors.add(decompressor); // to be returned later using close
if (codec instanceof SplittableCompressionCodec) {
long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, 0, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
return cIn;
} else {
return codec.createInputStream(fileIn,decompressor);
}
}
}
Project: legion
File: JsonInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
CompressionCodec codec;
Configuration job = context.getConfiguration();
legionObjective =
ObjectiveDeserializer.deserialize(job.get("legion_objective"));
if (legionObjective.getCodecOverride() != null) {
codec = new CompressionCodecFactory(context.getConfiguration())
.getCodecByClassName(legionObjective.getCodecOverride());
} else {
codec = new CompressionCodecFactory(context.getConfiguration())
.getCodec(file);
}
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: Hive-XML-SerDe
File: SplittableXmlInputFormat.java
private InputStream getInputStream(JobConf jobConf, FileSplit split) throws IOException, ClassNotFoundException {
FSDataInputStream fsin = null;
// open the file and seek to the start of the split
long splitStart = split.getStart();
long splitEnd = splitStart + split.getLength();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(jobConf);
fsin = fs.open(split.getPath());
fsin.seek(splitStart);
Configuration conf = new Configuration();
CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
CompressionCodec codec = compressionCodecFactory.getCodec(split.getPath());
Decompressor decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
return ((SplittableCompressionCodec) codec).createInputStream(fsin,
decompressor,
splitStart,
splitEnd,
SplittableCompressionCodec.READ_MODE.BYBLOCK);
} else {
return codec.createInputStream(fsin, decompressor);
}
}
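When READ_MODE.BYBLOCK is requested, the returned stream aligns reading to compression-block boundaries, so the effective split limits can differ from the raw splitStart/splitEnd. A hedged sketch of how a caller could pick up the adjusted range (variable names follow the method above; the Hive-XML-SerDe code itself does not do this):

// Sketch only: a splittable codec reports block-aligned split boundaries
SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
        fsin, decompressor, splitStart, splitEnd, SplittableCompressionCodec.READ_MODE.BYBLOCK);
long adjustedStart = cIn.getAdjustedStart(); // may be moved relative to splitStart
long adjustedEnd = cIn.getAdjustedEnd();     // may be moved relative to splitEnd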
Project: angel
File: BalanceInputFormat.java
protected boolean isSplitable(Configuration conf, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(conf).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-logfile-inputformat
File: LogfileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
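All of these isSplitable overrides reduce to the same rule: uncompressed files stay splittable, and compressed files are splittable only if their codec implements SplittableCompressionCodec, which in stock Hadoop is the case for BZip2Codec but not for GzipCodec. A small illustration (the input paths are hypothetical):

// Sketch only: how CompressionCodecFactory drives the splittability decision
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);

CompressionCodec bz2 = factory.getCodec(new Path("/data/input.bz2"));
System.out.println(bz2 instanceof SplittableCompressionCodec); // true: BZip2Codec is splittable

CompressionCodec gz = factory.getCodec(new Path("/data/input.gz"));
System.out.println(gz instanceof SplittableCompressionCodec);  // false: GzipCodec is not

CompressionCodec none = factory.getCodec(new Path("/data/input.txt"));
System.out.println(none == null); // true: no codec, the file remains splittable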
Project: hadoop
File: KeyValueTextInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop
File: CombineFileInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(fs.getConf()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop
File: KeyValueTextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop
File: TextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: spark-util
File: ErrorHandlingLineRecordReader.java
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
try {
FileSplit split = (FileSplit)genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
this.start = split.getStart();
this.end = this.start + split.getLength();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);
this.fileIn = fs.open(file);
CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
if(null != codec) {
this.isCompressedInput = true;
this.decompressor = CodecPool.getDecompressor(codec);
if(codec instanceof SplittableCompressionCodec) {
SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
this.start = cIn.getAdjustedStart();
this.end = cIn.getAdjustedEnd();
this.filePosition = cIn;
} else {
this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
} else {
this.fileIn.seek(this.start);
this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
if(this.start != 0L) {
this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
}
this.pos = this.start;
}catch(Exception ex){
LOG.warn("Exception occurred during initialization {}", ex, ex);
}
}
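After initialize(), the record reader typically reads positions through a small helper so that progress and end-of-split checks work the same whether or not a compressed stream replaced the raw file stream. A sketch following the LineRecordReader convention, assuming the field names used above (this is not necessarily the exact spark-util code):

// Sketch only: current byte position, taken from the compressed stream when one is in use
private long getFilePosition() throws IOException {
    if (isCompressedInput && filePosition != null) {
        return filePosition.getPos();
    }
    return pos;
}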
Project: aliyun-oss-hadoop-fs
File: KeyValueTextInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: aliyun-oss-hadoop-fs
File: CombineFileInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(fs.getConf()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: aliyun-oss-hadoop-fs
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: aliyun-oss-hadoop-fs
File: KeyValueTextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: aliyun-oss-hadoop-fs
File: TextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: big-c
File: KeyValueTextInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: big-c
File: CombineFileInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(fs.getConf()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: big-c
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: big-c
File: KeyValueTextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: big-c
File: TextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoopcryptoledger
File: AbstractEthereumRecordReader.java
/***
* Initializes readers
*
* @param split Split to be used (assumed to be a file split)
* @param context context of the job
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
ebr = new EthereumBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
}
Project: hadoopcryptoledger
File: AbstractBitcoinFileInputFormat.java
/**
*
* This method is experimental and derived from TextInputFormat. Compressing the blockchain files is neither necessary nor recommended; instead, extract the relevant data from them once and store it in a format suitable for analytics (including compression), such as ORC or Parquet.
*
*/
@Override
protected boolean isSplitable(FileSystem fs, Path file) {
if (!(this.isSplitable)) {
return false;
}
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoopcryptoledger
File: AbstractBitcoinRecordReader.java
/**
* Initializes reader
* @param split Split to use (assumed to be a file split)
* @param context context of the job
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,this.readAuxPOW);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
bbr = new BitcoinBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
// seek to block start (for the case a block overlaps a split)
try {
bbr.seekBlockStart();
} catch (BitcoinBlockReadException bbre) {
LOG.error("Error reading Bitcoin blockchain data");
LOG.error(bbre);
}
}
Project: hadoop-2.6.0-cdh5.4.3
File: KeyValueTextInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: CombineFileInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(fs.getConf()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: KeyValueTextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: TextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: KeyValueTextInputFormat.java
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(FileSystem fs, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(fs.getConf()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: CombineFileInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
Project: hadoop-2.6.0-cdh5.4.3
File: TextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}