Java class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat: example source code
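NLineInputFormat splits its input so that each map task receives a fixed number of input lines (N) rather than a fixed number of bytes, which is convenient when every input line describes an independent unit of work (a file path, a SPARQL query, a serialized work unit, and so on). The snippets below are collected from various open-source projects. As a quick orientation, here is a minimal, self-contained sketch of a map-only job driven by NLineInputFormat; the class NLineExample, its EchoMapper, and the path arguments are placeholders for illustration and do not come from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NLineExample {

    // Placeholder mapper: echoes each input line; one map task handles at most N lines.
    public static class EchoMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws java.io.IOException, InterruptedException {
            context.write(NullWritable.get(), line);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "nline-example");
        job.setJarByClass(NLineExample.class);
        job.setMapperClass(EchoMapper.class);
        job.setNumReduceTasks(0);                      // map-only job
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // Each split, and therefore each map task, covers at most 10 input lines.
        job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.addInputPath(job, new Path(args[0]));
        NLineInputFormat.setNumLinesPerSplit(job, 10); // same effect as mapreduce.input.lineinputformat.linespermap=10
        TextOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With the configuration above, a 100-line input file would be cut into 10 splits and therefore processed by 10 map tasks.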
Project: ViraPipe
File: InterleaveMulti.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
zips.foreach( splits -> {
Path path = splits._1.getPath();
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
});
}
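Note on the ViraPipe snippets in this listing: NLineInputFormat.getSplitsForFile(FileStatus, Configuration, int) is a public static helper that cuts a single file into FileSplits of at most splitlen lines each. ViraPipe parallelizes the resulting split list as a Spark RDD and opens a FastqRecordReader per split on the executors, so NLineInputFormat's splitting logic is reused outside a regular MapReduce job.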
Project: ViraPipe
File: InterleaveMulti.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
Path fqpath = new Path(fqPath);
String fqname = fqpath.getName();
String[] ns = fqname.split("\\.");
//TODO: Handle also compressed files
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
splitRDD.foreach( split -> {
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);
});
}
Project: ViraPipe
File: Decompress.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
zips.foreach( splits -> {
Path path = splits._1.getPath();
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
});
}
Project: ViraPipe
File: DecompressInterleave.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
String[] ns = fst.getPath().getName().split("\\.");
//TODO: Handle also compressed files
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
zips.foreach( splits -> {
Path path = splits._1.getPath();
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, path.getParent().getName()+"_"+splits._1.getStart()+".fq");
});
}
Project: aliyun-oss-hadoop-fs
File: TestMapreduceConfigFields.java
@SuppressWarnings("deprecation")
@Override
public void initializeMemberVariables() {
xmlFilename = new String("mapred-default.xml");
configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
FileInputFormat.class, Job.class, NLineInputFormat.class,
JobConf.class, FileOutputCommitter.class };
// Initialize used variables
configurationPropsToSkipCompare = new HashSet<String>();
// Set error modes
errorIfMissingConfigProps = true;
errorIfMissingXmlProps = false;
// Ignore deprecated MR1 properties in JobConf
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
}
Project: mrgeo
File: DelimitedVectorInputFormat.java
public static void setupJob(Job job, int minFeaturesPerSplit, long featureCount)
{
if (minFeaturesPerSplit > 0)
{
if (featureCount < 0)
{
throw new IllegalArgumentException("Expected a feature count");
}
int maxMapTasks = job.getConfiguration().getInt("mapred.tasktracker.map.tasks.maximum", -1);
if (maxMapTasks > 0)
{
int featuresPerSplit = (int) (featureCount / maxMapTasks);
if (featuresPerSplit < minFeaturesPerSplit)
{
featuresPerSplit = minFeaturesPerSplit;
}
job.getConfiguration().setBoolean(USE_NLINE_FORMAT, true);
NLineInputFormat.setNumLinesPerSplit(job, featuresPerSplit);
}
}
}
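As a worked example of the arithmetic above (illustrative numbers, not taken from mrgeo): with featureCount = 1,000,000, maxMapTasks = 40 and minFeaturesPerSplit = 500, featuresPerSplit becomes 1,000,000 / 40 = 25,000, which exceeds the minimum and is passed to NLineInputFormat.setNumLinesPerSplit unchanged; with maxMapTasks = 4,000 the quotient would be 250, below the minimum, so it is clamped up to 500.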
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#getNumLinesPerSplit(org.apache.hadoop.mapreduce.JobContext)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*
* @return the number of lines per split
*/
@JSStaticFunction
public static Object getNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
if (args.length < 1) {
throw Utils.makeError(ctx, thisObj, LembosMessages.ONE_ARG_EXPECTED);
} else if (!JavaScriptUtils.isDefined(arg0)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
} else if (!(arg0 instanceof JobWrap)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
}
return NLineInputFormat.getNumLinesPerSplit(((JobWrap)arg0).getJob());
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#setNumLinesPerSplit(org.apache.hadoop.mapreduce.Job, int)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function called (unused)
*/
@JSStaticFunction
public static void setNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
final Object arg1 = args.length >= 2 ? args[1] : Undefined.instance;
if (args.length < 2) {
throw Utils.makeError(ctx, thisObj, LembosMessages.TWO_ARGS_EXPECTED);
} else if (!JavaScriptUtils.isDefined(arg0)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
} else if (!JavaScriptUtils.isDefined(arg1)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_REQUIRED);
} else if (!(arg0 instanceof JobWrap)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
} else if (!(arg1 instanceof Number)) {
throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_ARG_MUST_BE_NUM);
}
NLineInputFormat.setNumLinesPerSplit(((JobWrap)arg0).getJob(), JavaScriptUtils.fromNumber(arg1).intValue());
}
Project: ViraPipe
File: Decompress.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
Path fqpath = new Path(fqPath);
String fqname = fqpath.getName();
String[] ns = fqname.split("\\.");
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
splitRDD.foreach( split -> {
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);
});
}
Project: ViraPipe
File: Interleave.java
private static void splitFastq(FileStatus fst, String fqPath, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
Path fqpath = new Path(fqPath);
String fqname = fqpath.getName();
String[] ns = fqname.split("\\.");
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
splitRDD.foreach( split -> {
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
writeFastqFile(fqreader, new Configuration(), splitDir + "/split_" + split.getStart() + "." + ns[1]);
});
}
Project: ViraPipe
File: Interleave.java
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
zips.foreach( splits -> {
Path path = splits._1.getPath();
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir+"/"+path.getParent().getName()+"_"+splits._1.getStart()+".fq");
});
}
Project: dkpro-c4corpus
File: Phase3Step4LocalDeDuplication.java
@Override
public int run(String[] args)
throws Exception
{
Job job = Job.getInstance(getConf());
job.setJarByClass(Phase3Step4LocalDeDuplication.class);
job.setJobName(Phase3Step4LocalDeDuplication.class.getName());
// paths
String inputPath = args[0];
// text files of ids to be deleted
String outputPath = args[1];
// input: reading max N lines for each mapper
job.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.addInputPath(job, new Path(inputPath));
job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);
// mapper
job.setMapperClass(LocalGreedyDeDuplicationMapper.class);
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
// reducer
job.setReducerClass(IDCollectorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
return job.waitForCompletion(true) ? 0 : 1;
}
Project: ldbc_snb_datagen
File: HadoopPersonGenerator.java
/**
* Generates a Person Hadoop sequence file containing key-value pairs
* where the key is the person id and the value is the person itself.
*
* @param outputFileName The name of the file to store the persons.
* @throws Exception
*/
public void run(String outputFileName, String postKeySetterName) throws Exception {
String hadoopDir = new String(conf.get("ldbc.snb.datagen.serializer.hadoopDir"));
String tempFile = hadoopDir + "/mrInputFile";
FileSystem dfs = FileSystem.get(conf);
dfs.delete(new Path(tempFile), true);
writeToOutputFile(tempFile, Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads")), conf);
int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
conf.setInt("mapreduce.input.lineinputformat.linespermap", 1);
conf.set("postKeySetterName", postKeySetterName);
Job job = Job.getInstance(conf, "SIB Generate Users & 1st Dimension");
job.setMapOutputKeyClass(TupleKey.class);
job.setMapOutputValueClass(Person.class);
job.setOutputKeyClass(TupleKey.class);
job.setOutputValueClass(Person.class);
job.setJarByClass(HadoopPersonGeneratorMapper.class);
job.setMapperClass(HadoopPersonGeneratorMapper.class);
job.setReducerClass(HadoopPersonGeneratorReducer.class);
job.setNumReduceTasks(numThreads);
job.setInputFormatClass(NLineInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(tempFile));
FileOutputFormat.setOutputPath(job, new Path(outputFileName));
if (!job.waitForCompletion(true)) {
throw new Exception();
}
}
Project: HIndex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
InterruptedException, ClassNotFoundException {
Configuration conf = getConf();
Path inputDir = writeInputFile(conf, opts);
conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
Job job = new Job(conf);
job.setJarByClass(PerformanceEvaluation.class);
job.setJobName("HBase Performance Evaluation");
job.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.setInputPaths(job, inputDir);
// this is default, but be explicit about it just in case.
NLineInputFormat.setNumLinesPerSplit(job, 1);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(EvaluationMapTask.class);
job.setReducerClass(LongSumReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
DescriptiveStatistics.class, // commons-math
ObjectMapper.class); // jackson-mapper-asl
TableMapReduceUtil.initCredentials(job);
job.waitForCompletion(true);
}
Project: DISH
File: ImageSearcher.java
/**
* Run method called for starting a MapReduce Job
*/
public int run(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
checkRequiredPaths();
long startTime = 0;
if(measureTime)
startTime = System.nanoTime();
Configuration conf = getConf();
Job job = Job.getInstance(conf, "ImageSearcher");
job.setJarByClass(ImageSearcher.class);
job.setMapperClass(ImageSearchMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(ImageDistanceMap.class);
job.setReducerClass(ImageSearchReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(NLineInputFormat.class);
job.setNumReduceTasks(1);
FileInputFormat.addInputPath(job, new Path(conf.get("ImageFeatures")));
FileOutputFormat.setOutputPath(job, new Path(conf.get("Output")));
boolean res = job.waitForCompletion(true);
if(measureTime) {
long elapsedTime = System.nanoTime() - startTime;
System.out.println("== MapReduce Execution Time: " + (double)elapsedTime / 1000000000.0 + "s ==");
}
return res ? 0 : 1;
}
Project: hops
File: TestMapreduceConfigFields.java
@Override
public void initializeMemberVariables() {
xmlFilename = new String("mapred-default.xml");
configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
FileInputFormat.class, Job.class, NLineInputFormat.class,
JobConf.class, FileOutputCommitter.class };
// Initialize used variables
configurationPropsToSkipCompare = new HashSet<String>();
xmlPropsToSkipCompare = new HashSet<String>();
// Set error modes
errorIfMissingConfigProps = true;
errorIfMissingXmlProps = false;
// Ignore deprecated MR1 properties in JobConf
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
// Obsolete entries listed in MAPREDUCE-6057 were removed from trunk
// but not removed from branch-2.
xmlPropsToSkipCompare.add("map.sort.class");
xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name");
xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir");
xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir");
}
Project: PyroDB
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
InterruptedException, ClassNotFoundException {
Configuration conf = getConf();
Path inputDir = writeInputFile(conf, opts);
conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
Job job = new Job(conf);
job.setJarByClass(PerformanceEvaluation.class);
job.setJobName("HBase Performance Evaluation");
job.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.setInputPaths(job, inputDir);
// this is default, but be explicit about it just in case.
NLineInputFormat.setNumLinesPerSplit(job, 1);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(EvaluationMapTask.class);
job.setReducerClass(LongSumReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
Histogram.class, // yammer metrics
ObjectMapper.class); // jackson-mapper-asl
TableMapReduceUtil.initCredentials(job);
job.waitForCompletion(true);
}
Project: eoulsan
File: SAM2BAMHadoopModule.java
/**
* Create the index Hadoop job.
* @param conf the Hadoop configuration
* @param submitFile the path to the submit file
* @param jobDescription the job description
* @return a Job object
* @throws IOException if an error occurs while creating the index
*/
private Job createIndexJob(final Configuration conf,
final DataFile submitFile, final String jobDescription)
throws IOException {
final Configuration jobConf = new Configuration(conf);
// Set one task per map
jobConf.set("mapreduce.input.lineinputformat.linespermap", "" + 1);
// Set Job name
// Create the job and its name
final Job job = Job.getInstance(jobConf, jobDescription);
// Set the jar
job.setJarByClass(IndexerMapper.class);
// Set input path
FileInputFormat.addInputPath(job, new Path(submitFile.getSource()));
job.setInputFormatClass(NLineInputFormat.class);
// Set the Mapper class
job.setMapperClass(IndexerMapper.class);
// Set the output key class
job.setOutputKeyClass(NullWritable.class);
// Set the output value class
job.setOutputValueClass(NullWritable.class);
// Set the output format
job.setOutputFormatClass(NullOutputFormat.class);
// Set the number of reducers
job.setNumReduceTasks(0);
return job;
}
Project: eggshell
File: Egg.java
/** Defines how to read data from a file into the Mapper instances.
* This method sets the input format to the
* 'NLineInputFormat' implementation.
* @return The 'this' object
*/
@JSFunction
public Egg nLineInputFormat ()
{
job.setInputFormatClass(NLineInputFormat.class);
return this;
}
Project: flint
File: FlintHadoop.java
/**
* Main method
* @param args the first and only expected item in the String[] is the path to the textfile
* containing a list of paths of files to be examined, each path on a single line.
* @throws IOException
* @throws ClassNotFoundException
* @throws InterruptedException
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// set up the configuration
// NOTE: apparently it's important to set the conf parameters prior to instantiating
// the job with this config, which is somewhere cloned.
Configuration conf = new Configuration();
// ***** list of config parameters, verified to work *****
conf.setInt(gHadoopVersion.linesPerMapKey(), 50);
// 60 * 60 * 1000ms == 1h; this is very long but necessary for some files :-(
conf.set(gHadoopVersion.taskTimeoutKey(), Integer.toString(60 * 60 * 1000));
// set up the job
// String to use for name and output folder in HDFS
String name = "FLintHadoop_"+System.currentTimeMillis();
Job job = new Job(conf, name);
job.setJarByClass(FlintHadoop.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(name));
//set the mapper to this class' mapper
job.setMapperClass(FlintMap.class);
job.setReducerClass(FlintReduce.class);
//this input format should split the input by one line per map by default.
job.setInputFormatClass(NLineInputFormat.class);
//sets how the output is written cf. OutputFormat
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(CheckResultText.class);
// TODO: shouldn't the number of allowed tasks be set in the config on the cluster,
// as it's sensitive to the hardware setup rather than to this code?
job.setNumReduceTasks(28);
job.waitForCompletion(true);
}
Project: ViraPipe
File: HDFSWriter.java
private static JavaPairRDD<Text, SequencedFragment> interleaveReads(String fastq, String fastq2, int splitlen, JavaSparkContext sc) throws IOException {
FileSystem fs = FileSystem.get(new Configuration());
FileStatus fst = fs.getFileStatus(new Path(fastq));
FileStatus fst2 = fs.getFileStatus(new Path(fastq2));
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
return zips.flatMapToPair( splits -> {
FastqInputFormat.FastqRecordReader fqreader = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._1);
FastqInputFormat.FastqRecordReader fqreader2 = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._2);
ArrayList<Tuple2<Text, SequencedFragment>> reads = new ArrayList<Tuple2<Text, SequencedFragment>>();
while (fqreader.nextKeyValue()) {
String key = fqreader.getCurrentKey().toString();
String[] keysplit = key.split(" ");
key = keysplit[0];
SequencedFragment sf = new SequencedFragment();
sf.setQuality(new Text(fqreader.getCurrentValue().getQuality().toString()));
sf.setSequence(new Text(fqreader.getCurrentValue().getSequence().toString()));
if (fqreader2.nextKeyValue()) {
String key2 = fqreader2.getCurrentKey().toString();
String[] keysplit2 = key2.split(" ");
key2 = keysplit2[0];
//key2 = key2.replace(" 2:N:0:1","/2");
SequencedFragment sf2 = new SequencedFragment();
sf2.setQuality(new Text(fqreader2.getCurrentValue().getQuality().toString()));
sf2.setSequence(new Text(fqreader2.getCurrentValue().getSequence().toString()));
reads.add(new Tuple2<Text, SequencedFragment>(new Text(key), sf));
reads.add(new Tuple2<Text, SequencedFragment>(new Text(key2), sf2));
}
}
return reads.iterator();
});
}
Project: rdf2x
File: ElephasQuadParser.java
@Override
public JavaRDD<Quad> parseQuads(String path) {
Configuration conf = new Configuration();
Integer batchSize = config.getBatchSize();
conf.set(NLineInputFormat.LINES_PER_MAP, batchSize.toString());
if (config.getErrorHandling() == ParseErrorHandling.Throw) {
conf.set(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, "false");
} else {
conf.set(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, "true");
}
Boolean isLineBased = config.getLineBasedFormat();
if (isLineBased == null) {
isLineBased = guessIsLineBasedFormat(path);
}
JavaRDD<Quad> quads;
Integer partitions = config.getRepartition();
if (isLineBased) {
log.info("Parsing RDF in parallel with batch size: {}", batchSize);
quads = sc.newAPIHadoopFile(path,
NQuadsInputFormat.class,
LongWritable.class, // position
QuadWritable.class, // value
conf).values().map(QuadWritable::get);
} else {
// let Jena guess the format, load whole files
log.info("Input format is not line based, parsing RDF by Master node only.");
quads = sc.newAPIHadoopFile(path,
TriplesOrQuadsInputFormat.class,
LongWritable.class, // position
QuadWritable.class, // value
conf).values().map(QuadWritable::get);
if (partitions == null) {
log.warn("Reading non-line based formats by master node only, consider setting --parsing.repartition to redistribute work to other nodes.");
}
}
if (partitions != null) {
log.info("Distributing workload, repartitioning into {} partitions", partitions);
quads = quads.repartition(partitions);
}
final List<String> acceptedLanguages = config.getAcceptedLanguages();
// if only some languages are accepted
if (!acceptedLanguages.isEmpty()) {
// filter out literals of unsupported languages
quads = quads.filter(quad ->
!quad.getObject().isLiteral() ||
quad.getObject().getLiteralLanguage() == null ||
quad.getObject().getLiteralLanguage().isEmpty() ||
acceptedLanguages.contains(quad.getObject().getLiteralLanguage())
);
}
return quads;
}
Project: Halyard
File: HalyardBulkUpdate.java
@Override
public int run(String[] args) throws Exception {
if (args.length != 3) {
System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] <input_file_with_SPARQL_queries> <output_path> <table_name>");
return -1;
}
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
NLineInputFormat.setNumLinesPerSplit(job, 1);
job.setJarByClass(HalyardBulkUpdate.class);
job.setMapperClass(SPARQLMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setInputFormatClass(NLineInputFormat.class);
job.setSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0)) {
HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
FileInputFormat.setInputPaths(job, args[0]);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
if (job.waitForCompletion(true)) {
new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
LOG.info("Bulk Update Completed..");
return 0;
}
}
return -1;
}
Project: Gobblin
File: MRJobLauncher.java
/**
* Prepare the Hadoop MR job, including configuring the job and setting up the input/output paths.
*/
private Path prepareHadoopJob(List<WorkUnit> workUnits) throws IOException {
TimingEvent mrJobSetupTimer = this.eventSubmitter.getTimingEvent(TimingEventNames.RunJobTimings.MR_JOB_SETUP);
this.job.setJarByClass(MRJobLauncher.class);
this.job.setMapperClass(TaskRunner.class);
// The job is mapper-only
this.job.setNumReduceTasks(0);
this.job.setInputFormatClass(NLineInputFormat.class);
this.job.setOutputFormatClass(GobblinOutputFormat.class);
this.job.setMapOutputKeyClass(NullWritable.class);
this.job.setMapOutputValueClass(NullWritable.class);
// Turn off speculative execution
this.job.setSpeculativeExecution(false);
// Job input path is where input work unit files are stored
Path jobInputPath = new Path(this.mrJobDir, INPUT_DIR_NAME);
// Prepare job input
Path jobInputFile = prepareJobInput(jobInputPath, workUnits);
NLineInputFormat.addInputPath(this.job, jobInputFile);
// Job output path is where serialized task states are stored
Path jobOutputPath = new Path(this.mrJobDir, OUTPUT_DIR_NAME);
SequenceFileOutputFormat.setOutputPath(this.job, jobOutputPath);
// Serialize source state to a file which will be picked up by the mappers
Path jobStateFilePath = new Path(this.mrJobDir, JOB_STATE_FILE_NAME);
SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());
if (this.jobProps.containsKey(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY)) {
// When there is a limit on the number of mappers, each mapper may run
// multiple tasks if the total number of tasks is larger than the limit.
int maxMappers = Integer.parseInt(this.jobProps.getProperty(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY));
if (workUnits.size() > maxMappers) {
int numTasksPerMapper =
workUnits.size() % maxMappers == 0 ? workUnits.size() / maxMappers : workUnits.size() / maxMappers + 1;
NLineInputFormat.setNumLinesPerSplit(this.job, numTasksPerMapper);
}
}
mrJobSetupTimer.stop();
return jobOutputPath;
}
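To make the mapper-limit logic above concrete (hypothetical numbers): with 1,000 work units and ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY set to 300, numTasksPerMapper is 1,000 / 300 + 1 = 4 because 1,000 is not evenly divisible by 300, so NLineInputFormat packs four work-unit lines into each split and the job launches at most ceil(1,000 / 4) = 250 mappers, staying under the configured limit.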
Project: mondo-integration
File: CloudAtlThriftServlet.java
@Override
public String launch(String transformation, ModelSpec source, ModelSpec target) throws InvalidTransformation, InvalidModelSpec, TException {
try {
Job job = Job.getInstance(getConfiguration(), ATLMRMaster.DEFAULT_JOB_NAME);
Configuration conf = job.getConfiguration();
conf.set("mapreduce.app-submission.cross-platform", "true");
// Configure classes
Bundle bundle = Platform.getBundle(CloudAtlServletPlugin.PLUGIN_ID);
final File fJar = new File(FileLocator.toFileURL(bundle.getResource("libs/atl-mr.jar")).toURI());
job.setJar(fJar.getAbsolutePath());
job.setMapperClass(ATLMRMapper.class);
job.setReducerClass(ATLMRReducer.class);
job.setInputFormatClass(NLineInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setNumReduceTasks(1);
// Configure MapReduce input/outputs
ResourceSet resourceSet = new ResourceSetImpl();
ATLMRUtils.configureRegistry(conf);
Builder builder = new RecordBuilder.Builder(
URI.createURI(source.getUri()),
Arrays.asList(new URI[]{ URI.createURI(source.getMetamodelUris().get(0)) } ));
Path recordsPath = new Path("/tmp/" + UUID.randomUUID().toString() + ".rec");
FileSystem recordsFileSystem = FileSystem.get(recordsPath.toUri(), conf);
builder.save(recordsFileSystem.create(recordsPath));
FileInputFormat.setInputPaths(job, recordsPath);
String timestamp = new SimpleDateFormat("yyyyMMddhhmm").format(new Date());
String outDirName = "atlmr-out-" + timestamp + "-" + UUID.randomUUID();
FileOutputFormat.setOutputPath(job, new Path(job.getWorkingDirectory().suffix(Path.SEPARATOR + outDirName).toUri()));
// Configure records per map
InputStream inputStream = recordsFileSystem.open(recordsPath);
long linesPerMap = (long) Math.ceil((double) ATLMRMaster.countLines(inputStream) / 1);
job.getConfiguration().setLong(NLineInputFormat.LINES_PER_MAP, linesPerMap);
recordsFileSystem.close();
// Configure ATL related inputs/outputs
job.getConfiguration().set(ATLMRMaster.TRANSFORMATION, transformation);
job.getConfiguration().set(ATLMRMaster.SOURCE_METAMODEL, source.getMetamodelUris().get(0));
job.getConfiguration().set(ATLMRMaster.TARGET_METAMODEL, target.getMetamodelUris().get(0));
job.getConfiguration().set(ATLMRMaster.INPUT_MODEL, source.getUri());
job.getConfiguration().set(ATLMRMaster.OUTPUT_MODEL, target.getUri());
// Copy libraries to populate the job's classpath
IPath path = new org.eclipse.core.runtime.Path("libs");
URL fileURL = FileLocator.find(bundle, path, null);
String localJarsDir = new File(FileLocator.toFileURL(fileURL).toURI()).getAbsolutePath();
String hdfsJarsDir = "/temp/hadoop/atlrm/libs";
// TODO: This JobHelper needs to be updated to the new API
JobHelper.copyLocalJarsToHdfs(localJarsDir, hdfsJarsDir, conf);
JobHelper.addHdfsJarsToDistributedCache(hdfsJarsDir, job);
Logger.getGlobal().log(Level.INFO, "Sending Job");
job.submit();
Logger.getGlobal().log(Level.INFO, "Job sent");
return job.getJobID().toString();
} catch (IOException | InterruptedException | ClassNotFoundException | URISyntaxException e) {
throw new TException(e);
}
}
Project: search
File: MapReduceIndexerTool.java
/**
* To uniformly spread load across all mappers we randomize fullInputList
* with a separate small Mapper & Reducer preprocessing step. This way
* each input line ends up on a random position in the output file list.
* Each mapper indexes a disjoint consecutive set of files such that each
* set has roughly the same size, at least from a probabilistic
* perspective.
*
* For example an input file with the following input list of URLs:
*
* A
* B
* C
* D
*
* might be randomized into the following output list of URLs:
*
* C
* A
* D
* B
*
* The implementation sorts the list of lines by randomly generated numbers.
*/
private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit)
throws IOException {
Job job2 = Job.getInstance(baseConfig);
job2.setJarByClass(getClass());
job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class));
job2.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.addInputPath(job2, fullInputList);
NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit);
job2.setMapperClass(LineRandomizerMapper.class);
job2.setReducerClass(LineRandomizerReducer.class);
job2.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job2, outputStep2Dir);
job2.setNumReduceTasks(1);
job2.setOutputKeyClass(LongWritable.class);
job2.setOutputValueClass(Text.class);
return job2;
}
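The Javadoc above describes the randomization in prose: emit each input line under a randomly generated key and let the shuffle sort deliver the lines to the single reducer in random order. The sketch below is a minimal, hypothetical illustration of that technique; it is not the project's actual LineRandomizerMapper / LineRandomizerReducer source.

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class LineRandomizerSketch {

    public static class RandomKeyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        private final Random random = new Random();

        @Override
        protected void map(LongWritable offset, Text line, Context context)
                throws IOException, InterruptedException {
            // Emit every line under a random key; the shuffle sorts by this key.
            context.write(new LongWritable(random.nextLong()), line);
        }
    }

    public static class RandomOrderReducer extends Reducer<LongWritable, Text, LongWritable, Text> {
        private long position = 0;

        @Override
        protected void reduce(LongWritable key, Iterable<Text> lines, Context context)
                throws IOException, InterruptedException {
            // With a single reducer, lines arrive sorted by their random keys,
            // i.e. in random order overall; re-number them consecutively.
            for (Text line : lines) {
                context.write(new LongWritable(position++), line);
            }
        }
    }
}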
Project: mondo-collab-framework
File: CloudAtlThriftServlet.java
@Override
public String launch(String transformation, ModelSpec source, ModelSpec target) throws InvalidTransformation, InvalidModelSpec, TException {
try {
Job job = Job.getInstance(getConfiguration(), ATLMRMaster.DEFAULT_JOB_NAME);
Configuration conf = job.getConfiguration();
conf.set("mapreduce.app-submission.cross-platform", "true");
// Configure classes
Bundle bundle = Platform.getBundle(CloudAtlServletPlugin.PLUGIN_ID);
final File fJar = new File(FileLocator.toFileURL(bundle.getResource("libs/atl-mr.jar")).toURI());
job.setJar(fJar.getAbsolutePath());
job.setMapperClass(ATLMRMapper.class);
job.setReducerClass(ATLMRReducer.class);
job.setInputFormatClass(NLineInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setNumReduceTasks(1);
// Configure MapReduce input/outputs
ResourceSet resourceSet = new ResourceSetImpl();
ATLMRUtils.configureRegistry(conf);
Builder builder = new RecordBuilder.Builder(
URI.createURI(source.getUri()),
Arrays.asList(new URI[]{ URI.createURI(source.getMetamodelUris().get(0)) } ));
Path recordsPath = new Path("/tmp/" + UUID.randomUUID().toString() + ".rec");
FileSystem recordsFileSystem = FileSystem.get(recordsPath.toUri(), conf);
builder.save(recordsFileSystem.create(recordsPath));
FileInputFormat.setInputPaths(job, recordsPath);
String timestamp = new SimpleDateFormat("yyyyMMddhhmm").format(new Date());
String outDirName = "atlmr-out-" + timestamp + "-" + UUID.randomUUID();
FileOutputFormat.setOutputPath(job, new Path(job.getWorkingDirectory().suffix(Path.SEPARATOR + outDirName).toUri()));
// Configure records per map
InputStream inputStream = recordsFileSystem.open(recordsPath);
long linesPerMap = (long) Math.ceil((double) ATLMRMaster.countLines(inputStream) / 1);
job.getConfiguration().setLong(NLineInputFormat.LINES_PER_MAP, linesPerMap);
recordsFileSystem.close();
// Configure ATL related inputs/outputs
job.getConfiguration().set(ATLMRMaster.TRANSFORMATION, transformation);
job.getConfiguration().set(ATLMRMaster.SOURCE_METAMODEL, source.getMetamodelUris().get(0));
job.getConfiguration().set(ATLMRMaster.TARGET_METAMODEL, target.getMetamodelUris().get(0));
job.getConfiguration().set(ATLMRMaster.INPUT_MODEL, source.getUri());
job.getConfiguration().set(ATLMRMaster.OUTPUT_MODEL, target.getUri());
// Copy libraries to populate the job's classpath
IPath path = new org.eclipse.core.runtime.Path("libs");
URL fileURL = FileLocator.find(bundle, path, null);
String localJarsDir = new File(FileLocator.toFileURL(fileURL).toURI()).getAbsolutePath();
String hdfsJarsDir = "/temp/hadoop/atlrm/libs";
// TODO: This JobHelper needs to be updated to the new API
JobHelper.copyLocalJarsToHdfs(localJarsDir, hdfsJarsDir, conf);
JobHelper.addHdfsJarsToDistributedCache(hdfsJarsDir, job);
Logger.getGlobal().log(Level.INFO, "Sending Job");
job.submit();
Logger.getGlobal().log(Level.INFO, "Job sent");
return job.getJobID().toString();
} catch (IOException | InterruptedException | ClassNotFoundException | URISyntaxException e) {
throw new TException(e);
}
}
Project: read-open-source-code
File: MapReduceIndexerTool.java
/**
* To uniformly spread load across all mappers we randomize fullInputList
* with a separate small Mapper & Reducer preprocessing step. This way
* each input line ends up on a random position in the output file list.
* Each mapper indexes a disjoint consecutive set of files such that each
* set has roughly the same size, at least from a probabilistic
* perspective.
*
* For example an input file with the following input list of URLs:
*
* A
* B
* C
* D
*
* might be randomized into the following output list of URLs:
*
* C
* A
* D
* B
*
* The implementation sorts the list of lines by randomly generated numbers.
*/
private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit)
throws IOException {
Job job2 = Job.getInstance(baseConfig);
job2.setJarByClass(getClass());
job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class));
job2.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.addInputPath(job2, fullInputList);
NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit);
job2.setMapperClass(LineRandomizerMapper.class);
job2.setReducerClass(LineRandomizerReducer.class);
job2.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job2, outputStep2Dir);
job2.setNumReduceTasks(1);
job2.setOutputKeyClass(LongWritable.class);
job2.setOutputValueClass(Text.class);
return job2;
}
Project: mrgeo
File: DelimitedVectorInputFormat.java
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException
{
boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false);
if (useNLineFormat)
{
List<InputSplit> splits = new NLineInputFormat().getSplits(context);
// This is a workaround to what appears to be a bug in how NLineInputFormat
// computes its splits. When there are multiple splits in a file, it seems
// the start position in the last split is off by one. Note that this corrective
// code needs to check the last split for each different file that appears
// in the list of splits.
for (int index = 2; index < splits.size(); index++)
{
FileSplit previousSplit = (FileSplit) splits.get(index - 1);
FileSplit currSplit = (FileSplit) splits.get(index);
// If this index is the last split, or we've moved on to splits from a different
// file, then we need to adjust the last split for that file.
int lastFileIndex = -1;
if (index == splits.size() - 1)
{
lastFileIndex = index;
}
else if (!currSplit.getPath().equals(previousSplit.getPath()))
{
lastFileIndex = index - 1;
}
if (lastFileIndex >= 2)
{
FileSplit lastFileSplit = (FileSplit) splits.get(lastFileIndex);
FileSplit priorSplit = (FileSplit) splits.get(lastFileIndex - 1);
if (lastFileSplit.getPath().equals(priorSplit.getPath()))
{
if (priorSplit.getPath().equals(lastFileSplit.getPath()) &&
priorSplit.getStart() + priorSplit.getLength() < lastFileSplit.getStart())
{
// Adjust the start of previous split
FileSplit replacement = new FileSplit(lastFileSplit.getPath(),
priorSplit.getStart() + priorSplit.getLength(),
lastFileSplit.getLength() + 1,
lastFileSplit.getLocations());
log.info("Replacing split: " + lastFileSplit);
log.info(" With split: " + replacement);
splits.set(lastFileIndex, replacement);
}
}
}
}
return splits;
}
else
{
return new TextInputFormat().getSplits(context);
}
}
Project: eoulsan
File: HadoopCompatibleTaskScheduler.java
private Job createHadoopJob(final Configuration conf,
final DataFile submitFile, final int requiredMemory,
final String jobDescription) throws IOException {
final Configuration jobConf = new Configuration(conf);
// Set one task per map
jobConf.set("mapreduce.input.lineinputformat.linespermap", "" + 1);
if (requiredMemory > 0) {
// Set the memory required by the reads mapper
jobConf.set("mapreduce.map.memory.mb", "" + requiredMemory);
int jvmMemory = requiredMemory - 128;
if (jvmMemory <= 0) {
jvmMemory = requiredMemory;
}
// Set the memory required by JVM
jobConf.set("mapreduce.map.java.opts", "-Xmx" + jvmMemory + "M");
}
// Set Job name
// Create the job and its name
final Job job = Job.getInstance(jobConf, jobDescription);
// Set the jar
job.setJarByClass(HadoopCompatibleTaskScheduler.class);
// Set input path
FileInputFormat.addInputPath(job, new Path(submitFile.getSource()));
job.setInputFormatClass(NLineInputFormat.class);
// Set the Mapper class
job.setMapperClass(HadoopCompatibleMapper.class);
// Set the output key class
job.setOutputKeyClass(NullWritable.class);
// Set the output value class
job.setOutputValueClass(NullWritable.class);
// Set the output format
job.setOutputFormatClass(NullOutputFormat.class);
// Set the number of reducers
job.setNumReduceTasks(0);
return job;
}
Project: ViraPipe
File: DecompressInterleave.java
private static void splitFastq(FileStatus fst, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
//TODO: Handle also compressed files
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, new Configuration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
splitRDD.foreach( split -> {
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), split);
writeFastqFile(fqreader, new Configuration(), splitDir + "/" + split.getPath().getName()+"_"+split.getStart() + ".fq");
});
}
Project: imputationserver
File: ImputationJobMinimac3.java
@Override
public void setupJob(Job job) {
NLineInputFormat.setNumLinesPerSplit(job, 1);
job.setMapperClass(ImputationMapperMinimac3.class);
job.setInputFormatClass(NLineInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setNumReduceTasks(0);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for
* {@link NLineInputFormat#addInputPath(org.apache.hadoop.mapreduce.Job, org.apache.hadoop.fs.Path)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*/
@JSStaticFunction
public static void addInputPath(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
FileInputFormatHelper.addInputPath(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#addInputPaths(org.apache.hadoop.mapreduce.Job, String)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*/
@JSStaticFunction
public static void addInputPaths(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
FileInputFormatHelper.addInputPaths(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#getInputPathFilter(org.apache.hadoop.mapreduce.JobContext)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*
* @return class name for the input path filter or undefined
*/
@JSStaticFunction
public static Object getInputPathFilter(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
return FileInputFormatHelper.getInputPathFilter(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#getInputPaths(org.apache.hadoop.mapreduce.JobContext)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*
* @return array of input paths
*/
@JSStaticFunction
public static Object getInputPaths(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
return FileInputFormatHelper.getInputPaths(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#getMaxSplitSize(org.apache.hadoop.mapreduce.JobContext)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*
* @return the max split size
*/
@JSStaticFunction
public static Object getMaxSplitSize(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
return FileInputFormatHelper.getMaxSplitSize(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for {@link NLineInputFormat#getMinSplitSize(org.apache.hadoop.mapreduce.JobContext)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*
* @return the min split size
*/
@JSStaticFunction
public static Object getMinSplitSize(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
return FileInputFormatHelper.getMinSplitSize(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Wraps {@link NLineInputFormat#setInputPathFilter(org.apache.hadoop.mapreduce.Job, Class)}.
*
* @param ctx the JavaScript context (unused)
* @param thisObj the 'this' object of the caller
* @param args the arguments for the call
* @param func the function called (unused)
*/
@JSStaticFunction
public static void setInputPathFilter(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
FileInputFormatHelper.setInputPathFilter(NLineInputFormat.class, ctx, thisObj, args);
}
Project: lembos
File: NLineInputFormatWrap.java
/**
* Java wrapper for
* {@link NLineInputFormat#setInputPaths(org.apache.hadoop.mapreduce.Job, org.apache.hadoop.fs.Path...)} and
* {@link NLineInputFormat#setInputPaths(org.apache.hadoop.mapreduce.Job, String)}.
*
* @param ctx the JavaScript context
* @param thisObj the 'this' object
* @param args the function arguments
* @param func the function being called
*/
@JSStaticFunction
public static void setInputPaths(final Context ctx, final Scriptable thisObj, final Object[] args,
final Function func) {
FileInputFormatHelper.setInputPaths(NLineInputFormat.class, ctx, thisObj, args);
}