Java class org.apache.hadoop.mapreduce.lib.input.MultipleInputs: example source code
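The snippets below come from various open-source projects. As a quick orientation, here is a minimal driver sketch (the class name MultipleInputsSketch, the FirstMapper/SecondMapper mappers, and the argument layout are illustrative assumptions, not taken from any project below) showing how MultipleInputs binds a separate InputFormat and Mapper to each input path:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MultipleInputsSketch {

    // Hypothetical mapper for the first input: tags every line with its origin.
    public static class FirstMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(new Text("first"), value);
        }
    }

    // Hypothetical mapper for the second input.
    public static class SecondMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(new Text("second"), value);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "MultipleInputs sketch");
        job.setJarByClass(MultipleInputsSketch.class);
        // Each input path is bound to its own InputFormat and Mapper; this
        // 4-argument overload installs DelegatingMapper as the job's mapper class.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, FirstMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, SecondMapper.class);
        // Map-only for brevity; the projects below typically add a reducer to join the tagged records.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}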
Project: mapreduce-samples
File: Multiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(Multiplication.class);
// Note: the MultipleInputs.addInputPath calls below register DelegatingMapper as the
// job's mapper class, so the ChainMapper/setMapperClass calls here end up overridden
// (and the second setMapperClass call would override the first in any case).
ChainMapper.addMapper(job, CooccurrenceMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, RatingMapper.class, Text.class, Text.class, Text.class, Text.class, conf);
job.setMapperClass(CooccurrenceMapper.class);
job.setMapperClass(RatingMapper.class);
job.setReducerClass(MultiplicationReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitMultiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(UnitMultiplication.class);
ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
job.setReducerClass(MultiplicationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitMultiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitMultiplication.class);
ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
job.setReducerClass(MultiplicationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitSum.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitSum.class);
ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);
job.setReducerClass(SumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: Data-Science-with-Hadoop
File: ReduceJoin.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "Reduce-side join");
job.setJarByClass(ReduceJoin.class);
job.setReducerClass(ReduceJoinReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class) ;
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class) ;
// FileOutputFormat.setOutputPath(job, new Path(args[2]));
Path outputPath = new Path(args[2]);
FileOutputFormat.setOutputPath(job, outputPath);
outputPath.getFileSystem(conf).delete(outputPath, true);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: book-merger
File: BookMerger.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "book merger");
job.setJarByClass(BookMerger.class);
job.setCombinerClass(BookDataReducer.class);
job.setReducerClass(BookDataReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BookMapWritable.class);
FileOutputFormat.setOutputPath(job, new Path(args[0]));
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CanonicalMapper.class);
MultipleInputs.addInputPath(job, new Path(args[2]), TextInputFormat.class, LibraryThingMapper.class);
MultipleInputs.addInputPath(job, new Path(args[3]), TextInputFormat.class, LTScrapedMapper.class);
job.waitForCompletion(true);
}
Project: mara
File: MultiInputAnnotationHandler.java
@Override
public void process(Annotation annotation, Job job, Object target)
throws ToolException {
for (Input input : ((MultiInput)annotation).value()) {
Path path = getInputAsPath(input.path());
if (input.mapper() == Mapper.class) {
MultipleInputs.addInputPath(job, path, input.format());
}
else {
MultipleInputs.addInputPath(job, path, input.format(), input.mapper());
// Need to call again here so the call is captured by our aspect which
// will replace it with the annotated delegating mapper class for resource
// injection if required.
job.setMapperClass(DelegatingMapper.class);
}
}
}
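For reference, the two branches in the handler above correspond to the two MultipleInputs.addInputPath overloads; a minimal sketch of the distinction inside a driver (the paths and MyMapper are illustrative placeholders):

// 3-argument overload: the path gets a dedicated InputFormat, and whatever mapper
// the job itself has configured (job.setMapperClass) processes its records.
MultipleInputs.addInputPath(job, new Path("/data/plain"), TextInputFormat.class);

// 4-argument overload: the path additionally gets its own Mapper; MultipleInputs
// then sets DelegatingMapper as the job's mapper class to dispatch per path.
MultipleInputs.addInputPath(job, new Path("/data/special"), TextInputFormat.class, MyMapper.class);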
Project: mara
File: JobAnnotationUtilTest.java
@Test @Ignore // NOT WORKING
public void testConfigureJobFromClass() {
Class<?> clazz = TestMapper.class;
try {
PowerMockito.mockStatic(MultipleInputs.class);
// Now configure
annotationUtil.configureJobFromClass(clazz, job);
// Verification must come after the call under test, and PowerMock additionally expects
// the verified static invocation (e.g. MultipleInputs.addInputPath(...)) to follow
// verifyStatic; that step is still missing, which is why the test stays @Ignore'd.
PowerMockito.verifyStatic(Mockito.times(6));
} catch (ToolException e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Project: book-hadoop-hacks
File: TestReduceSideJoin.java
public int run(String[] args) throws Exception {
Job job=Job.getInstance(getConf(), "reduce side join");
job.setJarByClass(getClass());
// Note: both input paths are mapped with the same class here; reduce-side joins
// typically register a distinct mapper per input, or distinguish sources inside the mapper.
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, ReduceSideJoinMasterMap.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, ReduceSideJoinMasterMap.class);
job.setMapOutputKeyClass(IntPair.class);
job.setMapOutputValueClass(Text.class);
job.setPartitionerClass(ReducesidejoinPartitioner.class);
job.setGroupingComparatorClass(ReduceSideJoinGroupingComparator.class);
job.setReducerClass(ReduceSideJoinReduce.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
return job.waitForCompletion(true)?0:1;
}
Project: hadoop-fieldformat
File: TestRun.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
//conf.set("mapreduce.fieldoutput.header", "ct_audit,ct_action");
Job job = new Job(conf);
job.setJobName("test fieldInput");
job.setJarByClass(TestRun.class);
MultipleInputs.addInputPath(job, new Path(args[0]), FieldInputFormat.class, CTMapper.class);
job.setNumReduceTasks(0);
//FileOutputFormat.setOutputPath(job, new Path(args[1]));
FieldOutputFormat.setOutputPath(job, new Path(args[1]));
job.setOutputFormatClass(FieldOutputFormat.class);
job.submit();
job.waitForCompletion(true);
return 0;
}
Project: hadoop-map-reduce-patterns
File: PostCommentHierarchy.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "PostCommentHeirarchy");
job.setJarByClass(PostCommentHierarchy.class);
MultipleInputs.addInputPath(job, new Path(args[0]),
TextInputFormat.class, PostMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]),
TextInputFormat.class, CommentMapper.class);
job.setReducerClass(PostCommentHierarchyReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return job.waitForCompletion(true) ? 0 : 2;
}
Project: InsAdjustment
File: InsDriver.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setJarByClass(InsDriver.class);
// job.setMapperClass(PFMapper.class);
job.setReducerClass(InsReducer.class);
// job.setNumReduceTasks(0);
job.setJobName("Participant Adjustment PoC");
String busDate = args[3];
job.getConfiguration().set("BUS_DATE", busDate);
// map-reduce job.
Path inputPath1 = new Path(args[0]);
Path inputPath2 = new Path(args[1]);
Path outputPath = new Path(args[2]);
MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class, PFMapper.class);
MultipleInputs.addInputPath(job, inputPath2, TextInputFormat.class, BRMapper.class);
FileOutputFormat.setOutputPath(job, outputPath);
// TODO: Update the output path for the output directory of the
// map-reduce job.
// configuration should contain reference to your namenode
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Submit the job and wait for it to finish.
job.waitForCompletion(true);
}
Project: Bigdata
File: Question3.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: Question3 <in> <out>");
System.exit(3);
}
@SuppressWarnings("deprecation")
Job job1 = new Job(conf, "averageRating");
@SuppressWarnings("deprecation")
Job job2 = new Job(conf,"reduceSideJoin");
job1.setJarByClass(Question3.class);
job1.setMapperClass(TopTenMap.class);
job1.setReducerClass(TopTenReduce.class);
job1.setOutputKeyClass(Text.class);
job1.setOutputValueClass(FloatWritable.class);
FileInputFormat.addInputPath(job1, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job1, new Path("/bxr140530/Asgn/temp"));
if(job1.waitForCompletion(true))
{
job2.setOutputKeyClass(Text.class);
job2.setOutputValueClass(Text.class);
job2.setJarByClass(Question3.class);
job2.setMapperClass(TopTenJoinMap.class);
job2.setReducerClass(TopTenJoinReduce.class);
MultipleInputs.addInputPath(job2,new Path("/bxr140530/Asgn/temp"),TextInputFormat.class,TopTenJoinMap.class);
MultipleInputs.addInputPath(job2,new Path(otherArgs[1]),TextInputFormat.class,BusinessMap.class);
FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));
job2.waitForCompletion(true);
}
}
Project: incubator-rya
File: JoinSelectStatsUtil.java
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath, Class<? extends Mapper<CompositeType,TripleCard,?,?>> mapperClass,
String outPath, String auths) throws AccumuloSecurityException {
MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass);
MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass);
job.setMapOutputKeyClass(CompositeType.class);
job.setMapOutputValueClass(TripleCard.class);
SequenceFileOutputFormat.setOutputPath(job, new Path(outPath));
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(TripleEntry.class);
job.setOutputValueClass(CardList.class);
}
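A hypothetical invocation of the helper above, for illustration only (the paths and the auths string are placeholders; JoinSelectAggregateMapper and JoinReducer are the classes used by the test driver in the next snippet):

Job job = Job.getInstance(new Configuration(), "join select stats");
job.setJarByClass(JoinSelectStatsUtil.class);
// Wires both sequence-file inputs, the map output types, and the sequence-file output.
JoinSelectStatsUtil.initJoinMRJob(job, "/rya/prospects", "/rya/spo",
    JoinSelectAggregateMapper.class, "/rya/join_stats", "myAuths");
job.setReducerClass(JoinReducer.class);
job.waitForCompletion(true);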
Project: incubator-rya
File: JoinSelectStatisticsTest.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
String outpath = conf.get(OUTPUTPATH);
Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
job.setJarByClass(this.getClass());
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
MultipleInputs.addInputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()),
SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
MultipleInputs.addInputPath(job,new Path(SPOOUT.getAbsolutePath()) ,
SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
job.setMapOutputKeyClass(CompositeType.class);
job.setMapOutputValueClass(TripleCard.class);
tempDir = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "");
SequenceFileOutputFormat.setOutputPath(job, new Path(tempDir.getAbsolutePath()));
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(TripleEntry.class);
job.setOutputValueClass(CardList.class);
job.setSortComparatorClass(JoinSelectSortComparator.class);
job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
job.setPartitionerClass(JoinSelectPartitioner.class);
job.setReducerClass(JoinReducer.class);
job.setNumReduceTasks(32);
job.waitForCompletion(true);
return job.isSuccessful() ? 0 : 1;
}
Project: incubator-rya
File: AbstractReasoningTool.java
/**
* Set up the MapReduce job to use Accumulo as an input.
* @param tableMapper Mapper class to use
*/
protected void configureAccumuloInput(Class<? extends Mapper<Key,Value,?,?>> tableMapper)
throws AccumuloSecurityException {
MRReasoningUtils.configureAccumuloInput(job);
MultipleInputs.addInputPath(job, new Path("/tmp/input"),
AccumuloInputFormat.class, tableMapper);
}
Project: incubator-rya
File: AbstractReasoningTool.java
/**
* Set up the MapReduce job to use an RDF file as an input.
* @param rdfMapper class to use
*/
protected void configureRdfInput(Path inputPath,
Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) {
Configuration conf = job.getConfiguration();
String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName());
conf.set(MRUtils.FORMAT_PROP, format);
MultipleInputs.addInputPath(job, inputPath,
RdfFileInputFormat.class, rdfMapper);
}
Project: hiped2
File: StreamingRepartitionJoin.java
/**
* The MapReduce driver - setup and launch the job.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path usersPath = new Path(cli.getArgValueAsString(Options.USERS));
Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS));
Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT));
Configuration conf = super.getConf();
Job job = new Job(conf);
job.setJarByClass(StreamingRepartitionJoin.class);
MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class);
MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class);
ShuffleUtils.configBuilder()
.useNewApi()
.setSortIndices(KeyFields.USER, KeyFields.DATASET)
.setPartitionerIndices(KeyFields.USER)
.setGroupIndices(KeyFields.USER)
.configure(job.getConfiguration());
job.setReducerClass(Reduce.class);
job.setMapOutputKeyClass(Tuple.class);
job.setMapOutputValueClass(Tuple.class);
FileOutputFormat.setOutputPath(job, outputPath);
return job.waitForCompletion(true) ? 0 : 1;
}
Project: hiped2
File: BloomJoin.java
/**
* The MapReduce driver - setup and launch the job.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path usersPath = new Path(cli.getArgValueAsString(Options.USERS));
Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS));
Path bloomPath = new Path(cli.getArgValueAsString(Options.BLOOM_FILE));
Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT));
Configuration conf = super.getConf();
Job job = new Job(conf);
job.setJarByClass(BloomJoin.class);
MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class);
MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Tuple.class);
job.setReducerClass(Reduce.class);
job.addCacheFile(bloomPath.toUri());
job.getConfiguration().set(AbstractFilterMap.DISTCACHE_FILENAME_CONFIG, bloomPath.getName());
FileInputFormat.setInputPaths(job, userLogsPath);
FileOutputFormat.setOutputPath(job, outputPath);
return job.waitForCompletion(true) ? 0 : 1;
}
Project: incubator-mrql
File: MapOperation.java
/** The cMap physical operator
* @param map_fnc mapper function
* @param acc_fnc optional accumulator function
* @param zero the optional zero value for the accumulator
* @param source input data source
* @param stop_counter optional counter used in repeat operation
* @return a new data source that contains the result
*/
public final static DataSet cMap ( Tree map_fnc, // mapper function
Tree acc_fnc, // optional accumulator function
Tree zero, // the optional zero value for the accumulator
DataSet source, // input data source
String stop_counter ) // optional counter used in repeat operation
throws Exception {
conf = MapReduceEvaluator.clear_configuration(conf);
String newpath = new_path(conf);
conf.set("mrql.mapper",map_fnc.toString());
conf.set("mrql.counter",stop_counter);
if (zero != null) {
conf.set("mrql.accumulator",acc_fnc.toString());
conf.set("mrql.zero",zero.toString());
} else conf.set("mrql.zero","");
setupSplits(source,conf);
Job job = new Job(conf,newpath);
distribute_compiled_arguments(job.getConfiguration());
job.setJarByClass(MapReducePlan.class);
job.setOutputKeyClass(MRContainer.class);
job.setOutputValueClass(MRContainer.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
for (DataSource p: source.source)
MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,cMapMapper.class);
FileOutputFormat.setOutputPath(job,new Path(newpath));
job.setNumReduceTasks(0);
job.waitForCompletion(true);
long c = (stop_counter.equals("-")) ? 0
: job.getCounters().findCounter("mrql",stop_counter).getValue();
return new DataSet(new BinaryDataSource(newpath,conf),c,outputRecords(job));
}
Project: hadoop-in-action
File: JoinRecordWithStationName.java
@Override
public int run(String[] args) throws Exception {
if (args.length != 3) {
JobBuilder
.printUsage(this, "<ncdc input> <station input> <output>");
return -1;
}
Job job = Job.getInstance(getConf(), "Join weather records with station names");
job.setJarByClass(getClass());
Path ncdcInputPath = new Path(args[0]);
Path stationInputPath = new Path(args[1]);
Path outputPath = new Path(args[2]);
MultipleInputs.addInputPath(job, ncdcInputPath, TextInputFormat.class,
JoinRecordMapper.class);
MultipleInputs.addInputPath(job, stationInputPath,
TextInputFormat.class, JoinStationMapper.class);
FileOutputFormat.setOutputPath(job, outputPath);
job.setPartitionerClass(KeyPartitioner.class);
job.setGroupingComparatorClass(TextPair.FirstComparator.class);
job.setMapOutputKeyClass(TextPair.class);
job.setReducerClass(JoinReducer.class);
job.setOutputKeyClass(Text.class);
return job.waitForCompletion(true) ? 0 : 1;
}
Project: shaf
File: PathInputConfig.java
/**
* Configures the job input for the file-system data source.
*/
@Override
public void configure(
final Class<? extends DistributedProcess<?, ?, ?, ?, ?, ?>> pcls,
ProcessConfiguration config, Job job) throws JobConfigException {
try {
int index = 0;
InputTokenizer it = config.getInputTokenizer();
while (it.nextToken()) {
LOG.debug("Configuring input number: " + (index++));
Path path = new Path(config.getBase(), it.getPath());
if (it.isMapperClassNameDefined()) {
@SuppressWarnings("unchecked")
Class<? extends MapProcess<?, ?, ?, ?>> mcls = (Class<? extends MapProcess<?, ?, ?, ?>>) DynamicClassLoader
.getClassByName(it.getMapperClassName());
JobInput anno = mcls.getAnnotation(JobInput.class);
MultipleInputs.addInputPath(job, path, anno.formatClass(),
HadoopMapProcessWrapper.class);
HadoopMapProcessWrapper.addWrappingProcessClass(job, path,
mcls);
LOG.debug("Adds input format: "
+ anno.formatClass());
LOG.debug("Adds mapper: " + mcls);
}
FileInputFormat.addInputPath(job, path);
LOG.debug("Adds input path: " + path);
}
} catch (PropertyNotFoundException | ClassNotFoundException
| IOException exc) {
throw new JobConfigException("Failed to configure job input.", exc);
}
}
Project: incubator-blur
File: IndexerJobDriver.java
private boolean runMrWithLookup(String uuid, TableDescriptor descriptor, List<Path> inprogressPathList, String table,
Path fileCache, Path outputPath, int reducerMultipler, Path tmpPath, TableStats tableStats, String snapshot)
throws ClassNotFoundException, IOException, InterruptedException {
PartitionedInputResult result = buildPartitionedInputData(uuid, tmpPath, descriptor, inprogressPathList, snapshot,
fileCache);
Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
ExistingDataIndexLookupMapper.setSnapshot(job, MRUPDATE_SNAPSHOT);
FileInputFormat.addInputPath(job, result._partitionedInputData);
MultipleInputs.addInputPath(job, result._partitionedInputData, SequenceFileInputFormat.class,
ExistingDataIndexLookupMapper.class);
for (Path p : inprogressPathList) {
FileInputFormat.addInputPath(job, p);
MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class);
}
BlurOutputFormat.setOutputPath(job, outputPath);
BlurOutputFormat.setupJob(job, descriptor);
job.setReducerClass(UpdateReducer.class);
job.setMapOutputKeyClass(IndexKey.class);
job.setMapOutputValueClass(IndexValue.class);
job.setPartitionerClass(IndexKeyPartitioner.class);
job.setGroupingComparatorClass(IndexKeyWritableComparator.class);
BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);
boolean success = job.waitForCompletion(true);
Counters counters = job.getCounters();
LOG.info("Counters [" + counters + "]");
return success;
}
Project: incubator-blur
File: IndexerJobDriver.java
private boolean runMrOnly(TableDescriptor descriptor, List<Path> inprogressPathList, String table, Path fileCache,
Path outputPath, int reducerMultipler) throws IOException, ClassNotFoundException, InterruptedException {
Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
Path tablePath = new Path(descriptor.getTableUri());
BlurInputFormat.setLocalCachePath(job, fileCache);
BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, ExistingDataMapper.class);
for (Path p : inprogressPathList) {
FileInputFormat.addInputPath(job, p);
MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class);
}
BlurOutputFormat.setOutputPath(job, outputPath);
BlurOutputFormat.setupJob(job, descriptor);
job.setReducerClass(UpdateReducer.class);
job.setMapOutputKeyClass(IndexKey.class);
job.setMapOutputValueClass(IndexValue.class);
job.setPartitionerClass(IndexKeyPartitioner.class);
job.setGroupingComparatorClass(IndexKeyWritableComparator.class);
BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);
boolean success = job.waitForCompletion(true);
Counters counters = job.getCounters();
LOG.info("Counters [" + counters + "]");
return success;
}
Project: MapReduceSamplesJava
File: PostCommentHierarchy.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: PostCommentHierarchy <posts> <comments> <outdir>");
System.exit(1);
}
Job job = new Job(conf, "PostCommentHierarchy");
job.setJarByClass(PostCommentHierarchy.class);
MultipleInputs.addInputPath(job, new Path(otherArgs[0]),
TextInputFormat.class, PostMapper.class);
MultipleInputs.addInputPath(job, new Path(otherArgs[1]),
TextInputFormat.class, CommentMapper.class);
job.setReducerClass(PostCommentHierarchyReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
System.exit(job.waitForCompletion(true) ? 0 : 2);
}
Project: hadoop-map-reduce-patterns
File: ReplicatedUserJoin.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
GenericOptionsParser parser = new GenericOptionsParser(conf, args);
String[] otherArgs = parser.getRemainingArgs();
if (otherArgs.length != 4) {
printUsage();
}
Job job = new Job(conf, "ReduceSideJoin");
job.setJarByClass(ReplicatedUserJoin.class);
// Use MultipleInputs to set which input uses what mapper
// This will keep parsing of each data set separate from a logical
// standpoint
// The first two elements of the args array are the two inputs
MultipleInputs.addInputPath(job, new Path(args[0]),
TextInputFormat.class, UserJoinMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]),
TextInputFormat.class, CommentJoinMapper.class);
job.getConfiguration().set("join.type", args[2]);
job.setReducerClass(UserJoinReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(args[3]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return job.waitForCompletion(true) ? 0 : 2;
}
Project: hadoop-map-reduce-patterns
File: ReduceSideJoinBloomFilter.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
GenericOptionsParser parser = new GenericOptionsParser(conf, args);
String[] otherArgs = parser.getRemainingArgs();
if (otherArgs.length != 4) {
printUsage();
}
Job job = new Job(conf, "ReduceSideJoinBloomFilter");
job.setJarByClass(ReduceSideJoinBloomFilter.class);
// Use MultipleInputs to set which input uses what mapper
// This will keep parsing of each data set separate from a logical
// standpoint
// The first two elements of the args array are the two inputs
MultipleInputs.addInputPath(job, new Path(args[0]),
TextInputFormat.class, UserJoinMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]),
TextInputFormat.class, CommentJoinMapperWithBloom.class);
job.getConfiguration().set("join.type", args[2]);
job.setReducerClass(UserJoinReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(args[3]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return job.waitForCompletion(true) ? 0 : 2;
}
Project: hadoop-map-reduce-patterns
File: ReduceSideJoin.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
GenericOptionsParser parser = new GenericOptionsParser(conf, args);
String[] otherArgs = parser.getRemainingArgs();
if (otherArgs.length != 4) {
printUsage();
}
Job job = new Job(conf, "ReduceSideJoin");
job.setJarByClass(ReduceSideJoin.class);
// Use MultipleInputs to set which input uses what mapper
// This will keep parsing of each data set separate from a logical
// standpoint
// The first two elements of the args array are the two inputs
MultipleInputs.addInputPath(job, new Path(args[0]),
TextInputFormat.class, UserJoinMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]),
TextInputFormat.class, CommentJoinMapper.class);
job.getConfiguration().set("join.type", args[2]);
job.setReducerClass(UserJoinReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(args[3]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return job.waitForCompletion(true) ? 0 : 2;
}
Project: npc-recommender-imdb
File: FScore.java
public static int run(String[] args, final int REDUCERS, final int T)
throws IOException, ClassNotFoundException, InterruptedException{
//set config
Configuration c = new Configuration();
c.set("CPATH", args[2]+"/recommander");
c.setInt("T", T);
Job job = new Job(c, "FScore");
//metrics
job.setNumReduceTasks(REDUCERS);
//Classes
job.setJarByClass(FScore.class);
// job.setMapperClass(FMapper.class);
job.setReducerClass(FReducer.class);
//mapOutput,reduceOutput
// job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
//IO
// FileInputFormat.addInputPaths(job, args[1]+"/ratings");
MultipleInputs.addInputPath(job, new Path(args[1]+"/ratings"),
TextInputFormat.class, FMapperRatings.class);
MultipleInputs.addInputPath(job, new Path(args[2]+"/recommander"),
TextInputFormat.class, FMapperResults.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]+"/fscore"));
return (job.waitForCompletion(true) ? 0 : -1);
}
Project: hadoop-relational
File: Union.java
@Override
public int run(String[] args) throws Exception {
System.out.println(Arrays.asList(args).toString());
// Parse arguments
Path oneRelationPath = new Path(args[0]),
twoRelationPath = new Path(args[1]),
outputRelationPath = new Path(args[2]);
// Setup job
Job job = Job.getInstance(conf);
job.setJarByClass(Union.class);
MultipleInputs.addInputPath(job, oneRelationPath, TextInputFormat.class, UnionMapper.class);
MultipleInputs.addInputPath(job, twoRelationPath, TextInputFormat.class, UnionMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Text.class);
// This is a map-only job
job.setNumReduceTasks(0);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputRelationPath);
// Run job
job.submit();
return job.waitForCompletion(true) ? 0 : 1;
}
Project: hadoop-relational
File: Join.java
@Override
public int run(String[] args) throws Exception {
System.out.println(Arrays.asList(args).toString());
// Parse arguments
Path leftRelationPath = new Path(args[0]),
rightRelationPath = new Path(args[1]),
outputRelationPath = new Path(args[2]);
conf.set(new JoinMapperLeft().getJoinKeyIndicesKey(), args[3]);
conf.set(new JoinMapperRight().getJoinKeyIndicesKey(), args[4]);
// Setup job
Job job = Job.getInstance(conf);
job.setJarByClass(Join.class);
MultipleInputs.addInputPath(job, leftRelationPath, TextInputFormat.class, JoinMapperLeft.class);
MultipleInputs
.addInputPath(job, rightRelationPath, TextInputFormat.class, JoinMapperRight.class);
job.setMapOutputKeyClass(Tuple.class);
job.setMapOutputValueClass(MarkedTuple.class);
job.setReducerClass(JoinReducer.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputRelationPath);
// Run job
job.submit();
return job.waitForCompletion(true) ? 0 : 1;
}
Project: TimeseriesDelta
File: RollingAverage.java
public int run(String[] args) throws Exception
{
Job job = Job.getInstance(config);
job.setJarByClass(RollingAverage.class);
job.setJobName("RollingAverage");
if( args.length != 4 ) {
System.out.printf("Usage: %s <hdfs-input-dir> <hdfs-output-dir> <local-config-file>\n" , args[0]);
return -1;
}
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(TimeseriesKey.class);
job.setMapOutputValueClass(TimeseriesDataPoint.class);
job.setMapperClass(RollingAverageMapper.class);
job.setReducerClass(RollingAverageReducer.class);
job.setPartitionerClass(NaturalKeyPartitioner.class);
job.setSortComparatorClass(CompositeKeyComparator.class);
job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
Path inputPath = new Path(args[1]);
MultipleInputs.addInputPath(job, inputPath, TextInputFormat.class, RollingAverageMapper.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
loadJobConfig( job, args[3] );
return (job.waitForCompletion(true) ? 0 : 1);
}
Project: mapreduce-samples
File: UnitMultiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(UnitMultiplication.class);
// how to chain two mapper classes? (see the note after this snippet)
job.setReducerClass(MultiplicationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
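On the question in the comment above: ChainMapper runs mappers back to back inside a single map task, while the MultipleInputs calls in this driver assign a different mapper to each input path. The other UnitMultiplication variant earlier in this listing uses the chaining calls, reproduced here for reference:

ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

Note, though, that a later MultipleInputs.addInputPath call with a mapper argument replaces the job's mapper class with DelegatingMapper, so chained mappers set up this way will not actually run; the two mechanisms serve different purposes and do not compose directly.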
Project: Bigdata
File: Top10BusRev.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // get all args
if (otherArgs.length != 3) {
System.err.println("Usage: Top10BusRev <review> <fbusiness> <ooutput> ");
System.exit(2);
}
// create a job with name "Top10BusRev"
Job job = new Job(conf, "Top10BusRev");
job.setJarByClass(Top10BusRev.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
// uncomment the following line to add the Combiner
// job.setCombinerClass(Reduce.class);
// set output key type
job.setOutputKeyClass(Text.class);
// set output value type
job.setOutputValueClass(FloatWritable.class);
//set the HDFS path of the input data
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
// set the HDFS path for the output
FileOutputFormat.setOutputPath(job, new Path("/datatemp"));
//Wait till job completion
if(job.waitForCompletion(true) == true){
// create a second job, also named "Top10BusRev"
Job job2 = new Job(conf, "Top10BusRev");
job2.setJarByClass(Top10BusRev.class);
job2.setReducerClass(ReduceTop.class);
MultipleInputs.addInputPath(job2, new Path("/datatemp"), TextInputFormat.class,
MapTopRating.class);
MultipleInputs.addInputPath(job2, new Path(otherArgs[1]), TextInputFormat.class,
MapTopBusiness.class);
// uncomment the following line to add the Combiner
// job.setCombinerClass(Reduce.class);
// set output key type
job2.setOutputKeyClass(Text.class);
// set output value type
job2.setOutputValueClass(Text.class);
//set the HDFS path of the input data
// set the HDFS path for the output
FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));
job2.waitForCompletion(true);
}
}
Project: dkpro-c4corpus
File: Phase4RemoveDuplicatesUsingReduceSideJoins.java
@Override
public int run(String[] args)
throws Exception
{
Job job = Job.getInstance(getConf());
job.setJarByClass(Phase4RemoveDuplicatesUsingReduceSideJoins.class);
job.setJobName(Phase4RemoveDuplicatesUsingReduceSideJoins.class.getName());
// paths
// text files of ids to be deleted
String textFilePath = args[0];
// corpus with *.warc.gz
String commaSeparatedInputFiles = args[1];
// output
String outputPath = args[2];
//second input the look up text file
MultipleInputs.addInputPath(job, new Path(textFilePath), TextInputFormat.class,
JoinTextMapper.class);
//first input the data set (check comma separated availability)
MultipleInputs.addInputPath(job, new Path(commaSeparatedInputFiles), WARCInputFormat.class,
JoinWARCMapper.class);
job.setPartitionerClass(SourceJoiningKeyPartitioner.class);
job.setGroupingComparatorClass(SourceJoiningGroupingComparator.class);
job.setMapOutputKeyClass(CompositeKey.class);
job.setMapOutputValueClass(WARCWritable.class);
job.setReducerClass(JoinReducer.class);
job.setOutputFormatClass(WARCOutputFormat.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(WARCWritable.class);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
return job.waitForCompletion(true) ? 0 : 1;
}
Project: HiveQueryMRJoin
File: FinalDriver.java
public int run(String[] args) throws Exception {
String inputLoc1 = args[0];
String inputLoc2 = args[1];
String inputLoc3 = args[2];
String outputLoc = args[3];
String outputLoc2 = args[4];
String finalOutput = args[5];
// Job Chaning of multiple jobs.
// Executing Job 1.
conf1 = getConf();
conf1.set("mapred.job.queue.name", "sree");
Job job1 = new Job(conf1,
"MapReduce: Performing Cross Join for 2 tables ");
job1.setJarByClass(FinalDriver.class);
MultipleInputs.addInputPath(job1, new Path(inputLoc1),
TextInputFormat.class, ConsumerMapper.class);
MultipleInputs.addInputPath(job1, new Path(inputLoc2),
TextInputFormat.class, PurchasesMapper.class);
job1.setReducerClass(IntmdteJoinReducer.class);
job1.setMapOutputKeyClass(IntWritable.class);
job1.setMapOutputValueClass(Text.class);
job1.setOutputKeyClass(NullWritable.class);
job1.setOutputValueClass(Text.class);
log.info("Input Path to the map-reduce job " + inputLoc1 + " and "
+ inputLoc2);
log.info("Ouput Path to the map-reduce job " + outputLoc);
Path output = new Path(outputLoc);
FileOutputFormat.setOutputPath(job1, output);
job1.waitForCompletion(true);
// Job 2
conf2 = getConf();
conf2.set("mapred.job.queue.name", "sree");
Job job2 = new Job(conf2,
"MapReduce: Performing Cross Join for Final table");
job2.setJarByClass(FinalDriver.class);
MultipleInputs.addInputPath(job2, new Path(outputLoc),
TextInputFormat.class, JoinResultMapper.class);
MultipleInputs.addInputPath(job2, new Path(inputLoc3),
TextInputFormat.class, TransactionMapper.class);
job2.setReducerClass(IntermediateJoinReducer.class);
job2.setMapOutputKeyClass(Text.class);
job2.setMapOutputValueClass(Text.class);
job2.setOutputKeyClass(NullWritable.class);
job2.setOutputValueClass(Text.class);
log.info("Input Path to the map-reduce job " + inputLoc1 + " and "
+ inputLoc2);
log.info("Ouput Path to the map-reduce job " + outputLoc2);
Path output1 = new Path(outputLoc2);
FileSystem.get(conf2).delete(output1, true);
FileOutputFormat.setOutputPath(job2, output1);
job2.waitForCompletion(true);
// Job 3
conf3 = getConf();
conf3.set("mapred.job.queue.name", "sree");
Job job3 = new Job(conf3, "MapReduce : Final Join ");
job3.setJarByClass(FinalDriver.class);
FileInputFormat.addInputPath(job3, new Path(outputLoc2));
job3.setMapperClass(FinalMapper.class);
job3.setReducerClass(FinalReducer.class);
job3.setMapOutputKeyClass(Text.class);
job3.setMapOutputValueClass(Text.class);
job3.setOutputKeyClass(NullWritable.class);
job3.setOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job3, new Path(finalOutput));
int status = (job3.waitForCompletion(true) == true) ? 0 : 1;
return status;
}
Project: CBIR-on-Hadoop
File: SeqReader.java
public static void main(String[] args) throws IOException,
ClassNotFoundException, InterruptedException {
if (args.length < 3) {
System.out.println("Usage: <jar file> <sequence filename(s)> "
+ "<desired number of output sequence files> "
+ "<ABSOLUTE path to hdfs locaiton where the output folder "
+ "will automatically be created>");
System.exit(0);
}
int numOutputFiles = Integer.parseInt(args[args.length - 2]);
if (numOutputFiles < 1) {
// someone is screwing around
numOutputFiles = 1;
}
String absPath = args[args.length - 1];
if (absPath.charAt(absPath.length() - 1) != '/') {
absPath += "/";
}
DateFormat dateFormat = new SimpleDateFormat("ddMMyyyyHHmmss");
Date date = new Date();
String baseOutputName = absPath + "SeqReader" + dateFormat.format(date);
Configuration conf = new Configuration();
conf.set("BASE_OUTPUT_FILE_NAME", baseOutputName);
conf.set("NUM_REDUCERS", Integer.toString(numOutputFiles));
Job job = Job.getInstance(conf);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(SequenceFileToImageMapper.class);
job.setReducerClass(SequenceFileToImageReducer.class);
job.setPartitionerClass(SequenceFileToImagePartitioner.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
// job.setOutputFormatClass(TextOutputFormat.class);
job.setNumReduceTasks(numOutputFiles);
LazyOutputFormat.setOutputFormatClass(job,
SequenceFileOutputFormat.class);
for (int i = 0; i < args.length - 2; i++) {
// FileInputFormat.setInputPaths(job, new Path(args[i]));
MultipleInputs.addInputPath(job, new Path(args[i]),
SequenceFileInputFormat.class);
}
for (int i = 0; i < numOutputFiles; i++) {
MultipleOutputs.addNamedOutput(job, "n" + Integer.toString(i),
SequenceFileOutputFormat.class, Text.class, Text.class);
}
job.setJarByClass(SeqReader.class);
FileOutputFormat.setOutputPath(job, new Path(baseOutputName));
/* write the output folder location
* to a file in the destination folder
*/
Path f = new Path(absPath + "SeqReader.outputlocation");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(f)) {
// File already exists.
// Delete the file before proceeding.
fs.delete(f, true);
}
FSDataOutputStream os = fs.create(f);
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os,
"UTF-8"));
br.write(baseOutputName);
br.close();
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: incubator-mrql
File: MapReduceOperation.java
/**
* The MapReduce physical operator
* @param map_fnc the mapper function
* @param combine_fnc optional in-mapper combiner function
* @param reduce_fnc the reducer function
* @param acc_fnc optional accumulator function
* @param zero the optional zero value for the accumulator
* @param source the input data source
* @param num_reduces number of reducers
* @param stop_counter optional counter used in repeat operation
* @param orderp does the result need to be ordered?
* @return a new data source that contains the result
*/
public final static DataSet mapReduce ( Tree map_fnc, // mapper function
Tree combine_fnc, // optional in-mapper combiner function
Tree reduce_fnc, // reducer function
Tree acc_fnc, // optional accumulator function
Tree zero, // the optional zero value for the accumulator
DataSet source, // input data source
int num_reduces, // number of reducers
String stop_counter, // optional counter used in repeat operation
boolean orderp ) // does the result need to be ordered?
throws Exception {
conf = MapReduceEvaluator.clear_configuration(conf);
String newpath = new_path(conf);
conf.set("mrql.mapper",map_fnc.toString());
if (combine_fnc != null)
conf.set("mrql.combiner",combine_fnc.toString());
conf.set("mrql.reducer",reduce_fnc.toString());
if (zero != null) { // will use in-mapper combiner
conf.set("mrql.accumulator",acc_fnc.toString());
conf.set("mrql.zero",zero.toString());
} else conf.set("mrql.zero","");
conf.set("mrql.counter",stop_counter);
setupSplits(source,conf);
Job job = new Job(conf,newpath);
distribute_compiled_arguments(job.getConfiguration());
job.setJarByClass(MapReducePlan.class);
job.setOutputKeyClass(MRContainer.class);
job.setOutputValueClass(MRContainer.class);
job.setPartitionerClass(MRContainerPartitioner.class);
job.setSortComparatorClass(MRContainerKeyComparator.class);
job.setGroupingComparatorClass(MRContainerKeyComparator.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
for (DataSource p: source.source)
MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,MRMapper.class);
FileOutputFormat.setOutputPath(job,new Path(newpath));
job.setReducerClass(MRReducer.class);
if (Config.trace && PlanGeneration.streamed_MapReduce_reducer(reduce_fnc))
System.out.println("Streamed MapReduce reducer");
if (num_reduces > 0)
job.setNumReduceTasks(num_reduces);
job.waitForCompletion(true);
long c = (stop_counter.equals("-")) ? 0
: job.getCounters().findCounter("mrql",stop_counter).getValue();
DataSource s = new BinaryDataSource(newpath,conf);
s.to_be_merged = orderp;
return new DataSet(s,c,outputRecords(job));
}
Project: RStore
File: IncreQueryOperator.java
public static void exe(DataCube dc) throws IOException,
InterruptedException, ClassNotFoundException, ParseException {
String datacubeName = dc.cuboid.cuboidName;
String tableName = dc.tableName;
// Path inputPath1 = new Path(datacubeName);
Path inputPath1 = dc.cubePath;
Path inputPath2 = new Path(tableName);
Path outputPath = new Path(dc.outputPath);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
Configuration config = HBaseConfiguration.create();
config.set("datacube", dc.serialize());
Job job = new Job(config, "Incre Querying Operator");
job.setJarByClass(IncreQueryOperator.class); // class that contains
// mapper and reducer
Scan scan = new Scan();
// Scan scan1 = new Scan(startDate.getTime(), endDate.getTime());
// Scan scan = new Scan(dc.cubeRefreshTime, dc.queryingTime);
scan.setCaching(500); // 1 is the default in Scan, which will be bad for
// MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
TableMapReduceUtil.initTableMapperJob(tableName, // input table
scan, RealTimeScanMapper.class, // mapper class
Text.class, // mapper output key
FloatWritable.class, // mapper output value
job);
job.setReducerClass(CubeAggregationReducer.class); // reducer class
job.setOutputFormatClass(TextOutputFormat.class);
MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class,
CubeScanMapper.class);
MultipleInputs.addInputPath(job, inputPath2, TableInputFormat.class,
RealTimeScanMapper.class);
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
}
Project: mgfsm
File: MaxFsmJob.java
public static void runMaxFsmJob() throws IOException, InterruptedException,
ClassNotFoundException {
Type outputType = commonConfig.getType();
int numberOfReducers = commonConfig.getNumberOfReducers();
Job mCJob = new Job();
mCJob.setJarByClass(MaxFsmJob.class);
mCJob.setJobName("MG-FSM+");
mCJob.getConfiguration().setEnum("org.apache.mahout.fsm.partitioning.outputType", outputType);
MultipleInputs.addInputPath(mCJob,
new Path(commonConfig.getTmpPath()),
SequenceFileInputFormat.class,
MaxFsmMapper.class);
MultipleInputs.addInputPath(mCJob,
commonConfig.getFlistPath(),
SequenceFileInputFormat.class,
MaxFsmMapper.class);
FileOutputFormat.setOutputPath(mCJob, new Path(commonConfig.getOutputPath()));
mCJob.setSortComparatorClass(BytesWritable.Comparator.class);
mCJob.setOutputFormatClass(SequenceFileOutputFormat.class);
mCJob.setCombinerClass(MaxFsmCombiner.class);
mCJob.setReducerClass(MaxFsmReducer.class);
mCJob.setMapOutputKeyClass(BytesWritable.class);
mCJob.setMapOutputValueClass(LongWritable.class);
mCJob.setNumReduceTasks(numberOfReducers);
mCJob.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");
mCJob.setOutputKeyClass(IntArrayWritable.class);
mCJob.setOutputValueClass(LongWritable.class);
// waitForCompletion(true) blocks until the job finishes, so no extra polling loop is needed.
mCJob.waitForCompletion(true);
}