public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(Multiplication.class);

    // Per-path mappers are assigned below via MultipleInputs, which installs its own
    // delegating mapper; the ChainMapper registrations are therefore not part of the
    // effective map pipeline, and the original duplicate setMapperClass() calls
    // (which would only have overridden each other) have been dropped.
    ChainMapper.addMapper(job, CooccurrenceMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);
    ChainMapper.addMapper(job, RatingMapper.class, Text.class, Text.class, Text.class, Text.class, conf);

    job.setReducerClass(MultiplicationReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);

    TextOutputFormat.setOutputPath(job, new Path(args[2]));
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(UnitMultiplication.class); ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf); ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf); job.setReducerClass(MultiplicationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); job.waitForCompletion(true); }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); conf.setFloat("beta", Float.parseFloat(args[3])); Job job = Job.getInstance(conf); job.setJarByClass(UnitMultiplication.class); ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf); ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf); job.setReducerClass(MultiplicationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); job.waitForCompletion(true); }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); conf.setFloat("beta", Float.parseFloat(args[3])); Job job = Job.getInstance(conf); job.setJarByClass(UnitSum.class); ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf); ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf); job.setReducerClass(SumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); job.waitForCompletion(true); }
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Reduce-side join");
    job.setJarByClass(ReduceJoin.class);
    job.setReducerClass(ReduceJoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class);

    Path outputPath = new Path(args[2]);
    FileOutputFormat.setOutputPath(job, outputPath);
    // Remove any previous output so the job does not fail with "output directory already exists";
    // the two-argument delete(path, recursive) replaces the deprecated single-argument form.
    outputPath.getFileSystem(conf).delete(outputPath, true);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "book merger"); job.setJarByClass(BookMerger.class); job.setCombinerClass(BookDataReducer.class); job.setReducerClass(BookDataReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BookMapWritable.class); FileOutputFormat.setOutputPath(job, new Path(args[0])); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CanonicalMapper.class); MultipleInputs.addInputPath(job, new Path(args[2]), TextInputFormat.class, LibraryThingMapper.class); MultipleInputs.addInputPath(job, new Path(args[3]), TextInputFormat.class, LTScrapedMapper.class); job.waitForCompletion(true); }
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { for (Input input : ((MultiInput)annotation).value()) { Path path = getInputAsPath(input.path()); if (input.mapper() == Mapper.class) { MultipleInputs.addInputPath(job, path, input.format()); } else { MultipleInputs.addInputPath(job, path, input.format(), input.mapper()); // Need to call again here so the call is captured by our aspect which // will replace it with the annotated delegating mapper class for resource // injection if required. job.setMapperClass(DelegatingMapper.class); } } }
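For context, a hedged sketch of what a driver class annotated for this processor might look like. The element names (value, path, format, mapper) are inferred from the accessors used above; the class name ExampleTool, the mapper LogsMapper, and the path strings are illustrative assumptions, not part of the original library.

// Illustrative only: assumes @MultiInput wraps an array of @Input entries whose
// path(), format() and mapper() elements match the accessors used by the processor above.
@MultiInput({
    @Input(path = "in/users", format = TextInputFormat.class),                               // no mapper(): plain addInputPath
    @Input(path = "in/logs", format = TextInputFormat.class, mapper = LogsMapper.class)      // custom mapper: captured by the aspect
})
public class ExampleTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "annotated multi-input job");
        job.setJarByClass(ExampleTool.class);
        // The processor shown above is expected to translate the @Input entries into
        // MultipleInputs.addInputPath calls before the job is submitted.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}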
@Test
@Ignore // NOT WORKING
public void testConfigureJobFromClass() {
    Class<?> clazz = TestMapper.class;
    try {
        PowerMockito.mockStatic(MultipleInputs.class);
        // Exercise the code under test first...
        annotationUtil.configureJobFromClass(clazz, job);
        // ...then verify. Note: with PowerMock, verifyStatic() must be followed
        // immediately by the static invocation being verified (e.g. MultipleInputs.addInputPath(...)
        // with argument matchers); that call is still missing here, which is why the
        // test remains ignored.
        PowerMockito.verifyStatic(Mockito.times(6));
    } catch (ToolException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
public int run(String[] args) throws Exception { Job job=Job.getInstance(getConf(), "reduce side join"); job.setJarByClass(getClass()); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class,ReduceSideJoinMasterMap.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class,ReduceSideJoinMasterMap.class); job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(ReducesidejoinPartitioner.class); job.setGroupingComparatorClass(ReduceSideJoinGroupingComparator.class); job.setReducerClass(ReduceSideJoinReduce.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); return job.waitForCompletion(true)?0:1; }
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); //conf.set("mapreduce.fieldoutput.header", "ct_audit,ct_action"); Job job = new Job(conf); job.setJobName("test fieldInput"); job.setJarByClass(TestRun.class); MultipleInputs.addInputPath(job, new Path(args[0]), FieldInputFormat.class, CTMapper.class); job.setNumReduceTasks(0); //FileOutputFormat.setOutputPath(job, new Path(args[1])); FieldOutputFormat.setOutputPath(job, new Path(args[1])); job.setOutputFormatClass(FieldOutputFormat.class); job.submit(); job.waitForCompletion(true); return 0; }
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "PostCommentHeirarchy"); job.setJarByClass(PostCommentHierarchy.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PostMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentMapper.class); job.setReducerClass(PostCommentHierarchyReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 2; }
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(InsDriver.class);
    // job.setMapperClass(PFMapper.class);
    job.setReducerClass(InsReducer.class);
    // job.setNumReduceTasks(0);
    job.setJobName("Participant Adjustment PoC");

    // The business date is passed as the fourth argument and made available to the tasks.
    String busDate = args[3];
    job.getConfiguration().set("BUS_DATE", busDate);

    Path inputPath1 = new Path(args[0]);
    Path inputPath2 = new Path(args[1]);
    Path outputPath = new Path(args[2]);
    MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class, PFMapper.class);
    MultipleInputs.addInputPath(job, inputPath2, TextInputFormat.class, BRMapper.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Submit the job and wait for it to finish.
    job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: Question3 <ratings input> <business input> <output>");
        System.exit(3);
    }
    @SuppressWarnings("deprecation")
    Job job1 = new Job(conf, "averageRating");
    @SuppressWarnings("deprecation")
    Job job2 = new Job(conf, "reduceSideJoin");

    // Job 1: compute the top-ten ratings and stage them in a temporary directory.
    job1.setJarByClass(Question3.class);
    job1.setMapperClass(TopTenMap.class);
    job1.setReducerClass(TopTenReduce.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(FloatWritable.class);
    FileInputFormat.addInputPath(job1, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job1, new Path("/bxr140530/Asgn/temp"));

    // Job 2: reduce-side join of the staged ratings with the business data.
    if (job1.waitForCompletion(true)) {
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(Question3.class);
        job2.setMapperClass(TopTenJoinMap.class);
        job2.setReducerClass(TopTenJoinReduce.class);
        MultipleInputs.addInputPath(job2, new Path("/bxr140530/Asgn/temp"), TextInputFormat.class, TopTenJoinMap.class);
        MultipleInputs.addInputPath(job2, new Path(otherArgs[1]), TextInputFormat.class, BusinessMap.class);
        FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));
        job2.waitForCompletion(true);
    }
}
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath, Class<? extends Mapper<CompositeType,TripleCard,?,?>> mapperClass, String outPath, String auths) throws AccumuloSecurityException { MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass); MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass); job.setMapOutputKeyClass(CompositeType.class); job.setMapOutputValueClass(TripleCard.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outPath)); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(TripleEntry.class); job.setOutputValueClass(CardList.class); }
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String outpath = conf.get(OUTPUTPATH);
    // Set this before the Job is created: Job copies the Configuration, so changes
    // made to conf afterwards would not reach the submitted job.
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    MultipleInputs.addInputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()),
        SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    MultipleInputs.addInputPath(job, new Path(SPOOUT.getAbsolutePath()),
        SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);

    tempDir = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "");
    SequenceFileOutputFormat.setOutputPath(job, new Path(tempDir.getAbsolutePath()));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);

    job.setSortComparatorClass(JoinSelectSortComparator.class);
    job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
    job.setPartitionerClass(JoinSelectPartitioner.class);
    job.setReducerClass(JoinReducer.class);
    job.setNumReduceTasks(32);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
/**
 * Set up the MapReduce job to use Accumulo as an input.
 * @param tableMapper Mapper class to use
 */
protected void configureAccumuloInput(Class<? extends Mapper<Key,Value,?,?>> tableMapper)
        throws AccumuloSecurityException {
    MRReasoningUtils.configureAccumuloInput(job);
    // MultipleInputs requires a Path, but AccumuloInputFormat reads from the configured
    // table rather than the filesystem, so "/tmp/input" is only a placeholder.
    MultipleInputs.addInputPath(job, new Path("/tmp/input"), AccumuloInputFormat.class, tableMapper);
}
/**
 * Set up the MapReduce job to use an RDF file as an input.
 * @param rdfMapper class to use
 */
protected void configureRdfInput(Path inputPath,
        Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) {
    Configuration conf = job.getConfiguration();
    // Read the configured RDF serialization format, defaulting to RDF/XML, and write it
    // back so downstream code always sees an explicit value.
    String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName());
    conf.set(MRUtils.FORMAT_PROP, format);
    MultipleInputs.addInputPath(job, inputPath, RdfFileInputFormat.class, rdfMapper);
}
/** * The MapReduce driver - setup and launch the job. * * @param args the command-line arguments * @return the process exit code * @throws Exception if something goes wrong */ public int run(final String[] args) throws Exception { Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build(); int result = cli.runCmd(); if (result != 0) { return result; } Path usersPath = new Path(cli.getArgValueAsString(Options.USERS)); Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS)); Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT)); Configuration conf = super.getConf(); Job job = new Job(conf); job.setJarByClass(StreamingRepartitionJoin.class); MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class); MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class); ShuffleUtils.configBuilder() .useNewApi() .setSortIndices(KeyFields.USER, KeyFields.DATASET) .setPartitionerIndices(KeyFields.USER) .setGroupIndices(KeyFields.USER) .configure(job.getConfiguration()); job.setReducerClass(Reduce.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); FileOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
/** * The MapReduce driver - setup and launch the job. * * @param args the command-line arguments * @return the process exit code * @throws Exception if something goes wrong */ public int run(final String[] args) throws Exception { Cli cli = Cli.builder().setArgs(args).addOptions(Options.values()).build(); int result = cli.runCmd(); if (result != 0) { return result; } Path usersPath = new Path(cli.getArgValueAsString(Options.USERS)); Path userLogsPath = new Path(cli.getArgValueAsString(Options.USER_LOGS)); Path bloomPath = new Path(cli.getArgValueAsString(Options.BLOOM_FILE)); Path outputPath = new Path(cli.getArgValueAsString(Options.OUTPUT)); Configuration conf = super.getConf(); Job job = new Job(conf); job.setJarByClass(BloomJoin.class); MultipleInputs.addInputPath(job, usersPath, TextInputFormat.class, UserMap.class); MultipleInputs.addInputPath(job, userLogsPath, TextInputFormat.class, UserLogMap.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Tuple.class); job.setReducerClass(Reduce.class); job.addCacheFile(bloomPath.toUri()); job.getConfiguration().set(AbstractFilterMap.DISTCACHE_FILENAME_CONFIG, bloomPath.getName()); FileInputFormat.setInputPaths(job, userLogsPath); FileOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
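The driver above only ships the Bloom filter file through the distributed cache and records its name under AbstractFilterMap.DISTCACHE_FILENAME_CONFIG. A minimal sketch of the mapper side follows; it is not the actual UserLogMap/AbstractFilterMap, and the record layout (tab-separated with the user id in the first field) and the mapper output types are assumptions made for illustration. BloomFilter and Key come from org.apache.hadoop.util.bloom.

// A hedged sketch of a mapper that loads the cached Bloom filter in setup() and
// drops records whose join key is not a probable member.
public static class BloomFilteredLogMap extends Mapper<LongWritable, Text, Text, Text> {
    private final BloomFilter filter = new BloomFilter();

    @Override
    protected void setup(Context ctx) throws IOException {
        // The driver stored the cache file's base name under this config key;
        // files added with job.addCacheFile() are localized into the task working directory.
        String cachedName = ctx.getConfiguration().get(AbstractFilterMap.DISTCACHE_FILENAME_CONFIG);
        try (DataInputStream in = new DataInputStream(new FileInputStream(cachedName))) {
            filter.readFields(in);
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context ctx)
            throws IOException, InterruptedException {
        String user = value.toString().split("\t")[0];  // assumed: user id is the first field
        if (filter.membershipTest(new Key(user.getBytes(StandardCharsets.UTF_8)))) {
            ctx.write(new Text(user), value);
        }
    }
}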
/** The cMap physical operator * @param map_fnc mapper function * @param acc_fnc optional accumulator function * @param zero optional the zero value for the accumulator * @param source input data source * @param stop_counter optional counter used in repeat operation * @return a new data source that contains the result */ public final static DataSet cMap ( Tree map_fnc, // mapper function Tree acc_fnc, // optional accumulator function Tree zero, // optional the zero value for the accumulator DataSet source, // input data source String stop_counter ) // optional counter used in repeat operation throws Exception { conf = MapReduceEvaluator.clear_configuration(conf); String newpath = new_path(conf); conf.set("mrql.mapper",map_fnc.toString()); conf.set("mrql.counter",stop_counter); if (zero != null) { conf.set("mrql.accumulator",acc_fnc.toString()); conf.set("mrql.zero",zero.toString()); } else conf.set("mrql.zero",""); setupSplits(source,conf); Job job = new Job(conf,newpath); distribute_compiled_arguments(job.getConfiguration()); job.setJarByClass(MapReducePlan.class); job.setOutputKeyClass(MRContainer.class); job.setOutputValueClass(MRContainer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); for (DataSource p: source.source) MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,cMapMapper.class); FileOutputFormat.setOutputPath(job,new Path(newpath)); job.setNumReduceTasks(0); job.waitForCompletion(true); long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql",stop_counter).getValue(); return new DataSet(new BinaryDataSource(newpath,conf),c,outputRecords(job)); }
@Override public int run(String[] args) throws Exception { if (args.length != 3) { JobBuilder .printUsage(this, "<ncdc input> <station input> <output>"); return -1; } Job job = Job.getInstance(getConf(), "Join weather records with station names"); job.setJarByClass(getClass()); Path ncdcInputPath = new Path(args[0]); Path stationInputPath = new Path(args[1]); Path outputPath = new Path(args[2]); MultipleInputs.addInputPath(job, ncdcInputPath, TextInputFormat.class, JoinRecordMapper.class); MultipleInputs.addInputPath(job, stationInputPath, TextInputFormat.class, JoinStationMapper.class); FileOutputFormat.setOutputPath(job, outputPath); job.setPartitionerClass(KeyPartitioner.class); job.setGroupingComparatorClass(TextPair.FirstComparator.class); job.setMapOutputKeyClass(TextPair.class); job.setReducerClass(JoinReducer.class); job.setOutputKeyClass(Text.class); return job.waitForCompletion(true) ? 0 : 1; }
/** * Configures the job input for the file-system data source. */ @Override public void configure( final Class<? extends DistributedProcess<?, ?, ?, ?, ?, ?>> pcls, ProcessConfiguration config, Job job) throws JobConfigException { try { int index = 0; InputTokenizer it = config.getInputTokenizer(); while (it.nextToken()) { LOG.debug("Configuring input number: " + (index++)); Path path = new Path(config.getBase(), it.getPath()); if (it.isMapperClassNameDefined()) { @SuppressWarnings("unchecked") Class<? extends MapProcess<?, ?, ?, ?>> mcls = (Class<? extends MapProcess<?, ?, ?, ?>>) DynamicClassLoader .getClassByName(it.getMapperClassName()); JobInput anno = mcls.getAnnotation(JobInput.class); MultipleInputs.addInputPath(job, path, anno.formatClass(), HadoopMapProcessWrapper.class); HadoopMapProcessWrapper.addWrappingProcessClass(job, path, mcls); LOG.debug("Adds input format: " + anno.formatClass()); LOG.debug("Adds mapper: " + mcls); } FileInputFormat.addInputPath(job, path); LOG.debug("Adds input path: " + path); } } catch (PropertyNotFoundException | ClassNotFoundException | IOException exc) { throw new JobConfigException("Failed to configure job input.", exc); } }
private boolean runMrWithLookup(String uuid, TableDescriptor descriptor, List<Path> inprogressPathList, String table, Path fileCache, Path outputPath, int reducerMultipler, Path tmpPath, TableStats tableStats, String snapshot) throws ClassNotFoundException, IOException, InterruptedException { PartitionedInputResult result = buildPartitionedInputData(uuid, tmpPath, descriptor, inprogressPathList, snapshot, fileCache); Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]"); ExistingDataIndexLookupMapper.setSnapshot(job, MRUPDATE_SNAPSHOT); FileInputFormat.addInputPath(job, result._partitionedInputData); MultipleInputs.addInputPath(job, result._partitionedInputData, SequenceFileInputFormat.class, ExistingDataIndexLookupMapper.class); for (Path p : inprogressPathList) { FileInputFormat.addInputPath(job, p); MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class); } BlurOutputFormat.setOutputPath(job, outputPath); BlurOutputFormat.setupJob(job, descriptor); job.setReducerClass(UpdateReducer.class); job.setMapOutputKeyClass(IndexKey.class); job.setMapOutputValueClass(IndexValue.class); job.setPartitionerClass(IndexKeyPartitioner.class); job.setGroupingComparatorClass(IndexKeyWritableComparator.class); BlurOutputFormat.setReducerMultiplier(job, reducerMultipler); boolean success = job.waitForCompletion(true); Counters counters = job.getCounters(); LOG.info("Counters [" + counters + "]"); return success; }
private boolean runMrOnly(TableDescriptor descriptor, List<Path> inprogressPathList, String table, Path fileCache, Path outputPath, int reducerMultipler) throws IOException, ClassNotFoundException, InterruptedException { Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]"); Path tablePath = new Path(descriptor.getTableUri()); BlurInputFormat.setLocalCachePath(job, fileCache); BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT); MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, ExistingDataMapper.class); for (Path p : inprogressPathList) { FileInputFormat.addInputPath(job, p); MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, NewDataMapper.class); } BlurOutputFormat.setOutputPath(job, outputPath); BlurOutputFormat.setupJob(job, descriptor); job.setReducerClass(UpdateReducer.class); job.setMapOutputKeyClass(IndexKey.class); job.setMapOutputValueClass(IndexValue.class); job.setPartitionerClass(IndexKeyPartitioner.class); job.setGroupingComparatorClass(IndexKeyWritableComparator.class); BlurOutputFormat.setReducerMultiplier(job, reducerMultipler); boolean success = job.waitForCompletion(true); Counters counters = job.getCounters(); LOG.info("Counters [" + counters + "]"); return success; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: PostCommentHierarchy <posts> <comments> <outdir>"); System.exit(1); } Job job = new Job(conf, "PostCommentHierarchy"); job.setJarByClass(PostCommentHierarchy.class); MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, PostMapper.class); MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, CommentMapper.class); job.setReducerClass(PostCommentHierarchyReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(otherArgs[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); System.exit(job.waitForCompletion(true) ? 0 : 2); }
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 4) { printUsage(); } Job job = new Job(conf, "ReduceSideJoin"); job.setJarByClass(ReplicatedUserJoin.class); // Use MultipleInputs to set which input uses what mapper // This will keep parsing of each data set separate from a logical // standpoint // The first two elements of the args array are the two inputs MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class); job.getConfiguration().set("join.type", args[2]); job.setReducerClass(UserJoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[3])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 2; }
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 4) { printUsage(); } Job job = new Job(conf, "ReduceSideJoinBloomFilter"); job.setJarByClass(ReduceSideJoinBloomFilter.class); // Use MultipleInputs to set which input uses what mapper // This will keep parsing of each data set separate from a logical // standpoint // The first two elements of the args array are the two inputs MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapperWithBloom.class); job.getConfiguration().set("join.type", args[2]); job.setReducerClass(UserJoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[3])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 2; }
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 4) { printUsage(); } Job job = new Job(conf, "ReduceSideJoin"); job.setJarByClass(ReduceSideJoin.class); // Use MultipleInputs to set which input uses what mapper // This will keep parsing of each data set separate from a logical // standpoint // The first two elements of the args array are the two inputs MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, UserJoinMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentJoinMapper.class); job.getConfiguration().set("join.type", args[2]); job.setReducerClass(UserJoinReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[3])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 2; }
public static int run(String[] args, final int REDUCERS, final int T) throws IOException, ClassNotFoundException, InterruptedException{ //set config Configuration c = new Configuration(); c.set("CPATH", args[2]+"/recommander"); c.setInt("T", T); Job job = new Job(c, "FScore"); //metrics job.setNumReduceTasks(REDUCERS); //Classes job.setJarByClass(FScore.class); // job.setMapperClass(FMapper.class); job.setReducerClass(FReducer.class); //mapOutput,reduceOutput // job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); //IO // FileInputFormat.addInputPaths(job, args[1]+"/ratings"); MultipleInputs.addInputPath(job, new Path(args[1]+"/ratings"), TextInputFormat.class, FMapperRatings.class); MultipleInputs.addInputPath(job, new Path(args[2]+"/recommander"), TextInputFormat.class, FMapperResults.class); FileOutputFormat.setOutputPath(job, new Path(args[2]+"/fscore")); return (job.waitForCompletion(true) ? 0 : -1); }
@Override public int run(String[] args) throws Exception { System.out.println(Arrays.asList(args).toString()); // Parse arguments Path oneRelationPath = new Path(args[0]), twoRelationPath = new Path(args[1]), outputRelationPath = new Path(args[2]); // Setup job Job job = Job.getInstance(conf); job.setJarByClass(Union.class); MultipleInputs.addInputPath(job, oneRelationPath, TextInputFormat.class, UnionMapper.class); MultipleInputs.addInputPath(job, twoRelationPath, TextInputFormat.class, UnionMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); // This is a map-only job job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputRelationPath); // Run job job.submit(); return job.waitForCompletion(true) ? 0 : 1; }
@Override public int run(String[] args) throws Exception { System.out.println(Arrays.asList(args).toString()); // Parse arguments Path leftRelationPath = new Path(args[0]), rightRelationPath = new Path(args[1]), outputRelationPath = new Path(args[2]); conf.set(new JoinMapperLeft().getJoinKeyIndicesKey(), args[3]); conf.set(new JoinMapperRight().getJoinKeyIndicesKey(), args[4]); // Setup job Job job = Job.getInstance(conf); job.setJarByClass(Join.class); MultipleInputs.addInputPath(job, leftRelationPath, TextInputFormat.class, JoinMapperLeft.class); MultipleInputs .addInputPath(job, rightRelationPath, TextInputFormat.class, JoinMapperRight.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(MarkedTuple.class); job.setReducerClass(JoinReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputRelationPath); // Run job job.submit(); return job.waitForCompletion(true) ? 0 : 1; }
public int run(String[] args) throws Exception { Job job = Job.getInstance(config); job.setJarByClass(RollingAverage.class); job.setJobName("RollingAverage"); if( args.length != 4 ) { System.out.printf("Usage: %s <hdfs-input-dir> <hdfs-output-dir> <local-config-file>\n" , args[0]); return -1; } job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(TimeseriesKey.class); job.setMapOutputValueClass(TimeseriesDataPoint.class); job.setMapperClass(RollingAverageMapper.class); job.setReducerClass(RollingAverageReducer.class); job.setPartitionerClass(NaturalKeyPartitioner.class); job.setSortComparatorClass(CompositeKeyComparator.class); job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class); Path inputPath = new Path(args[1]); MultipleInputs.addInputPath(job, inputPath, TextInputFormat.class, RollingAverageMapper.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); loadJobConfig( job, args[3] ); return (job.waitForCompletion(true) ? 0 : 1); }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(UnitMultiplication.class); //how chain two mapper classes? job.setReducerClass(MultiplicationReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class); FileOutputFormat.setOutputPath(job, new Path(args[2])); job.waitForCompletion(true); }
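On the "how chain two mapper classes?" question in the example above: per-input-path mappers are already handled there by MultipleInputs; ChainMapper solves a different problem, piping one mapper's output into a second mapper within the same map task. A minimal sketch of that pattern follows; FirstMapper and SecondMapper are illustrative names, not classes from the example.

// A minimal ChainMapper sketch: SecondMapper consumes FirstMapper's output in the same task.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(UnitMultiplication.class);
    ChainMapper.addMapper(job, FirstMapper.class,
        LongWritable.class, Text.class,   // input key/value of FirstMapper (from TextInputFormat)
        Text.class, Text.class,           // output key/value of FirstMapper
        new Configuration(false));
    ChainMapper.addMapper(job, SecondMapper.class,
        Text.class, Text.class,           // must match FirstMapper's output types
        Text.class, Text.class,
        new Configuration(false));
    job.setReducerClass(MultiplicationReducer.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}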
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // get all args
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: Top10BusRev <review> <business> <output>");
        System.exit(2);
    }

    // Job 1: "Top10BusRev" - compute the ratings and stage them under /datatemp
    Job job = new Job(conf, "Top10BusRev");
    job.setJarByClass(Top10BusRev.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    // uncomment the following line to add the Combiner
    // job.setCombinerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);            // output key type
    job.setOutputValueClass(FloatWritable.class); // output value type
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));  // HDFS path of the input data
    FileOutputFormat.setOutputPath(job, new Path("/datatemp")); // HDFS path for the staged output

    // Job 2: join the staged ratings with the business data once job 1 succeeds
    if (job.waitForCompletion(true)) {
        Job job2 = new Job(conf, "Top10BusRev");
        job2.setJarByClass(Top10BusRev.class);
        job2.setReducerClass(ReduceTop.class);
        MultipleInputs.addInputPath(job2, new Path("/datatemp"), TextInputFormat.class, MapTopRating.class);
        MultipleInputs.addInputPath(job2, new Path(otherArgs[1]), TextInputFormat.class, MapTopBusiness.class);
        // uncomment the following line to add the Combiner
        // job.setCombinerClass(Reduce.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job2, new Path(otherArgs[2]));
        job2.waitForCompletion(true);
    }
}
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf()); job.setJarByClass(Phase4RemoveDuplicatesUsingReduceSideJoins.class); job.setJobName(Phase4RemoveDuplicatesUsingReduceSideJoins.class.getName()); // paths // text files of ids to be deleted String textFilePath = args[0]; // corpus with *.warc.gz String commaSeparatedInputFiles = args[1]; // output String outputPath = args[2]; //second input the look up text file MultipleInputs.addInputPath(job, new Path(textFilePath), TextInputFormat.class, JoinTextMapper.class); //first input the data set (check comma separated availability) MultipleInputs.addInputPath(job, new Path(commaSeparatedInputFiles), WARCInputFormat.class, JoinWARCMapper.class); job.setPartitionerClass(SourceJoiningKeyPartitioner.class); job.setGroupingComparatorClass(SourceJoiningGroupingComparator.class); job.setMapOutputKeyClass(CompositeKey.class); job.setMapOutputValueClass(WARCWritable.class); job.setReducerClass(JoinReducer.class); job.setOutputFormatClass(WARCOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(WARCWritable.class); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); return job.waitForCompletion(true) ? 0 : 1; }
public int run(String[] args) throws Exception {
    String inputLoc1 = args[0];
    String inputLoc2 = args[1];
    String inputLoc3 = args[2];
    String outputLoc = args[3];
    String outputLoc2 = args[4];
    String finalOutput = args[5];

    // Job chaining of multiple jobs.
    // Executing Job 1.
    conf1 = getConf();
    conf1.set("mapred.job.queue.name", "sree");
    Job job1 = new Job(conf1, "MapReduce: Performing Cross Join for 2 tables ");
    job1.setJarByClass(FinalDriver.class);
    MultipleInputs.addInputPath(job1, new Path(inputLoc1), TextInputFormat.class, ConsumerMapper.class);
    MultipleInputs.addInputPath(job1, new Path(inputLoc2), TextInputFormat.class, PurchasesMapper.class);
    job1.setReducerClass(IntmdteJoinReducer.class);
    job1.setMapOutputKeyClass(IntWritable.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(Text.class);
    log.info("Input path to the map-reduce job " + inputLoc1 + " and " + inputLoc2);
    log.info("Output path to the map-reduce job " + outputLoc);
    Path output = new Path(outputLoc);
    FileOutputFormat.setOutputPath(job1, output);
    job1.waitForCompletion(true);

    // Job 2
    conf2 = getConf();
    conf2.set("mapred.job.queue.name", "sree");
    Job job2 = new Job(conf2, "MapReduce: Performing Cross Join for Final table");
    job2.setJarByClass(FinalDriver.class);
    MultipleInputs.addInputPath(job2, new Path(outputLoc), TextInputFormat.class, JoinResultMapper.class);
    MultipleInputs.addInputPath(job2, new Path(inputLoc3), TextInputFormat.class, TransactionMapper.class);
    job2.setReducerClass(IntermediateJoinReducer.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(NullWritable.class);
    job2.setOutputValueClass(Text.class);
    // Log the inputs actually used by job 2 (the original message repeated job 1's paths).
    log.info("Input path to the map-reduce job " + outputLoc + " and " + inputLoc3);
    log.info("Output path to the map-reduce job " + outputLoc2);
    Path output1 = new Path(outputLoc2);
    FileSystem.get(conf2).delete(output1, true);
    FileOutputFormat.setOutputPath(job2, output1);
    job2.waitForCompletion(true);

    // Job 3
    conf3 = getConf();
    conf3.set("mapred.job.queue.name", "sree");
    Job job3 = new Job(conf3, "MapReduce : Final Join ");
    job3.setJarByClass(FinalDriver.class);
    FileInputFormat.addInputPath(job3, new Path(outputLoc2));
    job3.setMapperClass(FinalMapper.class);
    job3.setReducerClass(FinalReducer.class);
    job3.setMapOutputKeyClass(Text.class);
    job3.setMapOutputValueClass(Text.class);
    job3.setOutputKeyClass(NullWritable.class);
    job3.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job3, new Path(finalOutput));
    return job3.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length < 3) {
        System.out.println("Usage: <jar file> <sequence filename(s)> "
            + "<desired number of output sequence files> "
            + "<ABSOLUTE path to hdfs location where the output folder "
            + "will automatically be created>");
        System.exit(0);
    }
    int numOutputFiles = Integer.parseInt(args[args.length - 2]);
    if (numOutputFiles < 1) {
        // guard against a nonsensical reducer count
        numOutputFiles = 1;
    }
    String absPath = args[args.length - 1];
    if (absPath.charAt(absPath.length() - 1) != '/') {
        absPath += "/";
    }
    DateFormat dateFormat = new SimpleDateFormat("ddMMyyyyHHmmss");
    Date date = new Date();
    String baseOutputName = absPath + "SeqReader" + dateFormat.format(date);

    Configuration conf = new Configuration();
    conf.set("BASE_OUTPUT_FILE_NAME", baseOutputName);
    conf.set("NUM_REDUCERS", Integer.toString(numOutputFiles));

    Job job = Job.getInstance(conf);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(SequenceFileToImageMapper.class);
    job.setReducerClass(SequenceFileToImageReducer.class);
    job.setPartitionerClass(SequenceFileToImagePartitioner.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(numOutputFiles);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // All arguments except the last two are input sequence files.
    for (int i = 0; i < args.length - 2; i++) {
        // FileInputFormat.setInputPaths(job, new Path(args[i]));
        MultipleInputs.addInputPath(job, new Path(args[i]), SequenceFileInputFormat.class);
    }
    for (int i = 0; i < numOutputFiles; i++) {
        MultipleOutputs.addNamedOutput(job, "n" + Integer.toString(i),
            SequenceFileOutputFormat.class, Text.class, Text.class);
    }
    job.setJarByClass(SeqReader.class);
    FileOutputFormat.setOutputPath(job, new Path(baseOutputName));

    /* write the output folder location
     * to a file in the destination folder */
    Path f = new Path(absPath + "SeqReader.outputlocation");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(f)) {
        // File already exists. Delete the file before proceeding.
        fs.delete(f, true);
    }
    FSDataOutputStream os = fs.create(f);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write(baseOutputName);
    br.close();

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/** * The MapReduce physical operator * @param map_fnc the mapper function * @param combine_fnc optional in-mapper combiner function * @param reduce_fnc the reducer function * @param acc_fnc optional accumulator function * @param zero optional the zero value for the accumulator * @param source the input data source * @param num_reduces number of reducers * @param stop_counter optional counter used in repeat operation * @param orderp does the result need to be ordered? * @return a new data source that contains the result */ public final static DataSet mapReduce ( Tree map_fnc, // mapper function Tree combine_fnc, // optional in-mapper combiner function Tree reduce_fnc, // reducer function Tree acc_fnc, // optional accumulator function Tree zero, // optional the zero value for the accumulator DataSet source, // input data source int num_reduces, // number of reducers String stop_counter, // optional counter used in repeat operation boolean orderp ) // does the result need to be ordered? throws Exception { conf = MapReduceEvaluator.clear_configuration(conf); String newpath = new_path(conf); conf.set("mrql.mapper",map_fnc.toString()); if (combine_fnc != null) conf.set("mrql.combiner",combine_fnc.toString()); conf.set("mrql.reducer",reduce_fnc.toString()); if (zero != null) { // will use in-mapper combiner conf.set("mrql.accumulator",acc_fnc.toString()); conf.set("mrql.zero",zero.toString()); } else conf.set("mrql.zero",""); conf.set("mrql.counter",stop_counter); setupSplits(source,conf); Job job = new Job(conf,newpath); distribute_compiled_arguments(job.getConfiguration()); job.setJarByClass(MapReducePlan.class); job.setOutputKeyClass(MRContainer.class); job.setOutputValueClass(MRContainer.class); job.setPartitionerClass(MRContainerPartitioner.class); job.setSortComparatorClass(MRContainerKeyComparator.class); job.setGroupingComparatorClass(MRContainerKeyComparator.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); for (DataSource p: source.source) MultipleInputs.addInputPath(job,new Path(p.path),(Class<? extends MapReduceMRQLFileInputFormat>)p.inputFormat,MRMapper.class); FileOutputFormat.setOutputPath(job,new Path(newpath)); job.setReducerClass(MRReducer.class); if (Config.trace && PlanGeneration.streamed_MapReduce_reducer(reduce_fnc)) System.out.println("Streamed MapReduce reducer"); if (num_reduces > 0) job.setNumReduceTasks(num_reduces); job.waitForCompletion(true); long c = (stop_counter.equals("-")) ? 0 : job.getCounters().findCounter("mrql",stop_counter).getValue(); DataSource s = new BinaryDataSource(newpath,conf); s.to_be_merged = orderp; return new DataSet(s,c,outputRecords(job)); }
public static void exe(DataCube dc) throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    String datacubeName = dc.cuboid.cuboidName;
    String tableName = dc.tableName;
    // Path inputPath1 = new Path(datacubeName);
    Path inputPath1 = dc.cubePath;
    Path inputPath2 = new Path(tableName);
    Path outputPath = new Path(dc.outputPath);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        // recursive delete of any previous output (the single-argument delete is deprecated)
        fs.delete(outputPath, true);
    }

    Configuration config = HBaseConfiguration.create();
    config.set("datacube", dc.serialize());
    Job job = new Job(config, "Incre Querying Operator");
    job.setJarByClass(IncreQueryOperator.class); // class that contains mapper and reducer

    Scan scan = new Scan();
    // Scan scan1 = new Scan(startDate.getTime(), endDate.getTime());
    // Scan scan = new Scan(dc.cubeRefreshTime, dc.queryingTime);
    scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false);  // don't set to true for MR jobs

    TableMapReduceUtil.initTableMapperJob(
        tableName,                 // input table
        scan,
        RealTimeScanMapper.class,  // mapper class
        Text.class,                // mapper output key
        FloatWritable.class,       // mapper output value
        job);

    job.setReducerClass(CubeAggregationReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    MultipleInputs.addInputPath(job, inputPath1, TextInputFormat.class, CubeScanMapper.class);
    MultipleInputs.addInputPath(job, inputPath2, TableInputFormat.class, RealTimeScanMapper.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.waitForCompletion(true);
}
public static void runMaxFsmJob() throws IOException, InterruptedException, ClassNotFoundException {
    Type outputType = commonConfig.getType();
    int numberOfReducers = commonConfig.getNumberOfReducers();

    Job mCJob = new Job();
    mCJob.setJarByClass(MaxFsmJob.class);
    mCJob.setJobName("MG-FSM+");
    mCJob.getConfiguration().setEnum("org.apache.mahout.fsm.partitioning.outputType", outputType);

    MultipleInputs.addInputPath(mCJob, new Path(commonConfig.getTmpPath()), SequenceFileInputFormat.class, MaxFsmMapper.class);
    MultipleInputs.addInputPath(mCJob, commonConfig.getFlistPath(), SequenceFileInputFormat.class, MaxFsmMapper.class);
    FileOutputFormat.setOutputPath(mCJob, new Path(commonConfig.getOutputPath()));

    mCJob.setSortComparatorClass(BytesWritable.Comparator.class);
    mCJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mCJob.setCombinerClass(MaxFsmCombiner.class);
    mCJob.setReducerClass(MaxFsmReducer.class);
    mCJob.setMapOutputKeyClass(BytesWritable.class);
    mCJob.setMapOutputValueClass(LongWritable.class);
    mCJob.setNumReduceTasks(numberOfReducers);
    mCJob.getConfiguration().set("mapreduce.cluster.reducememory.mb", "4096");
    mCJob.setOutputKeyClass(IntArrayWritable.class);
    mCJob.setOutputValueClass(LongWritable.class);

    // waitForCompletion(true) blocks until the job finishes, so the original
    // busy-wait loop on isComplete() afterwards was unnecessary and has been removed.
    mCJob.waitForCompletion(true);
}