Example source code for the Java class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer
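Before the project-by-project listing, here is a minimal, self-contained sketch of how LongSumReducer is typically wired into a job: it is a stock Reducer<KEY, LongWritable, KEY, LongWritable> that sums the LongWritable values for each key, so the same class can serve as combiner and reducer. The TokenMapper class and the job wiring below are invented for illustration and do not come from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class LongSumExample {

  // Hypothetical mapper: emits (token, 1) for every whitespace-separated token.
  public static class TokenMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      for (String token : value.toString().split("\\s+")) {
        if (!token.isEmpty()) {
          word.set(token);
          context.write(word, ONE);
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "long sum example");
    job.setJarByClass(LongSumExample.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(TokenMapper.class);
    // Summing longs is associative and commutative, so the same class can
    // run as the combiner and as the reducer.
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}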
Project: ditb
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: LCIndex-HBase-0.94.16
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: HIndex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: hadoop2
File: Main.java
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = super.getConf();
  // TODO: Add to the configuration the postcode index in customer csv file
  // TODO: Add to the configuration the referential file name
  Job job = Job.getInstance(conf, JOB_NAME);
  // TODO: Add the cache file URI to the job
  job.setJarByClass(Main.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapperClass(CsvFieldCountMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Project: hadoop2
File: Main.java
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = super.getConf();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
  conf.set(CsvFieldCountMapper.FILTER_CACHE_FILE_NAME, "fr_urban_postcodes.txt");
  Job job = Job.getInstance(conf, JOB_NAME);
  job.addCacheFile(new Path(args[0]).toUri());
  job.setJarByClass(Main.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapperClass(CsvFieldCountMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
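The two drivers above configure a CsvFieldCountMapper through CSV_FIELD_IDX and FILTER_CACHE_FILE_NAME, but the mapper itself is not part of this listing. The following is a hypothetical reconstruction, assuming the cached referential file is read by its base name from the task's working directory; only the two constant names are taken from the driver code, and everything else (including the configuration key strings) is guessed.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical sketch of CsvFieldCountMapper: emits (field value, 1) for one
// CSV column, optionally keeping only values found in a cached referential file.
public class CsvFieldCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
  public static final String CSV_FIELD_IDX = "csv.field.idx";             // assumed key string
  public static final String FILTER_CACHE_FILE_NAME = "filter.cache.file"; // assumed key string

  private static final LongWritable ONE = new LongWritable(1);
  private final Text outKey = new Text();
  private int fieldIdx;
  private Set<String> filter; // null means "no filtering"

  @Override
  protected void setup(Context context) throws IOException {
    fieldIdx = context.getConfiguration().getInt(CSV_FIELD_IDX, 0);
    String cacheFileName = context.getConfiguration().get(FILTER_CACHE_FILE_NAME);
    if (cacheFileName != null) {
      filter = new HashSet<>();
      // Files added with job.addCacheFile() are localized and, by default,
      // linked into the task's working directory under their base name.
      try (BufferedReader reader = new BufferedReader(new FileReader(cacheFileName))) {
        String line;
        while ((line = reader.readLine()) != null) {
          filter.add(line.trim());
        }
      }
    }
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] fields = value.toString().split(",");
    if (fieldIdx < fields.length) {
      String field = fields[fieldIdx].trim();
      if (filter == null || filter.contains(field)) {
        outKey.set(field);
        context.write(outKey, ONE);
      }
    }
  }
}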
Project: Hanhan-Hadoop-MapReduce
File: WordCountImproved.java
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCountImproved.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: IRIndex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: hbase
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: warcexamples
File: ServerType.java
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "Extract server type");
  job.setJarByClass(ServerType.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(ServerTypeExtracter.class);
  job.setReducerClass(LongSumReducer.class);
  job.setInputFormatClass(WarcInputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  // Execute job and return status
  return job.waitForCompletion(true) ? 0 : 1;
}
Project: PyroDB
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: c5
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: HBase-Research
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: hbase-0.94.8-qod
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: DominoHBase
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: hindex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  job.waitForCompletion(true);
}
Project: MapReduceJobs
File: InverseWCJob.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "invwordcount");
  job.setJarByClass(InverseWCJob.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(InverseWordCountMapper.class);
  job.setReducerClass(LongSumReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Project: common-crawl-mapreduce
File: WATServerType.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WATServerType.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.wat.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(ServerTypeMap.ServerMapper.class);
  job.setReducerClass(LongSumReducer.class);
  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
Project: common-crawl-mapreduce
File: WETWordCount.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WETWordCount.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(org.commoncrawl.examples.mapreduce.WordCounterMap.WordCountMapper.class);
  // The reducer is quite useful in the word frequency task
  job.setReducerClass(LongSumReducer.class);
  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
Project: common-crawl-mapreduce
File: WARCTagCounter.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and -1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WARCTagCounter.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(TagCounterMap.TagCounterMapper.class);
  job.setReducerClass(LongSumReducer.class);
  return job.waitForCompletion(true) ? 0 : -1;
}
Project: LCIndex-HBase-0.94.16
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: HIndex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is the default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      DescriptiveStatistics.class, // commons-math
      ObjectMapper.class); // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: hadoop2
File: PostcodeMRTest.java
@Before
public void before() throws URISyntaxException {
  CsvFieldCountMapper mapper = new CsvFieldCountMapper();
  LongSumReducer<Text> combiner = new LongSumReducer<Text>();
  LongSumReducer<Text> reducer = new LongSumReducer<Text>();
  mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer, combiner);
  Configuration conf = mapReduceDriver.getConfiguration();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
  conf.set(CsvFieldCountMapper.FILTER_CACHE_FILE_NAME, "fr_urban_postcodes.txt");
  mapReduceDriver.addCacheFile(new File("target/test-classes/referential/fr_urban_postcodes.txt").toURI());
}
Project: hadoop2
File: PostcodeMRTest.java
@Before
public void before() throws URISyntaxException {
  CsvFieldCountMapper mapper = new CsvFieldCountMapper();
  LongSumReducer<Text> combiner = new LongSumReducer<Text>();
  LongSumReducer<Text> reducer = new LongSumReducer<Text>();
  mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer, combiner);
  Configuration conf = mapReduceDriver.getConfiguration();
  conf.setInt(CsvFieldCountMapper.CSV_FIELD_IDX, 2);
}
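The @Before methods above only assemble the MRUnit driver. For context, a test method inside the same test class might look like the sketch below; the CSV line and the expected (postcode, count) pair are invented for illustration.

@Test
public void countsPostcodeField() throws IOException {
  // Field index 2 of the CSV line is the postcode; the mapper emits
  // (postcode, 1) and LongSumReducer sums the counts per postcode.
  mapReduceDriver
      .withInput(new LongWritable(0), new Text("jean,dupont,75001"))
      .withOutput(new Text("75001"), new LongWritable(1))
      .runTest();
}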
Project: cloudera-homework
File: StringPairTestDriver.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: " + this.getClass().getName() + " <input dir> <output dir>\n");
    return -1;
  }
  Job job = new Job(getConf());
  job.setJarByClass(StringPairTestDriver.class);
  job.setJobName("Custom Writable Comparable");
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  /*
   * LongSumReducer is a Hadoop API class that sums values into a
   * LongWritable. It works with any key type, and therefore supports
   * the new StringPairWritable as a key type.
   */
  job.setReducerClass(LongSumReducer.class);
  job.setMapperClass(StringPairMapper.class);
  /*
   * Set the key output class for the job
   */
  job.setOutputKeyClass(StringPairWritable.class);
  /*
   * Set the value output class for the job
   */
  job.setOutputValueClass(LongWritable.class);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
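The comment above notes that LongSumReducer accepts any key type, which is why a custom WritableComparable key works. StringPairWritable itself is not shown anywhere in this listing; a minimal plausible implementation could look like the following sketch (not the course's actual class).

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// Hypothetical sketch of StringPairWritable: a pair of strings usable as a
// MapReduce key. Keys must serialize deterministically and define an ordering
// so that the shuffle can sort and group them.
public class StringPairWritable implements WritableComparable<StringPairWritable> {
  private String left = "";
  private String right = "";

  public StringPairWritable() {
    // Hadoop instantiates Writables reflectively, so a no-arg constructor is required.
  }

  public StringPairWritable(String left, String right) {
    this.left = left;
    this.right = right;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeUTF(left);
    out.writeUTF(right);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    left = in.readUTF();
    right = in.readUTF();
  }

  @Override
  public int compareTo(StringPairWritable other) {
    int cmp = left.compareTo(other.left);
    return cmp != 0 ? cmp : right.compareTo(other.right);
  }

  @Override
  public String toString() {
    return "(" + left + "," + right + ")";
  }
}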
Project: cloudera-homework
File: StringPairTestDriver.java
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.printf("Usage: " + this.getClass().getName() + " <input dir> <output dir>\n");
    return -1;
  }
  Job job = new Job(getConf());
  job.setJarByClass(StringPairTestDriver.class);
  job.setJobName("Custom Writable Comparable");
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  /*
   * LongSumReducer is a Hadoop API class that sums values into a
   * LongWritable. It works with any key type, and therefore supports
   * the new StringPairWritable as a key type.
   */
  job.setReducerClass(LongSumReducer.class);
  job.setMapperClass(StringPairMapper.class);
  /*
   * Set the key output class for the job
   */
  job.setOutputKeyClass(StringPairWritable.class);
  /*
   * Set the value output class for the job
   */
  job.setOutputValueClass(LongWritable.class);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Project: IRIndex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: PyroDB
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is the default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      Histogram.class, // yammer metrics
      ObjectMapper.class); // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: c5
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: cc-warc-examples
File: WATServerType.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WATServerType.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.wat.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(ServerTypeMap.ServerMapper.class);
  job.setReducerClass(LongSumReducer.class);
  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
Project: cc-warc-examples
File: WETWordCount.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WETWordCount.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(WordCounterMap.WordCountMapper.class);
  // The reducer is quite useful in the word frequency task
  job.setReducerClass(LongSumReducer.class);
  if (job.waitForCompletion(true)) {
    return 0;
  } else {
    return 1;
  }
}
Project: cc-warc-examples
File: WARCTagCounter.java
/**
 * Builds and runs the Hadoop job.
 * @return 0 if the Hadoop job completes successfully and -1 otherwise.
 */
@Override
public int run(String[] arg0) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(WARCTagCounter.class);
  job.setNumReduceTasks(1);
  String inputPath = "data/*.warc.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
  //inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
  LOG.info("Input path: " + inputPath);
  FileInputFormat.addInputPath(job, new Path(inputPath));
  String outputPath = "/tmp/cc/";
  FileSystem fs = FileSystem.newInstance(conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(WARCFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(TagCounterMap.TagCounterMapper.class);
  job.setReducerClass(LongSumReducer.class);
  return job.waitForCompletion(true) ? 0 : -1;
}
Project: HBase-Research
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: hbase-0.94.8-qod
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: logparser
File: Wordcount.java
@Override
public int run(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: wordcount <in> <out>");
    return 2;
  }
  conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);
  // A ',' separated list of fields
  conf.set("nl.basjes.parse.apachehttpdlogline.fields",
      "STRING:request.status.last");
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(Wordcount.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);
  // The configuration should contain a reference to your NameNode.
  FileSystem fs = FileSystem.get(conf);
  // Passing "true" deletes the given output folder recursively if it exists.
  Path outputPath = new Path(otherArgs[1]);
  fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
Project: DominoHBase
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Project: hindex
File: PerformanceEvaluation.java
private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Path inputDir = writeInputFile(this.conf);
  this.conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  this.conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = new Job(this.conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  // Add a Class from the hbase.jar so it gets registered too.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      org.apache.hadoop.hbase.util.Bytes.class);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}