Java class org.apache.hadoop.mapreduce.lib.input.TextInputFormat: example source code
Project: Wikipedia-Index
File: DF.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJobName("DocumentFrequencyCount");
job.setJarByClass(DF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(DFMap.class);
job.setReducerClass(DFReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index
File: MaxThreeLabel.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJobName("MaxThreeLabel");
job.setJarByClass(MaxThreeLabel.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(MaxThreeLabelMap.class);
job.setReducerClass(MaxThreeLabelReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
Project: Wikipedia-Index
File: TF_IDF.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJobName("TF-IDFCount");
job.setJarByClass(TF_IDF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(TF_IDFMap.class);
job.setReducerClass(TF_IDFReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
Project: hadoop
File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
throws IOException {
Configuration conf = getConf();
conf.setBoolean(FAIL_MAP, failMappers);
conf.setBoolean(FAIL_REDUCE, failReducers);
Job job = Job.getInstance(conf, "fail");
job.setJarByClass(FailJob.class);
job.setMapperClass(FailMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(FailReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
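// NullOutputFormat discards all records; this job exists only to exercise task failures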
job.setInputFormatClass(TextInputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("Fail job");
FileInputFormat.addInputPath(job, inputFile);
return job;
}
Project: hadoop
File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = Job.getInstance(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
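// LazyOutputFormat defers file creation until the first record is written,
// so reducers that emit nothing leave no empty part files behind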
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
Project: hadoop
File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
Path outDir = new Path("output");
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "user name check");
job.setJarByClass(UserNamePermission.class);
job.setMapperClass(UserNamePermission.UserNameMapper.class);
job.setCombinerClass(UserNamePermission.UserNameReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(UserNamePermission.UserNameReducer.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, outDir);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: hadoop
File: FieldSelectionMapper.java
public void setup(Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
this.fieldSeparator =
conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
this.mapOutputKeyValueSpec =
conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
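// TextInputFormat keys are byte offsets rather than data fields, so the
// input key is ignored whenever that format is configured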
try {
this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
context.getInputFormatClass().getCanonicalName());
} catch (ClassNotFoundException e) {
throw new IOException("Input format class not found", e);
}
allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
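For reference, a minimal driver-side sketch (hypothetical, not taken from any of the projects above) of how the spec consumed by this setup() is typically supplied; the "0:1-" spec and the comma separator are illustrative assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper;

public class FieldSelectionSpecExample {
public static void main(String[] args) {
Configuration conf = new Configuration();
// hypothetical spec: map output key = field 0, value = fields 1 through the end
conf.set(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0:1-");
// hypothetical separator; as shown above, the mapper defaults to "\t"
conf.set(FieldSelectionHelper.DATA_FIELD_SEPERATOR, ",");
}
}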
Project: mapreduce-samples
File: Multiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(Multiplication.class);
ChainMapper.addMapper(job, CooccurrenceMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, RatingMapper.class, Text.class, Text.class, Text.class, Text.class, conf);
// Note: the MultipleInputs.addInputPath calls below override the ChainMapper
// wiring above, binding one mapper to each input path
job.setReducerClass(MultiplicationReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: Sum.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(SumMapper.class);
job.setReducerClass(SumReducer.class);
job.setJarByClass(Sum.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
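// TextInputFormat and TextOutputFormat inherit these static path setters
// from FileInputFormat and FileOutputFormat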
TextInputFormat.setInputPaths(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: DataDividerByUser.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(DataDividerMapper.class);
job.setReducerClass(DataDividerReducer.class);
job.setJarByClass(DataDividerByUser.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
TextInputFormat.setInputPaths(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: Normalize.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(NormalizeMapper.class);
job.setReducerClass(NormalizeReducer.class);
job.setJarByClass(Normalize.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
TextInputFormat.setInputPaths(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitMultiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(UnitMultiplication.class);
ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
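// Note: the MultipleInputs.addInputPath calls below override the ChainMapper
// wiring above, binding one mapper to each input path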
job.setReducerClass(MultiplicationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitMultiplication.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitMultiplication.class);
ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
job.setReducerClass(MultiplicationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: mapreduce-samples
File: UnitSum.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitSum.class);
ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);
job.setReducerClass(SumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}
Project: big-data-benchmark
File: HadoopWordCount.java
public static void main(String[] args) throws Exception {
BasicConfigurator.configure();
Configuration conf = new Configuration();
conf.setQuietMode(true);
Job job = Job.getInstance(conf, "WordCount");
job.setJarByClass(HadoopWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));
long t = System.currentTimeMillis();
job.waitForCompletion(true);
System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
Project: aliyun-oss-hadoop-fs
File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
throws IOException {
Configuration conf = getConf();
conf.setBoolean(FAIL_MAP, failMappers);
conf.setBoolean(FAIL_REDUCE, failReducers);
Job job = Job.getInstance(conf, "fail");
job.setJarByClass(FailJob.class);
job.setMapperClass(FailMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(FailReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("Fail job");
FileInputFormat.addInputPath(job, inputFile);
return job;
}
Project: aliyun-oss-hadoop-fs
File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = Job.getInstance(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
Project: aliyun-oss-hadoop-fs
File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
Path outDir = new Path("output");
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "user name check");
job.setJarByClass(UserNamePermission.class);
job.setMapperClass(UserNamePermission.UserNameMapper.class);
job.setCombinerClass(UserNamePermission.UserNameReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(UserNamePermission.UserNameReducer.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, outDir);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: aliyun-oss-hadoop-fs
File: FieldSelectionMapper.java
public void setup(Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
this.fieldSeparator =
conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
this.mapOutputKeyValueSpec =
conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
try {
this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
context.getInputFormatClass().getCanonicalName());
} catch (ClassNotFoundException e) {
throw new IOException("Input format class not found", e);
}
allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: mutation-server
File: SortJob.java
@Override
public void setupJob(Job job) {
job.setJarByClass(SortJob.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(SortMap.class);
job.setReducerClass(SortReducer.class);
job.setMapOutputKeyClass(ReadKey.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setSortComparatorClass(ReadKeyComparator.class);
job.setPartitionerClass(ReadKeyPartitioner.class);
job.setGroupingComparatorClass(ReadKeyGroupingComparator.class);
}
Project: big-c
File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
throws IOException {
Configuration conf = getConf();
conf.setBoolean(FAIL_MAP, failMappers);
conf.setBoolean(FAIL_REDUCE, failReducers);
Job job = Job.getInstance(conf, "fail");
job.setJarByClass(FailJob.class);
job.setMapperClass(FailMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(FailReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("Fail job");
FileInputFormat.addInputPath(job, inputFile);
return job;
}
Project: big-c
File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = Job.getInstance(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
Project: big-c
File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
Path outDir = new Path("output");
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "user name check");
job.setJarByClass(UserNamePermission.class);
job.setMapperClass(UserNamePermission.UserNameMapper.class);
job.setCombinerClass(UserNamePermission.UserNameReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(UserNamePermission.UserNameReducer.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, outDir);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: big-c
File: FieldSelectionMapper.java
public void setup(Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
this.fieldSeparator =
conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
this.mapOutputKeyValueSpec =
conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
try {
this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
context.getInputFormatClass().getCanonicalName());
} catch (ClassNotFoundException e) {
throw new IOException("Input format class not found", e);
}
allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: Bigdata
File: Question4.java
public static void main(String args[]) throws IOException,InterruptedException, ClassNotFoundException, URISyntaxException {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: Question4 <cacheFile> <in> <out>");
System.exit(3);
}
// validate arguments before using them, and register the cache file
// before the Job copies the Configuration
conf.set("cachefile", otherArgs[0]);
DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);
Job job = new Job(conf, "Question4");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
job.setJarByClass(Question4.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
job.waitForCompletion(true);
}
Project: hadoop-knn
File: KNN.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "KNN");
job.setJarByClass(KNN.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setMapperClass(KnnMapper.class);
job.setReducerClass(KnnReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
Project: Data-Science-with-Hadoop
File: ReduceJoin.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "Reduce-side join");
job.setJarByClass(ReduceJoin.class);
job.setReducerClass(ReduceJoinReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, SalesRecordMapper.class) ;
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, AccountRecordMapper.class) ;
Path outputPath = new Path(args[2]);
FileOutputFormat.setOutputPath(job, outputPath);
// clear any previous output so the job does not fail on an existing directory
outputPath.getFileSystem(conf).delete(outputPath, true);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: accumulo-examples
File: WordCount.java
@Override
public int run(String[] args) throws Exception {
Opts opts = new Opts();
opts.parseArgs(WordCount.class.getName(), args);
Job job = Job.getInstance(getConf());
job.setJobName(WordCount.class.getName());
job.setJarByClass(this.getClass());
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
job.setMapperClass(MapClass.class);
job.setNumReduceTasks(0);
job.setOutputFormatClass(AccumuloOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Mutation.class);
opts.setAccumuloConfigs(job);
job.waitForCompletion(true);
return 0;
}
Project: envelope
File: TestFileSystemInput.java
@Test
public void readInputFormat() throws Exception {
Map<String, Object> paramMap = new HashMap<>();
paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, TextInputFormat.class.getCanonicalName());
paramMap.put(FileSystemInput.INPUT_FORMAT_KEY_CONFIG, LongWritable.class.getCanonicalName());
paramMap.put(FileSystemInput.INPUT_FORMAT_VALUE_CONFIG, Text.class.getCanonicalName());
paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
config = ConfigFactory.parseMap(paramMap);
FileSystemInput formatInput = new FileSystemInput();
formatInput.configure(config);
Dataset<Row> results = formatInput.read();
assertEquals("Invalid number of rows", 4, results.count());
assertEquals("Invalid first row result", 0L, results.first().getLong(0));
assertEquals("Invalid first row result", "One,Two,Three,Four", results.first().getString(1));
}
Project: TopPI
File: TopPIoverHadoop.java
private boolean bigItemCount(String output) throws IOException, ClassNotFoundException, InterruptedException {
Job job = Job.getInstance(this.getConf(), "Counting items from " + this.input);
job.setJarByClass(TopPIoverHadoop.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(this.input));
FileOutputFormat.setOutputPath(job, new Path(output));
job.setMapperClass(ItemBigCountingMapper.class);
job.setReducerClass(ItemBigCountingReducer.class);
boolean success = job.waitForCompletion(true);
if (success) {
Counter rebasingMaxID = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
this.getConf().setInt(KEY_REBASING_MAX_ID, (int) rebasingMaxID.getValue());
}
return success;
}
Project: TopPI
File: TopPIoverHadoop.java
private boolean filterInput(String output, String rebasingMapPath) throws IOException, ClassNotFoundException,
InterruptedException {
Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
job.setJarByClass(TopPIoverHadoop.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(ConcatenatedTransactionsWritable.class);
DistCache.copyToCache(job, rebasingMapPath);
FileInputFormat.addInputPath(job, new Path(input));
FileOutputFormat.setOutputPath(job, new Path(output));
job.setMapperClass(FilteringMapper.class);
job.setNumReduceTasks(0);
return job.waitForCompletion(true);
}
Project: geomesa-tutorials
File: GDELTIngest.java
private static void runMapReduceJob(String featureName,
Map<String, String> dsConf,
Path mapredCSVFilePath) throws Exception {
Job job = Job.getInstance(new Configuration());
job.setJobName("GeoMesa GDELT Ingest");
job.setJarByClass(GDELTIngest.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(GDELTIngestMapper.class);
job.setOutputFormatClass(GeoMesaOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(SimpleFeature.class);
job.setNumReduceTasks(0);
FileInputFormat.setInputPaths(job, mapredCSVFilePath);
GeoMesaOutputFormat.configureDataStore(job, dsConf);
job.getConfiguration().set(FEATURE_NAME, featureName);
job.submit();
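// waitForCompletion(true) only monitors progress here, since submit() has already started the job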
if (!job.waitForCompletion(true)) {
throw new Exception("Job failed");
}
}
Project: flink
File: WordCount.java
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: WordCount <input path> <result path>");
return;
}
final String inputPath = args[0];
final String outputPath = args[1];
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Set up the Hadoop Input Format
Job job = Job.getInstance();
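// Wrap Hadoop's TextInputFormat so Flink can consume its (LongWritable, Text) records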
HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
TextInputFormat.addInputPath(job, new Path(inputPath));
// Create a Flink job with it
DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
// Tokenize the line and convert from Writable "Text" to String for better handling
DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
// Sum up the words
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
// Convert String back to Writable "Text" for use with Hadoop Output Format
DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
// Set up Hadoop Output Format
HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set both the new and the deprecated property name
TextOutputFormat.setOutputPath(job, new Path(outputPath));
// Output & Execute
hadoopResult.output(hadoopOutputFormat);
env.execute("Word Count");
}
Project: hadoop-2.6.0-cdh5.4.3
File: FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
throws IOException {
Configuration conf = getConf();
conf.setBoolean(FAIL_MAP, failMappers);
conf.setBoolean(FAIL_REDUCE, failReducers);
Job job = Job.getInstance(conf, "fail");
job.setJarByClass(FailJob.class);
job.setMapperClass(FailMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(FailReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("Fail job");
FileInputFormat.addInputPath(job, inputFile);
return job;
}
Project: hadoop-2.6.0-cdh5.4.3
File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = Job.getInstance(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
Project: hadoop-2.6.0-cdh5.4.3
File: UserNamePermission.java
public static void main(String [] args) throws Exception
{
Path outDir = new Path("output");
Configuration conf = new Configuration();
Job job = new Job(conf, "user name check");
job.setJarByClass(UserNamePermission.class);
job.setMapperClass(UserNamePermission.UserNameMapper.class);
job.setCombinerClass(UserNamePermission.UserNameReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setReducerClass(UserNamePermission.UserNameReducer.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, outDir);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Project: hadoop-2.6.0-cdh5.4.3
File: FieldSelectionMapper.java
public void setup(Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
this.fieldSeparator =
conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
this.mapOutputKeyValueSpec =
conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
try {
this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
context.getInputFormatClass().getCanonicalName());
} catch (ClassNotFoundException e) {
throw new IOException("Input format class not found", e);
}
allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
Project: hadoop-2.6.0-cdh5.4.3
File: TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = new Job(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
Project: Hanhan-Hadoop-MapReduce
File: WordCountImproved.java
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCountImproved.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(LongSumReducer.class);
job.setReducerClass(LongSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextInputFormat.addInputPath(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
return job.waitForCompletion(true) ? 0 : 1;
}
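For completeness, a minimal sketch (hypothetical, not part of the project) of the main() that usually drives a Tool like this run() method, assuming WordCountImproved extends Configured and implements Tool, as its use of getConf() suggests:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
// ToolRunner parses generic Hadoop options (-D, -files, ...) before invoking run()
int exitCode = ToolRunner.run(new Configuration(), new WordCountImproved(), args);
System.exit(exitCode);
}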