Java 类org.apache.hadoop.mapreduce.lib.input.FileInputFormat 实例源码
项目:aliyun-maxcompute-data-collectors
文件:TestImportJob.java
@Override
public void configureInputFormat(Job job, String tableName,
String tableClassName, String splitByCol)
throws ClassNotFoundException, IOException {
// Write a line of text into a file so that we can get
// a record to the map task.
Path dir = new Path(this.options.getTempDir());
Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
FileSystem fs = FileSystem.getLocal(this.options.getConf());
if (fs.exists(p)) {
boolean result = fs.delete(p, false);
assertTrue("Couldn't delete temp file!", result);
}
BufferedWriter w = new BufferedWriter(
new OutputStreamWriter(fs.create(p)));
w.append("This is a line!");
w.close();
FileInputFormat.addInputPath(job, p);
// And set the InputFormat itself.
super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
项目:LDA
文件:CalParamDriver.java
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "calculating parameter";
conf.set("params",String.valueOf(params));
Job job = new Job(conf, jobName);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(indexToCountWritable.class);
job.setOutputKeyClass(twoDimensionIndexWritable.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapperClass(CalParamsMapper.class);
job.setReducerClass(CalParamsReducer.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job,output);
job.setJarByClass(LDADriver.class);
if (!job.waitForCompletion(true)) {
throw new InterruptedException("calculating parameter failed");
}
}
项目:DocIT
文件:Cut.java
public static void cut(String name, String in) throws Exception {
Job job = createJob(name, in);
job.setNumReduceTasks(0);
boolean success = job.waitForCompletion(true);
if (success) {
// MapReduce reads an input and writes the result to an new location.
// Hence it can not modify data in place, and we have to replace the input
// with the output
Path output = FileOutputFormat.getOutputPath(job);
FileSystem fs = output.getFileSystem(job.getConfiguration());
Path[] inputs = FileInputFormat.getInputPaths(job);
for (int i = 0; i < inputs.length; i++) {
fs.delete(inputs[i], true);
}
fs.rename(output, inputs[0]);
}
}
项目:Wikipedia-Index
文件:TF.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("xmlinput.start", "<page>");
conf.set("xmlinput.end", "</page>");
Job job =Job.getInstance(conf);
job.setJobName("TermFrequencyCount");
job.setJarByClass(TF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(TFMap.class);
job.setReducerClass(TFReduce.class);
job.setInputFormatClass(XmlInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
项目:Wikipedia-Index
文件:ExtractPage.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("xmlinput.start", "<page>");
conf.set("xmlinput.end", "</page>");
Job job =Job.getInstance(conf);
job.setJobName("ExrtactPages");
job.setJarByClass(ExtractPage.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(ExtractPageMap.class);
job.setReducerClass(ExtractPageReduce.class);
job.setInputFormatClass(XmlInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
项目:Wikipedia-Index
文件:DF.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job =Job.getInstance(conf);
job.setJobName("DocumentFrequencyCount");
job.setJarByClass(DF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(DFMap.class);
job.setReducerClass(DFReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
项目:Wikipedia-Index
文件:MaxThreeLabel.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job =Job.getInstance(conf);
job.setJobName("MaxThreeLabel");
job.setJarByClass(MaxThreeLabel.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(MaxThreeLabelMap.class);
job.setReducerClass(MaxThreeLabelReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
项目:Wikipedia-Index
文件:PageWordCount.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("xmlinput.start", "<page>");
conf.set("xmlinput.end", "</page>");
Job job =Job.getInstance(conf);
job.setJobName("PageWordCount");
job.setJarByClass(PageWordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(PageWordCountMap.class);
job.setCombinerClass(PageWordCountReduce.class);
job.setReducerClass(PageWordCountReduce.class);
job.setInputFormatClass(XmlInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
项目:Wikipedia-Index
文件:TF_IDF.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job =Job.getInstance(conf);
job.setJobName("TF-IDFCount");
job.setJarByClass(TF_IDF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(TF_IDFMap.class);
job.setReducerClass(TF_IDFReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}
项目:HBase-High-Performance-Cookbook
文件:InputDriver.java
public static void runJob(Path input, Path output, String vectorClassName,Configuration config)
throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = config;
conf.set("vector.implementation.class.name", vectorClassName);
Job job = new Job(conf, "Input Driver running over input: " + input);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(VectorWritable.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapperClass(InputMapper.class);
job.setNumReduceTasks(0);
job.setJarByClass(InputDriver.class);
FileInputFormat.addInputPath(job, input);
FileOutputFormat.setOutputPath(job, output);
job.waitForCompletion(true);
}
项目:Hadoop-Codes
文件:MaximumAverageDriver.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "maxaverage");
job.setMapperClass(MaximumAverageMapper.class);
job.setReducerClass(MaximumAverageReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
项目:Hadoop-Codes
文件:testDriver.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "test");
job.setMapperClass(testMapper.class);
job.setPartitionerClass(testPartitioner.class);
job.setReducerClass(testReducer.class);
job.setNumReduceTasks(10);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
项目:Hadoop-Codes
文件:SystemUserDriver.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "systemuser");
//job.setMapperClass(SystemUserMapper.class);
job.setMapperClass(DailyCount.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
项目:Hadoop-Codes
文件:MaxTempDriver.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "maxtemp");
job.setMapperClass(MaxTempMapper.class);
job.setReducerClass(MaxTempReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
if (!job.waitForCompletion(true))
return;
}
项目:aliyun-maxcompute-data-collectors
文件:ExportJobBase.java
@Override
protected void configureInputFormat(Job job, String tableName,
String tableClassName, String splitByCol)
throws ClassNotFoundException, IOException {
if (options.getOdpsTable() != null) {
Configuration conf = job.getConfiguration();
setInputFormatClass(OdpsExportInputFormat.class);
conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
String partitionSpec = options.getOdpsPartitionSpec();
if (partitionSpec != null) {
conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
}
setMapperClass(OdpsExportMapper.class);
}
super.configureInputFormat(job, tableName, tableClassName, splitByCol);
if (!isHCatJob && options.getOdpsTable() == null) {
FileInputFormat.addInputPath(job, getInputPath());
}
}
项目:hadoop
文件:CredentialsTestJob.java
public Job createJob()
throws IOException {
Configuration conf = getConf();
conf.setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(conf, "test");
job.setNumReduceTasks(1);
job.setJarByClass(CredentialsTestJob.class);
job.setNumReduceTasks(1);
job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
job.setInputFormatClass(SleepJob.SleepInputFormat.class);
job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("test job");
FileInputFormat.addInputPath(job, new Path("ignored"));
return job;
}
项目:hadoop
文件:FailJob.java
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
throws IOException {
Configuration conf = getConf();
conf.setBoolean(FAIL_MAP, failMappers);
conf.setBoolean(FAIL_REDUCE, failReducers);
Job job = Job.getInstance(conf, "fail");
job.setJarByClass(FailJob.class);
job.setMapperClass(FailMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(FailReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
job.setSpeculativeExecution(false);
job.setJobName("Fail job");
FileInputFormat.addInputPath(job, inputFile);
return job;
}
项目:hadoop
文件:MapReduceTestUtil.java
/**
* Creates a simple copy job.
*
* @param conf Configuration object
* @param outdir Output directory.
* @param indirs Comma separated input directories.
* @return Job initialized for a data copy job.
* @throws Exception If an error occurs creating job configuration.
*/
public static Job createCopyJob(Configuration conf, Path outdir,
Path... indirs) throws Exception {
conf.setInt(MRJobConfig.NUM_MAPS, 3);
Job theJob = Job.getInstance(conf);
theJob.setJobName("DataMoveJob");
FileInputFormat.setInputPaths(theJob, indirs);
theJob.setMapperClass(DataCopyMapper.class);
FileOutputFormat.setOutputPath(theJob, outdir);
theJob.setOutputKeyClass(Text.class);
theJob.setOutputValueClass(Text.class);
theJob.setReducerClass(DataCopyReducer.class);
theJob.setNumReduceTasks(1);
return theJob;
}
项目:hadoop
文件:MapReduceTestUtil.java
/**
* Creates a simple fail job.
*
* @param conf Configuration object
* @param outdir Output directory.
* @param indirs Comma separated input directories.
* @return Job initialized for a simple fail job.
* @throws Exception If an error occurs creating job configuration.
*/
public static Job createFailJob(Configuration conf, Path outdir,
Path... indirs) throws Exception {
FileSystem fs = outdir.getFileSystem(conf);
if (fs.exists(outdir)) {
fs.delete(outdir, true);
}
conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
Job theJob = Job.getInstance(conf);
theJob.setJobName("Fail-Job");
FileInputFormat.setInputPaths(theJob, indirs);
theJob.setMapperClass(FailMapper.class);
theJob.setReducerClass(Reducer.class);
theJob.setNumReduceTasks(0);
FileOutputFormat.setOutputPath(theJob, outdir);
theJob.setOutputKeyClass(Text.class);
theJob.setOutputValueClass(Text.class);
return theJob;
}
项目:hadoop
文件:MapReduceTestUtil.java
public static Job createJob(Configuration conf, Path inDir, Path outDir,
int numInputFiles, int numReds, String input) throws IOException {
Job job = Job.getInstance(conf);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outDir)) {
fs.delete(outDir, true);
}
if (fs.exists(inDir)) {
fs.delete(inDir, true);
}
fs.mkdirs(inDir);
for (int i = 0; i < numInputFiles; ++i) {
DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
file.writeBytes(input);
file.close();
}
FileInputFormat.setInputPaths(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
job.setNumReduceTasks(numReds);
return job;
}
项目:hadoop
文件:TestMapReduceLazyOutput.java
private static void runTestLazyOutput(Configuration conf, Path output,
int numReducers, boolean createLazily)
throws Exception {
Job job = Job.getInstance(conf, "Test-Lazy-Output");
FileInputFormat.setInputPaths(job, INPUT);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(TestMapReduceLazyOutput.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(numReducers);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
if (createLazily) {
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
} else {
job.setOutputFormatClass(TextOutputFormat.class);
}
assertTrue(job.waitForCompletion(true));
}
项目:hadoop
文件:SleepJob.java
public Job createJob(int numMapper, int numReducer,
long mapSleepTime, int mapSleepCount,
long reduceSleepTime, int reduceSleepCount)
throws IOException {
Configuration conf = getConf();
conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
Job job = Job.getInstance(conf, "sleep");
job.setNumReduceTasks(numReducer);
job.setJarByClass(SleepJob.class);
job.setMapperClass(SleepMapper.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setReducerClass(SleepReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(SleepInputFormat.class);
job.setPartitionerClass(SleepJobPartitioner.class);
job.setSpeculativeExecution(false);
job.setJobName("Sleep job");
FileInputFormat.addInputPath(job, new Path("ignored"));
return job;
}
项目:hadoop
文件:TestLocalRunner.java
/**
* Run a test with a misconfigured number of mappers.
* Expect failure.
*/
@Test
public void testInvalidMultiMapParallelism() throws Exception {
Job job = Job.getInstance();
Path inputPath = createMultiMapsInput();
Path outputPath = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
job.setMapperClass(StressMapper.class);
job.setReducerClass(CountingReducer.class);
job.setNumReduceTasks(1);
LocalJobRunner.setLocalMaxRunningMaps(job, -6);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
boolean success = job.waitForCompletion(true);
assertFalse("Job succeeded somehow", success);
}
项目:hadoop
文件:LocatedFileStatusFetcher.java
/**
* @param conf configuration for the job
* @param dirs the initial list of paths
* @param recursive whether to traverse the patchs recursively
* @param inputFilter inputFilter to apply to the resulting paths
* @param newApi whether using the mapred or mapreduce API
* @throws InterruptedException
* @throws IOException
*/
public LocatedFileStatusFetcher(Configuration conf, Path[] dirs,
boolean recursive, PathFilter inputFilter, boolean newApi) throws InterruptedException,
IOException {
int numThreads = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS,
FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
rawExec = Executors.newFixedThreadPool(
numThreads,
new ThreadFactoryBuilder().setDaemon(true)
.setNameFormat("GetFileInfo #%d").build());
exec = MoreExecutors.listeningDecorator(rawExec);
resultQueue = new LinkedBlockingQueue<List<FileStatus>>();
this.conf = conf;
this.inputDirs = dirs;
this.recursive = recursive;
this.inputFilter = inputFilter;
this.newApi = newApi;
}
项目:hadoop
文件:ControlledJob.java
/**
* Submit this job to mapred. The state becomes RUNNING if submission
* is successful, FAILED otherwise.
*/
protected synchronized void submit() {
try {
Configuration conf = job.getConfiguration();
if (conf.getBoolean(CREATE_DIR, false)) {
FileSystem fs = FileSystem.get(conf);
Path inputPaths[] = FileInputFormat.getInputPaths(job);
for (int i = 0; i < inputPaths.length; i++) {
if (!fs.exists(inputPaths[i])) {
try {
fs.mkdirs(inputPaths[i]);
} catch (IOException e) {
}
}
}
}
job.submit();
this.state = State.RUNNING;
} catch (Exception ioe) {
LOG.info(getJobName()+" got an error while submitting ",ioe);
this.state = State.FAILED;
this.message = StringUtils.stringifyException(ioe);
}
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testListLocatedStatus() throws Exception {
Configuration conf = getConfiguration();
conf.setBoolean("fs.test.impl.disable.cache", false);
conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
"test:///a1/a2");
MockFileSystem mockFs =
(MockFileSystem) new Path("test:///").getFileSystem(conf);
Assert.assertEquals("listLocatedStatus already called",
0, mockFs.numListLocatedStatusCalls);
JobConf job = new JobConf(conf);
TextInputFormat fileInputFormat = new TextInputFormat();
fileInputFormat.configure(job);
InputSplit[] splits = fileInputFormat.getSplits(job, 1);
Assert.assertEquals("Input splits are not correct", 2, splits.length);
Assert.assertEquals("listLocatedStatuss calls",
1, mockFs.numListLocatedStatusCalls);
FileSystem.closeAll();
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testSplitLocationInfo() throws Exception {
Configuration conf = getConfiguration();
conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
"test:///a1/a2");
JobConf job = new JobConf(conf);
TextInputFormat fileInputFormat = new TextInputFormat();
fileInputFormat.configure(job);
FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);
String[] locations = splits[0].getLocations();
Assert.assertEquals(2, locations.length);
SplitLocationInfo[] locationInfo = splits[0].getLocationInfo();
Assert.assertEquals(2, locationInfo.length);
SplitLocationInfo localhostInfo = locations[0].equals("localhost") ?
locationInfo[0] : locationInfo[1];
SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ?
locationInfo[0] : locationInfo[1];
Assert.assertTrue(localhostInfo.isOnDisk());
Assert.assertTrue(localhostInfo.isInMemory());
Assert.assertTrue(otherhostInfo.isOnDisk());
Assert.assertFalse(otherhostInfo.isInMemory());
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testListStatusSimple() throws IOException {
Configuration conf = new Configuration();
conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.configureTestSimple(conf, localFs);
JobConf jobConf = new JobConf(conf);
TextInputFormat fif = new TextInputFormat();
fif.configure(jobConf);
FileStatus[] statuses = fif.listStatus(jobConf);
org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
localFs);
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testListStatusNestedRecursive() throws IOException {
Configuration conf = new Configuration();
conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.configureTestNestedRecursive(conf, localFs);
JobConf jobConf = new JobConf(conf);
TextInputFormat fif = new TextInputFormat();
fif.configure(jobConf);
FileStatus[] statuses = fif.listStatus(jobConf);
org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
localFs);
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testListStatusNestedNonRecursive() throws IOException {
Configuration conf = new Configuration();
conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
List<Path> expectedPaths = org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.configureTestNestedNonRecursive(conf, localFs);
JobConf jobConf = new JobConf(conf);
TextInputFormat fif = new TextInputFormat();
fif.configure(jobConf);
FileStatus[] statuses = fif.listStatus(jobConf);
org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.verifyFileStatuses(expectedPaths, Lists.newArrayList(statuses),
localFs);
}
项目:hadoop
文件:TestFileInputFormat.java
@Test
public void testListStatusErrorOnNonExistantDir() throws IOException {
Configuration conf = new Configuration();
conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
org.apache.hadoop.mapreduce.lib.input.TestFileInputFormat
.configureTestErrorOnNonExistantDir(conf, localFs);
JobConf jobConf = new JobConf(conf);
TextInputFormat fif = new TextInputFormat();
fif.configure(jobConf);
try {
fif.listStatus(jobConf);
Assert.fail("Expecting an IOException for a missing Input path");
} catch (IOException e) {
Path expectedExceptionPath = new Path(TEST_ROOT_DIR, "input2");
expectedExceptionPath = localFs.makeQualified(expectedExceptionPath);
Assert.assertTrue(e instanceof InvalidInputException);
Assert.assertEquals(
"Input path does not exist: " + expectedExceptionPath.toString(),
e.getMessage());
}
}
项目:hadoop
文件:WordStandardDeviation.java
@Override
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: wordstddev <in> <out>");
return 0;
}
Configuration conf = getConf();
Job job = Job.getInstance(conf, "word stddev");
job.setJarByClass(WordStandardDeviation.class);
job.setMapperClass(WordStandardDeviationMapper.class);
job.setCombinerClass(WordStandardDeviationReducer.class);
job.setReducerClass(WordStandardDeviationReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
Path outputpath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputpath);
boolean result = job.waitForCompletion(true);
// read output and calculate standard deviation
stddev = readAndCalcStdDev(outputpath, conf);
return (result ? 0 : 1);
}
项目:hadoop
文件:WordCount.java
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: wordcount <in> [<in>...] <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
for (int i = 0; i < otherArgs.length - 1; ++i) {
FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
}
FileOutputFormat.setOutputPath(job,
new Path(otherArgs[otherArgs.length - 1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
项目:hadoop
文件:WordMean.java
@Override
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: wordmean <in> <out>");
return 0;
}
Configuration conf = getConf();
Job job = Job.getInstance(conf, "word mean");
job.setJarByClass(WordMean.class);
job.setMapperClass(WordMeanMapper.class);
job.setCombinerClass(WordMeanReducer.class);
job.setReducerClass(WordMeanReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
Path outputpath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputpath);
boolean result = job.waitForCompletion(true);
mean = readAndCalcMean(outputpath, conf);
return (result ? 0 : 1);
}
项目:hadoop
文件:TeraValidate.java
public int run(String[] args) throws Exception {
Job job = Job.getInstance(getConf());
if (args.length != 2) {
usage();
return 1;
}
TeraInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setJobName("TeraValidate");
job.setJarByClass(TeraValidate.class);
job.setMapperClass(ValidateMapper.class);
job.setReducerClass(ValidateReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// force a single reducer
job.setNumReduceTasks(1);
// force a single split
FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
job.setInputFormatClass(TeraInputFormat.class);
return job.waitForCompletion(true) ? 0 : 1;
}
项目:hadoop
文件:CompressionEmulationUtil.java
/**
* Configure the {@link Job} for enabling compression emulation.
*/
static void configure(final Job job) throws IOException, InterruptedException,
ClassNotFoundException {
// set the random text mapper
job.setMapperClass(RandomTextDataMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(GenDataFormat.class);
job.setJarByClass(GenerateData.class);
// set the output compression true
FileOutputFormat.setCompressOutput(job, true);
try {
FileInputFormat.addInputPath(job, new Path("ignored"));
} catch (IOException e) {
LOG.error("Error while adding input path ", e);
}
}
项目:hadoop
文件:GenerateDistCacheData.java
@Override
public Job call() throws IOException, InterruptedException,
ClassNotFoundException {
UserGroupInformation ugi = UserGroupInformation.getLoginUser();
ugi.doAs( new PrivilegedExceptionAction <Job>() {
public Job run() throws IOException, ClassNotFoundException,
InterruptedException {
job.setMapperClass(GenDCDataMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setInputFormatClass(GenDCDataFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setJarByClass(GenerateDistCacheData.class);
try {
FileInputFormat.addInputPath(job, new Path("ignored"));
} catch (IOException e) {
LOG.error("Error while adding input path ", e);
}
job.submit();
return job;
}
});
return job;
}
项目:learn-to-hadoop
文件:MaxTemperature.java
public static void main(String[] args) throws Exception {
if(args.length != 2){
System.err.println("Usage: MaxTemperature <input path> <output path>");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(MaxTemperature.class);
job.setJobName("Max temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
项目:learn-to-hadoop
文件:MaxTemperatureWithCombiner.java
public static void main(String[] args) throws Exception {
if(args.length != 2){
System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(MaxTemperatureWithCombiner.class);
job.setJobName("Max Temperature With Combiner");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
job.setCombinerClass(MaxTemperatureReducer.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
项目:LDA
文件:InputDriver.java
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {
Job job = new Job(conf, "Input Drive running input:"+inputPath);
log.info("start running InputDriver");
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(indexToWordWritable.class);
job.setOutputKeyClass(twoDimensionIndexWritable.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(InputMapper.class);
job.setReducerClass(InputReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setJarByClass(InputDriver.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, output);
boolean succeeded = job.waitForCompletion(true);
if (!succeeded) {
throw new IllegalStateException("Job failed!");
}
}