Java 类org.apache.hadoop.mapred.JobClient 实例源码
项目:hadoop
文件:NNBench.java
/**
* Run the test
*
* @throws IOException on error
*/
public static void runTests() throws IOException {
config.setLong("io.bytes.per.checksum", bytesPerChecksum);
JobConf job = new JobConf(config, NNBench.class);
job.setJobName("NNBench-" + operation);
FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
// Explicitly set number of max map attempts to 1.
job.setMaxMapAttempts(1);
// Explicitly turn off speculative execution
job.setSpeculativeExecution(false);
job.setMapperClass(NNBenchMapper.class);
job.setReducerClass(NNBenchReducer.class);
FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks((int) numberOfReduces);
JobClient.runJob(job);
}
项目:ditb
文件:TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
job.setInputFormat(clazz);
job.setOutputFormat(NullOutputFormat.class);
job.setMapperClass(ExampleVerifier.class);
job.setNumReduceTasks(0);
LOG.debug("submitting job.");
final RunningJob run = JobClient.runJob(job);
assertTrue("job failed!", run.isSuccessful());
assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
.findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
项目:hadoop
文件:TestDFSIO.java
private void runIOTest(
Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
Path outputDir) throws IOException {
JobConf job = new JobConf(config, TestDFSIO.class);
FileInputFormat.setInputPaths(job, getControlDir(config));
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(mapperClass);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
项目:hadoop
文件:TestDatamerge.java
private static void joinAs(String jointype,
Class<? extends SimpleCheckerBase> c) throws Exception {
final int srcs = 4;
Configuration conf = new Configuration();
JobConf job = new JobConf(conf, c);
Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
Path[] src = writeSimpleSrc(base, conf, srcs);
job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
SequenceFileInputFormat.class, src));
job.setInt("testdatamerge.sources", srcs);
job.setInputFormat(CompositeInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(base, "out"));
job.setMapperClass(c);
job.setReducerClass(c);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
JobClient.runJob(job);
base.getFileSystem(job).delete(base, true);
}
项目:hadoop
文件:TestDatamerge.java
public void testEmptyJoin() throws Exception {
JobConf job = new JobConf();
Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
Fake_IF.class, src));
job.setInputFormat(CompositeInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(base, "out"));
job.setMapperClass(IdentityMapper.class);
job.setReducerClass(IdentityReducer.class);
job.setOutputKeyClass(IncomparableKey.class);
job.setOutputValueClass(NullWritable.class);
JobClient.runJob(job);
base.getFileSystem(job).delete(base, true);
}
项目:hadoop
文件:GenericMRLoadGenerator.java
/**
* When no input dir is specified, generate random data.
*/
protected static void confRandom(Job job)
throws IOException {
// from RandomWriter
job.setInputFormatClass(RandomInputFormat.class);
job.setMapperClass(RandomMapOutput.class);
Configuration conf = job.getConfiguration();
final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
long numBytesToWritePerMap =
conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
if (numBytesToWritePerMap == 0) {
throw new IOException(
"Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
}
long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
if (numMaps == 0 && totalBytesToWrite > 0) {
numMaps = 1;
conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
}
conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
项目:hadoop
文件:TestMiniMRProxyUser.java
private void mrRun() throws Exception {
FileSystem fs = FileSystem.get(getJobConf());
Path inputDir = new Path("input");
fs.mkdirs(inputDir);
Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
writer.write("hello");
writer.close();
Path outputDir = new Path("output", "output");
JobConf jobConf = new JobConf(getJobConf());
jobConf.setInt("mapred.map.tasks", 1);
jobConf.setInt("mapred.map.max.attempts", 1);
jobConf.setInt("mapred.reduce.max.attempts", 1);
jobConf.set("mapred.input.dir", inputDir.toString());
jobConf.set("mapred.output.dir", outputDir.toString());
JobClient jobClient = new JobClient(jobConf);
RunningJob runJob = jobClient.submitJob(jobConf);
runJob.waitForCompletion();
assertTrue(runJob.isComplete());
assertTrue(runJob.isSuccessful());
}
项目:hadoop
文件:DistCpV1.java
/**
* Increase the replication factor of _distcp_src_files to
* sqrt(min(maxMapsOnCluster, numMaps)). This is to reduce the chance of
* failing of distcp because of "not having a replication of _distcp_src_files
* available for reading for some maps".
*/
private static void setReplication(Configuration conf, JobConf jobConf,
Path srcfilelist, int numMaps) throws IOException {
int numMaxMaps = new JobClient(jobConf).getClusterStatus().getMaxMapTasks();
short replication = (short) Math.ceil(
Math.sqrt(Math.min(numMaxMaps, numMaps)));
FileSystem fs = srcfilelist.getFileSystem(conf);
FileStatus srcStatus = fs.getFileStatus(srcfilelist);
if (srcStatus.getReplication() < replication) {
if (!fs.setReplication(srcfilelist, replication)) {
throw new IOException("Unable to increase the replication of file " +
srcfilelist);
}
}
}
项目:hadoop
文件:GenerateData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
final JobClient client =
new JobClient(new JobConf(jobCtxt.getConfiguration()));
ClusterStatus stat = client.getClusterStatus(true);
final long toGen =
jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
if (toGen < 0) {
throw new IOException("Invalid/missing generation bytes: " + toGen);
}
final int nTrackers = stat.getTaskTrackers();
final long bytesPerTracker = toGen / nTrackers;
final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
final Matcher m = trackerPattern.matcher("");
for (String tracker : stat.getActiveTrackerNames()) {
m.reset(tracker);
if (!m.find()) {
System.err.println("Skipping node: " + tracker);
continue;
}
final String name = m.group(1);
splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
}
return splits;
}
项目:hadoop
文件:GenerateDistCacheData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
final JobClient client = new JobClient(jobConf);
ClusterStatus stat = client.getClusterStatus(true);
int numTrackers = stat.getTaskTrackers();
final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);
// Total size of distributed cache files to be generated
final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
// Get the path of the special file
String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
throw new RuntimeException("Invalid metadata: #files (" + fileCount
+ "), total_size (" + totalSize + "), filelisturi ("
+ distCacheFileList + ")");
}
Path sequenceFile = new Path(distCacheFileList);
FileSystem fs = sequenceFile.getFileSystem(jobConf);
FileStatus srcst = fs.getFileStatus(sequenceFile);
// Consider the number of TTs * mapSlotsPerTracker as number of mappers.
int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
int numSplits = numTrackers * numMapSlotsPerTracker;
List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
LongWritable key = new LongWritable();
BytesWritable value = new BytesWritable();
// Average size of data to be generated by each map task
final long targetSize = Math.max(totalSize / numSplits,
DistributedCacheEmulator.AVG_BYTES_PER_MAP);
long splitStartPosition = 0L;
long splitEndPosition = 0L;
long acc = 0L;
long bytesRemaining = srcst.getLen();
SequenceFile.Reader reader = null;
try {
reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
while (reader.next(key, value)) {
// If adding this file would put this split past the target size,
// cut the last split and put this file in the next split.
if (acc + key.get() > targetSize && acc != 0) {
long splitSize = splitEndPosition - splitStartPosition;
splits.add(new FileSplit(
sequenceFile, splitStartPosition, splitSize, (String[])null));
bytesRemaining -= splitSize;
splitStartPosition = splitEndPosition;
acc = 0L;
}
acc += key.get();
splitEndPosition = reader.getPosition();
}
} finally {
if (reader != null) {
reader.close();
}
}
if (bytesRemaining != 0) {
splits.add(new FileSplit(
sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
}
return splits;
}
项目:hadoop
文件:TestCompressionEmulationUtils.java
/**
* Runs a GridMix data-generation job.
*/
private static void runDataGenJob(Configuration conf, Path tempDir)
throws IOException, ClassNotFoundException, InterruptedException {
JobClient client = new JobClient(conf);
// get the local job runner
conf.setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(conf);
CompressionEmulationUtil.configure(job);
job.setInputFormatClass(CustomInputFormat.class);
// set the output path
FileOutputFormat.setOutputPath(job, tempDir);
// submit and wait for completion
job.submit();
int ret = job.waitForCompletion(true) ? 0 : 1;
assertEquals("Job Failed", 0, ret);
}
项目:hadoop
文件:DataJoinJob.java
/**
* Submit/run a map/reduce job.
*
* @param job
* @return true for success
* @throws IOException
*/
public static boolean runJob(JobConf job) throws IOException {
JobClient jc = new JobClient(job);
boolean sucess = true;
RunningJob running = null;
try {
running = jc.submitJob(job);
JobID jobId = running.getID();
System.out.println("Job " + jobId + " is submitted");
while (!running.isComplete()) {
System.out.println("Job " + jobId + " is still running.");
try {
Thread.sleep(60000);
} catch (InterruptedException e) {
}
running = jc.getJob(jobId);
}
sucess = running.isSuccessful();
} finally {
if (!sucess && (running != null)) {
running.killJob();
}
jc.close();
}
return sucess;
}
项目:ditb
文件:TestTableMapReduceUtil.java
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
Configuration cfg = UTIL.getConfiguration();
JobConf jobConf = new JobConf(cfg);
try {
jobConf.setJobName("process row task");
jobConf.setNumReduceTasks(1);
TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
jobConf);
TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
ClassificatorRowReduce.class, jobConf);
RunningJob job = JobClient.runJob(jobConf);
assertTrue(job.isSuccessful());
} finally {
if (jobConf != null)
FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
}
}
项目:ditb
文件:TestTableMapReduceUtil.java
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation()
throws IOException {
Configuration cfg = UTIL.getConfiguration();
JobConf jobConf = new JobConf(cfg);
try {
jobConf.setJobName("process row task");
jobConf.setNumReduceTasks(2);
TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
jobConf);
TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
RunningJob job = JobClient.runJob(jobConf);
assertTrue(job.isSuccessful());
} finally {
if (jobConf != null)
FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
}
}
项目:ditb
文件:TestMultiTableSnapshotInputFormat.java
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
throws IOException, InterruptedException, ClassNotFoundException {
JobConf job = new JobConf(TEST_UTIL.getConfiguration());
job.setJobName(jobName);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
TableMapReduceUtil.addDependencyJars(job);
job.setReducerClass(Reducer.class);
job.setNumReduceTasks(1); // one to get final "first" and "last" key
FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
LOG.info("Started " + job.getJobName());
RunningJob runningJob = JobClient.runJob(job);
runningJob.waitForCompletion();
assertTrue(runningJob.isSuccessful());
LOG.info("After map/reduce completion - job " + jobName);
}
项目:aliyun-oss-hadoop-fs
文件:TestDFSIO.java
private void runIOTest(
Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
Path outputDir) throws IOException {
JobConf job = new JobConf(config, TestDFSIO.class);
FileInputFormat.setInputPaths(job, getControlDir(config));
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(mapperClass);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
项目:aliyun-oss-hadoop-fs
文件:TestDatamerge.java
public void testEmptyJoin() throws Exception {
JobConf job = new JobConf();
Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
Fake_IF.class, src));
job.setInputFormat(CompositeInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(base, "out"));
job.setMapperClass(IdentityMapper.class);
job.setReducerClass(IdentityReducer.class);
job.setOutputKeyClass(IncomparableKey.class);
job.setOutputValueClass(NullWritable.class);
JobClient.runJob(job);
base.getFileSystem(job).delete(base, true);
}
项目:aliyun-oss-hadoop-fs
文件:NNBench.java
/**
* Run the test
*
* @throws IOException on error
*/
public static void runTests() throws IOException {
config.setLong("io.bytes.per.checksum", bytesPerChecksum);
JobConf job = new JobConf(config, NNBench.class);
job.setJobName("NNBench-" + operation);
FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
// Explicitly set number of max map attempts to 1.
job.setMaxMapAttempts(1);
// Explicitly turn off speculative execution
job.setSpeculativeExecution(false);
job.setMapperClass(NNBenchMapper.class);
job.setReducerClass(NNBenchReducer.class);
FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks((int) numberOfReduces);
JobClient.runJob(job);
}
项目:emr-dynamodb-connector
文件:ReadIopsCalculator.java
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient, String tableName,
int totalSegments, int localSegments) {
this.jobConf = (JobConf) jobClient.getConf();
this.jobClient = jobClient;
this.dynamoDBClient = dynamoDBClient;
this.tableName = tableName;
this.totalSegments = totalSegments;
this.localSegments = localSegments;
this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
.THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));
log.info("Table name: " + tableName);
log.info("Throughput percent: " + throughputPercent);
}
项目:aliyun-oss-hadoop-fs
文件:TestCompressionEmulationUtils.java
/**
* Runs a GridMix data-generation job.
*/
private static void runDataGenJob(Configuration conf, Path tempDir)
throws IOException, ClassNotFoundException, InterruptedException {
JobClient client = new JobClient(conf);
// get the local job runner
conf.setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(conf);
CompressionEmulationUtil.configure(job);
job.setInputFormatClass(CustomInputFormat.class);
// set the output path
FileOutputFormat.setOutputPath(job, tempDir);
// submit and wait for completion
job.submit();
int ret = job.waitForCompletion(true) ? 0 : 1;
assertEquals("Job Failed", 0, ret);
}
项目:big-c
文件:GenerateData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
final JobClient client =
new JobClient(new JobConf(jobCtxt.getConfiguration()));
ClusterStatus stat = client.getClusterStatus(true);
final long toGen =
jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
if (toGen < 0) {
throw new IOException("Invalid/missing generation bytes: " + toGen);
}
final int nTrackers = stat.getTaskTrackers();
final long bytesPerTracker = toGen / nTrackers;
final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
final Matcher m = trackerPattern.matcher("");
for (String tracker : stat.getActiveTrackerNames()) {
m.reset(tracker);
if (!m.find()) {
System.err.println("Skipping node: " + tracker);
continue;
}
final String name = m.group(1);
splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
}
return splits;
}
项目:THUTag
文件:ImportDouban.java
@Override
public void run(String[] args) throws Exception {
Flags flags = new Flags();
flags.addWithDefaultValue(
"tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
flags.addWithDefaultValue(
"subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
flags.add("output");
flags.parseAndCheck(args);
JobConf job = new JobConf(this.getClass());
job.setJobName("convert-douban-raw-to-posts");
MapReduceHelper.setAllOutputTypes(job, Text.class);
MapReduceHelper.setMR(
job, DoubanRawMapper.class, DoubanToPostReducer.class);
job.setInputFormat(TextInputFormat.class);
TextInputFormat.addInputPath(
job, new Path(flags.getString("tag_subject_data")));
TextInputFormat.addInputPath(
job, new Path(flags.getString("subject_data")));
job.setOutputFormat(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(
job, new Path(flags.getString("output")));
JobClient.runJob(job);
}
项目:GeoCrawler
文件:LinkDbMerger.java
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
throws Exception {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
LOG.info("LinkDb merge: starting at " + sdf.format(start));
JobConf job = createMergeJob(getConf(), output, normalize, filter);
for (int i = 0; i < dbs.length; i++) {
FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
}
JobClient.runJob(job);
FileSystem fs = FileSystem.get(getConf());
fs.mkdirs(output);
fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
LinkDb.CURRENT_NAME));
long end = System.currentTimeMillis();
LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
+ TimingUtil.elapsedTime(start, end));
}
项目:big-c
文件:TestDFSIO.java
private void runIOTest(
Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
Path outputDir) throws IOException {
JobConf job = new JobConf(config, TestDFSIO.class);
FileInputFormat.setInputPaths(job, getControlDir(config));
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(mapperClass);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
项目:bdelab
文件:WordCountOldAPI.java
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WordCountOldAPI.class);
conf.setJobName("old wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
项目:big-c
文件:TestDatamerge.java
private static void joinAs(String jointype,
Class<? extends SimpleCheckerBase> c) throws Exception {
final int srcs = 4;
Configuration conf = new Configuration();
JobConf job = new JobConf(conf, c);
Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
Path[] src = writeSimpleSrc(base, conf, srcs);
job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
SequenceFileInputFormat.class, src));
job.setInt("testdatamerge.sources", srcs);
job.setInputFormat(CompositeInputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(base, "out"));
job.setMapperClass(c);
job.setReducerClass(c);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
JobClient.runJob(job);
base.getFileSystem(job).delete(base, true);
}
项目:big-c
文件:TestCompressionEmulationUtils.java
/**
* Runs a GridMix data-generation job.
*/
private static void runDataGenJob(Configuration conf, Path tempDir)
throws IOException, ClassNotFoundException, InterruptedException {
JobClient client = new JobClient(conf);
// get the local job runner
conf.setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(conf);
CompressionEmulationUtil.configure(job);
job.setInputFormatClass(CustomInputFormat.class);
// set the output path
FileOutputFormat.setOutputPath(job, tempDir);
// submit and wait for completion
job.submit();
int ret = job.waitForCompletion(true) ? 0 : 1;
assertEquals("Job Failed", 0, ret);
}
项目:big-c
文件:TestMiniMRProxyUser.java
private void mrRun() throws Exception {
FileSystem fs = FileSystem.get(getJobConf());
Path inputDir = new Path("input");
fs.mkdirs(inputDir);
Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
writer.write("hello");
writer.close();
Path outputDir = new Path("output", "output");
JobConf jobConf = new JobConf(getJobConf());
jobConf.setInt("mapred.map.tasks", 1);
jobConf.setInt("mapred.map.max.attempts", 1);
jobConf.setInt("mapred.reduce.max.attempts", 1);
jobConf.set("mapred.input.dir", inputDir.toString());
jobConf.set("mapred.output.dir", outputDir.toString());
JobClient jobClient = new JobClient(jobConf);
RunningJob runJob = jobClient.submitJob(jobConf);
runJob.waitForCompletion();
assertTrue(runJob.isComplete());
assertTrue(runJob.isSuccessful());
}
项目:big-c
文件:NNBench.java
/**
* Run the test
*
* @throws IOException on error
*/
public static void runTests() throws IOException {
config.setLong("io.bytes.per.checksum", bytesPerChecksum);
JobConf job = new JobConf(config, NNBench.class);
job.setJobName("NNBench-" + operation);
FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
// Explicitly set number of max map attempts to 1.
job.setMaxMapAttempts(1);
// Explicitly turn off speculative execution
job.setSpeculativeExecution(false);
job.setMapperClass(NNBenchMapper.class);
job.setReducerClass(NNBenchReducer.class);
FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks((int) numberOfReduces);
JobClient.runJob(job);
}
项目:hadoop
文件:ExternalMapReduce.java
public int run(String[] argv) throws IOException {
if (argv.length < 2) {
System.out.println("ExternalMapReduce <input> <output>");
return -1;
}
Path outDir = new Path(argv[1]);
Path input = new Path(argv[0]);
JobConf testConf = new JobConf(getConf(), ExternalMapReduce.class);
//try to load a class from libjar
try {
testConf.getClassByName("testjar.ClassWordCount");
} catch (ClassNotFoundException e) {
System.out.println("Could not find class from libjar");
return -1;
}
testConf.setJobName("external job");
FileInputFormat.setInputPaths(testConf, input);
FileOutputFormat.setOutputPath(testConf, outDir);
testConf.setMapperClass(MapClass.class);
testConf.setReducerClass(Reduce.class);
testConf.setNumReduceTasks(1);
JobClient.runJob(testConf);
return 0;
}
项目:hadoop
文件:TestKeyFieldBasedComparator.java
public void configure(String keySpec, int expect) throws Exception {
Path testdir = new Path(TEST_DIR.getAbsolutePath());
Path inDir = new Path(testdir, "in");
Path outDir = new Path(testdir, "out");
FileSystem fs = getFileSystem();
fs.delete(testdir, true);
conf.setInputFormat(TextInputFormat.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);
conf.setNumMapTasks(1);
conf.setNumReduceTasks(1);
conf.setOutputFormat(TextOutputFormat.class);
conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
conf.setKeyFieldComparatorOptions(keySpec);
conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
conf.setMapperClass(InverseMapper.class);
conf.setReducerClass(IdentityReducer.class);
if (!fs.mkdirs(testdir)) {
throw new IOException("Mkdirs failed to create " + testdir.toString());
}
if (!fs.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
// set up input data in 2 files
Path inFile = new Path(inDir, "part0");
FileOutputStream fos = new FileOutputStream(inFile.toString());
fos.write((line1 + "\n").getBytes());
fos.write((line2 + "\n").getBytes());
fos.close();
JobClient jc = new JobClient(conf);
RunningJob r_job = jc.submitJob(conf);
while (!r_job.isComplete()) {
Thread.sleep(1000);
}
if (!r_job.isSuccessful()) {
fail("Oops! The job broke due to an unexpected error");
}
Path[] outputFiles = FileUtil.stat2Paths(
getFileSystem().listStatus(outDir,
new Utils.OutputFileUtils.OutputFilesFilter()));
if (outputFiles.length > 0) {
InputStream is = getFileSystem().open(outputFiles[0]);
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
String line = reader.readLine();
//make sure we get what we expect as the first line, and also
//that we have two lines
if (expect == 1) {
assertTrue(line.startsWith(line1));
} else if (expect == 2) {
assertTrue(line.startsWith(line2));
}
line = reader.readLine();
if (expect == 1) {
assertTrue(line.startsWith(line2));
} else if (expect == 2) {
assertTrue(line.startsWith(line1));
}
reader.close();
}
}
项目:hadoop
文件:TestEncryptedShuffle.java
private void encryptedShuffleWithCerts(boolean useClientCerts)
throws Exception {
try {
Configuration conf = new Configuration();
String keystoresDir = new File(BASEDIR).getAbsolutePath();
String sslConfsDir =
KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
useClientCerts);
conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
startCluster(conf);
FileSystem fs = FileSystem.get(getJobConf());
Path inputDir = new Path("input");
fs.mkdirs(inputDir);
Writer writer =
new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
writer.write("hello");
writer.close();
Path outputDir = new Path("output", "output");
JobConf jobConf = new JobConf(getJobConf());
jobConf.setInt("mapred.map.tasks", 1);
jobConf.setInt("mapred.map.max.attempts", 1);
jobConf.setInt("mapred.reduce.max.attempts", 1);
jobConf.set("mapred.input.dir", inputDir.toString());
jobConf.set("mapred.output.dir", outputDir.toString());
JobClient jobClient = new JobClient(jobConf);
RunningJob runJob = jobClient.submitJob(jobConf);
runJob.waitForCompletion();
Assert.assertTrue(runJob.isComplete());
Assert.assertTrue(runJob.isSuccessful());
} finally {
stopCluster();
}
}
项目:hadoop
文件:TestMROldApiJobs.java
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
int numReds) throws IOException, InterruptedException {
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outDir)) {
fs.delete(outDir, true);
}
if (!fs.exists(inDir)) {
fs.mkdirs(inDir);
}
String input = "The quick brown fox\n" + "has many silly\n"
+ "red fox sox\n";
for (int i = 0; i < numMaps; ++i) {
DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
file.writeBytes(input);
file.close();
}
DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
conf.setOutputCommitter(CustomOutputCommitter.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setNumMapTasks(numMaps);
conf.setNumReduceTasks(numReds);
JobClient jobClient = new JobClient(conf);
RunningJob job = jobClient.submitJob(conf);
return jobClient.monitorAndPrintJob(conf, job);
}
项目:hadoop
文件:TestMRCJCSocketFactory.java
private void closeClient(JobClient client) {
try {
if (client != null)
client.close();
} catch (Exception ignored) {
// nothing we can do
ignored.printStackTrace();
}
}
项目:hadoop
文件:Job.java
/**
* @return the job client of this job
*/
public JobClient getJobClient() {
try {
return new JobClient(super.getJob().getConfiguration());
} catch (IOException ioe) {
return null;
}
}
项目:hadoop
文件:DistCpV1.java
/**
* Driver to copy srcPath to destPath depending on required protocol.
* @param conf configuration
* @param args arguments
*/
static void copy(final Configuration conf, final Arguments args
) throws IOException {
LOG.info("srcPaths=" + args.srcs);
if (!args.dryrun || args.flags.contains(Options.UPDATE)) {
LOG.info("destPath=" + args.dst);
}
JobConf job = createJobConf(conf);
checkSrcPath(job, args.srcs);
if (args.preservedAttributes != null) {
job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
}
if (args.mapredSslConf != null) {
job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
}
//Initialize the mapper
try {
if (setup(conf, job, args)) {
JobClient.runJob(job);
}
if(!args.dryrun) {
finalize(conf, job, args.dst, args.preservedAttributes);
}
} finally {
if (!args.dryrun) {
//delete tmp
fullyDelete(job.get(TMP_DIR_LABEL), job);
}
//delete jobDirectory
fullyDelete(job.get(JOB_DIR_LABEL), job);
}
}
项目:hadoop
文件:DistCpV1.java
/**
* Calculate how many maps to run.
* Number of maps is bounded by a minimum of the cumulative size of the
* copy / (distcp.bytes.per.map, default BYTES_PER_MAP or -m on the
* command line) and at most (distcp.max.map.tasks, default
* MAX_MAPS_PER_NODE * nodes in the cluster).
* @param totalBytes Count of total bytes for job
* @param job The job to configure
* @return Count of maps to run.
*/
private static int setMapCount(long totalBytes, JobConf job)
throws IOException {
int numMaps =
(int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
numMaps = Math.min(numMaps,
job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
new JobClient(job).getClusterStatus().getTaskTrackers()));
numMaps = Math.max(numMaps, 1);
job.setNumMapTasks(numMaps);
return numMaps;
}
项目:hadoop
文件:Statistics.java
public Statistics(
final Configuration conf, int pollingInterval, CountDownLatch startFlag)
throws IOException, InterruptedException {
UserGroupInformation ugi = UserGroupInformation.getLoginUser();
this.cluster = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
public JobClient run() throws IOException {
return new JobClient(new JobConf(conf));
}
});
this.jtPollingInterval = pollingInterval;
maxJobCompletedInInterval = conf.getInt(
MAX_JOBS_COMPLETED_IN_POLL_INTERVAL_KEY, 1);
this.startFlag = startFlag;
}
项目:hadoop
文件:TestGridmixSummary.java
/**
* Test {@link ClusterSummarizer}.
*/
@Test (timeout=20000)
public void testClusterSummarizer() throws IOException {
ClusterSummarizer cs = new ClusterSummarizer();
Configuration conf = new Configuration();
String jt = "test-jt:1234";
String nn = "test-nn:5678";
conf.set(JTConfig.JT_IPC_ADDRESS, jt);
conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
cs.start(conf);
assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());
ClusterStats cStats = ClusterStats.getClusterStats();
conf.set(JTConfig.JT_IPC_ADDRESS, "local");
conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
JobClient jc = new JobClient(conf);
cStats.setClusterMetric(jc.getClusterStatus());
cs.update(cStats);
// test
assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
assertEquals("Cluster summary test failed!", 0,
cs.getNumBlacklistedTrackers());
}
项目:ditb
文件:RowCounter.java
public int run(final String[] args) throws Exception {
// Make sure there are at least 3 parameters
if (args.length < 3) {
System.err.println("ERROR: Wrong number of parameters: " + args.length);
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}