Java class org.apache.hadoop.mapred.TextInputFormat: example source code
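The snippets below are collected verbatim from open-source projects. For orientation, here is a minimal sketch of the old-API (org.apache.hadoop.mapred) job setup that most of them build on; the job name, identity mapper/reducer, and argument-based paths are illustrative placeholders, not taken from any project listed here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class TextInputFormatSketch {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(TextInputFormatSketch.class);
    conf.setJobName("text-input-format-sketch");

    // TextInputFormat reads plain text files; keys are byte offsets
    // (LongWritable) and values are the lines themselves (Text).
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    // Placeholder mapper/reducer; real jobs plug in their own classes.
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
}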
Project: hadoop
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
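The test above only inspects what MultipleInputs records in the configuration. A driver that actually runs with per-path input formats and mappers could look roughly like this sketch; the mapper classes, intermediate types, and output path are assumptions for illustration, not part of the test.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.MultipleInputs;

public class MultipleInputsDriverSketch {

  // Mapper for the plain-text path: key is the byte offset, value is the line.
  public static class LineMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, Text> {
    public void map(LongWritable offset, Text line,
        OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
      out.collect(new Text("line"), line);
    }
  }

  // Mapper for the key/value path: KeyValueTextInputFormat already splits each line.
  public static class KvMapper extends MapReduceBase
      implements Mapper<Text, Text, Text, Text> {
    public void map(Text key, Text value,
        OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
      out.collect(key, value);
    }
  }

  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(MultipleInputsDriverSketch.class);
    conf.setJobName("multiple-inputs-sketch");

    // Each path gets its own InputFormat/Mapper pair, exactly as in the test above;
    // both mappers must agree on the intermediate key/value types.
    MultipleInputs.addInputPath(conf, new Path(args[0]), TextInputFormat.class, LineMapper.class);
    MultipleInputs.addInputPath(conf, new Path(args[1]), KeyValueTextInputFormat.class, KvMapper.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    JobClient.runJob(conf);
  }
}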
Project: hadoop
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: big-data-benchmark
File: HdfsToMap.java
private static void fillMap(JetInstance client, String name, String inputPath, int parallelism) throws Exception {
DAG dag = new DAG();
JobConf conf = new JobConf();
conf.setInputFormat(TextInputFormat.class);
TextInputFormat.addInputPath(conf, new Path(inputPath));
Vertex reader = dag.newVertex("reader", readHdfsP(conf, Util::entry));
Vertex mapper = dag.newVertex("mapper",
mapP((Map.Entry<LongWritable, Text> e) -> entry(e.getKey().get(), e.getValue().toString())));
Vertex writer = dag.newVertex("writer", writeMapP(name));
reader.localParallelism(parallelism);
mapper.localParallelism(parallelism);
writer.localParallelism(parallelism);
dag.edge(between(reader, mapper));
dag.edge(between(mapper, writer));
JobConfig jobConfig = new JobConfig();
jobConfig.addClass(HdfsToMap.class);
client.newJob(dag, jobConfig).join();
}
Project: ditb
File: TableMapReduceUtil.java
/**
* @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
*/
public static void addDependencyJars(JobConf job) throws IOException {
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
job,
// when making changes here, consider also mapreduce.TableMapReduceUtil
// pull job classes
job.getMapOutputKeyClass(),
job.getMapOutputValueClass(),
job.getOutputKeyClass(),
job.getOutputValueClass(),
job.getPartitionerClass(),
job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
job.getCombinerClass());
}
Project: aliyun-oss-hadoop-fs
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: aliyun-oss-hadoop-fs
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: Hadoop-CaseStudies
File: WeatherData.java
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WeatherData.class);
conf.setJobName("temp");
// Note: the mapper's output types are not the defaults, so we have to
// set the following properties explicitly.
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(Text.class);
conf.setMapperClass(MaxTemperatureMapper.class);
conf.setReducerClass(MaxTemperatureReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Project: big-c
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: big-c
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: THUTag
File: ImportDouban.java
@Override
public void run(String[] args) throws Exception {
Flags flags = new Flags();
flags.addWithDefaultValue(
"tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
flags.addWithDefaultValue(
"subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
flags.add("output");
flags.parseAndCheck(args);
JobConf job = new JobConf(this.getClass());
job.setJobName("convert-douban-raw-to-posts");
MapReduceHelper.setAllOutputTypes(job, Text.class);
MapReduceHelper.setMR(
job, DoubanRawMapper.class, DoubanToPostReducer.class);
job.setInputFormat(TextInputFormat.class);
TextInputFormat.addInputPath(
job, new Path(flags.getString("tag_subject_data")));
TextInputFormat.addInputPath(
job, new Path(flags.getString("subject_data")));
job.setOutputFormat(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(
job, new Path(flags.getString("output")));
JobClient.runJob(job);
}
Project: tracing-framework
File: ReadExistingDataJob.java
public void configure(JobConf job) {
// Set the mapper and reducers
job.setMapperClass(ReadDataJob.TestMapper.class);
// Make sure this jar is included
job.setJarByClass(ReadDataJob.TestMapper.class);
// Specify the input and output data formats
job.setInputFormat(TextInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
// Turn off speculative execution
job.setMapSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
// Add the job input path
FileInputFormat.addInputPath(job, new Path(this.input_path));
}
Project: tracing-framework
File: ReadDataJob.java
public void configure(JobConf job) {
// Set the mapper and reducers
job.setMapperClass(TestMapper.class);
// job.setReducerClass(TestReducer.class);
// Set the output types of the mapper and reducer
// job.setMapOutputKeyClass(IntWritable.class);
// job.setMapOutputValueClass(NullWritable.class);
// job.setOutputKeyClass(NullWritable.class);
// job.setOutputValueClass(NullWritable.class);
// Make sure this jar is included
job.setJarByClass(TestMapper.class);
// Specify the input and output data formats
job.setInputFormat(TextInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
// Turn off speculative execution
job.setMapSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
// Add the job input path
FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
Project: bdelab
File: WordCountOldAPI.java
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(WordCountOldAPI.class);
conf.setJobName("old wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Project: hadoop-2.6.0-cdh5.4.3
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: hadoop-2.6.0-cdh5.4.3
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-2.6.0-cdh5.4.3
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean("mapred.skip.on", false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: hadoop-EAR
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-EAR
File: TxnGenerator.java
private static JobConf createJobConf(Configuration conf) throws IOException {
JobConf jobConf = new JobConf(conf);
String jobName = "transaction_generator";
jobConf.setJobName(jobName);
String splitDir = workplace + "split/";
jobConf.set(TEST_DIR_LABEL, workplace);
jobConf.setMapSpeculativeExecution(false);
jobConf.setJarByClass(TxnGenerator.class);
jobConf.setMapperClass(GeneratorMapper.class);
jobConf.setInputFormat(TextInputFormat.class);
FileInputFormat.addInputPath(jobConf, new Path(splitDir));
Random random = new Random();
FileOutputFormat.setOutputPath(jobConf, new Path(workplace, "output" + random.nextLong()));
jobConf.setNumReduceTasks(0);
jobConf.setNumMapTasks(numMappers);
createSplitFiles(conf, new Path(splitDir));
return jobConf;
}
Project: hadoop-EAR
File: PipeMapper.java
@SuppressWarnings("unchecked")
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean("mapred.skip.on", false);
String inputFormatClassName = job.getClass("mapred.input.format.class",
TextInputFormat.class).getCanonicalName();
ignoreKey = ignoreKey || inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: apex-malhar
File: WordCount.java
public void run(String[] args) throws Exception
{
JobConf conf = new JobConf(this.getClass());
conf.setJobName("wordcount");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
Project: hadoop-plus
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-plus
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: systemml
File: ReaderTextCSVParallel.java
public CSVReadTask(InputSplit split, SplitOffsetInfos offsets,
TextInputFormat informat, JobConf job, MatrixBlock dest,
long rlen, long clen, boolean hasHeader, String delim,
boolean fill, double fillValue, int splitCount)
{
_split = split;
_splitoffsets = offsets; // new SplitOffsetInfos(offsets);
_sparse = dest.isInSparseFormat();
_informat = informat;
_job = job;
_dest = dest;
_rlen = rlen;
_clen = clen;
_isFirstSplit = (splitCount == 0);
_hasHeader = hasHeader;
_fill = fill;
_fillValue = fillValue;
_delim = delim;
_rc = true;
_splitCount = splitCount;
}
Project: hops
File: TestMultipleInputs.java
@Test
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: systemml
File: FrameReaderTextCell.java
protected void readTextCellFrameFromHDFS( Path path, JobConf job, FileSystem fs, FrameBlock dest,
ValueType[] schema, String[] names, long rlen, long clen)
throws IOException
{
if( fs.isDirectory(path) ) {
FileInputFormat.addInputPath(job, path);
TextInputFormat informat = new TextInputFormat();
informat.configure(job);
InputSplit[] splits = informat.getSplits(job, 1);
for(InputSplit split: splits)
readTextCellFrameFromInputSplit(split, informat, job, dest);
}
else {
readRawTextCellFrameFromHDFS(path, job, fs, dest, schema, names, rlen, clen);
}
}
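The helper readTextCellFrameFromInputSplit is not shown in this excerpt. With the old mapred API, consuming one of the splits obtained above generally follows the pattern sketched below; the class and method names are placeholders, not the SystemML implementation.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public final class SplitReadSketch {

  // Reads every line of one split; the caller decides what to do with each line.
  public static void readSplit(InputSplit split, TextInputFormat informat, JobConf job)
      throws IOException {
    RecordReader<LongWritable, Text> reader =
        informat.getRecordReader(split, job, Reporter.NULL);
    try {
      LongWritable key = reader.createKey();   // byte offset of the line
      Text value = reader.createValue();       // the line itself
      while (reader.next(key, value)) {
        // parse 'value' here, e.g. split a text-cell line into row, column, value
        System.out.println(value);
      }
    } finally {
      reader.close();
    }
  }
}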
Project: hops
File: PipeMapper.java
public void configure(JobConf job) {
super.configure(job);
//disable the auto increment of the counter. For streaming, no of
//processed records could be different(equal or less) than the no of
//records input.
SkipBadRecords.setAutoIncrMapperProcCount(job, false);
skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
}
try {
mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
}
}
Project: HIndex
File: TableMapReduceUtil.java
/**
* @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
*/
public static void addDependencyJars(JobConf job) throws IOException {
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
job,
// when making changes here, consider also mapreduce.TableMapReduceUtil
// pull job classes
job.getMapOutputKeyClass(),
job.getMapOutputValueClass(),
job.getOutputKeyClass(),
job.getOutputValueClass(),
job.getPartitionerClass(),
job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
job.getCombinerClass());
}
Project: pbase
File: TableMapReduceUtil.java
/**
* @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
*/
public static void addDependencyJars(JobConf job) throws IOException {
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
job,
// when making changes here, consider also mapreduce.TableMapReduceUtil
// pull job classes
job.getMapOutputKeyClass(),
job.getMapOutputValueClass(),
job.getOutputKeyClass(),
job.getOutputValueClass(),
job.getPartitionerClass(),
job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
job.getCombinerClass());
}
Project: FlexMap
File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
MapClass.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class, MapClass2.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
.getMapperTypeMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
assertEquals(MapClass.class, maps.get(new Path("/foo")));
assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: presto
File: HiveUtil.java
static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget)
{
String inputFormatName = getInputFormatName(schema);
try {
JobConf jobConf = new JobConf(configuration);
Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName);
if (symlinkTarget && (inputFormatClass == SymlinkTextInputFormat.class)) {
// symlink targets are always TextInputFormat
inputFormatClass = TextInputFormat.class;
}
return ReflectionUtils.newInstance(inputFormatClass, jobConf);
}
catch (ClassNotFoundException | RuntimeException e) {
throw new RuntimeException("Unable to create input format " + inputFormatName, e);
}
}
Project: circus-train
File: TestUtils.java
public static Table createUnpartitionedTable(
HiveMetaStoreClient metaStoreClient,
String database,
String table,
URI location)
throws TException {
Table hiveTable = new Table();
hiveTable.setDbName(database);
hiveTable.setTableName(table);
hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
hiveTable.putToParameters("EXTERNAL", "TRUE");
StorageDescriptor sd = new StorageDescriptor();
sd.setCols(DATA_COLUMNS);
sd.setLocation(location.toString());
sd.setParameters(new HashMap<String, String>());
sd.setInputFormat(TextInputFormat.class.getName());
sd.setOutputFormat(TextOutputFormat.class.getName());
sd.setSerdeInfo(new SerDeInfo());
sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");
hiveTable.setSd(sd);
metaStoreClient.createTable(hiveTable);
ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));
return hiveTable;
}
Project: circus-train
File: TestUtils.java
public static Table createPartitionedTable(
HiveMetaStoreClient metaStoreClient,
String database,
String table,
URI location)
throws Exception {
Table hiveTable = new Table();
hiveTable.setDbName(database);
hiveTable.setTableName(table);
hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
hiveTable.putToParameters("EXTERNAL", "TRUE");
hiveTable.setPartitionKeys(PARTITION_COLUMNS);
StorageDescriptor sd = new StorageDescriptor();
sd.setCols(DATA_COLUMNS);
sd.setLocation(location.toString());
sd.setParameters(new HashMap<String, String>());
sd.setInputFormat(TextInputFormat.class.getName());
sd.setOutputFormat(TextOutputFormat.class.getName());
sd.setSerdeInfo(new SerDeInfo());
sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");
hiveTable.setSd(sd);
metaStoreClient.createTable(hiveTable);
ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));
return hiveTable;
}
Project: hadoop
File: TestMultipleInputs.java
public void testAddInputPathWithFormat() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
}
Project: hadoop
File: TestKeyFieldBasedComparator.java
public void configure(String keySpec, int expect) throws Exception {
Path testdir = new Path(TEST_DIR.getAbsolutePath());
Path inDir = new Path(testdir, "in");
Path outDir = new Path(testdir, "out");
FileSystem fs = getFileSystem();
fs.delete(testdir, true);
conf.setInputFormat(TextInputFormat.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);
conf.setNumMapTasks(1);
conf.setNumReduceTasks(1);
conf.setOutputFormat(TextOutputFormat.class);
conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
conf.setKeyFieldComparatorOptions(keySpec);
conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
conf.setMapperClass(InverseMapper.class);
conf.setReducerClass(IdentityReducer.class);
if (!fs.mkdirs(testdir)) {
throw new IOException("Mkdirs failed to create " + testdir.toString());
}
if (!fs.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
// set up input data in 2 files
Path inFile = new Path(inDir, "part0");
FileOutputStream fos = new FileOutputStream(inFile.toString());
fos.write((line1 + "\n").getBytes());
fos.write((line2 + "\n").getBytes());
fos.close();
JobClient jc = new JobClient(conf);
RunningJob r_job = jc.submitJob(conf);
while (!r_job.isComplete()) {
Thread.sleep(1000);
}
if (!r_job.isSuccessful()) {
fail("Oops! The job broke due to an unexpected error");
}
Path[] outputFiles = FileUtil.stat2Paths(
getFileSystem().listStatus(outDir,
new Utils.OutputFileUtils.OutputFilesFilter()));
if (outputFiles.length > 0) {
InputStream is = getFileSystem().open(outputFiles[0]);
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
String line = reader.readLine();
//make sure we get what we expect as the first line, and also
//that we have two lines
if (expect == 1) {
assertTrue(line.startsWith(line1));
} else if (expect == 2) {
assertTrue(line.startsWith(line2));
}
line = reader.readLine();
if (expect == 1) {
assertTrue(line.startsWith(line2));
} else if (expect == 2) {
assertTrue(line.startsWith(line1));
}
reader.close();
}
}
Project: hadoop
File: TestMROldApiJobs.java
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps,
int numReds) throws IOException, InterruptedException {
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outDir)) {
fs.delete(outDir, true);
}
if (!fs.exists(inDir)) {
fs.mkdirs(inDir);
}
String input = "The quick brown fox\n" + "has many silly\n"
+ "red fox sox\n";
for (int i = 0; i < numMaps; ++i) {
DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
file.writeBytes(input);
file.close();
}
DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
conf.setOutputCommitter(CustomOutputCommitter.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setNumMapTasks(numMaps);
conf.setNumReduceTasks(numReds);
JobClient jobClient = new JobClient(conf);
RunningJob job = jobClient.submitJob(conf);
return jobClient.monitorAndPrintJob(conf, job);
}
Project: hadoop
File: TestMRAppWithCombiner.java
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
JobConf conf = new JobConf(mrCluster.getConfig());
int numMaps = 5;
int numReds = 2;
Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
"testCombinerShouldUpdateTheReporter-in");
Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
"testCombinerShouldUpdateTheReporter-out");
createInputOutPutFolder(in, out, numMaps);
conf.setJobName("test-job-with-combiner");
conf.setMapperClass(IdentityMapper.class);
conf.setCombinerClass(MyCombinerToCheckReporter.class);
//conf.setJarByClass(MyCombinerToCheckReporter.class);
conf.setReducerClass(IdentityReducer.class);
DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
conf.setOutputCommitter(CustomOutputCommitter.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(conf, in);
FileOutputFormat.setOutputPath(conf, out);
conf.setNumMapTasks(numMaps);
conf.setNumReduceTasks(numReds);
runJob(conf);
}
Project: hadoop
File: PipesNonJavaInputFormat.java
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
// Delegate the generation of input splits to the 'original' InputFormat
return ReflectionUtils.newInstance(
job.getClass(Submitter.INPUT_FORMAT,
TextInputFormat.class,
InputFormat.class), job).getSplits(job, numSplits);
}
Project: hadoop
File: FieldSelectionMapReduce.java
public void configure(JobConf job) {
this.fieldSeparator = job.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR,
"\t");
this.mapOutputKeyValueSpec = job.get(
FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
job.getInputFormat().getClass().getCanonicalName());
this.reduceOutputKeyValueSpec = job.get(
FieldSelectionHelper.REDUCE_OUTPUT_KEY_VALUE_SPEC, "0-:");
parseOutputKeyValueSpec();
LOG.info(specToString());
}
Project: hadoop
File: TestStreamingOutputKeyValueTypes.java
@Test
public void testDefaultToIdentityReducer() throws Exception {
args.add("-mapper");args.add(map);
args.add("-jobconf");
args.add("mapreduce.task.files.preserve.failedtasks=true");
args.add("-jobconf");
args.add("stream.tmpdir="+System.getProperty("test.build.data","/tmp"));
args.add("-inputformat");args.add(TextInputFormat.class.getName());
super.testCommandLine();
}
Project: aliyun-oss-hadoop-fs
File: TestMultipleInputs.java
public void testAddInputPathWithFormat() {
final JobConf conf = new JobConf();
MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
MultipleInputs.addInputPath(conf, new Path("/bar"),
KeyValueTextInputFormat.class);
final Map<Path, InputFormat> inputs = MultipleInputs
.getInputFormatMap(conf);
assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
.getClass());
}