@Override
public boolean next( final NullWritable key, final ColumnAndIndex value ) throws IOException {
  if( currentSpread == null || currentIndex == currentIndexList.size() ){
    if( ! nextReader() ){
      updateCounter( reader.getReadStats() );
      isEnd = true;
      return false;
    }
  }

  spreadColumn.setSpread( currentSpread );
  value.column = spreadColumn;
  value.index = currentIndexList.get( currentIndex );
  value.columnIndex = spreadCounter.get();
  currentIndex++;

  return true;
}
/** {@inheritDoc} */
@Override
public void init(Job job) {
  // setup mapper
  job.setMapperClass(PartitionMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(SummationWritable.class);

  // setup partitioner
  job.setPartitionerClass(IndexPartitioner.class);

  // setup reducer
  job.setReducerClass(SummingReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(TaskResult.class);
  final Configuration conf = job.getConfiguration();
  final int nParts = conf.getInt(N_PARTS, 1);
  job.setNumReduceTasks(nParts);

  // setup input
  job.setInputFormatClass(SummationInputFormat.class);
}
public void testEmptyJoin() throws Exception {
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  conf.set(CompositeInputFormat.JOIN_EXPR,
      CompositeInputFormat.compose("outer", MapReduceTestUtil.Fake_IF.class, src));
  MapReduceTestUtil.Fake_IF.setKeyClass(conf, MapReduceTestUtil.IncomparableKey.class);
  Job job = Job.getInstance(conf);
  job.setInputFormatClass(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputKeyClass(MapReduceTestUtil.IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  base.getFileSystem(conf).delete(base, true);
}
@Override
protected void map(LongWritable key, SqoopRecord val, Context context)
    throws IOException, InterruptedException {
  try {
    // Loading of LOBs was delayed until we have a Context.
    val.loadLargeObjects(lobLoader);
  } catch (SQLException sqlE) {
    throw new IOException(sqlE);
  }

  GenericRecord outKey = AvroUtil.toGenericRecord(val.getFieldMap(), schema,
      bigDecimalFormatString);
  wrapper.datum(outKey);
  context.write(wrapper, NullWritable.get());
}
@Override
protected void configureMapper(Job job, String tableName, String tableClassName)
    throws ClassNotFoundException, IOException {
  if (isHCatJob) {
    throw new IOException("Sqoop-HCatalog Integration is not supported.");
  }
  switch (getInputFileType()) {
    case AVRO_DATA_FILE:
      throw new IOException("Avro data file is not supported.");
    case SEQUENCE_FILE:
    case UNKNOWN:
    default:
      job.setMapperClass(getMapperClass());
  }

  // Concurrent writes of the same records would be problematic.
  ConfigurationHelper.setJobMapSpeculativeExecution(job, false);

  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
}
/**
 * Read the cut points from the given IFile.
 * @param fs The file system
 * @param p The path to read
 * @param keyClass The map output key class
 * @param conf The job config
 * @throws IOException
 */
                               // matching key types enforced by passing in
@SuppressWarnings("unchecked") // map output key class
private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass,
    Configuration conf) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  ArrayList<K> parts = new ArrayList<K>();
  K key = ReflectionUtils.newInstance(keyClass, conf);
  NullWritable value = NullWritable.get();
  try {
    while (reader.next(key, value)) {
      parts.add(key);
      key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    reader = null;
  } finally {
    IOUtils.cleanup(LOG, reader);
  }
  return parts.toArray((K[]) Array.newInstance(keyClass, parts.size()));
}
public void map(LongWritable key, Record val, Context context)
    throws IOException, InterruptedException {
  try {
    odpsImpl.parse(val);
    context.write(odpsImpl, NullWritable.get());
  } catch (Exception e) {
    LOG.error("Exception raised during data export");
    LOG.error("Exception: ", e);
    LOG.error("On input: " + val);
    LOG.error("At position " + key);

    InputSplit is = context.getInputSplit();
    LOG.error("");
    LOG.error("Currently processing split:");
    LOG.error(is);
    LOG.error("");
    LOG.error("This issue might not necessarily be caused by current input");
    LOG.error("due to the batching nature of export.");
    LOG.error("");
    throw new IOException("Can't export data, please check failed map task logs", e);
  }
}
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();

  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (!reader.hasNext() || reader.pastSync(end)) {
    key = null;
    value = null;
    return false;
  }
  if (key == null) {
    key = new AvroWrapper<T>();
  }
  if (value == null) {
    value = NullWritable.get();
  }
  key.datum(reader.next(key.datum()));
  return true;
}
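// A minimal companion sketch (assumed, not taken from the snippet above): the remaining
// RecordReader accessors that normally accompany nextKeyValue(), returning the AvroWrapper
// key and the NullWritable value cached by the reader.
@Override
public AvroWrapper<T> getCurrentKey() throws IOException, InterruptedException {
  return key;
}

@Override
public NullWritable getCurrentValue() throws IOException, InterruptedException {
  return value;
}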
/**
 * {@inheritDoc}
 */
protected void reduce(final Text key, final Iterable<OrcValue> values, final Context context)
    throws IOException, InterruptedException {
  final Configuration configuration = context.getConfiguration();
  final String sourcePath = configuration.get("compactionSourcePath");
  final String targetPath = configuration.get("compactionTargetPath");

  // The reducer stores data under the target directory, retaining the directory
  // structure of the source files.
  String filePath = key.toString().replace(sourcePath, targetPath);
  if (key.toString().endsWith("/")) {
    filePath = filePath.concat("file");
  }
  log.info("Compaction output path {}", filePath);

  final URI uri = URI.create(filePath);
  final MultipleOutputs<NullWritable, OrcValue> multipleOutputs =
      new MultipleOutputs<NullWritable, OrcValue>(context);
  try {
    for (final OrcValue text : values) {
      multipleOutputs.write(NullWritable.get(), text, uri.toString());
    }
  } finally {
    multipleOutputs.close();
  }
}
@Override
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      job.setMapperClass(GenDCDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDCDataFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setJarByClass(GenerateDistCacheData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
      job.submit();
      return job;
    }
  });
  return job;
}
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();

  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(null);
  }
}
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] parsedTriple = rdfParser.parseTriple(value.toString());
  if (parsedTriple != null) {
    // Convert literals to Pig types, if possible
    parsedTriple[2] = Util.toPigTypes(parsedTriple[2]);
    // Use the predicate for vertical partitioning
    multipleOutputs.write(NullWritable.get(),
        new Text(parsedTriple[0] + "\t" + parsedTriple[2]),
        Util.generateFileName(parsedTriple[1]));
    // Write all parsed triples also to "inputData" for queries where the predicate is not known
    multipleOutputs.write(NullWritable.get(),
        new Text(parsedTriple[0] + "\t" + parsedTriple[1] + "\t" + parsedTriple[2]),
        Util.generateFileName("inputData"));
    context.getCounter("RDF Dataset Properties", VALID_TRIPLES).increment(1);
  } else {
    if (value.getLength() == 0 || value.toString().startsWith("@")) {
      System.out.println("IGNORING: " + value);
      context.getCounter("RDF Dataset Properties", IGNORED_LINES).increment(1);
    } else {
      System.out.println("DISCARDED: " + value);
      context.getCounter("RDF Dataset Properties", INVALID_TRIPLES).increment(1);
    }
  }
}
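// A minimal driver sketch (assumed wiring; class and path names are illustrative, not from
// the original project) for the vertical-partitioning mapper above. The three-argument
// MultipleOutputs.write(key, value, baseOutputPath) variant takes its record writer from the
// job's output format, and LazyOutputFormat keeps the default output from producing empty
// part files.
Job job = Job.getInstance(new Configuration(), "rdf-vertical-partitioning");
job.setJarByClass(VerticalPartitioningDriver.class);   // hypothetical driver class
job.setMapperClass(RdfTripleMapper.class);             // the mapper shown above (name assumed)
job.setNumReduceTasks(0);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));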
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose("outer", Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
public RecordReader<NullWritable, NullWritable> getRecordReader(
    InputSplit ignored, JobConf conf, Reporter reporter) {
  return new RecordReader<NullWritable, NullWritable>() {
    private boolean done = false;

    public boolean next(NullWritable key, NullWritable value) throws IOException {
      if (done) {
        return false;
      }
      done = true;
      return true;
    }

    public NullWritable createKey() {
      return NullWritable.get();
    }

    public NullWritable createValue() {
      return NullWritable.get();
    }

    public long getPos() throws IOException {
      return 0L;
    }

    public void close() throws IOException {
    }

    public float getProgress() throws IOException {
      return 0.0f;
    }
  };
}
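// A hedged companion sketch (assumed, not part of the original): a getSplits() that returns a
// single dummy split, so the one-shot reader above produces exactly one empty record per job.
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
  return new InputSplit[] {
      new FileSplit(new Path("ignored"), 0, 1, (String[]) null)
  };
}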
public Job createJob() throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
public void testTotalOrderBinarySearch() throws Exception {
  TotalOrderPartitioner<Text, NullWritable> partitioner =
      new TotalOrderPartitioner<Text, NullWritable>();
  Configuration conf = new Configuration();
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalorderbinarysearch", conf, splitStrings);
  conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : testStrings) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
@Override
protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
    int numRegions, int expectedNumSplits) throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testWithMockedMapReduce");
  try {
    createTableAndSnapshot(
        util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

    JobConf job = new JobConf(util.getConfiguration());
    Path tmpTableDir = util.getRandomDir();

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
        COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

    // mapred does not appear to support start and end keys
    verifyWithMockedMapReduce(job, numRegions, expectedNumSplits,
        getStartRow(), getEndRow());
  } finally {
    util.getHBaseAdmin().deleteSnapshot(snapshotName);
    util.deleteTable(tableName);
    tearDownCluster();
  }
}
/**
 * Verify IntervalSampler contract, that samples are taken at regular
 * intervals from the given splits.
 */
@Test
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testIntervalSampler() throws Exception {
  final int TOT_SPLITS = 16;
  final int PER_SPLIT_SAMPLE = 4;
  final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
  final double FREQ = 1.0 / TOT_SPLITS;
  InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new InputSampler.IntervalSampler<IntWritable, NullWritable>(
          FREQ, NUM_SAMPLES);
  int inits[] = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
/**
 * Verify IntervalSampler in mapred.lib.InputSampler, which was added back
 * for binary compatibility with M/R 1.x.
 */
@Test(timeout = 30000)
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testMapredIntervalSampler() throws Exception {
  final int TOT_SPLITS = 16;
  final int PER_SPLIT_SAMPLE = 4;
  final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
  final double FREQ = 1.0 / TOT_SPLITS;
  org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler
          <IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
  int inits[] = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
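// A minimal sketch of how a sampler is typically used outside these tests (assumed driver
// code; the job name and partition-file path are illustrative): the sampled keys are written
// to a partition file that TotalOrderPartitioner reads at setup time, giving globally ordered
// output across reducers.
Job job = Job.getInstance(new Configuration(), "total-order-sort");
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("_partitions"));
InputSampler.Sampler<IntWritable, NullWritable> sampler =
    new InputSampler.RandomSampler<IntWritable, NullWritable>(0.1, 10000, 10);
InputSampler.writePartitionFile(job, sampler);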
private static <T extends WritableComparable<?>> Path writePartitionFile(
    String testname, Configuration conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir = new Path(System.getProperty("test.build.data", "/tmp"))
      .makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    w = SequenceFile.createWriter(fs, conf, p,
        splits[0].getClass(), NullWritable.class,
        SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], NullWritable.get());
    }
  } finally {
    if (null != w) {
      w.close();
    }
  }
  return p;
}
@Override
public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
    String snapshotName, Path tmpTableDir) throws Exception {
  Job job = new Job(UTIL.getConfiguration());
  TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
      new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);
}
/** {@inheritDoc} */
@Override
public void init(Job job) {
  // setup mapper
  job.setMapperClass(SummingMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TaskResult.class);

  // zero reducer
  job.setNumReduceTasks(0);

  // setup input
  job.setInputFormatClass(PartitionInputFormat.class);
}
@Override
public boolean next( final NullWritable key, final VectorizedRowBatch outputBatch ) throws IOException {
  outputBatch.reset();
  setting.setPartitionValues( outputBatch );

  if( indexSize <= currentIndex ){
    if( ! currentReader.hasNext() ){
      updateCounter( currentReader.getReadStats() );
      outputBatch.endOfFile = true;
      isEnd = true;
      return false;
    }
    while( ! setSpread() ){
      if( ! currentReader.hasNext() ){
        updateCounter( currentReader.getReadStats() );
        outputBatch.endOfFile = true;
        isEnd = true;
        return false;
      }
    }
  }

  int maxSize = outputBatch.getMaxSize();
  if( indexSize < currentIndex + maxSize ){
    maxSize = indexSize - currentIndex;
  }

  for( int colIndex : needColumnIds ){
    assignors[colIndex].setColumnVector( outputBatch.cols[colIndex] , currentIndexList , currentIndex , maxSize );
  }
  outputBatch.size = maxSize;

  currentIndex += maxSize;
  if( indexSize <= currentIndex && ! currentReader.hasNext() ){
    outputBatch.endOfFile = true;
  }

  return outputBatch.size > 0;
}
@Override
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      job.setMapperClass(SleepMapper.class);
      job.setReducerClass(SleepReducer.class);
      job.setNumReduceTasks((mapTasksOnly) ? 0 : jobdesc.getNumberReduces());
      job.setMapOutputKeyClass(GridmixKey.class);
      job.setMapOutputValueClass(NullWritable.class);
      job.setSortComparatorClass(GridmixKey.Comparator.class);
      job.setGroupingComparatorClass(SpecGroupingComparator.class);
      job.setInputFormatClass(SleepInputFormat.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setPartitionerClass(DraftPartitioner.class);
      job.setJarByClass(SleepJob.class);
      job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
      job.submit();
      return job;
    }
  });
  return job;
}
@Test public void testReduceParentKey() throws IOException { configuration.set("compactionSourcePath", "/src/path"); configuration.set("compactionTargetPath", "/target/path"); values.add(value1); values.add(value2); reduceDriver.withInput(inputParentKey, values); final BytesWritable bytesWritable = new BytesWritable("value/src/path".getBytes()); reduceDriver.withPathOutput(NullWritable.get(), value1, "value/target/path"); reduceDriver.withPathOutput(NullWritable.get(), value2, "value/target/path"); reduceDriver.runTest(); }
/**
 * Create a handler that will handle any records output from the application.
 * @param collector the "real" collector that takes the output
 * @param reporter the reporter for reporting progress
 * @param recordReader the Java-side record reader associated with this task, if any
 * @param expectedDigest the digest expected from the application during authentication
 */
public OutputHandler(OutputCollector<K, V> collector, Reporter reporter,
    RecordReader<FloatWritable, NullWritable> recordReader,
    String expectedDigest) {
  this.reporter = reporter;
  this.collector = collector;
  this.recordReader = recordReader;
  this.expectedDigest = expectedDigest;
}
@Test
public void T_allTest_1() throws IOException{
  String dirName = this.getClass().getClassLoader().getResource( "io/out" ).getPath();
  String outPath = String.format( "%s/TestMDSHiveDirectVectorizedReader_T_allTest_1.mds" , dirName );
  createFile( outPath );

  HiveVectorizedReaderSetting setting = getHiveVectorizedReaderSetting(
      new HiveReaderSetting( new Configuration() , new OrExpressionNode() , true , false , false ) );
  File inFile = new File( outPath );
  MDSHiveDirectVectorizedReader reader = new MDSHiveDirectVectorizedReader(
      new FileInputStream( inFile ) , inFile.length() , 0 , inFile.length() , setting , new DummyJobReporter() );
  NullWritable key = reader.createKey();
  VectorizedRowBatch value = reader.createValue();
  int colCount = 0;
  while( reader.next( key , value ) ){
    BytesColumnVector str = (BytesColumnVector)value.cols[0];
    LongColumnVector num2 = (LongColumnVector)value.cols[2];
    LongColumnVector p = (LongColumnVector)value.cols[4];
    assertEquals( null , value.cols[1] );
    assertEquals( null , value.cols[3] );
    for( int i = 0 ; i < value.size ; i++,colCount++ ){
      assertEquals( new String( str.vector[i] , str.start[i] , str.length[i] ) , "a-" + colCount );
      assertEquals( num2.vector[i] , colCount * 2 );
      assertEquals( p.vector[0] , 100 );
    }
  }
  reader.getPos();
  reader.getProgress();
  reader.close();
}
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  // input: this -> <is=1000, is book=10>
  // Group the following phrases by count, highest count first.
  TreeMap<Integer, List<String>> tm =
      new TreeMap<Integer, List<String>>(Collections.reverseOrder());
  for (Text val : values) {
    String curVal = val.toString().trim();
    String word = curVal.split("=")[0].trim();
    int count = Integer.parseInt(curVal.split("=")[1].trim());
    if (tm.containsKey(count)) {
      tm.get(count).add(word);
    } else {
      List<String> list = new ArrayList<>();
      list.add(word);
      tm.put(count, list);
    }
  }

  // Emit at most n records; the original loop incremented its counter both per count group
  // and per written word, so count each written record exactly once instead.
  Iterator<Integer> iter = tm.keySet().iterator();
  int emitted = 0;
  while (iter.hasNext() && emitted < n) {
    int keyCount = iter.next();
    for (String curWord : tm.get(keyCount)) {
      if (emitted >= n) {
        break;
      }
      context.write(new DBOutputWritable(key.toString(), curWord, keyCount),
          NullWritable.get());
      emitted++;
    }
  }
}
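// A minimal driver sketch (assumed; the JDBC driver, URL, table, and column names are
// illustrative) for the reducer above: DBOutputFormat takes the DBOutputWritable key, invokes
// its write(PreparedStatement), and ignores the NullWritable value.
DBConfiguration.configureDB(job.getConfiguration(),
    "com.mysql.jdbc.Driver",                        // JDBC driver class (assumed)
    "jdbc:mysql://localhost:3306/ngrams",           // connection URL (assumed)
    "user", "password");                            // credentials (assumed)
job.setOutputFormatClass(DBOutputFormat.class);
job.setOutputKeyClass(DBOutputWritable.class);
job.setOutputValueClass(NullWritable.class);
DBOutputFormat.setOutput(job, "phrase_counts",      // table name (assumed)
    "starting_phrase", "following_word", "count");  // column names (assumed)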
public void testTotalOrderCustomComparator() throws Exception {
  TotalOrderPartitioner<Text, NullWritable> partitioner =
      new TotalOrderPartitioner<Text, NullWritable>();
  Configuration conf = new Configuration();
  Text[] revSplitStrings = Arrays.copyOf(splitStrings, splitStrings.length);
  Arrays.sort(revSplitStrings, new ReverseStringComparator());
  Path p = TestTotalOrderPartitioner.<Text>writePartitionFile(
      "totalordercustomcomparator", conf, revSplitStrings);
  conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
  conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
  conf.setClass(MRJobConfig.KEY_COMPARATOR,
      ReverseStringComparator.class, RawComparator.class);
  ArrayList<Check<Text>> revCheck = new ArrayList<Check<Text>>();
  revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
  revCheck.add(new Check<Text>(new Text("aaabb"), 9));
  revCheck.add(new Check<Text>(new Text("aabbb"), 9));
  revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
  revCheck.add(new Check<Text>(new Text("babbb"), 8));
  revCheck.add(new Check<Text>(new Text("baabb"), 8));
  revCheck.add(new Check<Text>(new Text("yai"), 1));
  revCheck.add(new Check<Text>(new Text("yak"), 1));
  revCheck.add(new Check<Text>(new Text("z"), 0));
  revCheck.add(new Check<Text>(new Text("ddngo"), 4));
  revCheck.add(new Check<Text>(new Text("hi"), 3));
  try {
    partitioner.setConf(conf);
    NullWritable nw = NullWritable.get();
    for (Check<Text> chk : revCheck) {
      assertEquals(chk.data.toString(), chk.part,
          partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
    }
  } finally {
    p.getFileSystem(conf).delete(p, true);
  }
}
@SuppressWarnings("unchecked") // Explicit check for value class agreement public V createValue() { if (null == valueclass) { Class<?> cls = kids[kids.length -1].createValue().getClass(); for (int i = kids.length -1; cls.equals(NullWritable.class); i--) { cls = kids[i].createValue().getClass(); } valueclass = cls.asSubclass(Writable.class); } if (valueclass.equals(NullWritable.class)) { return (V) NullWritable.get(); } return (V) ReflectionUtils.newInstance(valueclass, null); }
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(
    TaskAttemptContext ctx, Path outputPath) throws IOException {
  // the writers require a header in order to create a codec, even if
  // the header isn't being written out
  setSAMHeader(samheader);
  setWriteHeader(writeHeader);
  return super.getRecordWriter(ctx, outputPath);
}
@Override
public void reduce(Text key, Iterable<TextPair> values, Context context)
    throws IOException, InterruptedException {
  for (TextPair value : values) {
    // Use key (Predicate) as folder name for Vertical Partitioning
    multipleOutputs.write(NullWritable.get(),
        new Text(value.getFirst() + "\t" + value.getSecond()),
        Util.generateFileName(key.toString()));
    // Write all parsed triples also to "inputData" for queries where Predicate is not known
    multipleOutputs.write(NullWritable.get(),
        new Text(value.getFirst() + "\t" + key.toString() + "\t" + value.getSecond()),
        Util.generateFileName("inputData"));
  }
}
@Override
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  ugi.doAs(new PrivilegedExceptionAction<Job>() {
    public Job run() throws IOException, ClassNotFoundException, InterruptedException {
      // check if compression emulation is enabled
      if (CompressionEmulationUtil
          .isCompressionEmulationEnabled(job.getConfiguration())) {
        CompressionEmulationUtil.configure(job);
      } else {
        configureRandomBytesDataGenerator();
      }
      job.submit();
      return job;
    }

    private void configureRandomBytesDataGenerator() {
      job.setMapperClass(GenDataMapper.class);
      job.setNumReduceTasks(0);
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setInputFormatClass(GenDataFormat.class);
      job.setOutputFormatClass(RawBytesOutputFormat.class);
      job.setJarByClass(GenerateData.class);
      try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
      } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
      }
    }
  });
  return job;
}
@Override
protected void configureMapper(Job job, String tableName, String tableClassName)
    throws IOException {
  job.setOutputKeyClass(SqoopRecord.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapperClass(getMapperClass());
}
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
    String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
    int expectedNumSplits, boolean shutdownCluster) throws Exception {

  // create the table and snapshot
  createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

  if (shutdownCluster) {
    util.shutdownMiniHBaseCluster();
  }

  try {
    // create the job
    JobConf jobConf = new JobConf(util.getConfiguration());

    jobConf.setJarByClass(util.getClass());
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(jobConf,
        TestTableSnapshotInputFormat.class);

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, jobConf, true, tableDir);

    jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setOutputFormat(NullOutputFormat.class);

    RunningJob job = JobClient.runJob(jobConf);
    Assert.assertTrue(job.isSuccessful());
  } finally {
    if (!shutdownCluster) {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
    }
  }
}
public void map(LongWritable key, SqoopRecord val, Context context)
    throws IOException, InterruptedException {
  String dataset = inputSplit.getCurrentDataset();
  outkey.set(val.toString());
  numberOfRecords++;
  mos.write(outkey, NullWritable.get(), dataset);
}
@Test
public void testReduceParentKey() throws IOException {
  configuration.set("compactionSourcePath", "/src/path");
  configuration.set("compactionTargetPath", "/target/path");
  values.add(value1);
  values.add(value2);
  reduceDriver.withInput(inputParentKey, values);
  reduceDriver.withPathOutput(NullWritable.get(), value1, "value/target/path");
  reduceDriver.withPathOutput(NullWritable.get(), value2, "value/target/path");
  reduceDriver.runTest();
}
@Override
public DataInputBuffer getValue() throws IOException {
  ByteArrayOutputStream dt = new ByteArrayOutputStream();
  NullWritable key = NullWritable.get();
  key.write(new DataOutputStream(dt));
  DataInputBuffer result = new DataInputBuffer();
  byte[] b = dt.toByteArray();
  result.reset(b, 0, b.length);
  return result;
}
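// A small illustrative check (not part of the snippet above): NullWritable is a singleton
// whose write() and readFields() are no-ops, so the buffer built by getValue() always has
// length zero.
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
NullWritable.get().write(new DataOutputStream(bytes));
assert bytes.size() == 0;  // nothing is ever serialized for NullWritable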
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
  boolean isMapOnly = context.getNumReduceTasks() == 0;
  Schema schema = isMapOnly
      ? AvroJob.getMapOutputSchema(context.getConfiguration())
      : AvroJob.getOutputSchema(context.getConfiguration());

  final DataFileWriter<T> WRITER = new DataFileWriter<T>(new ReflectDatumWriter<T>());

  configureDataFileWriter(WRITER, context);

  Path path = getDefaultWorkFile(context, EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}