/**
 * Configures and runs the "word mean" job and, when the job succeeds,
 * derives the mean from the job output.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 0 when the job succeeds (and also, for backward compatibility,
 *         when only the usage message is printed); 1 when the job fails
 * @throws Exception if job setup, job execution, or reading the output fails
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    return 0;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);

  boolean result = job.waitForCompletion(true);
  // Only read the output of a successful job: after a failure the output may
  // be missing or partial, and an error while reading it could hide the
  // real job failure from the caller.
  if (result) {
    mean = readAndCalcMean(outputpath, conf);
  }

  return (result ? 0 : 1);
}
/**
 * Forwards each record unchanged, except that a record equal to one of the
 * first three entries of {@code MAPPER_BAD_RECORDS} first triggers a
 * deliberate misbehaviour: JVM exit, a runtime exception, or a long sleep.
 *
 * @param key      record key, logged and forwarded
 * @param val      record value, compared against the bad-record list
 * @param output   collector receiving the (key, val) pair
 * @param reporter unused
 * @throws IOException if collecting the output fails
 */
public void map(LongWritable key, Text val,
    OutputCollector<LongWritable, Text> output, Reporter reporter)
    throws IOException {
  final String record = val.toString();
  LOG.debug("MAP key:" + key + " value:" + record);

  if (MAPPER_BAD_RECORDS.get(0).equals(record)) {
    // First bad record: kill the JVM outright.
    LOG.warn("MAP Encountered BAD record");
    System.exit(-1);
  } else if (MAPPER_BAD_RECORDS.get(1).equals(record)) {
    // Second bad record: fail with an exception.
    LOG.warn("MAP Encountered BAD record");
    throw new RuntimeException("Bad record " + record);
  } else if (MAPPER_BAD_RECORDS.get(2).equals(record)) {
    // Third bad record: stall for fifteen minutes.
    try {
      LOG.warn("MAP Encountered BAD record");
      final long stallMillis = 15 * 60 * 1000;
      Thread.sleep(stallMillis);
    } catch (InterruptedException interrupted) {
      interrupted.printStackTrace();
    }
  }

  output.collect(key, val);
}
/**
 * Calls an address where no server is started by this test and checks that
 * the resulting IOException names the target address and embeds the message
 * of its nested cause.
 *
 * @throws IOException not expected; the IOException from the call is caught
 */
@Test(timeout=60000)
public void testStandAloneClient() throws IOException {
  Client client = new Client(LongWritable.class, conf);
  InetSocketAddress address = new InetSocketAddress("127.0.0.1", 10);
  try {
    client.call(new LongWritable(RANDOM.nextLong()),
        address, null, null, 0, conf);
    fail("Expected an exception to have been thrown");
  } catch (IOException e) {
    String message = e.getMessage();
    String addressText = address.getHostName() + ":" + address.getPort();
    assertTrue("Did not find "+addressText+" in "+message,
        message.contains(addressText));
    Throwable cause=e.getCause();
    assertNotNull("No nested exception in "+e,cause);
    String causeText=cause.getMessage();
    assertTrue("Did not find " + causeText + " in " + message,
        message.contains(causeText));
  } finally {
    // Always release the client, including when an assertion above fails.
    client.stop();
  }
}
/**
 * Sums, component by component, the comma-separated numeric vectors received
 * for a key and writes the sum as {@code prefix} followed by the
 * comma-separated components.
 *
 * @param key     key shared by all incoming vectors; written out unchanged
 * @param values  vectors encoded as comma-separated doubles; only the first
 *                {@code k} components of each are read
 * @param context task context; supplies the vector length through the
 *                {@code "k"} configuration property
 * @throws IllegalArgumentException if {@code "k"} is unset or negative
 * @throws IOException          if writing the result fails
 * @throws InterruptedException if writing the result is interrupted
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  int k = context.getConfiguration().getInt("k", -1);
  if (k < 0) {
    // Fail with a readable message instead of a NegativeArraySizeException.
    throw new IllegalArgumentException(
        "Configuration property \"k\" must be a non-negative integer, got " + k);
  }

  double[] result = new double[k];
  for (Text value : values) {
    String[] ai = value.toString().split(",");
    for (int j = 0; j < k; j++) {
      result[j] += Double.parseDouble(ai[j]);
    }
  }

  StringBuilder res = new StringBuilder(prefix);
  for (int i = 0; i < k; i++) {
    res.append(result[i]);
    if (i < k - 1) {
      res.append(",");
    }
  }
  context.write(key, new Text(res.toString()));
}
public void map(LongWritable key, Text val, Context c) throws IOException, InterruptedException { // Create a whole bunch of objects. List<Integer> lst = new ArrayList<Integer>(); for (int i = 0; i < 20000; i++) { lst.add(new Integer(i)); } // Actually use this list, to ensure that it isn't just optimized away. int sum = 0; for (int x : lst) { sum += x; } // throw away the list and run a GC. lst = null; System.gc(); c.write(new LongWritable(sum), val); }
/**
 * Builds and synchronously runs one I/O test job: sequence-file input from
 * the control directory, the given mapper, {@code AccumulatingReducer} as
 * the single reducer, and Text/Text output into {@code outputDir}.
 *
 * @param mapperClass mapper implementation to run
 * @param outputDir   directory the job writes its output to
 * @throws IOException if the job cannot be run
 */
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  final JobConf ioJob = new JobConf(config, TestDFSIO.class);

  // Input side.
  final Path controlDir = getControlDir(config);
  FileInputFormat.setInputPaths(ioJob, controlDir);
  ioJob.setInputFormat(SequenceFileInputFormat.class);

  // Processing stages.
  ioJob.setMapperClass(mapperClass);
  ioJob.setReducerClass(AccumulatingReducer.class);

  // Output side.
  FileOutputFormat.setOutputPath(ioJob, outputDir);
  ioJob.setOutputKeyClass(Text.class);
  ioJob.setOutputValueClass(Text.class);
  ioJob.setNumReduceTasks(1);

  JobClient.runJob(ioJob);
}
/**
 * Issues {@code count} calls with random parameters and flags the run as
 * failed when a reply does not echo its parameter or a call throws.
 * A mismatched reply ends the loop; an exception does not.
 */
@Override
public void run() {
  for (int attempt = 0; attempt < count; attempt++) {
    try {
      final long expected = RANDOM.nextLong();
      final LongWritable reply = call(client, expected, server, conf);
      if (reply.get() == expected) {
        continue;
      }
      // The reply did not echo the parameter: record it and stop calling.
      LOG.fatal("Call failed!");
      failed = true;
      break;
    } catch (Exception callError) {
      LOG.fatal("Caught: " + StringUtils.stringifyException(callError));
      failed = true;
    }
  }
}
/**
 * Calls an address where no server is started by this test and verifies
 * that the IOException raised mentions the target address and contains the
 * message of its nested cause. The client is stopped in every case.
 *
 * @throws IOException not expected; the IOException from the call is caught
 */
@Test(timeout=60000)
public void testStandAloneClient() throws IOException {
  final Client standalone = new Client(LongWritable.class, conf);
  final InetSocketAddress target = new InetSocketAddress("127.0.0.1", 10);
  try {
    call(standalone, RANDOM.nextLong(), target, conf);
    fail("Expected an exception to have been thrown");
  } catch (IOException failure) {
    final String text = failure.getMessage();

    // The message must name the host:port that was dialled.
    final String hostAndPort = target.getHostName() + ":" + target.getPort();
    final boolean namesTarget = text.contains(hostAndPort);
    assertTrue("Did not find " + hostAndPort + " in " + text, namesTarget);

    // The message must also embed the nested exception's message.
    final Throwable nested = failure.getCause();
    assertNotNull("No nested exception in " + failure, nested);
    final String nestedText = nested.getMessage();
    final boolean embedsCause = text.contains(nestedText);
    assertTrue("Did not find " + nestedText + " in " + text, embedsCause);
  } finally {
    standalone.stop();
  }
}
@Test(timeout=60000) public void testIpcConnectTimeout() throws IOException { // start server Server server = new TestServer(1, true); InetSocketAddress addr = NetUtils.getConnectAddress(server); //Intentionally do not start server to get a connection timeout // start client Client.setConnectTimeout(conf, 100); Client client = new Client(LongWritable.class, conf); // set the rpc timeout to twice the MIN_SLEEP_TIME try { call(client, new LongWritable(RANDOM.nextLong()), addr, MIN_SLEEP_TIME * 2, conf); fail("Expected an exception to have been thrown"); } catch (SocketTimeoutException e) { LOG.info("Get a SocketTimeoutException ", e); } client.stop(); }
/**
 * Builds one {@code IndirectSplit} per record of the sequence file named by
 * the {@code INDIRECT_INPUT_FILE} configuration property. Each record's
 * value is used as the split's path and its key as the split's long
 * argument.
 *
 * @param job job context supplying the configuration
 * @return the splits, in the order the records appear in the file
 * @throws IOException if the sequence file cannot be opened or read
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  Path src = new Path(conf.get(INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(conf);

  List<InputSplit> splits = new ArrayList<InputSplit>();
  LongWritable key = new LongWritable();
  Text value = new Text();

  SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
  try {
    while (sl.next(key, value)) {
      splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
    }
  } finally {
    // Release the underlying stream whether or not reading succeeded.
    sl.close();
  }
  return splits;
}
/**
 * Runs a chain whose reducer is {@code ConsumeReduce} and expects the job
 * to succeed with empty output.
 *
 * @throws Exception if the job cannot be set up or run
 */
public void testChainReduceNoOuptut() throws Exception {
  final Configuration jobConf = createJobConf();

  final Job chainJob =
      MapReduceTestUtil.createJob(jobConf, inDir, outDir, 1, 1, input);
  chainJob.setJobName("chain");

  // Map side of the chain.
  ChainMapper.addMapper(chainJob, Mapper.class,
      IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  // Reduce side: the reducer, followed by one more mapper.
  ChainReducer.setReducer(chainJob, ConsumeReduce.class,
      LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainReducer.addMapper(chainJob, Mapper.class,
      LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  chainJob.waitForCompletion(true);
  assertTrue("Job failed", chainJob.isSuccessful());

  final String expectedOutput = "";
  final String actualOutput = MapReduceTestUtil.readOutput(outDir, jobConf);
  assertEquals("Outputs doesn't match", expectedOutput, actualOutput);
}
/**
 * Runs a map-only chain with {@code FailMap} in the middle and expects the
 * job not to succeed.
 *
 * @throws Exception if the job cannot be set up or run
 */
public void testChainFail() throws Exception {
  final Configuration jobConf = createJobConf();

  final Job chainJob =
      MapReduceTestUtil.createJob(jobConf, inDir, outDir, 1, 0, input);
  chainJob.setJobName("chain");

  // Pass-through mapper, then the failing mapper, then another pass-through.
  ChainMapper.addMapper(chainJob, Mapper.class,
      LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainMapper.addMapper(chainJob, FailMap.class,
      LongWritable.class, Text.class,
      IntWritable.class, Text.class, null);
  ChainMapper.addMapper(chainJob, Mapper.class,
      IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  chainJob.waitForCompletion(true);

  final boolean succeeded = chainJob.isSuccessful();
  assertTrue("Job Not failed", !succeeded);
}
/**
 * Clears the read directory, then runs the seek test as a job with
 * {@code SeekMapper}, a single {@code LongSumReducer}, sequence-file input
 * from the control directory, and Text/LongWritable output.
 *
 * @param fs        file system on which the read directory is deleted
 * @param fastCheck value stored under {@code fs.test.fastCheck} in the job
 * @throws Exception if the cleanup or the job fails
 */
public static void seekTest(FileSystem fs, boolean fastCheck)
    throws Exception {
  // Start from a clean output location.
  fs.delete(READ_DIR, true);

  final JobConf seekJob = new JobConf(conf, TestFileSystem.class);
  seekJob.setBoolean("fs.test.fastCheck", fastCheck);

  // Input side.
  FileInputFormat.setInputPaths(seekJob, CONTROL_DIR);
  seekJob.setInputFormat(SequenceFileInputFormat.class);

  // Processing stages.
  seekJob.setMapperClass(SeekMapper.class);
  seekJob.setReducerClass(LongSumReducer.class);

  // Output side.
  FileOutputFormat.setOutputPath(seekJob, READ_DIR);
  seekJob.setOutputKeyClass(Text.class);
  seekJob.setOutputValueClass(LongWritable.class);
  seekJob.setNumReduceTasks(1);

  JobClient.runJob(seekJob);
}
/**
 * Builds a tuple from ten writables of mixed types and checks that
 * {@code verifIter} reports as many elements as were supplied.
 *
 * @throws Exception if building or iterating the tuple fails
 */
public void testNestedIterable() throws Exception {
  final Random rng = new Random();
  final Writable[] elements = new Writable[] {
    new BooleanWritable(rng.nextBoolean()),
    new FloatWritable(rng.nextFloat()),
    new FloatWritable(rng.nextFloat()),
    new IntWritable(rng.nextInt()),
    new LongWritable(rng.nextLong()),
    new BytesWritable("dingo".getBytes()),
    new LongWritable(rng.nextLong()),
    new IntWritable(rng.nextInt()),
    new BytesWritable("yak".getBytes()),
    new IntWritable(rng.nextInt())
  };

  final TupleWritable tuple = makeTuple(elements);
  final int visited = verifIter(elements, tuple, 0);
  assertTrue("Bad count", elements.length == visited);
}
/** * Emits random words sequence of desired size. Note that the desired output * size is passed as the value parameter to this map. */ @Override public void map(NullWritable key, LongWritable value, Context context) throws IOException, InterruptedException { //TODO Control the extra data written .. //TODO Should the key\tvalue\n be considered for measuring size? // Can counters like BYTES_WRITTEN be used? What will be the value of // such counters in LocalJobRunner? for (long bytes = value.get(); bytes > 0;) { String randomKey = rtg.getRandomWord(); String randomValue = rtg.getRandomWord(); context.write(new Text(randomKey), new Text(randomValue)); bytes -= (randomValue.getBytes(charsetUTF8).length + randomKey.getBytes(charsetUTF8).length); } }
/**
 * Tokenises the {@code <text>} section of a document and emits, for every
 * token, the key {@code <id>-<token>} with the pair (token count of the
 * document, 1).
 *
 * <p>The text is lower-cased and every character outside {@code a-z} is
 * replaced by a space before tokenising. Documents whose text or id slice
 * is empty are skipped.
 *
 * @param key     input key (unused)
 * @param value   the raw document
 * @param context task context receiving the per-token records
 * @throws IOException          if writing a record fails
 * @throws InterruptedException if writing a record is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String doc = value.toString();

  String text = slice(doc, "<text", "</text>", true);
  if (text.length() < 1) {
    return;
  }

  // Lower-case with a fixed locale: with the default locale, 'I' becomes a
  // dotless 'ı' under e.g. Turkish and would then be blanked out below.
  char[] txt = text.toLowerCase(java.util.Locale.ROOT).toCharArray();
  for (int i = 0; i < txt.length; ++i) {
    // After lower-casing no 'A'-'Z' remains, so checking 'a'-'z' suffices.
    if (txt[i] < 'a' || txt[i] > 'z') {
      txt[i] = ' ';
    }
  }

  String id = slice(doc, "<id>", "</id>", false);
  if (id.length() < 1) {
    return;
  }

  StringTokenizer itr = new StringTokenizer(String.valueOf(txt));
  // Total token count of the document, attached to every emitted record.
  int sum = itr.countTokens();
  while (itr.hasMoreTokens()) {
    String s = itr.nextToken();
    word.set(id + '-' + s);
    IntWritable[] tmp = {new IntWritable(sum), new IntWritable(1)};
    IntArrayWritable temp = new IntArrayWritable(tmp);
    context.write(word, temp);
  }
}
/** * Actually instantiate the user's chosen RecordReader implementation. */ @SuppressWarnings("unchecked") private void createChildReader() throws IOException, InterruptedException { LOG.debug("ChildSplit operates on: " + split.getPath(index)); Configuration conf = context.getConfiguration(); // Determine the file format we're reading. Class rrClass; if (ExportJobBase.isSequenceFiles(conf, split.getPath(index))) { rrClass = SequenceFileRecordReader.class; } else { rrClass = LineRecordReader.class; } // Create the appropriate record reader. this.rr = (RecordReader<LongWritable, Object>) ReflectionUtils.newInstance(rrClass, conf); }
/** * test DBInputFormat class. Class should split result for chunks * @throws Exception */ @Test(timeout = 10000) public void testDBInputFormat() throws Exception { JobConf configuration = new JobConf(); setupDriver(configuration); DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>(); format.setConf(configuration); format.setConf(configuration); DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10); Reporter reporter = mock(Reporter.class); RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader( splitter, configuration, reporter); configuration.setInt(MRJobConfig.NUM_MAPS, 3); InputSplit[] lSplits = format.getSplits(configuration, 3); assertEquals(5, lSplits[0].getLength()); assertEquals(3, lSplits.length); // test reader .Some simple tests assertEquals(LongWritable.class, reader.createKey().getClass()); assertEquals(0, reader.getPos()); assertEquals(0, reader.getProgress(), 0.001); reader.close(); }
/**
 * Splits the input line on tabs, parses the first field as the long output
 * key and emits the second field as the value, prefixing it with
 * {@code "m:"} when it contains no colon.
 *
 * @param key     input key (unused)
 * @param value   tab-separated line with at least two fields
 * @param context task context receiving the output pair
 * @throws IOException           if writing the output fails
 * @throws NumberFormatException if the first field is not a valid long
 * @throws InterruptedException  if writing the output is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, NumberFormatException, InterruptedException {
  final String[] fields = value.toString().split("\t");

  // Values without an explicit "name:" tag are tagged as "m".
  final String tagged = fields[1].contains(":") ? fields[1] : "m:" + fields[1];
  fields[1] = tagged;

  final long rowId = Long.parseLong(fields[0]);
  context.write(new LongWritable(rowId), new Text(tagged));
}
/**
 * Combines the vectors tagged {@code m}, {@code a} and {@code b} received
 * for a key into {@code m[j] * a[j] / b[j]} for each of the {@code k}
 * components, taking the square root of the ratio first when the
 * {@code "sqrt"} configuration flag is set, and writes the components as a
 * comma-separated string.
 *
 * @param key     key shared by the incoming vectors; written out unchanged
 * @param values  entries of the form {@code name:v0,v1,...}
 * @param context task context supplying the configuration and the output
 * @throws IOException          if writing the result fails
 * @throws InterruptedException if writing the result is interrupted
 */
public void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  final boolean takeRoot = context.getConfiguration().getBoolean("sqrt", false);

  // One zero-initialised vector per expected tag.
  final Map<String, double[]> vectors = new HashMap<>();
  vectors.put("m", new double[k]);
  vectors.put("a", new double[k]);
  vectors.put("b", new double[k]);

  // Fill the vector named by each entry's tag.
  for (Text entry : values) {
    final String[] tagAndData = entry.toString().split(":");
    final String[] components = tagAndData[1].split(",");
    final double[] target = vectors.get(tagAndData[0]);
    for (int j = 0; j < k; j++) {
      target[j] = Double.parseDouble(components[j]);
    }
  }

  final double[] m = vectors.get("m");
  final double[] a = vectors.get("a");
  final double[] b = vectors.get("b");

  final StringBuilder joined = new StringBuilder();
  for (int j = 0; j < k; j++) {
    if (j > 0) {
      joined.append(",");
    }
    final double ratio = a[j] / b[j];
    final double factor = takeRoot ? Math.sqrt(ratio) : ratio;
    joined.append(m[j] * factor);
  }

  context.write(key, new Text(joined.toString()));
}
/**
 * Reports the split as the task status and returns a
 * {@code DelimitedAndFixedWidthRecordReader} over it.
 *
 * @param genericSplit split to read; must be a {@code FileSplit}
 * @param job          job configuration handed to the reader
 * @param reporter     receives the split description as status
 * @return a reader for the split
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit genericSplit, JobConf job, Reporter reporter)
    throws IOException {
  final String status = genericSplit.toString();
  reporter.setStatus(status);

  final FileSplit fileSplit = (FileSplit) genericSplit;
  return new DelimitedAndFixedWidthRecordReader(job, fileSplit);
}
/**
 * Ignores its input and emits every pair (j, i) for
 * {@code 0 <= j < NUM_TESTS} and {@code 0 <= i < NUM_VALUES}, in that
 * nesting order, reusing one key and one value object.
 *
 * @param key     unused
 * @param value   unused
 * @param context task context receiving the pairs
 * @throws IOException          if writing a pair fails
 * @throws InterruptedException if writing a pair is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  final IntWritable testId = new IntWritable();
  final IntWritable valueId = new IntWritable();

  int test = 0;
  while (test < NUM_TESTS) {
    // The key is constant for the whole inner loop.
    testId.set(test);
    for (int v = 0; v < NUM_VALUES; v++) {
      valueId.set(v);
      context.write(testId, valueId);
    }
    test++;
  }
}
/**
 * Throws when the {@code FAIL_MAP} configuration flag is set (the default);
 * otherwise emits the input key with a null value.
 *
 * @param key     input key, forwarded when not failing
 * @param value   unused
 * @param context task context supplying the flag and receiving the output
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  final boolean mustFail =
      context.getConfiguration().getBoolean(FAIL_MAP, true);
  if (!mustFail) {
    context.write(key, NullWritable.get());
    return;
  }
  throw new RuntimeException("Intentional map failure");
}
/** Create and setup a job */ private static Job createJob(String name, Configuration conf ) throws IOException { final Job job = Job.getInstance(conf, NAME + "_" + name); final Configuration jobconf = job.getConfiguration(); job.setJarByClass(BaileyBorweinPlouffe.class); // setup mapper job.setMapperClass(BbpMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); // setup reducer job.setReducerClass(BbpReducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(BytesWritable.class); job.setNumReduceTasks(1); // setup input job.setInputFormatClass(BbpInputFormat.class); // disable task timeout jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0); // do not use speculative execution jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false); jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false); return job; }
@Override public void map(LongWritable key, SqoopRecord val, Context context) throws IOException, InterruptedException { try { // Loading of LOBs was delayed until we have a Context. val.loadLargeObjects(lobLoader); } catch (SQLException sqlE) { throw new IOException(sqlE); } outkey.set(val.toString()); context.write(outkey, NullWritable.get()); }
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException { // Set up context File testFile = new File(testFileUrl.getFile()); long testFileSize = testFile.length(); Path testFilePath = new Path(testFile.getAbsolutePath()); Configuration conf = new Configuration(); conf.setInt("io.file.buffer.size", 1); // Gather the records returned by the record reader ArrayList<String> records = new ArrayList<String>(); long offset = 0; LongWritable key = new LongWritable(); Text value = new Text(); while (offset < testFileSize) { FileSplit split = new FileSplit(testFilePath, offset, splitSize, (String[]) null); LineRecordReader reader = new LineRecordReader(conf, split); while (reader.next(key, value)) { records.add(value.toString()); } offset += splitSize; } return records; }
@SuppressWarnings("deprecation") private void createControlFile(FileSystem fs, long nrBytes, // in bytes int nrFiles ) throws IOException { LOG.info("creating control file: "+nrBytes+" bytes, "+nrFiles+" files"); Path controlDir = getControlDir(config); fs.delete(controlDir, true); for(int i=0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(controlDir, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, config, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(nrBytes)); } catch(Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) writer.close(); writer = null; } } LOG.info("created control files for: "+nrFiles+" files"); }
/** * Reduce task done, write output to a file. */ @Override public void cleanup(Context context) throws IOException { //write output to a file Configuration conf = context.getConfiguration(); Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR)); Path outFile = new Path(outDir, "reduce-out"); FileSystem fileSys = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE); writer.append(new LongWritable(numInside), new LongWritable(numOutside)); writer.close(); }
/**
 * Fills {@code src} with one path per source under {@code testdir} (named
 * by the base-36 form of {@code index + 10}) and opens a sequence-file
 * writer for each. All writers use IntWritable keys and values except the
 * last, whose values are LongWritable.
 *
 * @param testdir directory that will contain the source files
 * @param conf    configuration used to resolve the file system
 * @param srcs    number of sources
 * @param src     output array receiving the source paths
 * @return one writer per source, in source order
 * @throws IOException if a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  // Name every source first.
  int index = 0;
  while (index < srcs) {
    final String fileName = Integer.toString(index + 10, 36);
    src[index] = new Path(testdir, fileName);
    index++;
  }

  final SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
  final int last = srcs - 1;

  // Int/Int writers for all sources but the last.
  for (int w = 0; w < last; ++w) {
    writers[w] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[w], IntWritable.class, IntWritable.class);
  }

  // The last source carries long values.
  writers[last] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
      src[last], IntWritable.class, LongWritable.class);

  return writers;
}
/**
 * Creates a fixed-length record reader using the record length from the
 * task configuration.
 *
 * @param split   split to read (unused here)
 * @param context task context supplying the configuration
 * @return a reader for records of the configured length
 * @throws IOException          if the configured length is not positive
 * @throws InterruptedException declared for callers
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final int recordLength = getRecordLength(context.getConfiguration());
  if (recordLength > 0) {
    return new FixedLengthRecordReader(recordLength);
  }
  throw new IOException("Fixed record length " + recordLength
      + " is invalid. It should be set to a value greater than zero");
}
/** * Generic test case for exceptions thrown at some point in the IPC * process. * * @param clientParamClass - client writes this writable for parameter * @param serverParamClass - server reads this writable for parameter * @param serverResponseClass - server writes this writable for response * @param clientResponseClass - client reads this writable for response */ private void doErrorTest( Class<? extends LongWritable> clientParamClass, Class<? extends LongWritable> serverParamClass, Class<? extends LongWritable> serverResponseClass, Class<? extends LongWritable> clientResponseClass) throws IOException, InstantiationException, IllegalAccessException { // start server Server server = new TestServer(1, false, serverParamClass, serverResponseClass); InetSocketAddress addr = NetUtils.getConnectAddress(server); server.start(); // start client WRITABLE_FAULTS_ENABLED = true; Client client = new Client(clientResponseClass, conf); try { LongWritable param = clientParamClass.newInstance(); try { call(client, param, addr, 0, conf); fail("Expected an exception to have been thrown"); } catch (Throwable t) { assertExceptionContains(t, "Injected fault"); } // Doing a second call with faults disabled should return fine -- // ie the internal state of the client or server should not be broken // by the failed call WRITABLE_FAULTS_ENABLED = false; call(client, param, addr, 0, conf); } finally { client.stop(); server.stop(); } }
/**
 * Runs the generic IPC error test with {@code IOEOnReadWritable} as the
 * class the client uses to read the response.
 *
 * @throws Exception if the error test fails
 */
@Test(timeout=60000)
public void testIOEOnClientReadResponse() throws Exception {
  final Class<? extends LongWritable> clientParam = LongWritable.class;
  final Class<? extends LongWritable> serverParam = LongWritable.class;
  final Class<? extends LongWritable> serverResponse = LongWritable.class;
  final Class<? extends LongWritable> clientResponse = IOEOnReadWritable.class;

  doErrorTest(clientParam, serverParam, serverResponse, clientResponse);
}
/**
 * Runs the generic IPC error test with {@code RTEOnWriteWritable} as the
 * class the client uses to write the parameter.
 *
 * @throws Exception if the error test fails
 */
@Test(timeout=60000)
public void testRTEOnClientWriteParam() throws Exception {
  final Class<? extends LongWritable> clientParam = RTEOnWriteWritable.class;
  final Class<? extends LongWritable> serverParam = LongWritable.class;
  final Class<? extends LongWritable> serverResponse = LongWritable.class;
  final Class<? extends LongWritable> clientResponse = LongWritable.class;

  doErrorTest(clientParam, serverParam, serverResponse, clientResponse);
}
/**
 * Creates an {@code ErrorHandlingLineRecordReader}, passing it the UTF-8
 * bytes of the {@code textinputformat.record.delimiter} setting, or
 * {@code null} when that setting is absent.
 *
 * @param split   split to read (unused here)
 * @param context task context supplying the configuration
 * @return a line record reader for the configured delimiter
 */
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
    TaskAttemptContext context) {
  final String configured =
      context.getConfiguration().get("textinputformat.record.delimiter");

  final byte[] delimiterBytes =
      (configured == null) ? null : configured.getBytes(Charsets.UTF_8);

  return new ErrorHandlingLineRecordReader(delimiterBytes);
}
/**
 * Writes one record per new/old customer element of the current statistic.
 * All records share a single key object of type
 * {@code MapKeyConfig.NEW_OLD_CUSTOMER}, whose time field is set to the
 * element's hour before each write.
 *
 * @throws IOException          if writing a record fails
 * @throws InterruptedException if writing a record is interrupted
 */
private void writNewOldCustomer() throws IOException, InterruptedException {
  // The time slot is shared with the key and updated for every element.
  final LongWritable hourSlot = new LongWritable();

  final KeyWrapper sharedKey = new KeyWrapper();
  sharedKey.setType(new Text(MapKeyConfig.NEW_OLD_CUSTOMER));
  sharedKey.setMillisTime(hourSlot);

  for (NewOldCustomElement element : statistic.getNewOldCustomElements()) {
    hourSlot.set(element.getHour());
    final ValueWrapper wrapped = new ValueWrapper(element);
    context.write(sharedKey, wrapped);
  }
}
/**
 * Runs the generic IPC error test with {@code IOEOnWriteWritable} as the
 * class the server uses to write the response.
 *
 * @throws Exception if the error test fails
 */
@Test(timeout=60000)
public void testIOEOnServerWriteResponse() throws Exception {
  final Class<? extends LongWritable> clientParam = LongWritable.class;
  final Class<? extends LongWritable> serverParam = LongWritable.class;
  final Class<? extends LongWritable> serverResponse = IOEOnWriteWritable.class;
  final Class<? extends LongWritable> clientResponse = LongWritable.class;

  doErrorTest(clientParam, serverParam, serverResponse, clientResponse);
}
@Test public void testStripBOM() throws IOException { // the test data contains a BOM at the start of the file // confirm the BOM is skipped by LineRecordReader String UTF8_BOM = "\uFEFF"; URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt"); assertNotNull("Cannot find testBOM.txt", testFileUrl); File testFile = new File(testFileUrl.getFile()); Path testFilePath = new Path(testFile.getAbsolutePath()); long testFileSize = testFile.length(); Configuration conf = new Configuration(); conf.setInt(org.apache.hadoop.mapreduce.lib.input. LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); // read the data and check whether BOM is skipped FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[])null); LineRecordReader reader = new LineRecordReader(conf, split); LongWritable key = new LongWritable(); Text value = new Text(); int numRecords = 0; boolean firstLine = true; boolean skipBOM = true; while (reader.next(key, value)) { if (firstLine) { firstLine = false; if (value.toString().startsWith(UTF8_BOM)) { skipBOM = false; } } ++numRecords; } reader.close(); assertTrue("BOM is not skipped", skipBOM); }
/**
 * Runs the generic IPC error test with {@code RTEOnReadWritable} as the
 * class the client uses to read the response.
 *
 * @throws Exception if the error test fails
 */
@Test(timeout=60000)
public void testRTEOnClientReadResponse() throws Exception {
  final Class<? extends LongWritable> clientParam = LongWritable.class;
  final Class<? extends LongWritable> serverParam = LongWritable.class;
  final Class<? extends LongWritable> serverResponse = LongWritable.class;
  final Class<? extends LongWritable> clientResponse = RTEOnReadWritable.class;

  doErrorTest(clientParam, serverParam, serverResponse, clientResponse);
}
/**
 * Tests SampleUploader from the examples: maps one CSV line through the
 * {@code Uploader} against a mocked context, checking that the written key
 * and Put both carry the row "row", and then checks that the configured
 * job uses {@code SequenceFileInputFormat}.
 *
 * @throws Exception if mapping or job configuration fails
 */
@SuppressWarnings("unchecked")
@Test
public void testSampleUploader() throws Exception {
  final Configuration configuration = new Configuration();
  final Uploader uploader = new Uploader();
  final Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context ctx =
      mock(Context.class);

  // Verifies the arguments of every write() issued by the mapper.
  final Answer<Void> verifyRow = new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      final Object[] written = invocation.getArguments();
      final ImmutableBytesWritable rowKey = (ImmutableBytesWritable) written[0];
      final Put put = (Put) written[1];
      assertEquals("row", Bytes.toString(rowKey.get()));
      assertEquals("row", Bytes.toString(put.getRow()));
      return null;
    }
  };
  doAnswer(verifyRow).when(ctx)
      .write(any(ImmutableBytesWritable.class), any(Put.class));

  final Text line = new Text("row,family,qualifier,value");
  uploader.map(null, line, ctx);

  final Path dir = util.getDataTestDirOnTestFS("testSampleUploader");
  final String[] args = new String[] { dir.toString(), "simpleTable" };
  final Job job = SampleUploader.configureJob(configuration, args);
  assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
}
/**
 * Reads every record of a split and returns the values in read order.
 * A fresh value object is created for each record, so the returned list
 * does not hold the same instance repeatedly.
 *
 * @param format input format used to create the record reader
 * @param split  split to read
 * @param job    job configuration passed to the reader
 * @return the values of all records in the split
 * @throws IOException if creating the reader or reading fails
 */
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
                                    InputSplit split,
                                    JobConf job) throws IOException {
  List<Text> result = new ArrayList<Text>();
  RecordReader<LongWritable, Text> reader =
    format.getRecordReader(split, job, voidReporter);
  try {
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      result.add(value);
      // The reader fills the object it is given; hand it a new one so the
      // value just stored is not overwritten.
      value = reader.createValue();
    }
  } finally {
    // Close the reader even when reading fails part-way.
    reader.close();
  }
  return result;
}