public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.parseInt(
          inputFile.substring(inputFile.lastIndexOf("part") + 7));
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
/** The waiting function. The map exits once it gets a signal. Here the
 * signal is the existence of a file. */
public void map(WritableComparable key, Writable val,
                OutputCollector<WritableComparable, Writable> output,
                Reporter reporter) throws IOException {
  if (shouldWait(id)) {
    if (fs != null) {
      while (!fs.exists(getSignalFile(id))) {
        try {
          reporter.progress();
          synchronized (this) {
            this.wait(1000); // wait for 1 sec
          }
        } catch (InterruptedException ie) {
          System.out.println("Interrupted while the map was waiting for the"
                             + " signal.");
          break;
        }
      }
    } else {
      throw new IOException("Could not get the DFS!!");
    }
  }
}
/** The waiting function. The reduce exits once it gets a signal. Here the
 * signal is the existence of a file. */
public void reduce(WritableComparable key, Iterator<Writable> val,
                   OutputCollector<WritableComparable, Writable> output,
                   Reporter reporter) throws IOException {
  if (fs != null) {
    while (!fs.exists(signal)) {
      try {
        reporter.progress();
        synchronized (this) {
          this.wait(1000); // wait for 1 sec
        }
      } catch (InterruptedException ie) {
        System.out.println("Interrupted while the reduce was waiting for the"
                           + " signal.");
        break;
      }
    }
  } else {
    throw new IOException("Could not get the DFS!!");
  }
}
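/* A minimal usage sketch, not from the source: the waiting map and reduce
 * above poll fs.exists(...) until a signal file appears, so a test driver
 * can release them simply by creating that file on the shared FileSystem.
 * The method name and the signal path argument here are assumptions. */
public static void releaseWaitingTasks(Configuration conf, Path signal)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  // Existence is the signal; an empty file is enough.
  fs.create(signal).close();
}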
public void reduce(WritableComparable key, Iterator values,
                   OutputCollector output, Reporter reporter
                   ) throws IOException {
  if (first) {
    first = false;
    MapOutputFile mapOutputFile = new MROutputFiles();
    mapOutputFile.setConf(conf);
    Path input = mapOutputFile.getInputFile(0);
    FileSystem fs = FileSystem.get(conf);
    assertTrue("reduce input exists " + input, fs.exists(input));
    SequenceFile.Reader rdr = new SequenceFile.Reader(fs, input, conf);
    assertEquals("is reduce input compressed " + input,
                 compressInput, rdr.isCompressed());
    rdr.close();
  }
}
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(WritableComparable key, Writable value,
                Context context) throws IOException, InterruptedException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize
        + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize
        + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    context.write(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    context.getCounter(Counters.BYTES_WRITTEN)
        .increment(keyLength + valueLength);
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    if (++itemCount % 200 == 0) {
      context.setStatus("wrote record " + itemCount + ". "
                        + numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + itemCount + " records.");
}
private static <T extends WritableComparable<?>> Path writePartitionFile(
    String testname, Configuration conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir = new Path(
      System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    w = SequenceFile.createWriter(fs, conf, p,
        splits[0].getClass(), NullWritable.class,
        SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], NullWritable.get());
    }
  } finally {
    if (null != w) {
      w.close();
    }
  }
  return p;
}
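/* Hypothetical usage sketch for writePartitionFile above: with n split
 * points written to the partition file, a job using TotalOrderPartitioner
 * gets n + 1 totally ordered reduce partitions. The key type and the split
 * values here are illustrative, not from the source. */
Configuration conf = new Configuration();
Text[] splits = { new Text("h"), new Text("q") }; // 2 cut points -> 3 partitions
Path partitionFile = writePartitionFile("totalorder", conf, splits);
Job job = Job.getInstance(conf, "totalorder");
job.setPartitionerClass(TotalOrderPartitioner.class);
job.setNumReduceTasks(splits.length + 1);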
/** Run a FileOperation */
public void map(Text key, FileOperation value,
                OutputCollector<WritableComparable<?>, Text> out,
                Reporter reporter) throws IOException {
  try {
    value.run(jobconf);
    ++succeedcount;
    reporter.incrCounter(Counter.SUCCEED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FAIL, 1);
    String s = "FAIL: " + value + ", " + StringUtils.stringifyException(e);
    out.collect(null, new Text(s));
    LOG.info(s);
  } finally {
    reporter.setStatus(getCountString());
  }
}
/**
 * Gets a single record from the map file for the given index.
 *
 * @param index Index, between 0 and numRecords()-1
 * @return Value from the MapFile
 * @throws IOException If an error occurs during reading
 */
public V getRecord(long index) throws IOException {
  // First: determine which reader to read from...
  int readerIdx = -1;
  for (int i = 0; i < recordIndexesEachReader.size(); i++) {
    Pair<Long, Long> p = recordIndexesEachReader.get(i);
    if (index >= p.getFirst() && index <= p.getSecond()) {
      readerIdx = i;
      break;
    }
  }
  if (readerIdx == -1) {
    throw new IllegalStateException("Index not found in any reader: " + index);
  }

  WritableComparable key = indexToKey.getKeyForIndex(index);
  Writable value = ReflectionUtils.newInstance(recordClass, null);
  V v = (V) readers[readerIdx].get(key, value);
  return v;
}
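/* Hypothetical usage of getRecord above, iterating every record across the
 * underlying MapFile readers by index. numRecords() comes from the same
 * class per the javadoc; handle(...) is an assumed consumer. */
for (long i = 0; i < numRecords(); i++) {
  V record = getRecord(i);
  handle(record); // assumed consumer, not part of the source
}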
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
    final FileSystem fs, JobConf job, String name,
    final Progressable progress) throws IOException {
  final Path segmentDumpFile = new Path(
      FileOutputFormat.getOutputPath(job), name);

  // Get the old copy out of the way
  if (fs.exists(segmentDumpFile)) {
    fs.delete(segmentDumpFile, true);
  }

  final PrintStream printStream = new PrintStream(
      fs.create(segmentDumpFile));
  return new RecordWriter<WritableComparable<?>, Writable>() {
    public synchronized void write(WritableComparable<?> key, Writable value)
        throws IOException {
      printStream.println(value);
    }

    public synchronized void close(Reporter reporter) throws IOException {
      printStream.close();
    }
  };
}
@Override
public int compare(WritableComparable o1, WritableComparable o2) {
  ReadKey rk1 = (ReadKey) o1;
  ReadKey rk2 = (ReadKey) o2;
  if (rk1.sample.equals(rk2.sample)) {
    // Same sample: order by sequence (0 when the sequences also match).
    return rk1.sequence.compareTo(rk2.sequence);
  }
  return rk1.sample.compareTo(rk2.sample);
}
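/* Illustrative wiring, not from the source: assuming the method above lives
 * in a WritableComparator subclass (here called ReadKeyComparator, an
 * assumed name), it can be registered as the job's sort comparator so that
 * reads arrive ordered by sample and then by sequence. */
JobConf job = new JobConf();
job.setOutputKeyComparatorClass(ReadKeyComparator.class);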
@Override
public RecordReader<WritableComparable, HCatRecord> createRecordReader(
    InputSplit split, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a SqoopHCatRecordReader");
  return new SqoopHCatRecordReader(split, taskContext, this);
}
public RecordReader<WritableComparable, HCatRecord> createHCatRecordReader(
    InputSplit split, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a base HCatRecordReader");
  return super.createRecordReader(split, taskContext);
}
@Override
public void map(WritableComparable key, HCatRecord value, Context context)
    throws IOException, InterruptedException {
  try {
    recsRead.add(value);
    readRecordCount++;
  } catch (Exception e) {
    if (LOG.isDebugEnabled()) {
      e.printStackTrace(System.err);
    }
    throw new IOException(e);
  }
}
private String[] getContent(Configuration conf, Path path) throws Exception {
  ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(
      new Path(new Path(new SqoopOptions().getJarOutputDir()),
          getTableName() + ".jar").toString(),
      getTableName());

  FileSystem fs = FileSystem.getLocal(conf);
  FileStatus[] stats = fs.listStatus(path);
  Path[] paths = new Path[stats.length];
  for (int i = 0; i < stats.length; i++) {
    paths[i] = stats[i].getPath();
  }

  // Read all the files, adding the value lines to the list.
  List<String> strings = new ArrayList<String>();
  for (Path filePath : paths) {
    if (filePath.getName().startsWith("_")
        || filePath.getName().startsWith(".")) {
      continue;
    }

    // Need to use a new Configuration object so that it has the proper
    // class loaders.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath,
        new Configuration());
    WritableComparable key =
        (WritableComparable) reader.getKeyClass().newInstance();
    Writable value = (Writable) reader.getValueClass().newInstance();
    while (reader.next(key, value)) {
      strings.add(value.toString());
    }
    reader.close();
  }

  ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
  return strings.toArray(new String[0]);
}
public void map(WritableComparable key, Writable value,
                OutputCollector<WritableComparable, IntWritable> output,
                Reporter reporter) throws IOException {
  // check for the classpath
  String classpath = System.getProperty("java.class.path");
  if (classpath.indexOf("testjob.jar") == -1) {
    throw new IOException("failed to find testjob.jar in the classpath "
                          + classpath);
  }
  if (classpath.indexOf("test.jar") == -1) {
    throw new IOException("failed to find the library test.jar in "
                          + classpath);
  }

  // fork off ls to see if the file exists.
  // java file.exists() will not work on
  // Windows since it is a symlink
  String[] argv = new String[7];
  argv[0] = "ls";
  argv[1] = "files_tmp";
  argv[2] = "localfilelink";
  argv[3] = "dfsfilelink";
  argv[4] = "tarlink";
  argv[5] = "ziplink";
  argv[6] = "test.tgz";
  Process p = Runtime.getRuntime().exec(argv);
  int ret = -1;
  try {
    ret = p.waitFor();
  } catch (InterruptedException ie) {
    // do nothing here.
  }
  if (ret != 0) {
    throw new IOException("files_tmp does not exist");
  }
}
public void map(WritableComparable key, Writable value,
                OutputCollector<WritableComparable, Writable> out,
                Reporter reporter) throws IOException {
  // NOTE: the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}
public void map(WritableComparable key, Writable value,
                OutputCollector<WritableComparable, Writable> out,
                Reporter reporter) throws IOException {
  try {
    Thread.sleep(1000000);
  } catch (InterruptedException e) {
    // Do nothing
  }
}
public void map(WritableComparable key, Writable value,
                OutputCollector<BytesWritable, BytesWritable> output,
                Reporter reporter) throws IOException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    int keyLength = minKeySize
        + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
    randomKey.setSize(keyLength);
    randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
    int valueLength = minValueSize
        + (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
    randomValue.setSize(valueLength);
    randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
    output.collect(randomKey, randomValue);
    numBytesToWrite -= keyLength + valueLength;
    // BYTES_WRITTEN counts bytes, not records (cf. the new-API variant above).
    reporter.incrCounter(Counters.BYTES_WRITTEN, keyLength + valueLength);
    reporter.incrCounter(Counters.RECORDS_WRITTEN, 1);
    if (++itemCount % 200 == 0) {
      reporter.setStatus("wrote record " + itemCount + ". "
                         + numBytesToWrite + " bytes left.");
    }
  }
  reporter.setStatus("done with " + itemCount + " records.");
}
public void map(WritableComparable key, Text value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  output.collect(new UTF8(process(line)), new UTF8(""));
}
public int compare(WritableComparable v1, WritableComparable v2) {
  // Bucket keys by hundreds and sort the buckets in descending order.
  int val1 = ((IntWritable) v1).get() / 100;
  int val2 = ((IntWritable) v2).get() / 100;
  if (val1 < val2) {
    return 1;
  } else if (val1 > val2) {
    return -1;
  }
  return 0;
}
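/* Illustrative check, not from the source: with the comparator above, keys
 * are grouped by value / 100 and larger buckets sort first. The enclosing
 * class name DecreasingBucketComparator is an assumption. */
DecreasingBucketComparator cmp = new DecreasingBucketComparator();
assert cmp.compare(new IntWritable(250), new IntWritable(137)) < 0;  // 250 sorts first
assert cmp.compare(new IntWritable(137), new IntWritable(142)) == 0; // same bucket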
public void map(WritableComparable key, Writable val,
                OutputCollector<WritableComparable, Writable> output,
                Reporter reporter) throws IOException {
  assertNotNull("Mapper not configured!", loader);

  // load the memory
  loader.load();

  // work as identity mapper
  output.collect(key, val);
}
public void reduce(WritableComparable key, Iterator<Writable> val,
                   OutputCollector<WritableComparable, Writable> output,
                   Reporter reporter) throws IOException {
  assertNotNull("Reducer not configured!", loader);

  // load the memory
  loader.load();

  // work as identity reducer
  output.collect(key, key);
}
@SuppressWarnings("unchecked") public void configure(JobConf job) { keyclass = (Class<? extends K>) job.getClass("test.fakeif.keyclass", NullWritable.class, WritableComparable.class); valclass = (Class<? extends V>) job.getClass("test.fakeif.valclass", NullWritable.class, WritableComparable.class); }
@SuppressWarnings("unchecked") public RR_ClassLoaderChecker(JobConf job) { assertTrue("The class loader has not been inherited from " + CompositeRecordReader.class.getSimpleName(), job.getClassLoader() instanceof Fake_ClassLoader); keyclass = (Class<? extends K>) job.getClass("test.fakeif.keyclass", NullWritable.class, WritableComparable.class); valclass = (Class<? extends V>) job.getClass("test.fakeif.valclass", NullWritable.class, WritableComparable.class); }
@SuppressWarnings("unchecked") public void configure(JobConf job) { keyclass = (Class<? extends K>) job.getClass("test.fakeif.keyclass", IncomparableKey.class, WritableComparable.class); valclass = (Class<? extends V>) job.getClass("test.fakeif.valclass", NullWritable.class, WritableComparable.class); }
public void map(WritableComparable<?> key, Writable value, Context context)
    throws IOException {
  try {
    Thread.sleep(1000000);
  } catch (InterruptedException e) {
    // Do nothing
  }
}