/**
 * Assembles the job plan: a MongoDB-backed Hadoop source feeds a map step
 * ({@code ExtractDayIntoKey}), whose output feeds a reduce step ({@code Count}),
 * whose output is written as a two-column CSV file.
 *
 * @param args command-line arguments; this method does not consult them
 * @return the assembled plan with its default parallelism set to 8
 */
public Plan getPlan(String... args) {
    // Source: Hadoop MongoInputFormat pointed at the configured collection URI.
    JobConf mongoConf = new JobConf();
    mongoConf.set("mongo.input.uri", "mongodb://localhost:27017/enron_mail.messages");
    HadoopDataSource<BSONWritable, BSONWritable> mongoSource =
            new HadoopDataSource<BSONWritable, BSONWritable>(
                    new MongoInputFormat(),
                    mongoConf,
                    "read from Mongodb",
                    new WritableWrapperConverter<BSONWritable, BSONWritable>());

    // Operators: map, then reduce on the map output.
    MapOperator extractDay = MapOperator.builder(ExtractDayIntoKey.class)
            .input(mongoSource)
            .build();
    ReduceOperator countPerKey = ReduceOperator.builder(Count.class, IntValue.class, 0)
            .input(extractDay)
            .build();

    // Sink: CSV with an IntValue in column 0 and a StringValue in column 1.
    FileDataSink csvSink = new FileDataSink(CsvOutputFormat.class, "file:///tmp/enronCountByDay");
    CsvOutputFormat.configureRecordFormat(csvSink)
            .fieldDelimiter(',')
            .recordDelimiter('\n')
            .field(IntValue.class, 0)
            .field(StringValue.class, 1);
    csvSink.setInput(countPerKey);

    Plan plan = new Plan(csvSink, "Stratosphere Quickstart SDK Sample Job");
    plan.setDefaultParallelism(8);
    return plan;
}
/**
 * Collapses every rating received for one key into a single summary document
 * and emits it with a {@code NullWritable} key.
 *
 * <p>The emitted document carries the fields {@code movieid} (the key),
 * {@code mean}, {@code median} (50th percentile), {@code std}, {@code count}
 * and {@code total} (sum), all taken from a {@code DescriptiveStatistics}
 * accumulator filled with the incoming values.
 *
 * @param key     key shared by all {@code values}; written as {@code movieid}
 * @param values  ratings to summarise
 * @param context job context used to emit the summary document
 * @throws IOException          if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void reduce(final IntWritable key, final Iterable<DoubleWritable> values, final Context context)
        throws IOException, InterruptedException {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (DoubleWritable rating : values) {
        stats.addValue(rating.get());
    }

    // start() is a static factory: call it on the class instead of allocating
    // a builder instance that is immediately discarded.
    DBObject summary = BasicDBObjectBuilder.start()
            .add("movieid", key.get())
            .add("mean", stats.getMean())
            .add("median", stats.getPercentile(50))
            .add("std", stats.getStandardDeviation())
            .add("count", stats.getN())
            .add("total", stats.getSum())
            .get();

    BSONWritable doc = new BSONWritable(summary);
    context.write(NullWritable.get(), doc);
}
/**
 * Passes each incoming value to {@code count(...)}, logs the destination key,
 * and then writes a single record for {@code pKey}.
 *
 * <p>The record is built by {@code BSONUtilities.setValue} from the field name
 * stored under {@code MongoAggFormatter.UPDATE_FIELD} in the job configuration
 * and the {@code counts} member.
 *
 * @param pKey    key the reduced record is written under
 * @param pValues values to be counted
 * @param context job context supplying the configuration and receiving the output
 * @throws IOException          if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void reduce(final TenantAndIdEmittableKey pKey, final Iterable<Text> pValues, final Context context)
        throws IOException, InterruptedException {
    for (Text value : pValues) {
        count(value);
    }

    final Logger reducerLog = Logger.getLogger("SchoolProficiencyReducer");
    reducerLog.warning("writing reduce record to: " + pKey.toString());

    final String updateField = context.getConfiguration().get(MongoAggFormatter.UPDATE_FIELD);
    final BSONObject record = BSONUtilities.setValue(updateField, counts);
    context.write(pKey, new BSONWritable(record));
}
/**
 * Reads {@code fieldName} from the entity and converts it to a {@code DoubleWritable}.
 *
 * @param entity entity to read the field from
 * @return the parsed value, or {@code NullWritable} when the lookup yields
 *         {@code null} or the text is not a parsable double (which is logged)
 */
@Override
public Writable getValue(BSONWritable entity) {
    // Declared outside the try so the catch block can report what failed to parse.
    String raw = null;
    try {
        raw = BSONUtilities.getValue(entity, fieldName);
        if (raw == null) {
            return NullWritable.get();
        }
        return new DoubleWritable(Double.parseDouble(raw));
    } catch (NumberFormatException e) {
        log.severe(String.format("Failed to convert value {%s} to Double", raw));
        return NullWritable.get();
    }
}
/**
 * Reads {@code fieldName} from the entity, resolves it as a constant of
 * {@code enumClass}, and returns that constant's {@code toString()} as {@code Text}.
 *
 * @param entity entity to read the field from
 * @return the constant's text, or {@code NullWritable} when the lookup yields
 *         {@code null} or the text names no constant of the enum (which is logged)
 */
@Override
public Writable getValue(BSONWritable entity) {
    // Declared outside the try so the catch block can report the rejected text.
    String raw = null;
    try {
        raw = BSONUtilities.getValue(entity, fieldName);
        if (raw == null) {
            return NullWritable.get();
        }
        String constantText = Enum.valueOf(enumClass, raw).toString();
        return new Text(constantText);
    } catch (IllegalArgumentException e) {
        log.severe(String.format("Failed to convert value {%s} to Enum", raw));
        return NullWritable.get();
    }
}
/**
 * Reads {@code fieldName} from the entity and converts it to a {@code LongWritable}.
 *
 * @param entity entity to read the field from
 * @return the parsed value, or {@code NullWritable} when the lookup yields
 *         {@code null} or the text is not a parsable long (which is logged)
 */
@Override
public Writable getValue(BSONWritable entity) {
    // Declared outside the try so the catch block can report what failed to parse.
    String raw = null;
    try {
        raw = BSONUtilities.getValue(entity, fieldName);
        if (raw == null) {
            return NullWritable.get();
        }
        return new LongWritable(Long.parseLong(raw));
    } catch (NumberFormatException e) {
        log.severe(String.format("Failed to convert value {'%s'} to Long", raw));
        return NullWritable.get();
    }
}
/**
 * Removes every field listed in {@code idFields} from the entity, then emits
 * the entity under the given id.
 *
 * @param id      key to emit the entity under
 * @param entity  entity to strip and emit; it is modified in place
 * @param context job context receiving the output pair
 * @throws IOException          if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(T id, BSONWritable entity, Context context)
        throws IOException, InterruptedException {
    for (String idFieldName : idFields.values()) {
        BSONUtilities.removeField(entity, idFieldName);
    }

    context.write(id, entity);
}
/**
 * Reads {@code fieldName} from the entity and wraps it as {@code Text}.
 *
 * @param entity entity to read the field from
 * @return the field's text, or {@code NullWritable} when the lookup yields {@code null}
 */
@Override
public Writable getValue(BSONWritable entity) {
    String value = BSONUtilities.getValue(entity, fieldName);
    // The variable is statically a String, so a null check is the only test needed;
    // the former "instanceof String" could never be false for a non-null value.
    if (value == null) {
        return NullWritable.get();
    }
    return new Text(value);
}
/**
 * Issues a {@code findAndModify} against the output collection: the query is
 * the BSON form of the key and the update is {@code { "$set": value }}.
 *
 * @param key   key whose BSON form selects the document
 * @param value content placed under {@code $set}
 * @throws IOException declared by the writer contract
 */
@Override
public void write(EmittableKey key, BSONWritable value) throws IOException {
    DBObject query = new BasicDBObject();
    query.putAll(key.toBSON());

    DBObject update = new BasicDBObject("$set", value);

    output.findAndModify(query, update);
}
/*
 * Maps an aggregation function identifier to the reducer class implementing it.
 * The switch handles only Nth_highest (-> Highest.class); for any other value
 * rval keeps its initial null and null is returned.
 * NOTE(review): the modifiers and return type of this declaration are not
 * visible on this line -- confirm the header against the full source.
 */
getReduceClass(function f) { Class<? extends Reducer<TenantAndIdEmittableKey, BSONWritable, TenantAndIdEmittableKey, BSONWritable>> rval = null; switch (f) { case Nth_highest: rval = org.slc.sli.aggregation.functions.Highest.class; break; } return rval; }
/**
 * A nested string field addressed as {@code string.field} must come back as
 * {@code Text} carrying the stored string.
 */
@Test
public void testGetValue() {
    BSONObject field = new BasicBSONObject("field", "testing123");
    BSONObject entry = new BasicBSONObject("string", field);
    BSONWritable entity = new BSONWritable(entry);

    StringValueMapper mapper = new StringValueMapper("string.field");
    Writable value = mapper.getValue(entity);

    assertFalse(value instanceof NullWritable);
    assertTrue(value instanceof Text);
    // JUnit expects (expected, actual); the reversed order produced a
    // misleading message when the assertion failed.
    assertEquals("testing123", value.toString());
}
/**
 * Asking for a field that is absent from the entity must yield {@code NullWritable}.
 */
@Test
public void testValueNotFound() {
    BSONObject document = new BasicBSONObject("string", new BasicBSONObject("field", "testing123"));
    BSONWritable entity = new BSONWritable(document);

    Writable result = new StringValueMapper("string.missing_field").getValue(entity);

    assertTrue(result instanceof NullWritable);
}
/**
 * A nested long field addressed as {@code long.field} must come back as a
 * {@code LongWritable} holding the stored number.
 */
@Test
public void testGetValue() {
    BSONObject field = new BasicBSONObject("field", 123L);
    BSONObject entry = new BasicBSONObject("long", field);
    BSONWritable entity = new BSONWritable(entry);

    LongValueMapper mapper = new LongValueMapper("long.field");
    Writable value = mapper.getValue(entity);

    assertFalse(value instanceof NullWritable);
    assertTrue(value instanceof LongWritable);
    // JUnit expects (expected, actual); the reversed order produced a
    // misleading message when the assertion failed.
    assertEquals(123L, ((LongWritable) value).get());
}
/**
 * Asking for a field that is absent from the entity must yield {@code NullWritable}.
 */
@Test
public void testValueNotFound() {
    BSONObject document = new BasicBSONObject("long", new BasicBSONObject("field", 123L));
    BSONWritable entity = new BSONWritable(document);

    Writable result = new LongValueMapper("long.missing_field").getValue(entity);

    assertTrue(result instanceof NullWritable);
}
/**
 * A field holding a boolean rather than a number must yield {@code NullWritable}.
 */
@Test
public void testValueNotLong() {
    BSONObject document = new BasicBSONObject("long", new BasicBSONObject("field", true));
    BSONWritable entity = new BSONWritable(document);

    Writable result = new LongValueMapper("long.field").getValue(entity);

    assertTrue(result instanceof NullWritable);
}
/**
 * A nested field naming a constant of {@code Testing} must come back as
 * {@code Text} equal to that constant's {@code toString()}.
 */
@Test
public void testGetValue() {
    BSONObject field = new BasicBSONObject("field", "TEST1");
    BSONObject entry = new BasicBSONObject("enum", field);
    BSONWritable entity = new BSONWritable(entry);

    EnumValueMapper<Testing> m = new EnumValueMapper<Testing>("enum.field", Testing.class);
    Writable value = m.getValue(entity);

    assertFalse(value instanceof NullWritable);
    assertTrue(value instanceof Text);
    // JUnit expects (expected, actual); the reversed order produced a
    // misleading message when the assertion failed.
    assertEquals(Testing.TEST1.toString(), ((Text) value).toString());
}
/**
 * A field whose text ("Unknown") is mapped against {@code Testing} is expected
 * to yield {@code NullWritable}.
 */
@Test
public void testGetValueNotFound() {
    BSONObject document = new BasicBSONObject("enum", new BasicBSONObject("field", "Unknown"));
    BSONWritable entity = new BSONWritable(document);

    EnumValueMapper<Testing> mapper = new EnumValueMapper<Testing>("enum.field", Testing.class);
    Writable result = mapper.getValue(entity);

    assertTrue(result instanceof NullWritable);
}
/**
 * Returns a fixed {@code ContentSummary(1, 2, 3)} when the entity contains a
 * field named {@code found}, and {@code NullWritable} otherwise.
 *
 * @param entity entity to inspect
 * @return the fixed summary or {@code NullWritable}
 */
@Override
public Writable getValue(BSONWritable entity) {
    if (!entity.containsField("found")) {
        return NullWritable.get();
    }
    return new ContentSummary(1, 2, 3);
}
// Stubs Context.write on a mocked Context with an Answer that asserts it receives
// exactly two arguments: a TenantAndIdEmittableKey and a ContentSummary whose
// length, file count and directory count are 1, 2 and 3.
// NOTE(review): the closing statement "});, entity, context);" is not valid Java and
// looks truncated -- presumably a call that passes key, entity and context to the
// mapper under test stood here; restore it from version control.
@SuppressWarnings({ "rawtypes", "unchecked" }) @Test public void testMap() throws Exception { TenantAndIdEmittableKey key = new TenantAndIdEmittableKey(); ValueMapper m = new MockValueMapper(); BSONObject entry = new BasicBSONObject("found", "data"); BSONWritable entity = new BSONWritable(entry); Context context = Mockito.mock(Context.class); PowerMockito.when(context, "write", Matchers.any(EmittableKey.class), Matchers.any(BSONObject.class)).thenAnswer(new Answer<BSONObject>() { @Override public BSONObject answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); assertNotNull(args); assertEquals(args.length, 2); assertTrue(args[0] instanceof TenantAndIdEmittableKey); assertTrue(args[1] instanceof ContentSummary); TenantAndIdEmittableKey id = (TenantAndIdEmittableKey) args[0]; assertNotNull(id); ContentSummary e = (ContentSummary) args[1]; assertEquals(e.getLength(), 1); assertEquals(e.getFileCount(), 2); assertEquals(e.getDirectoryCount(), 3); return null; } });, entity, context); }
// Stubs Context.write on a mocked Context with an Answer that asserts it receives
// exactly two arguments: a TenantAndIdEmittableKey and a NullWritable. The entity
// carries only a "not_found" field.
// NOTE(review): the closing statement "});, entity, context);" is not valid Java and
// looks truncated -- presumably a call that passes key, entity and context to the
// mapper under test stood here; restore it from version control.
@SuppressWarnings({ "rawtypes", "unchecked" }) @Test public void testMapValueNotFound() throws Exception { TenantAndIdEmittableKey key = new TenantAndIdEmittableKey(); ValueMapper m = new MockValueMapper(); BSONObject entry = new BasicBSONObject("not_found", "data"); BSONWritable entity = new BSONWritable(entry); Context context = Mockito.mock(Context.class); PowerMockito.when(context, "write", Matchers.any(TenantAndIdEmittableKey.class), Matchers.any(BSONObject.class)).thenAnswer(new Answer<BSONObject>() { @Override public BSONObject answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); assertNotNull(args); assertEquals(args.length, 2); assertTrue(args[0] instanceof TenantAndIdEmittableKey); assertTrue(args[1] instanceof NullWritable); return null; } });, entity, context); }
/**
 * A nested double field addressed as {@code double.field} must come back as a
 * {@code DoubleWritable} holding the stored number (within a 0.05 tolerance).
 */
@Test
public void testGetValue() {
    BSONObject field = new BasicBSONObject("field", 1.312D);
    BSONObject entry = new BasicBSONObject("double", field);
    BSONWritable entity = new BSONWritable(entry);

    DoubleValueMapper mapper = new DoubleValueMapper("double.field");
    Writable value = mapper.getValue(entity);

    assertFalse(value instanceof NullWritable);
    assertTrue(value instanceof DoubleWritable);
    // JUnit expects (expected, actual, delta); the reversed order produced a
    // misleading message when the assertion failed.
    assertEquals(1.312D, ((DoubleWritable) value).get(), 0.05);
}
/**
 * Asking for a field that is absent from the entity must yield {@code NullWritable}.
 */
@Test
public void testValueNotFound() {
    BSONObject document = new BasicBSONObject("double", new BasicBSONObject("field", 1.312D));
    BSONWritable entity = new BSONWritable(document);

    Writable result = new DoubleValueMapper("double.missing_field").getValue(entity);

    assertTrue(result instanceof NullWritable);
}
/**
 * A field holding non-numeric text ("Bob") must yield {@code NullWritable}.
 */
@Test
public void testGetValueNotDouble() {
    BSONObject document = new BasicBSONObject("double", new BasicBSONObject("field", "Bob"));
    BSONWritable entity = new BSONWritable(document);

    Writable result = new DoubleValueMapper("double.field").getValue(entity);

    assertTrue(result instanceof NullWritable);
}
protected void testWrite(final T data) throws IOException { Mockito .when( mockCollection.findAndModify(Matchers.any(DBObject.class), Matchers.any(DBObject.class))).thenAnswer(new Answer<DBObject>() { @Override public DBObject answer(InvocationOnMock inv) { Object[] args = inv.getArguments(); // Expect 2 objects -- key and value assertTrue(args.length == 2); // Both should be BSONObject types assertTrue(args[0] instanceof BSONObject); assertTrue(args[1] instanceof BSONObject); BSONObject arg1 = (BSONObject) args[1]; // value is a single value BSONObject s = (BSONObject) arg1.get("$set"); assertNotNull(s); assertEquals(s.get("testKey"), data); return null; } }); BSONWritable value = new BSONWritable(); value.put("testKey", data); writer.write(key, value); }
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { if(args.length < 3) { System.err.println("Usage: MapReduceExercise " + "[mongodb input uri] " + "[mongodb output uri] " + "update=[true or false]"); System.err.println("Example: MapReduceExercise " + "mongodb:// " + "mongodb:// update=false"); System.err.println("Example: MapReduceExercise " + "mongodb:// " + "mongodb:// update=true"); System.exit(-1); } Class outputValueClass = BSONWritable.class; Class reducerClass = Reduce.class; if(args[2].equals("update=true")) { outputValueClass = MongoUpdateWritable.class; reducerClass = ReduceUpdater.class; } Configuration conf = new Configuration(); // Set MongoDB-specific configuration items conf.setClass("mongo.job.mapper", Map.class, Mapper.class); conf.setClass("mongo.job.reducer", reducerClass, Reducer.class); conf.setClass("mongo.job.mapper.output.key", IntWritable.class, Object.class); conf.setClass("mongo.job.mapper.output.value", DoubleWritable.class, Object.class); conf.setClass("mongo.job.output.key", NullWritable.class, Object.class); conf.setClass("mongo.job.output.value", outputValueClass, Object.class); conf.set("mongo.input.uri", args[0]); conf.set("mongo.output.uri", args[1]); Job job = Job.getInstance(conf); // Set Hadoop-specific job parameters job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(DoubleWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(outputValueClass); job.setMapperClass(Map.class); job.setReducerClass(reducerClass); job.setJarByClass(MapReduceExercise.class); job.submit(); }
/**
 * Runs the superclass setup, then loads the job configuration and stores the
 * mapper section's id fields in {@code idFields}.
 *
 * @param context mapper context supplying the Hadoop configuration
 * @throws IOException          propagated from the superclass setup
 * @throws InterruptedException propagated from the superclass setup
 */
@Override
protected void setup(Mapper<T, BSONWritable, T, BSONWritable>.Context context)
        throws IOException, InterruptedException {
    super.setup(context);

    idFields = JobConfiguration.readFromConfiguration(context.getConfiguration())
            .getMapper()
            .getMapIdFields();
}
/**
 * Emits the incoming key paired with whatever {@code getValue} produces for
 * the entity.
 *
 * @param key     key to emit unchanged
 * @param entity  entity handed to {@code getValue}
 * @param context job context receiving the output pair
 * @throws InterruptedException if the write is interrupted
 * @throws IOException          if the context fails to write
 */
@Override
public void map(TenantAndIdEmittableKey key, BSONWritable entity, Context context)
        throws InterruptedException, IOException {
    context.write(
            key,
            getValue(entity));
}
/**
 * Wraps the delegate reader's current value in a new {@code BSONWritable}.
 *
 * @return a fresh wrapper, created on every call
 */
@Override
public BSONWritable getCurrentValue() {
    return new BSONWritable(
            privateReader.getCurrentValue());
}
/**
 * Creates a {@code MongoAggWriter} bound to the output collection named by the
 * task's configuration.
 *
 * @param context task attempt context supplying the configuration
 * @return a new writer for the configured output collection
 */
@Override
public RecordWriter<EmittableKey, BSONWritable> getRecordWriter(
        org.apache.hadoop.mapreduce.TaskAttemptContext context) {
    return new MongoAggWriter(
            MongoConfigUtil.getOutputCollection(context.getConfiguration()),
            context);
}
/**
 * getValue - Attempt to look up the field in the entity and convert it to an appropriate
 * Writable. If the field does not exist or is the wrong type, return NullWritable.
 *
 * @param entity
 *            - Entity containing the field to convert.
 * @return Writable instance of the field, or NullWritable if the field does not exist
 *         or contains incompatible values.
 */
public abstract Writable getValue(BSONWritable entity);