/**
 * Processes one incoming tuple and acknowledges it.
 *
 * <p>Tuples whose source component is not {@code UPSTREAM_COMPONENT_ID} are
 * treated as filter updates: the word field and type field are read, type
 * {@code 1} adds the word to {@code bf} when it is not yet reported as a
 * member, and type {@code 0} deletes it when it is reported as a member.
 *
 * <p>Tuples from the upstream component carry a word. An empty word is only
 * acknowledged. Otherwise the word is emitted (unanchored) on
 * {@code Constraints.coinFileds} and then emitted, anchored to the input
 * tuple, on {@code Constraints.hotFileds} or {@code Constraints.nohotFileds}
 * depending on whether {@code bf} reports it as a member.
 *
 * @param tuple the tuple to process
 */
public void execute(Tuple tuple) {
    if (!tuple.getSourceComponent().equals(UPSTREAM_COMPONENT_ID)) {
        // Control tuple: update the filter according to the requested type.
        final String controlWord = tuple.getStringByField(Constraints.wordFileds);
        final Integer action = tuple.getIntegerByField(Constraints.typeFileds);
        final Key controlKey = new Key(controlWord.getBytes());
        if (bf.membershipTest(controlKey)) {
            if (action.equals(0)) {
                bf.delete(controlKey);
            }
        } else if (action.equals(1)) {
            bf.add(controlKey);
        }
        collector.ack(tuple);
        return;
    }

    final String word = tuple.getStringByField(UPSTREAM_FIEDLS);
    if (word.length() <= 0) {
        // Nothing to route for an empty word.
        collector.ack(tuple);
        return;
    }

    collector.emit(Constraints.coinFileds, new Values(word));

    final Key wordKey = new Key(word.getBytes());
    if (bf.membershipTest(wordKey)) {
        collector.emit(Constraints.hotFileds, tuple, new Values(word));
    } else {
        collector.emit(Constraints.nohotFileds, tuple, new Values(word));
    }
    collector.ack(tuple);
}
@Test public void shouldWriteAndReadFilter() throws IOException { // Given final BloomFilter filter = new BloomFilter(100, 5, Hash.MURMUR_HASH); filter.add(new Key("ABC".getBytes())); filter.add(new Key("DEF".getBytes())); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final DataOutputStream out = new DataOutputStream(baos); filter.write(out); String x = new String(baos.toByteArray(), AccumuloStoreConstants.BLOOM_FILTER_CHARSET); final ByteArrayInputStream bais = new ByteArrayInputStream(x.getBytes(AccumuloStoreConstants.BLOOM_FILTER_CHARSET)); // When final DataInputStream in = new DataInputStream(bais); final BloomFilter read = new BloomFilter(); read.readFields(in); // Then assertTrue(read.membershipTest(new Key("ABC".getBytes()))); assertTrue(read.membershipTest(new Key("DEF".getBytes()))); assertFalse(read.membershipTest(new Key("lkjhgfdsa".getBytes()))); }
/** {@inheritDoc} */ @Override public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data final Key key = new Key(data.getBytes()); // if the key is already in the filter, forget about it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); if (this.isCombining()) this.collect(data); else this.total++; }
/** {@inheritDoc} */ @Override public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data Key key = new Key(data.getBytes()); // if the key is already in the filter, forget it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); // and collect it this.collect(data); }
@Override public Tuple exec(Tuple input) throws IOException { if (input == null || input.size() == 0) return null; // Strip off the initial level of bag DataBag values = (DataBag)input.get(0); Iterator<Tuple> it = values.iterator(); Tuple t = it.next(); // If the input tuple has only one field, then we'll extract // that field and serialize it into a key. If it has multiple // fields, we'll serialize the whole tuple. byte[] b; if (t.size() == 1) b = DataType.toBytes(t.get(0)); else b = DataType.toBytes(t, DataType.TUPLE); Key k = new Key(b); filter = new BloomFilter(vSize, numHash, hType); filter.add(k); return TupleFactory.getInstance().newTuple(bloomOut()); }
/**
 * Writes the input record once if any whitespace-separated token of its
 * {@code "Text"} attribute is reported as a member of the filter.
 *
 * <p>Records whose {@code "Text"} attribute is null or empty are skipped.
 */
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    final Map<String, String> fields = transformXmlToMap(value.toString());
    final String body = fields.get("Text");
    if (isNullOrEmpty(body)) {
        return;
    }

    for (StringTokenizer words = new StringTokenizer(body); words.hasMoreTokens();) {
        final Key candidate = new Key(words.nextToken().getBytes());
        if (filter.membershipTest(candidate)) {
            // One hit is enough; emit the record and stop scanning.
            context.write(value, NullWritable.get());
            return;
        }
    }
}
/**
 * Logs the key to standard output and forwards the key/value pair only when
 * the filter reports the key as a member.
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    System.out.println("K[" + key + "]");

    final Key probe = new Key(key.toString().getBytes());
    if (!filter.membershipTest(probe)) {
        return;
    }
    context.write(key, value);
}
/**
 * Logs the key to standard output, adds the key to the filter when the value
 * parses to an integer greater than 30, and stores {@code output} in
 * {@code collector}.
 *
 * @throws NumberFormatException if {@code value} is not a valid integer
 */
@Override
public void map(Text key, Text value, OutputCollector<NullWritable, BloomFilter> output, Reporter reporter)
        throws IOException {
    System.out.println("K[" + key + "]");

    final int age = Integer.parseInt(value.toString());
    final boolean qualifies = age > 30;
    if (qualifies) {
        final Key member = new Key(key.toString().getBytes());
        filter.add(member);
    }

    collector = output;
}
/**
 * Emits the record keyed by user name when the filter reports that user as a
 * member; other records are dropped.
 *
 * <p>The emitted value carries the result of {@code getDataset()} and the
 * original record text.
 */
@Override
protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
    final String user = getUsername(value);
    final Key userKey = new Key(user.getBytes());
    if (!filter.membershipTest(userKey)) {
        return;
    }

    final Tuple record = new Tuple();
    record.setInt(ValueFields.DATASET, getDataset());
    record.setString(ValueFields.DATA, value.toString());
    context.write(new Text(user), record);
}
/**
 * Parses the record into a {@code User} and adds the user's name to the
 * filter when the user's state equals {@code "CA"}.
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final User parsed = User.fromText(value);
    if (!"CA".equals(parsed.getState())) {
        return;
    }

    final Key nameKey = new Key(parsed.getName().getBytes());
    filter.add(nameKey);
}
@Test public void shouldAcceptValidFilter() { // Given final BloomFilter filter = new BloomFilter(100, 5, Hash.MURMUR_HASH); filter.add(new Key("ABC".getBytes())); filter.add(new Key("DEF".getBytes())); // Then assertTrue(filter.membershipTest(new Key("ABC".getBytes()))); assertTrue(filter.membershipTest(new Key("DEF".getBytes()))); assertFalse(filter.membershipTest(new Key("lkjhgfdsa".getBytes()))); }
/**
 * Adds {@code uri} to the filter registered under {@code id}, registering a
 * new filter with the default vector size, default hash count and Jenkins
 * hash first when none is present.
 *
 * @param id  key under which the filter is stored in {@code idToBloomFilter}
 * @param uri value to add, encoded with {@code UTF8_CHARSET}
 */
public void addUriToBloomFilter(String id, String uri) {
    if (!idToBloomFilter.containsKey(id)) {
        final KR2RMLBloomFilter fresh = new KR2RMLBloomFilter(
                KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash,
                Hash.JENKINS_HASH);
        idToBloomFilter.putIfAbsent(id, fresh);
    }

    // Always re-read from the map so the instance actually registered is used.
    final KR2RMLBloomFilter target = idToBloomFilter.get(id);
    final Key uriKey = new Key(uri.getBytes(UTF8_CHARSET));
    target.add(uriKey);
}
/**
 * Tests whether {@code input} is reported as a member of the filter, calling
 * {@code init()} first if the filter has not been set yet.
 *
 * <p>A one-field tuple is keyed by that field's serialised bytes; a
 * multi-field tuple is serialised as a whole.
 *
 * @param input the tuple to test
 * @return the result of the filter's membership test
 * @throws IOException if serialisation fails
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] serialized = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(serialized));
}
/**
 * Creates a message filter.
 *
 * <p>When {@code filterEnabled} is true, {@code filterPattern} is split on
 * commas and every non-blank entry is added to the filter. Entries are
 * trimmed before being added, so a pattern such as {@code "a, b"} registers
 * {@code "a"} and {@code "b"}; previously the blank check used the trimmed
 * text but the untrimmed text (e.g. {@code " b"}) was stored, which could
 * never match the key {@code "b"}.
 *
 * @param filterEnabled whether filtering is active
 * @param filterPattern comma-separated keys; must be non-null when
 *                      {@code filterEnabled} is true
 */
public CBMessageFilter(boolean filterEnabled, String filterPattern) {
    this.filterEnabled = filterEnabled;
    if (!this.filterEnabled) {
        return;
    }

    for (String rawKey : filterPattern.split(",")) {
        // Store exactly the text that passed the blank check.
        String trimmedKey = rawKey.trim();
        if (trimmedKey.length() > 0) {
            filter.add(new Key(trimmedKey.getBytes()));
        }
    }
}
public boolean membershiptest(String key){ //return everythig if filter not enabled. if(!this.filterEnabled) return true; //System.out.println("Comparing key "+key); if(key==null) return false; return filter.membershipTest(new Key(key.getBytes())); }
/**
 * Emits the record, prefixed with {@code "B"} and keyed by its
 * {@code "UserId"} attribute, when that user id is reported as a member of
 * {@code bfilter}. Records without a {@code "UserId"} are skipped.
 */
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    final Map<String, String> fields = MRDPUtils.transformXmlToMap(value.toString());
    final String userId = fields.get("UserId");

    if (userId != null && bfilter.membershipTest(new Key(userId.getBytes()))) {
        outkey.set(userId);
        outvalue.set("B" + value.toString());
        context.write(outkey, outvalue);
    }
}
public void redisMembershipTestWithFilter(Path input, Path bloom) throws Exception { System.out.println("Testing Redis set membership of " + input + " using a Bloom filter " + bloom); // TODO create a fileSystem object FileSystem fs = FileSystem.get(getConf()); // TODO connect to Redis at localhost, port 6379 jedis = new Jedis("localhost", 6379); jedis.connect(); // TODO Create a new BloomFilter object BloomFilter filter = new BloomFilter(); // TODO call readFields with an FSDataInputStream from the file filter.readFields(fs.open(bloom)); // TODO Open the testing file for read String line = null; int numBFhits = 0, numhits = 0, numlines = 0; BufferedReader rdr = new BufferedReader(new InputStreamReader( fs.open(input))); // TODO create a new Key to re-use Key key = new Key(); long start = System.currentTimeMillis(); while ((line = rdr.readLine()) != null) { // TODO increment numlines ++numlines; // TODO set the bytes of the key to line's bytes with a weight of 1.0 key.set(line.getBytes(), 1.0); // TODO membership test the key if (filter.membershipTest(key)) { // TODO increment numBFhits ++numBFhits; // TODO test jedis using sismember if (jedis.sismember(REDIS_SET_KEY, line)) { // TODO increment numhits ++numhits; } } } long finish = System.currentTimeMillis(); // TODO close the file reader and Redis client rdr.close(); jedis.disconnect(); System.out.println("Took " + (finish - start) + " ms to check Redis " + numlines + " times for " + numhits + " successful tests. Bloom filter hits: " + numBFhits + " False postives: " + (numBFhits - numhits)); }
@Override public int run(String[] args) throws Exception { if (args.length != 4) { System.err .println("Usage: Trainer <totrain> <nummembers> <falseposrate> <bfoutfile>"); return 1; } // Parse command line arguments Path inputFile = new Path(args[0]); int numMembers = Integer.parseInt(args[1]); float falsePosRate = Float.parseFloat(args[2]); Path bfFile = new Path(args[3]); // TODO Create a new Jedis object using localhost at port 6379 jedis = new Jedis("localhost", 6379); // TODO delete the REDIS_SET_KEY jedis.del(REDIS_SET_KEY); // TODO Create a new Bloom filter BloomFilter filter = createBloomFilter(numMembers, falsePosRate); // TODO open the file for read FileSystem fs = FileSystem.get(getConf()); String line = null; int numRecords = 0; BufferedReader rdr = new BufferedReader(new InputStreamReader( fs.open(inputFile))); while ((line = rdr.readLine()) != null) { // TODO if the line is not empty if (!line.isEmpty()) { // TODO add the line to the Bloom filter filter.add(new Key(line.getBytes())); // TODO use Jedis client's "sadd" method to set jedis.sadd(REDIS_SET_KEY, line); // TODO increment numRecords ++numRecords; } } // TODO Close reader, disconnect Jedis client rdr.close(); jedis.disconnect(); System.out.println("Trained Bloom filter with " + numRecords + " entries."); System.out.println("Serializing Bloom filter to HDFS at " + bfFile); // TODO create anew FSDataOutputStream using the FileSystem FSDataOutputStream strm = fs.create(bfFile); // TODO pass the stream to the Bloom filter filter.write(strm); // TODO close the stream strm.flush(); strm.close(); System.out.println("Done training Bloom filter."); return 0; }
/**
 * Skeleton for testing Redis set membership of the lines of {@code input}
 * with the help of a Bloom filter read from {@code bloom}.
 *
 * <p>The steps marked TODO are not implemented yet. As written, the method
 * prints the opening message and then throws a {@code NullPointerException}
 * at the first {@code rdr.readLine()} call because {@code rdr} is still
 * {@code null}.
 *
 * @param input file whose lines are to be tested
 * @param bloom file holding the Bloom filter
 * @throws Exception declared for the I/O the TODO steps are expected to perform
 */
public void redisMembershipTestWithFilter(Path input, Path bloom) throws Exception {
    System.out.println("Testing Redis set membership of " + input + " using a Bloom filter " + bloom);

    // TODO create a fileSystem object

    // TODO connect to Redis at localhost, port 6379

    // TODO Create a new BloomFilter object

    // TODO call readFields with an FSDataInputStream from the file

    // TODO Open the testing file for read
    String line = null;
    int numBFhits = 0, numhits = 0, numlines = 0;
    // NOTE(review): placeholder; must be replaced by a real reader before the loop below can run.
    BufferedReader rdr = null;

    // TODO create a new Key to re-use
    Key key = new Key();

    // Only the read/check loop is timed.
    long start = System.currentTimeMillis();
    while ((line = rdr.readLine()) != null) {
        // TODO increment numlines

        // TODO set the bytes of the key to line's bytes with a weight of 1.0

        // TODO membership test the key

        // TODO increment numBFhits

        // TODO test jedis using sismember

        // TODO increment numhits
    }
    long finish = System.currentTimeMillis();

    // TODO close the file reader and Redis client

    System.out.println("Took " + (finish - start) + " ms to check Redis " + numlines + " times for " + numhits + " successful tests. Bloom filter hits: " + numBFhits + " False postives: " + (numBFhits - numhits));
}
/**
 * Creates a {@code Key} from a copy of the first {@code t.getLength()} bytes
 * of {@code t.getBytes()}.
 *
 * @param t the text to convert
 * @return a key holding its own copy of those bytes
 */
public static Key toKey(Text t) {
    final int length = t.getLength();
    final byte[] content = Arrays.copyOf(t.getBytes(), length);
    return new Key(content);
}
/**
 * Creates a {@code Key} for a string by wrapping it in a {@code Text} and
 * delegating to {@code toKey(Text)}.
 *
 * @param s the string to convert
 * @return the key for the string's {@code Text} representation
 */
public static Key toKey(String s) {
    final Text wrapped = new Text(s);
    return toKey(wrapped);
}