/**
 * Loads the bloom filter stored alongside the map file in {@code dirName}.
 *
 * <p>Resolves the file system for {@code dirName} from {@code conf}, opens the
 * file named {@code BLOOM_FILE_NAME} inside that directory and deserializes it
 * into {@code bloomFilter}. If an {@link IOException} occurs, a warning is
 * logged and {@code bloomFilter} is set to {@code null}.
 *
 * @param dirName directory that contains the bloom filter file
 * @param conf configuration used to resolve the file system
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);

    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);

    // Close explicitly so a failing close() is reported through the catch
    // block; the reference is cleared after a successful close so the finally
    // block only receives a stream that was not closed cleanly.
    stream.close();
    stream = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    IOUtils.closeStream(stream);
  }
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Loads the bloom filter stored alongside the map file in {@code dirName}.
 *
 * <p>Opens the file named {@code BLOOM_FILE_NAME} inside {@code dirName} on
 * {@code fs} and deserializes it into {@code bloomFilter}. If an
 * {@link IOException} occurs, a warning is logged and {@code bloomFilter} is
 * set to {@code null}. The input stream is closed on every path, including
 * when deserialization fails.
 *
 * @param fs file system that holds the bloom filter file
 * @param dirName directory that contains the bloom filter file
 * @param conf configuration; not read by this method
 */
private void initBloomFilter(FileSystem fs, String dirName, Configuration conf) {
  DataInputStream in = null;
  try {
    in = fs.open(new Path(dirName, BLOOM_FILE_NAME));
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(in);
    // Close inside the try so a failing close() still triggers the fallback
    // below; clear the reference so the finally block does not close again.
    in.close();
    in = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Reached with a non-null stream only when reading or closing failed:
    // release the handle instead of leaking it.
    if (in != null) {
      try {
        in.close();
      } catch (IOException ignored) {
        // Best-effort cleanup; the original failure was already handled.
      }
    }
  }
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt( IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat( IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * {@inheritDoc}
 *
 * <p>After delegating to the superclass, replaces {@code filter} with a new
 * {@code DynamicBloomFilter} built from {@code vectorSize}, {@code HASH_COUNT},
 * the Murmur hash type and the value of {@code getArg()} narrowed to
 * {@code int}.
 */
@Override
public void start(final EmitKey key) {
  super.start(key);

  // Locals are read in the same order the constructor arguments were
  // originally evaluated.
  final int bits = this.vectorSize;
  final int argAsInt = (int) this.getArg();
  this.filter = new DynamicBloomFilter(bits, HASH_COUNT, Hash.MURMUR_HASH, argAsInt);
}