@Test (timeout=60000) public void testCloseWithFailingFlush() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, name.getMethodName(), conf, COLUMN_FAMILY_BYTES); long size = region.getMemstoreSize(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); region.put(p1); // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. Store store = region.getStore(COLUMN_FAMILY_BYTES); StoreFlushContext storeFlushCtx = store.createFlushContext(12345); storeFlushCtx.prepare(); // Now add two entries to the foreground memstore. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); // Now try close on top of a failing flush. region.close(); fail(); } catch (IOException dse) { // Expected LOG.info("Expected DroppedSnapshotException"); } finally { // Make it so all writes succeed from here on out so can close clean ffs.fault.set(false); region.getWAL().rollWriter(true); HRegion.closeHRegion(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }
/**
 * Test we do not lose data if we fail a flush and then close.
 * Part of HBase-10466.  Tests the following from the issue description:
 * "Bug 1: Wrong calculation of HRegion.memstoreSize: When a flush fails, data to be flushed is
 * kept in each MemStore's snapshot and wait for next flush attempt to continue on it. But when
 * the next flush succeeds, the counter of total memstore size in HRegion is always deduced by
 * the sum of current memstore sizes instead of snapshots left from previous failed flush. This
 * calculation is problematic that almost every time there is failed flush, HRegion.memstoreSize
 * gets reduced by a wrong value. If region flush could not proceed for a couple cycles, the size
 * in current memstore could be much larger than the snapshot. It's likely to drift memstoreSize
 * much smaller than expected. In extreme case, if the error accumulates to even bigger than
 * HRegion's memstore size limit, any further flush is skipped because flush does not do anything
 * if memstoreSize is not larger than 0."
 * @throws Exception
 */
@Test (timeout=60000)
public void testFlushSizeAccounting() throws Exception {
  final Configuration conf = HBaseConfiguration.create(CONF);
  // Only retry once.
  conf.setInt("hbase.hstore.flush.retries.number", 1);
  final User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"});
  // Inject our faulty LocalFileSystem
  conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
  user.runAs(new PrivilegedExceptionAction<Object>() {
    @Override
    public Object run() throws Exception {
      // Make sure it worked (above is sensitive to caching details in hadoop core)
      FileSystem fs = FileSystem.get(conf);
      Assert.assertEquals(FaultyFileSystem.class, fs.getClass());
      FaultyFileSystem ffs = (FaultyFileSystem)fs;
      HRegion region = null;
      try {
        // Initialize region
        region = initHRegion(tableName, name.getMethodName(), conf, COLUMN_FAMILY_BYTES);
        long size = region.getMemstoreSize().get();
        Assert.assertEquals(0, size);
        // Put one item into memstore.  Measure the size of one item in memstore.
        Put p1 = new Put(row);
        p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null));
        region.put(p1);
        final long sizeOfOnePut = region.getMemstoreSize().get();
        // Fail a flush which means the current memstore will hang out as memstore 'snapshot'.
        try {
          LOG.info("Flushing");
          region.flushcache();
          Assert.fail("Didn't bubble up IOE!");
        } catch (DroppedSnapshotException dse) {
          // What we are expecting
        }
        // Make it so all writes succeed from here on out
        ffs.fault.set(false);
        // Check sizes.  Should still be the one entry: the failed flush must not
        // have deducted the snapshot's size from the region's running total.
        Assert.assertEquals(sizeOfOnePut, region.getMemstoreSize().get());
        // Now add two entries so that on this next flush that fails, we can see if we
        // subtract the right amount, the snapshot size only.
        Put p2 = new Put(row);
        p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null));
        p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null));
        region.put(p2);
        // One cell in the snapshot plus two in the foreground memstore.
        Assert.assertEquals(sizeOfOnePut * 3, region.getMemstoreSize().get());
        // Do a successful flush.  It will clear the snapshot only.  Thats how flushes work.
        // If already a snapshot, we clear it else we move the memstore to be snapshot and flush
        // it
        region.flushcache();
        // Make sure our memory accounting is right: only the snapshot's one-put
        // worth of size should have been subtracted, leaving the two new cells.
        Assert.assertEquals(sizeOfOnePut * 2, region.getMemstoreSize().get());
      } finally {
        HRegion.closeHRegion(region);
      }
      return null;
    }
  });
  FileSystem.closeAllForUGI(user.getUGI());
}
@Test (timeout=60000) public void testCloseWithFailingFlush() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, name.getMethodName(), conf, COLUMN_FAMILY_BYTES); long size = region.getMemstoreSize().get(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); region.put(p1); // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. Store store = region.getStore(COLUMN_FAMILY_BYTES); StoreFlushContext storeFlushCtx = store.createFlushContext(12345); storeFlushCtx.prepare(); // Now add two entries to the foreground memstore. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); // Now try close on top of a failing flush. region.close(); fail(); } catch (DroppedSnapshotException dse) { // Expected LOG.info("Expected DroppedSnapshotException"); } finally { // Make it so all writes succeed from here on out so can close clean ffs.fault.set(false); HRegion.closeHRegion(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }