@Test public void testHBaseFsckWithFewerMetaReplicas() throws Exception { ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( TEST_UTIL.getConfiguration()); RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, false); HBaseFsckRepair.closeRegionSilentlyAndWait(c, rl.getRegionLocation(1).getServerName(), rl.getRegionLocation(1).getRegionInfo()); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
@Test public void testHBaseFsckWithFewerMetaReplicaZnodes() throws Exception { ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( TEST_UTIL.getConfiguration()); RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, false); HBaseFsckRepair.closeRegionSilentlyAndWait(c, rl.getRegionLocation(2).getServerName(), rl.getRegionLocation(2).getRegionInfo()); ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(2)); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
@Test public void testHBaseFsckWithExcessMetaReplicas() throws Exception { // Create a meta replica (this will be the 4th one) and assign it HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( HRegionInfo.FIRST_META_REGIONINFO, 3); // create in-memory state otherwise master won't assign TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager() .getRegionStates().createRegionState(h); TEST_UTIL.getMiniHBaseCluster().getMaster().assignRegion(h); HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getHBaseAdmin(), h); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN, ERROR_CODE.SHOULD_NOT_BE_DEPLOYED}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, String table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); fsck.setFixMeta(fixMeta); fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); if (table != null) { fsck.includeTable(table); } fsck.onlineHbck(); return fsck; }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); fsck.setFixMeta(fixMeta); fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixTableLocks(fixTableLocks); if (table != null) { fsck.includeTable(table); } fsck.onlineHbck(); return fsck; }
/**
 * Refreshes {@code undeletedQueueIds} and {@code undeletedHFileRefsPeerIds} and reports an
 * {@code UNDELETED_REPLICATION_QUEUE} error for every entry found in either of them.
 *
 * @throws ReplicationException declared by this method; presumably raised by the lookups
 */
public void checkUnDeletedQueues() throws ReplicationException {
  undeletedQueueIds = getUnDeletedQueues();
  undeletedQueueIds.forEach((replicator, queueIds) -> queueIds.forEach(queueId -> {
    // the queue id is parsed only to recover the id of the removed peer
    ReplicationQueueInfo parsedQueue = new ReplicationQueueInfo(queueId);
    String details = String.format("[removedPeerId=%s, replicator=%s, queueId=%s]",
      parsedQueue.getPeerId(), replicator, queueId);
    errorReporter.reportError(HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
      "Undeleted replication queue for removed peer found: " + details);
  }));

  undeletedHFileRefsPeerIds = getUndeletedHFileRefsPeers();
  undeletedHFileRefsPeerIds.forEach(peerId -> errorReporter.reportError(
    HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
    "Undeleted replication hfile-refs queue for removed peer " + peerId + " found"));
}
private void stopMasterAndValidateReplicaCount(final int originalReplicaCount, final int newReplicaCount) throws Exception { ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterMetrics().getMasterName(); TEST_UTIL.getHBaseClusterInterface().stopMaster(sn); TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000); List<String> metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); assert(metaZnodes.size() == originalReplicaCount); //we should have what was configured before TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM, newReplicaCount); if (TEST_UTIL.getHBaseCluster().countServedRegions() < newReplicaCount) { TEST_UTIL.getHBaseCluster().startRegionServer(); } TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0); TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(); TEST_UTIL.waitFor(10000, predicateMetaHasReplicas(newReplicaCount)); // also check if hbck returns without errors TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, newReplicaCount); HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false); HbckTestingUtil.assertNoErrors(hbck); }
@Ignore @Test // Disabled. Relies on FSCK which needs work for AMv2. public void testHBaseFsckWithFewerMetaReplicas() throws Exception { ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( TEST_UTIL.getConfiguration()); RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, false); HBaseFsckRepair.closeRegionSilentlyAndWait(c, rl.getRegionLocation(1).getServerName(), rl.getRegionLocation(1).getRegionInfo()); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
@Ignore @Test // The close silently doesn't work any more since HBASE-14614. Fix. public void testHBaseFsckWithFewerMetaReplicaZnodes() throws Exception { ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( TEST_UTIL.getConfiguration()); RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, false); HBaseFsckRepair.closeRegionSilentlyAndWait(c, rl.getRegionLocation(2).getServerName(), rl.getRegionLocation(2).getRegionInfo()); ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); ZKUtil.deleteNode(zkw, zkw.znodePaths.getZNodeForReplica(2)); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
@Ignore @Test // Disabled because fsck and this needs work for AMv2 public void testHBaseFsckWithExcessMetaReplicas() throws Exception { // Create a meta replica (this will be the 4th one) and assign it RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( RegionInfoBuilder.FIRST_META_REGIONINFO, 3); TEST_UTIL.assignRegion(h); HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getAdmin(), h); // check that problem exists HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN, ERROR_CODE.SHOULD_NOT_BE_DEPLOYED}); // fix the problem hbck = doFsck(TEST_UTIL.getConfiguration(), true); // run hbck again to make sure we don't see any errors hbck = doFsck(TEST_UTIL.getConfiguration(), false); assertErrors(hbck, new ERROR_CODE[]{}); }
/**
 * Visits every table lock known to a freshly created {@link TableLockManager}. A lock whose
 * metadata carries a create time older than {@code now - expireTimeout} is reported as
 * {@code EXPIRED_TABLE_LOCK}; every other lock is only printed.
 */
public void checkTableLocks() throws IOException {
  TableLockManager lockManager =
      TableLockManager.createTableLockManager(zkWatcher.getConfiguration(), zkWatcher, null);
  // locks created before this instant count as expired
  final long expireDate = EnvironmentEdgeManager.currentTime() - expireTimeout;

  lockManager.visitAllLocks(new MetadataHandler() {
    @Override
    public void handleMetadata(byte[] ownerMetadata) {
      ZooKeeperProtos.TableLock lock = TableLockManager.fromBytes(ownerMetadata);
      if (lock == null) {
        // no parsed metadata: there is nothing to describe, print the bare prefix
        errorReporter.print("Table lock acquire attempt found:");
        return;
      }
      String description = "Table lock acquire attempt found:"
          + String.format("[tableName=%s:%s, lockOwner=%s, threadId=%s, "
              + "purpose=%s, isShared=%s, createTime=%s]",
              lock.getTableName().getNamespace().toStringUtf8(),
              lock.getTableName().getQualifier().toStringUtf8(),
              ProtobufUtil.toServerName(lock.getLockOwner()), lock.getThreadId(),
              lock.getPurpose(), lock.getIsShared(), lock.getCreateTime());
      boolean expired = lock.hasCreateTime() && lock.getCreateTime() < expireDate;
      if (expired) {
        errorReporter.reportError(HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
            description);
      } else {
        errorReporter.print(description);
      }
    }
  });
}
private void stopMasterAndValidateReplicaCount(int originalReplicaCount, int newReplicaCount) throws Exception { ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); TEST_UTIL.getHBaseClusterInterface().stopMaster(sn); TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000); List<String> metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); assert(metaZnodes.size() == originalReplicaCount); //we should have what was configured before TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM, newReplicaCount); TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0); TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(); int count = 0; do { metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); Thread.sleep(10); count++; // wait for the count to be different from the originalReplicaCount. When the // replica count is reduced, that will happen when the master unassigns excess // replica, and deletes the excess znodes } while (metaZnodes.size() == originalReplicaCount && count < 1000); assert(metaZnodes.size() == newReplicaCount); // also check if hbck returns without errors TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, newReplicaCount); HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false); HbckTestingUtil.assertNoErrors(hbck); }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, boolean fixTableZnodes, TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); fsck.setFixMeta(fixMeta); fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixTableLocks(fixTableLocks); fsck.setFixTableZNodes(fixTableZnodes); fsck.connect(); if (table != null) { fsck.includeTable(table); } fsck.onlineHbck(); fsck.close(); return fsck; }
/**
 * Asserts that the error codes collected by {@code fsck} are exactly
 * {@code expectedErrors}, ignoring order but respecting multiplicity.
 *
 * @param fsck hbck instance whose reported errors are checked
 * @param expectedErrors the error codes expected, in any order
 */
public static void assertErrors(HBaseFsck fsck, ERROR_CODE[] expectedErrors) {
  // Sort copies: sorting the list returned by getErrorList() in place would reorder the
  // error list that the fsck instance hands out to other callers as well.
  List<ERROR_CODE> errs = Lists.newArrayList(fsck.getErrors().getErrorList());
  Collections.sort(errs);
  List<ERROR_CODE> expErrs = Lists.newArrayList(expectedErrors);
  Collections.sort(expErrs);
  assertEquals(expErrs, errs);
}
/**
 * Visits every table lock known to a freshly created {@link TableLockManager}. A lock whose
 * metadata carries a create time older than {@code now - expireTimeout} is reported as
 * {@code EXPIRED_TABLE_LOCK}; every other lock is only printed.
 */
public void checkTableLocks() throws IOException {
  TableLockManager lockManager =
      TableLockManager.createTableLockManager(zkWatcher.getConfiguration(), zkWatcher, null);
  // locks created before this instant count as expired
  final long expireDate = EnvironmentEdgeManager.currentTime() - expireTimeout;

  lockManager.visitAllLocks(new MetadataHandler() {
    @Override
    public void handleMetadata(byte[] ownerMetadata) {
      ZooKeeperProtos.TableLock lock = TableLockManager.fromBytes(ownerMetadata);
      if (lock == null) {
        // no parsed metadata: there is nothing to describe, print the bare prefix
        errorReporter.print("Table lock acquire attempt found:");
        return;
      }
      String description = "Table lock acquire attempt found:"
          + String.format("[tableName=%s, lockOwner=%s, threadId=%s, "
              + "purpose=%s, isShared=%s, createTime=%s]",
              Bytes.toString(lock.getTableName().toByteArray()),
              ProtobufUtil.toServerName(lock.getLockOwner()), lock.getThreadId(),
              lock.getPurpose(), lock.getIsShared(), lock.getCreateTime());
      boolean expired = lock.hasCreateTime() && lock.getCreateTime() < expireDate;
      if (expired) {
        errorReporter.reportError(HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
            description);
      } else {
        errorReporter.print(description);
      }
    }
  });
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, boolean fixTableZnodes, TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); fsck.connect(); fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); fsck.setFixMeta(fixMeta); fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixTableLocks(fixTableLocks); fsck.setFixTableZNodes(fixTableZnodes); if (table != null) { fsck.includeTable(table); } fsck.onlineHbck(); fsck.close(); return fsck; }
/**
 * Visits every table lock known to a freshly created {@link TableLockManager}. A lock whose
 * metadata carries a create time older than {@code now - expireTimeout} is reported as
 * {@code EXPIRED_TABLE_LOCK}; every other lock is only printed.
 */
public void checkTableLocks() throws IOException {
  TableLockManager lockManager =
      TableLockManager.createTableLockManager(zkWatcher.getConfiguration(), zkWatcher, null);
  // locks created before this instant count as expired
  final long expireDate = EnvironmentEdgeManager.currentTimeMillis() - expireTimeout;

  lockManager.visitAllLocks(new MetadataHandler() {
    @Override
    public void handleMetadata(byte[] ownerMetadata) {
      ZooKeeperProtos.TableLock lock = TableLockManager.fromBytes(ownerMetadata);
      if (lock == null) {
        // no parsed metadata: there is nothing to describe, print the bare prefix
        errorReporter.print("Table lock acquire attempt found:");
        return;
      }
      String description = "Table lock acquire attempt found:"
          + String.format("[tableName=%s, lockOwner=%s, threadId=%s, "
              + "purpose=%s, isShared=%s, createTime=%s]",
              Bytes.toString(lock.getTableName().toByteArray()),
              ProtobufUtil.toServerName(lock.getLockOwner()), lock.getThreadId(),
              lock.getPurpose(), lock.getIsShared(), lock.getCreateTime());
      boolean expired = lock.hasCreateTime() && lock.getCreateTime() < expireDate;
      if (expired) {
        errorReporter.reportError(HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
            description);
      } else {
        errorReporter.print(description);
      }
    }
  });
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixHFileLinks, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, boolean fixReplication, TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); try { HBaseFsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); fsck.setFixMeta(fixMeta); fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); fsck.setFixHFileLinks(fixHFileLinks); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixReplication(fixReplication); if (table != null) { fsck.includeTable(table); } // Parse command line flags before connecting, to grab the lock. fsck.connect(); fsck.onlineHbck(); } finally { fsck.close(); } return fsck; }
/**
 * Runs hbck with the {@code -sidelineCorruptHFiles} and {@code -ignorePreCheckPermission}
 * options, restricted to one table.
 *
 * @param conf configuration handed to the new {@link HBaseFsck}
 * @param table table constraint; its name is passed as the last command-line argument
 * @return the hbck instance on which {@code exec} was invoked
 */
public static HBaseFsck doHFileQuarantine(Configuration conf, TableName table)
    throws Exception {
  String[] commandLine = {
    "-sidelineCorruptHFiles",
    "-ignorePreCheckPermission",
    table.getNameAsString()
  };
  HBaseFsck quarantineRun = new HBaseFsck(conf, exec);
  quarantineRun.exec(exec, commandLine);
  return quarantineRun;
}
/**
 * Visits every table lock known to a freshly created {@link TableLockManager}. A lock whose
 * metadata carries a create time older than {@code now - expireTimeout} is reported as
 * {@code EXPIRED_TABLE_LOCK}; every other lock, including one whose metadata could not be
 * parsed, is only printed.
 */
public void checkTableLocks() throws IOException {
  TableLockManager tableLockManager =
      TableLockManager.createTableLockManager(zkWatcher.getConfiguration(), zkWatcher, null);
  // locks created before this instant count as expired
  final long expireDate = EnvironmentEdgeManager.currentTimeMillis() - expireTimeout;

  MetadataHandler handler = new MetadataHandler() {
    @Override
    public void handleMetadata(byte[] ownerMetadata) {
      ZooKeeperProtos.TableLock data = TableLockManager.fromBytes(ownerMetadata);
      String msg = "Table lock acquire attempt found:";
      if (data != null) {
        msg = msg + String.format("[tableName=%s, lockOwner=%s, threadId=%s, "
            + "purpose=%s, isShared=%s, createTime=%s]",
            Bytes.toString(data.getTableName().toByteArray()),
            ProtobufUtil.toServerName(data.getLockOwner()), data.getThreadId(),
            data.getPurpose(), data.getIsShared(), data.getCreateTime());
      }

      // data is treated as nullable above, so guard the dereference here as well instead
      // of failing with a NullPointerException in the middle of the lock scan
      if (data != null && data.hasCreateTime() && data.getCreateTime() < expireDate) {
        errorReporter.reportError(HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK, msg);
      } else {
        errorReporter.print(msg);
      }
    }
  };

  tableLockManager.visitAllLocks(handler);
}
/** * Main program * * @param args * @throws Exception */ public static void main(String[] args) throws Exception { // create a fsck object Configuration conf = HBaseConfiguration.create(); // Cover both bases, the old way of setting default fs and the new. // We're supposed to run on 0.20 and 0.21 anyways. FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf)); HBaseFsck fsck = new HBaseFsck(conf); boolean fixHoles = false; // Process command-line args. for (int i = 0; i < args.length; i++) { String cmd = args[i]; if (cmd.equals("-details")) { fsck.setDisplayFullReport(); } else if (cmd.equals("-base")) { if (i == args.length - 1) { System.err.println("OfflineMetaRepair: -base needs an HDFS path."); printUsageAndExit(); } // update hbase root dir to user-specified base i++; FSUtils.setRootDir(conf, new Path(args[i])); FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf)); } else if (cmd.equals("-sidelineDir")) { if (i == args.length - 1) { System.err.println("OfflineMetaRepair: -sidelineDir needs an HDFS path."); printUsageAndExit(); } // set the hbck sideline dir to user-specified one i++; fsck.setSidelineDir(args[i]); } else if (cmd.equals("-fixHoles")) { fixHoles = true; } else if (cmd.equals("-fix")) { // make all fix options true fixHoles = true; } else { String str = "Unknown command line option : " + cmd; LOG.info(str); System.out.println(str); printUsageAndExit(); } } System.out.println("OfflineMetaRepair command line options: " + StringUtils.join(args, " ")); // Fsck doesn't shutdown and and doesn't provide a way to shutdown its // threads cleanly, so we do a System.exit. boolean success = false; try { success = fsck.rebuildMeta(fixHoles); } catch (MultipleIOException mioes) { for (IOException ioe : mioes.getExceptions()) { LOG.error("Bailed out due to:", ioe); } } catch (Exception e) { LOG.error("Bailed out due to: ", e); } finally { System.exit(success ? 0 : 1); } }
/** Runs hbck without fixing anything and asserts that it reports no errors. */
@Test
public void testHBaseFsckWithMetaReplicas() throws Exception {
  HbckTestingUtil.assertNoErrors(
      HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false));
}
/**
 * Creates an extra region spanning {@code splits[0]} to {@code splits[2]}, wipes meta, and
 * verifies that an offline meta rebuild refuses to proceed: {@code rebuildMeta(false)}
 * returns false, one overlap group with three members is reported, and after the cluster
 * is restarted meta is still in its wiped state.
 */
@Test(timeout = 120000)
public void testMetaRebuildOverlapFail() throws Exception {
  // Add a new .regioninfo meta entry in hdfs
  byte[] overlapStart = splits[0];
  byte[] overlapEnd = splits[2];
  createRegion(conf, htbl, overlapStart, overlapEnd);

  wipeOutMeta();

  // is meta really messed up?
  assertEquals(1, scanMeta());
  HBaseFsck checkBeforeRebuild = doFsck(conf, false);
  assertErrors(checkBeforeRebuild, new ERROR_CODE[] {
      ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
      ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
  // Note, would like to check # of tables, but this takes a while to time
  // out.

  // shutdown the minicluster
  TEST_UTIL.shutdownMiniHBaseCluster();
  TEST_UTIL.shutdownMiniZKCluster();

  // attempt to rebuild meta table from scratch
  HBaseFsck fsck = new HBaseFsck(conf);
  assertFalse(fsck.rebuildMeta(false));

  Multimap<byte[], HbckInfo> problems = fsck.getOverlapGroups(table);
  assertEquals(1, problems.keySet().size());
  assertEquals(3, problems.size());

  // bring up the minicluster
  TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
  TEST_UTIL.restartHBaseCluster(3);

  ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);

  LOG.info("Waiting for no more RIT");
  ZKAssign.blockUntilNoRIT(zkw);
  LOG.info("No more RIT in ZK, now doing final test verification");
  // poll the master's view of regions in transition, at most 60 times, one second apart
  for (int tries = 60;
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
          .getRegionsInTransition().size() > 0 && tries-- > 0; ) {
    LOG.info("Waiting for RIT: "+TEST_UTIL.getHBaseCluster()
        .getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition());
    Thread.sleep(1000);
  }

  // Meta still messed up.
  assertEquals(1, scanMeta());
  HTableDescriptor[] htbls = getTables(TEST_UTIL.getConfiguration());
  LOG.info("Tables present after restart: " + Arrays.toString(htbls));

  // After HBASE-451 HBaseAdmin.listTables() gets table descriptors from FS,
  // so the table is still present and this should be 1.
  assertEquals(1, htbls.length);
  HBaseFsck checkAfterRestart = doFsck(conf, false);
  assertErrors(checkAfterRestart, new ERROR_CODE[] {
      ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
      ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
}
@Test(timeout = 120000) public void testMetaRebuild() throws Exception { wipeOutMeta(); // is meta really messed up? assertEquals(1, scanMeta()); assertErrors(doFsck(conf, false), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED}); // Note, would like to check # of tables, but this takes a while to time // out. // shutdown the minicluster TEST_UTIL.shutdownMiniHBaseCluster(); TEST_UTIL.shutdownMiniZKCluster(); // rebuild meta table from scratch HBaseFsck fsck = new HBaseFsck(conf); assertTrue(fsck.rebuildMeta(false)); // bring up the minicluster TEST_UTIL.startMiniZKCluster(); TEST_UTIL.restartHBaseCluster(3); try (Connection connection = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration())) { Admin admin = connection.getAdmin(); admin.enableTable(table); LOG.info("Waiting for no more RIT"); TEST_UTIL.waitUntilNoRegionsInTransition(60000); LOG.info("No more RIT in ZK, now doing final test verification"); // everything is good again. assertEquals(5, scanMeta()); HTableDescriptor[] htbls = admin.listTables(); LOG.info("Tables present after restart: " + Arrays.toString(htbls)); assertEquals(1, htbls.length); } assertErrors(doFsck(conf, false), new ERROR_CODE[] {}); LOG.info("Table " + table + " has " + tableRowCount(conf, table) + " entries."); assertEquals(16, tableRowCount(conf, table)); }
@Test(timeout = 120000) public void testMetaRebuildHoleFail() throws Exception { // Fully remove a meta entry and hdfs region byte[] startKey = splits[1]; byte[] endKey = splits[2]; deleteRegion(conf, htbl, startKey, endKey); wipeOutMeta(); // is meta really messed up? assertEquals(1, scanMeta()); assertErrors(doFsck(conf, false), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED}); // Note, would like to check # of tables, but this takes a while to time // out. // shutdown the minicluster TEST_UTIL.shutdownMiniHBaseCluster(); TEST_UTIL.shutdownMiniZKCluster(); // attempt to rebuild meta table from scratch HBaseFsck fsck = new HBaseFsck(conf); assertFalse(fsck.rebuildMeta(false)); fsck.close(); // bring up the minicluster TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default TEST_UTIL.restartHBaseCluster(3); ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL); LOG.info("Waiting for no more RIT"); ZKAssign.blockUntilNoRIT(zkw); LOG.info("No more RIT in ZK, now doing final test verification"); int tries = 60; while(TEST_UTIL.getHBaseCluster() .getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition().size() > 0 && tries-- > 0) { LOG.info("Waiting for RIT: "+TEST_UTIL.getHBaseCluster() .getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition()); Thread.sleep(1000); } // Meta still messed up. assertEquals(1, scanMeta()); HTableDescriptor[] htbls = getTables(TEST_UTIL.getConfiguration()); LOG.info("Tables present after restart: " + Arrays.toString(htbls)); // After HBASE-451 HBaseAdmin.listTables() gets table descriptors from FS, // so the table is still present and this should be 1. assertEquals(1, htbls.length); assertErrors(doFsck(conf, false), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED}); }
/**
 * Convenience overload: delegates to the three-argument {@code doFsck}, passing
 * {@code null} as the third argument.
 *
 * @param conf configuration forwarded unchanged
 * @param fix flag forwarded unchanged; presumably turns hbck's fixing on - confirm against
 *          the three-argument overload
 * @return whatever the three-argument overload returns
 */
public static HBaseFsck doFsck( Configuration conf, boolean fix) throws Exception { return doFsck(conf, fix, null); }