/**
 * Creates the node health script runner described by the given configuration.
 *
 * <p>The script path is read from
 * {@code YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH}. When
 * {@code NodeHealthScriptRunner.shouldRun} rejects that path, the reason is
 * logged and no runner is created.
 *
 * @param conf configuration supplying the script path, check interval,
 *             script timeout and script arguments
 * @return a new runner, or {@code null} when the script should not be run;
 *         callers must handle the {@code null} case
 */
public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) {
  String nodeHealthScript =
      conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
  if (!NodeHealthScriptRunner.shouldRun(nodeHealthScript)) {
    // Tell the operator why the runner is missing instead of logging a
    // placeholder string.
    LOG.info("Node Manager health check script is not available "
        + "or doesn't have execute permission, so not "
        + "starting the node health script runner.");
    return null;
  }
  long nmCheckintervalTime = conf.getLong(
      YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
      YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
  long scriptTimeout = conf.getLong(
      YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
      YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
  // Fall back to an empty argument list when no options are configured.
  String[] scriptArgs = conf.getStrings(
      YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {});
  return new NodeHealthScriptRunner(nodeHealthScript,
      nmCheckintervalTime, scriptTimeout, scriptArgs);
}
/**
 * Builds a {@code NodeHealthScriptRunner} from configuration, if the
 * configured script passes {@code NodeHealthScriptRunner.shouldRun}.
 *
 * @param conf source of the script path, check interval, timeout and options
 * @return the configured runner, or {@code null} (after logging) when the
 *         script should not be run
 */
public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) {
  final String scriptPath =
      conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
  final boolean runnable = NodeHealthScriptRunner.shouldRun(scriptPath);

  if (runnable) {
    final long intervalMs = conf.getLong(
        YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
    final long timeoutMs = conf.getLong(
        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
    // Default to no extra arguments when the option is unset.
    final String[] options = conf.getStrings(
        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[0]);
    return new NodeHealthScriptRunner(
        scriptPath, intervalMs, timeoutMs, options);
  }

  LOG.info("Node Manager health check script is not available "
      + "or doesn't have execute permission, so not "
      + "starting the node health script runner.");
  return null;
}
/**
 * Registers the child services and then delegates to the parent
 * initialization.
 *
 * <p>The health script runner is registered only when one was supplied and
 * {@code NodeHealthScriptRunner.shouldRun} accepts the script path found in
 * {@code conf}. The dirs handler is always registered.
 *
 * @param conf configuration used to look up the health script path and
 *             passed on to the parent {@code serviceInit}
 * @throws Exception if the parent initialization fails
 */
@Override
protected void serviceInit(Configuration conf) throws Exception {
  String scriptPath = conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
  // The runner handed to the constructor may be null (the factory returns
  // null when the script should not run), and that decision may have been
  // made against a different configuration or file-system state than the
  // one seen here. Never register a null child service.
  if (nodeHealthScriptRunner != null
      && NodeHealthScriptRunner.shouldRun(scriptPath)) {
    addService(nodeHealthScriptRunner);
  }
  addService(dirsHandler);
  super.serviceInit(conf);
}
/**
 * Creates the service, named after this class, from its two collaborators.
 *
 * @param scriptRunner      node health script runner to store; kept exactly
 *                          as passed
 * @param dirHandlerService local dirs handler to store; kept exactly as
 *                          passed
 */
public NodeHealthCheckerService(NodeHealthScriptRunner scriptRunner,
    LocalDirsHandlerService dirHandlerService) {
  super(NodeHealthCheckerService.class.getName());
  this.dirsHandler = dirHandlerService;
  this.nodeHealthScriptRunner = scriptRunner;
}
/**
 * Accessor for the script runner held by this service.
 *
 * @return the current value of the {@code nodeHealthScriptRunner} field,
 *         returned as stored
 */
NodeHealthScriptRunner getNodeHealthScriptRunner() {
  return this.nodeHealthScriptRunner;
}
/**
 * Assembles a {@code NodeHealthCheckerService} from a script runner obtained
 * via {@code NodeManager.getNodeHealthScriptRunner(conf)} and a fresh
 * {@code LocalDirsHandlerService}.
 *
 * @param conf configuration forwarded to the script runner factory
 * @return the newly constructed health checker service
 */
private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) {
  // Arguments are evaluated left to right: runner first, then dirs handler.
  return new NodeHealthCheckerService(
      NodeManager.getNodeHealthScriptRunner(conf),
      new LocalDirsHandlerService());
}
/**
 * Drives a {@code NodeHealthCheckerService} backed by a spied script runner
 * through healthy, unhealthy, healthy-again and script-timeout states, and
 * checks the status and report copied into a {@code NodeHealthStatus} record
 * after each transition.
 *
 * @throws Exception if writing the configuration or script file, or
 *                   initializing the service, fails
 */
@Test
public void testNodeHealthService() throws Exception {
  RecordFactory factory = RecordFactoryProvider.getRecordFactory(null);
  NodeHealthStatus healthStatus =
      factory.newRecordInstance(NodeHealthStatus.class);
  Configuration conf = getConfForNodeHealthScript();
  // Close the stream ourselves; writeXml is not relied on to do so, and an
  // unclosed handle would leak for the rest of the test run.
  FileOutputStream confOut = new FileOutputStream(nodeHealthConfigFile);
  try {
    conf.writeXml(confOut);
  } finally {
    confOut.close();
  }
  conf.addResource(nodeHealthConfigFile.getName());
  writeNodeHealthScriptFile("", true);

  LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
  NodeHealthScriptRunner nodeHealthScriptRunner =
      spy(NodeManager.getNodeHealthScriptRunner(conf));
  NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(
      nodeHealthScriptRunner, dirsHandler);
  nodeHealthChecker.init(conf);

  // Initial state: the script reports healthy with an empty report.
  doReturn(true).when(nodeHealthScriptRunner).isHealthy();
  doReturn("").when(nodeHealthScriptRunner).getHealthReport();
  setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
      nodeHealthChecker.getHealthReport(),
      nodeHealthChecker.getLastHealthReportTime());
  LOG.info("Checking initial healthy condition");
  // Check proper report conditions.
  Assert.assertTrue("Node health status reported unhealthy",
      healthStatus.getIsNodeHealthy());
  Assert.assertEquals("Node health report does not match the checker's report",
      nodeHealthChecker.getHealthReport(), healthStatus.getHealthReport());

  // Healthy to unhealthy transition.
  doReturn(false).when(nodeHealthScriptRunner).isHealthy();
  // update health status
  setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
      nodeHealthChecker.getHealthReport(),
      nodeHealthChecker.getLastHealthReportTime());
  LOG.info("Checking Healthy--->Unhealthy");
  Assert.assertFalse("Node health status reported healthy",
      healthStatus.getIsNodeHealthy());
  Assert.assertEquals("Node health report does not match the checker's report",
      nodeHealthChecker.getHealthReport(), healthStatus.getHealthReport());

  // Unhealthy to healthy transition.
  doReturn(true).when(nodeHealthScriptRunner).isHealthy();
  setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
      nodeHealthChecker.getHealthReport(),
      nodeHealthChecker.getLastHealthReportTime());
  LOG.info("Checking UnHealthy--->healthy");
  // Check proper report conditions.
  Assert.assertTrue("Node health status reported unhealthy",
      healthStatus.getIsNodeHealthy());
  Assert.assertEquals("Node health report does not match the checker's report",
      nodeHealthChecker.getHealthReport(), healthStatus.getHealthReport());

  // Healthy to timeout transition.
  doReturn(false).when(nodeHealthScriptRunner).isHealthy();
  doReturn(NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG)
      .when(nodeHealthScriptRunner).getHealthReport();
  setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
      nodeHealthChecker.getHealthReport(),
      nodeHealthChecker.getLastHealthReportTime());
  LOG.info("Checking Healthy--->timeout");
  Assert.assertFalse("Node health status reported healthy even after timeout",
      healthStatus.getIsNodeHealthy());
  // The combined report is the script's timeout message, the service
  // separator, then the disk handler's report.
  String expectedTimeoutReport =
      NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG
          + NodeHealthCheckerService.SEPARATOR
          + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false);
  Assert.assertEquals("Node script time out message not propogated",
      expectedTimeoutReport, healthStatus.getHealthReport());
}