/**
 * Asserts that the given task tracker reports itself healthy and that the
 * job tracker does not list it as blacklisted.
 *
 * @param client task tracker client to inspect
 * @param conf configuration providing the health checker settings
 * @param cluster cluster instance used to reach the job tracker
 * @throws IOException if a status lookup fails
 */
public void verifyTTNotBlackListed(TTClient client, Configuration conf,
    MRCluster cluster) throws IOException {
  final int checkInterval = conf.getInt("mapred.healthChecker.interval", 0);
  Assert.assertTrue("Interval cannot be zero.", checkInterval != 0);

  // Wait for the configured interval plus a margin of 2000 before polling.
  UtilsForTests.waitFor(checkInterval + 2000);

  // Read now, asserted last (same order as the checks below).
  final String scriptPath = conf.get("mapred.healthChecker.script.path");

  final boolean becameHealthy = nodeHealthStatus(client, true);
  Assert.assertTrue("Task tracker is not healthy", becameHealthy);

  final TaskTrackerStatus ttStatus = client.getStatus();
  final JTClient jtClient = cluster.getJTClient();
  final boolean blackListed =
      jtClient.getProxy().isBlackListed(ttStatus.getTrackerName());
  Assert.assertTrue("Failed to move task tracker to healthy list",
      !blackListed);

  Assert.assertTrue("Health script was not set", scriptPath != null);
}
/** * Verifies that the given task tracker is blacklisted * @param conf modified Configuration object * @param client tasktracker info * @param errorMessage that needs to be asserted * @param cluster mr cluster instance * @throws IOException is thrown when verification fails */ public void verifyTTBlackList(Configuration conf, TTClient client, String errorMessage, MRCluster cluster) throws IOException{ int interval = conf.getInt("mapred.healthChecker.interval",0); Assert.assertTrue("Interval cannot be zero.",interval != 0); UtilsForTests.waitFor(interval+2000); //TaskTrackerStatus status = client.getStatus(); Assert.assertTrue("Task tracker was never blacklisted ", nodeHealthStatus(client, false) == true); TaskTrackerStatus status = client.getStatus(); Assert.assertTrue("The custom error message did not appear", status.getHealthStatus().getHealthReport().trim(). equals(errorMessage)); JTClient jClient = cluster.getJTClient(); Assert.assertTrue("Failed to move task tracker to blacklisted list", jClient.getProxy().isBlackListed(status.getTrackerName()) == true); }
/**
 * Polls the task tracker until its reported health flag equals the expected
 * value or the attempts run out.
 *
 * <p>The status is examined up to 60 times, with a
 * {@code UtilsForTests.waitFor(3000)} pause and a re-fetch after every
 * mismatch.
 *
 * @param client the task tracker client to poll
 * @param hStatus the health flag to wait for ({@code true} for healthy,
 *     {@code false} for unhealthy)
 * @return {@code true} if the task tracker reported the expected health flag
 *     within the allowed attempts, {@code false} otherwise
 * @throws IOException if the status of the task tracker cannot be fetched
 */
public boolean nodeHealthStatus(TTClient client, boolean hStatus)
    throws IOException {
  TaskTrackerStatus status = client.getStatus();
  // The first fetch needs the same guard as the re-fetches below; without it
  // a null status fails with a bare NullPointerException.
  Assert.assertNotNull("Task tracker status is null", status);

  for (int attempt = 0; attempt < 60; attempt++) {
    boolean healthy = status.getHealthStatus().isNodeHealthy();
    LOG.info("isNodeHealthy " + healthy);
    if (healthy == hStatus) {
      return true;
    }
    UtilsForTests.waitFor(3000);
    status = client.getStatus();
    Assert.assertNotNull("Task tracker status is null", status);
  }
  // NOTE(review): the status fetched after the final wait is never examined,
  // matching the original loop; confirm whether one last check is wanted.
  return false;
}
/** Black List more than 25 % of task trackers , run the high ram * job and make sure that no exception is thrown. * @throws Exception If fails to blacklist TT or run high ram high */ @Test public void testHiRamJobBlackListedTaskTrackers() throws Exception { final HighRamJobHelper hRamHelper = new HighRamJobHelper(); List<TTClient> bListedTT = new ArrayList<TTClient>(); List<TTClient> tClient = cluster.getTTClients(); int count = tClient.size(); int moreThan25Per = count / 4 +1; LOG.info ("More than 25 % of TTclient is "+moreThan25Per); for (int i=0; i < moreThan25Per ; ++i) { TTClient client = tClient.get(i); bListedTT.add(client); blackListTT(client); } //Now run the high ram job JobClient jobClient = cluster.getJTClient().getClient(); JTProtocol remoteJTClient = cluster.getJTClient().getProxy(); Configuration conf = remoteJTClient.getDaemonConf(); hRamHelper.runHighRamJob(conf, jobClient, remoteJTClient, "Job did not succeed"); //put the task tracker back in healthy state for( int i =0; i < bListedTT.size() ; ++i) { unBlackListTT(bListedTT.get(i)); } }
private void blackListTT(TTClient client) throws Exception { Configuration tConf= client.getProxy().getDaemonConf(); tConf.set("mapred.task.tracker.report.address", cluster.getConf().get("mapred.task.tracker.report.address")); String defaultHealthScript = tConf.get("mapred.healthChecker.script.path"); Assert.assertTrue("Health script was not set", defaultHealthScript != null); tConf.set("mapred.healthChecker.script.path", remotePath+File.separator+ TestHealthScriptError.healthScriptError); tConf.setInt("mapred.healthChecker.interval", 1000); bListHelper.copyFileToRemoteHost(TestHealthScriptError.healthScriptError, client.getHostName(), remotePath, cluster); cluster.restartDaemonWithNewConfig(client, "mapred-site.xml", tConf, Role.TT); //make sure the TT is blacklisted bListHelper.verifyTTBlackList(tConf, client, "ERROR Task Tracker status is fatal", cluster); }
/**
 * Resolves the client of the first task tracker listed for a task.
 *
 * <p>If the task has no task trackers yet, the task info is re-fetched up to
 * 60 times with a {@code UtilsForTests.waitFor(100)} pause before each
 * fetch. The host name is taken from the first tracker name: the second
 * {@code '_'}-separated field, cut at the first {@code ':'}.
 *
 * @param taskInfo task whose task tracker is wanted
 * @return the client returned by {@code cluster.getTTClient} for that host,
 *     or {@code null} if no task tracker showed up
 * @throws IOException if re-fetching the task info fails
 */
private TTClient getTTClientIns(TaskInfo taskInfo) throws IOException {
  String[] trackers = taskInfo.getTaskTrackers();
  for (int attempt = 0; attempt < 60 && trackers.length == 0; attempt++) {
    UtilsForTests.waitFor(100);
    taskInfo = wovenClient.getTaskInfo(taskInfo.getTaskID());
    trackers = taskInfo.getTaskTrackers();
  }

  if (trackers.length == 0) {
    return null;
  }

  final String afterUnderscore = trackers[0].split("_")[1];
  final String host = afterUnderscore.split(":")[0];
  return cluster.getTTClient(host);
}
/** * Error in the test path and script will not run, the TT will not be marked * unhealthy * @throws Exception in case of test errors */ @Test public void testHealthScriptPathError() throws Exception { LOG.info("running testHealthScriptPathError"); TTClient client = cluster.getTTClient(); Configuration tConf= client.getProxy().getDaemonConf(); tConf.set("mapred.task.tracker.report.address", cluster.getConf().get("mapred.task.tracker.report.address")); String defaultHealthScript = tConf.get("mapred.healthChecker.script.path"); Assert.assertTrue("Health script was not set", defaultHealthScript != null); tConf.set("mapred.healthChecker.script.path", remotePath+File.separator+ invalidHealthScript); tConf.setInt("mapred.healthChecker.interval",1000); cluster.restartDaemonWithNewConfig(client, "mapred-site.xml", tConf, Role.TT); //For a invalid health script the TT remains healthy helper.verifyTTNotBlackListed( client, tConf, cluster); cluster.restart(client, Role.TT); tConf = client.getProxy().getDaemonConf(); }
/**
 * Reports whether the cache file is found on any task tracker that ran one
 * of the given tasks; setup and cleanup tasks are skipped.
 *
 * @param taskInfos tasks whose task trackers are searched
 * @param cacheFile file name suffix to look for
 * @return {@code true} as soon as one task tracker has the file,
 *     {@code false} if none does
 * @throws Exception if resolving a task tracker or checking it fails
 */
private boolean checkLocalization(TaskInfo[] taskInfos, String cacheFile)
    throws Exception {
  for (TaskInfo info : taskInfos) {
    if (info.isSetupOrCleanup()) {
      continue;
    }
    for (TTClient tracker : getTTClients(info.getTaskTrackers())) {
      if (checkCacheFile(tracker, cacheFile)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Looks for the cache file under the public distributed cache directory of
 * each mapred local dir on the given task tracker.
 *
 * @param ttClient task tracker to search
 * @param cacheFile suffix that a listed path must end with to count as a
 *     match
 * @return {@code true} if any listed path ends with {@code cacheFile},
 *     {@code false} otherwise
 * @throws IOException if the local dirs or a listing cannot be obtained
 */
private boolean checkCacheFile(TTClient ttClient, String cacheFile)
    throws IOException {
  for (String baseDir : ttClient.getMapredLocalDirs()) {
    final String publicCacheDir =
        baseDir + Path.SEPARATOR + TaskTracker.getPublicDistributedCacheDir();
    // NOTE(review): the meaning of the two boolean flags is not visible
    // here - confirm against TTClient.listStatus.
    for (FileStatus entry : ttClient.listStatus(publicCacheDir, true, true)) {
      if (entry.getPath().toString().endsWith(cacheFile)) {
        return true;
      }
    }
  }
  return false;
}