@Test public void testSuccessfulFinishingToFinished() { Container amContainer = allocateApplicationAttempt(); launchApplicationAttempt(amContainer); runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; String trackingUrl = "mytrackingurl"; String diagnostics = "Successful"; unregisterApplicationAttempt(amContainer, finalStatus, trackingUrl, diagnostics); NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle( new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); testAppAttemptFinishedState(amContainer, finalStatus, trackingUrl, diagnostics, 0, false); }
public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, Resource perNode, String rackName, String healthReport, long lastHealthReportTime, int cmdPort, String hostName, NodeState state, Set<String> labels) { this.nodeId = nodeId; this.nodeAddr = nodeAddr; this.httpAddress = httpAddress; this.perNode = perNode; this.rackName = rackName; this.healthReport = healthReport; this.lastHealthReportTime = lastHealthReportTime; this.cmdPort = cmdPort; this.hostName = hostName; this.state = state; this.labels = labels; }
private void initLabelsToNodes() { if (this.labelsToNodes != null) { return; } GetLabelsToNodesResponseProtoOrBuilder p = viaProto ? proto : builder; List<LabelsToNodeIdsProto> list = p.getLabelsToNodesList(); this.labelsToNodes = new HashMap<String, Set<NodeId>>(); for (LabelsToNodeIdsProto c : list) { Set<NodeId> setNodes = new HashSet<NodeId>(); for(NodeIdProto n : c.getNodeIdList()) { NodeId node = new NodeIdPBImpl(n); setNodes.add(node); } if(!setNodes.isEmpty()) { this.labelsToNodes.put(c.getNodeLabels(), setNodes); } } }
private static void uploadContainerLogIntoRemoteDir(UserGroupInformation ugi, Configuration configuration, List<String> rootLogDirs, NodeId nodeId, ContainerId containerId, Path appDir, FileSystem fs) throws Exception { Path path = new Path(appDir, LogAggregationUtils.getNodeString(nodeId) + System.currentTimeMillis()); AggregatedLogFormat.LogWriter writer = new AggregatedLogFormat.LogWriter(configuration, path, ugi); writer.writeApplicationOwner(ugi.getUserName()); Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>(); appAcls.put(ApplicationAccessType.VIEW_APP, ugi.getUserName()); writer.writeApplicationACLs(appAcls); writer.append(new AggregatedLogFormat.LogKey(containerId), new AggregatedLogFormat.LogValue(rootLogDirs, containerId, UserGroupInformation.getCurrentUser().getShortUserName())); writer.close(); }
@Test public void testLogsView2() throws IOException { LOG.info("HsLogsPage with data"); MockAppContext ctx = new MockAppContext(0, 1, 1, 1); Map<String, String> params = new HashMap<String, String>(); params.put(CONTAINER_ID, MRApp.newContainerId(1, 1, 333, 1) .toString()); params.put(NM_NODENAME, NodeId.newInstance(MockJobs.NM_HOST, MockJobs.NM_PORT).toString()); params.put(ENTITY_STRING, "container_10_0001_01_000001"); params.put(APP_OWNER, "owner"); Injector injector = WebAppTests.testPage(AggregatedLogsPage.class, AppContext.class, ctx, params); PrintWriter spyPw = WebAppTests.getPrintWriter(injector); verify(spyPw).write( "Aggregation is not enabled. Try the nodemanager at " + MockJobs.NM_HOST + ":" + MockJobs.NM_PORT); }
private void computeRackAndLocality() { NodeId containerNodeId = container.getNodeId(); nodeRackName = RackResolver.resolve( containerNodeId.getHost()).getNetworkLocation(); locality = Locality.OFF_SWITCH; if (dataLocalHosts.size() > 0) { String cHost = resolveHost(containerNodeId.getHost()); if (dataLocalHosts.contains(cHost)) { locality = Locality.NODE_LOCAL; } } if (locality == Locality.OFF_SWITCH) { if (dataLocalRacks.contains(nodeRackName)) { locality = Locality.RACK_LOCAL; } } }
@Override public void addLabelsToNode(Map<NodeId, Set<String>> addedLabelsToNode) throws IOException { try { writeLock.lock(); // get nodesCollection before edition Map<String, Host> before = cloneNodeMap(addedLabelsToNode.keySet()); super.addLabelsToNode(addedLabelsToNode); // get nodesCollection after edition Map<String, Host> after = cloneNodeMap(addedLabelsToNode.keySet()); // update running nodes resources updateResourceMappings(before, after); } finally { writeLock.unlock(); } }
private String testGetContainer(YarnRPC rpc, ApplicationAttemptId appAttemptId, NodeId nodeId, ContainerId containerId, org.apache.hadoop.yarn.api.records.Token nmToken, boolean isExceptionExpected) { try { getContainerStatus(rpc, nmToken, containerId, appAttemptId, nodeId, isExceptionExpected); if (isExceptionExpected) { fail("Exception was expected!!"); } return ""; } catch (Exception e) { e.printStackTrace(); return e.getMessage(); } }
public void transition(RMAppImpl app, RMAppEvent event) { for (NodeId nodeId : app.getRanNodes()) { app.handler.handle( new RMNodeCleanAppEvent(nodeId, app.applicationId)); } app.finishTime = app.storedFinishTime; if (app.finishTime == 0 ) { app.finishTime = app.systemClock.getTime(); } // Recovered apps that are completed were not added to scheduler, so no // need to remove them from scheduler. if (app.recoveredFinalState == null) { app.handler.handle(new AppRemovedSchedulerEvent(app.applicationId, finalState)); } app.handler.handle( new RMAppManagerEvent(app.applicationId, RMAppManagerEventType.APP_COMPLETED)); app.rmContext.getRMApplicationHistoryWriter() .applicationFinished(app, finalState); app.rmContext.getSystemMetricsPublisher() .appFinished(app, finalState, app.finishTime); }
@Test public void testAbsentNodeStatus() throws Exception { NodeId nodeId = NodeId.newInstance("Absenthost0", 0); NodeCLI cli = new NodeCLI(); when(client.getNodeReports()).thenReturn( getNodeReports(0, NodeState.RUNNING)); cli.setClient(client); cli.setSysOutPrintStream(sysOut); cli.setSysErrPrintStream(sysErr); int result = cli.run(new String[] { "-status", nodeId.toString() }); assertEquals(0, result); verify(client).getNodeReports(); verify(sysOut, times(1)).println(isA(String.class)); verify(sysOut).println( "Could not find the node report for node id : " + nodeId.toString()); }
@Override protected RMContainerTokenSecretManager createContainerTokenSecretManager( Configuration conf) { return new RMContainerTokenSecretManager(conf) { @Override public Token createContainerToken(ContainerId containerId, NodeId nodeId, String appSubmitter, Resource capability, Priority priority, long createTime, LogAggregationContext logAggregationContext) { numRetries++; return super.createContainerToken(containerId, nodeId, appSubmitter, capability, priority, createTime, logAggregationContext); } }; }
@Test public void testNodeRegistrationFailure() throws Exception { writeToHostsFile("host1"); Configuration conf = new Configuration(); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile .getAbsolutePath()); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService(); RegisterNodeManagerRequest req = Records.newRecord( RegisterNodeManagerRequest.class); NodeId nodeId = NodeId.newInstance("host2", 1234); req.setNodeId(nodeId); req.setHttpPort(1234); // trying to register a invalid node. RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req); Assert.assertEquals(NodeAction.SHUTDOWN,response.getNodeAction()); Assert .assertEquals( "Disallowed NodeManager from host2, Sending SHUTDOWN signal to the NodeManager.", response.getDiagnosticsMessage()); }
private String testStopContainer(YarnRPC rpc, ApplicationAttemptId appAttemptId, NodeId nodeId, ContainerId containerId, Token nmToken, boolean isExceptionExpected) { try { stopContainer(rpc, nmToken, Arrays.asList(new ContainerId[] { containerId }), appAttemptId, nodeId); if (isExceptionExpected) { fail("Exception was expected!!"); } return ""; } catch (Exception e) { e.printStackTrace(); return e.getMessage(); } }
@Private @VisibleForTesting public boolean isApplicationAttemptNMTokenPresent( ApplicationAttemptId appAttemptId, NodeId nodeId) { try { this.readLock.lock(); HashSet<NodeId> nodes = this.appAttemptToNodeKeyMap.get(appAttemptId); if (nodes != null && nodes.contains(nodeId)) { return true; } else { return false; } } finally { this.readLock.unlock(); } }
private boolean waitForNodeManagerToConnect(int timeout, NodeId nodeId) throws Exception { for (int i = 0; i < timeout / 100; i++) { if (getActiveRM().getRMContext().getRMNodes().containsKey(nodeId)) { return true; } Thread.sleep(100); } return false; }
@SuppressWarnings("unchecked") private void handleUpdatedNodes(AllocateResponse response) { // send event to the job about on updated nodes List<NodeReport> updatedNodes = response.getUpdatedNodes(); if (!updatedNodes.isEmpty()) { // send event to the job to act upon completed tasks eventHandler.handle(new JobUpdatedNodesEvent(getJob().getID(), updatedNodes)); // act upon running tasks HashSet<NodeId> unusableNodes = new HashSet<NodeId>(); for (NodeReport nr : updatedNodes) { NodeState nodeState = nr.getNodeState(); if (nodeState.isUnusable()) { unusableNodes.add(nr.getNodeId()); } } for (int i = 0; i < 2; ++i) { HashMap<TaskAttemptId, Container> taskSet = i == 0 ? assignedRequests.maps : assignedRequests.reduces; // kill running containers for (Map.Entry<TaskAttemptId, Container> entry : taskSet.entrySet()) { TaskAttemptId tid = entry.getKey(); NodeId taskAttemptNodeId = entry.getValue().getNodeId(); if (unusableNodes.contains(taskAttemptNodeId)) { LOG.info("Killing taskAttempt:" + tid + " because it is running on unusable node:" + taskAttemptNodeId); eventHandler.handle(new TaskAttemptKillEvent(tid, "TaskAttempt killed because it ran on unusable node" + taskAttemptNodeId)); } } } } }
private RMNodeImpl getRebootedNode() { NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); Resource capability = Resource.newInstance(4096, 4, 4); RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0, null, capability, null); node.handle(new RMNodeStartedEvent(node.getNodeID(), null, null)); Assert.assertEquals(NodeState.RUNNING, node.getState()); node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.REBOOTING)); Assert.assertEquals(NodeState.REBOOTED, node.getState()); return node; }
private void replaceNodeForLabels(NodeId node, Set<String> oldLabels, Set<String> newLabels) { if(oldLabels != null) { removeNodeFromLabels(node, oldLabels); } addNodeToLabels(node, newLabels); }
@Override protected ContainerLauncher createContainerLauncher(final AppContext context) { return new ContainerLauncherImpl(context) { @Override public ContainerManagementProtocolProxyData getCMProxy( String containerMgrBindAddr, ContainerId containerId) throws IOException { InetSocketAddress addr = NetUtils.getConnectAddress(server); String containerManagerBindAddr = addr.getHostName() + ":" + addr.getPort(); Token token = tokenSecretManager.createNMToken( containerId.getApplicationAttemptId(), NodeId.newInstance(addr.getHostName(), addr.getPort()), "user"); ContainerManagementProtocolProxy cmProxy = new ContainerManagementProtocolProxy(conf); ContainerManagementProtocolProxyData proxy = cmProxy.new ContainerManagementProtocolProxyData( YarnRPC.create(conf), containerManagerBindAddr, containerId, token); return proxy; } }; }
@Test public void testGetContainerReport() throws Exception { ApplicationCLI cli = createAndGetAppCLI(); ApplicationId applicationId = ApplicationId.newInstance(1234, 5); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( applicationId, 1); ContainerId containerId = ContainerId.newContainerId(attemptId, 1); ContainerReport container = ContainerReport.newInstance(containerId, null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE, "http://" + NodeId.newInstance("host", 2345).toString()); when(client.getContainerReport(any(ContainerId.class))).thenReturn( container); int result = cli.run(new String[] { "container", "-status", containerId.toString() }); assertEquals(0, result); verify(client).getContainerReport(containerId); ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintWriter pw = new PrintWriter(baos); pw.println("Container Report : "); pw.println("\tContainer-Id : container_1234_0005_01_000001"); pw.println("\tStart-Time : 1234"); pw.println("\tFinish-Time : 5678"); pw.println("\tState : COMPLETE"); pw.println("\tLOG-URL : logURL"); pw.println("\tHost : host:1234"); pw.println("\tNodeHttpAddress : http://host:2345"); pw.println("\tDiagnostics : diagnosticInfo"); pw.close(); String appReportStr = baos.toString("UTF-8"); Assert.assertEquals(appReportStr, sysOutStream.toString()); verify(sysOut, times(1)).println(isA(String.class)); }
protected Set<String> getLabelsByNode(NodeId nodeId, Map<String, Host> map) { Host host = map.get(nodeId.getHost()); if (null == host) { return EMPTY_STRING_SET; } Node nm = host.nms.get(nodeId); if (null != nm && null != nm.labels) { return nm.labels; } else { return host.labels; } }
protected void createNodeIfNonExisted(NodeId nodeId) throws IOException { Host host = nodeCollections.get(nodeId.getHost()); if (null == host) { throw new IOException("Should create host before creating node."); } Node nm = host.nms.get(nodeId); if (null == nm) { host.nms.put(nodeId, new Node(nodeId)); } }
/** * This is to be called when NodeManager reconnects or goes down. This will * remove if NMTokens if present for any running application from cache. * @param nodeId */ public void removeNodeKey(NodeId nodeId) { try { this.writeLock.lock(); Iterator<HashSet<NodeId>> appNodeKeySetIterator = this.appAttemptToNodeKeyMap.values().iterator(); while (appNodeKeySetIterator.hasNext()) { appNodeKeySetIterator.next().remove(nodeId); } } finally { this.writeLock.unlock(); } }
public NMTokenIdentifier(ApplicationAttemptId appAttemptId, NodeId nodeId, String applicationSubmitter, int masterKeyId) { NMTokenIdentifierProto.Builder builder = NMTokenIdentifierProto.newBuilder(); if (appAttemptId != null) { builder.setAppAttemptId( ((ApplicationAttemptIdPBImpl)appAttemptId).getProto()); } if (nodeId != null) { builder.setNodeId(((NodeIdPBImpl)nodeId).getProto()); } builder.setAppSubmitter(applicationSubmitter); builder.setKeyId(masterKeyId); proto = builder.build(); }
public static GetNodesToLabelsResponse newInstance( Map<NodeId, Set<String>> map) { GetNodesToLabelsResponse response = Records.newRecord(GetNodesToLabelsResponse.class); response.setNodeToLabels(map); return response; }
public RMNodeStartedEvent(NodeId nodeId, List<NMContainerStatus> containerReports, List<ApplicationId> runningApplications) { super(nodeId, RMNodeEventType.STARTED); this.containerStatuses = containerReports; this.runningApplications = runningApplications; }
@Override @SuppressWarnings("unchecked") public void setup() throws IOException { super.setup(); NodeId nodeId = NodeId.newInstance("0.0.0.0", 5555); ((NMContext)context).setNodeId(nodeId); dispatcher = createDispatcher(); appEventHandler = mock(EventHandler.class); dispatcher.register(ApplicationEventType.class, appEventHandler); UserGroupInformation.setConfiguration(conf); }
@Test public void testRegisterNodeManagerRequest() { ApplicationId appId = ApplicationId.newInstance(123456789, 1); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); ContainerId containerId = ContainerId.newContainerId(attemptId, 1); NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, ContainerState.RUNNING, Resource.newInstance(1024, 1, 2), "diagnostics", 0, Priority.newInstance(10), 1234); List<NMContainerStatus> reports = Arrays.asList(containerReport); RegisterNodeManagerRequest request = RegisterNodeManagerRequest.newInstance( NodeId.newInstance("1.1.1.1", 1000), 8080, Resource.newInstance(1024, 1, 2), "NM-version-id", reports, Arrays.asList(appId)); RegisterNodeManagerRequest requestProto = new RegisterNodeManagerRequestPBImpl( ((RegisterNodeManagerRequestPBImpl) request).getProto()); Assert.assertEquals(containerReport, requestProto .getNMContainerStatuses().get(0)); Assert.assertEquals(8080, requestProto.getHttpPort()); Assert.assertEquals("NM-version-id", requestProto.getNMVersion()); Assert.assertEquals(NodeId.newInstance("1.1.1.1", 1000), requestProto.getNodeId()); Assert.assertEquals(Resource.newInstance(1024, 1, 2), requestProto.getResource()); Assert.assertEquals(1, requestProto.getRunningApplications().size()); Assert.assertEquals(appId, requestProto.getRunningApplications().get(0)); }
Container createContainer(FiCaSchedulerApp application, FiCaSchedulerNode node, Resource capability, Priority priority) { NodeId nodeId = node.getRMNode().getNodeID(); ContainerId containerId = BuilderUtils.newContainerId(application .getApplicationAttemptId(), application.getNewContainerId()); // Create the container Container container = BuilderUtils.newContainer(containerId, nodeId, node.getRMNode() .getHttpAddress(), capability, priority, null); return container; }
@Override public NodeId getNodeId() { ContainerProtoOrBuilder p = viaProto ? proto : builder; if (this.nodeId != null) { return this.nodeId; } if (!p.hasNodeId()) { return null; } this.nodeId = convertFromProtoFormat(p.getNodeId()); return this.nodeId; }
public void registerApplicationAttempt(ApplicationAttemptId appAttemptId) { try { this.writeLock.lock(); this.appAttemptToNodeKeyMap.put(appAttemptId, new HashSet<NodeId>()); } finally { this.writeLock.unlock(); } }
@Override public void setNodeId(NodeId nodeId) { maybeInitBuilder(); if (nodeId == null) builder.clearNodeId(); this.nodeId = nodeId; }
public Map<String, Set<NodeId>> getLabelsToNodesMap(Set<String> labels) { Map<String, Set<NodeId>> map = new HashMap<String, Set<NodeId>>(); Set<NodeId> setNodeIds = new HashSet<NodeId>(Arrays.asList( NodeId.newInstance("host1", 0), NodeId.newInstance("host2", 0))); for(String label : labels) { map.put(label, setNodeIds); } return map; }
public void getContainerStatusAsync(ContainerId containerId, NodeId nodeId) { try { events.put(new ContainerEvent(containerId, nodeId, null, ContainerEventType.QUERY_CONTAINER)); } catch (InterruptedException e) { LOG.warn("Exception when scheduling the event of querying the status" + " of Container " + containerId); callbackHandler.onGetContainerStatusError(containerId, e); } }
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds, NodeHeartbeatResponse latestResponse) { super(nodeId, RMNodeEventType.STATUS_UPDATE); this.nodeHealthStatus = nodeHealthStatus; this.containersCollection = collection; this.keepAliveAppIds = keepAliveAppIds; this.latestResponse = latestResponse; }
@Override public ConcurrentMap<NodeId, List<ContainerStatus>> getFinishedContainersSentToAMReference() { this.readLock.lock(); try { return this.finishedContainersSentToAM; } finally { this.readLock.unlock(); } }
public synchronized boolean unreserve(FiCaSchedulerNode node, Priority priority) { Map<NodeId, RMContainer> reservedContainers = this.reservedContainers.get(priority); if (reservedContainers != null) { RMContainer reservedContainer = reservedContainers.remove(node.getNodeID()); // unreserve is now triggered in new scenarios (preemption) // as a consequence reservedcontainer might be null, adding NP-checks if (reservedContainer != null && reservedContainer.getContainer() != null && reservedContainer.getContainer().getResource() != null) { if (reservedContainers.isEmpty()) { this.reservedContainers.remove(priority); } // Reset the re-reservation count resetReReservations(priority); Resource resource = reservedContainer.getContainer().getResource(); this.attemptResourceUsage.decReserved(node.getPartition(), resource); LOG.info("Application " + getApplicationId() + " unreserved " + " on node " + node + ", currently has " + reservedContainers.size() + " at priority " + priority + "; currentReservation " + this.attemptResourceUsage.getReserved() + " on node-label=" + node.getPartition()); return true; } } return false; }
private void initNodeToLabels() { if (this.nodeIdToLabels != null) { return; } ReplaceLabelsOnNodeRequestProtoOrBuilder p = viaProto ? proto : builder; List<NodeIdToLabelsProto> list = p.getNodeToLabelsList(); this.nodeIdToLabels = new HashMap<NodeId, Set<String>>(); for (NodeIdToLabelsProto c : list) { this.nodeIdToLabels.put(new NodeIdPBImpl(c.getNodeId()), Sets.newHashSet(c.getNodeLabelsList())); } }
private void initNodeResourceMap() { if (this.nodeResourceMap != null) { return; } UpdateNodeResourceRequestProtoOrBuilder p = viaProto ? proto : builder; List<NodeResourceMapProto> list = p.getNodeResourceMapList(); this.nodeResourceMap = new HashMap<NodeId, ResourceOption>(list .size()); for (NodeResourceMapProto nodeResourceProto : list) { this.nodeResourceMap.put(convertFromProtoFormat(nodeResourceProto.getNodeId()), convertFromProtoFormat(nodeResourceProto.getResourceOption())); } }
public SLSRunner(boolean isSLS, String inputTraces[], String nodeFile, String outputDir, Set<String> trackedApps, boolean printsimulation) throws IOException, ClassNotFoundException { this.isSLS = isSLS; this.inputTraces = inputTraces.clone(); this.nodeFile = nodeFile; this.trackedApps = trackedApps; this.printSimulation = printsimulation; metricsOutputDir = outputDir; nmMap = new HashMap<NodeId, NMSimulator>(); queueAppNumMap = new HashMap<String, Integer>(); amMap = new HashMap<String, AMSimulator>(); amClassMap = new HashMap<String, Class>(); // runner configuration conf = new Configuration(false); conf.addResource("sls-runner.xml"); // runner int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE, SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT); SLSRunner.runner.setQueueSize(poolSize); // <AMType, Class> map for (Map.Entry e : conf) { String key = e.getKey().toString(); if (key.startsWith(SLSConfiguration.AM_TYPE)) { String amType = key.substring(SLSConfiguration.AM_TYPE.length()); amClassMap.put(amType, Class.forName(conf.get(key))); } } }