@Override public void handle(ContainerAllocatorEvent event) { ContainerId cId = ContainerId.newContainerId(getContext().getApplicationAttemptId(), containerCount++); NodeId nodeId = NodeId.newInstance(NM_HOST, NM_PORT); Resource resource = Resource.newInstance(1234, 2, 2); ContainerTokenIdentifier containerTokenIdentifier = new ContainerTokenIdentifier(cId, nodeId.toString(), "user", resource, System.currentTimeMillis() + 10000, 42, 42, Priority.newInstance(0), 0); Token containerToken = newContainerToken(nodeId, "password".getBytes(), containerTokenIdentifier); Container container = Container.newInstance(cId, nodeId, NM_HOST + ":" + NM_HTTP_PORT, resource, null, containerToken); JobID id = TypeConverter.fromYarn(applicationId); JobId jobId = TypeConverter.toYarn(id); getContext().getEventHandler().handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent( org.apache.hadoop.mapreduce.TaskType.REDUCE, 100))); getContext().getEventHandler().handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent( org.apache.hadoop.mapreduce.TaskType.MAP, 100))); getContext().getEventHandler().handle( new TaskAttemptContainerAssignedEvent(event.getAttemptID(), container, null)); }
@Override public ApplicationState transition(ApplicationImpl app, ApplicationEvent event) { ApplicationFinishEvent appEvent = (ApplicationFinishEvent)event; if (app.containers.isEmpty()) { // No container to cleanup. Cleanup app level resources. app.handleAppFinishWithContainersCleanedup(); return ApplicationState.APPLICATION_RESOURCES_CLEANINGUP; } // Send event to ContainersLauncher to finish all the containers of this // application. for (ContainerId containerID : app.containers.keySet()) { app.dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, "Container killed on application-finish event: " + appEvent.getDiagnostic())); } return ApplicationState.FINISHING_CONTAINERS_WAIT; }
/** * This method produces an Allocation that includes the current view * of the resources that will be allocated to and preempted from this * application. * * @param rc * @param clusterResource * @param minimumAllocation * @return an allocation */ public synchronized Allocation getAllocation(ResourceCalculator rc, Resource clusterResource, Resource minimumAllocation) { Set<ContainerId> currentContPreemption = Collections.unmodifiableSet( new HashSet<ContainerId>(containersToPreempt)); containersToPreempt.clear(); Resource tot = Resource.newInstance(0, 0, 0); for(ContainerId c : currentContPreemption){ Resources.addTo(tot, liveContainers.get(c).getContainer().getResource()); } int numCont = (int) Math.ceil( Resources.divide(rc, clusterResource, tot, minimumAllocation)); ResourceRequest rr = ResourceRequest.newInstance( Priority.UNDEFINED, ResourceRequest.ANY, minimumAllocation, numCont); ContainersAndNMTokensAllocation allocation = pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = getHeadroom(); setApplicationHeadroomForMetrics(headroom); return new Allocation(allocation.getContainerList(), headroom, null, currentContPreemption, Collections.singletonList(rr), allocation.getNMTokenList()); }
/** * A helper api for creating an audit log for a successful event. */ static String createSuccessLog(String user, String operation, String target, ApplicationId appId, ApplicationAttemptId attemptId, ContainerId containerId) { StringBuilder b = new StringBuilder(); start(Keys.USER, user, b); addRemoteIP(b); add(Keys.OPERATION, operation, b); add(Keys.TARGET, target ,b); add(Keys.RESULT, AuditConstants.SUCCESS, b); if (appId != null) { add(Keys.APPID, appId.toString(), b); } if (attemptId != null) { add(Keys.APPATTEMPTID, attemptId.toString(), b); } if (containerId != null) { add(Keys.CONTAINERID, containerId.toString(), b); } return b.toString(); }
@Test(timeout = 10000) public void testGetContainers() throws YarnException, IOException { Configuration conf = new Configuration(); final AHSClient client = new MockAHSClient(); client.init(conf); client.start(); ApplicationId applicationId = ApplicationId.newInstance(1234, 5); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1); List<ContainerReport> reports = client.getContainers(appAttemptId); Assert.assertNotNull(reports); Assert.assertEquals(reports.get(0).getContainerId(), (ContainerId.newContainerId(appAttemptId, 1))); Assert.assertEquals(reports.get(1).getContainerId(), (ContainerId.newContainerId(appAttemptId, 2))); client.stop(); }
boolean remove(TaskAttemptId tId) { ContainerId containerId = null; if (tId.getTaskId().getTaskType().equals(TaskType.MAP)) { containerId = maps.remove(tId).getId(); } else { containerId = reduces.remove(tId).getId(); if (containerId != null) { boolean preempted = preemptionWaitingReduces.remove(tId); if (preempted) { LOG.info("Reduce preemption successful " + tId); } } } if (containerId != null) { containerToAttemptMap.remove(containerId); return true; } return false; }
protected void releaseContainers(List<ContainerId> containers, SchedulerApplicationAttempt attempt) { for (ContainerId containerId : containers) { RMContainer rmContainer = getRMContainer(containerId); if (rmContainer == null) { if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp() < nmExpireInterval) { LOG.info(containerId + " doesn't exist. Add the container" + " to the release request cache as it maybe on recovery."); synchronized (attempt) { attempt.getPendingRelease().add(containerId); } } else { RMAuditLogger.logFailure(attempt.getUser(), AuditConstants.RELEASE_CONTAINER, "Unauthorized access or invalid container", "Scheduler", "Trying to release container not owned by app or with invalid id.", attempt.getApplicationId(), containerId); } } completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(containerId, SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED); } }
protected void createReleaseCache() { // Cleanup the cache after nm expire interval. new Timer().schedule(new TimerTask() { @Override public void run() { for (SchedulerApplication<T> app : applications.values()) { T attempt = app.getCurrentAppAttempt(); synchronized (attempt) { for (ContainerId containerId : attempt.getPendingRelease()) { RMAuditLogger.logFailure( app.getUser(), AuditConstants.RELEASE_CONTAINER, "Unauthorized access or invalid container", "Scheduler", "Trying to release container not owned by app or with invalid id.", attempt.getApplicationId(), containerId); } attempt.getPendingRelease().clear(); } } LOG.info("Release request cache is cleaned up"); } }, nmExpireInterval); }
@Override public void handle(ContainerEvent event) { try { this.writeLock.lock(); ContainerId containerID = event.getContainerID(); LOG.debug("Processing " + containerID + " of type " + event.getType()); ContainerState oldState = stateMachine.getCurrentState(); ContainerState newState = null; try { newState = stateMachine.doTransition(event.getType(), event); } catch (InvalidStateTransitonException e) { LOG.warn("Can't handle this event at current state: Current: [" + oldState + "], eventType: [" + event.getType() + "]", e); } if (oldState != newState) { LOG.info("Container " + containerID + " transitioned from " + oldState + " to " + newState); } } finally { this.writeLock.unlock(); } }
ApplicationHistoryClientService mockApplicationHistoryClientService(int numApps, int numAppAttempts, int numContainers) throws Exception { ApplicationHistoryManager ahManager = new MockApplicationHistoryManagerImpl(store); ApplicationHistoryClientService historyClientService = new ApplicationHistoryClientService(ahManager); for (int i = 1; i <= numApps; ++i) { ApplicationId appId = ApplicationId.newInstance(0, i); writeApplicationStartData(appId); for (int j = 1; j <= numAppAttempts; ++j) { ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, j); writeApplicationAttemptStartData(appAttemptId); for (int k = 1; k <= numContainers; ++k) { ContainerId containerId = ContainerId.newContainerId(appAttemptId, k); writeContainerStartData(containerId); writeContainerFinishData(containerId); } writeApplicationAttemptFinishData(appAttemptId); } writeApplicationFinishData(appId); } return historyClientService; }
@Override public void transition(LocalizedResource rsrc, ResourceEvent event) { ResourceLocalizedEvent locEvent = (ResourceLocalizedEvent) event; rsrc.localPath = Path.getPathWithoutSchemeAndAuthority(locEvent.getLocation()); rsrc.size = locEvent.getSize(); for (ContainerId container : rsrc.ref) { rsrc.dispatcher.getEventHandler().handle( new ContainerResourceLocalizedEvent( container, rsrc.rsrc, rsrc.localPath)); } }
/** * Finds the local directories that logs for the given container are stored * on. */ public static List<File> getContainerLogDirs(ContainerId containerId, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); // It is not required to have null check for container ( container == null ) // and throw back exception.Because when container is completed, NodeManager // remove container information from its NMContext.Configuring log // aggregation to false, container log view request is forwarded to NM. NM // does not have completed container information,but still NM serve request for // reading container logs. if (container != null) { checkState(container.getContainerState()); } return getContainerLogDirs(containerId, context.getLocalDirsHandler()); }
public NMContainerTokenSecretManager(Configuration conf, NMStateStoreService stateStore) { super(conf); recentlyStartedContainerTracker = new TreeMap<Long, List<ContainerId>>(); this.stateStore = stateStore; }
@Test public void testContainerPage() throws Exception { Injector injector = WebAppTests.createMockInjector(ApplicationBaseProtocol.class, mockApplicationHistoryClientService(1, 1, 1)); ContainerPage containerPageInstance = injector.getInstance(ContainerPage.class); containerPageInstance.render(); WebAppTests.flushOutput(injector); containerPageInstance.set( YarnWebParams.CONTAINER_ID, ContainerId .newContainerId( ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1), 1).toString()); containerPageInstance.render(); WebAppTests.flushOutput(injector); }
@Test public void testMassiveWriteContainerHistory() throws IOException { long mb = 1024 * 1024; Runtime runtime = Runtime.getRuntime(); long usedMemoryBefore = (runtime.totalMemory() - runtime.freeMemory()) / mb; int numContainers = 100000; ApplicationId appId = ApplicationId.newInstance(0, 1); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1); for (int i = 1; i <= numContainers; ++i) { ContainerId containerId = ContainerId.newContainerId(appAttemptId, i); writeContainerStartData(containerId); writeContainerFinishData(containerId); } long usedMemoryAfter = (runtime.totalMemory() - runtime.freeMemory()) / mb; Assert.assertTrue((usedMemoryAfter - usedMemoryBefore) < 400); }
private void addFailedContainersToProto() { maybeInitBuilder(); builder.clearFailedRequests(); if (this.failedContainers == null) return; List<ContainerExceptionMapProto> protoList = new ArrayList<ContainerExceptionMapProto>(); for (Map.Entry<ContainerId, SerializedException> entry : this.failedContainers .entrySet()) { protoList.add(ContainerExceptionMapProto.newBuilder() .setContainerId(convertToProtoFormat(entry.getKey())) .setException(convertToProtoFormat(entry.getValue())).build()); } builder.addAllFailedRequests(protoList); }
public synchronized void finishTask(Task task) throws IOException, YarnException { Set<Task> tasks = this.tasks.get(task.getPriority()); if (!tasks.remove(task)) { throw new IllegalStateException( "Finishing unknown task " + task.getTaskId() + " from application " + applicationId); } NodeManager nodeManager = task.getNodeManager(); ContainerId containerId = task.getContainerId(); task.stop(); List<ContainerId> containerIds = new ArrayList<ContainerId>(); containerIds.add(containerId); StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds); nodeManager.stopContainers(stopRequest); Resources.subtractFrom(used, requestSpec.get(task.getPriority())); LOG.info("Finished task " + task.getTaskId() + " of application " + applicationId + " on node " + nodeManager.getHostName() + ", currently using " + used + " resources"); }
@Public @Stable public static AllocateRequest newInstance(int responseID, float appProgress, List<ResourceRequest> resourceAsk, List<ContainerId> containersToBeReleased, ResourceBlacklistRequest resourceBlacklistRequest, List<ContainerResourceIncreaseRequest> increaseRequests) { AllocateRequest allocateRequest = Records.newRecord(AllocateRequest.class); allocateRequest.setResponseId(responseID); allocateRequest.setProgress(appProgress); allocateRequest.setAskList(resourceAsk); allocateRequest.setReleaseList(containersToBeReleased); allocateRequest.setResourceBlacklistRequest(resourceBlacklistRequest); allocateRequest.setIncreaseRequests(increaseRequests); return allocateRequest; }
private ContainerStatus newContainerStatus(ContainerId cId, ContainerState state, int exitState) { ContainerStatus cs = Records.newRecord(ContainerStatus.class); cs.setContainerId(cId); cs.setState(state); cs.setExitStatus(exitState); return cs; }
private Container getContainer(YarnContainerLauncherEvent event) { ContainerId id = event.getContainerId(); Container c = containers.get(id); if (c == null) { c = new Container(event.getId(), event.getContainerId(), event.getContainerMgrAddress()); Container old = containers.putIfAbsent(id, c); if (old != null) { c = old; } } return c; }
private void addContainersToCleanupToProto() { maybeInitBuilder(); builder.clearContainersToCleanup(); if (containersToCleanup == null) return; Iterable<ContainerIdProto> iterable = new Iterable<ContainerIdProto>() { @Override public Iterator<ContainerIdProto> iterator() { return new Iterator<ContainerIdProto>() { Iterator<ContainerId> iter = containersToCleanup.iterator(); @Override public boolean hasNext() { return iter.hasNext(); } @Override public ContainerIdProto next() { return convertToProtoFormat(iter.next()); } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }; builder.addAllContainersToCleanup(iterable); }
@Override public ContainerId getContainerId() { if (this.containerId != null) { return this.containerId; } ContainerFinishDataProtoOrBuilder p = viaProto ? proto : builder; if (!p.hasContainerId()) { return null; } this.containerId = convertFromProtoFormat(p.getContainerId()); return this.containerId; }
public static Container newContainer(ContainerId containerId, NodeId nodeId, String nodeHttpAddress, Resource resource, Priority priority, Token containerToken) { Container container = recordFactory.newRecordInstance(Container.class); container.setId(containerId); container.setNodeId(nodeId); container.setNodeHttpAddress(nodeHttpAddress); container.setResource(resource); container.setPriority(priority); container.setContainerToken(containerToken); return container; }
private static AMInfo createAMInfo(int attempt) { ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance( ApplicationId.newInstance(100, 1), attempt); ContainerId containerId = ContainerId.newContainerId(appAttemptId, 1); return MRBuilderUtils.newAMInfo(appAttemptId, System.currentTimeMillis(), containerId, NM_HOST, NM_PORT, NM_HTTP_PORT); }
/** * Utility to create a {@link ContainerStatus} during exceptional * circumstances. * * @param containerId {@link ContainerId} of returned/released/lost container. * @param diagnostics diagnostic message * @return <code>ContainerStatus</code> for an returned/released/lost * container */ private static ContainerStatus createAbnormalContainerStatus( ContainerId containerId, int exitStatus, String diagnostics) { ContainerStatus containerStatus = recordFactory.newRecordInstance(ContainerStatus.class); containerStatus.setContainerId(containerId); containerStatus.setDiagnostics(diagnostics); containerStatus.setExitStatus(exitStatus); containerStatus.setState(ContainerState.COMPLETE); return containerStatus; }
@Override public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) { if (LOG.isDebugEnabled()) { LOG.debug("Container Status: id=" + containerId + ", status=" + containerStatus); } }
@Override public void onStartContainerError(ContainerId containerId, Throwable t) { LOG.error("Failed to start Container " + containerId); containers.remove(containerId); applicationMaster.completedContainerNum.incrementAndGet(); applicationMaster.failedContainerNum.incrementAndGet(); }
@Override public Map<ContainerId, ContainerHistoryData> getContainers( ApplicationAttemptId appAttemptId) throws IOException { ConcurrentMap<ContainerId, ContainerHistoryData> subMap = containerData.get(appAttemptId); if (subMap == null) { return Collections.<ContainerId, ContainerHistoryData> emptyMap(); } else { return new HashMap<ContainerId, ContainerHistoryData>(subMap); } }
private static void loadContainerToken(RecoveredContainerTokensState state, String key, String containerIdStr, byte[] value) throws IOException { ContainerId containerId; Long expTime; try { containerId = ConverterUtils.toContainerId(containerIdStr); expTime = Long.parseLong(asString(value)); } catch (IllegalArgumentException e) { throw new IOException("Bad container token state for " + key, e); } state.activeTokens.put(containerId, expTime); }
@Override protected ApplicationMasterLauncher createAMLauncher() { return new ApplicationMasterLauncher(getRMContext()) { @Override protected Runnable createRunnableLauncher(RMAppAttempt application, AMLauncherEventType event) { return new AMLauncher(context, application, event, getConfig()) { @Override protected ContainerManagementProtocol getContainerMgrProxy( ContainerId containerId) { return containerManager; } @Override protected Token<AMRMTokenIdentifier> createAndSetAMRMToken() { Token<AMRMTokenIdentifier> amRmToken = super.createAndSetAMRMToken(); InetSocketAddress serviceAddr = getConfig().getSocketAddr( YarnConfiguration.RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); SecurityUtil.setTokenService(amRmToken, serviceAddr); return amRmToken; } }; } }; }
private void addLocalContainerIdsToProto() { maybeInitBuilder(); builder.clearContainerId(); if (this.containerIds == null) return; List<ContainerIdProto> protoList = new ArrayList<ContainerIdProto>(); for (ContainerId id : containerIds) { protoList.add(convertToProtoFormat(id)); } builder.addAllContainerId(protoList); }
@Override public void onStartContainerError(ContainerId containerId, Throwable t) { LOG.error("Failed to start Container " + containerId); containers.remove(containerId); applicationMaster.numCompletedContainers.incrementAndGet(); applicationMaster.numFailedContainers.incrementAndGet(); }
private static ContainerId getContainerId(ApplicationId applicationId, int startCount) { ApplicationAttemptId appAttemptId = getApplicationAttemptId(applicationId, startCount); ContainerId containerId = ContainerId.newContainerId(appAttemptId, startCount); return containerId; }
@Override public void setContainerIds(List<ContainerId> containerIds) { maybeInitBuilder(); if (containerIds == null) builder.clearContainerId(); this.containerIds = containerIds; }
@Override public ContainerId getContainerId() { TaskAttemptReportProtoOrBuilder p = viaProto ? proto : builder; if (containerId != null) { return containerId; } // Else via proto if (!p.hasContainerId()) { return null; } containerId = convertFromProtoFormat(p.getContainerId()); return containerId; }
@Test public void testContainerMetricsLimit() throws InterruptedException { final String ERR = "Error in number of records"; MetricsSystem system = mock(MetricsSystem.class); doReturn(this).when(system).register(anyString(), anyString(), any()); MetricsCollectorImpl collector = new MetricsCollectorImpl(); ContainerId containerId = mock(ContainerId.class); ContainerMetrics metrics = ContainerMetrics.forContainer(containerId, 100); int anyPmemLimit = 1024; int anyVmemLimit = 2048; int anyVcores = 10; int anyGcores = 10; String anyProcessId = "1234"; metrics.recordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores, anyGcores); metrics.recordProcessId(anyProcessId); Thread.sleep(110); metrics.getMetrics(collector, true); assertEquals(ERR, 1, collector.getRecords().size()); MetricsRecord record = collector.getRecords().get(0); MetricsRecords.assertTag(record, ContainerMetrics.PROCESSID_INFO.name(), anyProcessId); MetricsRecords.assertMetric(record, ContainerMetrics .PMEM_LIMIT_METRIC_NAME, anyPmemLimit); MetricsRecords.assertMetric(record, ContainerMetrics.VMEM_LIMIT_METRIC_NAME, anyVmemLimit); MetricsRecords.assertMetric(record, ContainerMetrics.VCORE_LIMIT_METRIC_NAME, anyVcores); collector.clear(); }
public ContainerLauncherEvent(TaskAttemptId taskAttemptID, ContainerId containerID, String containerMgrAddress, Token containerToken, ContainerLauncher.EventType type) { super(type); this.taskAttemptID = taskAttemptID; this.containerID = containerID; this.containerMgrAddress = containerMgrAddress; this.containerToken = containerToken; }
private void initFailedRequests() { if (this.failedRequests != null) { return; } GetContainerStatusesResponseProtoOrBuilder p = viaProto ? proto : builder; List<ContainerExceptionMapProto> protoList = p.getFailedRequestsList(); this.failedRequests = new HashMap<ContainerId, SerializedException>(); for (ContainerExceptionMapProto ce : protoList) { this.failedRequests.put(convertFromProtoFormat(ce.getContainerId()), convertFromProtoFormat(ce.getException())); } }
@Private @VisibleForTesting protected void authorizeGetAndStopContainerRequest(ContainerId containerId, Container container, boolean stopRequest, NMTokenIdentifier identifier) throws YarnException { /* * For get/stop container status; we need to verify that 1) User (NMToken) * application attempt only has started container. 2) Requested containerId * belongs to the same application attempt (NMToken) which was used. (Note:- * This will prevent user in knowing another application's containers). */ ApplicationId nmTokenAppId = identifier.getApplicationAttemptId().getApplicationId(); if ((!nmTokenAppId.equals(containerId.getApplicationAttemptId().getApplicationId())) || (container != null && !nmTokenAppId.equals(container .getContainerId().getApplicationAttemptId().getApplicationId()))) { if (stopRequest) { LOG.warn(identifier.getApplicationAttemptId() + " attempted to stop non-application container : " + container.getContainerId()); NMAuditLogger.logFailure("UnknownUser", AuditConstants.STOP_CONTAINER, "ContainerManagerImpl", "Trying to stop unknown container!", nmTokenAppId, container.getContainerId()); } else { LOG.warn(identifier.getApplicationAttemptId() + " attempted to get status for non-application container : " + container.getContainerId()); } } }
public static AMInfo newAMInfo(ApplicationAttemptId appAttemptId, long startTime, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort) { AMInfo amInfo = Records.newRecord(AMInfo.class); amInfo.setAppAttemptId(appAttemptId); amInfo.setStartTime(startTime); amInfo.setContainerId(containerId); amInfo.setNodeManagerHost(nmHost); amInfo.setNodeManagerPort(nmPort); amInfo.setNodeManagerHttpPort(nmHttpPort); return amInfo; }