public void add(Replica replica) { try { // zig-zag to reduce size of legacy blocks cos.writeSInt64NoTag(replica.getBlockId()); cos.writeRawVarint64(replica.getBytesOnDisk()); cos.writeRawVarint64(replica.getGenerationStamp()); ReplicaState state = replica.getState(); // although state is not a 64-bit value, using a long varint to // allow for future use of the upper bits cos.writeRawVarint64(state.getValue()); if (state == ReplicaState.FINALIZED) { numFinalized++; } numBlocks++; } catch (IOException ioe) { // shouldn't happen, ByteString.Output doesn't throw IOE throw new IllegalStateException(ioe); } }
private boolean isBlockUnderConstruction(BlockInfoContiguous storedBlock, BlockUCState ucState, ReplicaState reportedState) { switch(reportedState) { case FINALIZED: switch(ucState) { case UNDER_CONSTRUCTION: case UNDER_RECOVERY: return true; default: return false; } case RBW: case RWR: return (!storedBlock.isComplete()); case RUR: // should not be reported case TEMPORARY: // should not be reported default: return false; } }
void addReplicaIfNotPresent(DatanodeStorageInfo storage, Block block, ReplicaState rState) { Iterator<ReplicaUnderConstruction> it = replicas.iterator(); while (it.hasNext()) { ReplicaUnderConstruction r = it.next(); DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation(); if(expectedLocation == storage) { // Record the gen stamp from the report r.setGenerationStamp(block.getGenerationStamp()); return; } else if (expectedLocation != null && expectedLocation.getDatanodeDescriptor() == storage.getDatanodeDescriptor()) { // The Datanode reported that the block is on a different storage // than the one chosen by BlockPlacementPolicy. This can occur as // we allow Datanodes to choose the target storage. Update our // state by removing the stale entry and adding a new one. it.remove(); break; } } replicas.add(new ReplicaUnderConstruction(block, storage, rState)); }
/** * Remove the temporary block file (if any) */ @Override // FsDatasetSpi public synchronized void unfinalizeBlock(ExtendedBlock b) throws IOException { ReplicaInfo replicaInfo = volumeMap.get(b.getBlockPoolId(), b.getLocalBlock()); if (replicaInfo != null && replicaInfo.getState() == ReplicaState.TEMPORARY) { // remove from volumeMap volumeMap.remove(b.getBlockPoolId(), b.getLocalBlock()); // delete the on-disk temp file if (delBlockFromDisk(replicaInfo.getBlockFile(), replicaInfo.getMetaFile(), b.getLocalBlock())) { LOG.warn("Block " + b + " unfinalized and removed. " ); } if (replicaInfo.getVolume().isTransientStorage()) { ramDiskReplicaTracker.discardReplica(b.getBlockPoolId(), b.getBlockId(), true); } } }
/** * Check if a block is valid. * * @param b The block to check. * @param minLength The minimum length that the block must have. May be 0. * @param state If this is null, it is ignored. If it is non-null, we * will check that the replica has this state. * * @throws ReplicaNotFoundException If the replica is not found * * @throws UnexpectedReplicaStateException If the replica is not in the * expected state. * @throws FileNotFoundException If the block file is not found or there * was an error locating it. * @throws EOFException If the replica length is too short. * * @throws IOException May be thrown from the methods called. */ public void checkBlock(ExtendedBlock b, long minLength, ReplicaState state) throws ReplicaNotFoundException, UnexpectedReplicaStateException, FileNotFoundException, EOFException, IOException { final ReplicaInfo replicaInfo = volumeMap.get(b.getBlockPoolId(), b.getLocalBlock()); if (replicaInfo == null) { throw new ReplicaNotFoundException(b); } if (replicaInfo.getState() != state) { throw new UnexpectedReplicaStateException(b,state); } if (!replicaInfo.getBlockFile().exists()) { throw new FileNotFoundException(replicaInfo.getBlockFile().getPath()); } long onDiskLength = getLength(b); if (onDiskLength < minLength) { throw new EOFException(b + "'s on-disk length " + onDiskLength + " is shorter than minLength " + minLength); } }
@Test public void testMix() { BlockListAsLongs blocks = checkReport( new FinalizedReplica(b1, null, null), new FinalizedReplica(b2, null, null), new ReplicaBeingWritten(b3, null, null, null), new ReplicaWaitingToBeRecovered(b4, null, null)); assertArrayEquals( new long[] { 2, 2, 1, 11, 111, 2, 22, 222, -1, -1, -1, 3, 33, 333, ReplicaState.RBW.getValue(), 4, 44, 444, ReplicaState.RWR.getValue() }, blocks.getBlockListAsLongs()); }
@Test public void testQueues() { DatanodeDescriptor fakeDN = DFSTestUtil.getLocalDatanodeDescriptor(); DatanodeStorage storage = new DatanodeStorage("STORAGE_ID"); DatanodeStorageInfo storageInfo = new DatanodeStorageInfo(fakeDN, storage); msgs.enqueueReportedBlock(storageInfo, block1Gs1, ReplicaState.FINALIZED); msgs.enqueueReportedBlock(storageInfo, block1Gs2, ReplicaState.FINALIZED); assertEquals(2, msgs.count()); // Nothing queued yet for block 2 assertNull(msgs.takeBlockQueue(block2Gs1)); assertEquals(2, msgs.count()); Queue<ReportedBlockInfo> q = msgs.takeBlockQueue(block1Gs2DifferentInstance); assertEquals( "ReportedBlockInfo [block=blk_1_1, dn=127.0.0.1:50010, reportedState=FINALIZED]," + "ReportedBlockInfo [block=blk_1_2, dn=127.0.0.1:50010, reportedState=FINALIZED]", Joiner.on(",").join(q)); assertEquals(0, msgs.count()); // Should be null if we pull again assertNull(msgs.takeBlockQueue(block1Gs1)); assertEquals(0, msgs.count()); }
/** * BlockRecovery_02.11. * Two replicas are RBW. * @throws IOException in case of an error */ @Test public void testRBWReplicas() throws IOException { if(LOG.isDebugEnabled()) { LOG.debug("Running " + GenericTestUtils.getMethodName()); } ReplicaRecoveryInfo replica1 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN1, GEN_STAMP-1, ReplicaState.RBW); ReplicaRecoveryInfo replica2 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN2, GEN_STAMP-2, ReplicaState.RBW); InterDatanodeProtocol dn1 = mock(InterDatanodeProtocol.class); InterDatanodeProtocol dn2 = mock(InterDatanodeProtocol.class); long minLen = Math.min(REPLICA_LEN1, REPLICA_LEN2); testSyncReplicas(replica1, replica2, dn1, dn2, minLen); verify(dn1).updateReplicaUnderRecovery(block, RECOVERY_ID, BLOCK_ID, minLen); verify(dn2).updateReplicaUnderRecovery(block, RECOVERY_ID, BLOCK_ID, minLen); }
/** * BlockRecovery_02.12. * One replica is RBW and another is RWR. * @throws IOException in case of an error */ @Test public void testRBW_RWRReplicas() throws IOException { if(LOG.isDebugEnabled()) { LOG.debug("Running " + GenericTestUtils.getMethodName()); } ReplicaRecoveryInfo replica1 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN1, GEN_STAMP-1, ReplicaState.RBW); ReplicaRecoveryInfo replica2 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN1, GEN_STAMP-2, ReplicaState.RWR); InterDatanodeProtocol dn1 = mock(InterDatanodeProtocol.class); InterDatanodeProtocol dn2 = mock(InterDatanodeProtocol.class); testSyncReplicas(replica1, replica2, dn1, dn2, REPLICA_LEN1); verify(dn1).updateReplicaUnderRecovery(block, RECOVERY_ID, BLOCK_ID, REPLICA_LEN1); verify(dn2, never()).updateReplicaUnderRecovery( block, RECOVERY_ID, BLOCK_ID, REPLICA_LEN1); }
/** * BlockRecovery_02.13. * Two replicas are RWR. * @throws IOException in case of an error */ @Test public void testRWRReplicas() throws IOException { if(LOG.isDebugEnabled()) { LOG.debug("Running " + GenericTestUtils.getMethodName()); } ReplicaRecoveryInfo replica1 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN1, GEN_STAMP-1, ReplicaState.RWR); ReplicaRecoveryInfo replica2 = new ReplicaRecoveryInfo(BLOCK_ID, REPLICA_LEN2, GEN_STAMP-2, ReplicaState.RWR); InterDatanodeProtocol dn1 = mock(InterDatanodeProtocol.class); InterDatanodeProtocol dn2 = mock(InterDatanodeProtocol.class); long minLen = Math.min(REPLICA_LEN1, REPLICA_LEN2); testSyncReplicas(replica1, replica2, dn1, dn2, minLen); verify(dn1).updateReplicaUnderRecovery(block, RECOVERY_ID, BLOCK_ID, minLen); verify(dn2).updateReplicaUnderRecovery(block, RECOVERY_ID, BLOCK_ID, minLen); }
/** * BlockRecoveryFI_07. max replica length from all DNs is zero. * * @throws IOException in case of an error */ @Test public void testZeroLenReplicas() throws IOException, InterruptedException { if(LOG.isDebugEnabled()) { LOG.debug("Running " + GenericTestUtils.getMethodName()); } DataNode spyDN = spy(dn); doReturn(new ReplicaRecoveryInfo(block.getBlockId(), 0, block.getGenerationStamp(), ReplicaState.FINALIZED)).when(spyDN). initReplicaRecovery(any(RecoveringBlock.class)); Daemon d = spyDN.recoverBlocks("fake NN", initRecoveringBlocks()); d.join(); DatanodeProtocol dnP = dn.getActiveNamenodeForBP(POOL_ID); verify(dnP).commitBlockSynchronization( block, RECOVERY_ID, 0, true, true, DatanodeID.EMPTY_ARRAY, null); }
private boolean isBlockUnderConstruction(BlockInfo storedBlock, BlockUCState ucState, ReplicaState reportedState) { switch(reportedState) { case FINALIZED: switch(ucState) { case UNDER_CONSTRUCTION: case UNDER_RECOVERY: return true; default: return false; } case RBW: case RWR: return (!storedBlock.isComplete()); case RUR: // should not be reported case TEMPORARY: // should not be reported default: return false; } }
/** * BlockRecoveryFI_07. max replica length from all DNs is zero. * * @throws IOException in case of an error */ @Test public void testZeroLenReplicas() throws IOException, InterruptedException { if(LOG.isDebugEnabled()) { LOG.debug("Running " + GenericTestUtils.getMethodName()); } doReturn(new ReplicaRecoveryInfo(block.getBlockId(), 0, block.getGenerationStamp(), ReplicaState.FINALIZED)).when(spyDN). initReplicaRecovery(any(RecoveringBlock.class)); for(RecoveringBlock rBlock: initRecoveringBlocks()){ BlockRecoveryWorker.RecoveryTaskContiguous RecoveryTaskContiguous = recoveryWorker.new RecoveryTaskContiguous(rBlock); BlockRecoveryWorker.RecoveryTaskContiguous spyTask = spy(RecoveryTaskContiguous); spyTask.recover(); } DatanodeProtocol dnP = recoveryWorker.getActiveNamenodeForBP(POOL_ID); verify(dnP).commitBlockSynchronization( block, RECOVERY_ID, 0, true, true, DatanodeID.EMPTY_ARRAY, null); }
@Override public Iterator<BlockReportReplica> iterator() { return new Iterator<BlockReportReplica>() { final BlockReportReplica block = new BlockReportReplica(); final CodedInputStream cis = buffer.newCodedInput(); private int currentBlockIndex = 0; @Override public boolean hasNext() { return currentBlockIndex < numBlocks; } @Override public BlockReportReplica next() { currentBlockIndex++; try { // zig-zag to reduce size of legacy blocks and mask off bits // we don't (yet) understand block.setBlockId(cis.readSInt64()); block.setNumBytes(cis.readRawVarint64() & NUM_BYTES_MASK); block.setGenerationStamp(cis.readRawVarint64()); long state = cis.readRawVarint64() & REPLICA_STATE_MASK; block.setState(ReplicaState.getState((int)state)); } catch (IOException e) { throw new IllegalStateException(e); } return block; } @Override public void remove() { throw new UnsupportedOperationException(); } }; }
public BlockReportReplica(Block block) { super(block); if (block instanceof BlockReportReplica) { this.state = ((BlockReportReplica)block).getState(); } else { this.state = ReplicaState.FINALIZED; } }
/** * Queue the given reported block for later processing in the * standby node. @see PendingDataNodeMessages. * @param reason a textual reason to report in the debug logs */ private void queueReportedBlock(DatanodeStorageInfo storageInfo, Block block, ReplicaState reportedState, String reason) { assert shouldPostponeBlocksFromFuture; if (LOG.isDebugEnabled()) { LOG.debug("Queueing reported block " + block + " in state " + reportedState + " from datanode " + storageInfo.getDatanodeDescriptor() + " for later processing because " + reason + "."); } pendingDNMessages.enqueueReportedBlock(storageInfo, block, reportedState); }
void addStoredBlockUnderConstruction(StatefulBlockInfo ucBlock, DatanodeStorageInfo storageInfo) throws IOException { BlockInfoContiguousUnderConstruction block = ucBlock.storedBlock; block.addReplicaIfNotPresent( storageInfo, ucBlock.reportedBlock, ucBlock.reportedState); if (ucBlock.reportedState == ReplicaState.FINALIZED && !block.findDatanode(storageInfo.getDatanodeDescriptor())) { addStoredBlock(block, storageInfo, null, true); } }
/** * The given node is reporting that it received a certain block. */ @VisibleForTesting void addBlock(DatanodeStorageInfo storageInfo, Block block, String delHint) throws IOException { DatanodeDescriptor node = storageInfo.getDatanodeDescriptor(); // Decrement number of blocks scheduled to this datanode. // for a retry request (of DatanodeProtocol#blockReceivedAndDeleted with // RECEIVED_BLOCK), we currently also decrease the approximate number. node.decrementBlocksScheduled(storageInfo.getStorageType()); // get the deletion hint node DatanodeDescriptor delHintNode = null; if (delHint != null && delHint.length() != 0) { delHintNode = datanodeManager.getDatanode(delHint); if (delHintNode == null) { blockLog.warn("BLOCK* blockReceived: {} is expected to be removed " + "from an unrecorded node {}", block, delHint); } } // // Modify the blocks->datanode map and node's map. // pendingReplications.decrement(block, node); processAndHandleReportedBlock(storageInfo, block, ReplicaState.FINALIZED, delHintNode); }
private void processAndHandleReportedBlock( DatanodeStorageInfo storageInfo, Block block, ReplicaState reportedState, DatanodeDescriptor delHintNode) throws IOException { // blockReceived reports a finalized block Collection<BlockInfoContiguous> toAdd = new LinkedList<BlockInfoContiguous>(); Collection<Block> toInvalidate = new LinkedList<Block>(); Collection<BlockToMarkCorrupt> toCorrupt = new LinkedList<BlockToMarkCorrupt>(); Collection<StatefulBlockInfo> toUC = new LinkedList<StatefulBlockInfo>(); final DatanodeDescriptor node = storageInfo.getDatanodeDescriptor(); processReportedBlock(storageInfo, block, reportedState, toAdd, toInvalidate, toCorrupt, toUC); // the block is only in one of the to-do lists // if it is in none then data-node already has it assert toUC.size() + toAdd.size() + toInvalidate.size() + toCorrupt.size() <= 1 : "The block should be only in one of the lists."; for (StatefulBlockInfo b : toUC) { addStoredBlockUnderConstruction(b, storageInfo); } long numBlocksLogged = 0; for (BlockInfoContiguous b : toAdd) { addStoredBlock(b, storageInfo, delHintNode, numBlocksLogged < maxNumBlocksToLog); numBlocksLogged++; } if (numBlocksLogged > maxNumBlocksToLog) { blockLog.info("BLOCK* addBlock: logged info for {} of {} reported.", maxNumBlocksToLog, numBlocksLogged); } for (Block b : toInvalidate) { blockLog.info("BLOCK* addBlock: block {} on node {} size {} does not " + "belong to any file", b, node, b.getNumBytes()); addToInvalidates(b, node); } for (BlockToMarkCorrupt b : toCorrupt) { markBlockAsCorrupt(b, storageInfo, node); } }
ReplicaUnderConstruction(Block block, DatanodeStorageInfo target, ReplicaState state) { super(block); this.expectedLocation = target; this.state = state; this.chosenAsPrimary = false; }
/** Set expected locations */ public void setExpectedLocations(DatanodeStorageInfo[] targets) { int numLocations = targets == null ? 0 : targets.length; this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations); for(int i = 0; i < numLocations; i++) replicas.add( new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW)); }
@Override // FsDatasetSpi public synchronized ReplicaHandler append(ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException { // If the block was successfully finalized because all packets // were successfully processed at the Datanode but the ack for // some of the packets were not received by the client. The client // re-opens the connection and retries sending those packets. // The other reason is that an "append" is occurring to this block. // check the validity of the parameter if (newGS < b.getGenerationStamp()) { throw new IOException("The new generation stamp " + newGS + " should be greater than the replica " + b + "'s generation stamp"); } ReplicaInfo replicaInfo = getReplicaInfo(b); LOG.info("Appending to " + replicaInfo); if (replicaInfo.getState() != ReplicaState.FINALIZED) { throw new ReplicaNotFoundException( ReplicaNotFoundException.UNFINALIZED_REPLICA + b); } if (replicaInfo.getNumBytes() != expectedBlockLen) { throw new IOException("Corrupted replica " + replicaInfo + " with a length of " + replicaInfo.getNumBytes() + " expected length is " + expectedBlockLen); } FsVolumeReference ref = replicaInfo.getVolume().obtainReference(); ReplicaBeingWritten replica = null; try { replica = append(b.getBlockPoolId(), (FinalizedReplica)replicaInfo, newGS, b.getNumBytes()); } catch (IOException e) { IOUtils.cleanup(null, ref); throw e; } return new ReplicaHandler(replica, ref); }
@Override // FsDatasetSpi public synchronized String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException { LOG.info("Recover failed close " + b); // check replica's state ReplicaInfo replicaInfo = recoverCheck(b, newGS, expectedBlockLen); // bump the replica's GS bumpReplicaGS(replicaInfo, newGS); // finalize the replica if RBW if (replicaInfo.getState() == ReplicaState.RBW) { finalizeReplica(b.getBlockPoolId(), replicaInfo); } return replicaInfo.getStorageUuid(); }
/** * Complete the block write! */ @Override // FsDatasetSpi public synchronized void finalizeBlock(ExtendedBlock b) throws IOException { if (Thread.interrupted()) { // Don't allow data modifications from interrupted threads throw new IOException("Cannot finalize block from Interrupted Thread"); } ReplicaInfo replicaInfo = getReplicaInfo(b); if (replicaInfo.getState() == ReplicaState.FINALIZED) { // this is legal, when recovery happens on a file that has // been opened for append but never modified return; } finalizeReplica(b.getBlockPoolId(), replicaInfo); }
/** * Get the list of finalized blocks from in-memory blockmap for a block pool. */ @Override public synchronized List<FinalizedReplica> getFinalizedBlocks(String bpid) { ArrayList<FinalizedReplica> finalized = new ArrayList<FinalizedReplica>(volumeMap.size(bpid)); for (ReplicaInfo b : volumeMap.replicas(bpid)) { if(b.getState() == ReplicaState.FINALIZED) { finalized.add(new FinalizedReplica((FinalizedReplica)b)); } } return finalized; }