private void testContentSummary() throws Exception {
  FileSystem fs = FileSystem.get(getProxiedFSConf());
  Path path = new Path(getProxiedFSTestDir(), "foo.txt");
  OutputStream os = fs.create(path);
  os.write(1);
  os.close();
  ContentSummary hdfsContentSummary = fs.getContentSummary(path);
  fs.close();
  fs = getHttpFSFileSystem();
  ContentSummary httpContentSummary = fs.getContentSummary(path);
  fs.close();
  Assert.assertEquals(httpContentSummary.getDirectoryCount(),
      hdfsContentSummary.getDirectoryCount());
  Assert.assertEquals(httpContentSummary.getFileCount(),
      hdfsContentSummary.getFileCount());
  Assert.assertEquals(httpContentSummary.getLength(),
      hdfsContentSummary.getLength());
  Assert.assertEquals(httpContentSummary.getQuota(),
      hdfsContentSummary.getQuota());
  Assert.assertEquals(httpContentSummary.getSpaceConsumed(),
      hdfsContentSummary.getSpaceConsumed());
  Assert.assertEquals(httpContentSummary.getSpaceQuota(),
      hdfsContentSummary.getSpaceQuota());
}
private boolean canBeSafelyDeleted(PathData item) throws IOException {
  boolean shouldDelete = true;
  if (safeDelete) {
    final long deleteLimit = getConf().getLong(
        HADOOP_SHELL_SAFELY_DELETE_LIMIT_NUM_FILES,
        HADOOP_SHELL_SAFELY_DELETE_LIMIT_NUM_FILES_DEFAULT);
    if (deleteLimit > 0) {
      ContentSummary cs = item.fs.getContentSummary(item.path);
      final long numFiles = cs.getFileCount();
      if (numFiles > deleteLimit) {
        if (!ToolRunner.confirmPrompt("Proceed deleting " + numFiles
            + " files?")) {
          System.err.println("Delete aborted at user request.\n");
          shouldDelete = false;
        }
      }
    }
  }
  return shouldDelete;
}
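// A minimal sketch of how the safe-delete threshold checked above could be raised
// for a particular run. The key constant is the one referenced in the method; the
// value 500 and the standalone Configuration instance are illustrative assumptions.
Configuration conf = new Configuration();
conf.setLong(HADOOP_SHELL_SAFELY_DELETE_LIMIT_NUM_FILES, 500);
// With this setting, deleting a tree whose ContentSummary reports more than 500
// files triggers the confirmation prompt before any deletion proceeds.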
@Override
public ContentSummary getContentSummary(Path f) throws IOException {
  Map<String, String> params = new HashMap<String, String>();
  params.put(OP_PARAM, Operation.GETCONTENTSUMMARY.toString());
  HttpURLConnection conn =
      getConnection(Operation.GETCONTENTSUMMARY.getMethod(), params, f, true);
  HttpExceptionUtils.validateResponse(conn, HttpURLConnection.HTTP_OK);
  JSONObject json = (JSONObject) ((JSONObject)
      HttpFSUtils.jsonParse(conn)).get(CONTENT_SUMMARY_JSON);
  return new ContentSummary.Builder().
      length((Long) json.get(CONTENT_SUMMARY_LENGTH_JSON)).
      fileCount((Long) json.get(CONTENT_SUMMARY_FILE_COUNT_JSON)).
      directoryCount((Long) json.get(CONTENT_SUMMARY_DIRECTORY_COUNT_JSON)).
      quota((Long) json.get(CONTENT_SUMMARY_QUOTA_JSON)).
      spaceConsumed((Long) json.get(CONTENT_SUMMARY_SPACE_CONSUMED_JSON)).
      spaceQuota((Long) json.get(CONTENT_SUMMARY_SPACE_QUOTA_JSON)).build();
}
/**
 * Connect to the name node and get content summary.
 * @param path The path
 * @return The content summary for the path.
 * @throws IOException
 */
private ContentSummary getContentSummary(String path) throws IOException {
  final HttpURLConnection connection = openConnection(
      "/contentSummary" + ServletUtil.encodePath(path),
      "ugi=" + getEncodedUgiParameter());
  InputStream in = null;
  try {
    in = connection.getInputStream();
    final XMLReader xr = XMLReaderFactory.createXMLReader();
    xr.setContentHandler(this);
    xr.parse(new InputSource(in));
  } catch (FileNotFoundException fnfe) {
    // the server may not support getContentSummary
    return null;
  } catch (SAXException saxe) {
    final Exception embedded = saxe.getException();
    if (embedded != null && embedded instanceof IOException) {
      throw (IOException) embedded;
    }
    throw new IOException("Invalid xml format", saxe);
  } finally {
    if (in != null) {
      in.close();
    }
    connection.disconnect();
  }
  return contentsummary;
}
private static ContentSummary getContentSummaryInt(FSDirectory fsd,
    INodesInPath iip) throws IOException {
  fsd.readLock();
  try {
    INode targetNode = iip.getLastINode();
    if (targetNode == null) {
      throw new FileNotFoundException("File does not exist: " + iip.getPath());
    } else {
      // Make it relinquish locks every time contentCountLimit entries are
      // processed. 0 means disabled. I.e. blocking for the entire duration.
      ContentSummaryComputationContext cscc =
          new ContentSummaryComputationContext(fsd, fsd.getFSNamesystem(),
              fsd.getContentCountLimit());
      ContentSummary cs = targetNode.computeAndConvertContentSummary(cscc);
      fsd.addYieldCount(cscc.getYieldCount());
      return cs;
    }
  } finally {
    fsd.readUnlock();
  }
}
/**
 * Compute {@link ContentSummary}.
 */
public final ContentSummary computeAndConvertContentSummary(
    ContentSummaryComputationContext summary) {
  ContentCounts counts = computeContentSummary(summary).getCounts();
  final QuotaCounts q = getQuotaCounts();
  return new ContentSummary.Builder().
      length(counts.getLength()).
      fileCount(counts.getFileCount() + counts.getSymlinkCount()).
      directoryCount(counts.getDirectoryCount()).
      quota(q.getNameSpace()).
      spaceConsumed(counts.getStoragespace()).
      spaceQuota(q.getStorageSpace()).
      typeConsumed(counts.getTypeSpaces()).
      typeQuota(q.getTypeSpaces().asArray()).
      build();
}
public static ContentSummary convert(ContentSummaryProto cs) {
  if (cs == null) return null;
  ContentSummary.Builder builder = new ContentSummary.Builder();
  builder.length(cs.getLength()).
      fileCount(cs.getFileCount()).
      directoryCount(cs.getDirectoryCount()).
      quota(cs.getQuota()).
      spaceConsumed(cs.getSpaceConsumed()).
      spaceQuota(cs.getSpaceQuota());
  if (cs.hasTypeQuotaInfos()) {
    for (HdfsProtos.StorageTypeQuotaInfoProto info :
        cs.getTypeQuotaInfos().getTypeQuotaInfoList()) {
      StorageType type = PBHelper.convertStorageType(info.getType());
      builder.typeConsumed(type, info.getConsumed());
      builder.typeQuota(type, info.getQuota());
    }
  }
  return builder.build();
}
public static ContentSummaryProto convert(ContentSummary cs) {
  if (cs == null) return null;
  ContentSummaryProto.Builder builder = ContentSummaryProto.newBuilder();
  builder.setLength(cs.getLength()).
      setFileCount(cs.getFileCount()).
      setDirectoryCount(cs.getDirectoryCount()).
      setQuota(cs.getQuota()).
      setSpaceConsumed(cs.getSpaceConsumed()).
      setSpaceQuota(cs.getSpaceQuota());
  if (cs.isTypeQuotaSet() || cs.isTypeConsumedAvailable()) {
    HdfsProtos.StorageTypeQuotaInfosProto.Builder isb =
        HdfsProtos.StorageTypeQuotaInfosProto.newBuilder();
    for (StorageType t : StorageType.getTypesSupportingQuota()) {
      HdfsProtos.StorageTypeQuotaInfoProto info =
          HdfsProtos.StorageTypeQuotaInfoProto.newBuilder().
              setType(convertStorageType(t)).
              setConsumed(cs.getTypeConsumed(t)).
              setQuota(cs.getTypeQuota(t)).
              build();
      isb.addTypeQuotaInfo(info);
    }
    builder.setTypeQuotaInfos(isb);
  }
  return builder.build();
}
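// A small round-trip sketch for the two converters above, assuming they live in
// PBHelper (as the PBHelper.convertStorageType call suggests). The field values
// are arbitrary; only the standard ContentSummary.Builder accessors are used.
ContentSummary original = new ContentSummary.Builder().
    length(1024).fileCount(2).directoryCount(1).
    quota(-1).spaceConsumed(3072).spaceQuota(-1).build();
ContentSummaryProto proto = PBHelper.convert(original);
ContentSummary restored = PBHelper.convert(proto);
// The protobuf form preserves the basic counters exactly.
Assert.assertEquals(original.getLength(), restored.getLength());
Assert.assertEquals(original.getFileCount(), restored.getFileCount());
Assert.assertEquals(original.getSpaceConsumed(), restored.getSpaceConsumed());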
@Test
public void testTruncate() throws Exception {
  final short repl = 3;
  final int blockSize = 1024;
  final int numOfBlocks = 2;
  Path dir = getTestRootPath(fSys, "test/hadoop");
  Path file = getTestRootPath(fSys, "test/hadoop/file");

  final byte[] data = getFileData(numOfBlocks, blockSize);
  createFile(fSys, file, data, blockSize, repl);

  final int newLength = blockSize;
  boolean isReady = fSys.truncate(file, newLength);
  Assert.assertTrue("Recovery is not expected.", isReady);

  FileStatus fileStatus = fSys.getFileStatus(file);
  Assert.assertEquals(fileStatus.getLen(), newLength);
  AppendTestUtil.checkFullFile(fSys, file, newLength, data, file.toString());

  ContentSummary cs = fSys.getContentSummary(dir);
  Assert.assertEquals("Bad disk space usage", cs.getSpaceConsumed(),
      newLength * repl);
  Assert.assertTrue("Deleted", fSys.delete(dir, true));
}
@Test(timeout = 60000)
public void testContentSummaryWithoutQuotaByStorageType() throws Exception {
  final Path foo = new Path(dir, "foo");
  Path createdFile1 = new Path(foo, "created_file1.data");
  dfs.mkdirs(foo);

  // set storage policy on directory "foo" to ONESSD
  dfs.setStoragePolicy(foo, HdfsConstants.ONESSD_STORAGE_POLICY_NAME);

  INode fnode = fsdir.getINode4Write(foo.toString());
  assertTrue(fnode.isDirectory());
  assertTrue(!fnode.isQuotaSet());

  // Create file of size 2 * BLOCKSIZE under directory "foo"
  long file1Len = BLOCKSIZE * 2;
  int bufLen = BLOCKSIZE / 16;
  DFSTestUtil.createFile(dfs, createdFile1, bufLen, file1Len, BLOCKSIZE,
      REPLICATION, seed);

  // Verify getContentSummary without any quota set
  ContentSummary cs = dfs.getContentSummary(foo);
  assertEquals(cs.getSpaceConsumed(), file1Len * REPLICATION);
  assertEquals(cs.getTypeConsumed(StorageType.SSD), file1Len);
  assertEquals(cs.getTypeConsumed(StorageType.DISK), file1Len * 2);
}
@Test(timeout = 60000)
public void testContentSummaryWithoutStoragePolicy() throws Exception {
  final Path foo = new Path(dir, "foo");
  Path createdFile1 = new Path(foo, "created_file1.data");
  dfs.mkdirs(foo);

  INode fnode = fsdir.getINode4Write(foo.toString());
  assertTrue(fnode.isDirectory());
  assertTrue(!fnode.isQuotaSet());

  // Create file of size 2 * BLOCKSIZE under directory "foo"
  long file1Len = BLOCKSIZE * 2;
  int bufLen = BLOCKSIZE / 16;
  DFSTestUtil.createFile(dfs, createdFile1, bufLen, file1Len, BLOCKSIZE,
      REPLICATION, seed);

  // Verify getContentSummary without any quota set
  // Expect no type quota and usage information available
  ContentSummary cs = dfs.getContentSummary(foo);
  assertEquals(cs.getSpaceConsumed(), file1Len * REPLICATION);
  for (StorageType t : StorageType.values()) {
    assertEquals(cs.getTypeConsumed(t), 0);
    assertEquals(cs.getTypeQuota(t), -1);
  }
}
public static ContentSummary convert(ContentSummaryProto cs) {
  if (cs == null) return null;
  ContentSummary.Builder builder = new ContentSummary.Builder();
  builder.length(cs.getLength()).
      fileCount(cs.getFileCount()).
      directoryCount(cs.getDirectoryCount()).
      quota(cs.getQuota()).
      spaceConsumed(cs.getSpaceConsumed()).
      spaceQuota(cs.getSpaceQuota());
  if (cs.hasTypeQuotaInfos()) {
    for (HdfsProtos.StorageTypeQuotaInfoProto info :
        cs.getTypeQuotaInfos().getTypeQuotaInfoList()) {
      StorageType type = convertStorageType(info.getType());
      builder.typeConsumed(type, info.getConsumed());
      builder.typeQuota(type, info.getQuota());
    }
  }
  return builder.build();
}
private static ContentSummary getContentSummaryInt(FSDirectory fsd,
    INodesInPath iip) throws IOException {
  fsd.readLock();
  try {
    INode targetNode = iip.getLastINode();
    if (targetNode == null) {
      throw new FileNotFoundException("File does not exist: " + iip.getPath());
    } else {
      // Make it relinquish locks every time contentCountLimit entries are
      // processed. 0 means disabled. I.e. blocking for the entire duration.
      ContentSummaryComputationContext cscc =
          new ContentSummaryComputationContext(fsd, fsd.getFSNamesystem(),
              fsd.getContentCountLimit(), fsd.getContentSleepMicroSec());
      ContentSummary cs = targetNode.computeAndConvertContentSummary(
          iip.getPathSnapshotId(), cscc);
      fsd.addYieldCount(cscc.getYieldCount());
      return cs;
    }
  } finally {
    fsd.readUnlock();
  }
}
/**
 * Get the content summary for a specific file/dir.
 *
 * @param src The string representation of the path to the file
 *
 * @throws AccessControlException if access is denied
 * @throws UnresolvedLinkException if a symlink is encountered.
 * @throws FileNotFoundException if no file exists
 * @throws StandbyException
 * @throws IOException for issues with writing to the audit log
 *
 * @return object containing information regarding the file
 *         or null if file not found
 */
ContentSummary getContentSummary(final String src) throws IOException {
  checkOperation(OperationCategory.READ);
  readLock();
  boolean success = true;
  try {
    checkOperation(OperationCategory.READ);
    return FSDirStatAndListingOp.getContentSummary(dir, src);
  } catch (AccessControlException ace) {
    success = false;
    throw ace;
  } finally {
    readUnlock();
    logAuditEvent(success, "contentSummary", src);
  }
}
/**
 * Compute {@link ContentSummary}.
 */
public final ContentSummary computeAndConvertContentSummary(int snapshotId,
    ContentSummaryComputationContext summary) {
  ContentCounts counts = computeContentSummary(snapshotId, summary)
      .getCounts();
  final QuotaCounts q = getQuotaCounts();
  return new ContentSummary.Builder().
      length(counts.getLength()).
      fileCount(counts.getFileCount() + counts.getSymlinkCount()).
      directoryCount(counts.getDirectoryCount()).
      quota(q.getNameSpace()).
      spaceConsumed(counts.getStoragespace()).
      spaceQuota(q.getStorageSpace()).
      typeConsumed(counts.getTypeSpaces()).
      typeQuota(q.getTypeSpaces().asArray()).
      build();
}
@Test
public void testContentSummary() throws Exception {
  MiniDFSCluster cluster = null;
  final Configuration conf = WebHdfsTestUtil.createConf();
  final Path path = new Path("/QuotaDir");
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    final WebHdfsFileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(
        conf, WebHdfsConstants.WEBHDFS_SCHEME);
    final DistributedFileSystem dfs = cluster.getFileSystem();
    dfs.mkdirs(path);
    dfs.setQuotaByStorageType(path, StorageType.DISK, 100000);
    ContentSummary contentSummary = webHdfs.getContentSummary(path);
    Assert.assertTrue(
        (contentSummary.getTypeQuota(StorageType.DISK) == 100000));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
@Test
public void testGetContentSummaryForEmptyDirectory() throws Exception {
  try (WebImageViewer viewer = new WebImageViewer(
      NetUtils.createSocketAddr("localhost:0"))) {
    viewer.initServer(originalFsimage.getAbsolutePath());
    int port = viewer.getPort();
    URL url = new URL("http://localhost:" + port
        + "/webhdfs/v1/parentDir/childDir2?op=GETCONTENTSUMMARY");
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    connection.setRequestMethod("GET");
    connection.connect();
    assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
    // create a WebHdfsFileSystem instance
    URI uri = new URI("webhdfs://localhost:" + String.valueOf(port));
    Configuration conf = new Configuration();
    WebHdfsFileSystem webfs = (WebHdfsFileSystem) FileSystem.get(uri, conf);
    ContentSummary summary =
        webfs.getContentSummary(new Path("/parentDir/childDir2"));
    verifyContentSummary(emptyDirSummaryFromDFS, summary);
  }
}
@Test
public void testGetContentSummaryForDirectory() throws Exception {
  try (WebImageViewer viewer = new WebImageViewer(
      NetUtils.createSocketAddr("localhost:0"))) {
    viewer.initServer(originalFsimage.getAbsolutePath());
    int port = viewer.getPort();
    URL url = new URL("http://localhost:" + port
        + "/webhdfs/v1/parentDir/?op=GETCONTENTSUMMARY");
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    connection.setRequestMethod("GET");
    connection.connect();
    assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
    // create a WebHdfsFileSystem instance
    URI uri = new URI("webhdfs://localhost:" + String.valueOf(port));
    Configuration conf = new Configuration();
    WebHdfsFileSystem webfs = (WebHdfsFileSystem) FileSystem.get(uri, conf);
    ContentSummary summary = webfs.getContentSummary(new Path("/parentDir/"));
    verifyContentSummary(summaryFromDFS, summary);
  }
}
@Test
public void testGetContentSummaryForFile() throws Exception {
  try (WebImageViewer viewer = new WebImageViewer(
      NetUtils.createSocketAddr("localhost:0"))) {
    viewer.initServer(originalFsimage.getAbsolutePath());
    int port = viewer.getPort();
    URL url = new URL("http://localhost:" + port
        + "/webhdfs/v1/parentDir/file1?op=GETCONTENTSUMMARY");
    HttpURLConnection connection = (HttpURLConnection) url.openConnection();
    connection.setRequestMethod("GET");
    connection.connect();
    assertEquals(HttpURLConnection.HTTP_OK, connection.getResponseCode());
    // create a WebHdfsFileSystem instance
    URI uri = new URI("webhdfs://localhost:" + String.valueOf(port));
    Configuration conf = new Configuration();
    WebHdfsFileSystem webfs = (WebHdfsFileSystem) FileSystem.get(uri, conf);
    ContentSummary summary =
        webfs.getContentSummary(new Path("/parentDir/file1"));
    verifyContentSummary(fileSummaryFromDFS, summary);
  }
}
/**
 * Estimate the number of reducers needed for this job, based on job input,
 * and configuration parameters.
 *
 * The output of this method should only be used if the output of this
 * MapRedTask is not being used to populate a bucketed table and the user
 * has not specified the number of reducers to use.
 *
 * @return the number of reducers.
 */
public static int estimateNumberOfReducers(HiveConf conf,
    ContentSummary inputSummary, MapWork work, boolean finalMapRed)
    throws IOException {
  long bytesPerReducer = conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
  int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);

  double samplePercentage = getHighestSamplePercentage(work);
  long totalInputFileSize =
      getTotalInputFileSize(inputSummary, work, samplePercentage);

  // if all inputs are sampled, we should shrink the size of reducers accordingly.
  if (totalInputFileSize != inputSummary.getLength()) {
    LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
        + maxReducers + " estimated totalInputFileSize=" + totalInputFileSize);
  } else {
    LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers="
        + maxReducers + " totalInputFileSize=" + totalInputFileSize);
  }

  // If this map reduce job writes final data to a table and bucketing is being
  // inferred, and the user has configured Hive to do this, make sure the number
  // of reducers is a power of two
  boolean powersOfTwo = conf.getBoolVar(
      HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO)
      && finalMapRed && !work.getBucketedColsByDirectory().isEmpty();

  return estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers,
      powersOfTwo);
}
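// A worked sizing example for the estimate above, with illustrative numbers not
// taken from the source. It reproduces only the raw ceiling-and-clamp step; the
// optional power-of-two adjustment in estimateReducers is omitted here.
long bytesPerReducer = 256L * 1024 * 1024;          // 256 MB per reducer
long totalInputFileSize = 10L * 1024 * 1024 * 1024; // 10 GB of input
int maxReducers = 1009;                             // illustrative cap
int reducers = (int) Math.ceil((double) totalInputFileSize / bytesPerReducer);
reducers = Math.max(1, Math.min(maxReducers, reducers)); // -> 40 reducers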
@Override
protected void processOptions(LinkedList<String> args) {
  CommandFormat cf = new CommandFormat(1, Integer.MAX_VALUE,
      OPTION_QUOTA, OPTION_HUMAN, OPTION_HEADER, OPTION_QUOTA_AND_USAGE);
  cf.addOptionWithValue(OPTION_TYPE);
  cf.parse(args);
  if (args.isEmpty()) { // default path is the current working directory
    args.add(".");
  }
  showQuotas = cf.getOpt(OPTION_QUOTA);
  humanReadable = cf.getOpt(OPTION_HUMAN);
  showQuotasAndUsageOnly = cf.getOpt(OPTION_QUOTA_AND_USAGE);

  if (showQuotas || showQuotasAndUsageOnly) {
    String types = cf.getOptValue(OPTION_TYPE);
    if (null != types) {
      showQuotabyType = true;
      storageTypes = getAndCheckStorageTypes(types);
    } else {
      showQuotabyType = false;
    }
  }

  if (cf.getOpt(OPTION_HEADER)) {
    if (showQuotabyType) {
      out.println(QuotaUsage.getStorageTypeHeader(storageTypes) + "PATHNAME");
    } else {
      if (showQuotasAndUsageOnly) {
        out.println(QuotaUsage.getHeader() + "PATHNAME");
      } else {
        out.println(ContentSummary.getHeader(showQuotas) + "PATHNAME");
      }
    }
  }
}
@Override
protected void processPath(PathData src) throws IOException {
  if (showQuotasAndUsageOnly || showQuotabyType) {
    QuotaUsage usage = src.fs.getQuotaUsage(src.path);
    out.println(usage.toString(isHumanReadable(), showQuotabyType,
        storageTypes) + src);
  } else {
    ContentSummary summary = src.fs.getContentSummary(src.path);
    out.println(summary.toString(showQuotas, isHumanReadable()) + src);
  }
}
@Override
protected void processPath(PathData item) throws IOException {
  ContentSummary contentSummary = item.fs.getContentSummary(item.path);
  long length = contentSummary.getLength();
  long spaceConsumed = contentSummary.getSpaceConsumed();
  usagesTable.addRow(formatSize(length), formatSize(spaceConsumed), item);
}
@Test
public void testGetContentSummary() throws IOException {
  // GetContentSummary of a dir
  fSys.mkdirs(new Path("/newDir/dirFoo"));
  ContentSummary cs = fSys.getContentSummary(new Path("/newDir/dirFoo"));
  Assert.assertEquals(-1L, cs.getQuota());
  Assert.assertEquals(-1L, cs.getSpaceQuota());
}
/**
 * Get the ContentSummary of a file or directory.
 *
 * @param fileSystemInfo
 *          file system information
 * @param path
 *          file path
 * @return ContentSummary
 */
public static ContentSummary getContentSummary(FileSystemInfo fileSystemInfo,
    String path) {
  FileSystem fs = getFileSystem(fileSystemInfo);
  Path uri = new Path(path);
  try {
    pathNotExistCheck(path, fs, uri);
    return fs.getContentSummary(uri);
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    closeFileSystem(fs);
  }
  return null;
}
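// A minimal usage sketch for the utility above. "HdfsUtil" as the enclosing class
// and an already-constructed fileSystemInfo are assumptions for illustration; the
// accessors on the result are the standard org.apache.hadoop.fs.ContentSummary API.
ContentSummary cs = HdfsUtil.getContentSummary(fileSystemInfo, "/user/data");
if (cs != null) {
  System.out.println("files=" + cs.getFileCount()
      + " dirs=" + cs.getDirectoryCount()
      + " bytes=" + cs.getLength()
      + " spaceConsumed=" + cs.getSpaceConsumed());
}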