/**
 * Assert that the number of log files in the target directory is as expected.
 *
 * @param fs the target FileSystem
 * @param dir the target directory path
 * @param expected the expected number of files
 * @throws IOException thrown if listing files fails
 */
public void assertFileCount(FileSystem fs, Path dir, int expected)
    throws IOException {
  RemoteIterator<LocatedFileStatus> i = fs.listFiles(dir, true);
  int count = 0;

  while (i.hasNext()) {
    i.next();
    count++;
  }

  assertTrue("The sink created additional unexpected log files. " + count
      + " files were created", expected >= count);
  assertTrue("The sink created too few log files. " + count
      + " files were created", expected <= count);
}
/**
 * Return the next ID suffix to use when creating the log file. This method
 * will look at the files in the directory, find the one with the highest
 * ID suffix, add 1 to that suffix, and return it. This approach saves a full
 * linear probe, which matters in the case where there are a large number of
 * log files.
 *
 * @param initial the base file path
 * @param lastId the last ID value that was used
 * @return the next ID to try
 * @throws IOException thrown if there's an issue querying the files in the
 * directory
 */
private int getNextIdToTry(Path initial, int lastId)
    throws IOException {
  RemoteIterator<LocatedFileStatus> files =
      fileSystem.listFiles(currentDirPath, true);
  String base = initial.toString();
  int id = lastId;

  while (files.hasNext()) {
    String file = files.next().getPath().getName();

    if (file.startsWith(base)) {
      int fileId = extractId(file);

      if (fileId > id) {
        id = fileId;
      }
    }
  }

  // Return either 1 more than the highest ID we found or 1 more than the
  // last ID used (if no higher ID was found).
  return id + 1;
}
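// The method above relies on an extractId helper that is not shown. A minimal
// sketch of what such a helper might look like, assuming the ID is a numeric
// suffix separated from the base name by a dot (hypothetical; the real
// helper's parsing rules may differ):
private int extractId(String file) {
  int sep = file.lastIndexOf('.');

  // No parseable suffix means no ID; return -1 so any real ID wins.
  if (sep < 0 || sep == file.length() - 1) {
    return -1;
  }

  try {
    return Integer.parseInt(file.substring(sep + 1));
  } catch (NumberFormatException ex) {
    return -1;
  }
}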
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
    final PathFilter filter) throws FileNotFoundException, IOException {
  final InodeTree.ResolveResult<FileSystem> res = fsState
      .resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> statusIter = res.targetFileSystem
      .listLocatedStatus(res.remainingPath);

  if (res.isInternalDir()) {
    return statusIter;
  }

  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return statusIter.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = statusIter.next();
      return (LocatedFileStatus) fixFileStatus(status,
          getChrootedPath(res, status, f));
    }
  };
}
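// The override above follows a common pattern: wrap a delegate RemoteIterator
// and rewrite each element on the way out. A minimal, hypothetical generic
// helper capturing the same idea (the helper name is illustrative, not part
// of the ViewFileSystem API; assumes java.util.function.Function):
static <T> RemoteIterator<T> mapRemoteIterator(
    final RemoteIterator<T> delegate, final Function<T, T> mapper) {
  return new RemoteIterator<T>() {
    @Override
    public boolean hasNext() throws IOException {
      return delegate.hasNext();
    }

    @Override
    public T next() throws IOException {
      // Transform each element as it is pulled from the delegate.
      return mapper.apply(delegate.next());
    }
  };
}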
@Test
public void testCopyRecursive() throws Throwable {
  int expected = createTestFiles(sourceDir, 64);

  expectSuccess(
      "-s", sourceDir.toURI().toString(),
      "-d", destDir.toURI().toString(),
      "-t", "4",
      "-l", "3");

  LocalFileSystem local = FileSystem.getLocal(new Configuration());
  Set<String> entries = new TreeSet<>();
  RemoteIterator<LocatedFileStatus> iterator =
      local.listFiles(new Path(destDir.toURI()), true);
  int count = 0;

  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries.add(next.getPath().toUri().toString());
    LOG.info("Entry {} size = {}", next.getPath(), next.getLen());
    count++;
  }

  assertEquals("Mismatch in files found", expected, count);
}
/**
 * Get all ORC files present in directory for the specified table and
 * partition/bucket. The ORC files returned are in ascending order of the
 * (insertion) time-partition and sequence-id within the time-partition.
 *
 * @param orcDir the ORC store directory
 * @param fileExt the extension of files to exclude (e.g. CRC files)
 * @param args the arguments in order: table-name, bucket-id, time-partition-id
 * @return the list of all ORC files
 */
private String[] getOrcFiles(final String orcDir, final String fileExt,
    final String... args) {
  try {
    FileSystem fileSystem = FileSystem.get(conf);
    Path distributedPath = new Path(Paths.get(orcDir, args).toString());
    ArrayList<String> filePathStrings = new ArrayList<>();

    if (fileSystem.exists(distributedPath)) {
      RemoteIterator<LocatedFileStatus> fileListItr =
          fileSystem.listFiles(distributedPath, true);
      while (fileListItr != null && fileListItr.hasNext()) {
        LocatedFileStatus file = fileListItr.next();
        if (!file.getPath().getName().endsWith(fileExt)) {
          // exclude CRC files
          filePathStrings.add(file.getPath().toUri().toString());
        }
      }
      Collections.sort(filePathStrings);
    }

    String[] retArray = new String[filePathStrings.size()];
    filePathStrings.toArray(retArray);
    return retArray;
  } catch (IOException e) {
    e.printStackTrace();
  }
  return new String[0];
}
public int getFilesCount(String storeBaseDir, String tableName) {
  int filesCount = 0;
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        filesCount++;
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return filesCount;
}
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName)
    throws IOException {
  List<OrcStruct> orcrecords = new ArrayList<>();
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        final org.apache.hadoop.hive.ql.io.orc.Reader fis =
            OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
        RecordReader rows = fis.rows();
        while (rows.hasNext()) {
          orcrecords.add((OrcStruct) rows.next(null));
        }
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return orcrecords;
}
/**
 * Add files in the input path recursively into the results.
 *
 * @param result the List to store all files
 * @param fs the FileSystem
 * @param path the input path
 * @param inputFilter the input filter that can be used to filter files/dirs
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter) throws IOException {
  RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
  while (iter.hasNext()) {
    LocatedFileStatus stat = iter.next();
    if (inputFilter.accept(stat.getPath())) {
      if (stat.isDirectory()) {
        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
      } else {
        result.add(stat);
      }
    }
  }
}
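// A typical inputFilter for the method above skips hidden files and
// directories, i.e. names starting with '_' or '.'. A minimal sketch of such
// a filter (hypothetical here, though the same convention appears in Hadoop's
// FileInputFormat):
private static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
  @Override
  public boolean accept(Path p) {
    String name = p.getName();
    return !name.startsWith("_") && !name.startsWith(".");
  }
};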
@Override
public Result call() throws Exception {
  Result result = new Result();
  result.fs = fs;

  if (fileStatus.isDirectory()) {
    RemoteIterator<LocatedFileStatus> iter = fs
        .listLocatedStatus(fileStatus.getPath());
    while (iter.hasNext()) {
      LocatedFileStatus stat = iter.next();
      if (inputFilter.accept(stat.getPath())) {
        if (recursive && stat.isDirectory()) {
          result.dirsNeedingRecursiveCalls.add(stat);
        } else {
          result.locatedFileStatuses.add(stat);
        }
      }
    }
  } else {
    result.locatedFileStatuses.add(fileStatus);
  }
  return result;
}
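// The callable above lists one directory level and reports subdirectories
// that still need listing. A minimal, hypothetical driver showing how such
// results might be drained with an ExecutorService; "ListTask" stands in for
// the callable's class, whose name and constructor are assumptions here
// (assumes java.util.ArrayDeque, java.util.concurrent.*):
List<LocatedFileStatus> listInParallel(ExecutorService pool, FileSystem fs,
    FileStatus root, PathFilter filter, boolean recursive) throws Exception {
  List<LocatedFileStatus> all = new ArrayList<>();
  Deque<Future<Result>> pending = new ArrayDeque<>();
  pending.add(pool.submit(new ListTask(fs, root, filter, recursive)));

  while (!pending.isEmpty()) {
    Result r = pending.poll().get();
    all.addAll(r.locatedFileStatuses);
    // Resubmit each subdirectory discovered at this level.
    for (LocatedFileStatus dir : r.dirsNeedingRecursiveCalls) {
      pending.add(pool.submit(new ListTask(fs, dir, filter, recursive)));
    }
  }
  return all;
}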
@Test(timeout=60000)
public void testListFiles() throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    DistributedFileSystem fs = cluster.getFileSystem();

    final Path relative = new Path("relative");
    fs.create(new Path(relative, "foo")).close();

    final List<LocatedFileStatus> retVal = new ArrayList<>();
    final RemoteIterator<LocatedFileStatus> iter =
        fs.listFiles(relative, true);
    while (iter.hasNext()) {
      retVal.add(iter.next());
    }
    System.out.println("retVal = " + retVal);
  } finally {
    cluster.shutdown();
  }
}
/** Test when input path is a file */
@Test
public void testFile() throws IOException {
  fc.mkdir(TEST_DIR, FsPermission.getDefault(), true);
  writeFile(fc, FILE1, FILE_LEN);

  RemoteIterator<LocatedFileStatus> itor = fc.util().listFiles(FILE1, true);
  LocatedFileStatus stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);

  itor = fc.util().listFiles(FILE1, false);
  stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);
}
private static void checkEquals(RemoteIterator<LocatedFileStatus> i1,
    RemoteIterator<LocatedFileStatus> i2) throws IOException {
  while (i1.hasNext()) {
    assertTrue(i2.hasNext());

    // Compare all the fields but the path name, which is relative
    // to the original path from listFiles.
    LocatedFileStatus l1 = i1.next();
    LocatedFileStatus l2 = i2.next();
    assertEquals(l1.getAccessTime(), l2.getAccessTime());
    assertEquals(l1.getBlockSize(), l2.getBlockSize());
    assertEquals(l1.getGroup(), l2.getGroup());
    assertEquals(l1.getLen(), l2.getLen());
    assertEquals(l1.getModificationTime(), l2.getModificationTime());
    assertEquals(l1.getOwner(), l2.getOwner());
    assertEquals(l1.getPermission(), l2.getPermission());
    assertEquals(l1.getReplication(), l2.getReplication());
  }
  assertFalse(i2.hasNext());
}
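// RemoteIterator does not implement java.util.Iterator, so it cannot be used
// with for-each loops or collection utilities directly; comparisons like the
// one above are often simpler after materializing the iterator. A minimal,
// hypothetical helper that drains a RemoteIterator into a List:
static <T> List<T> toList(RemoteIterator<T> it) throws IOException {
  List<T> result = new ArrayList<>();
  while (it.hasNext()) {
    result.add(it.next());
  }
  return result;
}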
/**
 * Assert that a listFiles call on the given directory finds the given
 * subdirectory or file entry. (To get this project to compile under
 * Hadoop 1, this code needs to be commented out.)
 *
 * @param fs filesystem
 * @param dir directory to list
 * @param subdir entry expected in the listing
 * @param recursive recurse?
 * @throws IOException IO problems
 */
public static void assertListFilesFinds(FileSystem fs,
    Path dir,
    Path subdir,
    boolean recursive) throws IOException {
  RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, recursive);
  boolean found = false;
  int entries = 0;
  StringBuilder builder = new StringBuilder();
  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries++;
    builder.append(next.toString()).append('\n');
    if (next.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir + " not found in directory " + dir
      + ": entries=" + entries + " content=" + builder.toString(),
      found);
}
static DataStatistics publishPlainDataStatistics(Configuration conf,
    Path inputDir) throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : "
      + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  return new DataStatistics(dataSize, fileCount, false);
}
@Test
public void basicClientReadWrite() throws Exception {
  Path basePath = new Path(temporaryFolder.newFolder().getAbsolutePath());
  Path path = ((PathCanonicalizer) clientFS).canonicalizePath(
      new Path(basePath, "testfile.bytes"));

  final byte[] randomBytesMoreThanBuffer =
      new byte[RemoteNodeFileSystem.REMOTE_WRITE_BUFFER_SIZE * 3];
  Random r = new Random();
  r.nextBytes(randomBytesMoreThanBuffer);

  try (FSDataOutputStream stream = clientFS.create(path, false)) {
    stream.write(randomBytesMoreThanBuffer);
  }

  RemoteIterator<LocatedFileStatus> iter =
      client.fileSystem.listFiles(basePath, false);
  assertTrue(iter.hasNext());
  LocatedFileStatus status = iter.next();

  try (FSDataInputStream in = clientFS.open(status.getPath())) {
    byte[] back = new byte[randomBytesMoreThanBuffer.length];
    int dataRead = in.read(back);
    assertEquals(back.length, dataRead);
    assertTrue(Arrays.equals(randomBytesMoreThanBuffer, back));
  }

  client.fileSystem.delete(status.getPath(), false);
}
static SortedSet<byte[]> readKeysToSearch(final Configuration conf)
    throws IOException, InterruptedException {
  Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY));
  FileSystem fs = FileSystem.get(conf);
  SortedSet<byte[]> result = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  if (!fs.exists(keysInputDir)) {
    throw new FileNotFoundException(keysInputDir.toString());
  }
  if (!fs.isDirectory(keysInputDir)) {
    throw new UnsupportedOperationException("TODO");
  } else {
    RemoteIterator<LocatedFileStatus> iterator =
        fs.listFiles(keysInputDir, false);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      // Skip "_SUCCESS" file.
      if (keyFileStatus.getPath().getName().startsWith("_")) {
        continue;
      }
      result.addAll(readFileToSearch(conf, fs, keyFileStatus));
    }
  }
  return result;
}
private static SortedSet<byte[]> readFileToSearch(final Configuration conf,
    final FileSystem fs, final LocatedFileStatus keyFileStatus)
    throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return
  // the row. This is what is missing.
  TaskAttemptContext context =
      new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is = new FileSplit(keyFileStatus.getPath(), 0,
        keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes())
          == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0,
            rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
protected int getExtensioncount(final String hdfsstore, final String extension)
    throws Exception {
  int counter = 0;
  HDFSStoreImpl hdfsStore =
      (HDFSStoreImpl) GemFireCacheImpl.getInstance().findHDFSStore(hdfsstore);
  FileSystem fs = hdfsStore.getFileSystem();
  try {
    Path basePath = new Path(hdfsStore.getHomeDir());
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(basePath, true);
    while (files.hasNext()) {
      LocatedFileStatus next = files.next();
      if (next.getPath().getName().endsWith(extension)) {
        counter++;
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return counter;
}
@Override
public LocatedFileStatus next() throws IOException {
  if (!hasNext()) {
    String msg = String.format("No more listings in [%s]", path);
    throw new NoSuchElementException(msg);
  }

  final MantaObject object = nextRef.getAndUpdate(
      stringObjectMap -> nextAcceptable());

  @SuppressWarnings("unchecked")
  final Path nextPath = new Path(object.getPath());
  final FileStatus status = new MantaFileStatus(object, nextPath);

  final BlockLocation[] locs;
  if (status.isFile()) {
    locs = fs.getFileBlockLocations(status, 0, status.getLen());
  } else {
    locs = null;
  }

  return new LocatedFileStatus(status, locs);
}
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(
    final Path path, final PathFilter filter) throws IOException {
  LOG.debug("List located status for path: {}", path);

  String mantaPath = mantaPath(path);
  if (!client.existsAndIsAccessible(mantaPath)) {
    throw new FileNotFoundException(mantaPath);
  }

  /* We emulate a normal filesystem by showing the home directory under root
   * in order to provide compatibility with consumers that expect this
   * behavior. */
  if (mantaPath.equals(SEPARATOR)) {
    LocatedFileStatus singleEntry =
        new LocatedFileStatus(new MantaFileStatus(true, path), null);
    return new SingleEntryRemoteIterator<>(singleEntry);
  }

  Stream<MantaObject> stream = client.listObjects(mantaPath);
  return new MantaRemoteIterator(filter, stream, path, this, true);
}
@Override
public RemoteIterator<LocatedFileStatus> listFiles(final Path path,
    final boolean recursive) throws IOException {
  LOG.debug("List files for path: {}", path);

  String mantaPath = mantaPath(path);
  if (!client.existsAndIsAccessible(mantaPath)) {
    throw new FileNotFoundException(mantaPath);
  }

  final Stream<MantaObject> stream;
  if (recursive) {
    stream = client.find(mantaPath).filter(obj -> !obj.isDirectory());
  } else {
    stream = client.listObjects(mantaPath).filter(obj -> !obj.isDirectory());
  }

  return new MantaRemoteIterator(null, stream, new Path(mantaPath), this, true);
}
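// A minimal usage sketch for a listFiles implementation like the one above,
// showing the standard RemoteIterator drain loop. The method name and the
// idea of printing paths are illustrative only:
void printAllFiles(FileSystem fs, Path dir) throws IOException {
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
  }
}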
@Override
public List<SplitPath> getInputSplits(Configuration config, Path path,
    int splitSize) throws IOException {
  ImmutableList.Builder<SplitPath> splits = ImmutableList.builder();

  RemoteIterator<LocatedFileStatus> files = listFiles(path, false);
  if (!files.hasNext()) {
    // No splits. Don't return an empty list; return a single empty split.
    String table = getTableName(_rootPath, path);
    return ImmutableList.of(
        new SplitPath(getSplitPath(_rootPath, table, getEmptySplitFileName()), 1));
  }

  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    splits.add(new SplitPath(file.getPath(), file.getLen()));
  }

  return splits.build();
}