public void incTrain() { setConf(); String inputPath = "../data/exampledata/LRLocalExampleData/a9a.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String loadPath = LOCAL_FS + TMP_PATH + "/model"; String savePath = LOCAL_FS + TMP_PATH + "/newmodel"; String logPath = LOCAL_FS + TMP_PATH + "/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, loadPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, savePath); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType incremental train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN()); LRRunner runner = new LRRunner(); runner.incTrain(conf); }
public void trainOnLocalCluster() throws Exception { setConf(); String inputPath = "../data/exampledata/LRLocalExampleData/a9a.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String savePath = LOCAL_FS + TMP_PATH + "/model"; String logPath = LOCAL_FS + TMP_PATH + "/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, savePath); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN()); LRRunner runner = new LRRunner(); runner.train(conf); }
public void predict() { setConf(); String inputPath = "../data/exampledata/LRLocalExampleData/a9a.test"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String loadPath = LOCAL_FS + TMP_PATH + "/model"; String savePath = LOCAL_FS + TMP_PATH + "/model"; String logPath = LOCAL_FS + TMP_PATH + "/log"; String predictPath = LOCAL_FS + TMP_PATH + "/predict"; // Set trainning data path conf.set(AngelConf.ANGEL_PREDICT_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, loadPath); // Set predict result path conf.set(AngelConf.ANGEL_PREDICT_PATH, predictPath); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType prediction conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT()); LRRunner runner = new LRRunner(); runner.predict(conf); }
public void trainOnLocalCluster() throws Exception { setConf(); String inputPath = "../data/exampledata/LinearRegression"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String logPath = "./src/test/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN()); LinearRegRunner runner = new LinearRegRunner(); runner.train(conf); }
public void incTrain() { setConf(); String inputPath = "../data/exampledata/LinearRegression/LinearReg100.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String logPath = "./src/test/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/newmodel"); // Set actionType incremental train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN()); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); LinearRegRunner runner = new LinearRegRunner(); runner.incTrain(conf); }
public void predict() { setConf(); String inputPath = "../data/exampledata/LinearRegression/LinearReg100.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set predict result path conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict"); // Set actionType prediction conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT()); LinearRegRunner runner = new LinearRegRunner(); runner.predict(conf); }
@Test public void trainOnLocalClusterTest() throws Exception { String inputPath = "./src/test/data/fm/food_fm_libsvm"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String savePath = LOCAL_FS + TMP_PATH + "/model"; String logPath = LOCAL_FS + TMP_PATH + "/LRlog"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, savePath); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN()); FMRunner runner = new FMRunner(); runner.train(conf); }
@Test public void FMClassificationTest() throws Exception { String inputPath = "./src/test/data/fm/a9a.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String savePath = LOCAL_FS + TMP_PATH + "/model"; String logPath = LOCAL_FS + TMP_PATH + "/LRlog"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, savePath); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN()); // Set learnType conf.set(MLConf.ML_FM_LEARN_TYPE(), "c"); // Set feature number conf.set(MLConf.ML_FEATURE_NUM(), String.valueOf(124)); FMRunner runner = new FMRunner(); runner.train(conf); }
private void trainOnLocalClusterTest() throws Exception { try { String inputPath = "./src/test/data/LinearRegression/LinearReg100.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String logPath = "./src/test/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); // Set actionType train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN()); LinearRegRunner runner = new LinearRegRunner(); runner.train(conf); } catch (Exception x) { LOG.error("run trainOnLocalClusterTest failed ", x); throw x; } }
private void incTrainTest() throws Exception { try { String inputPath = "./src/test/data/LinearRegression/LinearReg100.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); String logPath = "./src/test/log"; // Set trainning data path conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set save model path conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/newmodel"); // Set actionType incremental train conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN()); // Set log path conf.set(AngelConf.ANGEL_LOG_PATH, logPath); LinearRegRunner runner = new LinearRegRunner(); runner.incTrain(conf); } catch (Exception x) { LOG.error("run incTrainTest failed ", x); throw x; } }
private void predictTest() throws Exception { try { String inputPath = "./src/test/data/LinearRegression/LinearReg100.train"; String LOCAL_FS = LocalFileSystem.DEFAULT_FS; String TMP_PATH = System.getProperty("java.io.tmpdir", "/tmp"); // Set trainning data path conf.set(AngelConf.ANGEL_PREDICT_DATA_PATH, inputPath); // Set load model path conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model"); // Set predict result path conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict"); // Set log sava path conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/LOG/log"); // Set actionType prediction conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT()); LinearRegRunner runner = new LinearRegRunner(); runner.predict(conf); } catch (Exception x) { LOG.error("run predictTest failed ", x); throw x; } }
private void _mkdirs(boolean exists, FsPermission before, FsPermission after)
    throws Throwable {
  File localDir = make(stub(File.class).returning(exists).from.exists());
  when(localDir.mkdir()).thenReturn(true);
  Path dir = mock(Path.class); // use default stubs
  LocalFileSystem fs = make(stub(LocalFileSystem.class)
      .returning(localDir).from.pathToFile(dir));
  FileStatus stat = make(stub(FileStatus.class)
      .returning(after).from.getPermission());
  when(fs.getFileStatus(dir)).thenReturn(stat);

  try {
    DiskChecker.mkdirsWithExistsAndPermissionCheck(fs, dir, before);

    if (!exists) {
      verify(fs).setPermission(dir, before);
    } else {
      verify(fs).getFileStatus(dir);
      verify(stat).getPermission();
    }
  } catch (DiskErrorException e) {
    if (before != after) {
      assertTrue(e.getMessage().startsWith("Incorrect permission"));
    }
  }
}
@Test
public void testCopyRecursive() throws Throwable {
  int expected = createTestFiles(sourceDir, 64);
  expectSuccess(
      "-s", sourceDir.toURI().toString(),
      "-d", destDir.toURI().toString(),
      "-t", "4",
      "-l", "3");
  LocalFileSystem local = FileSystem.getLocal(new Configuration());
  Set<String> entries = new TreeSet<>();
  RemoteIterator<LocatedFileStatus> iterator =
      local.listFiles(new Path(destDir.toURI()), true);
  int count = 0;
  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries.add(next.getPath().toUri().toString());
    LOG.info("Entry {} size = {}", next.getPath(), next.getLen());
    count++;
  }
  assertEquals("Mismatch in files found", expected, count);
}
protected void open(Path dstPath, CompressionCodec codeC,
    CompressionType compType, Configuration conf, FileSystem hdfs)
    throws IOException {
  if (useRawLocalFileSystem) {
    if (hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem) hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system "
          + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    outStream = hdfs.append(dstPath);
  } else {
    outStream = hdfs.create(dstPath);
  }
  writer = SequenceFile.createWriter(conf, outStream,
      serializer.getKeyClass(), serializer.getValueClass(), compType, codeC);
  registerCurrentStream(outStream, hdfs, dstPath);
}
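A note on the getRaw() calls that recur in this listing: LocalFileSystem is a checksumming wrapper around RawLocalFileSystem, so every file written through it gets a hidden .crc sidecar that is verified on read. getRaw() returns the underlying raw filesystem, which skips that bookkeeping; callers such as the open() method above and the IFile snippets below either manage their own checksums or want none. A minimal sketch:

Configuration conf = new Configuration();
// Checksummed local filesystem: writing "part-0" also creates ".part-0.crc".
LocalFileSystem localFs = FileSystem.getLocal(conf);
// Raw local filesystem: plain writes with no checksum sidecar files.
FileSystem rawFs = localFs.getRaw();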
/**
 * Returns whether a cache file is visible to all (public) or not.
 *
 * @return true if the path in the current path is visible to all,
 *         false otherwise
 */
@Private
public static boolean isPublic(FileSystem fs, Path current, FileStatus sStat,
    LoadingCache<Path, Future<FileStatus>> statCache) throws IOException {
  current = fs.makeQualified(current);
  // The leaf-level file should be readable by others.
  if (!checkPublicPermsForAll(fs, sStat, FsAction.READ_EXECUTE, FsAction.READ)) {
    return false;
  }
  if (Shell.WINDOWS && fs instanceof LocalFileSystem) {
    // Relax the requirement for public cache on LFS on Windows since default
    // permissions are "700" all the way up to the drive letter. In this
    // model, the only requirement for a user is to give EVERYONE group
    // permission on the file and the file will be considered public.
    // This code path is only hit when fs.default.name is file:/// (mainly
    // in tests).
    return true;
  }
  return ancestorsHaveExecutePermissions(fs, current.getParent(), statCache);
}
@Test(timeout = 5000L)
public void testFailResultCodes() throws Exception {
  Configuration conf = new YarnConfiguration();
  conf.setClass("fs.file.impl", LocalFileSystem.class, FileSystem.class);
  LogCLIHelpers cliHelper = new LogCLIHelpers();
  cliHelper.setConf(conf);
  YarnClient mockYarnClient = createMockYarnClient(YarnApplicationState.FINISHED);
  LogsCLI dumper = new LogsCLIForTest(mockYarnClient);
  dumper.setConf(conf);

  // Verify that dumping a non-existent application's logs returns a failure code.
  int exitCode = dumper.run(new String[] {"-applicationId", "application_0_0"});
  assertTrue("Should return an error code", exitCode != 0);

  // Verify that dumping a non-existent container's logs returns a failure code.
  exitCode = cliHelper.dumpAContainersLogs("application_0_0", "container_0_0",
      "nonexistentnode:1234", "nobody");
  assertTrue("Should return an error code", exitCode != 0);
}
private void copyPartitions(Path mapOutputPath, Path indexPath)
    throws IOException {
  FileSystem localFs = FileSystem.getLocal(jobConf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  FSDataOutputStream rawOutput = rfs.create(mapOutputPath, true, BUF_SIZE);
  SpillRecord spillRecord = new SpillRecord(numberOfPartitions);
  IndexRecord indexRecord = new IndexRecord();
  for (int i = 0; i < numberOfPartitions; i++) {
    indexRecord.startOffset = rawOutput.getPos();
    byte[] buffer = outStreams[i].toByteArray();
    IFileOutputStream checksumOutput = new IFileOutputStream(rawOutput);
    checksumOutput.write(buffer);
    // Write checksum.
    checksumOutput.finish();
    // Write index record.
    indexRecord.rawLength = (long) buffer.length;
    indexRecord.partLength = rawOutput.getPos() - indexRecord.startOffset;
    spillRecord.putIndex(indexRecord, i);
    reporter.progress();
  }
  rawOutput.close();
  spillRecord.writeToFile(indexPath, jobConf);
}
/**
 * Create an IFile.Writer using GzipCodec, since this codec does not
 * have a compressor when run via the tests (i.e. no native libraries).
 */
@Test
public void testIFileWriterWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, rfs.create(path),
          Text.class, Text.class, codec, null);
  writer.close();
}
/** Same as above, but create a reader as well. */
@Test
public void testIFileReaderWithCodec() throws Exception {
  Configuration conf = new Configuration();
  FileSystem localFs = FileSystem.getLocal(conf);
  FileSystem rfs = ((LocalFileSystem) localFs).getRaw();
  Path path = new Path(new Path("build/test.ifile"), "data");
  DefaultCodec codec = new GzipCodec();
  codec.setConf(conf);
  FSDataOutputStream out = rfs.create(path);
  IFile.Writer<Text, Text> writer =
      new IFile.Writer<Text, Text>(conf, out, Text.class, Text.class,
          codec, null);
  writer.close();
  FSDataInputStream in = rfs.open(path);
  IFile.Reader<Text, Text> reader =
      new IFile.Reader<Text, Text>(conf, in,
          rfs.getFileStatus(path).getLen(), codec, null);
  reader.close();

  // Test the checksum.
  byte[] ab = new byte[100];
  int read = reader.checksumIn.readWithChecksum(ab, 0, ab.length);
  assertEquals(read, reader.checksumIn.getChecksum().length);
}
static List<StorageLocation> checkStorageLocations(
    Collection<StorageLocation> dataDirs, LocalFileSystem localFS,
    DataNodeDiskChecker dataNodeDiskChecker) throws IOException {
  ArrayList<StorageLocation> locations = new ArrayList<StorageLocation>();
  StringBuilder invalidDirs = new StringBuilder();
  for (StorageLocation location : dataDirs) {
    final URI uri = location.getUri();
    try {
      dataNodeDiskChecker.checkDir(localFS, new Path(uri));
      locations.add(location);
    } catch (IOException ioe) {
      LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "
          + location.getFile() + " : ", ioe);
      invalidDirs.append("\"").append(uri.getPath()).append("\" ");
    }
  }
  if (locations.size() == 0) {
    throw new IOException("All directories in " + DFS_DATANODE_DATA_DIR_KEY
        + " are invalid: " + invalidDirs);
  }
  return locations;
}
@Test(timeout = 30000)
public void testDataDirValidation() throws Throwable {
  DataNodeDiskChecker diskChecker = mock(DataNodeDiskChecker.class);
  // Fail the first two checkDir calls (p1, p2) and let the third (p3) pass.
  doThrow(new IOException()).doThrow(new IOException()).doNothing()
      .when(diskChecker).checkDir(any(LocalFileSystem.class), any(Path.class));
  LocalFileSystem fs = mock(LocalFileSystem.class);
  AbstractList<StorageLocation> locations = new ArrayList<StorageLocation>();

  locations.add(StorageLocation.parse("file:/p1/"));
  locations.add(StorageLocation.parse("file:/p2/"));
  locations.add(StorageLocation.parse("file:/p3/"));
  List<StorageLocation> checkedLocations =
      DataNode.checkStorageLocations(locations, fs, diskChecker);
  assertEquals("number of valid data dirs", 1, checkedLocations.size());
  String validDir = checkedLocations.iterator().next().getFile().getPath();
  assertThat("p3 should be valid", new File("/p3/").getPath(), is(validDir));
}
/**
 * Simulate the {@link DFSConfigKeys#DFS_DATANODE_DATA_DIR_KEY} of a
 * populated DFS filesystem.
 * For each parent directory, this method populates <code>parent/dirName</code>
 * with the contents of a block pool storage directory taken from a singleton
 * datanode master (which contains version and block files). If the destination
 * directory does not exist, it is created; if it already exists, it is first
 * deleted.
 *
 * @param parents parent directories where {@code dirName} is created
 * @param dirName directory under which the storage directory is created
 * @param bpid block pool id for which the storage directory is created
 * @return the array of created directories
 */
public static File[] createBlockPoolStorageDirs(String[] parents,
    String dirName, String bpid) throws Exception {
  File[] retVal = new File[parents.length];
  Path bpCurDir = new Path(MiniDFSCluster.getBPDir(datanodeStorage, bpid,
      Storage.STORAGE_DIR_CURRENT));
  for (int i = 0; i < parents.length; i++) {
    File newDir = new File(parents[i] + "/current/" + bpid, dirName);
    createEmptyDirs(new String[] {newDir.toString()});
    LocalFileSystem localFS = FileSystem.getLocal(new HdfsConfiguration());
    localFS.copyToLocalFile(bpCurDir, new Path(newDir.toString()), false);
    retVal[i] = newDir;
  }
  return retVal;
}
/**
 * Check the counters to see whether the task has exceeded any configured
 * limits.
 *
 * @throws TaskLimitException if a configured limit was exceeded
 */
protected void checkTaskLimits() throws TaskLimitException {
  // Check the limit on writes to the local file system.
  long limit = conf.getLong(MRJobConfig.TASK_LOCAL_WRITE_LIMIT_BYTES,
      MRJobConfig.DEFAULT_TASK_LOCAL_WRITE_LIMIT_BYTES);
  if (limit >= 0) {
    Counters.Counter localWritesCounter = null;
    try {
      LocalFileSystem localFS = FileSystem.getLocal(conf);
      localWritesCounter = counters.findCounter(localFS.getScheme(),
          FileSystemCounter.BYTES_WRITTEN);
    } catch (IOException e) {
      LOG.warn("Could not get LocalFileSystem BYTES_WRITTEN counter");
    }
    if (localWritesCounter != null
        && localWritesCounter.getCounter() > limit) {
      throw new TaskLimitException("too much write to local file system."
          + " current value is " + localWritesCounter.getCounter()
          + " the limit is " + limit);
    }
  }
}
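As a usage sketch for the limit read above: the constant is the same one the method consults, while the 10 GB figure is just an illustrative choice.

Configuration jobConf = new Configuration();
// Fail any task that writes more than ~10 GB to the local file system.
jobConf.setLong(MRJobConfig.TASK_LOCAL_WRITE_LIMIT_BYTES,
    10L * 1024 * 1024 * 1024);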
/**
 * Create physical directories for block pools on the data node.
 *
 * @param dataDirs list of data directories
 * @param conf Configuration instance to use
 * @throws IOException on errors
 */
static void makeBlockPoolDataDir(Collection<File> dataDirs,
    Configuration conf) throws IOException {
  if (conf == null) {
    conf = new HdfsConfiguration();
  }
  LocalFileSystem localFS = FileSystem.getLocal(conf);
  FsPermission permission = new FsPermission(conf.get(
      DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
      DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
  for (File data : dataDirs) {
    try {
      DiskChecker.checkDir(localFS, new Path(data.toURI()), permission);
    } catch (IOException e) {
      LOG.warn("Invalid directory in: " + data.getCanonicalPath() + ": "
          + e.getMessage());
    }
  }
}