/// <summary>
/// Validates the output location configured for a job: it has to be set
/// (unless the job has zero reduce tasks) and it must not exist yet.
/// </summary>
/// <param name="ignored">not referenced by this method</param>
/// <param name="job">the job configuration to check; its output path is rewritten in qualified form</param>
/// <exception cref="Org.Apache.Hadoop.Mapred.FileAlreadyExistsException"/>
/// <exception cref="Org.Apache.Hadoop.Mapred.InvalidJobConfException"/>
/// <exception cref="System.IO.IOException"/>
public virtual void CheckOutputSpecs(FileSystem ignored, JobConf job)
{
    Path outputDir = GetOutputPath(job);
    if (outputDir == null)
    {
        // A missing output directory is tolerated only when no reduce tasks run.
        if (job.GetNumReduceTasks() != 0)
        {
            throw new InvalidJobConfException("Output directory not set in JobConf.");
        }
        return;
    }

    FileSystem outputFs = outputDir.GetFileSystem(job);

    // Store the qualified form of the output directory back into the job.
    outputDir = outputFs.MakeQualified(outputDir);
    SetOutputPath(job, outputDir);

    // Obtain delegation tokens for the file system that holds the output directory.
    TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), new Path[] { outputDir }, job);

    // The job must not overwrite an existing directory.
    if (outputFs.Exists(outputDir))
    {
        throw new FileAlreadyExistsException("Output directory " + outputDir + " already exists");
    }
}
/// <summary>List input directories.</summary>
/// <remarks>
/// List input directories.
/// Subclasses may override to, e.g., select only files matching a regular
/// expression.
/// </remarks>
/// <param name="job">the job to list input paths for</param>
/// <returns>array of FileStatus objects</returns>
/// <exception cref="System.IO.IOException">
/// if the job has no input paths, or if fetching the file statuses with
/// multiple threads fails (the original failure is attached as the inner exception).
/// </exception>
protected internal virtual FileStatus[] ListStatus(JobConf job)
{
    Path[] dirs = GetInputPaths(job);
    if (dirs.Length == 0)
    {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);

    // Whether we need to recursively look into the directory structure
    bool recursive = job.GetBoolean(InputDirRecursive, false);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    IList<PathFilter> filters = new AList<PathFilter>();
    filters.AddItem(hiddenFileFilter);
    PathFilter jobFilter = GetInputPathFilter(job);
    if (jobFilter != null)
    {
        filters.AddItem(jobFilter);
    }
    PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

    FileStatus[] result;
    int numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads, FileInputFormat.DefaultListStatusNumThreads);
    StopWatch sw = new StopWatch().Start();
    if (numThreads == 1)
    {
        IList<FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter, recursive);
        result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count]);
    }
    else
    {
        IEnumerable<FileStatus> locatedFiles = null;
        try
        {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
        }
        catch (Exception e)
        {
            // Attach the caught exception as the cause so the real failure and
            // its stack trace are not lost when the IOException is reported.
            throw new IOException("Interrupted while getting file statuses", e);
        }
        result = Iterables.ToArray<FileStatus>(locatedFiles);
    }

    sw.Stop();
    if (Log.IsDebugEnabled())
    {
        Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
    }
    Log.Info("Total input paths to process : " + result.Length);
    return result;
}
/// <summary>
/// Feeds one key and three values to a PipesReducer and checks that all of them
/// appear in the text returned by ReadStdOut.
/// </summary>
public virtual void TestPipesReduser()
{
    FilePath[] passwordFiles = CleanTokenPasswordFile();
    JobConf conf = new JobConf();
    try
    {
        // Job token stored in the configuration's credentials.
        byte[] tokenIdentifier = Sharpen.Runtime.GetBytesForString("user");
        byte[] tokenPassword = Sharpen.Runtime.GetBytesForString("password");
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> jobToken =
            new Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier>(
                tokenIdentifier, tokenPassword, new Text("kind"), new Text("service"));
        TokenCache.SetJobToken(jobToken, conf.GetCredentials());

        // Command file for the reducer stub.
        FilePath commandFile = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeReducerStub");
        conf.Set(MRJobConfig.CacheLocalfiles, commandFile.GetAbsolutePath());

        PipesReducer<BooleanWritable, Text, IntWritable, Text> pipesReducer =
            new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        pipesReducer.Configure(conf);
        BooleanWritable key = new BooleanWritable(true);

        conf.Set(MRJobConfig.TaskAttemptId, taskName);
        InitStdOut(conf);
        conf.SetBoolean(MRJobConfig.SkipRecords, true);

        TestPipeApplication.CombineOutputCollector<IntWritable, Text> collector =
            new TestPipeApplication.CombineOutputCollector<IntWritable, Text>(
                this, new Counters.Counter(), new TestPipeApplication.Progress(this));
        Reporter taskReporter = new TestPipeApplication.TestTaskReporter(this);

        IList<Text> values = new AList<Text>();
        values.AddItem(new Text("first"));
        values.AddItem(new Text("second"));
        values.AddItem(new Text("third"));

        pipesReducer.Reduce(key, values.GetEnumerator(), collector, taskReporter);
        pipesReducer.Close();

        string captured = ReadStdOut(conf);

        // The key must have been reported...
        NUnit.Framework.Assert.IsTrue(captured.Contains("reducer key :true"));
        // ...together with each of the three values.
        NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value :first"));
        NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value :second"));
        NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value :third"));
    }
    finally
    {
        // Schedule removal of the password files, if there are any.
        if (passwordFiles != null)
        {
            foreach (FilePath passwordFile in passwordFiles)
            {
                passwordFile.DeleteOnExit();
            }
        }
    }
}
/// <summary>
/// Writes a set of credentials to a token file, starts an MRAppMasterTest, and
/// verifies which tokens and secret keys are visible through the app master,
/// the JobConf and the app master's UGI.
/// </summary>
public virtual void TestMRAppMasterCredentials()
{
    Logger root = LogManager.GetRootLogger();
    root.SetLevel(Level.Debug);

    // Credentials as they would reach the AM via client->RM->NM.
    Credentials submitted = new Credentials();
    byte[] tokenId = Sharpen.Runtime.GetBytesForString("MyIdentifier");
    byte[] tokenPassword = Sharpen.Runtime.GetBytesForString("MyPassword");
    Text tokenKind = new Text("MyTokenKind");
    Text tokenService = new Text("host:port");
    Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> userToken =
        new Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier>(
            tokenId, tokenPassword, tokenKind, tokenService);
    Text tokenAlias = new Text("myToken");
    submitted.AddToken(tokenAlias, userToken);

    Text appTokenService = new Text("localhost:0");
    Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> appToken =
        new Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier>(
            tokenId, tokenPassword, AMRMTokenIdentifier.KindName, appTokenService);
    submitted.AddToken(appTokenService, appToken);

    Text keyAlias = new Text("mySecretKeyAlias");
    submitted.AddSecretKey(keyAlias, Sharpen.Runtime.GetBytesForString("mySecretKey"));
    Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> storedToken =
        submitted.GetToken(tokenAlias);

    JobConf conf = new JobConf();

    // Point the token-file environment entry at the file written below.
    Path tokenFile = new Path(testDir.GetAbsolutePath(), "tokens-file");
    IDictionary<string, string> environment = new Dictionary<string, string>();
    environment[UserGroupInformation.HadoopTokenFileLocation] = tokenFile.ToUri().GetPath();
    SetNewEnvironmentHack(environment);
    submitted.WriteTokenStorageFile(tokenFile, conf);

    ApplicationId appId = ApplicationId.NewInstance(12345, 56);
    ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 1);
    ContainerId containerId = ContainerId.NewContainerId(attemptId, 546);
    string userName = UserGroupInformation.GetCurrentUser().GetShortUserName();

    // The staging directory has to exist before MRAppMaster is started.
    FilePath stagingDir = new FilePath(MRApps.GetStagingAreaDir(conf, userName).ToString());
    stagingDir.Mkdirs();

    // A real MRApp starts with a null login user; that null is what makes
    // UGI read the token file.
    UserGroupInformation.SetLoginUser(null);

    MRAppMasterTest appMaster = new MRAppMasterTest(
        attemptId, containerId, "host", -1, -1, Runtime.CurrentTimeMillis(), false, true);
    MRAppMaster.InitAndStartAppMaster(appMaster, conf, userName);

    // Task credentials held by the app master.
    Credentials appMasterCreds = appMaster.GetCredentials();
    NUnit.Framework.Assert.IsNotNull(appMasterCreds);
    NUnit.Framework.Assert.AreEqual(1, appMasterCreds.NumberOfSecretKeys());
    NUnit.Framework.Assert.AreEqual(1, appMasterCreds.NumberOfTokens());

    // Tokens: the app token should not be present here.
    Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> usedToken =
        appMasterCreds.GetToken(tokenAlias);
    NUnit.Framework.Assert.IsNotNull(usedToken);
    NUnit.Framework.Assert.AreEqual(storedToken, usedToken);

    // Secret keys.
    byte[] usedKey = appMasterCreds.GetSecretKey(keyAlias);
    NUnit.Framework.Assert.IsNotNull(usedKey);
    NUnit.Framework.Assert.AreEqual("mySecretKey", Sharpen.Runtime.GetStringForBytes(usedKey));

    // The credentials are also expected in conf so that the OutputCommitter
    // can access them; again without the app token.
    Credentials confCredentials = conf.GetCredentials();
    NUnit.Framework.Assert.AreEqual(1, confCredentials.NumberOfSecretKeys());
    NUnit.Framework.Assert.AreEqual(1, confCredentials.NumberOfTokens());
    NUnit.Framework.Assert.AreEqual(storedToken, confCredentials.GetToken(tokenAlias));
    NUnit.Framework.Assert.AreEqual("mySecretKey",
        Sharpen.Runtime.GetStringForBytes(confCredentials.GetSecretKey(keyAlias)));

    // The AM's UGI is expected to hold the app token as well.
    Credentials ugiCredentials = appMaster.GetUgi().GetCredentials();
    NUnit.Framework.Assert.AreEqual(1, ugiCredentials.NumberOfSecretKeys());
    NUnit.Framework.Assert.AreEqual(2, ugiCredentials.NumberOfTokens());
    NUnit.Framework.Assert.AreEqual(storedToken, ugiCredentials.GetToken(tokenAlias));
    NUnit.Framework.Assert.AreEqual(appToken, ugiCredentials.GetToken(appTokenService));
    NUnit.Framework.Assert.AreEqual("mySecretKey",
        Sharpen.Runtime.GetStringForBytes(ugiCredentials.GetSecretKey(keyAlias)));
}
/// <summary>
/// Starts an Application against the PipeApplicationStub command, sends one map
/// item through the downlink, and checks the captured standard output, the
/// reporter state, the reader progress, the collected output and abort handling.
/// </summary>
public virtual void TestApplication()
{
    JobConf conf = new JobConf();
    RecordReader<FloatWritable, NullWritable> rReader = new TestPipeApplication.Reader(this);

    // client for test
    FilePath fCommand = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");
    TestPipeApplication.TestTaskReporter reporter = new TestPipeApplication.TestTaskReporter(this);
    FilePath[] psw = CleanTokenPasswordFile();
    try
    {
        conf.Set(MRJobConfig.TaskAttemptId, taskName);
        conf.Set(MRJobConfig.CacheLocalfiles, fCommand.GetAbsolutePath());

        // token for authorization
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> token =
            new Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier>(
                Sharpen.Runtime.GetBytesForString("user"),
                Sharpen.Runtime.GetBytesForString("password"),
                new Text("kind"), new Text("service"));
        TokenCache.SetJobToken(token, conf.GetCredentials());

        TestPipeApplication.FakeCollector output = new TestPipeApplication.FakeCollector(
            this, new Counters.Counter(), new TestPipeApplication.Progress(this));
        FileSystem fs = new RawLocalFileSystem();
        fs.SetConf(conf);
        IFile.Writer<IntWritable, Text> wr = new IFile.Writer<IntWritable, Text>(
            conf,
            fs.Create(new Path(workSpace.GetAbsolutePath() + FilePath.separator + "outfile")),
            typeof(IntWritable), typeof(Text), null, null, true);
        output.SetWriter(wr);
        conf.Set(Submitter.PreserveCommandfile, "true");
        InitStdOut(conf);

        Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application =
            new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(
                conf, rReader, output, reporter, typeof(IntWritable), typeof(Text));
        application.GetDownlink().Flush();
        application.GetDownlink().MapItem(new IntWritable(3), new Text("txt"));
        application.GetDownlink().Flush();
        application.WaitForFinish();
        wr.Close();

        // test getDownlink().mapItem();
        string stdOut = ReadStdOut(conf);
        NUnit.Framework.Assert.IsTrue(stdOut.Contains("key:3"));
        NUnit.Framework.Assert.IsTrue(stdOut.Contains("value:txt"));

        // reporter test: the counter and the status should have been sent
        // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
        NUnit.Framework.Assert.AreEqual(1.0, reporter.GetProgress(), 0.01);
        NUnit.Framework.Assert.IsNotNull(reporter.GetCounter("group", "name"));

        // test status MessageType.STATUS
        // Expected value goes first so that a failure message labels the
        // expected and actual values correctly.
        NUnit.Framework.Assert.AreEqual("PROGRESS", reporter.GetStatus());
        stdOut = ReadFile(new FilePath(workSpace.GetAbsolutePath() + FilePath.separator + "outfile"));

        // check MessageType.PROGRESS
        NUnit.Framework.Assert.AreEqual(0.55f, rReader.GetProgress(), 0.001);
        application.GetDownlink().Close();

        // test MessageType.OUTPUT
        KeyValuePair<IntWritable, Text> entry = output.GetCollect().GetEnumerator().Next();
        NUnit.Framework.Assert.AreEqual(123, entry.Key.Get());
        NUnit.Framework.Assert.AreEqual("value", entry.Value.ToString());

        try
        {
            // try to abort
            application.Abort(new Exception());
            NUnit.Framework.Assert.Fail();
        }
        catch (IOException e)
        {
            // abort is expected to surface as an IOException with this message
            NUnit.Framework.Assert.AreEqual("pipe child exception", e.Message);
        }
    }
    finally
    {
        if (psw != null)
        {
            // remove password files
            foreach (FilePath file in psw)
            {
                file.DeleteOnExit();
            }
        }
    }
}
/// <summary>
/// Runs a PipesMapRunner against the PipeApplicationRunnableStub command and
/// checks the text returned by ReadStdOut for the protocol version, the key and
/// value class names, and the first and last values.
/// </summary>
public virtual void TestRunner()
{
    // Start from a clean set of password files.
    FilePath[] passwordFiles = CleanTokenPasswordFile();
    try
    {
        RecordReader<FloatWritable, NullWritable> reader =
            new TestPipeApplication.ReaderPipesMapRunner(this);
        JobConf conf = new JobConf();
        conf.Set(Submitter.IsJavaRr, "true");

        // Needed for the stdout and stderr files.
        conf.Set(MRJobConfig.TaskAttemptId, taskName);

        TestPipeApplication.CombineOutputCollector<IntWritable, Text> collector =
            new TestPipeApplication.CombineOutputCollector<IntWritable, Text>(
                this, new Counters.Counter(), new TestPipeApplication.Progress(this));
        FileSystem localFs = new RawLocalFileSystem();
        localFs.SetConf(conf);
        IFile.Writer<IntWritable, Text> writer = new IFile.Writer<IntWritable, Text>(
            conf,
            localFs.Create(new Path(workSpace + FilePath.separator + "outfile")),
            typeof(IntWritable), typeof(Text), null, null, true);
        collector.SetWriter(writer);

        // Stub that plays the client side.
        FilePath commandFile = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationRunnableStub");
        conf.Set(MRJobConfig.CacheLocalfiles, commandFile.GetAbsolutePath());

        // Token used for authorization.
        byte[] tokenIdentifier = Sharpen.Runtime.GetBytesForString("user");
        byte[] tokenPassword = Sharpen.Runtime.GetBytesForString("password");
        Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> jobToken =
            new Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier>(
                tokenIdentifier, tokenPassword, new Text("kind"), new Text("service"));
        TokenCache.SetJobToken(jobToken, conf.GetCredentials());
        conf.SetBoolean(MRJobConfig.SkipRecords, true);

        TestPipeApplication.TestTaskReporter taskReporter =
            new TestPipeApplication.TestTaskReporter(this);
        PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text> mapRunner =
            new PipesMapRunner<FloatWritable, NullWritable, IntWritable, Text>();
        InitStdOut(conf);
        mapRunner.Configure(conf);
        mapRunner.Run(reader, collector, taskReporter);

        // Part of the transferred data is checked through the client's stdout,
        // which serves as the file shared between client and test.
        string captured = ReadStdOut(conf);

        // Protocol version.
        NUnit.Framework.Assert.IsTrue(captured.Contains("CURRENT_PROTOCOL_VERSION:0"));
        // Key and value classes.
        NUnit.Framework.Assert.IsTrue(captured.Contains("Key class:org.apache.hadoop.io.FloatWritable"));
        NUnit.Framework.Assert.IsTrue(captured.Contains("Value class:org.apache.hadoop.io.NullWritable"));
        // All data from the reader has been sent.
        NUnit.Framework.Assert.IsTrue(captured.Contains("value:0.0"));
        NUnit.Framework.Assert.IsTrue(captured.Contains("value:9.0"));
    }
    finally
    {
        // Schedule removal of the password files, if there are any.
        if (passwordFiles != null)
        {
            foreach (FilePath passwordFile in passwordFiles)
            {
                passwordFile.DeleteOnExit();
            }
        }
    }
}
/// <summary>
/// Launches the child process that handles the task and performs the
/// digest-based authentication handshake with it over a local socket.
/// </summary>
/// <param name="conf">the task's configuration</param>
/// <param name="recordReader">the fake record reader to update progress with</param>
/// <param name="output">the collector to send output to</param>
/// <param name="reporter">the reporter for the task</param>
/// <param name="outputKeyClass">the class of the output keys</param>
/// <param name="outputValueClass">the class of the output values</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
    OutputCollector<K2, V2> output, Reporter reporter, Type outputKeyClass, Type outputValueClass)
{
    // Port 0 is passed to the server socket; the resulting local port is
    // handed to the child through the environment below.
    serverSocket = Sharpen.Extensions.CreateServerSocket(0);

    IDictionary<string, string> childEnv = new Dictionary<string, string>();
    // TMPDIR carries the value of the java.io.tmpdir property.
    childEnv["TMPDIR"] = Runtime.GetProperty("java.io.tmpdir");
    childEnv[Submitter.Port] = Sharpen.Extensions.ToString(serverSocket.GetLocalPort());

    // The job token's password is the secret shared between this application
    // and the child pipes process; it is written to a local file whose
    // location is passed through the environment.
    Org.Apache.Hadoop.Security.Token.Token<JobTokenIdentifier> jobToken =
        TokenCache.GetJobToken(conf.GetCredentials());
    byte[] sharedSecret = jobToken.GetPassword();
    string secretFile = new FilePath(".") + Path.Separator + "jobTokenPassword";
    WritePasswordToLocalFile(secretFile, sharedSecret, conf);
    childEnv["hadoop.pipes.shared.secret.location"] = secretFile;

    // Build the command line: optional interpreter followed by the executable.
    IList<string> command = new AList<string>();
    string interpretor = conf.Get(Submitter.Interpretor);
    if (interpretor != null)
    {
        command.AddItem(interpretor);
    }
    string executablePath = DistributedCache.GetLocalCacheFiles(conf)[0].ToString();
    bool alreadyExecutable = FileUtil.CanExecute(new FilePath(executablePath));
    if (!alreadyExecutable)
    {
        // LinuxTaskController already sets +x on all distcache files; with
        // DefaultTaskController the permission has to be set here.
        FileUtil.Chmod(executablePath, "u+x");
    }
    command.AddItem(executablePath);

    // Wrap the command so stdout/stderr are captured. This starts a map/reduce
    // task of the pipes job, not a cleanup attempt.
    TaskAttemptID attemptId = (TaskAttemptID)TaskAttemptID.ForName(conf.Get(MRJobConfig.TaskAttemptId));
    FilePath stdoutFile = TaskLog.GetTaskLogFile(attemptId, false, TaskLog.LogName.Stdout);
    FilePath stderrFile = TaskLog.GetTaskLogFile(attemptId, false, TaskLog.LogName.Stderr);
    long logLimit = TaskLog.GetTaskLogLength(conf);
    command = TaskLog.CaptureOutAndError(null, command, stdoutFile, stderrFile, logLimit, false);

    process = RunClient(command, childEnv);
    clientSocket = serverSocket.Accept();

    // Digest sent to the child, and the digest expected back from it.
    string challenge = GetSecurityChallenge();
    string digestToSend = CreateDigest(sharedSecret, challenge);
    string digestExpected = CreateDigest(sharedSecret, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 keyInstance = (K2)ReflectionUtils.NewInstance(outputKeyClass, conf);
    V2 valueInstance = (V2)ReflectionUtils.NewInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, keyInstance, valueInstance, conf);

    downlink.Authenticate(digestToSend, challenge);
    WaitForAuthentication();
    Log.Debug("Authentication succeeded");
    downlink.Start();
    downlink.SetJobConf(conf);
}