Пример #1
0
        /// <summary>
        /// Validates the output location of a job: it must be set whenever the job
        /// has reduce tasks, and it must not exist yet.
        /// </summary>
        /// <remarks>
        /// When an output path is set, it is qualified against its own file system
        /// and written back to the job, and tokens are requested for it through
        /// TokenCache before the existence check is made.
        /// </remarks>
        /// <param name="ignored">not used by this method</param>
        /// <param name="job">the job configuration to validate (and update)</param>
        /// <exception cref="Org.Apache.Hadoop.Mapred.FileAlreadyExistsException">
        /// if the output directory already exists
        /// </exception>
        /// <exception cref="Org.Apache.Hadoop.Mapred.InvalidJobConfException">
        /// if no output directory is set and the job has reduce tasks
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        public virtual void CheckOutputSpecs(FileSystem ignored, JobConf job)
        {
            Path outputPath = GetOutputPath(job);

            if (outputPath == null)
            {
                // A missing output directory is tolerated only when there are no reduce tasks
                if (job.GetNumReduceTasks() != 0)
                {
                    throw new InvalidJobConfException("Output directory not set in JobConf.");
                }
                return;
            }

            // Normalize the output directory and store the qualified form in the job
            FileSystem outputFs      = outputPath.GetFileSystem(job);
            Path       qualifiedPath = outputFs.MakeQualified(outputPath);
            SetOutputPath(job, qualifiedPath);

            // Get delegation tokens for the output directory's file system
            TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), new Path[] { qualifiedPath }, job);

            // The output directory must not be there yet
            if (outputFs.Exists(qualifiedPath))
            {
                throw new FileAlreadyExistsException("Output directory " + qualifiedPath + " already exists");
            }
        }
Пример #2
0
        /// <summary>List input directories.</summary>
        /// <remarks>
        /// Subclasses may override to, e.g., select only files matching a regular
        /// expression.
        /// </remarks>
        /// <param name="job">the job to list input paths for</param>
        /// <returns>array of FileStatus objects</returns>
        /// <exception cref="System.IO.IOException">
        /// if the job has no input paths, or if listing the file statuses fails.
        /// </exception>
        protected internal virtual FileStatus[] ListStatus(JobConf job)
        {
            Path[] dirs = GetInputPaths(job);
            if (dirs.Length == 0)
            {
                throw new IOException("No input paths specified in job");
            }
            // get tokens for all the required FileSystems..
            TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);
            // Whether we need to look recursively into the directory structure
            bool recursive = job.GetBoolean(InputDirRecursive, false);
            // creates a MultiPathFilter with the hiddenFileFilter and the
            // user provided one (if any).
            IList <PathFilter> filters = new AList <PathFilter>();

            filters.AddItem(hiddenFileFilter);
            PathFilter jobFilter = GetInputPathFilter(job);

            if (jobFilter != null)
            {
                filters.AddItem(jobFilter);
            }
            PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

            FileStatus[] result;
            int          numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads, FileInputFormat
                                                 .DefaultListStatusNumThreads);
            StopWatch sw = new StopWatch().Start();

            if (numThreads == 1)
            {
                // configured for a single thread: list directly in this method's thread
                IList <FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter,
                                                                           recursive);
                result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count
                                                     ]);
            }
            else
            {
                // any other thread count: delegate the listing to LocatedFileStatusFetcher
                IEnumerable <FileStatus> locatedFiles = null;
                try
                {
                    LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                        job, dirs, recursive, inputFilter, false);
                    locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
                }
                catch (IOException)
                {
                    // A genuine I/O failure keeps its own message and stack trace
                    // instead of being relabeled as an interruption.
                    throw;
                }
                catch (Exception e)
                {
                    // Keep the original failure as the inner exception so it can be diagnosed
                    throw new IOException("Interrupted while getting file statuses", e);
                }
                result = Iterables.ToArray <FileStatus>(locatedFiles);
            }
            sw.Stop();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
            }
            Log.Info("Total input paths to process : " + result.Length);
            return(result);
        }
Пример #3
0
        /// <summary>
        /// Drives a PipesReducer configured with the PipeReducerStub command and
        /// checks that the reduce key and all three values appear in the captured
        /// standard output.
        /// </summary>
        public virtual void TestPipesReduser()
        {
            FilePath[] passwordFiles = CleanTokenPasswordFile();
            JobConf    jobConf       = new JobConf();

            try
            {
                // job token stored in the configuration's credentials
                byte[] tokenId     = Sharpen.Runtime.GetBytesForString("user");
                byte[] tokenSecret = Sharpen.Runtime.GetBytesForString("password");
                Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> jobToken =
                    new Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier>(
                        tokenId, tokenSecret, new Text("kind"), new Text("service"));
                TokenCache.SetJobToken(jobToken, jobConf.GetCredentials());

                // stub command registered as the local cache file
                FilePath stubCommand = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeReducerStub");
                jobConf.Set(MRJobConfig.CacheLocalfiles, stubCommand.GetAbsolutePath());

                PipesReducer <BooleanWritable, Text, IntWritable, Text> pipesReducer =
                    new PipesReducer <BooleanWritable, Text, IntWritable, Text>();
                pipesReducer.Configure(jobConf);
                BooleanWritable reduceKey = new BooleanWritable(true);
                jobConf.Set(MRJobConfig.TaskAttemptId, taskName);
                InitStdOut(jobConf);
                jobConf.SetBoolean(MRJobConfig.SkipRecords, true);

                TestPipeApplication.CombineOutputCollector <IntWritable, Text> collector =
                    new TestPipeApplication.CombineOutputCollector <IntWritable, Text>(
                        this, new Counters.Counter(), new TestPipeApplication.Progress(this));
                Reporter taskReporter = new TestPipeApplication.TestTaskReporter(this);

                // values handed to the reducer, in order
                IList <Text> reduceValues = new AList <Text>();
                foreach (string word in new string[] { "first", "second", "third" })
                {
                    reduceValues.AddItem(new Text(word));
                }

                pipesReducer.Reduce(reduceKey, reduceValues.GetEnumerator(), collector, taskReporter);
                pipesReducer.Close();

                string captured = ReadStdOut(jobConf);
                // the key must be reported ...
                NUnit.Framework.Assert.IsTrue(captured.Contains("reducer key :true"));
                // ... and so must each value
                NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value  :first"));
                NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value  :second"));
                NUnit.Framework.Assert.IsTrue(captured.Contains("reduce value  :third"));
            }
            finally
            {
                // mark the password files for deletion on exit
                if (passwordFiles != null)
                {
                    foreach (FilePath passwordFile in passwordFiles)
                    {
                        passwordFile.DeleteOnExit();
                    }
                }
            }
        }
Пример #4
0
		/// <summary>
		/// Writes credentials (one ordinary token, one AMRM token and one secret key)
		/// to a token file, starts an MRAppMasterTest, and then checks which of them
		/// are visible through the app master's credentials, the JobConf credentials
		/// and the app master's UGI credentials.
		/// </summary>
		public virtual void TestMRAppMasterCredentials()
		{
			LogManager.GetRootLogger().SetLevel(Level.Debug);

			// Simulate credentials passed to AM via client->RM->NM
			Credentials clientCreds = new Credentials();
			byte[] tokenId = Sharpen.Runtime.GetBytesForString("MyIdentifier");
			byte[] tokenSecret = Sharpen.Runtime.GetBytesForString("MyPassword");
			Text tokenKind = new Text("MyTokenKind");
			Text tokenService = new Text("host:port");
			Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> userToken =
				new Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier>(tokenId, tokenSecret, tokenKind, tokenService);
			Text userTokenAlias = new Text("myToken");
			clientCreds.AddToken(userTokenAlias, userToken);

			Text amTokenService = new Text("localhost:0");
			Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier> amToken =
				new Org.Apache.Hadoop.Security.Token.Token<AMRMTokenIdentifier>(tokenId, tokenSecret, AMRMTokenIdentifier.KindName, amTokenService);
			clientCreds.AddToken(amTokenService, amToken);

			Text secretAlias = new Text("mySecretKeyAlias");
			clientCreds.AddSecretKey(secretAlias, Sharpen.Runtime.GetBytesForString("mySecretKey"));
			Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> expectedToken = clientCreds.GetToken(userTokenAlias);

			// Point the token-file environment variable at a file holding the credentials
			JobConf jobConf = new JobConf();
			Path tokenFile = new Path(testDir.GetAbsolutePath(), "tokens-file");
			IDictionary<string, string> envOverrides = new Dictionary<string, string>();
			envOverrides[UserGroupInformation.HadoopTokenFileLocation] = tokenFile.ToUri().GetPath();
			SetNewEnvironmentHack(envOverrides);
			clientCreds.WriteTokenStorageFile(tokenFile, jobConf);

			ApplicationId appId = ApplicationId.NewInstance(12345, 56);
			ApplicationAttemptId attemptId = ApplicationAttemptId.NewInstance(appId, 1);
			ContainerId amContainerId = ContainerId.NewContainerId(attemptId, 546);
			string currentUser = UserGroupInformation.GetCurrentUser().GetShortUserName();

			// Create staging dir, so MRAppMaster doesn't barf.
			FilePath stagingArea = new FilePath(MRApps.GetStagingAreaDir(jobConf, currentUser).ToString());
			stagingArea.Mkdirs();

			// Set login-user to null as that is how a real-world MRApp starts.
			// The null login user is the reason why the token file is read by UGI.
			UserGroupInformation.SetLoginUser(null);
			MRAppMasterTest master = new MRAppMasterTest(attemptId, amContainerId, "host", -1, -1,
				Runtime.CurrentTimeMillis(), false, true);
			MRAppMaster.InitAndStartAppMaster(master, jobConf, currentUser);

			// Now validate the task credentials
			Credentials masterCreds = master.GetCredentials();
			NUnit.Framework.Assert.IsNotNull(masterCreds);
			NUnit.Framework.Assert.AreEqual(1, masterCreds.NumberOfSecretKeys());
			NUnit.Framework.Assert.AreEqual(1, masterCreds.NumberOfTokens());

			// Validate the tokens - app token should not be present
			Org.Apache.Hadoop.Security.Token.Token<TokenIdentifier> actualToken = masterCreds.GetToken(userTokenAlias);
			NUnit.Framework.Assert.IsNotNull(actualToken);
			NUnit.Framework.Assert.AreEqual(expectedToken, actualToken);

			// Validate the keys
			byte[] actualKey = masterCreds.GetSecretKey(secretAlias);
			NUnit.Framework.Assert.IsNotNull(actualKey);
			NUnit.Framework.Assert.AreEqual("mySecretKey", Sharpen.Runtime.GetStringForBytes(actualKey));

			// The credentials should also be added to conf so that OutputCommitter can
			// access them - app token should not be present
			Credentials confCreds = jobConf.GetCredentials();
			NUnit.Framework.Assert.AreEqual(1, confCreds.NumberOfSecretKeys());
			NUnit.Framework.Assert.AreEqual(1, confCreds.NumberOfTokens());
			NUnit.Framework.Assert.AreEqual(expectedToken, confCreds.GetToken(userTokenAlias));
			NUnit.Framework.Assert.AreEqual("mySecretKey",
				Sharpen.Runtime.GetStringForBytes(confCreds.GetSecretKey(secretAlias)));

			// Verify the AM's ugi - app token should be present
			Credentials ugiCreds = master.GetUgi().GetCredentials();
			NUnit.Framework.Assert.AreEqual(1, ugiCreds.NumberOfSecretKeys());
			NUnit.Framework.Assert.AreEqual(2, ugiCreds.NumberOfTokens());
			NUnit.Framework.Assert.AreEqual(expectedToken, ugiCreds.GetToken(userTokenAlias));
			NUnit.Framework.Assert.AreEqual(amToken, ugiCreds.GetToken(amTokenService));
			NUnit.Framework.Assert.AreEqual("mySecretKey",
				Sharpen.Runtime.GetStringForBytes(ugiCreds.GetSecretKey(secretAlias)));
		}
Пример #5
0
        /// <summary>
        /// Starts an Application with the PipeApplicationStub command, sends one
        /// map item through its downlink, and checks the captured stdout, the
        /// reporter state, the reader progress, the collected output and the
        /// exception raised by Abort.
        /// </summary>
        public virtual void TestApplication()
        {
            JobConf conf = new JobConf();
            RecordReader <FloatWritable, NullWritable> rReader = new TestPipeApplication.Reader
                                                                     (this);
            // client for test
            FilePath fCommand = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub"
                                               );

            TestPipeApplication.TestTaskReporter reporter = new TestPipeApplication.TestTaskReporter
                                                                (this);
            FilePath[] psw = CleanTokenPasswordFile();
            try
            {
                conf.Set(MRJobConfig.TaskAttemptId, taskName);
                conf.Set(MRJobConfig.CacheLocalfiles, fCommand.GetAbsolutePath());
                // token for authorization
                Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> token = new Org.Apache.Hadoop.Security.Token.Token
                                                                                     <AMRMTokenIdentifier>(Sharpen.Runtime.GetBytesForString("user"), Sharpen.Runtime.GetBytesForString
                                                                                                               ("password"), new Text("kind"), new Text("service"));
                TokenCache.SetJobToken(token, conf.GetCredentials());
                TestPipeApplication.FakeCollector output = new TestPipeApplication.FakeCollector(
                    this, new Counters.Counter(), new TestPipeApplication.Progress(this));
                FileSystem fs = new RawLocalFileSystem();
                fs.SetConf(conf);
                // writer backing the collector; writes to "outfile" in the workspace
                IFile.Writer <IntWritable, Text> wr = new IFile.Writer <IntWritable, Text>(conf, fs
                                                                                           .Create(new Path(workSpace.GetAbsolutePath() + FilePath.separator + "outfile")),
                                                                                           typeof(IntWritable), typeof(Text), null, null, true);
                output.SetWriter(wr);
                conf.Set(Submitter.PreserveCommandfile, "true");
                InitStdOut(conf);
                Application <WritableComparable <IntWritable>, Writable, IntWritable, Text> application
                    = new Application <WritableComparable <IntWritable>, Writable, IntWritable, Text>
                          (conf, rReader, output, reporter, typeof(IntWritable), typeof(Text));
                application.GetDownlink().Flush();
                application.GetDownlink().MapItem(new IntWritable(3), new Text("txt"));
                application.GetDownlink().Flush();
                application.WaitForFinish();
                wr.Close();
                // test getDownlink().mapItem();
                string stdOut = ReadStdOut(conf);
                NUnit.Framework.Assert.IsTrue(stdOut.Contains("key:3"));
                NUnit.Framework.Assert.IsTrue(stdOut.Contains("value:txt"));
                // reporter test: counter and status should have been sent
                // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
                NUnit.Framework.Assert.AreEqual(1.0, reporter.GetProgress(), 0.01);
                NUnit.Framework.Assert.IsNotNull(reporter.GetCounter("group", "name"));
                // test status MessageType.STATUS
                // (expected value first, so a failure message reads correctly)
                NUnit.Framework.Assert.AreEqual("PROGRESS", reporter.GetStatus());
                // the file contents are read back but not inspected by any assertion below
                stdOut = ReadFile(new FilePath(workSpace.GetAbsolutePath() + FilePath.separator +
                                               "outfile"));
                // check MessageType.PROGRESS
                NUnit.Framework.Assert.AreEqual(0.55f, rReader.GetProgress(), 0.001);
                application.GetDownlink().Close();
                // test MessageType.OUTPUT
                KeyValuePair <IntWritable, Text> entry = output.GetCollect().GetEnumerator().Next(
                    );
                NUnit.Framework.Assert.AreEqual(123, entry.Key.Get());
                NUnit.Framework.Assert.AreEqual("value", entry.Value.ToString());
                try
                {
                    // try to abort; Abort is expected to throw, so reaching Fail() is an error
                    application.Abort(new Exception());
                    NUnit.Framework.Assert.Fail();
                }
                catch (IOException e)
                {
                    // abort works ?
                    NUnit.Framework.Assert.AreEqual("pipe child exception", e.Message);
                }
            }
            finally
            {
                if (psw != null)
                {
                    // remove password files
                    foreach (FilePath file in psw)
                    {
                        file.DeleteOnExit();
                    }
                }
            }
        }
Пример #6
0
 /// <summary>
 /// Runs a PipesMapRunner with the PipeApplicationRunnableStub command and
 /// checks the captured stdout for the protocol version, the key/value class
 /// names and the first and last values sent from the reader.
 /// </summary>
 public virtual void TestRunner()
 {
     // clean old password files
     FilePath[] passwordFiles = CleanTokenPasswordFile();
     try
     {
         RecordReader <FloatWritable, NullWritable> reader =
             new TestPipeApplication.ReaderPipesMapRunner(this);
         JobConf jobConf = new JobConf();
         jobConf.Set(Submitter.IsJavaRr, "true");
         // for stdout and stderr
         jobConf.Set(MRJobConfig.TaskAttemptId, taskName);

         TestPipeApplication.CombineOutputCollector <IntWritable, Text> collector =
             new TestPipeApplication.CombineOutputCollector <IntWritable, Text>(
                 this, new Counters.Counter(), new TestPipeApplication.Progress(this));
         FileSystem localFs = new RawLocalFileSystem();
         localFs.SetConf(jobConf);
         Path outFile = new Path(workSpace + FilePath.separator + "outfile");
         IFile.Writer <IntWritable, Text> writer = new IFile.Writer <IntWritable, Text>(
             jobConf, localFs.Create(outFile), typeof(IntWritable), typeof(Text), null, null, true);
         collector.SetWriter(writer);

         // stub for client
         FilePath stubCommand = GetFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationRunnableStub");
         jobConf.Set(MRJobConfig.CacheLocalfiles, stubCommand.GetAbsolutePath());

         // token for authorization
         byte[] tokenId     = Sharpen.Runtime.GetBytesForString("user");
         byte[] tokenSecret = Sharpen.Runtime.GetBytesForString("password");
         Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier> jobToken =
             new Org.Apache.Hadoop.Security.Token.Token <AMRMTokenIdentifier>(
                 tokenId, tokenSecret, new Text("kind"), new Text("service"));
         TokenCache.SetJobToken(jobToken, jobConf.GetCredentials());
         jobConf.SetBoolean(MRJobConfig.SkipRecords, true);

         TestPipeApplication.TestTaskReporter taskReporter =
             new TestPipeApplication.TestTaskReporter(this);
         PipesMapRunner <FloatWritable, NullWritable, IntWritable, Text> mapRunner =
             new PipesMapRunner <FloatWritable, NullWritable, IntWritable, Text>();
         InitStdOut(jobConf);
         mapRunner.Configure(jobConf);
         mapRunner.Run(reader, collector, taskReporter);

         // The client's stdout is the file shared between the client and this test,
         // so the translated data is checked there.
         string captured = ReadStdOut(jobConf);
         // check version
         NUnit.Framework.Assert.IsTrue(captured.Contains("CURRENT_PROTOCOL_VERSION:0"));
         // check key and value classes
         NUnit.Framework.Assert.IsTrue(captured.Contains("Key class:org.apache.hadoop.io.FloatWritable"));
         NUnit.Framework.Assert.IsTrue(captured.Contains("Value class:org.apache.hadoop.io.NullWritable"));
         // all data from the reader must have been sent
         NUnit.Framework.Assert.IsTrue(captured.Contains("value:0.0"));
         NUnit.Framework.Assert.IsTrue(captured.Contains("value:9.0"));
     }
     finally
     {
         // mark the password files for deletion on exit
         if (passwordFiles != null)
         {
             foreach (FilePath passwordFile in passwordFiles)
             {
                 passwordFile.DeleteOnExit();
             }
         }
     }
 }
Пример #7
0
        /// <summary>Start the child process to handle the task for us.</summary>
        /// <remarks>
        /// The constructor opens a server socket, launches the client command with an
        /// environment describing the port and the shared-secret file, accepts the
        /// client's connection, and then authenticates over the downlink before
        /// starting it and sending the job configuration.
        /// </remarks>
        /// <param name="conf">the task's configuration</param>
        /// <param name="recordReader">the fake record reader to update progress with</param>
        /// <param name="output">the collector to send output to</param>
        /// <param name="reporter">the reporter for the task</param>
        /// <param name="outputKeyClass">the class of the output keys</param>
        /// <param name="outputValueClass">the class of the output values</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal Application(JobConf conf, RecordReader <FloatWritable, NullWritable> recordReader
                             , OutputCollector <K2, V2> output, Reporter reporter, Type outputKeyClass, Type outputValueClass
                             )
        {
            // port 0 is requested; the actual local port is read back below and
            // published to the child through the environment
            serverSocket = Sharpen.Extensions.CreateServerSocket(0);
            IDictionary <string, string> env = new Dictionary <string, string>();

            // add TMPDIR environment variable with the value of java.io.tmpdir
            env["TMPDIR"]       = Runtime.GetProperty("java.io.tmpdir");
            env[Submitter.Port] = Sharpen.Extensions.ToString(serverSocket.GetLocalPort());
            //Add token to the environment if security is enabled
            Org.Apache.Hadoop.Security.Token.Token <JobTokenIdentifier> jobToken = TokenCache.
                                                                                   GetJobToken(conf.GetCredentials());
            // This password is used as shared secret key between this application and
            // child pipes process
            // NOTE(review): jobToken is dereferenced without a null check — confirm that
            // TokenCache.GetJobToken cannot return null for the credentials used here.
            byte[] password          = jobToken.GetPassword();
            string localPasswordFile = new FilePath(".") + Path.Separator + "jobTokenPassword";

            // the child is told where the secret lives through the environment entry below
            WritePasswordToLocalFile(localPasswordFile, password, conf);
            env["hadoop.pipes.shared.secret.location"] = localPasswordFile;
            IList <string> cmd         = new AList <string>();
            string         interpretor = conf.Get(Submitter.Interpretor);

            // an interpreter, when configured, goes in front of the executable
            if (interpretor != null)
            {
                cmd.AddItem(interpretor);
            }
            // NOTE(review): takes the first local cache file as the executable — presumably
            // callers always register one; verify, since an empty or null array fails here.
            string executable = DistributedCache.GetLocalCacheFiles(conf)[0].ToString();

            if (!FileUtil.CanExecute(new FilePath(executable)))
            {
                // LinuxTaskController sets +x permissions on all distcache files already.
                // In case of DefaultTaskController, set permissions here.
                FileUtil.Chmod(executable, "u+x");
            }
            cmd.AddItem(executable);
            // wrap the command in a stdout/stderr capture
            // we are starting map/reduce task of the pipes job. this is not a cleanup
            // attempt.
            TaskAttemptID taskid = ((TaskAttemptID)TaskAttemptID.ForName(conf.Get(MRJobConfig
                                                                                  .TaskAttemptId)));
            FilePath stdout    = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stdout);
            FilePath stderr    = TaskLog.GetTaskLogFile(taskid, false, TaskLog.LogName.Stderr);
            long     logLength = TaskLog.GetTaskLogLength(conf);

            cmd          = TaskLog.CaptureOutAndError(null, cmd, stdout, stderr, logLength, false);
            // launch the client, then wait for it to connect back on the server socket
            process      = RunClient(cmd, env);
            clientSocket = serverSocket.Accept();
            // digestToSend is derived from the password and the challenge; digestExpected
            // is derived from the password and digestToSend and is handed to the handler
            string challenge      = GetSecurityChallenge();
            string digestToSend   = CreateDigest(password, challenge);
            string digestExpected = CreateDigest(password, digestToSend);

            handler = new OutputHandler <K2, V2>(output, reporter, recordReader, digestExpected
                                                 );
            // instances of the output key/value classes passed on to the protocol
            K2 outputKey   = (K2)ReflectionUtils.NewInstance(outputKeyClass, conf);
            V2 outputValue = (V2)ReflectionUtils.NewInstance(outputValueClass, conf);

            downlink = new BinaryProtocol <K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue
                                                           , conf);
            // authenticate first; the downlink is started only after authentication completes
            downlink.Authenticate(digestToSend, challenge);
            WaitForAuthentication();
            Log.Debug("Authentication succeeded");
            downlink.Start();
            downlink.SetJobConf(conf);
        }