Exemple #1
0
        // run a job with 1 map and let it run to completion
        /// <exception cref="System.IO.IOException"/>
        private void TestSuccessfulJob(string filename, Type committer, string[] exclude)
        {
            // Configure and submit a single-map job using the supplied committer.
            JobConf conf = mr.CreateJobConf();
            Path output = GetNewOutputDir();
            ConfigureJob(conf, "job with cleanup()", 1, 0, output);
            conf.SetOutputCommitter(committer);
            JobClient client = new JobClient(conf);
            RunningJob submitted = client.SubmitJob(conf);
            JobID jobId = submitted.GetID();
            submitted.WaitForCompletion();
            Log.Info("Job finished : " + submitted.IsComplete());
            // On success the committer must have produced the marker file.
            Path testFile = new Path(output, filename);
            NUnit.Framework.Assert.IsTrue("Done file \"" + testFile + "\" missing for job " +
                                          jobId, fileSys.Exists(testFile));
            // check if the files from the missing set exists
            foreach (string name in exclude)
            {
                Path excluded = new Path(output, name);
                NUnit.Framework.Assert.IsFalse("File " + excluded + " should not be present for successful job "
                                               + jobId, fileSys.Exists(excluded));
            }
        }
Exemple #2
0
        public virtual void TestDistinctUsers()
        {
            // Runs the same word-count input as two different users, each
            // writing into that user's own output directory.
            string input = "The quick brown fox\nhas many silly\n" + "red fox sox\n";

            JobConf aliceJob = mr.CreateJobConf();
            TestMiniMRClasspath.ConfigureWordCount(fs, aliceJob, input, 2, 1,
                                                   new Path("/testing/distinct/input"),
                                                   new Path("/user/alice/output"));
            RunJobAsUser(aliceJob, AliceUgi);

            JobConf bobJob = mr.CreateJobConf();
            TestMiniMRClasspath.ConfigureWordCount(fs, bobJob, input, 2, 1,
                                                   new Path("/testing/distinct/input2"),
                                                   new Path("/user/bob/output2"));
            RunJobAsUser(bobJob, BobUgi);
        }
Exemple #3
0
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestWithDFS()
        {
            // Runs word count against a mini DFS/MR pair whose MapReduce system
            // directory has been relocated to a custom path.
            MiniDFSCluster dfs = null;
            MiniMRCluster mr = null;
            FileSystem fileSys = null;
            try
            {
                JobConf conf = new JobConf();
                conf.Set(JTConfig.JtSystemDir, "/tmp/custom/mapred/system");
                dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(4).Build();
                fileSys = dfs.GetFileSystem();
                int taskTrackers = 4;
                mr = new MiniMRCluster(taskTrackers, fileSys.GetUri().ToString(), 1, null, null, conf);
                RunWordCount(mr, mr.CreateJobConf(), conf.Get("mapred.system.dir"));
            }
            finally
            {
                // Tear both clusters down regardless of the outcome.
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }
Exemple #4
0
        public virtual void TestExternalWritable()
        {
            // Launches the external-writable job on mini DFS + MR clusters and
            // verifies the resulting word counts.
            MiniDFSCluster dfs = null;
            MiniMRCluster mr = null;
            FileSystem fileSys = null;
            string namenode = null;
            try
            {
                Configuration conf = new Configuration();
                dfs = new MiniDFSCluster.Builder(conf).Build();
                fileSys = dfs.GetFileSystem();
                namenode = fileSys.GetUri().ToString();
                int taskTrackers = 4;
                mr = new MiniMRCluster(taskTrackers, namenode, 3);
                JobConf jobConf = mr.CreateJobConf();
                // 3 maps, 1 reduce over the fixed two-line input.
                string result = LaunchExternal(fileSys.GetUri(), jobConf, "Dennis was here!\nDennis again!"
                                               , 3, 1);
                NUnit.Framework.Assert.AreEqual("Dennis again!\t1\nDennis was here!\t1\n", result);
            }
            finally
            {
                // Tear both clusters down regardless of the outcome.
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }
Exemple #5
0
        public virtual void TestClassPath()
        {
            // Runs the word-count job on a mini DFS + MR deployment and checks
            // the aggregated counts; clusters are torn down in the finally block.
            string         namenode = null;
            MiniDFSCluster dfs      = null;
            MiniMRCluster  mr       = null;
            FileSystem     fileSys  = null;

            try
            {
                int           taskTrackers = 4;
                Configuration conf         = new Configuration();
                // NOTE: removed unused local `jobTrackerPort` (declared but never read).
                dfs      = new MiniDFSCluster.Builder(conf).Build();
                fileSys  = dfs.GetFileSystem();
                namenode = fileSys.GetUri().ToString();
                mr       = new MiniMRCluster(taskTrackers, namenode, 3);
                JobConf jobConf = mr.CreateJobConf();
                // 3 maps, 1 reduce; expected output is the sorted word/count pairs.
                string result = LaunchWordCount(fileSys.GetUri(), jobConf, "The quick brown fox\nhas many silly\n"
                                                + "red fox sox\n", 3, 1);
                NUnit.Framework.Assert.AreEqual("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" + "quick\t1\nred\t1\nsilly\t1\nsox\t1\n"
                                                , result);
            }
            finally
            {
                // Always stop the clusters, even when the assertion fails.
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }
        /// <exception cref="System.IO.IOException"/>
        public virtual void TestWithDFS()
        {
            // Exercises the distributed cache twice on a mini DFS/MR deployment:
            // once plainly and once with symlinks.
            MiniDFSCluster dfs = null;
            MiniMRCluster mr = null;
            FileSystem fileSys = null;
            try
            {
                JobConf conf = new JobConf();
                dfs = new MiniDFSCluster.Builder(conf).Build();
                fileSys = dfs.GetFileSystem();
                mr = new MiniMRCluster(2, fileSys.GetUri().ToString(), 4);
                MRCaching.SetupCache("/cachedir", fileSys);
                string text = "The quick brown fox\nhas many silly\n" + "red fox sox\n";
                // run the wordcount example with caching
                MRCaching.TestResult ret = MRCaching.LaunchMRCache("/testing/wc/input", "/testing/wc/output"
                                                                   , "/cachedir", mr.CreateJobConf(), text);
                NUnit.Framework.Assert.IsTrue("Archives not matching", ret.isOutputOk);
                // launch MR cache with symlinks
                ret = MRCaching.LaunchMRCache("/testing/wc/input", "/testing/wc/output", "/cachedir"
                                              , mr.CreateJobConf(), text);
                NUnit.Framework.Assert.IsTrue("Archives not matching", ret.isOutputOk);
            }
            finally
            {
                // Close the filesystem handle first, then stop both clusters.
                if (fileSys != null)
                {
                    fileSys.Close();
                }
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }
 /// <summary>
 /// Creates a job configuration targeting the Hadoop instance managed by
 /// this testcase: either the in-process local framework or the mini cluster.
 /// </summary>
 /// <returns>configuration that works on the testcase Hadoop instance</returns>
 protected internal virtual JobConf CreateJobConf()
 {
     if (!localMR)
     {
         // Cluster mode: the mini cluster hands out a preconfigured conf.
         return mrCluster.CreateJobConf();
     }
     // Local mode: point the MR framework name at the local runner.
     JobConf conf = new JobConf();
     conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
     return conf;
 }
Exemple #8
0
        /// <summary>Verify that at least one segment does not hit disk</summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestReduceFromPartialMem()
        {
            int     MapTasks = 7;
            JobConf job      = mrCluster.CreateJobConf();

            // Configure the reduce side to merge in memory: never spill merged
            // segments (threshold 0), buffer all reduce input (1.0), and fetch
            // one map output at a time.
            job.SetNumMapTasks(MapTasks);
            job.SetInt(JobContext.ReduceMergeInmemThreshold, 0);
            job.Set(JobContext.ReduceInputBufferPercent, "1.0");
            job.SetInt(JobContext.ShuffleParallelCopies, 1);
            job.SetInt(JobContext.IoSortMb, 10);
            job.Set(JobConf.MapredReduceTaskJavaOpts, "-Xmx128m");
            job.SetLong(JobContext.ReduceMemoryTotalBytes, 128 << 20);
            job.Set(JobContext.ShuffleInputBufferPercent, "0.14");
            job.Set(JobContext.ShuffleMergePercent, "1.0");
            Counters c     = RunJob(job);
            long     @out  = c.FindCounter(TaskCounter.MapOutputRecords).GetCounter();
            long     spill = c.FindCounter(TaskCounter.SpilledRecords).GetCounter();

            // If every record hit disk on both map and reduce sides the spill
            // count would reach 2 * out; strictly fewer proves some segments
            // stayed in memory.
            // Fixed message: it previously rendered as "...during reduce<N>)"
            // with no opening parenthesis or label before the counter value.
            NUnit.Framework.Assert.IsTrue("Expected some records not spilled during reduce (spill="
                                          + spill + ")", spill < 2 * @out);
        }
        /// <summary>Launches a MR job and tests the job counters against the expected values.
        ///     </summary>
        /// <param name="jobName">The name for the job</param>
        /// <param name="mr">The MR cluster</param>
        /// <param name="fileSys">The FileSystem</param>
        /// <param name="in">Input path</param>
        /// <param name="out">Output path</param>
        /// <param name="numMaps">Number of maps</param>
        /// <param name="otherLocalMaps">Expected value of other local maps</param>
        /// <param name="dataLocalMaps">Expected value of data(node) local maps</param>
        /// <param name="rackLocalMaps">Expected value of rack local maps</param>
        /// <exception cref="System.IO.IOException"/>
        internal static void LaunchJobAndTestCounters(string jobName, MiniMRCluster mr, FileSystem
                                                      fileSys, Path @in, Path @out, int numMaps, int otherLocalMaps, int dataLocalMaps
                                                      , int rackLocalMaps)
        {
            JobConf jobConf = mr.CreateJobConf();

            // Remove stale output from a previous run so the job can write anew.
            if (fileSys.Exists(@out))
            {
                fileSys.Delete(@out, true);
            }
            RunningJob job      = LaunchJob(jobConf, @in, @out, numMaps, jobName);
            Counters   counters = job.GetCounters();

            // NOTE(review): argument order here looks like the Sharpen/JUnit
            // shim convention (message, expected, actual); confirm against the
            // Assert implementation in use before changing.
            NUnit.Framework.Assert.AreEqual("Number of local maps", counters.GetCounter(JobCounter
                                                                                        .OtherLocalMaps), otherLocalMaps);
            NUnit.Framework.Assert.AreEqual("Number of Data-local maps", counters.GetCounter(
                                                JobCounter.DataLocalMaps), dataLocalMaps);
            NUnit.Framework.Assert.AreEqual("Number of Rack-local maps", counters.GetCounter(
                                                JobCounter.RackLocalMaps), rackLocalMaps);
            // Drain and stop the cluster once the counters have been verified.
            mr.WaitUntilIdle();
            mr.Shutdown();
        }
Exemple #10
0
        /// <summary>
        /// Runs a low-memory job and then a high-memory job and asserts that the
        /// heap-usage counters reported for the high-memory job's map and reduce
        /// tasks are strictly larger than those of the low-memory job.
        /// </summary>
        public virtual void TestHeapUsageCounter()
        {
            JobConf conf = new JobConf();
            // create a local filesystem handle
            FileSystem fileSystem = FileSystem.GetLocal(conf);
            // define test root directories
            Path rootDir     = new Path(Runtime.GetProperty("test.build.data", "/tmp"));
            Path testRootDir = new Path(rootDir, "testHeapUsageCounter");

            // cleanup the test root directory
            fileSystem.Delete(testRootDir, true);
            // set the current working directory
            fileSystem.SetWorkingDirectory(testRootDir);
            fileSystem.DeleteOnExit(testRootDir);
            // create a mini cluster using the local file system
            MiniMRCluster mrCluster = new MiniMRCluster(1, fileSystem.GetUri().ToString(), 1);

            try
            {
                // reuse `conf`: from here on it is the cluster-provided job conf
                conf = mrCluster.CreateJobConf();
                JobClient jobClient = new JobClient(conf);
                // define job input
                Path inDir = new Path(testRootDir, "in");
                // create input data
                CreateWordsFile(inDir, conf);
                // configure and run a low memory job which will run without loading the
                // jvm's heap
                RunningJob lowMemJob = RunHeapUsageTestJob(conf, testRootDir, "-Xms32m -Xmx1G", 0
                                                           , 0, fileSystem, jobClient, inDir);
                JobID lowMemJobID           = lowMemJob.GetID();
                long  lowMemJobMapHeapUsage = GetTaskCounterUsage(jobClient, lowMemJobID, 1, 0, TaskType
                                                                  .Map);
                System.Console.Out.WriteLine("Job1 (low memory job) map task heap usage: " + lowMemJobMapHeapUsage
                                             );
                long lowMemJobReduceHeapUsage = GetTaskCounterUsage(jobClient, lowMemJobID, 1, 0,
                                                                    TaskType.Reduce);
                System.Console.Out.WriteLine("Job1 (low memory job) reduce task heap usage: " + lowMemJobReduceHeapUsage
                                             );
                // configure and run a high memory job which will load the jvm's heap,
                // targeting 256MB above what the low-memory job reported per task
                RunningJob highMemJob = RunHeapUsageTestJob(conf, testRootDir, "-Xms32m -Xmx1G",
                                                            lowMemJobMapHeapUsage + 256 * 1024 * 1024, lowMemJobReduceHeapUsage + 256 * 1024
                                                            * 1024, fileSystem, jobClient, inDir);
                JobID highMemJobID           = highMemJob.GetID();
                long  highMemJobMapHeapUsage = GetTaskCounterUsage(jobClient, highMemJobID, 1, 0,
                                                                   TaskType.Map);
                System.Console.Out.WriteLine("Job2 (high memory job) map task heap usage: " + highMemJobMapHeapUsage
                                             );
                long highMemJobReduceHeapUsage = GetTaskCounterUsage(jobClient, highMemJobID, 1,
                                                                     0, TaskType.Reduce);
                System.Console.Out.WriteLine("Job2 (high memory job) reduce task heap usage: " +
                                             highMemJobReduceHeapUsage);
                // the high-memory job must report strictly larger heap usage
                NUnit.Framework.Assert.IsTrue("Incorrect map heap usage reported by the map task"
                                              , lowMemJobMapHeapUsage < highMemJobMapHeapUsage);
                NUnit.Framework.Assert.IsTrue("Incorrect reduce heap usage reported by the reduce task"
                                              , lowMemJobReduceHeapUsage < highMemJobReduceHeapUsage);
            }
            finally
            {
                // shutdown the mr cluster
                mrCluster.Shutdown();
                try
                {
                    fileSystem.Delete(testRootDir, true);
                }
                catch (IOException)
                {
                    // best-effort cleanup; ignore failures removing the temp dir
                }
            }
        }
Exemple #11
0
 /// <summary>
 /// Creates a job configuration preconfigured for the Hadoop instance
 /// managed by this testcase.
 /// </summary>
 /// <returns>configuration that works on the testcase Hadoop instance</returns>
 protected internal virtual JobConf CreateJobConf()
 {
     // Delegate to the mini cluster, which knows its own endpoints.
     return mrCluster.CreateJobConf();
 }
Exemple #12
0
        /// <exception cref="System.Exception"/>
        public virtual void TestLazyOutput()
        {
            // Verifies lazy output-file creation: tasks that emit no records
            // should not create output files when the lazy flag is set.
            MiniDFSCluster dfs = null;
            MiniMRCluster mr = null;
            FileSystem fileSys = null;
            try
            {
                Configuration conf = new Configuration();
                // Start the mini-MR and mini-DFS clusters
                dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(NumHadoopSlaves).Build();
                fileSys = dfs.GetFileSystem();
                mr = new MiniMRCluster(NumHadoopSlaves, fileSys.GetUri().ToString(), 1);
                int numReducers = 2;
                int numMappers = NumHadoopSlaves * NumMapsPerNode;
                CreateInput(fileSys, numMappers);
                // Test 1. Lazy flag on with reducers: one empty reducer writes nothing.
                Path output1 = new Path("/testlazy/output1");
                RunTestLazyOutput(mr.CreateJobConf(), output1, numReducers, true);
                Path[] fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter()));
                for (int idx = 0; idx < fileList.Length; ++idx)
                {
                    System.Console.Out.WriteLine("Test1 File list[" + idx + "]" + ": " + fileList[idx]);
                }
                NUnit.Framework.Assert.IsTrue((numReducers - 1) == fileList.Length);
                // Test 2. 0 Reducers, maps directly write to the output files
                Path output2 = new Path("/testlazy/output2");
                RunTestLazyOutput(mr.CreateJobConf(), output2, 0, true);
                fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output2, new Utils.OutputFileUtils.OutputFilesFilter()));
                for (int idx = 0; idx < fileList.Length; ++idx)
                {
                    System.Console.Out.WriteLine("Test2 File list[" + idx + "]" + ": " + fileList[idx]);
                }
                NUnit.Framework.Assert.IsTrue((numMappers - 1) == fileList.Length);
                // Test 3. 0 Reducers, but flag is turned off
                Path output3 = new Path("/testlazy/output3");
                RunTestLazyOutput(mr.CreateJobConf(), output3, 0, false);
                fileList = FileUtil.Stat2Paths(fileSys.ListStatus(output3, new Utils.OutputFileUtils.OutputFilesFilter()));
                for (int idx = 0; idx < fileList.Length; ++idx)
                {
                    System.Console.Out.WriteLine("Test3 File list[" + idx + "]" + ": " + fileList[idx]);
                }
                NUnit.Framework.Assert.IsTrue(numMappers == fileList.Length);
            }
            finally
            {
                // Tear both clusters down regardless of the outcome.
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }
        // Input output paths for this..
        // these are all dummy and does not test
        // much in map reduce except for the command line
        // params
        /// <summary>
        /// Runs ExternalMapReduce through ToolRunner with -files, -libjars,
        /// -archives and -D options, checking that the libjar is absent from the
        /// client classpath before the run and present after it.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestJobShell()
        {
            MiniDFSCluster dfs      = null;
            MiniMRCluster  mr       = null;
            FileSystem     fs       = null;
            Path           testFile = new Path(input, "testfile");

            try
            {
                Configuration conf = new Configuration();
                //start the mini mr and dfs cluster.
                dfs = new MiniDFSCluster.Builder(conf).NumDataNodes(2).Build();
                fs  = dfs.GetFileSystem();
                // seed DFS with a small input file for the job
                FSDataOutputStream stream = fs.Create(testFile);
                stream.Write(Sharpen.Runtime.GetBytesForString("teststring"));
                stream.Close();
                mr = new MiniMRCluster(2, fs.GetUri().ToString(), 1);
                // create two local scratch files to distribute via -files
                FilePath thisbuildDir = new FilePath(buildDir, "jobCommand");
                NUnit.Framework.Assert.IsTrue("create build dir", thisbuildDir.Mkdirs());
                FilePath         f       = new FilePath(thisbuildDir, "files_tmp");
                FileOutputStream fstream = new FileOutputStream(f);
                fstream.Write(Sharpen.Runtime.GetBytesForString("somestrings"));
                fstream.Close();
                FilePath f1 = new FilePath(thisbuildDir, "files_tmp1");
                fstream = new FileOutputStream(f1);
                fstream.Write(Sharpen.Runtime.GetBytesForString("somestrings"));
                fstream.Close();
                // copy files to dfs
                Path cachePath = new Path("/cacheDir");
                if (!fs.Mkdirs(cachePath))
                {
                    throw new IOException("Mkdirs failed to create " + cachePath.ToString());
                }
                // fixtures shipped with the test build (test.cache.data system property)
                Path localCachePath = new Path(Runtime.GetProperty("test.cache.data"));
                Path txtPath        = new Path(localCachePath, new Path("test.txt"));
                Path jarPath        = new Path(localCachePath, new Path("test.jar"));
                Path zipPath        = new Path(localCachePath, new Path("test.zip"));
                Path tarPath        = new Path(localCachePath, new Path("test.tar"));
                Path tgzPath        = new Path(localCachePath, new Path("test.tgz"));
                fs.CopyFromLocalFile(txtPath, cachePath);
                fs.CopyFromLocalFile(jarPath, cachePath);
                fs.CopyFromLocalFile(zipPath, cachePath);
                // construct options for -files
                // mixes plain files, a #fragment symlink, and a DFS URI with a symlink
                string[] files = new string[3];
                files[0] = f.ToString();
                files[1] = f1.ToString() + "#localfilelink";
                files[2] = fs.GetUri().Resolve(cachePath + "/test.txt#dfsfilelink").ToString();
                // construct options for -libjars
                string[] libjars = new string[2];
                libjars[0] = "build/test/mapred/testjar/testjob.jar";
                libjars[1] = fs.GetUri().Resolve(cachePath + "/test.jar").ToString();
                // construct options for archives
                string[] archives = new string[3];
                archives[0] = tgzPath.ToString();
                archives[1] = tarPath + "#tarlink";
                archives[2] = fs.GetUri().Resolve(cachePath + "/test.zip#ziplink").ToString();
                // full command line handed to ToolRunner
                string[] args = new string[10];
                args[0] = "-files";
                args[1] = StringUtils.ArrayToString(files);
                args[2] = "-libjars";
                // the testjob.jar as a temporary jar file
                // rather than creating its own
                args[3] = StringUtils.ArrayToString(libjars);
                args[4] = "-archives";
                args[5] = StringUtils.ArrayToString(archives);
                args[6] = "-D";
                args[7] = "mapred.output.committer.class=testjar.CustomOutputCommitter";
                args[8] = input.ToString();
                args[9] = output.ToString();
                JobConf jobConf = mr.CreateJobConf();
                //before running the job, verify that libjar is not in client classpath
                NUnit.Framework.Assert.IsTrue("libjar not in client classpath", LoadLibJar(jobConf
                                                                                           ) == null);
                int ret = ToolRunner.Run(jobConf, new ExternalMapReduce(), args);
                //after running the job, verify that libjar is in the client classpath
                NUnit.Framework.Assert.IsTrue("libjar added to client classpath", LoadLibJar(jobConf
                                                                                             ) != null);
                // NOTE(review): this only rejects -1; a nonzero-but-not--1 exit
                // would still pass — confirm ExternalMapReduce's return contract.
                NUnit.Framework.Assert.IsTrue("not failed ", ret != -1);
                // clean up local scratch artifacts on success
                f.Delete();
                thisbuildDir.Delete();
            }
            finally
            {
                if (dfs != null)
                {
                    dfs.Shutdown();
                }
                if (mr != null)
                {
                    mr.Shutdown();
                }
            }
        }