示例#1
0
        /// <summary>
        /// Builds an identity map/reduce job from the configuration of
        /// <c>mrCluster</c>, installs <c>MyCombinerToCheckReporter</c> as its
        /// combiner, and hands the job to <c>RunJob</c>.
        /// </summary>
        public virtual void TestCombinerShouldUpdateTheReporter()
        {
            JobConf jobConf     = new JobConf(mrCluster.GetConfig());
            int     mapCount    = 5;
            int     reduceCount = 2;

            // Input and output folders live under the cluster's test work directory.
            Path inputDir = new Path(
                mrCluster.GetTestWorkDir().GetAbsolutePath(),
                "testCombinerShouldUpdateTheReporter-in");
            Path outputDir = new Path(
                mrCluster.GetTestWorkDir().GetAbsolutePath(),
                "testCombinerShouldUpdateTheReporter-out");
            CreateInputOutPutFolder(inputDir, outputDir, mapCount);

            // Processing pipeline: identity mapper -> combiner under test -> identity reducer.
            jobConf.SetJobName("test-job-with-combiner");
            jobConf.SetMapperClass(typeof(IdentityMapper));
            jobConf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
            //conf.setJarByClass(MyCombinerToCheckReporter.class);
            jobConf.SetReducerClass(typeof(IdentityReducer));
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, jobConf);

            // Input/output formats and key/value types.
            jobConf.SetOutputCommitter(typeof(CustomOutputCommitter));
            jobConf.SetInputFormat(typeof(TextInputFormat));
            jobConf.SetOutputKeyClass(typeof(LongWritable));
            jobConf.SetOutputValueClass(typeof(Text));
            FileInputFormat.SetInputPaths(jobConf, inputDir);
            FileOutputFormat.SetOutputPath(jobConf, outputDir);

            jobConf.SetNumMapTasks(mapCount);
            jobConf.SetNumReduceTasks(reduceCount);
            RunJob(jobConf);
        }
示例#2
0
        /// <summary>
        /// Command-line entry point: configures the "secondary sort" job from the
        /// arguments left over after <c>GenericOptionsParser</c> has run, waits
        /// for it, and exits the process with 0 on success or 1 on failure.
        /// </summary>
        /// <param name="args">Command-line arguments; after option parsing exactly
        /// two must remain: the input path and the output path.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Configuration        configuration = new Configuration();
            GenericOptionsParser parser        = new GenericOptionsParser(configuration, args);
            string[]             remaining     = parser.GetRemainingArgs();

            // Anything other than <in> <out> is a usage error (exit code 2).
            if (remaining.Length != 2)
            {
                System.Console.Error.WriteLine("Usage: secondarysort <in> <out>");
                System.Environment.Exit(2);
            }

            Job sortJob = Job.GetInstance(configuration, "secondary sort");
            sortJob.SetJarByClass(typeof(SecondarySort));
            sortJob.SetMapperClass(typeof(SecondarySort.MapClass));
            sortJob.SetReducerClass(typeof(SecondarySort.Reduce));

            // Partitioning and grouping both use only the first int of the pair.
            sortJob.SetPartitionerClass(typeof(SecondarySort.FirstPartitioner));
            sortJob.SetGroupingComparatorClass(typeof(SecondarySort.FirstGroupingComparator));

            // Map output: (IntPair, IntWritable).
            sortJob.SetMapOutputKeyClass(typeof(SecondarySort.IntPair));
            sortJob.SetMapOutputValueClass(typeof(IntWritable));

            // Reduce output: (Text, IntWritable).
            sortJob.SetOutputKeyClass(typeof(Text));
            sortJob.SetOutputValueClass(typeof(IntWritable));

            Path inputPath = new Path(remaining[0]);
            FileInputFormat.AddInputPath(sortJob, inputPath);
            Path outputPath = new Path(remaining[1]);
            FileOutputFormat.SetOutputPath(sortJob, outputPath);

            int exitCode = sortJob.WaitForCompletion(true) ? 0 : 1;
            System.Environment.Exit(exitCode);
        }
示例#3
0
        /// <summary>
        /// Runs the "word mean" job on the given input, then stores the value
        /// computed by <c>ReadAndCalcMean</c> from the job output in <c>mean</c>.
        /// </summary>
        /// <param name="args">Exactly two elements: the input path and the output path.</param>
        /// <returns>
        /// 0 when the job succeeds, 1 when it fails, and 2 when the arguments
        /// are malformed.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            if (args.Length != 2)
            {
                System.Console.Error.WriteLine("Usage: wordmean <in> <out>");
                // A usage error must not be reported as success; 2 matches the
                // usage-error status used by the other tools in this code base.
                return(2);
            }
            Configuration conf = GetConf();
            Job           job  = Job.GetInstance(conf, "word mean");

            job.SetJarByClass(typeof(WordMean));
            job.SetMapperClass(typeof(WordMean.WordMeanMapper));
            // The reducer class doubles as the combiner.
            job.SetCombinerClass(typeof(WordMean.WordMeanReducer));
            job.SetReducerClass(typeof(WordMean.WordMeanReducer));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(LongWritable));
            FileInputFormat.AddInputPath(job, new Path(args[0]));
            Path outputpath = new Path(args[1]);

            FileOutputFormat.SetOutputPath(job, outputpath);
            bool result = job.WaitForCompletion(true);

            // NOTE(review): the output is read even when the job reports failure;
            // confirm ReadAndCalcMean copes with missing or partial output.
            mean = ReadAndCalcMean(outputpath, conf);
            return(result ? 0 : 1);
        }
        /// <summary>
        /// Runs a single-reducer job wired with the tracking mapper, reducer,
        /// input format and output format, then asserts that the four cleanup
        /// flags (<c>mapCleanup</c>, <c>reduceCleanup</c>,
        /// <c>recordReaderCleanup</c>, <c>recordWriterCleanup</c>) are all set.
        /// </summary>
        public virtual void TestJobSuccessCleanup()
        {
            Reset();

            Job           trackedJob = Job.GetInstance();
            Path          source     = CreateInput();
            Path          target     = GetOutputPath();
            Configuration localConf  = new Configuration();
            FileSystem    localFiles = FileSystem.GetLocal(localConf);

            // Remove any output left over from an earlier run.
            if (localFiles.Exists(target))
            {
                localFiles.Delete(target, true);
            }

            trackedJob.SetMapperClass(typeof(TestMapperReducerCleanup.TrackingTokenizerMapper));
            trackedJob.SetReducerClass(typeof(TestMapperReducerCleanup.TrackingIntSumReducer));
            trackedJob.SetOutputKeyClass(typeof(Text));
            trackedJob.SetOutputValueClass(typeof(IntWritable));
            trackedJob.SetInputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextInputFormat));
            trackedJob.SetOutputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextOutputFormat));
            trackedJob.SetNumReduceTasks(1);
            FileInputFormat.AddInputPath(trackedJob, source);
            FileOutputFormat.SetOutputPath(trackedJob, target);

            // The completion status itself is not checked; only the flags are.
            trackedJob.WaitForCompletion(true);

            NUnit.Framework.Assert.IsTrue(mapCleanup);
            NUnit.Framework.Assert.IsTrue(reduceCleanup);
            NUnit.Framework.Assert.IsTrue(recordReaderCleanup);
            NUnit.Framework.Assert.IsTrue(recordWriterCleanup);
        }
示例#5
0
 /// <summary>
 /// Configures and runs the "TestValueIterReset" job with <c>NumTests</c>
 /// reducers, asserts that it completes successfully, and then calls
 /// <c>ValidateOutput</c>.
 /// </summary>
 public virtual void TestValueIterReset()
 {
     try
     {
         Configuration conf = new Configuration();
         Job           job  = Job.GetInstance(conf, "TestValueIterReset");
         job.SetJarByClass(typeof(TestValueIterReset));
         job.SetMapperClass(typeof(TestValueIterReset.TestMapper));
         job.SetReducerClass(typeof(TestValueIterReset.TestReducer));
         job.SetNumReduceTasks(NumTests);
         job.SetMapOutputKeyClass(typeof(IntWritable));
         job.SetMapOutputValueClass(typeof(IntWritable));
         job.SetOutputKeyClass(typeof(IntWritable));
         job.SetOutputValueClass(typeof(IntWritable));
         job.GetConfiguration().SetInt(MRJobConfig.ReduceMarkresetBufferSize, 128);
         job.SetInputFormatClass(typeof(TextInputFormat));
         job.SetOutputFormatClass(typeof(TextOutputFormat));
         FileInputFormat.AddInputPath(job, new Path(TestRootDir + "/in"));
         Path output = new Path(TestRootDir + "/out");
         // Clear any previous output before pointing the job at it.
         localFs.Delete(output, true);
         FileOutputFormat.SetOutputPath(job, output);
         CreateInput();
         NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true));
         ValidateOutput();
     }
     catch (Exception e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
         // Fail with the cause attached: a bare Assert.IsTrue(false) reports
         // the failure without saying what went wrong.
         NUnit.Framework.Assert.Fail("TestValueIterReset failed: " + e);
     }
 }
示例#6
0
        /// <summary>
        /// Writes the sleep parameters into the configuration returned by
        /// <c>GetConf()</c> and builds a job on top of it that uses the
        /// <c>SleepJob</c> mapper, reducer, input format and partitioner.
        /// </summary>
        /// <param name="numMapper">Value stored under <c>MRJobConfig.NumMaps</c>.</param>
        /// <param name="numReducer">Number of reduce tasks set on the job.</param>
        /// <param name="mapSleepTime">Value stored under <c>MapSleepTime</c>.</param>
        /// <param name="mapSleepCount">Value stored under <c>MapSleepCount</c>.</param>
        /// <param name="reduceSleepTime">Value stored under <c>ReduceSleepTime</c>.</param>
        /// <param name="reduceSleepCount">Value stored under <c>ReduceSleepCount</c>.</param>
        /// <returns>The configured, not yet submitted job.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual Job CreateJob(int numMapper, int numReducer, long mapSleepTime,
                                     int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
        {
            // The settings are written to the configuration before the job is created from it.
            Configuration settings = GetConf();
            settings.SetLong(MapSleepTime, mapSleepTime);
            settings.SetLong(ReduceSleepTime, reduceSleepTime);
            settings.SetInt(MapSleepCount, mapSleepCount);
            settings.SetInt(ReduceSleepCount, reduceSleepCount);
            settings.SetInt(MRJobConfig.NumMaps, numMapper);

            Job sleepJob = Job.GetInstance(settings, "sleep");
            sleepJob.SetNumReduceTasks(numReducer);
            sleepJob.SetJarByClass(typeof(SleepJob));

            // Map side.
            sleepJob.SetMapperClass(typeof(SleepJob.SleepMapper));
            sleepJob.SetMapOutputKeyClass(typeof(IntWritable));
            sleepJob.SetMapOutputValueClass(typeof(NullWritable));

            // Reduce side and formats.
            sleepJob.SetReducerClass(typeof(SleepJob.SleepReducer));
            sleepJob.SetOutputFormatClass(typeof(NullOutputFormat));
            sleepJob.SetInputFormatClass(typeof(SleepJob.SleepInputFormat));
            sleepJob.SetPartitionerClass(typeof(SleepJob.SleepJobPartitioner));

            sleepJob.SetSpeculativeExecution(false);
            sleepJob.SetJobName("Sleep job");
            FileInputFormat.AddInputPath(sleepJob, new Path("ignored"));
            return sleepJob;
        }
        /// <summary>
        /// Repeatedly rewrites <c>test.txt</c> in <c>workDir</c> with a growing
        /// number of numbered lines and calls <c>CheckFormat</c> after each
        /// rewrite, using 5 lines per map.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestFormat()
        {
            JobConf jobConf  = new JobConf();
            Path    testFile = new Path(workDir, "test.txt");
            int     seed     = new Random().Next();
            Random  rng      = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(jobConf, workDir);

            int linesPerMap = 5;
            jobConf.SetInt("mapreduce.input.lineinputformat.linespermap", linesPerMap);

            // Try a variety of line counts, growing by a random step each round.
            int lineCount = 0;
            while (lineCount < MaxLength)
            {
                // Write a file holding exactly lineCount entries.
                TextWriter output = new OutputStreamWriter(localFs.Create(testFile));
                try
                {
                    int written = 0;
                    while (written < lineCount)
                    {
                        output.Write(Sharpen.Extensions.ToString(written));
                        output.Write("\n");
                        written++;
                    }
                }
                finally
                {
                    output.Close();
                }

                CheckFormat(jobConf, linesPerMap);
                lineCount += rng.Next(MaxLength / 10) + 1;
            }
        }
示例#8
0
        /// <summary>
        /// Configures and runs the "MultiFileWordCount" job.
        /// </summary>
        /// <param name="args">At least two elements: the input paths string
        /// passed to <c>FileInputFormat.AddInputPaths</c> and the output path.</param>
        /// <returns>2 on a usage error, 0 when the job succeeds, otherwise 1.</returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            // Guard: both <in> and <out> must be supplied.
            if (args.Length < 2)
            {
                PrintUsage();
                return 2;
            }

            Job countJob = Job.GetInstance(GetConf());
            countJob.SetJobName("MultiFileWordCount");
            countJob.SetJarByClass(typeof(MultiFileWordCount));

            // Input is read through this example's own InputFormat.
            countJob.SetInputFormatClass(typeof(MultiFileWordCount.MyInputFormat));

            // Output records are (word, count) pairs.
            countJob.SetOutputKeyClass(typeof(Text));
            countJob.SetOutputValueClass(typeof(IntWritable));

            // Mapper defined by this example; IntSumReducer acts as both
            // combiner and reducer.
            countJob.SetMapperClass(typeof(MultiFileWordCount.MapClass));
            countJob.SetCombinerClass(typeof(IntSumReducer));
            countJob.SetReducerClass(typeof(IntSumReducer));

            FileInputFormat.AddInputPaths(countJob, args[0]);
            FileOutputFormat.SetOutputPath(countJob, new Path(args[1]));

            bool succeeded = countJob.WaitForCompletion(true);
            return succeeded ? 0 : 1;
        }
示例#9
0
        /// <summary>
        /// Recreates <paramref name="inDir"/> with <paramref name="numInputFiles"/>
        /// files named <c>part-0</c>, <c>part-1</c>, ... that each contain
        /// <paramref name="input"/>, deletes <paramref name="outDir"/> if it exists,
        /// and returns a job that reads from and writes to those directories.
        /// </summary>
        /// <param name="conf">Configuration used for both the job and the file system.</param>
        /// <param name="inDir">Input directory; deleted and recreated.</param>
        /// <param name="outDir">Output directory; deleted if present.</param>
        /// <param name="numInputFiles">Number of input files to write.</param>
        /// <param name="numReds">Number of reduce tasks set on the job.</param>
        /// <param name="input">Content written to every input file.</param>
        /// <returns>The configured, not yet submitted job.</returns>
        /// <exception cref="System.IO.IOException"/>
        public static Job CreateJob(Configuration conf, Path inDir, Path outDir, int numInputFiles
                                    , int numReds, string input)
        {
            Job        job = Job.GetInstance(conf);
            FileSystem fs  = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            if (fs.Exists(inDir))
            {
                fs.Delete(inDir, true);
            }
            fs.Mkdirs(inDir);
            for (int i = 0; i < numInputFiles; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                try
                {
                    file.WriteBytes(input);
                }
                finally
                {
                    // Close even when the write fails so the stream is not leaked.
                    file.Close();
                }
            }
            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            job.SetNumReduceTasks(numReds);
            return(job);
        }
示例#10
0
            // no check for ll.isEmpty() to permit extension
            /// <summary>
            /// Creates a job from <paramref name="jconf"/>, sets <c>indir</c> as its
            /// input path, and returns that job's configuration.
            /// </summary>
            /// <param name="jconf">Configuration the job is created from.</param>
            /// <returns>The configuration of the newly created job.</returns>
            /// <exception cref="System.IO.IOException"/>
            private Configuration GetConf(Configuration jconf)
            {
                Job scratchJob = Job.GetInstance(jconf);
                FileInputFormat.SetInputPaths(scratchJob, indir);

                Configuration withInput = scratchJob.GetConfiguration();
                return withInput;
            }
示例#11
0
        /// <summary>
        /// Runs the "word median" job, reads the <c>MAP_OUTPUT_RECORDS</c> counter
        /// to derive the two middle indices, and stores the value returned by
        /// <c>ReadAndFindMedian</c> in <c>median</c>.
        /// </summary>
        /// <param name="args">Exactly two elements: the input path and the output path.</param>
        /// <returns>
        /// 0 when the job succeeds, 1 when it fails, and 2 when the arguments
        /// are malformed.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            if (args.Length != 2)
            {
                System.Console.Error.WriteLine("Usage: wordmedian <in> <out>");
                // A usage error must not be reported as success; 2 matches the
                // usage-error status used by the other tools in this code base.
                return(2);
            }
            // NOTE(review): this replaces whatever configuration was set before
            // Run was called with a fresh one - confirm that is intended.
            SetConf(new Configuration());
            Configuration conf = GetConf();
            Job           job  = Job.GetInstance(conf, "word median");

            job.SetJarByClass(typeof(WordMedian));
            job.SetMapperClass(typeof(WordMedian.WordMedianMapper));
            // The reducer class doubles as the combiner.
            job.SetCombinerClass(typeof(WordMedian.WordMedianReducer));
            job.SetReducerClass(typeof(WordMedian.WordMedianReducer));
            job.SetOutputKeyClass(typeof(IntWritable));
            job.SetOutputValueClass(typeof(IntWritable));
            FileInputFormat.AddInputPath(job, new Path(args[0]));
            FileOutputFormat.SetOutputPath(job, new Path(args[1]));
            bool result = job.WaitForCompletion(true);
            // Wait for JOB 1 -- get middle value to check for Median
            long totalWords = job.GetCounters()
                                 .GetGroup(typeof(TaskCounter).GetCanonicalName())
                                 .FindCounter("MAP_OUTPUT_RECORDS", "Map output records")
                                 .GetValue();
            // For an even count the two indices coincide; for an odd count they
            // are the values on either side of totalWords / 2.
            int medianIndex1 = (int)Math.Ceil((totalWords / 2.0));
            int medianIndex2 = (int)Math.Floor((totalWords / 2.0));

            median = ReadAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
            return(result ? 0 : 1);
        }
示例#12
0
        /// <summary>
        /// Writes <paramref name="numMaps"/> small text files into
        /// <paramref name="inDir"/>, completes the job configuration (committer,
        /// formats, key/value types, paths, task counts), submits the job and
        /// monitors it until it finishes.
        /// </summary>
        /// <param name="conf">Job configuration to complete and submit.</param>
        /// <param name="inDir">Input directory; created if missing.</param>
        /// <param name="outDir">Output directory; deleted if present.</param>
        /// <param name="numMaps">Number of input files and of map tasks.</param>
        /// <param name="numReds">Number of reduce tasks.</param>
        /// <returns>The value returned by <c>JobClient.MonitorAndPrintJob</c>.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int
                                    numReds)
        {
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            if (!fs.Exists(inDir))
            {
                fs.Mkdirs(inDir);
            }
            string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";

            for (int i = 0; i < numMaps; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                try
                {
                    file.WriteBytes(input);
                }
                finally
                {
                    // Close even when the write fails so the stream is not leaked.
                    file.Close();
                }
            }
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs);
            conf.SetOutputCommitter(typeof(CustomOutputCommitter));
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReds);
            JobClient  jobClient = new JobClient(conf);
            RunningJob job       = jobClient.SubmitJob(conf);

            return(jobClient.MonitorAndPrintJob(conf, job));
        }
示例#13
0
        /// <summary>
        /// Command-line entry point: configures the "word count" job from the
        /// arguments left over after <c>GenericOptionsParser</c> has run, waits
        /// for it, and exits the process with 0 on success or 1 on failure.
        /// </summary>
        /// <param name="args">Command-line arguments; after option parsing at
        /// least two must remain: one or more input paths followed by the
        /// output path.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Configuration        configuration = new Configuration();
            GenericOptionsParser parser        = new GenericOptionsParser(configuration, args);
            string[]             remaining     = parser.GetRemainingArgs();

            // At least one input and the output are required (exit code 2 otherwise).
            if (remaining.Length < 2)
            {
                System.Console.Error.WriteLine("Usage: wordcount <in> [<in>...] <out>");
                System.Environment.Exit(2);
            }

            Job countJob = Job.GetInstance(configuration, "word count");
            countJob.SetJarByClass(typeof(WordCount));
            countJob.SetMapperClass(typeof(WordCount.TokenizerMapper));
            // IntSumReducer acts as both combiner and reducer.
            countJob.SetCombinerClass(typeof(WordCount.IntSumReducer));
            countJob.SetReducerClass(typeof(WordCount.IntSumReducer));
            countJob.SetOutputKeyClass(typeof(Text));
            countJob.SetOutputValueClass(typeof(IntWritable));

            // Every argument except the last one is an input path.
            int index = 0;
            while (index < remaining.Length - 1)
            {
                FileInputFormat.AddInputPath(countJob, new Path(remaining[index]));
                ++index;
            }

            // The last argument is the output path.
            FileOutputFormat.SetOutputPath(countJob, new Path(remaining[remaining.Length - 1]));

            int exitCode = countJob.WaitForCompletion(true) ? 0 : 1;
            System.Environment.Exit(exitCode);
        }
示例#14
0
        /// <summary>
        /// Times how long it takes to read every record of the benchmark file
        /// through the input format configured on <paramref name="conf"/>.
        /// </summary>
        /// <param name="conf">
        /// Job configuration supplying the input format, the first input path
        /// and the file name stored under <c>test.filebench.name</c>.
        /// </param>
        /// <returns>Elapsed time of the read loop, in milliseconds.</returns>
        /// <exception cref="System.IO.IOException"/>
        internal static long ReadBench(JobConf conf)
        {
            // InputFormat instantiation
            InputFormat  inf = conf.GetInputFormat();
            string       fn  = conf.Get("test.filebench.name", string.Empty);
            Path         pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
            FileStatus   @in = pin.GetFileSystem(conf).GetFileStatus(pin);
            // One split covering the whole file, with no location hints.
            RecordReader rr  = inf.GetRecordReader(
                new FileSplit(pin, 0, @in.GetLen(), (string[])null), conf, Reporter.Null);

            try
            {
                object key = rr.CreateKey();
                object val = rr.CreateValue();
                // A default-constructed DateTime is always DateTime.MinValue, so
                // subtracting two of them can never measure elapsed time; a
                // Stopwatch measures the loop itself.
                System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
                while (rr.Next(key, val))
                {
                }
                timer.Stop();
                return(timer.ElapsedMilliseconds);
            }
            finally
            {
                rr.Close();
            }
        }
示例#15
0
            // no check for ll.isEmpty() to permit extension
            /// <summary>
            /// Returns a copy of <paramref name="job"/> whose input path is
            /// <c>indir</c> and whose class loader is the one from
            /// <paramref name="job"/>.
            /// </summary>
            /// <param name="job">Configuration to copy.</param>
            /// <returns>The new, adjusted configuration.</returns>
            private JobConf GetConf(JobConf job)
            {
                JobConf copy = new JobConf(job);
                FileInputFormat.SetInputPaths(copy, indir);

                // The class loader is carried over explicitly from the source configuration.
                copy.SetClassLoader(job.GetClassLoader());
                return copy;
            }
        /// <summary>
        /// Builds a <c>FileSplit</c> for entry <paramref name="idx"/> of the
        /// combined split and stores the record reader obtained from
        /// <paramref name="inputFormat"/> for it in <c>delegate_</c>.
        /// </summary>
        /// <param name="inputFormat">Input format that supplies the record reader.</param>
        /// <param name="split">Combined split the single-file split is taken from.</param>
        /// <param name="conf">Configuration; it is cast to <c>JobConf</c>.</param>
        /// <param name="reporter">Reporter passed through to the record reader.</param>
        /// <param name="idx">Index of the file inside <paramref name="split"/>.</param>
        /// <exception cref="System.IO.IOException"/>
        protected internal CombineFileRecordReaderWrapper(FileInputFormat<K, V> inputFormat,
                                                          CombineFileSplit split, Configuration conf, Reporter reporter, int idx)
        {
            // Path, offset and length are per-index; the locations are those of the whole split.
            FileSplit singleFile = new FileSplit(
                split.GetPath(idx),
                split.GetOffset(idx),
                split.GetLength(idx),
                split.GetLocations());

            delegate_ = inputFormat.GetRecordReader(singleFile, (JobConf)conf, reporter);
        }
示例#17
0
        /// <summary>
        /// Computes the input splits for a job with multiple inputs: paths are
        /// grouped first by input format type and then by mapper type, each group
        /// is split by its input format, and every resulting split is wrapped in
        /// a <c>TaggedInputSplit</c> that records the format and mapper types.
        /// </summary>
        /// <param name="conf">Job configuration holding the path-to-format and path-to-mapper maps.</param>
        /// <param name="numSplits">Value passed unchanged to each input format's <c>GetSplits</c>.</param>
        /// <returns>All tagged splits, as an array.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual InputSplit[] GetSplits(JobConf conf, int numSplits)
        {
            // confCopy is the scratch configuration whose input paths are
            // overwritten for every (format, mapper) group; conf is left untouched.
            JobConf            confCopy = new JobConf(conf);
            IList <InputSplit> splits   = new AList <InputSplit>();
            IDictionary <Path, InputFormat>   formatMap   = MultipleInputs.GetInputFormatMap(conf);
            IDictionary <Path, Type>          mapperMap   = MultipleInputs.GetMapperTypeMap(conf);
            IDictionary <Type, IList <Path> > formatPaths = new Dictionary <Type, IList <Path> >();

            // First, build a map of InputFormats to Paths
            foreach (KeyValuePair <Path, InputFormat> entry in formatMap)
            {
                if (!formatPaths.Contains(entry.Value.GetType()))
                {
                    formatPaths[entry.Value.GetType()] = new List <Path>();
                }
                formatPaths[entry.Value.GetType()].AddItem(entry.Key);
            }
            foreach (KeyValuePair <Type, IList <Path> > formatEntry in formatPaths)
            {
                Type         formatClass = formatEntry.Key;
                // A fresh input format instance is created per format type.
                InputFormat  format      = (InputFormat)ReflectionUtils.NewInstance(formatClass, conf);
                IList <Path> paths       = formatEntry.Value;
                IDictionary <Type, IList <Path> > mapperPaths = new Dictionary <Type, IList <Path> >();
                // Now, for each set of paths that have a common InputFormat, build
                // a map of Mappers to the paths they're used for
                foreach (Path path in paths)
                {
                    // NOTE(review): the null check further down suggests a path may
                    // have no mapper registered; confirm this indexer and the
                    // mapperPaths dictionary both tolerate that case.
                    Type mapperClass = mapperMap[path];
                    if (!mapperPaths.Contains(mapperClass))
                    {
                        mapperPaths[mapperClass] = new List <Path>();
                    }
                    mapperPaths[mapperClass].AddItem(path);
                }
                // Now each set of paths that has a common InputFormat and Mapper can
                // be added to the same job, and split together.
                foreach (KeyValuePair <Type, IList <Path> > mapEntry in mapperPaths)
                {
                    paths = mapEntry.Value;
                    Type mapperClass = mapEntry.Key;
                    // Paths without their own mapper fall back to the job's mapper.
                    if (mapperClass == null)
                    {
                        mapperClass = conf.GetMapperClass();
                    }
                    FileInputFormat.SetInputPaths(confCopy, Sharpen.Collections.ToArray(paths, new Path
                                                                                        [paths.Count]));
                    // Get splits for each input path and tag with InputFormat
                    // and Mapper types by wrapping in a TaggedInputSplit.
                    InputSplit[] pathSplits = format.GetSplits(confCopy, numSplits);
                    foreach (InputSplit pathSplit in pathSplits)
                    {
                        splits.AddItem(new TaggedInputSplit(pathSplit, conf, format.GetType(), mapperClass
                                                            ));
                    }
                }
            }
            return(Sharpen.Collections.ToArray(splits, new InputSplit[splits.Count]));
        }
示例#18
0
        /// <summary>
        /// Run a test with several mappers in parallel, operating at different
        /// speeds.
        /// </summary>
        /// <remarks>
        /// The job is limited to 6 concurrently running local maps and one
        /// reducer. While the job runs, a helper thread is armed to interrupt the
        /// calling thread; if waiting for the job throws, thread stacks are
        /// logged ten times, one second apart, before the exception is rethrown.
        /// On success the output is checked with <c>VerifyOutput</c>.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestMultiMaps()
        {
            Job           job        = Job.GetInstance();
            Path          inputPath  = CreateMultiMapsInput();
            Path          outputPath = GetOutputPath();
            Configuration conf       = new Configuration();
            FileSystem    fs         = FileSystem.GetLocal(conf);

            // Remove output left over from an earlier run.
            if (fs.Exists(outputPath))
            {
                fs.Delete(outputPath, true);
            }
            job.SetMapperClass(typeof(TestLocalRunner.StressMapper));
            job.SetReducerClass(typeof(TestLocalRunner.CountingReducer));
            job.SetNumReduceTasks(1);
            LocalJobRunner.SetLocalMaxRunningMaps(job, 6);
            job.GetConfiguration().Set(MRJobConfig.IoSortMb, "25");
            FileInputFormat.AddInputPath(job, inputPath);
            FileOutputFormat.SetOutputPath(job, outputPath);
            // Watchdog: a helper thread that is handed the current thread.
            // NOTE(review): _Thread_311 is not visible here; per the log message
            // below it presumably interrupts the current thread after 2 minutes.
            Sharpen.Thread toInterrupt = Sharpen.Thread.CurrentThread();
            Sharpen.Thread interrupter = new _Thread_311(toInterrupt);
            // 2m
            Log.Info("Submitting job...");
            job.Submit();
            Log.Info("Starting thread to interrupt main thread in 2 minutes");
            interrupter.Start();
            Log.Info("Waiting for job to complete...");
            try
            {
                job.WaitForCompletion(true);
            }
            catch (Exception ie)
            {
                // Log thread stacks ten times, one second apart, then rethrow
                // the original exception.
                Log.Fatal("Interrupted while waiting for job completion", ie);
                for (int i = 0; i < 10; i++)
                {
                    Log.Fatal("Dumping stacks");
                    ReflectionUtils.LogThreadInfo(Log, "multimap threads", 0);
                    Sharpen.Thread.Sleep(1000);
                }
                throw;
            }
            Log.Info("Job completed, stopping interrupter");
            interrupter.Interrupt();
            try
            {
                interrupter.Join();
            }
            catch (Exception)
            {
                // Deliberately ignored: it might interrupt us right as we
                // interrupt it.
            }
            // it might interrupt us right as we interrupt it
            Log.Info("Verifying output");
            VerifyOutput(outputPath);
        }
示例#19
0
        /* Extracts matching regexs from input files and counts them. */
        // singleton
        /// <summary>
        /// Runs two jobs in sequence: "grep-search" writes its (Text,
        /// LongWritable) output to a temporary directory, and "grep-sort" reads
        /// that directory and writes a single file to the output directory,
        /// sorted with <c>LongWritable.DecreasingComparator</c>. The temporary
        /// directory is always deleted afterwards.
        /// </summary>
        /// <param name="args">
        /// <c>inDir</c>, <c>outDir</c>, <c>regex</c> and an optional <c>group</c>.
        /// </param>
        /// <returns>
        /// 2 on a usage error, 1 when either job fails, 0 when both succeed.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            if (args.Length < 3)
            {
                System.Console.Out.WriteLine("Grep <inDir> <outDir> <regex> [<group>]");
                ToolRunner.PrintGenericCommandUsage(System.Console.Out);
                return(2);
            }
            // Randomly named scratch directory that links the two jobs.
            Path tempDir = new Path("grep-temp-" + Sharpen.Extensions.ToString(
                                        new Random().Next(int.MaxValue)));
            Configuration conf = GetConf();

            conf.Set(RegexMapper.Pattern, args[2]);
            if (args.Length == 4)
            {
                conf.Set(RegexMapper.Group, args[3]);
            }
            Job grepJob = Job.GetInstance(conf);

            try
            {
                grepJob.SetJobName("grep-search");
                grepJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
                FileInputFormat.SetInputPaths(grepJob, args[0]);
                grepJob.SetMapperClass(typeof(RegexMapper));
                grepJob.SetCombinerClass(typeof(LongSumReducer));
                grepJob.SetReducerClass(typeof(LongSumReducer));
                FileOutputFormat.SetOutputPath(grepJob, tempDir);
                grepJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
                grepJob.SetOutputKeyClass(typeof(Text));
                grepJob.SetOutputValueClass(typeof(LongWritable));
                // Do not sort the output of a failed search; report the failure
                // instead. The finally block below still removes the temp dir.
                if (!grepJob.WaitForCompletion(true))
                {
                    return(1);
                }
                Job sortJob = Job.GetInstance(conf);
                sortJob.SetJobName("grep-sort");
                sortJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
                FileInputFormat.SetInputPaths(sortJob, tempDir);
                sortJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
                sortJob.SetMapperClass(typeof(InverseMapper));
                // write a single file
                sortJob.SetNumReduceTasks(1);
                FileOutputFormat.SetOutputPath(sortJob, new Path(args[1]));
                // sort by decreasing freq
                sortJob.SetSortComparatorClass(typeof(LongWritable.DecreasingComparator));
                return(sortJob.WaitForCompletion(true) ? 0 : 1);
            }
            finally
            {
                FileSystem.Get(conf).Delete(tempDir, true);
            }
        }
示例#20
0
        /// <summary>
        /// Configures and runs the check job: sequence-file input from
        /// <c>MapInputDir</c>, the <c>DistributedFSCheckMapper</c>, an
        /// <c>AccumulatingReducer</c> in a single reduce task, and (Text, Text)
        /// output written to <c>ReadDir</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        private void RunDistributedFSCheck()
        {
            JobConf checkJob = new JobConf(fs.GetConf(), typeof(DistributedFSCheck));

            // Input side.
            FileInputFormat.SetInputPaths(checkJob, MapInputDir);
            checkJob.SetInputFormat(typeof(SequenceFileInputFormat));

            // Processing.
            checkJob.SetMapperClass(typeof(DistributedFSCheck.DistributedFSCheckMapper));
            checkJob.SetReducerClass(typeof(AccumulatingReducer));

            // Output side.
            FileOutputFormat.SetOutputPath(checkJob, ReadDir);
            checkJob.SetOutputKeyClass(typeof(Text));
            checkJob.SetOutputValueClass(typeof(Text));
            checkJob.SetNumReduceTasks(1);

            JobClient.RunJob(checkJob);
        }
示例#21
0
        /// <summary>
        /// Configures and runs an I/O test job: sequence-file input from
        /// <c>ControlDir</c>, the supplied mapper, an <c>AccumulatingReducer</c>
        /// in a single reduce task, and (Text, Text) output.
        /// </summary>
        /// <param name="mapperClass">Mapper type set on the job.</param>
        /// <param name="outputDir">Directory the job writes its output to.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void RunIOTest(Type mapperClass, Path outputDir)
        {
            JobConf ioJob = new JobConf(fsConfig, typeof(DFSCIOTest));

            // Input side.
            FileInputFormat.SetInputPaths(ioJob, ControlDir);
            ioJob.SetInputFormat(typeof(SequenceFileInputFormat));

            // Processing.
            ioJob.SetMapperClass(mapperClass);
            ioJob.SetReducerClass(typeof(AccumulatingReducer));

            // Output side.
            FileOutputFormat.SetOutputPath(ioJob, outputDir);
            ioJob.SetOutputKeyClass(typeof(Text));
            ioJob.SetOutputValueClass(typeof(Text));
            ioJob.SetNumReduceTasks(1);

            JobClient.RunJob(ioJob);
        }
示例#22
0
        /// <summary>
        /// Submits a job whose mapper is <c>DistributedCacheChecker</c> after
        /// registering a cache file, class-path files, a class-path archive and a
        /// cache archive, then asserts that the job succeeds and that its tracking
        /// URL ends with the numeric tail of the job id followed by "/".
        /// </summary>
        /// <remarks>
        /// The test returns without running when the file named by
        /// <c>MiniMRYarnCluster.Appjar</c> does not exist.
        /// </remarks>
        /// <param name="jobJarPath">Path set as the job jar.</param>
        /// <exception cref="System.Exception"/>
        public virtual void _testDistributedCache(string jobJarPath)
        {
            // Skip (do not fail) when the application jar is missing.
            if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
            {
                Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test."
                         );
                return;
            }
            // Create a temporary file of length 1.
            Path first = CreateTempFile("distributed.first", "x");
            // Create two jars with a single file inside them.
            // NOTE(review): three jars are actually created here, not two.
            Path second = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
            Path third  = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
            Path fourth = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);
            Job  job    = Job.GetInstance(mrCluster.GetConfig());

            // Set the job jar to a new "dummy" jar so we can check that its extracted
            // properly
            job.SetJar(jobJarPath);
            // Because the job jar is a "dummy" jar, we need to include the jar with
            // DistributedCacheChecker or it won't be able to find it
            Path distributedCacheCheckerJar = new Path(JarFinder.GetJar(typeof(TestMRJobs.DistributedCacheChecker
                                                                               )));

            job.AddFileToClassPath(distributedCacheCheckerJar.MakeQualified(localFs.GetUri(),
                                                                            distributedCacheCheckerJar.GetParent()));
            job.SetMapperClass(typeof(TestMRJobs.DistributedCacheChecker));
            job.SetOutputFormatClass(typeof(NullOutputFormat));
            FileInputFormat.SetInputPaths(job, first);
            // Creates the Job Configuration
            // The URI fragment after '#' names the cache file "distributed.first.symlink".
            job.AddCacheFile(new URI(first.ToUri().ToString() + "#distributed.first.symlink")
                             );
            job.AddFileToClassPath(second);
            // The AppMaster jar itself
            job.AddFileToClassPath(AppJar.MakeQualified(localFs.GetUri(), AppJar.GetParent())
                                   );
            job.AddArchiveToClassPath(third);
            job.AddCacheArchive(fourth.ToUri());
            // A single map attempt speeds up failures.
            job.SetMaxMapAttempts(1);
            // speed up failures
            job.Submit();
            // Captured after submission, before waiting for completion.
            string trackingUrl = job.GetTrackingURL();
            string jobId       = job.GetJobID().ToString();

            NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(false));
            // The URL must end with the part of the job id from its last '_' on, plus "/".
            NUnit.Framework.Assert.IsTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID "
                                          + jobId, trackingUrl.EndsWith(Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf
                                                                                                      ("_")) + "/"));
        }
示例#23
0
        /// <summary>
        /// Creates a map-only job named "Kill-Job" that uses
        /// <c>MapReduceTestUtil.KillMapper</c>.
        /// </summary>
        /// <param name="conf">Configuration object the job is created from.</param>
        /// <param name="outdir">Output directory.</param>
        /// <param name="indirs">Input directories.</param>
        /// <returns>The configured, not yet submitted kill job.</returns>
        /// <exception cref="System.Exception">If an error occurs creating job configuration.
        ///     </exception>
        public static Job CreateKillJob(Configuration conf, Path outdir, params Path[] indirs)
        {
            Job killJob = Job.GetInstance(conf);
            killJob.SetJobName("Kill-Job");

            FileInputFormat.SetInputPaths(killJob, indirs);
            killJob.SetMapperClass(typeof(MapReduceTestUtil.KillMapper));

            // A reducer class is set, but the job is configured with zero reduce tasks.
            killJob.SetReducerClass(typeof(Reducer));
            killJob.SetNumReduceTasks(0);

            FileOutputFormat.SetOutputPath(killJob, outdir);
            killJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            killJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
            return killJob;
        }
示例#24
0
        /// <summary>
        /// Verifies that <c>hadoop.tmp.dir</c> is present in a default <c>JobConf</c> and absent
        /// from one created with <c>new JobConf(false)</c>, then runs an identity map/reduce job
        /// configured from such a <c>JobConf(false)</c> and checks its output.
        /// </summary>
        /// <remarks>
        /// The input file holds two "hello" lines. When the job produces at least one output
        /// file, the first one must contain exactly two lines, each containing "hello".
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestNoDefaults()
        {
            JobConf configuration = new JobConf();

            NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) != null);
            configuration = new JobConf(false);
            NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) == null);
            Path         inDir  = new Path("testing/jobconf/input");
            Path         outDir = new Path("testing/jobconf/output");
            OutputStream os     = GetFileSystem().Create(new Path(inDir, "text.txt"));
            TextWriter   wr     = new OutputStreamWriter(os);

            // Close the writer even when a write fails, so the underlying stream is not leaked.
            try
            {
                wr.Write("hello\n");
                wr.Write("hello\n");
            }
            finally
            {
                wr.Close();
            }
            JobConf conf = new JobConf(false);

            // Only the file system URI is copied over from the test's job configuration.
            conf.Set("fs.defaultFS", CreateJobConf().Get("fs.defaultFS"));
            conf.SetJobName("mr");
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetMapperClass(typeof(IdentityMapper));
            conf.SetReducerClass(typeof(IdentityReducer));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobClient.RunJob(conf);
            Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter
                                                                                    ()));

            // NOTE(review): when no output file is listed the content check below is skipped
            // and the test still passes - confirm that this is intended.
            if (outputFiles.Length > 0)
            {
                InputStream    @is     = GetFileSystem().Open(outputFiles[0]);
                BufferedReader reader  = new BufferedReader(new InputStreamReader(@is));
                int            counter = 0;

                // Close the reader even when an assertion inside the loop fails.
                try
                {
                    string line = reader.ReadLine();
                    while (line != null)
                    {
                        counter++;
                        NUnit.Framework.Assert.IsTrue(line.Contains("hello"));
                        line = reader.ReadLine();
                    }
                }
                finally
                {
                    reader.Close();
                }
                NUnit.Framework.Assert.AreEqual(2, counter);
            }
        }
示例#25
0
        /// <summary>
        /// Builds a job named "DataMoveJob" that uses <c>MapReduceTestUtil.DataCopyMapper</c> and
        /// <c>MapReduceTestUtil.DataCopyReducer</c> with a single reduce task.
        /// </summary>
        /// <param name="conf">
        /// Configuration the job is created from. <c>MRJobConfig.NumMaps</c> is set to 3 on this
        /// object before the job is created.
        /// </param>
        /// <param name="outdir">Output directory.</param>
        /// <param name="indirs">One or more input directories.</param>
        /// <returns>The configured job; this method does not submit it.</returns>
        /// <exception cref="System.Exception">If an error occurs creating job configuration.
        ///     </exception>
        public static Job CreateCopyJob(Configuration conf, Path outdir, params Path[] indirs)
        {
            // Set the map count on the caller's configuration before the job is created from it.
            conf.SetInt(MRJobConfig.NumMaps, 3);

            Job copyJob = Job.GetInstance(conf);
            copyJob.SetJobName("DataMoveJob");

            // Where the job reads from and writes to.
            FileInputFormat.SetInputPaths(copyJob, indirs);
            FileOutputFormat.SetOutputPath(copyJob, outdir);

            // Mapper, reducer and the single reduce task.
            copyJob.SetMapperClass(typeof(MapReduceTestUtil.DataCopyMapper));
            copyJob.SetReducerClass(typeof(MapReduceTestUtil.DataCopyReducer));
            copyJob.SetNumReduceTasks(1);

            // Both output key and value are Text.
            copyJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            copyJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));

            return copyJob;
        }
示例#26
0
        /// <summary>
        /// Runs a job with <c>TestFileSystem.SeekMapper</c> and <c>LongSumReducer</c> over
        /// <c>ControlDir</c>, writing the results to <c>ReadDir</c>.
        /// </summary>
        /// <param name="fs">File system on which <c>ReadDir</c> is deleted before the job runs.</param>
        /// <param name="fastCheck">Value stored under the <c>fs.test.fastCheck</c> job setting.</param>
        /// <exception cref="System.Exception"/>
        public static void SeekTest(FileSystem fs, bool fastCheck)
        {
            // Remove the output directory (recursively) before the job is configured.
            fs.Delete(ReadDir, true);

            JobConf seekJob = new JobConf(conf, typeof(TestFileSystem));
            seekJob.SetBoolean("fs.test.fastCheck", fastCheck);

            // Input: sequence files under the control directory.
            FileInputFormat.SetInputPaths(seekJob, ControlDir);
            seekJob.SetInputFormat(typeof(SequenceFileInputFormat));

            // Processing: seek mapper feeding one summing reducer.
            seekJob.SetMapperClass(typeof(TestFileSystem.SeekMapper));
            seekJob.SetReducerClass(typeof(LongSumReducer));
            seekJob.SetNumReduceTasks(1);

            // Output: Text keys with LongWritable values.
            FileOutputFormat.SetOutputPath(seekJob, ReadDir);
            seekJob.SetOutputKeyClass(typeof(Text));
            seekJob.SetOutputValueClass(typeof(LongWritable));

            JobClient.RunJob(seekJob);
        }
        /// <summary>
        /// Writes <c>Records</c> identical key/value pairs to a sequence file using the given
        /// compression type, merges that file with a <c>SequenceFile.Sorter</c>, and asserts that
        /// the merge iterator yields <c>Records</c> entries and reports a progress of 1.0.
        /// </summary>
        /// <param name="compressionType">Compression type passed to the sequence file writer.</param>
        /// <exception cref="System.IO.IOException"/>
        public virtual void RunTest(SequenceFile.CompressionType compressionType)
        {
            JobConf    job     = new JobConf();
            FileSystem localFs = FileSystem.GetLocal(job);

            // All files live under <test.build.data>/mapred ("." when the property is unset).
            string baseLocation = Runtime.GetProperty("test.build.data", ".") + "/mapred";
            Path   workDir      = new Path(baseLocation);
            Path   seqFile      = new Path(workDir, "test.seq");
            Path   mergeTmpDir  = new Path(workDir, "tmp");

            // Start from a clean directory, then recreate the temporary merge directory.
            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            localFs.Mkdirs(mergeTmpDir);

            LongWritable key   = new LongWritable();
            Text         value = new Text();

            SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
                localFs, job, seqFile, typeof(LongWritable), typeof(Text), compressionType, new DefaultCodec());
            try
            {
                int written = 0;
                while (written < Records)
                {
                    key.Set(1234);
                    value.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
                    seqWriter.Append(key, value);
                    written++;
                }
            }
            finally
            {
                seqWriter.Close();
            }

            long seqFileLength = localFs.GetFileStatus(seqFile).GetLen();
            Log.Info("With compression = " + compressionType + ": " + "compressed length = "
                     + seqFileLength);

            // Merge the single file and walk the resulting raw iterator.
            SequenceFile.Sorter merger = new SequenceFile.Sorter(
                localFs, job.GetOutputKeyComparator(), job.GetMapOutputKeyClass(), job.GetMapOutputValueClass(), job);
            Path[] inputs = new Path[] { seqFile };
            SequenceFile.Sorter.RawKeyValueIterator merged = merger.Merge(inputs, mergeTmpDir, false);

            int seen = 0;
            while (merged.Next())
            {
                seen++;
            }

            NUnit.Framework.Assert.AreEqual(Records, seen);
            NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
        }
示例#28
0
        /// <summary>
        /// Builds a <c>JobConf</c> named "DataMoveJob" that uses <c>JobControlTestUtils.DataCopy</c>
        /// as both mapper and reducer, with 12 map tasks and 4 reduce tasks.
        /// </summary>
        /// <param name="indirs">List of input directories.</param>
        /// <param name="outdir">Output directory.</param>
        /// <returns>The configured <c>JobConf</c>; this method does not run it.</returns>
        /// <exception cref="System.Exception">If an error occurs creating job configuration.
        ///     </exception>
        internal static JobConf CreateCopyJob(IList <Path> indirs, Path outdir)
        {
            JobConf copyConf = new JobConf(new Configuration(), typeof(TestJobControl));

            copyConf.SetJobName("DataMoveJob");

            // Where the job reads from and writes to.
            FileInputFormat.SetInputPaths(copyConf, Sharpen.Collections.ToArray(indirs, new Path[0]));
            FileOutputFormat.SetOutputPath(copyConf, outdir);

            // The same class serves as mapper and reducer.
            copyConf.SetMapperClass(typeof(JobControlTestUtils.DataCopy));
            copyConf.SetReducerClass(typeof(JobControlTestUtils.DataCopy));

            // Both output key and value are Text.
            copyConf.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            copyConf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));

            copyConf.SetNumMapTasks(12);
            copyConf.SetNumReduceTasks(4);

            return copyConf;
        }
示例#29
0
        /// <summary>
        /// Configures and runs the NNBench job: input is read from the control directory under
        /// <c>baseDir</c>, output is written to the output directory under <c>baseDir</c>.
        /// </summary>
        /// <exception cref="System.IO.IOException">on error</exception>
        public static void RunTests()
        {
            // Applied to the shared configuration before the job configuration is derived from it.
            config.SetLong("io.bytes.per.checksum", bytesPerChecksum);

            JobConf benchJob = new JobConf(config, typeof(NNBench));
            benchJob.SetJobName("NNBench-" + operation);

            // Input: sequence files from the control directory.
            Path controlPath = new Path(baseDir, ControlDirName);
            FileInputFormat.SetInputPaths(benchJob, controlPath);
            benchJob.SetInputFormat(typeof(SequenceFileInputFormat));

            // Each map gets a single attempt and speculative execution is switched off.
            benchJob.SetMaxMapAttempts(1);
            benchJob.SetSpeculativeExecution(false);

            benchJob.SetMapperClass(typeof(NNBench.NNBenchMapper));
            benchJob.SetReducerClass(typeof(NNBench.NNBenchReducer));

            // Output: Text keys and values.
            Path outputPath = new Path(baseDir, OutputDirName);
            FileOutputFormat.SetOutputPath(benchJob, outputPath);
            benchJob.SetOutputKeyClass(typeof(Text));
            benchJob.SetOutputValueClass(typeof(Text));

            benchJob.SetNumReduceTasks((int)numberOfReduces);

            JobClient.RunJob(benchJob);
        }
示例#30
0
        /// <summary>
        /// Builds the "Fail job" job, storing the two failure switches in the configuration
        /// returned by <c>GetConf()</c> under the <c>FailMap</c> and <c>FailReduce</c> keys.
        /// </summary>
        /// <param name="failMappers">Value stored under <c>FailMap</c>.</param>
        /// <param name="failReducers">Value stored under <c>FailReduce</c>.</param>
        /// <param name="inputFile">Path added as the job's input.</param>
        /// <returns>The configured job; this method does not submit it.</returns>
        /// <exception cref="System.IO.IOException"/>
        public virtual Job CreateJob(bool failMappers, bool failReducers, Path inputFile)
        {
            // The switches are written before the job is created from the configuration.
            Configuration failConf = GetConf();
            failConf.SetBoolean(FailMap, failMappers);
            failConf.SetBoolean(FailReduce, failReducers);

            Job failJob = Job.GetInstance(failConf, "fail");
            failJob.SetJarByClass(typeof(FailJob));

            // Map side.
            failJob.SetMapperClass(typeof(FailJob.FailMapper));
            failJob.SetMapOutputKeyClass(typeof(LongWritable));
            failJob.SetMapOutputValueClass(typeof(NullWritable));

            // Reduce side.
            failJob.SetReducerClass(typeof(FailJob.FailReducer));

            // Text input, null output format.
            failJob.SetInputFormatClass(typeof(TextInputFormat));
            failJob.SetOutputFormatClass(typeof(NullOutputFormat));

            failJob.SetSpeculativeExecution(false);

            // Replaces the "fail" name given at creation.
            failJob.SetJobName("Fail job");

            FileInputFormat.AddInputPath(failJob, inputFile);
            return failJob;
        }