Example #1
0
        /// <summary>
        /// Command-line entry point: configures and runs the "secondary sort" job,
        /// then exits the process with 0 when the job completes and 1 otherwise.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; after <c>GenericOptionsParser</c> has processed them,
        /// exactly two must remain: the input path and the output path.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Configuration configuration = new Configuration();
            GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
            string[] remaining = optionsParser.GetRemainingArgs();

            bool hasInputAndOutput = remaining.Length == 2;
            if (!hasInputAndOutput)
            {
                System.Console.Error.WriteLine("Usage: secondarysort <in> <out>");
                System.Environment.Exit(2);
            }

            Job sortJob = Job.GetInstance(configuration, "secondary sort");
            sortJob.SetJarByClass(typeof(SecondarySort));
            sortJob.SetMapperClass(typeof(SecondarySort.MapClass));
            sortJob.SetReducerClass(typeof(SecondarySort.Reduce));

            // Partitioning and grouping both look only at the first int of the pair.
            sortJob.SetPartitionerClass(typeof(SecondarySort.FirstPartitioner));
            sortJob.SetGroupingComparatorClass(typeof(SecondarySort.FirstGroupingComparator));

            // Map output: IntPair keys, IntWritable values.
            sortJob.SetMapOutputKeyClass(typeof(SecondarySort.IntPair));
            sortJob.SetMapOutputValueClass(typeof(IntWritable));

            // Reduce output: Text keys, IntWritable values.
            sortJob.SetOutputKeyClass(typeof(Text));
            sortJob.SetOutputValueClass(typeof(IntWritable));

            FileInputFormat.AddInputPath(sortJob, new Path(remaining[0]));
            FileOutputFormat.SetOutputPath(sortJob, new Path(remaining[1]));

            bool completed = sortJob.WaitForCompletion(true);
            int exitCode = completed ? 0 : 1;
            System.Environment.Exit(exitCode);
        }
Example #2
0
        /// <summary>
        /// Command-line entry point: configures and runs the "word count" job,
        /// then exits the process with 0 when the job completes and 1 otherwise.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; after <c>GenericOptionsParser</c> has processed them,
        /// at least two must remain: one or more input paths followed by the output path.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Configuration configuration = new Configuration();
            GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
            string[] remaining = optionsParser.GetRemainingArgs();

            if (remaining.Length < 2)
            {
                System.Console.Error.WriteLine("Usage: wordcount <in> [<in>...] <out>");
                System.Environment.Exit(2);
            }

            Job countJob = Job.GetInstance(configuration, "word count");
            countJob.SetJarByClass(typeof(WordCount));
            countJob.SetMapperClass(typeof(WordCount.TokenizerMapper));
            // The same class is used as both the combiner and the reducer.
            countJob.SetCombinerClass(typeof(WordCount.IntSumReducer));
            countJob.SetReducerClass(typeof(WordCount.IntSumReducer));
            countJob.SetOutputKeyClass(typeof(Text));
            countJob.SetOutputValueClass(typeof(IntWritable));

            // Every argument except the last is an input path; the last is the output path.
            int outputIndex = remaining.Length - 1;
            for (int inputIndex = 0; inputIndex < outputIndex; inputIndex++)
            {
                FileInputFormat.AddInputPath(countJob, new Path(remaining[inputIndex]));
            }
            FileOutputFormat.SetOutputPath(countJob, new Path(remaining[outputIndex]));

            bool completed = countJob.WaitForCompletion(true);
            int exitCode = completed ? 0 : 1;
            System.Environment.Exit(exitCode);
        }
Example #3
0
        /// <summary>
        /// Runs the "word mean" job over the given input and stores the value returned by
        /// <c>ReadAndCalcMean</c> for the job's output path in <c>mean</c>.
        /// </summary>
        /// <param name="args">Exactly two entries: the input path and the output path.</param>
        /// <returns>
        /// 0 when the job completes (and also when the usage message is printed),
        /// 1 when the job does not complete.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            bool wrongArgumentCount = args.Length != 2;
            if (wrongArgumentCount)
            {
                // NOTE(review): a usage error reports 0 here; confirm callers expect that.
                System.Console.Error.WriteLine("Usage: wordmean <in> <out>");
                return 0;
            }

            Configuration configuration = GetConf();
            Job meanJob = Job.GetInstance(configuration, "word mean");

            meanJob.SetJarByClass(typeof(WordMean));
            meanJob.SetMapperClass(typeof(WordMean.WordMeanMapper));
            // The same class is used as both the combiner and the reducer.
            meanJob.SetCombinerClass(typeof(WordMean.WordMeanReducer));
            meanJob.SetReducerClass(typeof(WordMean.WordMeanReducer));
            meanJob.SetOutputKeyClass(typeof(Text));
            meanJob.SetOutputValueClass(typeof(LongWritable));

            FileInputFormat.AddInputPath(meanJob, new Path(args[0]));
            Path outputLocation = new Path(args[1]);
            FileOutputFormat.SetOutputPath(meanJob, outputLocation);

            bool completed = meanJob.WaitForCompletion(true);

            // The mean is read from the output path regardless of the job result.
            mean = ReadAndCalcMean(outputLocation, configuration);

            if (completed)
            {
                return 0;
            }
            return 1;
        }
        /// <summary>Run a map/reduce job to compute Pi, reporting progress to <paramref name="out"/>.</summary>
        /// <param name="startDigit">Start digit; stored in the job configuration under <c>DigitStartProperty</c>.</param>
        /// <param name="nDigits">Number of digits; stored in the job configuration under <c>DigitSizeProperty</c>.</param>
        /// <param name="nMaps">Number of maps; stored in the job configuration under <c>DigitPartsProperty</c>.</param>
        /// <param name="workingDir">Working directory; it must not exist yet and is created here.</param>
        /// <param name="conf">Configuration used to obtain the file system and to create the job.</param>
        /// <param name="out">Writer that receives the progress messages.</param>
        /// <exception cref="System.IO.IOException">
        /// The working directory already exists or cannot be created.
        /// </exception>
        private static void Compute(int startDigit, int nDigits, int nMaps, string workingDir
                                    , Configuration conf, TextWriter @out)
        {
            string jobName = startDigit + "_" + nDigits;

            // Set up the working directory.
            @out.WriteLine("Working Directory = " + workingDir);
            @out.WriteLine();
            FileSystem fileSystem = FileSystem.Get(conf);
            Path workDir = fileSystem.MakeQualified(new Path(workingDir));

            if (fileSystem.Exists(workDir))
            {
                throw new IOException("Working directory " + workDir + " already exists.  Please remove it first.");
            }
            if (!fileSystem.Mkdirs(workDir))
            {
                throw new IOException("Cannot create working directory " + workDir);
            }

            @out.WriteLine("Start Digit      = " + startDigit);
            @out.WriteLine("Number of Digits = " + nDigits);
            @out.WriteLine("Number of Maps   = " + nMaps);

            // Set up the job and its output locations.
            Job piJob = CreateJob(jobName, conf);
            Path hexOutput = new Path(workDir, "pi_" + jobName + ".hex");
            FileOutputFormat.SetOutputPath(piJob, new Path(workDir, "out"));

            // Publish the custom properties through the job configuration.
            piJob.GetConfiguration().Set(WorkingDirProperty, workDir.ToString());
            piJob.GetConfiguration().Set(HexFileProperty, hexOutput.ToString());
            piJob.GetConfiguration().SetInt(DigitStartProperty, startDigit);
            piJob.GetConfiguration().SetInt(DigitSizeProperty, nDigits);
            piJob.GetConfiguration().SetInt(DigitPartsProperty, nMaps);

            // Start the map/reduce job and time it.
            @out.WriteLine("\nStarting Job ...");
            long startedAtMillis = Runtime.CurrentTimeMillis();

            try
            {
                bool completed = piJob.WaitForCompletion(true);
                if (!completed)
                {
                    @out.WriteLine("Job failed.");
                    System.Environment.Exit(1);
                }
            }
            catch (Exception e)
            {
                // Any failure while waiting is rethrown wrapped in a RuntimeException.
                throw new RuntimeException(e);
            }
            finally
            {
                long elapsedMillis = Runtime.CurrentTimeMillis() - startedAtMillis;
                double duration = elapsedMillis / 1000.0;
                @out.WriteLine("Duration is " + duration + " seconds.");
            }

            @out.WriteLine("Output file: " + hexOutput);
        }
Example #5
0
        /// <summary>
        /// Configures a job that uses <c>MyCombinerToCheckReporter</c> as its combiner
        /// between an identity mapper and an identity reducer, then runs it via <c>RunJob</c>.
        /// </summary>
        public virtual void TestCombinerShouldUpdateTheReporter()
        {
            const int mapCount = 5;
            const int reduceCount = 2;

            JobConf jobConf = new JobConf(mrCluster.GetConfig());
            Path inputDir = new Path(
                mrCluster.GetTestWorkDir().GetAbsolutePath(),
                "testCombinerShouldUpdateTheReporter-in");
            Path outputDir = new Path(
                mrCluster.GetTestWorkDir().GetAbsolutePath(),
                "testCombinerShouldUpdateTheReporter-out");

            CreateInputOutPutFolder(inputDir, outputDir, mapCount);

            jobConf.SetJobName("test-job-with-combiner");
            jobConf.SetMapperClass(typeof(IdentityMapper));
            jobConf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
            jobConf.SetReducerClass(typeof(IdentityReducer));

            // The application jar is added to the classpath through the distributed cache.
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, jobConf);

            jobConf.SetOutputCommitter(typeof(CustomOutputCommitter));
            jobConf.SetInputFormat(typeof(TextInputFormat));
            jobConf.SetOutputKeyClass(typeof(LongWritable));
            jobConf.SetOutputValueClass(typeof(Text));

            FileInputFormat.SetInputPaths(jobConf, inputDir);
            FileOutputFormat.SetOutputPath(jobConf, outputDir);

            jobConf.SetNumMapTasks(mapCount);
            jobConf.SetNumReduceTasks(reduceCount);

            RunJob(jobConf);
        }
Example #6
0
        /// <summary>
        /// Creates a job whose input directory is freshly populated with
        /// <paramref name="numInputFiles"/> files named <c>part-0</c>, <c>part-1</c>, ...,
        /// each containing <paramref name="input"/>.
        /// </summary>
        /// <param name="conf">Configuration used to create the job and obtain the file system.</param>
        /// <param name="inDir">Input directory; deleted recursively if it exists, then recreated.</param>
        /// <param name="outDir">Output directory; deleted recursively if it exists.</param>
        /// <param name="numInputFiles">Number of input files to write.</param>
        /// <param name="numReds">Number of reduce tasks set on the job.</param>
        /// <param name="input">Content written to every input file.</param>
        /// <returns>The configured, not yet submitted, job.</returns>
        /// <exception cref="System.IO.IOException"/>
        public static Job CreateJob(Configuration conf, Path inDir, Path outDir, int numInputFiles
                                    , int numReds, string input)
        {
            Job        job = Job.GetInstance(conf);
            FileSystem fs  = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            if (fs.Exists(inDir))
            {
                fs.Delete(inDir, true);
            }
            fs.Mkdirs(inDir);
            for (int i = 0; i < numInputFiles; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                try
                {
                    file.WriteBytes(input);
                }
                finally
                {
                    // Close even when the write fails so the stream is never leaked.
                    file.Close();
                }
            }
            FileInputFormat.SetInputPaths(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            job.SetNumReduceTasks(numReds);
            return(job);
        }
Example #7
0
        /// <summary>
        /// Runs the "word median" job over the given input and stores the value returned by
        /// <c>ReadAndFindMedian</c> in <c>median</c>.
        /// </summary>
        /// <param name="args">Exactly two entries: the input path and the output path.</param>
        /// <returns>
        /// 0 when the job completes (and also when the usage message is printed),
        /// 1 when the job does not complete.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            bool wrongArgumentCount = args.Length != 2;
            if (wrongArgumentCount)
            {
                // NOTE(review): a usage error reports 0 here; confirm callers expect that.
                System.Console.Error.WriteLine("Usage: wordmedian <in> <out>");
                return 0;
            }

            SetConf(new Configuration());
            Configuration configuration = GetConf();
            Job medianJob = Job.GetInstance(configuration, "word median");

            medianJob.SetJarByClass(typeof(WordMedian));
            medianJob.SetMapperClass(typeof(WordMedian.WordMedianMapper));
            // The same class is used as both the combiner and the reducer.
            medianJob.SetCombinerClass(typeof(WordMedian.WordMedianReducer));
            medianJob.SetReducerClass(typeof(WordMedian.WordMedianReducer));
            medianJob.SetOutputKeyClass(typeof(IntWritable));
            medianJob.SetOutputValueClass(typeof(IntWritable));

            FileInputFormat.AddInputPath(medianJob, new Path(args[0]));
            FileOutputFormat.SetOutputPath(medianJob, new Path(args[1]));

            bool completed = medianJob.WaitForCompletion(true);

            // Once the job has finished, the map-output-record counter gives the total
            // used to derive the two candidate median positions.
            long totalWords = medianJob.GetCounters()
                                       .GetGroup(typeof(TaskCounter).GetCanonicalName())
                                       .FindCounter("MAP_OUTPUT_RECORDS", "Map output records")
                                       .GetValue();
            double halfway = totalWords / 2.0;
            int upperIndex = (int)Math.Ceil(halfway);
            int lowerIndex = (int)Math.Floor(halfway);

            median = ReadAndFindMedian(args[1], upperIndex, lowerIndex, configuration);

            if (completed)
            {
                return 0;
            }
            return 1;
        }
Example #8
0
        /// <summary>
        /// Writes <paramref name="numMaps"/> small text input files into
        /// <paramref name="inDir"/>, configures <paramref name="conf"/> for the run,
        /// submits the job and monitors it until <c>MonitorAndPrintJob</c> returns.
        /// </summary>
        /// <param name="conf">Job configuration; modified in place.</param>
        /// <param name="inDir">Input directory; created if it does not exist.</param>
        /// <param name="outDir">Output directory; deleted recursively if it exists.</param>
        /// <param name="numMaps">Number of input files written and number of map tasks requested.</param>
        /// <param name="numReds">Number of reduce tasks requested.</param>
        /// <returns>The value returned by <c>JobClient.MonitorAndPrintJob</c>.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int
                                    numReds)
        {
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            if (!fs.Exists(inDir))
            {
                fs.Mkdirs(inDir);
            }
            string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";

            for (int i = 0; i < numMaps; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                try
                {
                    file.WriteBytes(input);
                }
                finally
                {
                    // Close even when the write fails so the stream is never leaked.
                    file.Close();
                }
            }
            DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs);
            conf.SetOutputCommitter(typeof(CustomOutputCommitter));
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReds);
            JobClient  jobClient = new JobClient(conf);
            RunningJob job       = jobClient.SubmitJob(conf);

            return(jobClient.MonitorAndPrintJob(conf, job));
        }
        /// <summary>
        /// Runs a job built from the tracking mapper, reducer, input format and output format,
        /// then asserts that the map, reduce, record-reader and record-writer cleanup flags are set.
        /// </summary>
        public virtual void TestJobSuccessCleanup()
        {
            Reset();

            Job trackedJob = Job.GetInstance();
            Path input = CreateInput();
            Path output = GetOutputPath();
            Configuration localConf = new Configuration();
            FileSystem localFileSystem = FileSystem.GetLocal(localConf);

            // Remove output left behind by an earlier run.
            bool staleOutput = localFileSystem.Exists(output);
            if (staleOutput)
            {
                localFileSystem.Delete(output, true);
            }

            trackedJob.SetMapperClass(typeof(TestMapperReducerCleanup.TrackingTokenizerMapper));
            trackedJob.SetReducerClass(typeof(TestMapperReducerCleanup.TrackingIntSumReducer));
            trackedJob.SetOutputKeyClass(typeof(Text));
            trackedJob.SetOutputValueClass(typeof(IntWritable));
            trackedJob.SetInputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextInputFormat));
            trackedJob.SetOutputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextOutputFormat));
            trackedJob.SetNumReduceTasks(1);

            FileInputFormat.AddInputPath(trackedJob, input);
            FileOutputFormat.SetOutputPath(trackedJob, output);

            // The completion result itself is not checked; only the cleanup flags are.
            trackedJob.WaitForCompletion(true);

            NUnit.Framework.Assert.IsTrue(mapCleanup);
            NUnit.Framework.Assert.IsTrue(reduceCleanup);
            NUnit.Framework.Assert.IsTrue(recordReaderCleanup);
            NUnit.Framework.Assert.IsTrue(recordWriterCleanup);
        }
        /// <summary>
        /// Appends the header that matches <paramref name="outputFormat"/> to the shared string builder.
        /// </summary>
        /// <param name="logs">Validation logs passed to <c>CreateHtmlStyles</c> when the HTML header is built.</param>
        /// <param name="outputFormat">The output format whose header is appended.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="outputFormat"/> is not <c>Html</c>, <c>Csv</c> or <c>Text</c>.
        /// </exception>
        private static void AppendHeader(IReadOnlyList <ValidationLog> logs, FileOutputFormat outputFormat)
        {
            switch (outputFormat)
            {
            case FileOutputFormat.Html:
                _stringBuilder.Append(HtmlDocAndHeaderStart);
                _stringBuilder.AppendLine(string.Format(HtmlHeaderInnerFormat, CreateHtmlStyles(logs)));
                _stringBuilder.AppendLine(string.Format(HtmlHeaderEndFormat, DateTime.Now));
                _stringBuilder.AppendLine(HtmlScript);

                // One div per entry in the validator-to-style map, using the key and its lower-cased form.
                foreach (var kvp in _validatorToHtmlStyle)
                {
                    _stringBuilder.AppendLine(HtmlDivStart);
                    _stringBuilder.Append(string.Format(HtmlDivInnerFormat, kvp.Key, kvp.Key.ToLowerInvariant()));
                    _stringBuilder.Append(HtmlDivEnd);
                }

                _stringBuilder.AppendLine(HtmlTableStartAndHeader);
                break;

            case FileOutputFormat.Csv:
                _stringBuilder.AppendLine(CsvHeader);
                break;

            case FileOutputFormat.Text:
                _stringBuilder.AppendLine(PlainTextHeader);
                break;

            default:
                var msg = string.Format(EditorConstants.OutputFormatIsInvalid, outputFormat);
                // The single-string constructor interprets its argument as the parameter
                // name, not the message, so pass name, offending value and message explicitly.
                throw new ArgumentOutOfRangeException("outputFormat", outputFormat, msg);
            }
        }
Example #11
0
        /// <summary>
        /// Submits a map-only job that uses <c>FailingMapper</c>, waits for it, and asserts that
        /// it did not succeed and that its tracking URL ends with the trailing part of the job ID.
        /// </summary>
        /// <returns>The job after <c>WaitForCompletion</c> has returned.</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        protected internal virtual Job RunFailingMapperJob()
        {
            Configuration myConf = new Configuration(mrCluster.GetConfig());

            myConf.SetInt(MRJobConfig.NumMaps, 1);
            // Reduce the number of attempts.
            myConf.SetInt(MRJobConfig.MapMaxAttempts, 2);
            Job job = Job.GetInstance(myConf);

            job.SetJarByClass(typeof(FailingMapper));
            job.SetJobName("failmapper");
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(Text));
            job.SetInputFormatClass(typeof(RandomTextWriterJob.RandomInputFormat));
            job.SetOutputFormatClass(typeof(TextOutputFormat));
            job.SetMapperClass(typeof(FailingMapper));
            job.SetNumReduceTasks(0);
            FileOutputFormat.SetOutputPath(job, new Path(OutputRootDir, "failmapper-output"));
            // The AppMaster jar itself.
            job.AddFileToClassPath(AppJar);
            job.Submit();
            string trackingUrl = job.GetTrackingURL();
            string jobId       = job.GetJobID().ToString();
            bool   succeeded   = job.WaitForCompletion(true);

            NUnit.Framework.Assert.IsFalse(succeeded);

            // The URL is expected to end with the job ID from its last '_' onwards, plus '/'.
            string expectedSuffix = Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf("_")) + "/";

            // NUnit takes the condition first and the failure message second
            // (the reverse of the JUnit order this call was converted from).
            NUnit.Framework.Assert.IsTrue(trackingUrl.EndsWith(expectedSuffix),
                                          "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId);
            return(job);
        }
Example #12
0
        /// <summary>Configures and runs the "MultiFileWordCount" job.</summary>
        /// <param name="args">
        /// At least two entries: the input paths string passed to
        /// <c>FileInputFormat.AddInputPaths</c>, then the output path.
        /// </param>
        /// <returns>
        /// 2 when fewer than two arguments are given (after printing usage),
        /// 0 when the job completes, 1 otherwise.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            bool tooFewArguments = args.Length < 2;
            if (tooFewArguments)
            {
                PrintUsage();
                return 2;
            }

            Job countJob = Job.GetInstance(GetConf());
            countJob.SetJobName("MultiFileWordCount");
            countJob.SetJarByClass(typeof(MultiFileWordCount));

            // Input is read through this class's own input format.
            countJob.SetInputFormatClass(typeof(MultiFileWordCount.MyInputFormat));

            // Output keys are words (strings); output values are counts (ints).
            countJob.SetOutputKeyClass(typeof(Text));
            countJob.SetOutputValueClass(typeof(IntWritable));

            // Mapper defined by this class; the summing reducer doubles as the combiner.
            countJob.SetMapperClass(typeof(MultiFileWordCount.MapClass));
            countJob.SetCombinerClass(typeof(IntSumReducer));
            countJob.SetReducerClass(typeof(IntSumReducer));

            FileInputFormat.AddInputPaths(countJob, args[0]);
            FileOutputFormat.SetOutputPath(countJob, new Path(args[1]));

            bool completed = countJob.WaitForCompletion(true);
            if (completed)
            {
                return 0;
            }
            return 1;
        }
Example #13
0
        /// <summary>
        /// Runs a composite-input join job of the given type over four generated sources,
        /// asserts that it succeeded, and deletes the base directory afterwards.
        /// </summary>
        /// <param name="jointype">
        /// Join type passed to <c>CompositeInputFormat.Compose</c>; also used as the name of the
        /// base directory. For "outer", <c>CheckOuterConsistency</c> is run on the result.
        /// </param>
        /// <param name="map">Mapper class set on the job.</param>
        /// <param name="reduce">Reducer class set on the job.</param>
        /// <exception cref="System.Exception"/>
        private static void JoinAs(string jointype, Type map, Type reduce)
        {
            int           srcs  = 4;
            Configuration conf  = new Configuration();
            Path          @base = cluster.GetFileSystem().MakeQualified(new Path("/" + jointype));

            Path[] src = WriteSimpleSrc(@base, conf, srcs);
            conf.Set(CompositeInputFormat.JoinExpr, CompositeInputFormat.Compose(jointype, typeof(
                                                                                     SequenceFileInputFormat), src));
            conf.SetInt("testdatamerge.sources", srcs);
            Job job = Job.GetInstance(conf);

            job.SetInputFormatClass(typeof(CompositeInputFormat));
            FileOutputFormat.SetOutputPath(job, new Path(@base, "out"));
            job.SetMapperClass(map);
            job.SetReducerClass(reduce);
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            job.SetOutputKeyClass(typeof(IntWritable));
            job.SetOutputValueClass(typeof(IntWritable));
            job.WaitForCompletion(true);
            // NUnit takes the condition first and the failure message second
            // (the reverse of the JUnit order this call was converted from).
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
            if ("outer".Equals(jointype))
            {
                CheckOuterConsistency(job, src);
            }
            @base.GetFileSystem(conf).Delete(@base, true);
        }
Example #14
0
 /// <summary>
 /// Runs the "TestValueIterReset" job with <c>NumTests</c> reducers and validates its output.
 /// Any exception is printed and turned into a test failure.
 /// </summary>
 public virtual void TestValueIterReset()
 {
     try
     {
         Configuration configuration = new Configuration();
         Job resetJob = Job.GetInstance(configuration, "TestValueIterReset");

         resetJob.SetJarByClass(typeof(TestValueIterReset));
         resetJob.SetMapperClass(typeof(TestValueIterReset.TestMapper));
         resetJob.SetReducerClass(typeof(TestValueIterReset.TestReducer));
         resetJob.SetNumReduceTasks(NumTests);

         // Both the map output and the final output are IntWritable pairs.
         resetJob.SetMapOutputKeyClass(typeof(IntWritable));
         resetJob.SetMapOutputValueClass(typeof(IntWritable));
         resetJob.SetOutputKeyClass(typeof(IntWritable));
         resetJob.SetOutputValueClass(typeof(IntWritable));

         resetJob.GetConfiguration().SetInt(MRJobConfig.ReduceMarkresetBufferSize, 128);
         resetJob.SetInputFormatClass(typeof(TextInputFormat));
         resetJob.SetOutputFormatClass(typeof(TextOutputFormat));

         FileInputFormat.AddInputPath(resetJob, new Path(TestRootDir + "/in"));

         // Clear any previous output before pointing the job at it.
         Path outputDir = new Path(TestRootDir + "/out");
         localFs.Delete(outputDir, true);
         FileOutputFormat.SetOutputPath(resetJob, outputDir);

         CreateInput();

         bool completed = resetJob.WaitForCompletion(true);
         NUnit.Framework.Assert.IsTrue(completed);
         ValidateOutput();
     }
     catch (Exception e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
         NUnit.Framework.Assert.IsTrue(false);
     }
 }
Example #15
0
        /// <summary>
        /// Chooses the output file and output writer for the extraction and reports the choice
        /// to <paramref name="listener"/>.
        /// </summary>
        /// <param name="request">
        /// The extract command; its <c>Configuration</c>, when present, supplies the CSV separator.
        /// </param>
        /// <param name="listener">Receives informational progress notifications.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <c>FlatFileType</c> is not a supported flat file type.
        /// </exception>
        protected override void PreInitializeImpl(IExtractCommand request, IDataLoadEventListener listener)
        {
            // NOTE(review): this branch inspects the _request field rather than the
            // request parameter — presumably the field is assigned before this is called; confirm.
            if (_request is ExtractGlobalsCommand)
            {
                listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Request is for the extraction of Globals."));
                OutputFile = _request.GetExtractionDirectory().FullName;
                return;
            }

            switch (FlatFileType)
            {
            case ExecuteExtractionToFlatFileType.CSV:
                OutputFile = Path.Combine(DirectoryPopulated.FullName, GetFilename() + ".csv");
                if (request.Configuration != null)
                {
                    _output = new CSVOutputFormat(OutputFile, request.Configuration.Separator, DateFormat);
                }
                else
                {
                    // No configuration available: fall back to a comma separator.
                    _output = new CSVOutputFormat(OutputFile, ",", DateFormat);
                }
                break;

            default:
                // Name the offending setting and value so the failure is diagnosable.
                throw new ArgumentOutOfRangeException("FlatFileType", FlatFileType, "Unsupported flat file type for extraction.");
            }

            listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Setup data extraction destination as " + OutputFile + " (will not exist yet)"));
        }
Example #16
0
        /// <summary>Start a job to compute sigma</summary>
        /// <param name="name">
        /// Job name; also used as the remote sub-directory name and passed to the result writers.
        /// </param>
        /// <param name="sigma">
        /// The summation to compute; its value must not be set yet. It receives a value only when
        /// the combined results consist of a single summation that both contains and is contained by it.
        /// </param>
        /// <exception cref="System.IO.IOException">
        /// <paramref name="sigma"/> already has a value.
        /// </exception>
        private void Compute(string name, Summation sigma)
        {
            if (sigma.GetValue() != null)
            {
                throw new IOException("sigma.getValue() != null, sigma=" + sigma);
            }
            //setup remote directory
            FileSystem fs  = FileSystem.Get(GetConf());
            Path       dir = fs.MakeQualified(new Path(parameters.remoteDir, name));

            // Nothing to do when the helper reports that the directory was not created.
            if (!Org.Apache.Hadoop.Examples.PI.Util.CreateNonexistingDirectory(fs, dir))
            {
                return;
            }
            //setup a job
            Job  job    = CreateJob(name, sigma);
            Path outdir = new Path(dir, "out");

            FileOutputFormat.SetOutputPath(job, outdir);
            //start a map/reduce job
            string startmessage = "steps/parts = " + sigma.E.GetSteps() + "/" + parameters.nParts
                                  + " = " + Org.Apache.Hadoop.Examples.PI.Util.Long2string(sigma.E.GetSteps() / parameters
                                                                                           .nParts);

            Org.Apache.Hadoop.Examples.PI.Util.RunJob(name, job, parameters.machine, startmessage
                                                      , timer);
            IList <TaskResult> results = Org.Apache.Hadoop.Examples.PI.Util.ReadJobOutputs(fs,
                                                                                           outdir);

            Org.Apache.Hadoop.Examples.PI.Util.WriteResults(name, results, fs, parameters.remoteDir
                                                            );
            // The per-job remote directory is removed once its results have been written out.
            fs.Delete(dir, true);
            //combine results
            IList <TaskResult> combined = Org.Apache.Hadoop.Examples.PI.Util.Combine(results);
            PrintWriter        @out     = Org.Apache.Hadoop.Examples.PI.Util.CreateWriter(parameters.localDir
                                                                                          , name);

            try
            {
                foreach (TaskResult r in combined)
                {
                    string s = TaskResult2string(name, r);
                    @out.WriteLine(s);
                    @out.Flush();
                    // Echo the same line to the utility class's shared writer.
                    // NOTE(review): this call was garbled in the source ("[email protected]") and
                    // has been reconstructed as Util.@out — confirm against the Util class.
                    Org.Apache.Hadoop.Examples.PI.Util.@out.WriteLine(s);
                }
            }
            finally
            {
                @out.Close();
            }
            if (combined.Count == 1)
            {
                Summation s = combined[0].GetElement();
                if (sigma.Contains(s) && s.Contains(sigma))
                {
                    sigma.SetValue(s.GetValue());
                }
            }
        }
Example #17
0
        /// <summary>
        /// Creates a file named <paramref name="filename"/> in the job's output path and
        /// closes it immediately without writing to it.
        /// </summary>
        /// <param name="conf">Job configuration that supplies the output path and its file system.</param>
        /// <param name="filename">Name of the file to create under the output path.</param>
        /// <exception cref="System.IO.IOException"/>
        private void WriteFile(JobConf conf, string filename)
        {
            System.Console.Out.WriteLine("writing file ----" + filename);

            Path jobOutputDir = FileOutputFormat.GetOutputPath(conf);
            FileSystem outputFileSystem = jobOutputDir.GetFileSystem(conf);
            Path target = new Path(jobOutputDir, filename);

            outputFileSystem.Create(target).Close();
        }
        /// <summary>
        /// Shows the <c>AssetValidatorEditorWindow</c> and launches its validator in the given
        /// mode, with project-asset and cross-scene validation both off and an empty file name.
        /// </summary>
        /// <param name="validationMode">The scene validation mode passed to the validator.</param>
        /// <param name="fileOutputFormat">The output format passed to the validator.</param>
        public static void LaunchWindowWithValidation(
            SceneValidationMode validationMode,
            FileOutputFormat fileOutputFormat)
        {
            const bool validateProjectAssets = false;
            const bool validateAcrossScenes  = false;

            var validatorWindow = GetWindow <AssetValidatorEditorWindow>();
            validatorWindow.Show();
            validatorWindow.LaunchValidator(
                validationMode,
                fileOutputFormat,
                validateProjectAssets,
                validateAcrossScenes,
                string.Empty);
        }
Example #19
0
        /// <summary>
        /// Run a test with several mappers in parallel, operating at different
        /// speeds.
        /// </summary>
        /// <remarks>
        /// Run a test with several mappers in parallel, operating at different
        /// speeds. Verify that the correct amount of output is created.
        /// A separate interrupter thread is started before waiting on the job; if waiting
        /// throws, thread information is logged ten times, one second apart, before the
        /// exception is rethrown.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestMultiMaps()
        {
            Job           job        = Job.GetInstance();
            Path          inputPath  = CreateMultiMapsInput();
            Path          outputPath = GetOutputPath();
            Configuration conf       = new Configuration();
            FileSystem    fs         = FileSystem.GetLocal(conf);

            // Remove output left behind by an earlier run.
            if (fs.Exists(outputPath))
            {
                fs.Delete(outputPath, true);
            }
            job.SetMapperClass(typeof(TestLocalRunner.StressMapper));
            job.SetReducerClass(typeof(TestLocalRunner.CountingReducer));
            job.SetNumReduceTasks(1);
            // Let the local runner execute up to 6 maps at once.
            LocalJobRunner.SetLocalMaxRunningMaps(job, 6);
            job.GetConfiguration().Set(MRJobConfig.IoSortMb, "25");
            FileInputFormat.AddInputPath(job, inputPath);
            FileOutputFormat.SetOutputPath(job, outputPath);
            // The interrupter is handed the current (test) thread as the one to interrupt.
            Sharpen.Thread toInterrupt = Sharpen.Thread.CurrentThread();
            Sharpen.Thread interrupter = new _Thread_311(toInterrupt);
            // 2m — presumably the interrupter's delay (see the log message below); confirm in _Thread_311.
            Log.Info("Submitting job...");
            job.Submit();
            Log.Info("Starting thread to interrupt main thread in 2 minutes");
            interrupter.Start();
            Log.Info("Waiting for job to complete...");
            try
            {
                job.WaitForCompletion(true);
            }
            catch (Exception ie)
            {
                // Dump thread information repeatedly to help diagnose the hang, then rethrow.
                Log.Fatal("Interrupted while waiting for job completion", ie);
                for (int i = 0; i < 10; i++)
                {
                    Log.Fatal("Dumping stacks");
                    ReflectionUtils.LogThreadInfo(Log, "multimap threads", 0);
                    Sharpen.Thread.Sleep(1000);
                }
                throw;
            }
            // The job finished in time, so the interrupter is no longer needed.
            Log.Info("Job completed, stopping interrupter");
            interrupter.Interrupt();
            try
            {
                interrupter.Join();
            }
            catch (Exception)
            {
                // Deliberately ignored: it might interrupt us right as we interrupt it.
            }
            // it might interrupt us right as we interrupt it
            Log.Info("Verifying output");
            VerifyOutput(outputPath);
        }
Example #20
0
        /// <summary>This is the main routine for launching a distributed random write job.</summary>
        /// <remarks>
        /// This is the main routine for launching a distributed random write job.
        /// It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
        /// The reduce doesn't do anything.
        /// Maps per host, bytes per map and total bytes are read from the configuration
        /// (<c>MapsPerHost</c>, <c>BytesPerMap</c>, <c>TotalBytes</c>) with those values as defaults.
        /// </remarks>
        /// <param name="args">At least one entry: the output directory.</param>
        /// <returns>
        /// 2 when no output directory is given, -2 when bytes-per-map is configured as 0,
        /// 0 when the job completes, 1 otherwise.
        /// </returns>
        /// <exception cref="System.IO.IOException"></exception>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            if (args.Length == 0)
            {
                System.Console.Out.WriteLine("Usage: writer <out-dir>");
                ToolRunner.PrintGenericCommandUsage(System.Console.Out);
                return(2);
            }
            Path          outDir                = new Path(args[0]);
            Configuration conf                  = GetConf();
            JobClient     client                = new JobClient(conf);
            ClusterStatus cluster               = client.GetClusterStatus();
            int           numMapsPerHost        = conf.GetInt(MapsPerHost, 10);
            long          numBytesToWritePerMap = conf.GetLong(BytesPerMap, 1 * 1024 * 1024 * 1024);

            if (numBytesToWritePerMap == 0)
            {
                // Space added after "have" so the property name is not glued to the word.
                System.Console.Error.WriteLine("Cannot have " + BytesPerMap + " set to 0");
                return(-2);
            }
            long totalBytesToWrite = conf.GetLong(TotalBytes, numMapsPerHost * numBytesToWritePerMap
                                                  * cluster.GetTaskTrackers());
            int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);

            // Less than one map's worth of data: run a single map that writes all of it.
            if (numMaps == 0 && totalBytesToWrite > 0)
            {
                numMaps = 1;
                conf.SetLong(BytesPerMap, totalBytesToWrite);
            }
            conf.SetInt(MRJobConfig.NumMaps, numMaps);
            Job job = Job.GetInstance(conf);

            job.SetJarByClass(typeof(RandomWriter));
            job.SetJobName("random-writer");
            FileOutputFormat.SetOutputPath(job, outDir);
            job.SetOutputKeyClass(typeof(BytesWritable));
            job.SetOutputValueClass(typeof(BytesWritable));
            job.SetInputFormatClass(typeof(RandomWriter.RandomInputFormat));
            job.SetMapperClass(typeof(RandomWriter.RandomMapper));
            job.SetReducerClass(typeof(Reducer));
            job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
            System.Console.Out.WriteLine("Running " + numMaps + " maps.");
            // reducer NONE
            job.SetNumReduceTasks(0);

            // "new DateTime()" yields DateTime.MinValue, not the current time, so the
            // timestamps are taken from DateTime.Now.
            DateTime startTime = DateTime.Now;

            System.Console.Out.WriteLine("Job started: " + startTime);
            int      ret     = job.WaitForCompletion(true) ? 0 : 1;
            DateTime endTime = DateTime.Now;

            System.Console.Out.WriteLine("Job ended: " + endTime);
            // Whole elapsed seconds, truncated.
            long elapsedSeconds = (long)(endTime - startTime).TotalSeconds;
            System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
            return(ret);
        }
Example #21
0
        /// <summary>
        /// Creates an MR job that reads <c>inputDir</c>, writes <c>outputDir</c>, uses the base
        /// <c>Mapper</c> and <c>Reducer</c> classes, and waits for it to finish.
        /// </summary>
        /// <param name="conf">Configuration the job is created from.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual void CreateAndRunJob(Configuration conf)
        {
            Job readerJob = Job.GetInstance(conf);

            readerJob.SetJarByClass(typeof(TestLineRecordReaderJobs));
            readerJob.SetMapperClass(typeof(Mapper));
            readerJob.SetReducerClass(typeof(Reducer));

            FileInputFormat.AddInputPath(readerJob, inputDir);
            FileOutputFormat.SetOutputPath(readerJob, outputDir);

            // The completion result is intentionally not inspected here.
            readerJob.WaitForCompletion(true);
        }
Example #22
0
            /// <summary>
            /// Builds one file split per configured map (<c>MRJobConfig.NumMaps</c>, default 1).
            /// Split <c>i</c> is named <c>dummy-split-i</c> under the job's output path,
            /// with start 0, length 1 and no hosts.
            /// </summary>
            /// <param name="job">Job context supplying the output path and the configuration.</param>
            /// <returns>The list of generated splits.</returns>
            /// <exception cref="System.IO.IOException"/>
            public override IList <InputSplit> GetSplits(JobContext job)
            {
                Path outputDir  = FileOutputFormat.GetOutputPath(job);
                int  splitCount = job.GetConfiguration().GetInt(MRJobConfig.NumMaps, 1);

                IList <InputSplit> splits = new AList <InputSplit>();
                for (int index = 0; index < splitCount; index++)
                {
                    Path splitPath = new Path(outputDir, "dummy-split-" + index);
                    splits.AddItem(new FileSplit(splitPath, 0, 1, null));
                }
                return splits;
            }
        /// <summary>
        /// Records the requested output settings and then starts a validation run
        /// through <c>OnValidateSelectionClick</c>.
        /// </summary>
        /// <param name="vMode">Validation mode forwarded to the validation run.</param>
        /// <param name="fileOutputFormat">Output format stored for this run.</param>
        /// <param name="doValidateProjectAssets">Forwarded unchanged to the validation run.</param>
        /// <param name="doValidateAcrossScenes">Forwarded unchanged to the validation run.</param>
        /// <param name="fileName">
        /// Output file name; when null or empty, <c>EditorConstants.DefaultLogFilename</c> is
        /// stored instead.
        /// </param>
        private void LaunchValidator(SceneValidationMode vMode,
                                     FileOutputFormat fileOutputFormat,
                                     bool doValidateProjectAssets,
                                     bool doValidateAcrossScenes,
                                     string fileName)
        {
            _selectedFileOutputFormat = fileOutputFormat;

            // Fall back to the default log file name when the caller supplied none.
            if (string.IsNullOrEmpty(fileName))
            {
                _outputFilename = EditorConstants.DefaultLogFilename;
            }
            else
            {
                _outputFilename = fileName;
            }

            OnValidateSelectionClick(vMode, doValidateProjectAssets, doValidateAcrossScenes);
        }
 /// <summary>
 /// Runs validation against the project in the given <see cref="SceneValidationMode"/>
 /// without naming an output file: an empty file name is handed to the five-argument
 /// overload. Presumably that overload then falls back to the default log file name;
 /// confirm against its implementation.
 /// </summary>
 /// <param name="validationMode">The <see cref="SceneValidationMode"/> the validation is run in.</param>
 /// <param name="fileOutputFormat">The <see cref="FileOutputFormat"/> the file will be written in, if any.</param>
 /// <param name="doValidateProjectAssets">True if project assets should be validated, false if not.</param>
 /// <param name="doValidateAcrossScenes">True if cross-scene validation should be performed.</param>
 /// <returns>The <see cref="Result"/> returned by the five-argument overload.</returns>
 public static Result RunValidation(
     SceneValidationMode validationMode,
     FileOutputFormat fileOutputFormat,
     bool doValidateProjectAssets,
     bool doValidateAcrossScenes)
 {
     // This overload never specifies a file name of its own.
     string unspecifiedFileName = string.Empty;

     Result outcome = RunValidation(
         validationMode,
         fileOutputFormat,
         doValidateProjectAssets,
         doValidateAcrossScenes,
         unspecifiedFileName);

     return outcome;
 }
Example #25
0
        /// <summary>
        /// Creates a one-column DATE table with four rows, imports it through
        /// <c>DataDrivenDBInputFormat</c> split on that column, and checks that the job
        /// succeeds and emits four reduce output records.
        /// </summary>
        /// <remarks>
        /// Uses <c>connection</c>, <c>OutDir</c>, <c>DriverClass</c> and <c>DbUrl</c>, which are
        /// defined outside this method.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestDateSplits()
        {
            Statement    s         = connection.CreateStatement();
            const string dateTable = "datetable";
            const string col       = "foo";

            try
            {
                // delete the table if it already exists.
                s.ExecuteUpdate("DROP TABLE " + dateTable);
            }
            catch (SQLException)
            {
                // Deliberately ignored: the DROP is best-effort cleanup and the
                // CREATE TABLE below runs either way.
            }
            // Create the table.
            s.ExecuteUpdate("CREATE TABLE " + dateTable + "(" + col + " DATE)");
            s.ExecuteUpdate("INSERT INTO " + dateTable + " VALUES('2010-04-01')");
            s.ExecuteUpdate("INSERT INTO " + dateTable + " VALUES('2010-04-02')");
            s.ExecuteUpdate("INSERT INTO " + dateTable + " VALUES('2010-05-01')");
            s.ExecuteUpdate("INSERT INTO " + dateTable + " VALUES('2011-04-01')");
            // commit this tx.
            connection.Commit();
            Configuration conf = new Configuration();

            conf.Set("fs.defaultFS", "file:///");
            FileSystem fs = FileSystem.GetLocal(conf);

            // Start from a clean output directory.
            fs.Delete(new Path(OutDir), true);
            // now do a dd import
            Job job = Job.GetInstance(conf);

            job.SetMapperClass(typeof(TestDataDrivenDBInputFormat.ValMapper));
            job.SetReducerClass(typeof(Reducer));
            job.SetMapOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
            job.SetMapOutputValueClass(typeof(NullWritable));
            job.SetOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol));
            job.SetOutputValueClass(typeof(NullWritable));
            job.SetNumReduceTasks(1);
            job.GetConfiguration().SetInt("mapreduce.map.tasks", 2);
            FileOutputFormat.SetOutputPath(job, new Path(OutDir));
            DBConfiguration.ConfigureDB(job.GetConfiguration(), DriverClass, DbUrl, null, null);
            // The same column is passed for both trailing arguments of SetInput.
            DataDrivenDBInputFormat.SetInput(job, typeof(TestDataDrivenDBInputFormat.DateCol),
                                             dateTable, null, col, col);
            bool ret = job.WaitForCompletion(true);

            // NUnit takes the condition / expected / actual first and the message last.
            NUnit.Framework.Assert.IsTrue(ret, "job failed");
            // Check to see that we imported as much as we thought we did.
            NUnit.Framework.Assert.AreEqual(
                4L,
                job.GetCounters().FindCounter(TaskCounter.ReduceOutputRecords).GetValue(),
                "Did not get all the records");
        }
Example #26
0
        /// <summary>
        /// Extracts matching regexs from input files and counts them, then sorts the
        /// counts by decreasing frequency.
        /// </summary>
        /// <remarks>
        /// Two jobs run back to back: "grep-search" writes its counts to a randomly named
        /// temporary directory, and "grep-sort" reads that directory and writes a single
        /// output file. The temporary directory is deleted whether or not the jobs succeed.
        /// </remarks>
        /// <param name="args">
        /// <c>inDir outDir regex [group]</c>; the optional fourth argument is stored under
        /// <c>RegexMapper.Group</c>.
        /// </param>
        /// <returns>
        /// 2 when fewer than three arguments are given, 1 when either job reports failure,
        /// 0 when both jobs succeed.
        /// </returns>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            if (args.Length < 3)
            {
                System.Console.Out.WriteLine("Grep <inDir> <outDir> <regex> [<group>]");
                ToolRunner.PrintGenericCommandUsage(System.Console.Out);
                return(2);
            }
            Path tempDir = new Path("grep-temp-" + Sharpen.Extensions.ToString(new Random().Next
                                                                                   (int.MaxValue)));
            Configuration conf = GetConf();

            conf.Set(RegexMapper.Pattern, args[2]);
            if (args.Length == 4)
            {
                conf.Set(RegexMapper.Group, args[3]);
            }
            Job grepJob = Job.GetInstance(conf);

            try
            {
                grepJob.SetJobName("grep-search");
                grepJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
                FileInputFormat.SetInputPaths(grepJob, args[0]);
                grepJob.SetMapperClass(typeof(RegexMapper));
                grepJob.SetCombinerClass(typeof(LongSumReducer));
                grepJob.SetReducerClass(typeof(LongSumReducer));
                FileOutputFormat.SetOutputPath(grepJob, tempDir);
                grepJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
                grepJob.SetOutputKeyClass(typeof(Text));
                grepJob.SetOutputValueClass(typeof(LongWritable));
                // Do not start the sort stage on top of a failed search; report the
                // failure to the caller instead of returning success.
                if (!grepJob.WaitForCompletion(true))
                {
                    return(1);
                }
                Job sortJob = Job.GetInstance(conf);
                sortJob.SetJobName("grep-sort");
                sortJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
                FileInputFormat.SetInputPaths(sortJob, tempDir);
                sortJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
                sortJob.SetMapperClass(typeof(InverseMapper));
                // write a single file
                sortJob.SetNumReduceTasks(1);
                FileOutputFormat.SetOutputPath(sortJob, new Path(args[1]));
                // sort by decreasing freq
                sortJob.SetSortComparatorClass(typeof(LongWritable.DecreasingComparator));
                if (!sortJob.WaitForCompletion(true))
                {
                    return(1);
                }
            }
            finally
            {
                // Runs on every exit path, including the early failure returns above.
                FileSystem.Get(conf).Delete(tempDir, true);
            }
            return(0);
        }
Example #27
0
        /// <summary>
        /// Configures and runs the file-system check job: sequence-file input from
        /// <c>MapInputDir</c>, <c>DistributedFSCheckMapper</c> as the mapper, a single
        /// <c>AccumulatingReducer</c> task, and Text/Text output under <c>ReadDir</c>.
        /// </summary>
        /// <exception cref="System.Exception"/>
        private void RunDistributedFSCheck()
        {
            // The job configuration is derived from the configuration of fs.
            JobConf checkConf = new JobConf(fs.GetConf(), typeof(DistributedFSCheck));

            // Input side.
            FileInputFormat.SetInputPaths(checkConf, MapInputDir);
            checkConf.SetInputFormat(typeof(SequenceFileInputFormat));

            // Map and reduce stages.
            checkConf.SetMapperClass(typeof(DistributedFSCheck.DistributedFSCheckMapper));
            checkConf.SetReducerClass(typeof(AccumulatingReducer));

            // Output side.
            FileOutputFormat.SetOutputPath(checkConf, ReadDir);
            checkConf.SetOutputKeyClass(typeof(Text));
            checkConf.SetOutputValueClass(typeof(Text));
            checkConf.SetNumReduceTasks(1);

            JobClient.RunJob(checkConf);
        }
Example #28
0
        /// <summary>
        /// Configures and runs one I/O test job with the supplied mapper: sequence-file
        /// input from <c>ControlDir</c>, a single <c>AccumulatingReducer</c> task, and
        /// Text/Text output under <paramref name="outputDir"/>.
        /// </summary>
        /// <param name="mapperClass">Mapper type installed on the job.</param>
        /// <param name="outputDir">Directory the job output is written to.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void RunIOTest(Type mapperClass, Path outputDir)
        {
            // The job configuration is derived from fsConfig.
            JobConf ioTestConf = new JobConf(fsConfig, typeof(DFSCIOTest));

            // Input side.
            FileInputFormat.SetInputPaths(ioTestConf, ControlDir);
            ioTestConf.SetInputFormat(typeof(SequenceFileInputFormat));

            // Map and reduce stages.
            ioTestConf.SetMapperClass(mapperClass);
            ioTestConf.SetReducerClass(typeof(AccumulatingReducer));

            // Output side.
            FileOutputFormat.SetOutputPath(ioTestConf, outputDir);
            ioTestConf.SetOutputKeyClass(typeof(Text));
            ioTestConf.SetOutputValueClass(typeof(Text));
            ioTestConf.SetNumReduceTasks(1);

            JobClient.RunJob(ioTestConf);
        }
Example #29
0
        /// <summary>
        /// Runs a <c>RandomTextWriterJob</c> on the mini cluster and verifies that it succeeds,
        /// that its tracking URL ends with the job id suffix, and that three output files
        /// (not counting the success marker) were written.
        /// </summary>
        /// <remarks>
        /// The test returns without running when the MR application jar does not exist.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual void TestRandomWriter()
        {
            Log.Info("\n\n\nStarting testRandomWriter().");
            if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
            {
                Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test."
                         );
                return;
            }
            RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();

            // 3072 total bytes at 1024 bytes per map; presumably this is what yields the
            // three output files asserted below.
            mrCluster.GetConfig().Set(RandomTextWriterJob.TotalBytes, "3072");
            mrCluster.GetConfig().Set(RandomTextWriterJob.BytesPerMap, "1024");
            Job  job       = randomWriterJob.CreateJob(mrCluster.GetConfig());
            Path outputDir = new Path(OutputRootDir, "random-output");

            FileOutputFormat.SetOutputPath(job, outputDir);
            job.SetSpeculativeExecution(false);
            // The AppMaster jar itself.
            job.AddFileToClassPath(AppJar);
            job.SetJarByClass(typeof(RandomTextWriterJob));
            // speed up failures
            job.SetMaxMapAttempts(1);
            job.Submit();
            string trackingUrl = job.GetTrackingURL();
            string jobId       = job.GetJobID().ToString();
            bool   succeeded   = job.WaitForCompletion(true);

            NUnit.Framework.Assert.IsTrue(succeeded);
            NUnit.Framework.Assert.AreEqual(JobStatus.State.Succeeded, job.GetJobState());

            // The URL must end with the job id from its last underscore onwards, plus "/".
            string expectedUrlSuffix = Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf("_")) + "/";

            // NUnit takes the condition / expected / actual first and the message last.
            NUnit.Framework.Assert.IsTrue(
                trackingUrl.EndsWith(expectedUrlSuffix),
                "Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId);

            // Make sure there are three files in the output-dir
            RemoteIterator <FileStatus> iterator = FileContext.GetFileContext(mrCluster.GetConfig
                                                                                  ()).ListStatus(outputDir);
            int count = 0;

            while (iterator.HasNext())
            {
                FileStatus file = iterator.Next();
                // The success marker file is not a part file, so it is not counted.
                if (!file.GetPath().GetName().Equals(FileOutputCommitter.SucceededFileName))
                {
                    count++;
                }
            }
            NUnit.Framework.Assert.AreEqual(3, count, "Number of part files is wrong!");
            VerifyRandomWriterCounters(job);
        }
Example #30
0
        /// <summary>
        /// Builds a map-only job named "Kill-Job" that uses
        /// <c>MapReduceTestUtil.KillMapper</c>. The job is returned without being submitted.
        /// </summary>
        /// <param name="conf">Configuration object the job is created from.</param>
        /// <param name="outdir">Output directory.</param>
        /// <param name="indirs">One or more input directories.</param>
        /// <returns>The configured kill job.</returns>
        /// <exception cref="System.Exception">If an error occurs creating job configuration.
        ///     </exception>
        public static Job CreateKillJob(Configuration conf, Path outdir, params Path[] indirs)
        {
            Job killJob = Job.GetInstance(conf);

            killJob.SetJobName("Kill-Job");

            // Input side.
            FileInputFormat.SetInputPaths(killJob, indirs);

            // A reducer class is registered, but the reduce task count is set to zero.
            killJob.SetMapperClass(typeof(MapReduceTestUtil.KillMapper));
            killJob.SetReducerClass(typeof(Reducer));
            killJob.SetNumReduceTasks(0);

            // Output side: Text keys and Text values.
            FileOutputFormat.SetOutputPath(killJob, outdir);
            killJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            killJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));

            return killJob;
        }