Example #1
0
        /// <summary>
        /// Prepares the shared job configuration and the input data for the tests.
        /// </summary>
        /// <remarks>
        /// Deletes the test directory, configures <c>conf</c> to read sequence-file
        /// input from "in" and write sequence-file output to "out", recreates the
        /// directories, writes a one-record input file and creates the job client.
        /// </remarks>
        public virtual void Configure()
        {
            Path root = new Path(TestDir.GetAbsolutePath());
            Path input = new Path(root, "in");
            Path output = new Path(root, "out");
            FileSystem fileSystem = FileSystem.Get(conf);

            // Remove anything left over under the test directory.
            fileSystem.Delete(root, true);

            // Job wiring: formats, paths, mapper/reducer and key/value types.
            conf.SetInt(JobContext.IoSortMb, 1);
            conf.SetInputFormat(typeof(SequenceFileInputFormat));
            FileInputFormat.SetInputPaths(conf, input);
            FileOutputFormat.SetOutputPath(conf, output);
            conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
            conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));

            // Recreate the directory tree, parent first; stop at the first failure.
            foreach (Path dir in new Path[] { root, input })
            {
                if (!fileSystem.Mkdirs(dir))
                {
                    throw new IOException("Mkdirs failed to create " + dir.ToString());
                }
            }

            // Seed the job with a single (key, value) record.
            Path seedFile = new Path(input, "part0");
            SequenceFile.Writer seedWriter = SequenceFile.CreateWriter(
                fileSystem, conf, seedFile, typeof(Text), typeof(Text));
            seedWriter.Append(new Text("rec: 1"), new Text("Hello"));
            seedWriter.Close();

            jc = new JobClient(conf);
        }
Example #2
0
        /// <summary>
        /// Creates a line-oriented record writer for the task output file
        /// <paramref name="name"/>, optionally wrapping it in a compression codec.
        /// </summary>
        /// <param name="ignored">Not used by this implementation.</param>
        /// <param name="job">Job configuration; supplies the separator and compression settings.</param>
        /// <param name="name">Base name of the output file (the codec extension is appended when compressing).</param>
        /// <param name="progress">Progress callback handed to the file system when creating the file.</param>
        /// <returns>A <c>LineRecordWriter</c> writing to the created file.</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <K, V> GetRecordWriter(FileSystem ignored, JobConf job
                                                            , string name, Progressable progress)
        {
            bool compress = GetCompressOutput(job);
            // Key/value separator defaults to a tab.
            string separator = job.Get("mapreduce.output.textoutputformat.separator", "\t");

            if (compress)
            {
                // Instantiate the configured codec (gzip unless overridden).
                Type codecType = GetOutputCompressorClass(job, typeof(GzipCodec));
                CompressionCodec codec = ReflectionUtils.NewInstance(codecType, job);

                // The file name carries the codec's default extension.
                string compressedName = name + codec.GetDefaultExtension();
                Path compressedFile = FileOutputFormat.GetTaskOutputPath(job, compressedName);
                FileSystem compressedFs = compressedFile.GetFileSystem(job);
                FSDataOutputStream rawOut = compressedFs.Create(compressedFile, progress);
                DataOutputStream codecOut = new DataOutputStream(codec.CreateOutputStream(rawOut));
                return new TextOutputFormat.LineRecordWriter <K, V>(codecOut, separator);
            }

            // Uncompressed: write straight to the task output file.
            Path plainFile = FileOutputFormat.GetTaskOutputPath(job, name);
            FileSystem plainFs = plainFile.GetFileSystem(job);
            FSDataOutputStream plainOut = plainFs.Create(plainFile, progress);
            return new TextOutputFormat.LineRecordWriter <K, V>(plainOut, separator);
        }
Example #3
0
        /// <summary>
        /// Writes <paramref name="input"/> to a single input file and configures
        /// <paramref name="conf"/> as a "wordcount" job over it.
        /// </summary>
        /// <param name="fs">File system holding the input and output directories.</param>
        /// <param name="conf">Job configuration to populate.</param>
        /// <param name="input">Text written to the "part-0" input file.</param>
        /// <param name="numMaps">Number of map tasks to request.</param>
        /// <param name="numReduces">Number of reduce tasks to request.</param>
        /// <param name="inDir">Input directory (created here).</param>
        /// <param name="outDir">Output directory (deleted here if present).</param>
        /// <exception cref="System.IO.IOException">If the input directory cannot be created.</exception>
        internal static void ConfigureWordCount(FileSystem fs, JobConf conf, string input
                                                , int numMaps, int numReduces, Path inDir, Path outDir)
        {
            const string mapperClassName = "testjar.ClassWordCount$MapClass";
            const string reducerClassName = "testjar.ClassWordCount$Reduce";

            // Clear old output and make sure the input directory exists.
            fs.Delete(outDir, true);
            bool inputDirCreated = fs.Mkdirs(inDir);
            if (!inputDirCreated)
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }

            // Write the job input.
            DataOutputStream inputStream = fs.Create(new Path(inDir, "part-0"));
            inputStream.WriteBytes(input);
            inputStream.Close();

            FileSystem.SetDefaultUri(conf, fs.GetUri());
            conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
            conf.SetJobName("wordcount");
            conf.SetInputFormat(typeof(TextInputFormat));

            // Keys are words (Text), values are counts (IntWritable).
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(IntWritable));

            // Mapper/combiner/reducer are given by class name; the reducer doubles as combiner.
            conf.Set("mapred.mapper.class", mapperClassName);
            conf.Set("mapred.combine.class", reducerClassName);
            conf.Set("mapred.reducer.class", reducerClassName);

            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReduces);

            // Use the jar that contains the test classes.
            conf.SetJarByClass(typeof(TestMiniMRClasspath));
        }
        /// <summary>
        /// Configures a map-only job ("testmap") over a freshly written input file.
        /// </summary>
        /// <param name="conf">Job configuration to populate.</param>
        /// <param name="inDir">Input directory (created here).</param>
        /// <param name="outDir">Output directory (deleted here if present).</param>
        /// <param name="input">Text written to the "part-0" input file.</param>
        /// <param name="map">Mapper class.</param>
        /// <param name="reduce">Reducer class (the job is configured with zero reduce tasks).</param>
        /// <exception cref="System.IO.IOException">If the input directory cannot be created.</exception>
        private void Configure(JobConf conf, Path inDir, Path outDir, string input, Type
                               map, Type reduce)
        {
            // Input and output may live on different file systems.
            FileSystem inputFs = inDir.GetFileSystem(conf);
            FileSystem outputFs = outDir.GetFileSystem(conf);

            outputFs.Delete(outDir, true);
            bool created = inputFs.Mkdirs(inDir);
            if (!created)
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }

            // Write the input text into the input file.
            Path inputFile = new Path(inDir, "part-0");
            DataOutputStream inputStream = inputFs.Create(inputFile);
            inputStream.WriteBytes(input);
            inputStream.Close();

            // One map task, no reduce tasks.
            conf.SetJobName("testmap");
            conf.SetMapperClass(map);
            conf.SetReducerClass(reduce);
            conf.SetNumMapTasks(1);
            conf.SetNumReduceTasks(0);
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);

            // Publish the test data root to the job, with spaces replaced by '+'.
            string rawRoot = Runtime.GetProperty("test.build.data", "/tmp");
            string testRootDir = new Path(rawRoot).ToString().Replace(' ', '+');
            conf.Set("test.build.data", testRootDir);
        }
Example #5
0
        /// <summary>
        /// Runs the job over four input files with four map tasks and a sort
        /// factor of 2, then validates the record and file counters.
        /// </summary>
        public virtual void TestOldCounterB()
        {
            JobConf jobConf = CreateConfiguration();

            // Make sure file 3 exists and file 4 does not.
            CreateWordsFile(inFiles[3], jobConf);
            RemoveWordsFile(inFiles[4], jobConf);

            // Total size of the four input files (indices 0..3).
            long totalInputSize = 0;
            for (int i = 0; i < 4; i++)
            {
                totalInputSize += GetFileSize(inFiles[i]);
            }

            jobConf.SetNumMapTasks(4);
            jobConf.SetInt(JobContext.IoSortFactor, 2);
            FileInputFormat.SetInputPaths(jobConf, InDir);
            FileOutputFormat.SetOutputPath(jobConf, new Path(OutDir, "outputO1"));

            RunningJob finishedJob = JobClient.RunJob(jobConf);
            Counters counters = finishedJob.GetCounters();

            // Expected figures (per the original test notes):
            //   spilled records: map side 2^16 (4 maps x 2^14) + reduce side 2^15
            //                    (two intermediate merges, nothing in the final merge)
            //                    = 98304
            //   input records:   4 files x 5120 (= 5 * 1024) records/file = 20480
            //   output records:  4 records per input line = 81920
            ValidateCounters(counters, 98304, 20480, 81920);
            ValidateFileCounters(counters, totalInputSize, 0, 0, 0);
        }
        /// <summary>
        /// Checks that task and job abort surface an <c>IOException</c> when the
        /// underlying file system fails to delete, and that the files are left in place.
        /// </summary>
        /// <remarks>
        /// The default file system is pointed at the "faildel" scheme, which is
        /// mapped to <c>TestFileOutputCommitter.FakeFileSystem</c>; the assertions
        /// expect its failure message to contain "fake delete failed".
        /// </remarks>
        /// <param name="version">Value stored under the committer algorithm-version key.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        private void TestFailAbortInternal(int version)
        {
            JobConf conf = new JobConf();

            conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
            conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem),
                          typeof(FileSystem));
            conf.Set(JobContext.TaskAttemptId, attempt);
            conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
            conf.SetInt(MRConstants.ApplicationAttemptId, 1);
            FileOutputFormat.SetOutputPath(conf, outDir);
            JobContext          jContext  = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
            TaskAttemptContext  tContext  = new TaskAttemptContextImpl(conf, taskID);
            FileOutputCommitter committer = new FileOutputCommitter();

            // do setup
            committer.SetupJob(jContext);
            committer.SetupTask(tContext);
            // write output into <outDir>/<temp>/<appAttemptId>/<temp>/_<taskID>/<partFile>
            FilePath jobTmpDir = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName
                                                       + Path.Separator + conf.GetInt(MRConstants.ApplicationAttemptId, 0) + Path.Separator
                                                       + FileOutputCommitter.TempDirName).ToString());
            FilePath taskTmpDir   = new FilePath(jobTmpDir, "_" + taskID);
            FilePath expectedFile = new FilePath(taskTmpDir, partFile);
            TextOutputFormat <object, object> theOutputFormat = new TextOutputFormat();
            RecordWriter <object, object>     theRecordWriter = theOutputFormat.GetRecordWriter(null
                                                                                                , conf, expectedFile.GetAbsolutePath(), null);

            WriteOutput(theRecordWriter, tContext);
            // do abort: the task abort must fail with the fake delete error
            Exception th = null;

            try
            {
                committer.AbortTask(tContext);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            // NUnit takes (condition, message); the failed delete must leave the file behind.
            NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");
            // the job abort must fail the same way and leave the job temp dir behind
            th = null;
            try
            {
                committer.AbortJob(jContext, JobStatus.State.Failed);
            }
            catch (IOException ie)
            {
                th = ie;
            }
            NUnit.Framework.Assert.IsNotNull(th);
            NUnit.Framework.Assert.IsTrue(th is IOException);
            NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
            NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #7
0
        /// <summary>
        /// Runs an identity-mapper job whose name contains regex-special characters
        /// and checks that it produces the expected single output line.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public virtual void TestComplexNameWithRegex()
        {
            // Write a one-line input file.
            OutputStream inputStream = GetFileSystem().Create(new Path(GetInputDir(), "text.txt"));
            TextWriter inputWriter = new OutputStreamWriter(inputStream);
            inputWriter.Write("b a\n");
            inputWriter.Close();

            JobConf jobConf = CreateJobConf();
            // Job name under test.
            jobConf.SetJobName("name \\Evalue]");
            jobConf.SetInputFormat(typeof(TextInputFormat));
            jobConf.SetOutputKeyClass(typeof(LongWritable));
            jobConf.SetOutputValueClass(typeof(Text));
            jobConf.SetMapperClass(typeof(IdentityMapper));
            FileInputFormat.SetInputPaths(jobConf, GetInputDir());
            FileOutputFormat.SetOutputPath(jobConf, GetOutputDir());
            JobClient.RunJob(jobConf);

            // Exactly one output file is expected.
            Path[] results = FileUtil.Stat2Paths(
                GetFileSystem().ListStatus(GetOutputDir(), new Utils.OutputFileUtils.OutputFilesFilter()));
            NUnit.Framework.Assert.AreEqual(1, results.Length);

            // It must hold one line: the byte offset key, a tab, and the input text.
            InputStream resultStream = GetFileSystem().Open(results[0]);
            BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
            NUnit.Framework.Assert.AreEqual("0\tb a", resultReader.ReadLine());
            NUnit.Framework.Assert.IsNull(resultReader.ReadLine());
            resultReader.Close();
        }
        /// <summary>
        /// Writes map-file output through a <c>FileOutputCommitter</c>, commits the
        /// task and job, and validates the committed content.
        /// </summary>
        /// <param name="version">Value stored under the committer algorithm-version key.</param>
        /// <exception cref="System.Exception"/>
        private void TestMapFileOutputCommitterInternal(int version)
        {
            JobConf jobConf = new JobConf();
            FileOutputFormat.SetOutputPath(jobConf, outDir);
            jobConf.Set(JobContext.TaskAttemptId, attempt);
            jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

            JobContext jobContext = new JobContextImpl(jobConf, ((JobID)taskID.GetJobID()));
            TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskID);
            FileOutputCommitter outputCommitter = new FileOutputCommitter();

            // Set up the job, then the task.
            outputCommitter.SetupJob(jobContext);
            outputCommitter.SetupTask(taskContext);

            // Produce the task output.
            MapFileOutputFormat format = new MapFileOutputFormat();
            RecordWriter recordWriter = format.GetRecordWriter(null, jobConf, partFile, null);
            WriteMapFileOutput(recordWriter, taskContext);

            // Commit the task only when the committer asks for it, then commit the job.
            bool taskNeedsCommit = outputCommitter.NeedsTaskCommit(taskContext);
            if (taskNeedsCommit)
            {
                outputCommitter.CommitTask(taskContext);
            }
            outputCommitter.CommitJob(jobContext);

            // Check the result and remove the output directory.
            ValidateMapFileOutputContent(FileSystem.Get(jobConf), outDir);
            FileUtil.FullyDelete(new FilePath(outDir.ToString()));
        }
Example #9
0
        /// <summary>
        /// Starts a job with the specified input and returns its <c>RunningJob</c> object.
        /// </summary>
        /// <remarks>
        /// Deletes <paramref name="outDir"/> if it exists, creates
        /// <paramref name="inDir"/> if needed, writes one input file per map
        /// ("part-0", "part-1", ...) and submits the job without waiting for it.
        /// </remarks>
        /// <param name="conf">Job configuration to populate and submit.</param>
        /// <param name="inDir">Input directory.</param>
        /// <param name="outDir">Output directory.</param>
        /// <param name="numMaps">Number of map tasks, and of input files written.</param>
        /// <param name="numReds">Number of reduce tasks.</param>
        /// <param name="input">Text written to every input file.</param>
        /// <returns>The handle of the submitted job.</returns>
        /// <exception cref="System.IO.IOException">
        /// If the input directory cannot be created or a file system operation fails.
        /// </exception>
        internal static RunningJob RunJob(JobConf conf, Path inDir, Path outDir, int numMaps
                                          , int numReds, string input)
        {
            FileSystem fs = FileSystem.Get(conf);

            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            // Fail fast instead of silently ignoring a failed directory creation.
            if (!fs.Exists(inDir) && !fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }
            for (int i = 0; i < numMaps; ++i)
            {
                DataOutputStream file = fs.Create(new Path(inDir, "part-" + i));
                try
                {
                    file.WriteBytes(input);
                }
                finally
                {
                    // Release the stream even when the write fails.
                    file.Close();
                }
            }
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReds);

            JobClient jobClient = new JobClient(conf);
            return jobClient.SubmitJob(conf);
        }
Example #10
0
        /// <summary>
        /// Configures and runs the "test-lazy-output" job with either a lazily
        /// wrapped or a plain <c>TextOutputFormat</c>.
        /// </summary>
        /// <param name="job">Job configuration to populate and run.</param>
        /// <param name="output">Output directory of the job.</param>
        /// <param name="numReducers">Number of reduce tasks.</param>
        /// <param name="createLazily">
        /// When true the output format is registered through <c>LazyOutputFormat</c>;
        /// otherwise it is set directly on the job.
        /// </param>
        /// <exception cref="System.Exception"/>
        private static void RunTestLazyOutput(JobConf job, Path output, int numReducers,
                                              bool createLazily)
        {
            job.SetJobName("test-lazy-output");

            // Paths and input format.
            FileInputFormat.SetInputPaths(job, Input);
            FileOutputFormat.SetOutputPath(job, output);
            job.SetInputFormat(typeof(TextInputFormat));

            // Intermediate and final key/value types are the same.
            job.SetMapOutputKeyClass(typeof(LongWritable));
            job.SetMapOutputValueClass(typeof(Text));
            job.SetOutputKeyClass(typeof(LongWritable));
            job.SetOutputValueClass(typeof(Text));

            job.SetMapperClass(typeof(TestLazyOutput.TestMapper));
            job.SetReducerClass(typeof(TestLazyOutput.TestReducer));

            // Kept as in the original flow; the instance itself is not used below.
            JobClient jobClient = new JobClient(job);

            job.SetNumReduceTasks(numReducers);
            if (!createLazily)
            {
                job.SetOutputFormat(typeof(TextOutputFormat));
            }
            else
            {
                LazyOutputFormat.SetOutputFormatClass(job, typeof(TextOutputFormat));
            }

            JobClient.RunJob(job);
        }
Example #11
0
        /// <summary>
        /// Runs the job described by <paramref name="conf"/> with the test's mapper,
        /// reducer and input format, asserts that it succeeded and returns its counters.
        /// </summary>
        /// <remarks>The "/out" directory is removed afterwards, whether or not the job succeeded.</remarks>
        /// <param name="conf">Job configuration to populate and run.</param>
        /// <returns>The counters of the completed job.</returns>
        /// <exception cref="System.Exception"/>
        public static Counters RunJob(JobConf conf)
        {
            conf.SetMapperClass(typeof(TestReduceFetchFromPartialMem.MapMB));
            conf.SetReducerClass(typeof(TestReduceFetchFromPartialMem.MBValidate));
            conf.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
            conf.SetNumReduceTasks(1);
            conf.SetInputFormat(typeof(TestReduceFetchFromPartialMem.FakeIF));
            conf.SetNumTasksToExecutePerJvm(1);
            conf.SetInt(JobContext.MapMaxAttempts, 0);
            conf.SetInt(JobContext.ReduceMaxAttempts, 0);

            FileInputFormat.SetInputPaths(conf, new Path("/in"));
            Path outputPath = new Path("/out");
            FileOutputFormat.SetOutputPath(conf, outputPath);

            RunningJob completed = null;
            try
            {
                completed = JobClient.RunJob(conf);
                NUnit.Framework.Assert.IsTrue(completed.IsSuccessful());
            }
            finally
            {
                // Always clean up the output directory on the cluster file system.
                FileSystem clusterFs = dfsCluster.GetFileSystem();
                bool outputExists = clusterFs.Exists(outputPath);
                if (outputExists)
                {
                    clusterFs.Delete(outputPath, true);
                }
            }

            return completed.GetCounters();
        }
Example #12
0
        /// <summary>
        /// Creates the input, runs a one-map/one-reduce job with record skipping
        /// enabled, and validates its output against the expected bad records.
        /// </summary>
        /// <param name="conf">Job configuration to populate and run.</param>
        /// <param name="mapperBadRecords">Records expected to fail in the mapper.</param>
        /// <param name="redBadRecords">Records expected to fail in the reducer.</param>
        /// <exception cref="System.Exception"/>
        private void RunMapReduce(JobConf conf, IList <string> mapperBadRecords, IList <string
                                                                                        > redBadRecords)
        {
            CreateInput();

            conf.SetJobName("mr");
            conf.SetNumMapTasks(1);
            conf.SetNumReduceTasks(1);
            conf.SetInt(JobContext.TaskTimeout, 30 * 1000);

            // Skip without limit, starting from the very first attempt.
            SkipBadRecords.SetMapperMaxSkipRecords(conf, long.MaxValue);
            SkipBadRecords.SetReducerMaxSkipGroups(conf, long.MaxValue);
            SkipBadRecords.SetAttemptsToStartSkipping(conf, 0);

            // The number of attempts needed to complete a task grows with the
            // number of bad records it has to get past.
            int mapAttempts = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + mapperBadRecords.Count;
            conf.SetMaxMapAttempts(mapAttempts);
            int reduceAttempts = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + redBadRecords.Count;
            conf.SetMaxReduceAttempts(reduceAttempts);

            FileInputFormat.SetInputPaths(conf, GetInputDir());
            FileOutputFormat.SetOutputPath(conf, GetOutputDir());
            conf.SetInputFormat(typeof(TextInputFormat));
            conf.SetMapOutputKeyClass(typeof(LongWritable));
            conf.SetMapOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(TextOutputFormat));
            conf.SetOutputKeyClass(typeof(LongWritable));
            conf.SetOutputValueClass(typeof(Text));

            RunningJob finishedJob = JobClient.RunJob(conf);
            ValidateOutput(conf, finishedJob, mapperBadRecords, redBadRecords);
        }
Example #13
0
        /// <summary>
        /// Runs a map-only job using <c>TestReporter.StatusLimitMapper</c> over a
        /// one-file input and asserts that the job succeeds.
        /// </summary>
        public virtual void TestStatusLimit()
        {
            Path          test   = new Path(testRootTempDir, "testStatusLimit");
            Configuration conf   = new Configuration();
            Path          inDir  = new Path(test, "in");
            Path          outDir = new Path(test, "out");
            FileSystem    fs     = FileSystem.Get(conf);

            // Recreate the input directory from scratch.
            if (fs.Exists(inDir))
            {
                fs.Delete(inDir, true);
            }
            fs.Mkdirs(inDir);
            DataOutputStream file = fs.Create(new Path(inDir, "part-" + 0));

            try
            {
                file.WriteBytes("testStatusLimit");
            }
            finally
            {
                // Release the stream even when the write fails.
                file.Close();
            }
            // The output directory must not exist when the job starts.
            if (fs.Exists(outDir))
            {
                fs.Delete(outDir, true);
            }
            Job job = Job.GetInstance(conf, "testStatusLimit");

            job.SetMapperClass(typeof(TestReporter.StatusLimitMapper));
            job.SetNumReduceTasks(0);
            FileInputFormat.AddInputPath(job, inDir);
            FileOutputFormat.SetOutputPath(job, outDir);
            job.WaitForCompletion(true);
            // NUnit takes (condition, message), the reverse of JUnit's order.
            NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
        }
Example #14
0
        /// <summary>
        /// Runs the job over five input files with four map tasks requested and a
        /// sort factor of 3, then validates the record and file counters.
        /// </summary>
        public virtual void TestOldCounterC()
        {
            JobConf jobConf = CreateConfiguration();

            // Make sure files 3 and 4 both exist.
            CreateWordsFile(inFiles[3], jobConf);
            CreateWordsFile(inFiles[4], jobConf);

            // Total size of the five input files (indices 0..4).
            long totalInputSize = 0;
            for (int i = 0; i < 5; i++)
            {
                totalInputSize += GetFileSize(inFiles[i]);
            }

            jobConf.SetNumMapTasks(4);
            jobConf.SetInt(JobContext.IoSortFactor, 3);
            FileInputFormat.SetInputPaths(jobConf, InDir);
            FileOutputFormat.SetOutputPath(jobConf, new Path(OutDir, "outputO2"));

            RunningJob finishedJob = JobClient.RunJob(jobConf);
            Counters counters = finishedJob.GetCounters();

            // Expected figures:
            //   input records:   5 files x 5120 (= 5 * 1024) records/file = 25600
            //   output records:  4 records per input line = 102400
            //   spilled records: 122880
            // NOTE(review): the original notes break the spills down as 81920 on the
            // map side (5 maps x 2^14) plus 45056 on the reduce side (1st merge
            // 6 * 8192, final merge 2 * 8192 unmerged); those do not add up to the
            // asserted 122880 - confirm which figure is intended.
            ValidateCounters(counters, 122880, 25600, 102400);
            ValidateFileCounters(counters, totalInputSize, 0, 0, 0);
        }
Example #15
0
        /// <summary>
        /// HADOOP-4466:
        /// Verifies that the JavaSerialization implementation can write to
        /// SequenceFiles.
        /// </summary>
        /// <remarks>
        /// The job uses <c>string</c> keys and <c>long</c> values, which are not
        /// Writable types. It is expected to fail if SequenceFileOutputFormat were
        /// coupled to Writable types.
        /// </remarks>
        /// <exception cref="System.Exception"/>
        public virtual void TestWriteToSequencefile()
        {
            JobConf jobConf = new JobConf(typeof(TestJavaSerialization));
            jobConf.SetJobName("JavaSerialization");

            FileSystem fileSystem = FileSystem.Get(jobConf);
            CleanAndCreateInput(fileSystem);

            // Register Java serialization ahead of Writable serialization.
            string serializations = "org.apache.hadoop.io.serializer.JavaSerialization,"
                                    + "org.apache.hadoop.io.serializer.WritableSerialization";
            jobConf.Set("io.serializations", serializations);

            jobConf.SetInputFormat(typeof(TextInputFormat));
            // The point of the test: output goes to sequence files.
            jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            jobConf.SetOutputKeyClass(typeof(string));
            jobConf.SetOutputValueClass(typeof(long));
            jobConf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
            jobConf.SetMapperClass(typeof(TestJavaSerialization.WordCountMapper));
            jobConf.SetReducerClass(typeof(TestJavaSerialization.SumReducer));
            jobConf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            FileInputFormat.SetInputPaths(jobConf, InputDir);
            FileOutputFormat.SetOutputPath(jobConf, OutputDir);

            JobClient.RunJob(jobConf);

            // The job must have produced exactly one output file.
            Path[] results = FileUtil.Stat2Paths(
                fileSystem.ListStatus(OutputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
            NUnit.Framework.Assert.AreEqual(1, results.Length);
        }
        /// <summary>Generate input data for the benchmark</summary>
        /// <remarks>
        /// Runs a map-only random-writer job with <c>numMapsPerHost</c> maps per
        /// task tracker, writing its output to <c>InputDir</c>. <c>BaseDir</c> is
        /// deleted before the job starts.
        /// </remarks>
        /// <param name="dataSizePerMap">Data written by each map, in megabytes.</param>
        /// <param name="numSpillsPerMap">Number of spills per map; only logged here.</param>
        /// <param name="numMapsPerHost">Number of map tasks per task tracker.</param>
        /// <param name="masterConf">Configuration the job configuration is derived from.</param>
        /// <exception cref="System.Exception"/>
        public static void GenerateInputData(int dataSizePerMap, int numSpillsPerMap, int
                                             numMapsPerHost, JobConf masterConf)
        {
            JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));

            job.SetJobName("threaded-map-benchmark-random-writer");
            job.SetJarByClass(typeof(ThreadedMapBenchmark));
            job.SetInputFormat(typeof(UtilsForTests.RandomInputFormat));
            job.SetOutputFormat(typeof(SequenceFileOutputFormat));
            job.SetMapperClass(typeof(ThreadedMapBenchmark.Map));
            job.SetReducerClass(typeof(IdentityReducer));
            job.SetOutputKeyClass(typeof(BytesWritable));
            job.SetOutputValueClass(typeof(BytesWritable));
            JobClient     client       = new JobClient(job);
            ClusterStatus cluster      = client.GetClusterStatus();
            int           taskTrackers = cluster.GetTaskTrackers();

            // Widen to 64-bit BEFORE multiplying: the 32-bit products wrap silently
            // (2048 MB per map already exceeds int.MaxValue bytes).
            long totalDataSize = (long)dataSizePerMap * numMapsPerHost * taskTrackers;
            long bytesPerMap   = dataSizePerMap * 1024L * 1024L;

            job.Set("test.tmb.bytes_per_map", bytesPerMap.ToString());
            // map-only job: no reduce tasks
            job.SetNumReduceTasks(0);
            job.SetNumMapTasks(numMapsPerHost * taskTrackers);
            FileOutputFormat.SetOutputPath(job, InputDir);
            FileSystem fs = FileSystem.Get(job);

            fs.Delete(BaseDir, true);
            Log.Info("Generating random input for the benchmark");
            Log.Info("Total data : " + totalDataSize + " mb");
            Log.Info("Data per map: " + dataSizePerMap + " mb");
            Log.Info("Number of spills : " + numSpillsPerMap);
            Log.Info("Number of maps per host : " + numMapsPerHost);
            Log.Info("Number of hosts : " + taskTrackers);
            JobClient.RunJob(job);
        }
Example #17
0
        /// <summary>
        /// Writes a three-line input file, runs a high-priority identity
        /// map/reduce job over it and returns the job's id.
        /// </summary>
        /// <returns>The string form of the completed job's id.</returns>
        /// <exception cref="System.Exception"/>
        private string RunJob()
        {
            // Input: three short lines.
            OutputStream inputStream = GetFileSystem().Create(new Path(GetInputDir(), "text.txt"));
            TextWriter inputWriter = new OutputStreamWriter(inputStream);
            inputWriter.Write("hello1\n");
            inputWriter.Write("hello2\n");
            inputWriter.Write("hello3\n");
            inputWriter.Close();

            JobConf jobConf = CreateJobConf();
            jobConf.SetJobName("mr");
            jobConf.SetJobPriority(JobPriority.High);

            // Text in, text out; keys are LongWritable and values Text throughout.
            jobConf.SetInputFormat(typeof(TextInputFormat));
            jobConf.SetMapOutputKeyClass(typeof(LongWritable));
            jobConf.SetMapOutputValueClass(typeof(Text));
            jobConf.SetOutputFormat(typeof(TextOutputFormat));
            jobConf.SetOutputKeyClass(typeof(LongWritable));
            jobConf.SetOutputValueClass(typeof(Text));

            // Identity mapper and reducer.
            jobConf.SetMapperClass(typeof(IdentityMapper));
            jobConf.SetReducerClass(typeof(IdentityReducer));

            FileInputFormat.SetInputPaths(jobConf, GetInputDir());
            FileOutputFormat.SetOutputPath(jobConf, GetOutputDir());

            RunningJob finishedJob = JobClient.RunJob(jobConf);
            return finishedJob.GetID().ToString();
        }
Example #18
0
        /// <summary>The main driver for word count map/reduce program.</summary>
        /// <remarks>
        /// Invoke this method to submit the map/reduce job.
        /// Recognized options are <c>-m &lt;maps&gt;</c> and <c>-r &lt;reduces&gt;</c>;
        /// exactly two other arguments must remain: the input path(s) and the
        /// output path.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>
        /// 0 when the job was run; otherwise the result of <c>PrintUsage()</c>
        /// after an argument error has been reported.
        /// </returns>
        /// <exception cref="System.IO.IOException">
        /// When there is communication problems with the
        /// job tracker.
        /// </exception>
        /// <exception cref="System.Exception"/>
        public virtual int Run(string[] args)
        {
            JobConf conf = new JobConf(GetConf(), typeof(WordCount));

            conf.SetJobName("wordcount");
            // the keys are words (strings)
            conf.SetOutputKeyClass(typeof(Text));
            // the values are counts (ints)
            conf.SetOutputValueClass(typeof(IntWritable));
            conf.SetMapperClass(typeof(WordCount.MapClass));
            conf.SetCombinerClass(typeof(WordCount.Reduce));
            conf.SetReducerClass(typeof(WordCount.Reduce));
            IList <string> otherArgs = new AList <string>();

            for (int i = 0; i < args.Length; ++i)
            {
                string arg = args[i];
                bool isMapOption = "-m".Equals(arg);
                bool isReduceOption = !isMapOption && "-r".Equals(arg);

                if (!isMapOption && !isReduceOption)
                {
                    otherArgs.AddItem(arg);
                    continue;
                }

                // The option needs a value right after it.
                if (i + 1 >= args.Length)
                {
                    System.Console.Out.WriteLine("ERROR: Required parameter missing from " + arg);
                    return(PrintUsage());
                }

                // TryParse rejects both malformed and out-of-range numbers, so an
                // oversized value is reported as a usage error instead of escaping
                // as an unhandled OverflowException.
                string rawCount = args[++i];
                int count;
                if (!int.TryParse(rawCount, out count))
                {
                    System.Console.Out.WriteLine("ERROR: Integer expected instead of " + rawCount);
                    return(PrintUsage());
                }

                if (isMapOption)
                {
                    conf.SetNumMapTasks(count);
                }
                else
                {
                    conf.SetNumReduceTasks(count);
                }
            }
            // Make sure there are exactly 2 parameters left.
            if (otherArgs.Count != 2)
            {
                System.Console.Out.WriteLine("ERROR: Wrong number of parameters: " + otherArgs.Count
                                             + " instead of 2.");
                return(PrintUsage());
            }
            FileInputFormat.SetInputPaths(conf, otherArgs[0]);
            FileOutputFormat.SetOutputPath(conf, new Path(otherArgs[1]));
            JobClient.RunJob(conf);
            return(0);
        }
Example #19
0
            /// <summary>
            /// Logs a marker line to standard error and creates an empty file named
            /// <c>CustomCleanupFileName</c> in the job's output directory.
            /// </summary>
            /// <param name="context">Job context supplying the job configuration.</param>
            /// <exception cref="System.IO.IOException"/>
            public override void CleanupJob(JobContext context)
            {
                System.Console.Error.WriteLine("---- HERE ----");

                JobConf jobConf = context.GetJobConf();
                Path outputDir = FileOutputFormat.GetOutputPath(jobConf);
                FileSystem outputFs = outputDir.GetFileSystem(jobConf);

                // Create and immediately close: only the file's existence matters.
                Path markerFile = new Path(outputDir, CustomCleanupFileName);
                outputFs.Create(markerFile).Close();
            }
Example #20
0
            /// <summary>
            /// Runs the "sortvalidate-record-checker" job over both the sort input
            /// and the sort output, reporting its progress and duration on the console.
            /// </summary>
            /// <param name="defaults">Base configuration for the job.</param>
            /// <param name="noMaps">Number of map tasks, or -1 to derive it from the cluster size.</param>
            /// <param name="noReduces">Number of reduce tasks, or -1 to derive it from the cluster.</param>
            /// <param name="sortInput">Path of the data that was sorted.</param>
            /// <param name="sortOutput">Path of the sorted data.</param>
            /// <exception cref="System.IO.IOException"/>
            internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces
                                              , Path sortInput, Path sortOutput)
            {
                JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));

                jobConf.SetJobName("sortvalidate-record-checker");
                jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
                jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
                jobConf.SetOutputKeyClass(typeof(BytesWritable));
                jobConf.SetOutputValueClass(typeof(IntWritable));
                jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
                jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));
                JobClient     client  = new JobClient(jobConf);
                ClusterStatus cluster = client.GetClusterStatus();

                // -1 means "pick for me": maps-per-host (default 10) times the tracker count.
                if (noMaps == -1)
                {
                    noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
                }
                // -1 means "pick for me": 90% of the cluster's reduce capacity, unless a
                // reduces-per-host setting is present in the configuration.
                if (noReduces == -1)
                {
                    noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
                    string sortReduces = jobConf.Get(ReducesPerHost);
                    if (sortReduces != null)
                    {
                        noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
                    }
                }
                jobConf.SetNumMapTasks(noMaps);
                jobConf.SetNumReduceTasks(noReduces);
                // Both the unsorted input and the sorted output are inputs of the checker.
                FileInputFormat.SetInputPaths(jobConf, sortInput);
                FileInputFormat.AddInputPath(jobConf, sortOutput);
                Path       outputPath = new Path("/tmp/sortvalidate/recordchecker");
                FileSystem fs         = FileSystem.Get(defaults);

                if (fs.Exists(outputPath))
                {
                    fs.Delete(outputPath, true);
                }
                FileOutputFormat.SetOutputPath(jobConf, outputPath);
                // Uncomment to run locally in a single process
                //job_conf.set(JTConfig.JT, "local");
                Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
                System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster
                                             .GetTaskTrackers() + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths
                                             [1] + " into " + FileOutputFormat.GetOutputPath(jobConf) + " with " + noReduces
                                             + " reduces.");

                // "new DateTime()" is DateTime.MinValue, not the current time; take real
                // timestamps (UTC, so the elapsed time is immune to local clock shifts).
                DateTime startUtc = DateTime.UtcNow;

                System.Console.Out.WriteLine("Job started: " + startUtc.ToLocalTime());
                JobClient.RunJob(jobConf);
                DateTime endUtc = DateTime.UtcNow;

                System.Console.Out.WriteLine("Job ended: " + endUtc.ToLocalTime());
                // Whole seconds, truncated.
                long elapsedSeconds = (long)(endUtc - startUtc).TotalSeconds;
                System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
            }
        /// <summary>
        /// Runs the job returned by <c>CreateJob</c> with <c>inDir</c> as input and the
        /// "testJob" directory under <c>outDir</c> as output, asserts that
        /// <c>WaitForCompletion</c> returns true, and passes the job's counters to
        /// <c>ValidateCounters</c>.
        /// </summary>
        public virtual void TestJob()
        {
            Job  testJob   = CreateJob();
            Path jobOutput = new Path(outDir, "testJob");

            FileInputFormat.SetInputPaths(testJob, inDir);
            FileOutputFormat.SetOutputPath(testJob, jobOutput);

            bool completed = testJob.WaitForCompletion(true);
            NUnit.Framework.Assert.IsTrue(completed);

            ValidateCounters(testJob.GetCounters(), 5, 25, 5, 5);
        }
Example #22
0
        /// <summary>
        /// Writes a fixed sequence of key/value pairs (including null and NullWritable
        /// keys and values) through a <c>TextOutputFormat</c> record writer configured
        /// with the separator "\u0001", then asserts that the file contents equal the
        /// expected text built below.
        /// </summary>
        public virtual void TestFormatWithCustomSeparator()
        {
            string  separator = "\u0001";
            JobConf conf      = new JobConf();

            conf.Set("mapreduce.output.textoutputformat.separator", separator);
            conf.Set(JobContext.TaskAttemptId, attempt);
            FileOutputFormat.SetOutputPath(conf, workDir.GetParent().GetParent());
            FileOutputFormat.SetWorkOutputPath(conf, workDir);

            FileSystem workFs = workDir.GetFileSystem(conf);
            if (!workFs.Mkdirs(workDir))
            {
                NUnit.Framework.Assert.Fail("Failed to create output directory");
            }

            string   fileName    = "test_custom.txt";
            Reporter nopReporter = Reporter.Null;   // a reporter that does nothing

            TextOutputFormat<object, object> format = new TextOutputFormat<object, object>();
            RecordWriter<object, object>     writer =
                format.GetRecordWriter(localFs, conf, fileName, nopReporter);

            Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
            Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
            Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
            Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
            NullWritable              none = NullWritable.Get();

            try
            {
                writer.Write(key1, val1);
                writer.Write(null, none);
                writer.Write(null, val1);
                writer.Write(none, val2);
                writer.Write(key2, none);
                writer.Write(key1, null);
                writer.Write(null, null);
                writer.Write(key2, val2);
            }
            finally
            {
                writer.Close(nopReporter);
            }

            // Expected text: six lines for the eight writes above; the separator
            // appears only on the two lines that carry both a key and a value.
            StringBuilder expected = new StringBuilder();
            expected.Append(key1).Append(separator).Append(val1).Append("\n")
                    .Append(val1).Append("\n")
                    .Append(val2).Append("\n")
                    .Append(key2).Append("\n")
                    .Append(key1).Append("\n")
                    .Append(key2).Append(separator).Append(val2).Append("\n");

            Path     writtenPath = new Path(workDir, fileName);
            FilePath writtenFile = new FilePath(writtenPath.ToString());
            string   actual      = UtilsForTests.Slurp(writtenFile);

            NUnit.Framework.Assert.AreEqual(expected.ToString(), actual);
        }
Example #23
0
        /// <summary>
        /// Runs a local-framework job over a three-line text input and asserts that the
        /// job completes successfully, that reduce output 0 exists, and that the
        /// compression state reported by its <c>SequenceFile.Reader</c> matches
        /// <paramref name="redCompression"/>. The test directory is deleted before the
        /// run and again afterwards, whether or not the checks pass.
        /// </summary>
        /// <param name="compressMapOutputs">Value passed to <c>SetCompressMapOutput</c>.</param>
        /// <param name="redCompression">
        /// Output compression type; anything other than <c>None</c> is expected to yield
        /// a compressed reduce output.
        /// </param>
        /// <param name="includeCombine">
        /// When true, <c>IdentityReducer</c> is set as the combiner.
        /// </param>
        /// <exception cref="System.Exception"/>
        private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType
                                      redCompression, bool includeCombine)
        {
            JobConf    conf    = new JobConf(typeof(TestMapRed));
            Path       testdir = new Path(TestDir.GetAbsolutePath());
            Path       inDir   = new Path(testdir, "in");
            Path       outDir  = new Path(testdir, "out");
            FileSystem fs      = FileSystem.Get(conf);

            fs.Delete(testdir, true);
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetMapperClass(typeof(TestMapRed.MyMap));
            conf.SetReducerClass(typeof(TestMapRed.MyReduce));
            conf.SetOutputKeyClass(typeof(Text));
            conf.SetOutputValueClass(typeof(Text));
            conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
            conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
            if (includeCombine)
            {
                conf.SetCombinerClass(typeof(IdentityReducer));
            }
            conf.SetCompressMapOutput(compressMapOutputs);
            SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
            try
            {
                if (!fs.Mkdirs(testdir))
                {
                    throw new IOException("Mkdirs failed to create " + testdir.ToString());
                }
                if (!fs.Mkdirs(inDir))
                {
                    throw new IOException("Mkdirs failed to create " + inDir.ToString());
                }
                Path             inFile = new Path(inDir, "part0");
                DataOutputStream f      = fs.Create(inFile);
                try
                {
                    f.WriteBytes("Owen was here\n");
                    f.WriteBytes("Hadoop is fun\n");
                    f.WriteBytes("Is this done, yet?\n");
                }
                finally
                {
                    // Close even when a write fails so the stream is not left open
                    // while the outer finally deletes the test directory.
                    f.Close();
                }
                RunningJob rj = JobClient.RunJob(conf);
                // NOTE(review): the assertions below pass the message first; confirm the
                // assertion API in use accepts that argument order.
                NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
                NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
                Path output = new Path(outDir, Task.GetOutputName(0));
                NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output)
                                              );
                SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
                try
                {
                    NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output, redCompression
                                                    != SequenceFile.CompressionType.None, rdr.IsCompressed());
                }
                finally
                {
                    // A failing assertion must not leave the reader open.
                    rdr.Close();
                }
            }
            finally
            {
                fs.Delete(testdir, true);
            }
        }
Example #24
0
            /// <summary>
            /// Creates a marker file in the job's configured output directory and closes it
            /// immediately. The file is named <c>TestJobCleanup.AbortFailedFileName</c> when
            /// <paramref name="state"/> equals <c>JobStatus.Failed</c>, and
            /// <c>TestJobCleanup.AbortKilledFileName</c> for any other state.
            /// </summary>
            /// <param name="context">Job context that supplies the job configuration.</param>
            /// <param name="state">Job state value compared against <c>JobStatus.Failed</c>.</param>
            /// <exception cref="System.IO.IOException"/>
            public override void AbortJob(JobContext context, int state)
            {
                JobConf    jobConf   = context.GetJobConf();
                Path       outputDir = FileOutputFormat.GetOutputPath(jobConf);
                FileSystem outputFs  = outputDir.GetFileSystem(jobConf);

                string markerName;
                if (state == JobStatus.Failed)
                {
                    markerName = TestJobCleanup.AbortFailedFileName;
                }
                else
                {
                    markerName = TestJobCleanup.AbortKilledFileName;
                }

                Path markerFile = new Path(outputDir, markerName);
                outputFs.Create(markerFile).Close();
            }
        /// <summary>
        /// Returns the work output path from the job configuration when one is set.
        /// Otherwise, if <paramref name="out"/> is non-null, returns the path that
        /// <c>FileOutputCommitter.GetTaskAttemptPath</c> gives for it.
        /// </summary>
        /// <param name="context">Task attempt context that supplies the job configuration.</param>
        /// <param name="out">Output path used for the fallback; may be null.</param>
        /// <returns>
        /// The resolved path, or null when no work output path is configured and
        /// <paramref name="out"/> is null.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        private Path GetTaskAttemptPath(TaskAttemptContext context, Path @out)
        {
            Path configuredWorkPath = FileOutputFormat.GetWorkOutputPath(context.GetJobConf());

            // Either a work path is configured, or there is nothing to fall back to.
            if (configuredWorkPath != null || @out == null)
            {
                return configuredWorkPath;
            }

            return FileOutputCommitter.GetTaskAttemptPath(context, @out);
        }
        /// <summary>
        /// Builds a job configuration from <paramref name="conf"/> that uses
        /// <c>IdentityMapper</c> and <c>IdentityReducer</c>, reads from <c>inputDir</c>,
        /// writes to <c>outputDir</c>, and runs it through <c>JobClient.RunJob</c>.
        /// </summary>
        /// <param name="conf">Configuration the job configuration is created from.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        /// <exception cref="System.TypeLoadException"/>
        public virtual void CreateAndRunJob(Configuration conf)
        {
            JobConf jobConf = new JobConf(conf);

            // Job classes
            jobConf.SetJarByClass(typeof(TestLineRecordReaderJobs));
            jobConf.SetMapperClass(typeof(IdentityMapper));
            jobConf.SetReducerClass(typeof(IdentityReducer));

            // Input and output locations
            FileInputFormat.AddInputPath(jobConf, inputDir);
            FileOutputFormat.SetOutputPath(jobConf, outputDir);

            JobClient.RunJob(jobConf);
        }
Example #27
0
        /// <summary>
        /// Writes <paramref name="input"/> to "/testing/ext/input/part-0" on the file
        /// system at <paramref name="uri"/>, runs the "wordcount" job whose mapper, reducer
        /// and output key class are referenced by name from <c>testjar</c>, and returns the
        /// text of the job's output files.
        /// </summary>
        /// <param name="uri">File system URI; also set as the default URI on <paramref name="conf"/>.</param>
        /// <param name="conf">Job configuration; it is modified in place and then run.</param>
        /// <param name="input">Text written as the single input file.</param>
        /// <param name="numMaps">Number of map tasks to request.</param>
        /// <param name="numReduces">Number of reduce tasks to request.</param>
        /// <returns>
        /// Every line of every listed output file, each followed by "\n", in listing order.
        /// </returns>
        /// <exception cref="System.IO.IOException"/>
        internal static string LaunchExternal(URI uri, JobConf conf, string input, int numMaps
                                              , int numReduces)
        {
            Path       inDir  = new Path("/testing/ext/input");
            Path       outDir = new Path("/testing/ext/output");
            FileSystem fs     = FileSystem.Get(uri, conf);

            fs.Delete(outDir, true);
            if (!fs.Mkdirs(inDir))
            {
                throw new IOException("Mkdirs failed to create " + inDir.ToString());
            }

            DataOutputStream inputFile = fs.Create(new Path(inDir, "part-0"));
            try
            {
                inputFile.WriteBytes(input);
            }
            finally
            {
                // Close even when the write fails so the stream is not leaked.
                inputFile.Close();
            }

            FileSystem.SetDefaultUri(conf, uri);
            conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
            conf.SetJobName("wordcount");
            conf.SetInputFormat(typeof(TextInputFormat));
            // the output values are IntWritable
            conf.SetOutputValueClass(typeof(IntWritable));
            // the output keys are testjar.ExternalWritable, configured by class name
            conf.Set(JobContext.OutputKeyClass, "testjar.ExternalWritable");
            FileInputFormat.SetInputPaths(conf, inDir);
            FileOutputFormat.SetOutputPath(conf, outDir);
            conf.SetNumMapTasks(numMaps);
            conf.SetNumReduceTasks(numReduces);
            conf.Set("mapred.mapper.class", "testjar.ExternalMapperReducer");
            conf.Set("mapred.reducer.class", "testjar.ExternalMapperReducer");
            // set the tests jar file
            conf.SetJarByClass(typeof(TestMiniMRClasspath));
            JobClient.RunJob(conf);
            StringBuilder result = new StringBuilder();

            Path[] fileList = FileUtil.Stat2Paths(fs.ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter
                                                                    ()));
            foreach (Path outputFile in fileList)
            {
                BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(outputFile)));
                try
                {
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        result.Append(line).Append("\n");
                    }
                }
                finally
                {
                    // Release the reader even if reading one of the files fails.
                    reader.Close();
                }
            }
            return(result.ToString());
        }
Example #28
0
            // Input formats
            /// <summary>
            /// Builds <paramref name="numSplits"/> file splits named "dummy-split-0",
            /// "dummy-split-1", ... under the job's configured output path. Each split is
            /// created with start 0, length 1 and a null host array.
            /// </summary>
            /// <param name="job">Job configuration the output path is read from.</param>
            /// <param name="numSplits">Number of splits to create.</param>
            /// <returns>An array holding one <c>FileSplit</c> per requested split.</returns>
            /// <exception cref="System.IO.IOException"/>
            public virtual InputSplit[] GetSplits(JobConf job, int numSplits)
            {
                InputSplit[] splits    = new InputSplit[numSplits];
                Path         outputDir = FileOutputFormat.GetOutputPath(job);

                int index = 0;
                while (index < splits.Length)
                {
                    Path splitPath = new Path(outputDir, "dummy-split-" + index);
                    splits[index] = new FileSplit(splitPath, 0, 1, (string[])null);
                    index++;
                }

                return splits;
            }
Example #29
0
        /// <summary>
        /// Generates field-selection test input, runs a single-reducer
        /// <c>FieldSelectionMapReduce</c> job over it, and asserts that the content of
        /// "part-00000" equals the expected output produced alongside the input. The
        /// input and output directories are deleted after a successful comparison.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public static void Launch()
        {
            JobConf    conf            = new JobConf(typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
            FileSystem fs              = FileSystem.Get(conf);
            int        numOfInputLines = 10;
            Path       outputDir       = new Path("build/test/output_for_field_selection_test");
            Path       inputDir        = new Path("build/test/input_for_field_selection_test");
            string     inputFile       = "input.txt";

            // Start from a fresh input directory and no stale output.
            fs.Delete(inputDir, true);
            fs.Mkdirs(inputDir);
            fs.Delete(outputDir, true);
            StringBuilder inputData      = new StringBuilder();
            StringBuilder expectedOutput = new StringBuilder();

            TestMRFieldSelection.ConstructInputOutputData(inputData, expectedOutput, numOfInputLines
                                                          );
            FSDataOutputStream fileOut = fs.Create(new Path(inputDir, inputFile));

            try
            {
                fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
            }
            finally
            {
                // Close even when the write fails so the stream is not leaked.
                fileOut.Close();
            }
            System.Console.Out.WriteLine("inputData:");
            System.Console.Out.WriteLine(inputData.ToString());
            JobConf job = new JobConf(conf, typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection
                                                   ));

            FileInputFormat.SetInputPaths(job, inputDir);
            job.SetInputFormat(typeof(TextInputFormat));
            job.SetMapperClass(typeof(FieldSelectionMapReduce));
            job.SetReducerClass(typeof(FieldSelectionMapReduce));
            FileOutputFormat.SetOutputPath(job, outputDir);
            job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
            job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
            job.SetOutputFormat(typeof(TextOutputFormat));
            job.SetNumReduceTasks(1);
            job.Set(FieldSelectionHelper.DataFieldSeperator, "-");
            job.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
            job.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
            JobClient.RunJob(job);

            // Compare the single reducer's output with the expected text generated
            // together with the input.
            Path   outPath = new Path(outputDir, "part-00000");
            string outdata = MapReduceTestUtil.ReadOutput(outPath, job);

            NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
            // Cleanup is reached only when the assertion passes; a failed run leaves
            // its directories in place.
            fs.Delete(outputDir, true);
            fs.Delete(inputDir, true);
        }
Example #30
0
        /// <summary>
        /// Configures <paramref name="job"/> with encrypted intermediate data enabled,
        /// submits it, monitors it to completion and then calls <c>VerifyOutput</c>.
        /// A job failure is reported on standard error; <c>VerifyOutput</c> runs in
        /// either case.
        /// </summary>
        /// <param name="job">Job configuration to fill in and submit.</param>
        /// <param name="fileSystem">File system used to clear <c>Output</c> and to verify results.</param>
        /// <param name="numMappers">Mapper count passed on to <c>VerifyOutput</c>.</param>
        /// <param name="numReducers">Number of reduce tasks to configure.</param>
        /// <param name="numLines">Stored as "mapred.test.num_lines" and passed to <c>VerifyOutput</c>.</param>
        /// <param name="isUber">When true, sets "mapreduce.job.ubertask.enable" to true.</param>
        /// <exception cref="System.Exception"/>
        private void RunMergeTest(JobConf job, FileSystem fileSystem, int numMappers, int
                                  numReducers, int numLines, bool isUber)
        {
            fileSystem.Delete(Output, true);
            job.SetJobName("Test");
            JobClient  client       = new JobClient(job);
            RunningJob submittedJob = null;

            FileInputFormat.SetInputPaths(job, InputDir);
            FileOutputFormat.SetOutputPath(job, Output);
            job.Set("mapreduce.output.textoutputformat.separator", " ");
            job.SetInputFormat(typeof(TextInputFormat));
            job.SetMapOutputKeyClass(typeof(Text));
            job.SetMapOutputValueClass(typeof(Text));
            job.SetOutputKeyClass(typeof(Text));
            job.SetOutputValueClass(typeof(Text));
            job.SetMapperClass(typeof(TestMRIntermediateDataEncryption.MyMapper));
            job.SetPartitionerClass(typeof(TestMRIntermediateDataEncryption.MyPartitioner));
            job.SetOutputFormat(typeof(TextOutputFormat));
            job.SetNumReduceTasks(numReducers);
            // A single attempt per task: a failing task is not retried.
            job.SetInt("mapreduce.map.maxattempts", 1);
            job.SetInt("mapreduce.reduce.maxattempts", 1);
            job.SetInt("mapred.test.num_lines", numLines);
            if (isUber)
            {
                job.SetBoolean("mapreduce.job.ubertask.enable", true);
            }
            job.SetBoolean(MRJobConfig.MrEncryptedIntermediateData, true);
            try
            {
                submittedJob = client.SubmitJob(job);
                try
                {
                    if (!client.MonitorAndPrintJob(job, submittedJob))
                    {
                        throw new IOException("Job failed!");
                    }
                }
                catch (IOException)
                {
                    // Let I/O failures, including "Job failed!" above, reach the outer
                    // handler instead of being swallowed by the generic catch below.
                    throw;
                }
                catch (Exception)
                {
                    // Any other failure while monitoring is treated as an interruption:
                    // re-assert the interrupt on the current thread.
                    Sharpen.Thread.CurrentThread().Interrupt();
                }
            }
            catch (IOException ioe)
            {
                System.Console.Error.WriteLine("Job failed with: " + ioe);
            }
            finally
            {
                VerifyOutput(submittedJob, fileSystem, numMappers, numLines);
            }
        }