/// <exception cref="System.Exception"/>
public virtual void TestControlledJob()
{
    Log.Info("Starting testControlledJob");
    Configuration conf = CreateJobConf();
    CleanupData(conf);
    Job job1 = MapReduceTestUtil.CreateCopyJob(conf, outdir_1, indir);
    JobControl theControl = CreateDependencies(conf, job1);
    // wait till the first job starts running
    while (cjob1.GetJobState() != ControlledJob.State.Running)
    {
        try
        {
            Sharpen.Thread.Sleep(100);
        }
        catch (Exception)
        {
            break;
        }
    }
    NUnit.Framework.Assert.IsNotNull(cjob1.GetMapredJobId());
    // wait till all the jobs complete
    WaitTillAllFinished(theControl);
    NUnit.Framework.Assert.AreEqual("Some jobs failed", 0, theControl.GetFailedJobList().Count);
    theControl.Stop();
}
/// <exception cref="System.Exception"/>
public static void Launch()
{
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path outDir = new Path(testDir, "output_for_field_selection_test");
    Path inDir = new Path(testDir, "input_for_field_selection_test");
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);
    conf.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    conf.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    conf.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, inputData.ToString());
    job.SetMapperClass(typeof(FieldSelectionMapper));
    job.SetReducerClass(typeof(FieldSelectionReducer));
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetNumReduceTasks(1);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job Failed!", job.IsSuccessful());
    // Finally, compare the actual output with the expected output.
    string outdata = MapReduceTestUtil.ReadOutput(outDir, conf);
    NUnit.Framework.Assert.AreEqual("Outputs don't match.", expectedOutput.ToString(), outdata);
    fs.Delete(outDir, true);
}
/// <exception cref="System.Exception"/>
public virtual int TestFormat(Configuration conf, int tupleSize, bool firstTuple, bool secondTuple, TestJoinProperties.TestType ttype)
{
    Job job = Job.GetInstance(conf);
    CompositeInputFormat format = new CompositeInputFormat();
    int count = 0;
    foreach (InputSplit split in (IList<InputSplit>)format.GetSplits(job))
    {
        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
        RecordReader reader = format.CreateRecordReader(split, context);
        MapContext mcontext = new MapContextImpl(conf, context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        WritableComparable key = null;
        Writable value = null;
        while (reader.NextKeyValue())
        {
            key = (WritableComparable)reader.GetCurrentKey();
            value = (Writable)reader.GetCurrentValue();
            ValidateKeyValue(key, value, tupleSize, firstTuple, secondTuple, ttype);
            count++;
        }
    }
    return count;
}
/// <exception cref="System.Exception"/>
private static IList<string> ReadSplit(FixedLengthInputFormat format, InputSplit split, Job job)
{
    IList<string> result = new AList<string>();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    RecordReader<LongWritable, BytesWritable> reader = format.CreateRecordReader(split, context);
    MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
        new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
            job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.CreateDummyReporter(), split);
    LongWritable key;
    BytesWritable value;
    try
    {
        reader.Initialize(split, mcontext);
        while (reader.NextKeyValue())
        {
            key = reader.GetCurrentKey();
            value = reader.GetCurrentValue();
            result.AddItem(Sharpen.Runtime.GetStringForBytes(value.GetBytes(), 0, value.GetLength()));
        }
    }
    finally
    {
        reader.Close();
    }
    return result;
}
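// Usage sketch (not part of the original tests; the values below are
// illustrative assumptions): ReadSplit is typically driven by fixing the
// record length first and then iterating over the splits the format produces.
//
//   FixedLengthInputFormat format = new FixedLengthInputFormat();
//   FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), 10);
//   FileInputFormat.SetInputPaths(job, workDir);
//   foreach (InputSplit split in format.GetSplits(job))
//   {
//       IList<string> records = ReadSplit(format, split, job);
//   }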
/// <summary>This is a main function for testing the JobControl class.</summary>
/// <remarks>
/// This is a main function for testing the JobControl class.
/// It uses 4 jobs:
/// Job 1: passed in as a parameter. input: indir output: outdir_1
/// Job 2: copy data from indir to outdir_2
/// Job 3: copy data from outdir_1 and outdir_2 to outdir_3
/// Job 4: copy data from outdir_3 to outdir_4
/// Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2.
/// Job 4 depends on job 3.
/// It then creates a JobControl object, adds the 4 jobs to it, and
/// finally starts a thread to run the JobControl object.
/// </remarks>
/// <exception cref="System.Exception"/>
private JobControl CreateDependencies(Configuration conf, Job job1)
{
    IList<ControlledJob> dependingJobs = null;
    cjob1 = new ControlledJob(job1, dependingJobs);
    Job job2 = MapReduceTestUtil.CreateCopyJob(conf, outdir_2, indir);
    cjob2 = new ControlledJob(job2, dependingJobs);
    Job job3 = MapReduceTestUtil.CreateCopyJob(conf, outdir_3, outdir_1, outdir_2);
    dependingJobs = new AList<ControlledJob>();
    dependingJobs.AddItem(cjob1);
    dependingJobs.AddItem(cjob2);
    cjob3 = new ControlledJob(job3, dependingJobs);
    Job job4 = MapReduceTestUtil.CreateCopyJob(conf, outdir_4, outdir_3);
    dependingJobs = new AList<ControlledJob>();
    dependingJobs.AddItem(cjob3);
    cjob4 = new ControlledJob(job4, dependingJobs);
    JobControl theControl = new JobControl("Test");
    theControl.AddJob(cjob1);
    theControl.AddJob(cjob2);
    theControl.AddJob(cjob3);
    theControl.AddJob(cjob4);
    Sharpen.Thread theController = new Sharpen.Thread(theControl);
    theController.Start();
    return theControl;
}
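// The tests in this section block on WaitTillAllFinished, which is not shown
// here. A minimal sketch of such a helper, assuming JobControl exposes an
// AllFinished() poll (the actual implementation may differ):
//
//   private void WaitTillAllFinished(JobControl theControl)
//   {
//       while (!theControl.AllFinished())
//       {
//           try
//           {
//               Sharpen.Thread.Sleep(100);
//           }
//           catch (Exception)
//           {
//               break;
//           }
//       }
//   }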
// run a job which gets stuck in the mapper, then kill it.
/// <exception cref="System.Exception"/>
private void TestKilledJob(string fileName, Type output, string[] exclude)
{
    Path outDir = GetNewOutputDir();
    Job job = MapReduceTestUtil.CreateKillJob(conf, outDir, inDir);
    job.SetOutputFormatClass(output);
    job.Submit();
    // wait for the setup to be completed
    while (job.SetupProgress() != 1.0f)
    {
        UtilsForTests.WaitFor(100);
    }
    // kill the job
    job.KillJob();
    NUnit.Framework.Assert.IsFalse("Job did not get killed", job.WaitForCompletion(true));
    if (fileName != null)
    {
        Path testFile = new Path(outDir, fileName);
        NUnit.Framework.Assert.IsTrue("File " + testFile + " missing for job " + job.GetJobID(), fs.Exists(testFile));
    }
    // check that files from the exclude set are not present
    foreach (string ex in exclude)
    {
        Path file = new Path(outDir, ex);
        NUnit.Framework.Assert.IsFalse("File " + file + " should not be present for killed job " + job.GetJobID(), fs.Exists(file));
    }
}
/// <exception cref="System.Exception"/>
private void Run(bool ioEx, bool rtEx)
{
    string localPathRoot = Runtime.GetProperty("test.build.data", "/tmp");
    Path inDir = new Path(localPathRoot, "testing/mt/input");
    Path outDir = new Path(localPathRoot, "testing/mt/output");
    Configuration conf = CreateJobConf();
    if (ioEx)
    {
        conf.SetBoolean("multithreaded.ioException", true);
    }
    if (rtEx)
    {
        conf.SetBoolean("multithreaded.runtimeException", true);
    }
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1);
    job.SetJobName("mt");
    job.SetMapperClass(typeof(MultithreadedMapper));
    MultithreadedMapper.SetMapperClass(job, typeof(TestMultithreadedMapper.IDMap));
    MultithreadedMapper.SetNumberOfThreads(job, 2);
    job.SetReducerClass(typeof(Reducer));
    job.WaitForCompletion(true);
    // the job must fail if and only if an exception was injected
    if (job.IsSuccessful())
    {
        NUnit.Framework.Assert.IsFalse(ioEx || rtEx);
    }
    else
    {
        NUnit.Framework.Assert.IsTrue(ioEx || rtEx);
    }
}
/// <exception cref="System.Exception"/>
private void TestComparator(string keySpec, int expect)
{
    string root = Runtime.GetProperty("test.build.data", "/tmp");
    Path inDir = new Path(root, "test_cmp/in");
    Path outDir = new Path(root, "test_cmp/out");
    conf.Set("mapreduce.partition.keycomparator.options", keySpec);
    conf.Set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
    conf.Set(MRJobConfig.MapOutputKeyFieldSeperator, " ");
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, line1 + "\n" + line2 + "\n");
    job.SetMapperClass(typeof(InverseMapper));
    job.SetReducerClass(typeof(Reducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(LongWritable));
    job.SetSortComparatorClass(typeof(KeyFieldBasedComparator));
    job.SetPartitionerClass(typeof(KeyFieldBasedPartitioner));
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful());
    // validate output
    Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    if (outputFiles.Length > 0)
    {
        InputStream @is = GetFileSystem().Open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
        string line = reader.ReadLine();
        // make sure we get what we expect as the first line, and also
        // that we have two lines (both lines must end up in the same
        // reducer since the partitioner uses the same key spec for all
        // lines)
        if (expect == 1)
        {
            NUnit.Framework.Assert.IsTrue(line.StartsWith(line1));
        }
        else if (expect == 2)
        {
            NUnit.Framework.Assert.IsTrue(line.StartsWith(line2));
        }
        line = reader.ReadLine();
        if (expect == 1)
        {
            NUnit.Framework.Assert.IsTrue(line.StartsWith(line2));
        }
        else if (expect == 2)
        {
            NUnit.Framework.Assert.IsTrue(line.StartsWith(line1));
        }
        reader.Close();
    }
}
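// Example invocations (hypothetical key specs and expected orderings; the
// callers of TestComparator are not shown in this section):
//
//   TestComparator("-k1,1n", 1);   // numeric sort on field 1: line1 first
//   TestComparator("-k1,1nr", 2);  // reversed numeric sort: line2 first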
/// <exception cref="System.Exception"/>
private void CleanupData(Configuration conf)
{
    FileSystem fs = FileSystem.Get(conf);
    MapReduceTestUtil.CleanData(fs, indir);
    MapReduceTestUtil.GenerateData(fs, indir);
    MapReduceTestUtil.CleanData(fs, outdir_1);
    MapReduceTestUtil.CleanData(fs, outdir_2);
    MapReduceTestUtil.CleanData(fs, outdir_3);
    MapReduceTestUtil.CleanData(fs, outdir_4);
}
/// <exception cref="System.Exception"/>
public static void Launch()
{
    JobConf conf = new JobConf(typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileSystem fs = FileSystem.Get(conf);
    int numOfInputLines = 10;
    Path OutputDir = new Path("build/test/output_for_field_selection_test");
    Path InputDir = new Path("build/test/input_for_field_selection_test");
    string inputFile = "input.txt";
    fs.Delete(InputDir, true);
    fs.Mkdirs(InputDir);
    fs.Delete(OutputDir, true);
    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    TestMRFieldSelection.ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);
    FSDataOutputStream fileOut = fs.Create(new Path(InputDir, inputFile));
    fileOut.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    fileOut.Close();
    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());
    JobConf job = new JobConf(conf, typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileInputFormat.SetInputPaths(job, InputDir);
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapperClass(typeof(FieldSelectionMapReduce));
    job.SetReducerClass(typeof(FieldSelectionMapReduce));
    FileOutputFormat.SetOutputPath(job, OutputDir);
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(1);
    job.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    job.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    job.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    JobClient.RunJob(job);
    // Finally, compare the actual output with the expected output.
    Path outPath = new Path(OutputDir, "part-00000");
    string outdata = MapReduceTestUtil.ReadOutput(outPath, job);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), outdata);
    fs.Delete(OutputDir, true);
    fs.Delete(InputDir, true);
}
/// <exception cref="System.Exception"/>
public virtual void TestJobControl()
{
    Log.Info("Starting testJobControl");
    Configuration conf = CreateJobConf();
    CleanupData(conf);
    Job job1 = MapReduceTestUtil.CreateCopyJob(conf, outdir_1, indir);
    JobControl theControl = CreateDependencies(conf, job1);
    // wait till all the jobs complete
    WaitTillAllFinished(theControl);
    NUnit.Framework.Assert.AreEqual("Some jobs failed", 0, theControl.GetFailedJobList().Count);
    theControl.Stop();
}
/// <summary>Tests the Reducer throwing an exception.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestReducerFail()
{
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");
    ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainReducer.SetReducer(job, typeof(TestChainErrors.FailReduce), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainReducer.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job did not fail", !job.IsSuccessful());
}
/// <summary>Tests one of the maps consuming output.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestChainMapNoOutput()
{
    Configuration conf = CreateJobConf();
    string expectedOutput = string.Empty;
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 0, input);
    job.SetJobName("chain");
    ChainMapper.AddMapper(job, typeof(TestChainErrors.ConsumeMap), typeof(IntWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
    NUnit.Framework.Assert.AreEqual("Outputs don't match", expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf));
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal virtual void CheckFormat(Job job, int expectedN, int lastN)
{
    NLineInputFormat format = new NLineInputFormat();
    IList<InputSplit> splits = format.GetSplits(job);
    int count = 0;
    for (int i = 0; i < splits.Count; i++)
    {
        NUnit.Framework.Assert.AreEqual("There are no split locations", 0, splits[i].GetLocations().Length);
        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
        RecordReader<LongWritable, Text> reader = format.CreateRecordReader(splits[i], context);
        Type clazz = reader.GetType();
        NUnit.Framework.Assert.AreEqual("reader class is LineRecordReader.", typeof(LineRecordReader), clazz);
        MapContext<LongWritable, Text, LongWritable, Text> mcontext =
            new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), splits[i]);
        reader.Initialize(splits[i], mcontext);
        try
        {
            // count the records in this split
            count = 0;
            while (reader.NextKeyValue())
            {
                count++;
            }
        }
        finally
        {
            reader.Close();
        }
        // every split but the last should hold expectedN lines
        if (i == splits.Count - 1)
        {
            NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", lastN, count);
        }
        else
        {
            NUnit.Framework.Assert.AreEqual("number of lines in split(" + i + ") is wrong", expectedN, count);
        }
    }
}
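// Usage sketch (hypothetical values, not from the original tests): with 7
// input lines and 3 lines per split, the format should produce splits of
// 3, 3, and 1 lines.
//
//   NLineInputFormat.SetNumLinesPerSplit(job, 3);
//   CheckFormat(job, 3, 1);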
// test chain mapper and reducer by adding a single mapper and reducer to the chain
/// <exception cref="System.Exception"/>
public virtual void TestNoChain()
{
    Path inDir = new Path(localPathRoot, "testing/chain/input");
    Path outDir = new Path(localPathRoot, "testing/chain/output");
    string input = "a\nb\na\n";
    string expectedOutput = "a\t2\nb\t1\n";
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 1, input);
    job.SetJobName("chain");
    ChainMapper.AddMapper(job, typeof(TokenCounterMapper), typeof(object), typeof(Text), typeof(Text), typeof(IntWritable), null);
    ChainReducer.SetReducer(job, typeof(IntSumReducer), typeof(Text), typeof(IntWritable), typeof(Text), typeof(IntWritable), null);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful());
    NUnit.Framework.Assert.AreEqual("Outputs don't match", expectedOutput, MapReduceTestUtil.ReadOutput(outDir, conf));
}
// run a job with 1 map and let it run to completion
/// <exception cref="System.Exception"/>
private void TestSuccessfulJob(string filename, Type output, string[] exclude)
{
    Path outDir = GetNewOutputDir();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 1, 0);
    job.SetOutputFormatClass(output);
    NUnit.Framework.Assert.IsTrue("Job failed!", job.WaitForCompletion(true));
    Path testFile = new Path(outDir, filename);
    NUnit.Framework.Assert.IsTrue("Done file missing for job " + job.GetJobID(), fs.Exists(testFile));
    // check that files from the exclude set are not present
    foreach (string ex in exclude)
    {
        Path file = new Path(outDir, ex);
        NUnit.Framework.Assert.IsFalse("File " + file + " should not be present for successful job " + job.GetJobID(), fs.Exists(file));
    }
}
/// <exception cref="System.IO.IOException"/>
private string LaunchWordCount(JobConf conf, string input, int numMaps, int numReduces)
{
    Path inDir = new Path("testing/wc/input");
    Path outDir = new Path("testing/wc/output");
    // Hack for local FS that does not have the concept of a 'mounting point'
    if (IsLocalFS())
    {
        string localPathRoot = Runtime.GetProperty("test.build.data", "/tmp").ToString().Replace(' ', '+');
        inDir = new Path(localPathRoot, inDir);
        outDir = new Path(localPathRoot, outDir);
    }
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(outDir, true);
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = fs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    JobClient.RunJob(conf);
    return MapReduceTestUtil.ReadOutput(outDir, conf);
}
/// <exception cref="System.IO.IOException"/>
public static TestJobSysDirWithDFS.TestResult LaunchWordCount(JobConf conf, Path inDir, Path outDir, string input, int numMaps, int numReduces, string sysDir)
{
    FileSystem inFs = inDir.GetFileSystem(conf);
    FileSystem outFs = outDir.GetFileSystem(conf);
    outFs.Delete(outDir, true);
    if (!inFs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    {
        DataOutputStream file = inFs.Create(new Path(inDir, "part-0"));
        file.WriteBytes(input);
        file.Close();
    }
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    conf.Set(JTConfig.JtSystemDir, "/tmp/subru/mapred/system");
    JobClient jobClient = new JobClient(conf);
    RunningJob job = JobClient.RunJob(conf);
    // Check that the job client's system dir is not used
    NUnit.Framework.Assert.IsFalse(FileSystem.Get(conf).Exists(new Path(conf.Get(JTConfig.JtSystemDir))));
    // Check that the job tracker's system dir is propagated to the client
    NUnit.Framework.Assert.IsFalse(sysDir.Contains("/tmp/subru/mapred/system"));
    NUnit.Framework.Assert.IsTrue(sysDir.Contains("custom"));
    return new TestJobSysDirWithDFS.TestResult(job, MapReduceTestUtil.ReadOutput(outDir, conf));
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private static IList<Text> ReadSplit(InputFormat<LongWritable, Text> format, InputSplit split, Job job)
{
    IList<Text> result = new AList<Text>();
    Configuration conf = job.GetConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf);
    RecordReader<LongWritable, Text> reader = format.CreateRecordReader(split, MapReduceTestUtil.CreateDummyMapTaskAttemptContext(conf));
    MapContext<LongWritable, Text, LongWritable, Text> mcontext =
        new MapContextImpl<LongWritable, Text, LongWritable, Text>(
            conf, context.GetTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.CreateDummyReporter(), split);
    reader.Initialize(split, mcontext);
    while (reader.NextKeyValue())
    {
        result.AddItem(new Text(reader.GetCurrentValue()));
    }
    return result;
}
// run a job for which all the attempts simply fail.
/// <exception cref="System.Exception"/>
private void TestFailedJob(string fileName, Type output, string[] exclude)
{
    Path outDir = GetNewOutputDir();
    Job job = MapReduceTestUtil.CreateFailJob(conf, outDir, inDir);
    job.SetOutputFormatClass(output);
    NUnit.Framework.Assert.IsFalse("Job did not fail!", job.WaitForCompletion(true));
    if (fileName != null)
    {
        Path testFile = new Path(outDir, fileName);
        NUnit.Framework.Assert.IsTrue("File " + testFile + " missing for failed job " + job.GetJobID(), fs.Exists(testFile));
    }
    // check that files from the exclude set are not present
    foreach (string ex in exclude)
    {
        Path file = new Path(outDir, ex);
        NUnit.Framework.Assert.IsFalse("File " + file + " should not be present for failed job " + job.GetJobID(), fs.Exists(file));
    }
}
/// <exception cref="System.Exception"/>
public virtual void TestJobControlWithFailJob()
{
    Log.Info("Starting testJobControlWithFailJob");
    Configuration conf = CreateJobConf();
    CleanupData(conf);
    // create a Fail job
    Job job1 = MapReduceTestUtil.CreateFailJob(conf, outdir_1, indir);
    // create job dependencies
    JobControl theControl = CreateDependencies(conf, job1);
    // wait till all the jobs complete
    WaitTillAllFinished(theControl);
    NUnit.Framework.Assert.IsTrue(cjob1.GetJobState() == ControlledJob.State.Failed);
    NUnit.Framework.Assert.IsTrue(cjob2.GetJobState() == ControlledJob.State.Success);
    NUnit.Framework.Assert.IsTrue(cjob3.GetJobState() == ControlledJob.State.DependentFailed);
    NUnit.Framework.Assert.IsTrue(cjob4.GetJobState() == ControlledJob.State.DependentFailed);
    theControl.Stop();
}
/// <exception cref="System.Exception"/>
public virtual void TestJobControlWithKillJob()
{
    Log.Info("Starting testJobControlWithKillJob");
    Configuration conf = CreateJobConf();
    CleanupData(conf);
    Job job1 = MapReduceTestUtil.CreateKillJob(conf, outdir_1, indir);
    JobControl theControl = CreateDependencies(conf, job1);
    // wait till the first job starts running
    while (cjob1.GetJobState() != ControlledJob.State.Running)
    {
        try
        {
            Sharpen.Thread.Sleep(100);
        }
        catch (Exception)
        {
            break;
        }
    }
    // verify that adding a depending job to a RUNNING job fails.
    NUnit.Framework.Assert.IsFalse(cjob1.AddDependingJob(cjob2));
    // suspend the JobControl and resume it again
    theControl.Suspend();
    NUnit.Framework.Assert.IsTrue(theControl.GetThreadState() == JobControl.ThreadState.Suspended);
    theControl.Resume();
    // kill the first job.
    cjob1.KillJob();
    // wait till all the jobs complete
    WaitTillAllFinished(theControl);
    NUnit.Framework.Assert.IsTrue(cjob1.GetJobState() == ControlledJob.State.Failed);
    NUnit.Framework.Assert.IsTrue(cjob2.GetJobState() == ControlledJob.State.Success);
    NUnit.Framework.Assert.IsTrue(cjob3.GetJobState() == ControlledJob.State.DependentFailed);
    NUnit.Framework.Assert.IsTrue(cjob4.GetJobState() == ControlledJob.State.DependentFailed);
    theControl.Stop();
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private int CountRecords(int numSplits)
{
    InputFormat<Text, BytesWritable> format = new SequenceFileInputFilter<Text, BytesWritable>();
    if (numSplits == 0)
    {
        numSplits = random.Next(MaxLength / (SequenceFile.SyncInterval / 20)) + 1;
    }
    FileInputFormat.SetMaxInputSplitSize(job, fs.GetFileStatus(inFile).GetLen() / numSplits);
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    // check each split
    int count = 0;
    foreach (InputSplit split in format.GetSplits(job))
    {
        RecordReader<Text, BytesWritable> reader = format.CreateRecordReader(split, context);
        MapContext<Text, BytesWritable, Text, BytesWritable> mcontext =
            new MapContextImpl<Text, BytesWritable, Text, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            while (reader.NextKeyValue())
            {
                Log.Info("Accept record " + reader.GetCurrentKey().ToString());
                count++;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    return count;
}
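// Usage sketch (hypothetical; the filter setup is not shown in this section):
// a record filter is normally installed before counting, e.g.
//
//   SequenceFileInputFilter.SetFilterClass(job, typeof(SequenceFileInputFilter.RegexFilter));
//   SequenceFileInputFilter.RegexFilter.SetPattern(job.GetConfiguration(), "\\A10*");
//   int accepted = CountRecords(0);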
/// <summary>Test with record length set to a negative value</summary>
/// <exception cref="System.Exception"/>
public virtual void TestNegativeRecordLength()
{
    localFs.Delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    CreateFile(file, null, 10, 10);
    // Set the fixed length record length config property
    Job job = Job.GetInstance(defaultConf);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    FixedLengthInputFormat.SetRecordLength(job.GetConfiguration(), -10);
    FileInputFormat.SetInputPaths(job, workDir);
    IList<InputSplit> splits = format.GetSplits(job);
    bool exceptionThrown = false;
    foreach (InputSplit split in splits)
    {
        try
        {
            TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
            RecordReader<LongWritable, BytesWritable> reader = format.CreateRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
                new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
                    job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                    MapReduceTestUtil.CreateDummyReporter(), split);
            reader.Initialize(split, mcontext);
        }
        catch (IOException ioe)
        {
            exceptionThrown = true;
            Log.Info("Exception message:" + ioe.Message);
        }
    }
    NUnit.Framework.Assert.IsTrue("Exception for negative record length:", exceptionThrown);
}
/// <summary>Tests errors during submission.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestChainSubmission()
{
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, inDir, outDir, 0, 0, input);
    job.SetJobName("chain");
    Exception th = null;
    // the output key/value classes of the first map are not the same as
    // those of the second map
    try
    {
        ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(IntWritable), typeof(Text), null);
        ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    }
    catch (ArgumentException iae)
    {
        th = iae;
    }
    NUnit.Framework.Assert.IsTrue(th != null);
    th = null;
    // the output key/value classes of the reducer are not the same as
    // those of the mapper in the chain
    try
    {
        ChainReducer.SetReducer(job, typeof(Reducer), typeof(LongWritable), typeof(Text), typeof(IntWritable), typeof(Text), null);
        ChainMapper.AddMapper(job, typeof(Mapper), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), null);
    }
    catch (ArgumentException iae)
    {
        th = iae;
    }
    NUnit.Framework.Assert.IsTrue(th != null);
}
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    Job job = Job.GetInstance(new Configuration(defaultConf));
    Random random = new Random();
    long seed = random.NextLong();
    Log.Info("seed = " + seed);
    random.SetSeed(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int length = 10000;
    int numFiles = 10;
    // create files with various lengths
    CreateFiles(length, numFiles, random);
    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    for (int i = 0; i < 3; i++)
    {
        int numSplits = random.Next(length / 20) + 1;
        Log.Info("splitting: requesting = " + numSplits);
        IList<InputSplit> splits = format.GetSplits(job);
        Log.Info("splitting: got = " + splits.Count);
        // we should have a single split, as the total length is comfortably
        // smaller than the block size
        NUnit.Framework.Assert.AreEqual("We got more than one split!", 1, splits.Count);
        InputSplit split = splits[0];
        NUnit.Framework.Assert.AreEqual("It should be CombineFileSplit", typeof(CombineFileSplit), split.GetType());
        // check the split
        BitSet bits = new BitSet(length);
        Log.Debug("split= " + split);
        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
        RecordReader<LongWritable, Text> reader = format.CreateRecordReader(split, context);
        NUnit.Framework.Assert.AreEqual("reader class is CombineFileRecordReader.", typeof(CombineFileRecordReader), reader.GetType());
        MapContext<LongWritable, Text, LongWritable, Text> mcontext =
            new MapContextImpl<LongWritable, Text, LongWritable, Text>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            int count = 0;
            while (reader.NextKeyValue())
            {
                LongWritable key = reader.GetCurrentKey();
                NUnit.Framework.Assert.IsNotNull("Key should not be null.", key);
                Text value = reader.GetCurrentValue();
                int v = System.Convert.ToInt32(value.ToString());
                Log.Debug("read " + v);
                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                bits.Set(v);
                count++;
            }
            Log.Debug("split=" + split + " count=" + count);
        }
        finally
        {
            reader.Close();
        }
        NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
    }
}
/// <exception cref="System.Exception"/>
protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(long), typeof(string));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetSortComparatorClass(typeof(JavaSerializationComparator));
    job.SetMapOutputKeyClass(typeof(long));
    job.SetMapOutputValueClass(typeof(string));
    job.SetOutputKeyClass(typeof(long));
    job.SetOutputValueClass(typeof(string));
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOJavaSerDeMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOJavaSerDeReduce));
    job.WaitForCompletion(true);
    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") ||
            fileName.Equals("text-r-00000"))
        {
            namedOutputCount++;
        }
        else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") ||
            fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000") ||
            fileName.Equals("e-r-00000"))
        {
            valueBasedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(3, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    int count = 0;
    string line = reader.ReadLine();
    while (line != null)
    {
        NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
        line = reader.ReadLine();
        count++;
    }
    reader.Close();
    NUnit.Framework.Assert.IsFalse(count == 0);
    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(6, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <exception cref="System.Exception"/>
protected internal virtual void _testMultipleOutputs(bool withCounters)
{
    string input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = CreateJobConf();
    Job job = MapReduceTestUtil.CreateJob(conf, InDir, OutDir, 2, 1, input);
    job.SetJobName("mo");
    MultipleOutputs.AddNamedOutput(job, Text, typeof(TextOutputFormat), typeof(LongWritable), typeof(Text));
    MultipleOutputs.AddNamedOutput(job, Sequence, typeof(SequenceFileOutputFormat), typeof(IntWritable), typeof(Text));
    MultipleOutputs.SetCountersEnabled(job, withCounters);
    job.SetMapperClass(typeof(TestMRMultipleOutputs.MOMap));
    job.SetReducerClass(typeof(TestMRMultipleOutputs.MOReduce));
    job.WaitForCompletion(true);
    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OutDir.GetFileSystem(conf);
    FileStatus[] statuses = fs.ListStatus(OutDir);
    foreach (FileStatus status in statuses)
    {
        string fileName = status.GetPath().GetName();
        if (fileName.Equals("text-m-00000") || fileName.Equals("text-m-00001") ||
            fileName.Equals("text-r-00000") || fileName.Equals("sequence_A-m-00000") ||
            fileName.Equals("sequence_A-m-00001") || fileName.Equals("sequence_B-m-00000") ||
            fileName.Equals("sequence_B-m-00001") || fileName.Equals("sequence_B-r-00000") ||
            fileName.Equals("sequence_C-r-00000"))
        {
            namedOutputCount++;
        }
        else if (fileName.Equals("a-r-00000") || fileName.Equals("b-r-00000") ||
            fileName.Equals("c-r-00000") || fileName.Equals("d-r-00000") ||
            fileName.Equals("e-r-00000"))
        {
            valueBasedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(9, namedOutputCount);
    NUnit.Framework.Assert.AreEqual(5, valueBasedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.Open(new Path(FileOutputFormat.GetOutputPath(job), "text-r-00000"))));
    int count = 0;
    string line = reader.ReadLine();
    while (line != null)
    {
        NUnit.Framework.Assert.IsTrue(line.EndsWith(Text));
        line = reader.ReadLine();
        count++;
    }
    reader.Close();
    NUnit.Framework.Assert.IsFalse(count == 0);
    // assert SequenceFileOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.GetOutputPath(job), "sequence_B-r-00000"), conf);
    NUnit.Framework.Assert.AreEqual(typeof(IntWritable), seqReader.GetKeyClass());
    NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.Next(key, value))
    {
        NUnit.Framework.Assert.AreEqual(Sequence, value.ToString());
        count++;
    }
    seqReader.Close();
    NUnit.Framework.Assert.IsFalse(count == 0);
    if (withCounters)
    {
        CounterGroup counters = job.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
        NUnit.Framework.Assert.AreEqual(9, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Text).GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_A").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter(Sequence + "_B").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter(Sequence + "_C").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("a").GetValue());
        NUnit.Framework.Assert.AreEqual(2, counters.FindCounter("b").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("c").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("d").GetValue());
        NUnit.Framework.Assert.AreEqual(4, counters.FindCounter("e").GetValue());
    }
}
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path wordExec = new Path("testing/bin/application");
    JobConf job = null;
    if (conf == null)
    {
        job = mr.CreateJobConf();
    }
    else
    {
        job = new JobConf(conf);
    }
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);
    {
        FileSystem fs = dfs.GetFileSystem();
        fs.Delete(wordExec.GetParent(), true);
        fs.CopyFromLocalFile(program, wordExec);
        Submitter.SetExecutable(job, fs.MakeQualified(wordExec).ToString());
        Submitter.SetIsJavaRecordReader(job, true);
        Submitter.SetIsJavaRecordWriter(job, true);
        FileInputFormat.SetInputPaths(job, inputPath);
        FileOutputFormat.SetOutputPath(job, outputPath);
        RunningJob rJob = null;
        if (numReduces == 0)
        {
            rJob = Submitter.JobSubmit(job);
            while (!rJob.IsComplete())
            {
                try
                {
                    Sharpen.Thread.Sleep(1000);
                }
                catch (Exception ie)
                {
                    throw new RuntimeException(ie);
                }
            }
        }
        else
        {
            rJob = Submitter.RunJob(job);
        }
        NUnit.Framework.Assert.IsTrue("pipes job failed", rJob.IsSuccessful());
        Counters counters = rJob.GetCounters();
        Counters.Group wordCountCounters = counters.GetGroup("WORDCOUNT");
        int numCounters = 0;
        foreach (Counters.Counter c in wordCountCounters)
        {
            System.Console.Out.WriteLine(c);
            ++numCounters;
        }
        NUnit.Framework.Assert.IsTrue("No counters found!", (numCounters > 0));
    }
    IList<string> results = new AList<string>();
    foreach (Path p in FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter())))
    {
        results.AddItem(MapReduceTestUtil.ReadOutput(p, job));
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, results.Count);
    for (int i = 0; i < results.Count; i++)
    {
        NUnit.Framework.Assert.AreEqual("pipes program " + program + " output " + i + " wrong", expectedResults[i], results[i]);
    }
}
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public virtual void TestBinary()
{
    Configuration conf = new Configuration();
    Job job = Job.GetInstance(conf);
    Path outdir = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.NextLong();
    r.SetSeed(seed);
    FileOutputFormat.SetOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputKeyClass(job, typeof(IntWritable));
    SequenceFileAsBinaryOutputFormat.SetSequenceFileOutputValueClass(job, typeof(DoubleWritable));
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.GetOutputCommitter(context);
    committer.SetupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.GetRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    Log.Info("Creating data by SequenceFileAsBinaryOutputFormat");
    try
    {
        for (int i = 0; i < Records; ++i)
        {
            // serialize a random int/double pair into raw bytes
            iwritable = new IntWritable(r.Next());
            iwritable.Write(outbuf);
            bkey.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();
            dwritable = new DoubleWritable(r.NextDouble());
            dwritable.Write(outbuf);
            bval.Set(outbuf.GetData(), 0, outbuf.GetLength());
            outbuf.Reset();
            writer.Write(bkey, bval);
        }
    }
    finally
    {
        writer.Close(context);
    }
    committer.CommitTask(context);
    committer.CommitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    // re-seed so the same random sequence can be replayed for verification
    r.SetSeed(seed);
    SequenceFileInputFormat.SetInputPaths(job, outdir);
    Log.Info("Reading data by SequenceFileInputFormat");
    foreach (InputSplit split in iformat.GetSplits(job))
    {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.CreateRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.CreateDummyReporter(), split);
        reader.Initialize(split, mcontext);
        try
        {
            int sourceInt;
            double sourceDouble;
            while (reader.NextKeyValue())
            {
                sourceInt = r.Next();
                sourceDouble = r.NextDouble();
                iwritable = reader.GetCurrentKey();
                dwritable = reader.GetCurrentValue();
                NUnit.Framework.Assert.AreEqual("Keys don't match: " + "*" + iwritable.Get() + ":" + sourceInt + "*", sourceInt, iwritable.Get());
                NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + dwritable.Get() + ":" + sourceDouble + "*", double.Compare(dwritable.Get(), sourceDouble) == 0);
                ++count;
            }
        }
        finally
        {
            reader.Close();
        }
    }
    NUnit.Framework.Assert.AreEqual("Some records not found", Records, count);
}