/// <summary>
/// Writes a small text file into the "input" directory, submits a job over it
/// with the map-task count and the map/reduce max attempts all set to 1, waits
/// for the job to finish and asserts that it completed successfully.
/// </summary>
/// <exception cref="System.Exception"/>
private void MrRun()
{
    // Stage the job input.
    FileSystem fileSystem = FileSystem.Get(GetJobConf());
    Path inputDir = new Path("input");
    fileSystem.Mkdirs(inputDir);
    Path dataFile = new Path(inputDir, "data.txt");
    TextWriter dataWriter = new OutputStreamWriter(fileSystem.Create(dataFile));
    dataWriter.Write("hello");
    dataWriter.Close();

    // Build the job configuration on top of a fresh copy of the base configuration.
    Path outputDir = new Path("output", "output");
    JobConf jobConf = new JobConf(GetJobConf());
    jobConf.SetInt("mapred.map.tasks", 1);
    jobConf.SetInt("mapred.map.max.attempts", 1);
    jobConf.SetInt("mapred.reduce.max.attempts", 1);
    jobConf.Set("mapred.input.dir", inputDir.ToString());
    jobConf.Set("mapred.output.dir", outputDir.ToString());

    // Submit, block until the job is done, then verify the outcome.
    JobClient client = new JobClient(jobConf);
    RunningJob submitted = client.SubmitJob(jobConf);
    submitted.WaitForCompletion();
    NUnit.Framework.Assert.IsTrue(submitted.IsComplete());
    NUnit.Framework.Assert.IsTrue(submitted.IsSuccessful());
}
/// <summary>
/// Runs a job with one map to completion using the supplied output committer,
/// then asserts that <paramref name="filename"/> exists in the job's output
/// directory and that none of the <paramref name="exclude"/> files do.
/// </summary>
/// <param name="filename">Name of the file expected in the output directory.</param>
/// <param name="committer">Output committer type to set on the job.</param>
/// <param name="exclude">Names of files that must not be present in the output directory.</param>
/// <exception cref="System.IO.IOException"/>
private void TestSuccessfulJob(string filename, Type committer, string[] exclude)
{
    JobConf jobConf = mr.CreateJobConf();
    Path outputDir = GetNewOutputDir();
    ConfigureJob(jobConf, "job with cleanup()", 1, 0, outputDir);
    jobConf.SetOutputCommitter(committer);

    JobClient client = new JobClient(jobConf);
    RunningJob runningJob = client.SubmitJob(jobConf);
    JobID jobId = runningJob.GetID();
    runningJob.WaitForCompletion();
    Log.Info("Job finished : " + runningJob.IsComplete());

    // The expected file must be present...
    Path expectedFile = new Path(outputDir, filename);
    NUnit.Framework.Assert.IsTrue("Done file \"" + expectedFile + "\" missing for job " + jobId, fileSys.Exists(expectedFile));

    // ...and every excluded file must be absent.
    for (int i = 0; i < exclude.Length; i++)
    {
        Path unexpectedFile = new Path(outputDir, exclude[i]);
        NUnit.Framework.Assert.IsFalse("File " + unexpectedFile + " should not be present for successful job " + jobId, fileSys.Exists(unexpectedFile));
    }
}
/// <summary>
/// Runs a job that is expected to fail (it uses <c>UtilsForTests.FailMapper</c>
/// with a single map attempt) and polls until it completes.
/// </summary>
/// <param name="conf">Job configuration; its job name, mapper, reducer and max map attempts are overwritten.</param>
/// <param name="inDir">Input directory passed to <c>UtilsForTests.RunJob</c>.</param>
/// <param name="outDir">Output directory passed to <c>UtilsForTests.RunJob</c>.</param>
/// <returns>The submitted job.</returns>
/// <exception cref="System.IO.IOException">
/// The job did not complete within roughly 30 seconds of polling.
/// </exception>
public static RunningJob RunJobFail(JobConf conf, Path inDir, Path outDir)
{
    conf.SetJobName("test-job-fail");
    conf.SetMapperClass(typeof(UtilsForTests.FailMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetMaxMapAttempts(1);
    RunningJob job = UtilsForTests.RunJob(conf, inDir, outDir);
    long sleepCount = 0;
    while (!job.IsComplete())
    {
        // The timeout check must live outside the try block: the catch below
        // handles every Exception, so a timeout thrown inside it would be
        // swallowed and the caller would never learn that the job hung.
        if (sleepCount > 300)
        {
            // 300 polls of 100 ms each = 30 seconds
            throw new IOException("Job didn't finish in 30 seconds");
        }
        try
        {
            Sharpen.Thread.Sleep(100);
            sleepCount++;
        }
        catch (Exception)
        {
            // Sleeping failed (e.g. the thread was interrupted): stop waiting
            // and hand the job back in whatever state it is in.
            break;
        }
    }
    return job;
}
/// <summary>
/// Runs a small local-framework job over three lines of text and asserts that
/// the reduce output file exists and that its compression state matches
/// <paramref name="redCompression"/>. The test directory is deleted before the
/// run and again afterwards, whether or not the run succeeds.
/// </summary>
/// <param name="compressMapOutputs">Whether map outputs are compressed.</param>
/// <param name="redCompression">Compression type set on the sequence-file output.</param>
/// <param name="includeCombine">Whether <c>IdentityReducer</c> is installed as combiner.</param>
/// <exception cref="System.Exception"/>
private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType redCompression, bool includeCombine)
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    Path rootDir = new Path(TestDir.GetAbsolutePath());
    Path inputDir = new Path(rootDir, "in");
    Path outputDir = new Path(rootDir, "out");
    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(rootDir, true);

    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetMapperClass(typeof(TestMapRed.MyMap));
    conf.SetReducerClass(typeof(TestMapRed.MyReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    if (includeCombine)
    {
        conf.SetCombinerClass(typeof(IdentityReducer));
    }
    conf.SetCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);

    try
    {
        if (!fileSystem.Mkdirs(rootDir))
        {
            throw new IOException("Mkdirs failed to create " + rootDir.ToString());
        }
        if (!fileSystem.Mkdirs(inputDir))
        {
            throw new IOException("Mkdirs failed to create " + inputDir.ToString());
        }

        // Write the three-line input file.
        Path inputFile = new Path(inputDir, "part0");
        DataOutputStream inputStream = fileSystem.Create(inputFile);
        inputStream.WriteBytes("Owen was here\n");
        inputStream.WriteBytes("Hadoop is fun\n");
        inputStream.WriteBytes("Is this done, yet?\n");
        inputStream.Close();

        RunningJob finishedJob = JobClient.RunJob(conf);
        NUnit.Framework.Assert.IsTrue("job was complete", finishedJob.IsComplete());
        NUnit.Framework.Assert.IsTrue("job was successful", finishedJob.IsSuccessful());

        // The output must be compressed exactly when a non-None type was requested.
        Path reduceOutput = new Path(outputDir, Task.GetOutputName(0));
        NUnit.Framework.Assert.IsTrue("reduce output exists " + reduceOutput, fileSystem.Exists(reduceOutput));
        SequenceFile.Reader outputReader = new SequenceFile.Reader(fileSystem, reduceOutput, conf);
        NUnit.Framework.Assert.AreEqual("is reduce output compressed " + reduceOutput, redCompression != SequenceFile.CompressionType.None, outputReader.IsCompressed());
        outputReader.Close();
    }
    finally
    {
        fileSystem.Delete(rootDir, true);
    }
}
/// <summary>
/// Creates a one-file input under <c>/testing/input</c>, runs an identity
/// map/reduce job that writes through <c>SpecialTextOutputFormat</c>, and
/// asserts that the job succeeded and that the expected output file exists.
/// </summary>
/// <param name="fileSys">URI of the file system to run against; also set as the default URI on <paramref name="conf"/>.</param>
/// <param name="conf">Job configuration to populate and run.</param>
/// <param name="numMaps">Number of map tasks to set.</param>
/// <param name="numReduces">Number of reduce tasks to set.</param>
/// <returns>
/// <c>false</c> if the input directory could not be created; otherwise whether
/// the job reports success.
/// </returns>
/// <exception cref="System.IO.IOException"/>
public static bool LaunchJob(URI fileSys, JobConf conf, int numMaps, int numReduces)
{
    Path inputDir = new Path("/testing/input");
    Path outputDir = new Path("/testing/output");
    FileSystem fileSystem = FileSystem.Get(fileSys, conf);
    fileSystem.Delete(outputDir, true);
    bool inputDirCreated = fileSystem.Mkdirs(inputDir);
    if (!inputDirCreated)
    {
        Log.Warn("Can't create " + inputDir);
        return false;
    }

    // Generate the input file.
    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("foo foo2 foo3");
    inputFile.Close();

    // Configure the job: identity mapper and reducer over text input.
    FileSystem.SetDefaultUri(conf, fileSys);
    conf.SetJobName("foo");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputFormat(typeof(TestSpecialCharactersInOutputPath.SpecialTextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(IdentityMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);

    // Run the job and wait for it to complete.
    RunningJob finishedJob = JobClient.RunJob(conf);
    try
    {
        NUnit.Framework.Assert.IsTrue(finishedJob.IsComplete());
        NUnit.Framework.Assert.IsTrue(finishedJob.IsSuccessful());
        NUnit.Framework.Assert.IsTrue("Output folder not found!", fileSystem.Exists(new Path("/testing/output/" + OutputFilename)));
    }
    catch (ArgumentNullException)
    {
        // This exception is no longer expected here; treat it as a test failure.
        Fail("A NPE should not have happened.");
    }

    // Report and return the job result.
    Log.Info("job is complete: " + finishedJob.IsSuccessful());
    return finishedJob.IsSuccessful();
}
/// <summary>
/// Runs the job with <c>IdentityMapper</c> and <c>AscendingKeysReducer</c> and
/// fails the test if the job does not succeed.
/// </summary>
public virtual void TestDefaultMRComparator()
{
    conf.SetMapperClass(typeof(TestComparators.IdentityMapper));
    conf.SetReducerClass(typeof(TestComparators.AscendingKeysReducer));

    RunningJob submittedJob = jc.SubmitJob(conf);

    // Poll once a second until the job reports completion.
    while (true)
    {
        if (submittedJob.IsComplete())
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }

    if (submittedJob.IsSuccessful())
    {
        return;
    }
    NUnit.Framework.Assert.Fail("Oops! The job broke due to an unexpected error");
}
/// <summary>
/// Declares <c>Text</c> map-output keys with <c>IntWritable</c> map-output
/// values, runs the job, and fails the test if the job nevertheless succeeds.
/// </summary>
public virtual void TestValueMismatch()
{
    conf.SetMapOutputKeyClass(typeof(Text));
    conf.SetMapOutputValueClass(typeof(IntWritable));

    RunningJob submittedJob = jc.SubmitJob(conf);

    // Poll once a second until the job reports completion.
    while (true)
    {
        if (submittedJob.IsComplete())
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }

    // Success is the unexpected outcome for this configuration.
    if (!submittedJob.IsSuccessful())
    {
        return;
    }
    NUnit.Framework.Assert.Fail("Oops! The job was supposed to break due to an exception");
}
/// <summary>
/// Declares <c>Text</c> for both the map-output key and value classes, runs the
/// job, and fails the test if the job does not succeed.
/// </summary>
public virtual void TestNoMismatch()
{
    // Set good MapOutputKeyClass and MapOutputValueClass.
    conf.SetMapOutputKeyClass(typeof(Text));
    conf.SetMapOutputValueClass(typeof(Text));

    RunningJob submittedJob = jc.SubmitJob(conf);

    // Poll once a second until the job reports completion.
    while (true)
    {
        if (submittedJob.IsComplete())
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }

    if (submittedJob.IsSuccessful())
    {
        return;
    }
    NUnit.Framework.Assert.Fail("Oops! The job broke due to an unexpected error");
}
/// <summary>
/// Runs the job with <c>RandomGenMapper</c>, <c>AscendingGroupReducer</c> and
/// <c>CompositeIntGroupFn</c> as the output value grouping comparator, and
/// fails the test if the job does not succeed.
/// </summary>
public virtual void TestUserValueGroupingComparator()
{
    conf.SetMapperClass(typeof(TestComparators.RandomGenMapper));
    conf.SetReducerClass(typeof(TestComparators.AscendingGroupReducer));
    conf.SetOutputValueGroupingComparator(typeof(TestComparators.CompositeIntGroupFn));

    RunningJob submittedJob = jc.SubmitJob(conf);

    // Poll once a second until the job reports completion.
    while (true)
    {
        if (submittedJob.IsComplete())
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }

    if (submittedJob.IsSuccessful())
    {
        return;
    }
    NUnit.Framework.Assert.Fail("Oops! The job broke due to an unexpected error");
}
/// <summary>
/// Starts the job on a daemon thread, waits for it to appear in the client's
/// list of jobs to complete and to leave the <c>Prep</c> state, then runs the
/// optional task-kill and tracker-kill threads one after the other and finally
/// joins the job thread.
/// </summary>
/// <param name="jc">Client used to query job status.</param>
/// <param name="conf">Configuration handed to the job thread.</param>
/// <param name="jobClass">Name of the job class; passed to the job thread and used in log messages.</param>
/// <param name="args">Arguments handed to the job thread.</param>
/// <param name="killTaskThread">Task-kill thread to run against the job, or <c>null</c> to skip.</param>
/// <param name="killTrackerThread">Tracker-kill thread to run against the job, or <c>null</c> to skip.</param>
/// <exception cref="System.Exception"/>
private void RunTest(JobClient jc, Configuration conf, string jobClass, string[] args, ReliabilityTest.KillTaskThread killTaskThread, ReliabilityTest.KillTrackerThread killTrackerThread)
{
    Sharpen.Thread jobThread = new _Thread_202(this, conf, jobClass, args, "Job Test");
    jobThread.SetDaemon(true);
    jobThread.Start();

    // Get the job ID. This is the job that we just submitted.
    JobStatus[] pendingJobs = jc.JobsToComplete();
    while (pendingJobs.Length == 0)
    {
        Log.Info("Waiting for the job " + jobClass + " to start");
        Sharpen.Thread.Sleep(1000);
        pendingJobs = jc.JobsToComplete();
    }
    JobStatus newestStatus = pendingJobs[pendingJobs.Length - 1];
    JobID jobId = ((JobID)newestStatus.GetJobID());
    RunningJob runningJob = jc.GetJob(jobId);

    // A job that is already complete leaves nothing to test against:
    // log the problem and terminate the process.
    if (runningJob.IsComplete())
    {
        Log.Error("The last job returned by the querying JobTracker is complete :" + runningJob.GetJobID() + " .Exiting the test");
        System.Environment.Exit(-1);
    }

    // Wait for the job to leave the Prep state, refreshing the handle each time.
    while (runningJob.GetJobState() == JobStatus.Prep)
    {
        Log.Info("JobID : " + jobId + " not started RUNNING yet");
        Sharpen.Thread.Sleep(1000);
        runningJob = jc.GetJob(jobId);
    }

    if (killTaskThread != null)
    {
        killTaskThread.SetRunningJob(runningJob);
        killTaskThread.Start();
        killTaskThread.Join();
        Log.Info("DONE WITH THE TASK KILL/FAIL TESTS");
    }

    if (killTrackerThread != null)
    {
        killTrackerThread.SetRunningJob(runningJob);
        killTrackerThread.Start();
        killTrackerThread.Join();
        Log.Info("DONE WITH THE TESTS TO DO WITH LOST TASKTRACKERS");
    }

    jobThread.Join();
}
/// <summary>
/// Runs the job with <c>RandomGenMapper</c>, a decreasing key comparator,
/// <c>DescendingGroupReducer</c> and <c>CompositeIntReverseGroupFn</c> as the
/// value grouping comparator, and fails the test if the job does not succeed.
/// </summary>
public virtual void TestAllUserComparators()
{
    conf.SetMapperClass(typeof(TestComparators.RandomGenMapper));
    // Use a decreasing comparator so keys are sorted in reverse order.
    conf.SetOutputKeyComparatorClass(typeof(TestComparators.DecreasingIntComparator));
    conf.SetReducerClass(typeof(TestComparators.DescendingGroupReducer));
    conf.SetOutputValueGroupingComparator(typeof(TestComparators.CompositeIntReverseGroupFn));

    RunningJob submittedJob = jc.SubmitJob(conf);

    // Poll once a second until the job reports completion.
    while (true)
    {
        if (submittedJob.IsComplete())
        {
            break;
        }
        Sharpen.Thread.Sleep(1000);
    }

    if (submittedJob.IsSuccessful())
    {
        return;
    }
    NUnit.Framework.Assert.Fail("Oops! The job broke due to an unexpected error");
}
/// <summary>
/// Sets up the SSL configuration, starts a cluster with the shuffle SSL flag
/// enabled, runs a one-map job over a small input file and asserts that it
/// completes successfully. The cluster is stopped afterwards in all cases.
/// </summary>
/// <param name="useClientCerts">Passed through to <c>KeyStoreTestUtil.SetupSSLConfig</c>.</param>
/// <exception cref="System.Exception"/>
private void EncryptedShuffleWithCerts(bool useClientCerts)
{
    try
    {
        // Prepare SSL key stores and configuration, then bring the cluster up.
        Configuration clusterConf = new Configuration();
        string keystoresDir = new FilePath(Basedir).GetAbsolutePath();
        string sslConfsDir = KeyStoreTestUtil.GetClasspathDir(typeof(TestEncryptedShuffle));
        KeyStoreTestUtil.SetupSSLConfig(keystoresDir, sslConfsDir, clusterConf, useClientCerts);
        clusterConf.SetBoolean(MRConfig.ShuffleSslEnabledKey, true);
        StartCluster(clusterConf);

        // Stage the job input.
        FileSystem fileSystem = FileSystem.Get(GetJobConf());
        Path inputDir = new Path("input");
        fileSystem.Mkdirs(inputDir);
        Path dataFile = new Path(inputDir, "data.txt");
        TextWriter dataWriter = new OutputStreamWriter(fileSystem.Create(dataFile));
        dataWriter.Write("hello");
        dataWriter.Close();

        // Configure the job with a single map task and a single attempt per task type.
        Path outputDir = new Path("output", "output");
        JobConf jobConf = new JobConf(GetJobConf());
        jobConf.SetInt("mapred.map.tasks", 1);
        jobConf.SetInt("mapred.map.max.attempts", 1);
        jobConf.SetInt("mapred.reduce.max.attempts", 1);
        jobConf.Set("mapred.input.dir", inputDir.ToString());
        jobConf.Set("mapred.output.dir", outputDir.ToString());

        // Submit, wait and verify.
        JobClient client = new JobClient(jobConf);
        RunningJob submitted = client.SubmitJob(jobConf);
        submitted.WaitForCompletion();
        NUnit.Framework.Assert.IsTrue(submitted.IsComplete());
        NUnit.Framework.Assert.IsTrue(submitted.IsSuccessful());
    }
    finally
    {
        StopCluster();
    }
}
/// <summary>
/// Runs a chained job (mappers A and B, reducer C, then mappers D and E) over a
/// two-line input and asserts that every stage set its configure, per-value and
/// close flags.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestChain()
{
    Path inputDir = new Path("testing/chain/input");
    Path outputDir = new Path("testing/chain/output");

    // Hack for local FS that does not have the concept of a 'mounting point'.
    if (IsLocalFS())
    {
        string localRoot = Runtime.GetProperty("test.build.data", "/tmp").Replace(' ', '+');
        inputDir = new Path(localRoot, inputDir);
        outputDir = new Path(localRoot, outputDir);
    }

    JobConf conf = CreateJobConf();
    conf.SetBoolean("localFS", IsLocalFS());
    conf.SetInt("mapreduce.job.maps", 1);
    CleanFlags(conf);

    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(outputDir, true);
    if (!fileSystem.Mkdirs(inputDir))
    {
        throw new IOException("Mkdirs failed to create " + inputDir.ToString());
    }

    // Two input records: "1" and "2".
    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("1\n2\n");
    inputFile.Close();

    conf.SetJobName("chain");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.Set("a", "X");

    // Map side of the chain: A (with its own conf), then B (no conf).
    JobConf mapAConf = new JobConf(false);
    mapAConf.Set("a", "A");
    ChainMapper.AddMapper(conf, typeof(TestChainMapReduce.AMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), true, mapAConf);
    ChainMapper.AddMapper(conf, typeof(TestChainMapReduce.BMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), false, null);

    // Reduce side of the chain: reducer C, then mappers D (no conf) and E (own conf).
    JobConf reduceConf = new JobConf(false);
    reduceConf.Set("a", "C");
    ChainReducer.SetReducer(conf, typeof(TestChainMapReduce.CReduce), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), true, reduceConf);
    ChainReducer.AddMapper(conf, typeof(TestChainMapReduce.DMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), false, null);
    JobConf mapEConf = new JobConf(false);
    mapEConf.Set("a", "E");
    ChainReducer.AddMapper(conf, typeof(TestChainMapReduce.EMap), typeof(LongWritable), typeof(Text), typeof(LongWritable), typeof(Text), true, mapEConf);

    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);

    JobClient client = new JobClient(conf);
    RunningJob runningJob = client.SubmitJob(conf);
    while (!runningJob.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // Flags are checked in this exact order; the first missing one fails the test.
    string[] expectedFlags = new string[]
    {
        "configure.A",
        "configure.B",
        "configure.C",
        "configure.D",
        "configure.E",
        "map.A.value.1",
        "map.A.value.2",
        "map.B.value.1",
        "map.B.value.2",
        "reduce.C.value.2",
        "reduce.C.value.1",
        "map.D.value.1",
        "map.D.value.2",
        "map.E.value.1",
        "map.E.value.2",
        "close.A",
        "close.B",
        "close.C",
        "close.D",
        "close.E"
    };
    foreach (string flag in expectedFlags)
    {
        NUnit.Framework.Assert.IsTrue(GetFlag(conf, flag));
    }
}
/// <summary>
/// Copies <paramref name="program"/> into the DFS as the pipes executable,
/// runs it as a job, and asserts that the job succeeded, that the "WORDCOUNT"
/// counter group is non-empty, and that the output files match
/// <paramref name="expectedResults"/>.
/// </summary>
/// <param name="mr">Cluster used to create a job configuration when <paramref name="conf"/> is <c>null</c>.</param>
/// <param name="dfs">DFS cluster whose file system holds the executable and the output.</param>
/// <param name="program">Local path of the executable to copy and run.</param>
/// <param name="inputPath">Job input path.</param>
/// <param name="outputPath">Job output path.</param>
/// <param name="numMaps">Number of map tasks to set.</param>
/// <param name="numReduces">Number of reduce tasks to set; 0 selects the submit-and-poll path.</param>
/// <param name="expectedResults">Expected content of each output file, in listing order.</param>
/// <param name="conf">Base configuration to copy, or <c>null</c> to create one from <paramref name="mr"/>.</param>
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path wordExec = new Path("testing/bin/application");
    JobConf job = conf == null ? mr.CreateJobConf() : new JobConf(conf);
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);

    // Install the executable in the DFS and point the job at it.
    FileSystem fileSystem = dfs.GetFileSystem();
    fileSystem.Delete(wordExec.GetParent(), true);
    fileSystem.CopyFromLocalFile(program, wordExec);
    Submitter.SetExecutable(job, fileSystem.MakeQualified(wordExec).ToString());
    Submitter.SetIsJavaRecordReader(job, true);
    Submitter.SetIsJavaRecordWriter(job, true);
    FileInputFormat.SetInputPaths(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);

    RunningJob runningJob;
    if (numReduces != 0)
    {
        runningJob = Submitter.RunJob(job);
    }
    else
    {
        // With no reduces the job is submitted and then polled once a second.
        runningJob = Submitter.JobSubmit(job);
        while (!runningJob.IsComplete())
        {
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception ie)
            {
                throw new RuntimeException(ie);
            }
        }
    }
    NUnit.Framework.Assert.IsTrue("pipes job failed", runningJob.IsSuccessful());

    // Print every counter in the "WORDCOUNT" group and require at least one.
    Counters jobCounters = runningJob.GetCounters();
    Counters.Group wordCountGroup = jobCounters.GetGroup("WORDCOUNT");
    int counterTotal = 0;
    foreach (Counters.Counter counter in wordCountGroup)
    {
        System.Console.Out.WriteLine(counter);
        ++counterTotal;
    }
    NUnit.Framework.Assert.IsTrue("No counters found!", (counterTotal > 0));

    // Read back every output file and compare against the expectations.
    IList<string> results = new AList<string>();
    Path[] outputFiles = FileUtil.Stat2Paths(dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()));
    foreach (Path outputFile in outputFiles)
    {
        results.AddItem(MapReduceTestUtil.ReadOutput(outputFile, job));
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, results.Count);
    for (int index = 0; index < results.Count; index++)
    {
        NUnit.Framework.Assert.AreEqual("pipes program " + program + " output " + index + " wrong", expectedResults[index], results[index]);
    }
}
/// <summary>
/// Runs a multiple-outputs job configured with Java serialization and checks
/// the named output files, the content of the reduce-side "text" output and the
/// <c>MultipleOutputs</c> counter group.
/// </summary>
/// <param name="withCounters">
/// Whether multiple-outputs counters are enabled; decides which counter
/// assertions are made.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMOWithJavaSerialization(bool withCounters)
{
    Path inputDir = GetDir(InDir);
    Path outputDir = GetDir(OutDir);
    JobConf conf = CreateJobConf();
    FileSystem fileSystem = FileSystem.Get(conf);

    // "part-0" is written and then removed again together with the whole input
    // directory, so only "part-1" is present when the job runs.
    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();
    fileSystem.Delete(inputDir, true);
    fileSystem.Delete(outputDir, true);
    inputFile = fileSystem.Create(new Path(inputDir, "part-1"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();

    conf.SetJobName("mo");
    conf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(long));
    conf.SetMapOutputValueClass(typeof(string));
    conf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));
    conf.SetOutputKeyClass(typeof(long));
    conf.SetOutputValueClass(typeof(string));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(long), typeof(string));
    MultipleOutputs.SetCountersEnabled(conf, withCounters);
    conf.SetMapperClass(typeof(TestMultipleOutputs.MOJavaSerDeMap));
    conf.SetReducerClass(typeof(TestMultipleOutputs.MOJavaSerDeReduce));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);

    JobClient client = new JobClient(conf);
    RunningJob runningJob = client.SubmitJob(conf);
    while (!runningJob.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // Assert the number of named output part files.
    int namedOutputCount = 0;
    foreach (FileStatus status in fileSystem.ListStatus(outputDir))
    {
        string name = status.GetPath().GetName();
        if (name.Equals("text-m-00000") || name.Equals("text-r-00000"))
        {
            namedOutputCount++;
        }
    }
    NUnit.Framework.Assert.AreEqual(2, namedOutputCount);

    // Assert TextOutputFormat file correctness: every line ends with "text"
    // and there is at least one line.
    Path textOutput = new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000");
    BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.Open(textOutput)));
    int lineCount = 0;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
        lineCount++;
    }
    reader.Close();
    NUnit.Framework.Assert.IsFalse(lineCount == 0);

    Counters.Group counters = runningJob.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
    if (withCounters)
    {
        NUnit.Framework.Assert.AreEqual(1, counters.Size());
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("text"));
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(0, counters.Size());
    }
}
/// <summary>
/// Runs a job through <c>MultithreadedMapRunner</c> with two threads and
/// asserts that the job succeeds exactly when neither failure flag is set.
/// </summary>
/// <param name="ioEx">When true, sets "multithreaded.ioException" on the job configuration.</param>
/// <param name="rtEx">When true, sets "multithreaded.runtimeException" on the job configuration.</param>
/// <exception cref="System.Exception"/>
private void Run(bool ioEx, bool rtEx)
{
    Path inputDir = new Path("testing/mt/input");
    Path outputDir = new Path("testing/mt/output");

    // Hack for local FS that does not have the concept of a 'mounting point'.
    if (IsLocalFS())
    {
        string localRoot = Runtime.GetProperty("test.build.data", "/tmp").Replace(' ', '+');
        inputDir = new Path(localRoot, inputDir);
        outputDir = new Path(localRoot, outputDir);
    }

    JobConf conf = CreateJobConf();
    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(outputDir, true);
    if (!fileSystem.Mkdirs(inputDir))
    {
        throw new IOException("Mkdirs failed to create " + inputDir.ToString());
    }

    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();

    conf.SetJobName("mt");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    // The output key/value classes are set a second time here, with the same types as above.
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(TestMultithreadedMapRunner.IDMap));
    conf.SetReducerClass(typeof(TestMultithreadedMapRunner.IDReduce));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetMapRunnerClass(typeof(MultithreadedMapRunner));
    conf.SetInt(MultithreadedMapper.NumThreads, 2);

    if (ioEx)
    {
        conf.SetBoolean("multithreaded.ioException", true);
    }
    if (rtEx)
    {
        conf.SetBoolean("multithreaded.runtimeException", true);
    }

    JobClient client = new JobClient(conf);
    RunningJob runningJob = client.SubmitJob(conf);
    while (!runningJob.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // Success is only acceptable when no failure was requested, and vice versa.
    if (!runningJob.IsSuccessful())
    {
        NUnit.Framework.Assert.IsTrue(ioEx || rtEx);
    }
    else
    {
        NUnit.Framework.Assert.IsFalse(ioEx || rtEx);
    }
}
/// <summary>
/// Runs a job with one named output ("text") and one multi named output
/// ("sequence"), then checks the set of named output files, the content of the
/// reduce-side text and sequence outputs, and the <c>MultipleOutputs</c>
/// counter group.
/// </summary>
/// <param name="withCounters">
/// Whether multiple-outputs counters are enabled; decides which counter
/// assertions are made.
/// </param>
/// <exception cref="System.Exception"/>
protected internal virtual void _testMultipleOutputs(bool withCounters)
{
    Path inputDir = GetDir(InDir);
    Path outputDir = GetDir(OutDir);
    JobConf conf = CreateJobConf();
    FileSystem fileSystem = FileSystem.Get(conf);

    // Two identical input files.
    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();
    inputFile = fileSystem.Create(new Path(inputDir, "part-1"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();

    conf.SetJobName("mo");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    MultipleOutputs.AddNamedOutput(conf, "text", typeof(TextOutputFormat), typeof(LongWritable), typeof(Text));
    MultipleOutputs.AddMultiNamedOutput(conf, "sequence", typeof(SequenceFileOutputFormat), typeof(LongWritable), typeof(Text));
    MultipleOutputs.SetCountersEnabled(conf, withCounters);
    conf.SetMapperClass(typeof(TestMultipleOutputs.MOMap));
    conf.SetReducerClass(typeof(TestMultipleOutputs.MOReduce));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);

    JobClient client = new JobClient(conf);
    RunningJob runningJob = client.SubmitJob(conf);
    while (!runningJob.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }

    // Assert the number of named output part files.
    string[] expectedNames = new string[]
    {
        "text-m-00000",
        "text-m-00001",
        "text-r-00000",
        "sequence_A-m-00000",
        "sequence_A-m-00001",
        "sequence_B-m-00000",
        "sequence_B-m-00001",
        "sequence_B-r-00000",
        "sequence_C-r-00000"
    };
    int namedOutputCount = 0;
    foreach (FileStatus status in fileSystem.ListStatus(outputDir))
    {
        string name = status.GetPath().GetName();
        foreach (string expectedName in expectedNames)
        {
            if (name.Equals(expectedName))
            {
                namedOutputCount++;
                break;
            }
        }
    }
    NUnit.Framework.Assert.AreEqual(9, namedOutputCount);

    // Assert TextOutputFormat file correctness: every line ends with "text"
    // and there is at least one line.
    Path textOutput = new Path(FileOutputFormat.GetOutputPath(conf), "text-r-00000");
    BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.Open(textOutput)));
    int count = 0;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        NUnit.Framework.Assert.IsTrue(line.EndsWith("text"));
        count++;
    }
    reader.Close();
    NUnit.Framework.Assert.IsFalse(count == 0);

    // Assert SequenceOutputFormat file correctness: key/value classes match
    // and every value is "sequence".
    Path sequenceOutput = new Path(FileOutputFormat.GetOutputPath(conf), "sequence_B-r-00000");
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fileSystem, sequenceOutput, conf);
    NUnit.Framework.Assert.AreEqual(typeof(LongWritable), seqReader.GetKeyClass());
    NUnit.Framework.Assert.AreEqual(typeof(Text), seqReader.GetValueClass());
    count = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (seqReader.Next(key, value))
    {
        NUnit.Framework.Assert.AreEqual("sequence", value.ToString());
        count++;
    }
    seqReader.Close();
    NUnit.Framework.Assert.IsFalse(count == 0);

    Counters.Group counters = runningJob.GetCounters().GetGroup(typeof(MultipleOutputs).FullName);
    if (withCounters)
    {
        NUnit.Framework.Assert.AreEqual(4, counters.Size());
        NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("text"));
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_A"));
        NUnit.Framework.Assert.AreEqual(4, counters.GetCounter("sequence_B"));
        NUnit.Framework.Assert.AreEqual(2, counters.GetCounter("sequence_C"));
    }
    else
    {
        NUnit.Framework.Assert.AreEqual(0, counters.Size());
    }
}
/// <summary>
/// Runs a one-map, one-reduce job that sorts two input lines with
/// <c>KeyFieldBasedComparator</c> using <paramref name="keySpec"/>, then checks
/// which of the two lines comes first in the first output file.
/// </summary>
/// <param name="keySpec">Key field comparator options to set on the job.</param>
/// <param name="expect">
/// 1 if <c>line1</c> is expected before <c>line2</c> in the output, 2 for the
/// opposite order; any other value skips the line checks.
/// </param>
/// <exception cref="System.Exception"/>
public virtual void Configure(string keySpec, int expect)
{
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = GetFileSystem();
    fs.Delete(testdir, true);

    conf.SetInputFormat(typeof(TextInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(LongWritable));
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(1);
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyComparatorClass(typeof(KeyFieldBasedComparator));
    conf.SetKeyFieldComparatorOptions(keySpec);
    conf.SetKeyFieldPartitionerOptions("-k1.1,1.1");
    conf.Set(JobContext.MapOutputKeyFieldSeperator, " ");
    conf.SetMapperClass(typeof(InverseMapper));
    conf.SetReducerClass(typeof(IdentityReducer));

    if (!fs.Mkdirs(testdir))
    {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    // Set up the input data: both lines go into a single file.
    Path inFile = new Path(inDir, "part0");
    FileOutputStream fos = new FileOutputStream(inFile.ToString());
    try
    {
        fos.Write(Sharpen.Runtime.GetBytesForString((line1 + "\n")));
        fos.Write(Sharpen.Runtime.GetBytesForString((line2 + "\n")));
    }
    finally
    {
        // Close the stream even if a write fails.
        fos.Close();
    }

    JobClient jc = new JobClient(conf);
    RunningJob r_job = jc.SubmitJob(conf);
    while (!r_job.IsComplete())
    {
        Sharpen.Thread.Sleep(1000);
    }
    if (!r_job.IsSuccessful())
    {
        // The message is kept on one line: a regular string literal cannot span a line break.
        Fail("Oops! The job broke due to an unexpected error");
    }

    Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    if (outputFiles.Length > 0)
    {
        InputStream @is = GetFileSystem().Open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
        try
        {
            // Make sure we get what we expect as the first line, and also
            // that we have two lines.
            string line = reader.ReadLine();
            if (expect == 1)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line1));
            }
            else if (expect == 2)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line2));
            }

            line = reader.ReadLine();
            if (expect == 1)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line2));
            }
            else if (expect == 2)
            {
                NUnit.Framework.Assert.IsTrue(line.StartsWith(line1));
            }
        }
        finally
        {
            // Close the reader even when one of the assertions above fails.
            reader.Close();
        }
    }
}
/// <summary>
/// Runs a local-framework job over two input files and asserts that the output
/// directory contains "test-m-00000", "test-m-00001" and "test-r-00000".
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCustomFile()
{
    Path inputDir = new Path("testing/fileoutputformat/input");
    Path outputDir = new Path("testing/fileoutputformat/output");

    // Hack for local FS that does not have the concept of a 'mounting point'.
    if (IsLocalFS())
    {
        string localRoot = Runtime.GetProperty("test.build.data", "/tmp").Replace(' ', '+');
        inputDir = new Path(localRoot, inputDir);
        outputDir = new Path(localRoot, outputDir);
    }

    JobConf conf = CreateJobConf();
    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(outputDir, true);
    if (!fileSystem.Mkdirs(inputDir))
    {
        throw new IOException("Mkdirs failed to create " + inputDir.ToString());
    }

    // Two identical input files.
    DataOutputStream inputFile = fileSystem.Create(new Path(inputDir, "part-0"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();
    inputFile = fileSystem.Create(new Path(inputDir, "part-1"));
    inputFile.WriteBytes("a\nb\n\nc\nd\ne");
    inputFile.Close();

    conf.SetJobName("fof");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(TestFileOutputFormat.TestMap));
    conf.SetReducerClass(typeof(TestFileOutputFormat.TestReduce));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);

    JobClient client = new JobClient(conf);
    RunningJob runningJob = client.SubmitJob(conf);
    while (!runningJob.IsComplete())
    {
        Sharpen.Thread.Sleep(100);
    }
    NUnit.Framework.Assert.IsTrue(runningJob.IsSuccessful());

    // Record which of the three expected output files are present.
    bool sawMap0 = false;
    bool sawMap1 = false;
    bool sawReduce = false;
    foreach (FileStatus status in fileSystem.ListStatus(outputDir))
    {
        string name = status.GetPath().GetName();
        sawMap0 |= name.Equals("test-m-00000");
        sawMap1 |= name.Equals("test-m-00001");
        sawReduce |= name.Equals("test-r-00000");
    }
    NUnit.Framework.Assert.IsTrue(sawMap0);
    NUnit.Framework.Assert.IsTrue(sawMap1);
    NUnit.Framework.Assert.IsTrue(sawReduce);
}