/// <summary>
/// Drives one map-file task through <c>FileOutputCommitter</c> using the given
/// algorithm version: job/task setup, writing the output, committing, validating
/// the content under <c>outDir</c>, and finally deleting <c>outDir</c>.
/// </summary>
/// <param name="version">Value stored under the committer algorithm-version key.</param>
/// <exception cref="System.Exception"/>
private void TestMapFileOutputCommitterInternal(int version)
{
    var jobConf = new JobConf();
    FileOutputFormat.SetOutputPath(jobConf, outDir);
    jobConf.Set(JobContext.TaskAttemptId, attempt);
    jobConf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);

    JobContext jobContext = new JobContextImpl(jobConf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, taskID);
    var committer = new FileOutputCommitter();

    // Prepare the job and the task attempt.
    committer.SetupJob(jobContext);
    committer.SetupTask(taskContext);

    // Produce the map-file output through a record writer.
    var mapFileFormat = new MapFileOutputFormat();
    RecordWriter writer = mapFileFormat.GetRecordWriter(null, jobConf, partFile, null);
    WriteMapFileOutput(writer, taskContext);

    // Commit the task only when the committer asks for it, then commit the job.
    bool taskNeedsCommit = committer.NeedsTaskCommit(taskContext);
    if (taskNeedsCommit)
    {
        committer.CommitTask(taskContext);
    }
    committer.CommitJob(jobContext);

    // Check what was committed, then remove the output directory.
    FileSystem fileSystem = FileSystem.Get(jobConf);
    ValidateMapFileOutputContent(fileSystem, outDir);
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// Checks that <c>AbortTask</c> and <c>AbortJob</c> surface an <c>IOException</c>
/// when the configured file system (<c>FakeFileSystem</c>, registered under the
/// <c>faildel</c> scheme) is in use, and that the task file and the job temp
/// directory are still present afterwards.
/// </summary>
/// <param name="version">Value stored under the committer algorithm-version key.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void TestFailAbortInternal(int version)
{
    JobConf conf = new JobConf();
    conf.Set(FileSystem.FsDefaultNameKey, "faildel:///");
    conf.SetClass("fs.faildel.impl", typeof(TestFileOutputCommitter.FakeFileSystem), typeof(FileSystem));
    conf.Set(JobContext.TaskAttemptId, attempt);
    conf.SetInt(FileOutputCommitter.FileoutputcommitterAlgorithmVersion, version);
    conf.SetInt(MRConstants.ApplicationAttemptId, 1);
    FileOutputFormat.SetOutputPath(conf, outDir);
    JobContext jContext = new JobContextImpl(conf, ((JobID)taskID.GetJobID()));
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // do setup
    committer.SetupJob(jContext);
    committer.SetupTask(tContext);

    // write output
    FilePath jobTmpDir = new FilePath(new Path(outDir, FileOutputCommitter.TempDirName + Path.Separator
        + conf.GetInt(MRConstants.ApplicationAttemptId, 0) + Path.Separator
        + FileOutputCommitter.TempDirName).ToString());
    FilePath taskTmpDir = new FilePath(jobTmpDir, "_" + taskID);
    FilePath expectedFile = new FilePath(taskTmpDir, partFile);
    // Explicit type arguments, matching the declared variable type.
    TextOutputFormat<object, object> theOutputFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> theRecordWriter = theOutputFormat.GetRecordWriter(null, conf,
        expectedFile.GetAbsolutePath(), null);
    WriteOutput(theRecordWriter, tContext);

    // do abort: the task abort must fail and leave the task file in place
    Exception th = null;
    try
    {
        committer.AbortTask(tContext);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    // NUnit takes the condition first and the message second.
    NUnit.Framework.Assert.IsTrue(expectedFile.Exists(), expectedFile + " does not exists");

    // the job abort must fail as well and leave the job temp dir in place
    th = null;
    try
    {
        committer.AbortJob(jContext, JobStatus.State.Failed);
    }
    catch (IOException ie)
    {
        th = ie;
    }
    NUnit.Framework.Assert.IsNotNull(th);
    NUnit.Framework.Assert.IsTrue(th is IOException);
    NUnit.Framework.Assert.IsTrue(th.Message.Contains("fake delete failed"));
    NUnit.Framework.Assert.IsTrue(jobTmpDir.Exists(), "job temp dir does not exists");
    FileUtil.FullyDelete(new FilePath(outDir.ToString()));
}
/// <summary>
/// HADOOP-4466: verifies that a job using the JavaSerialization implementation
/// can write its output to SequenceFiles.
/// </summary>
/// <remarks>
/// The job declares <c>string</c> keys and <c>long</c> values and writes them
/// through <c>SequenceFileOutputFormat</c>. This only works because that output
/// format is not coupled to Writable types; if it were, the job would fail.
/// The test passes when exactly one output file is produced.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestWriteToSequencefile()
{
    var jobConf = new JobConf(typeof(TestJavaSerialization));
    jobConf.SetJobName("JavaSerialization");

    FileSystem fileSystem = FileSystem.Get(jobConf);
    CleanAndCreateInput(fileSystem);

    jobConf.Set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
        + "org.apache.hadoop.io.serializer.WritableSerialization");
    jobConf.SetInputFormat(typeof(TextInputFormat));

    // The point of the test: sequence-file output with non-Writable key/value types.
    jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    jobConf.SetOutputKeyClass(typeof(string));
    jobConf.SetOutputValueClass(typeof(long));
    jobConf.SetOutputKeyComparatorClass(typeof(JavaSerializationComparator));

    jobConf.SetMapperClass(typeof(TestJavaSerialization.WordCountMapper));
    jobConf.SetReducerClass(typeof(TestJavaSerialization.SumReducer));
    jobConf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);

    FileInputFormat.SetInputPaths(jobConf, InputDir);
    FileOutputFormat.SetOutputPath(jobConf, OutputDir);
    JobClient.RunJob(jobConf);

    var outputFilter = new Utils.OutputFileUtils.OutputFilesFilter();
    Path[] producedFiles = FileUtil.Stat2Paths(fileSystem.ListStatus(OutputDir, outputFilter));
    NUnit.Framework.Assert.AreEqual(1, producedFiles.Length);
}
/// <summary>Generate input data for the benchmark</summary>
/// <remarks>
/// Configures and runs a map-only job (zero reduces) that uses
/// <c>UtilsForTests.RandomInputFormat</c> and writes sequence files into
/// <c>InputDir</c>. <c>BaseDir</c> is deleted recursively before the job runs.
/// </remarks>
/// <param name="dataSizePerMap">Amount of data per map, in megabytes.</param>
/// <param name="numSpillsPerMap">Number of spills per map; only logged here.</param>
/// <param name="numMapsPerHost">Number of map tasks per task tracker.</param>
/// <param name="masterConf">Configuration the job configuration is derived from.</param>
/// <exception cref="System.Exception"/>
public static void GenerateInputData(int dataSizePerMap, int numSpillsPerMap, int numMapsPerHost, JobConf masterConf)
{
    JobConf job = new JobConf(masterConf, typeof(ThreadedMapBenchmark));
    job.SetJobName("threaded-map-benchmark-random-writer");
    job.SetJarByClass(typeof(ThreadedMapBenchmark));
    job.SetInputFormat(typeof(UtilsForTests.RandomInputFormat));
    job.SetOutputFormat(typeof(SequenceFileOutputFormat));
    job.SetMapperClass(typeof(ThreadedMapBenchmark.Map));
    job.SetReducerClass(typeof(IdentityReducer));
    job.SetOutputKeyClass(typeof(BytesWritable));
    job.SetOutputValueClass(typeof(BytesWritable));

    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.GetClusterStatus();

    // Widen to long BEFORE multiplying: the int products wrap silently for
    // large inputs (e.g. dataSizePerMap >= 2048 overflows the byte count).
    long totalDataSize = (long)dataSizePerMap * numMapsPerHost * cluster.GetTaskTrackers();
    long bytesPerMap = (long)dataSizePerMap * 1024 * 1024;
    job.Set("test.tmb.bytes_per_map", bytesPerMap.ToString());

    // map-only job: no reduce phase
    job.SetNumReduceTasks(0);
    job.SetNumMapTasks(numMapsPerHost * cluster.GetTaskTrackers());
    FileOutputFormat.SetOutputPath(job, InputDir);

    FileSystem fs = FileSystem.Get(job);
    fs.Delete(BaseDir, true);

    Log.Info("Generating random input for the benchmark");
    Log.Info("Total data : " + totalDataSize + " mb");
    Log.Info("Data per map: " + dataSizePerMap + " mb");
    Log.Info("Number of spills : " + numSpillsPerMap);
    Log.Info("Number of maps per host : " + numMapsPerHost);
    Log.Info("Number of hosts : " + cluster.GetTaskTrackers());
    JobClient.RunJob(job);
}
/// <summary>
/// Writes <paramref name="input"/> to <c>part-0</c> under <paramref name="inDir"/>
/// and configures <paramref name="conf"/> as a word-count job whose mapper,
/// combiner and reducer are referenced by class name from the test jar.
/// </summary>
/// <param name="fs">File system that holds the input and output directories.</param>
/// <param name="conf">Job configuration to populate.</param>
/// <param name="input">Text written to the single input file.</param>
/// <param name="numMaps">Number of map tasks to request.</param>
/// <param name="numReduces">Number of reduce tasks to request.</param>
/// <param name="inDir">Input directory; created here.</param>
/// <param name="outDir">Output directory; deleted recursively first.</param>
/// <exception cref="System.IO.IOException">If the input directory cannot be created.</exception>
internal static void ConfigureWordCount(FileSystem fs, JobConf conf, string input, int numMaps, int numReduces, Path inDir, Path outDir)
{
    // Start from a clean output location and a freshly created input directory.
    fs.Delete(outDir, true);
    bool inputDirCreated = fs.Mkdirs(inDir);
    if (!inputDirCreated)
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    Path inputFile = new Path(inDir, "part-0");
    DataOutputStream inputStream = fs.Create(inputFile);
    inputStream.WriteBytes(input);
    inputStream.Close();

    FileSystem.SetDefaultUri(conf, fs.GetUri());
    conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));

    // Keys are the words (strings); values are their counts (ints).
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(IntWritable));

    // Mapper, combiner and reducer are given by name rather than by type.
    conf.Set("mapred.mapper.class", "testjar.ClassWordCount$MapClass");
    conf.Set("mapred.combine.class", "testjar.ClassWordCount$Reduce");
    conf.Set("mapred.reducer.class", "testjar.ClassWordCount$Reduce");

    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);

    // Point the job at the jar that contains the tests.
    conf.SetJarByClass(typeof(TestMiniMRClasspath));
}
/// <summary>
/// Runs an identity-mapper job whose job name contains regex-special characters
/// and checks that it produces a single output file holding the one input line
/// keyed by offset 0.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestComplexNameWithRegex()
{
    // Create the single-line input file.
    Path inputFile = new Path(GetInputDir(), "text.txt");
    OutputStream rawOut = GetFileSystem().Create(inputFile);
    TextWriter inputWriter = new OutputStreamWriter(rawOut);
    inputWriter.Write("b a\n");
    inputWriter.Close();

    JobConf jobConf = CreateJobConf();
    jobConf.SetJobName("name \\Evalue]");
    jobConf.SetInputFormat(typeof(TextInputFormat));
    jobConf.SetOutputKeyClass(typeof(LongWritable));
    jobConf.SetOutputValueClass(typeof(Text));
    jobConf.SetMapperClass(typeof(IdentityMapper));
    FileInputFormat.SetInputPaths(jobConf, GetInputDir());
    FileOutputFormat.SetOutputPath(jobConf, GetOutputDir());
    JobClient.RunJob(jobConf);

    // Exactly one output file is expected.
    Path[] producedFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(GetOutputDir(),
        new Utils.OutputFileUtils.OutputFilesFilter()));
    NUnit.Framework.Assert.AreEqual(1, producedFiles.Length);

    // It must contain the input line prefixed by its offset, and nothing else.
    InputStream rawIn = GetFileSystem().Open(producedFiles[0]);
    BufferedReader outputReader = new BufferedReader(new InputStreamReader(rawIn));
    NUnit.Framework.Assert.AreEqual("0\tb a", outputReader.ReadLine());
    NUnit.Framework.Assert.IsNull(outputReader.ReadLine());
    outputReader.Close();
}
/// <summary>
/// Runs a map-only job with <c>TestReporter.StatusLimitMapper</c> over a single
/// small input file and asserts that the job succeeds.
/// </summary>
public virtual void TestStatusLimit()
{
    Path test = new Path(testRootTempDir, "testStatusLimit");
    Configuration conf = new Configuration();
    Path inDir = new Path(test, "in");
    Path outDir = new Path(test, "out");
    FileSystem fs = FileSystem.Get(conf);

    // Recreate the input directory from scratch.
    if (fs.Exists(inDir))
    {
        fs.Delete(inDir, true);
    }
    fs.Mkdirs(inDir);
    DataOutputStream file = fs.Create(new Path(inDir, "part-" + 0));
    file.WriteBytes("testStatusLimit");
    file.Close();

    // The output directory must not exist when the job starts.
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }

    Job job = Job.GetInstance(conf, "testStatusLimit");
    job.SetMapperClass(typeof(TestReporter.StatusLimitMapper));
    job.SetNumReduceTasks(0);
    FileInputFormat.AddInputPath(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    job.WaitForCompletion(true);

    // NUnit takes the condition first and the message second.
    NUnit.Framework.Assert.IsTrue(job.IsSuccessful(), "Job failed");
}
/// <summary>
/// Configures a map-only job: writes <paramref name="input"/> to <c>part-0</c>
/// under <paramref name="inDir"/>, clears <paramref name="outDir"/>, and sets the
/// mapper, reducer, paths and the <c>test.build.data</c> property on
/// <paramref name="conf"/>.
/// </summary>
/// <param name="conf">Job configuration to populate.</param>
/// <param name="inDir">Input directory; created here.</param>
/// <param name="outDir">Output directory; deleted recursively first.</param>
/// <param name="input">Text written to the input file.</param>
/// <param name="map">Mapper type.</param>
/// <param name="reduce">Reducer type.</param>
/// <exception cref="System.IO.IOException">If the input directory cannot be created.</exception>
private void Configure(JobConf conf, Path inDir, Path outDir, string input, Type map, Type reduce)
{
    // Resolve the file systems of both directories, then reset output and create input.
    FileSystem inputFs = inDir.GetFileSystem(conf);
    FileSystem outputFs = outDir.GetFileSystem(conf);
    outputFs.Delete(outDir, true);
    bool inputDirCreated = inputFs.Mkdirs(inDir);
    if (!inputDirCreated)
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    // Write the input text into the input file.
    Path inputFile = new Path(inDir, "part-0");
    DataOutputStream inputStream = inputFs.Create(inputFile);
    inputStream.WriteBytes(input);
    inputStream.Close();

    // Configure the mapred job, which creates a temp file in the map.
    conf.SetJobName("testmap");
    conf.SetMapperClass(map);
    conf.SetReducerClass(reduce);
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(0);
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);

    // Pass the test root directory on to the job, with spaces replaced by '+'.
    string configuredRoot = Runtime.GetProperty("test.build.data", "/tmp");
    string testRootDir = new Path(configuredRoot).ToString().Replace(' ', '+');
    conf.Set("test.build.data", testRootDir);
}
/// <summary>
/// Creates the input, runs a one-map/one-reduce text job with record skipping
/// enabled from the first attempt, and validates the output against the
/// expected bad records.
/// </summary>
/// <param name="conf">Job configuration to populate and run.</param>
/// <param name="mapperBadRecords">Records expected to be bad in the map phase.</param>
/// <param name="redBadRecords">Records expected to be bad in the reduce phase.</param>
/// <exception cref="System.Exception"/>
private void RunMapReduce(JobConf conf, IList<string> mapperBadRecords, IList<string> redBadRecords)
{
    CreateInput();

    conf.SetJobName("mr");
    conf.SetNumMapTasks(1);
    conf.SetNumReduceTasks(1);
    conf.SetInt(JobContext.TaskTimeout, 30 * 1000);

    SkipBadRecords.SetMapperMaxSkipRecords(conf, long.MaxValue);
    SkipBadRecords.SetReducerMaxSkipGroups(conf, long.MaxValue);
    SkipBadRecords.SetAttemptsToStartSkipping(conf, 0);

    // The number of attempts needed to complete a task successfully
    // depends on the number of bad records.
    int mapAttempts = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + mapperBadRecords.Count;
    conf.SetMaxMapAttempts(mapAttempts);
    int reduceAttempts = SkipBadRecords.GetAttemptsToStartSkipping(conf) + 1 + redBadRecords.Count;
    conf.SetMaxReduceAttempts(reduceAttempts);

    FileInputFormat.SetInputPaths(conf, GetInputDir());
    FileOutputFormat.SetOutputPath(conf, GetOutputDir());

    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));

    RunningJob finishedJob = JobClient.RunJob(conf);
    ValidateOutput(conf, finishedJob, mapperBadRecords, redBadRecords);
}
/// <summary>
/// Configures and runs the lazy-output test job, selecting between
/// <c>LazyOutputFormat</c> wrapping <c>TextOutputFormat</c> and plain
/// <c>TextOutputFormat</c>.
/// </summary>
/// <param name="job">Job configuration to populate and run.</param>
/// <param name="output">Output directory of the job.</param>
/// <param name="numReducers">Number of reduce tasks.</param>
/// <param name="createLazily">
/// When true the output format is installed through <c>LazyOutputFormat</c>;
/// otherwise <c>TextOutputFormat</c> is set directly.
/// </param>
/// <exception cref="System.Exception"/>
private static void RunTestLazyOutput(JobConf job, Path output, int numReducers, bool createLazily)
{
    job.SetJobName("test-lazy-output");
    FileInputFormat.SetInputPaths(job, Input);
    FileOutputFormat.SetOutputPath(job, output);

    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapOutputKeyClass(typeof(LongWritable));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(LongWritable));
    job.SetOutputValueClass(typeof(Text));
    job.SetMapperClass(typeof(TestLazyOutput.TestMapper));
    job.SetReducerClass(typeof(TestLazyOutput.TestReducer));

    // Not used afterwards; the construction itself is kept as in the original flow.
    JobClient unusedClient = new JobClient(job);
    job.SetNumReduceTasks(numReducers);

    if (!createLazily)
    {
        job.SetOutputFormat(typeof(TextOutputFormat));
    }
    else
    {
        LazyOutputFormat.SetOutputFormatClass(job, typeof(TextOutputFormat));
    }

    JobClient.RunJob(job);
}
/// <summary>
/// Starts a job with the specified input and returns its <c>RunningJob</c>
/// object. The job is submitted, not waited for.
/// </summary>
/// <param name="conf">Job configuration to populate and submit.</param>
/// <param name="inDir">Input directory; created if missing.</param>
/// <param name="outDir">Output directory; deleted recursively if present.</param>
/// <param name="numMaps">Number of map tasks, and of input files written.</param>
/// <param name="numReds">Number of reduce tasks.</param>
/// <param name="input">Text written into every input file.</param>
/// <returns>The handle of the submitted job.</returns>
/// <exception cref="System.IO.IOException"/>
internal static RunningJob RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds, string input)
{
    FileSystem fileSystem = FileSystem.Get(conf);
    if (fileSystem.Exists(outDir))
    {
        fileSystem.Delete(outDir, true);
    }
    if (!fileSystem.Exists(inDir))
    {
        fileSystem.Mkdirs(inDir);
    }

    // One input file per requested map, all with the same content.
    for (int part = 0; part < numMaps; ++part)
    {
        Path partFile = new Path(inDir, "part-" + part);
        DataOutputStream partStream = fileSystem.Create(partFile);
        partStream.WriteBytes(input);
        partStream.Close();
    }

    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReds);

    JobClient submitter = new JobClient(conf);
    return submitter.SubmitJob(conf);
}
/// <summary>
/// Prepares the shared <c>conf</c> for the map-output-type tests: resets the
/// test directory, configures sequence-file input/output with the
/// <c>TextGen</c>/<c>TextReduce</c> classes on the local framework, writes one
/// <c>Text</c>/<c>Text</c> record as input, and creates the <c>jc</c> job client.
/// </summary>
/// <exception cref="System.IO.IOException">If a directory cannot be created.</exception>
public virtual void Configure()
{
    var testRoot = new Path(TestDir.GetAbsolutePath());
    var inputDir = new Path(testRoot, "in");
    var outputDir = new Path(testRoot, "out");

    FileSystem fileSystem = FileSystem.Get(conf);
    fileSystem.Delete(testRoot, true);

    conf.SetInt(JobContext.IoSortMb, 1);
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
    conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));

    // Recreate the directory tree that was just removed.
    bool rootCreated = fileSystem.Mkdirs(testRoot);
    if (!rootCreated)
    {
        throw new IOException("Mkdirs failed to create " + testRoot.ToString());
    }
    bool inputCreated = fileSystem.Mkdirs(inputDir);
    if (!inputCreated)
    {
        throw new IOException("Mkdirs failed to create " + inputDir.ToString());
    }

    // A single record is enough input for the tests.
    var inputFile = new Path(inputDir, "part0");
    SequenceFile.Writer recordWriter = SequenceFile.CreateWriter(fileSystem, conf, inputFile,
        typeof(Text), typeof(Text));
    recordWriter.Append(new Text("rec: 1"), new Text("Hello"));
    recordWriter.Close();

    jc = new JobClient(conf);
}
/// <summary>
/// Writes a three-line input file, runs a high-priority identity map/reduce
/// job over it, and returns the job's ID as a string.
/// </summary>
/// <returns>String form of the ID of the job that was run.</returns>
/// <exception cref="System.Exception"/>
private string RunJob()
{
    // Three lines of input.
    Path inputFile = new Path(GetInputDir(), "text.txt");
    OutputStream rawOut = GetFileSystem().Create(inputFile);
    TextWriter inputWriter = new OutputStreamWriter(rawOut);
    inputWriter.Write("hello1\n");
    inputWriter.Write("hello2\n");
    inputWriter.Write("hello3\n");
    inputWriter.Close();

    JobConf jobConf = CreateJobConf();
    jobConf.SetJobName("mr");
    jobConf.SetJobPriority(JobPriority.High);

    jobConf.SetInputFormat(typeof(TextInputFormat));
    jobConf.SetMapOutputKeyClass(typeof(LongWritable));
    jobConf.SetMapOutputValueClass(typeof(Text));
    jobConf.SetOutputFormat(typeof(TextOutputFormat));
    jobConf.SetOutputKeyClass(typeof(LongWritable));
    jobConf.SetOutputValueClass(typeof(Text));
    jobConf.SetMapperClass(typeof(IdentityMapper));
    jobConf.SetReducerClass(typeof(IdentityReducer));

    FileInputFormat.SetInputPaths(jobConf, GetInputDir());
    FileOutputFormat.SetOutputPath(jobConf, GetOutputDir());

    RunningJob finishedJob = JobClient.RunJob(jobConf);
    return finishedJob.GetID().ToString();
}
/// <summary>
/// Runs the old-API counter job over four input files with a sort factor of 2
/// and validates the resulting record and file counters.
/// </summary>
public virtual void TestOldCounterB()
{
    JobConf jobConf = CreateConfiguration();

    // Four input files take part in this run; the fifth is removed.
    CreateWordsFile(inFiles[3], jobConf);
    RemoveWordsFile(inFiles[4], jobConf);

    long totalInputSize = 0;
    for (int fileIndex = 0; fileIndex <= 3; ++fileIndex)
    {
        totalInputSize += GetFileSize(inFiles[fileIndex]);
    }

    jobConf.SetNumMapTasks(4);
    jobConf.SetInt(JobContext.IoSortFactor, 2);
    FileInputFormat.SetInputPaths(jobConf, InDir);
    FileOutputFormat.SetOutputPath(jobConf, new Path(OutDir, "outputO1"));

    RunningJob finishedJob = JobClient.RunJob(jobConf);
    Counters counters = finishedJob.GetCounters();

    // As above, each map spills 2^14 records, so 4 maps spill 2^16 records
    // In the reduce, there are two intermediate merges before the reduce.
    // 1st merge: read + write = 8192 * 4
    // 2nd merge: read + write = 8192 * 4
    // final merge: 0
    // Total reduce: 32768
    // Total: map + reduce = 2^16 + 2^15 = 98304
    // 4 files, 5120 = 5 * 1024 rec/file = 20480 input records
    // 4 records/line = 81920 output records
    ValidateCounters(counters, 98304, 20480, 81920);
    ValidateFileCounters(counters, totalInputSize, 0, 0, 0);
}
/// <summary>
/// Runs the partial-memory reduce-fetch job with a single reducer and no task
/// retries, asserts that it succeeded, removes the output directory, and
/// returns the job's counters.
/// </summary>
/// <param name="conf">Job configuration to populate and run.</param>
/// <returns>Counters of the finished job.</returns>
/// <exception cref="System.Exception"/>
public static Counters RunJob(JobConf conf)
{
    conf.SetMapperClass(typeof(TestReduceFetchFromPartialMem.MapMB));
    conf.SetReducerClass(typeof(TestReduceFetchFromPartialMem.MBValidate));
    conf.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    conf.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    conf.SetNumReduceTasks(1);
    conf.SetInputFormat(typeof(TestReduceFetchFromPartialMem.FakeIF));
    conf.SetNumTasksToExecutePerJvm(1);
    conf.SetInt(JobContext.MapMaxAttempts, 0);
    conf.SetInt(JobContext.ReduceMaxAttempts, 0);

    FileInputFormat.SetInputPaths(conf, new Path("/in"));
    Path outputPath = new Path("/out");
    FileOutputFormat.SetOutputPath(conf, outputPath);

    RunningJob finishedJob = null;
    try
    {
        finishedJob = JobClient.RunJob(conf);
        NUnit.Framework.Assert.IsTrue(finishedJob.IsSuccessful());
    }
    finally
    {
        // Remove the output whether or not the job succeeded.
        FileSystem clusterFs = dfsCluster.GetFileSystem();
        if (clusterFs.Exists(outputPath))
        {
            clusterFs.Delete(outputPath, true);
        }
    }
    return finishedJob.GetCounters();
}
/// <summary>
/// Runs the old-API counter job over five input files with a sort factor of 3
/// and validates the resulting record and file counters.
/// </summary>
public virtual void TestOldCounterC()
{
    JobConf jobConf = CreateConfiguration();

    // All five input files take part in this run.
    CreateWordsFile(inFiles[3], jobConf);
    CreateWordsFile(inFiles[4], jobConf);

    long totalInputSize = 0;
    for (int fileIndex = 0; fileIndex <= 4; ++fileIndex)
    {
        totalInputSize += GetFileSize(inFiles[fileIndex]);
    }

    jobConf.SetNumMapTasks(4);
    jobConf.SetInt(JobContext.IoSortFactor, 3);
    FileInputFormat.SetInputPaths(jobConf, InDir);
    FileOutputFormat.SetOutputPath(jobConf, new Path(OutDir, "outputO2"));

    RunningJob finishedJob = JobClient.RunJob(jobConf);
    Counters counters = finishedJob.GetCounters();

    // As above, each map spills 2^14 records, so 5 maps spill 81920
    // 1st merge: read + write = 6 * 8192
    // final merge: unmerged = 2 * 8192
    // Total reduce: 45056
    // 5 files, 5120 = 5 * 1024 rec/file = 25600 input records
    // 4 records/line = 102400 output records
    ValidateCounters(counters, 122880, 25600, 102400);
    ValidateFileCounters(counters, totalInputSize, 0, 0, 0);
}
/// <summary>The main driver for word count map/reduce program.</summary>
/// <remarks>
/// The main driver for word count map/reduce program.
/// Invoke this method to submit the map/reduce job.
/// Recognized options are <c>-m &lt;maps&gt;</c> and <c>-r &lt;reduces&gt;</c>;
/// exactly two further arguments (input and output path) must remain.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
/// <returns>
/// 0 after the job has run, or the result of <c>PrintUsage()</c> when the
/// arguments are invalid.
/// </returns>
/// <exception cref="System.IO.IOException">
/// When there is communication problems with the
/// job tracker.
/// </exception>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    JobConf conf = new JobConf(GetConf(), typeof(WordCount));
    conf.SetJobName("wordcount");
    // the keys are words (strings)
    conf.SetOutputKeyClass(typeof(Text));
    // the values are counts (ints)
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.SetMapperClass(typeof(WordCount.MapClass));
    conf.SetCombinerClass(typeof(WordCount.Reduce));
    conf.SetReducerClass(typeof(WordCount.Reduce));

    IList<string> other_args = new AList<string>();
    for (int i = 0; i < args.Length; ++i)
    {
        try
        {
            if ("-m".Equals(args[i]))
            {
                conf.SetNumMapTasks(System.Convert.ToInt32(args[++i]));
            }
            else if ("-r".Equals(args[i]))
            {
                conf.SetNumReduceTasks(System.Convert.ToInt32(args[++i]));
            }
            else
            {
                other_args.AddItem(args[i]);
            }
        }
        catch (FormatException)
        {
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (System.OverflowException)
        {
            // Convert.ToInt32 reports out-of-range numbers with OverflowException,
            // not FormatException; treat them as invalid integers as well.
            System.Console.Out.WriteLine("ERROR: Integer expected instead of " + args[i]);
            return PrintUsage();
        }
        catch (IndexOutOfRangeException)
        {
            // i was already advanced past the end, so the option is at i - 1.
            System.Console.Out.WriteLine("ERROR: Required parameter missing from " + args[i - 1]);
            return PrintUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.Count != 2)
    {
        System.Console.Out.WriteLine("ERROR: Wrong number of parameters: " + other_args.Count + " instead of 2.");
        return PrintUsage();
    }

    FileInputFormat.SetInputPaths(conf, other_args[0]);
    FileOutputFormat.SetOutputPath(conf, new Path(other_args[1]));
    JobClient.RunJob(conf);
    return 0;
}
/// <summary>
/// Runs the record-checker job over the sort input and the sort output, and
/// prints the start time, end time and duration of the job to the console.
/// </summary>
/// <param name="defaults">Configuration the job configuration is derived from.</param>
/// <param name="noMaps">
/// Number of map tasks; -1 derives it from the number of task trackers and the
/// <c>MapsPerHost</c> setting (default 10).
/// </param>
/// <param name="noReduces">
/// Number of reduce tasks; -1 derives it from the cluster's maximum reduce
/// tasks (times 0.9) or, when configured, from the <c>ReducesPerHost</c> setting.
/// </param>
/// <param name="sortInput">Path of the data that was sorted.</param>
/// <param name="sortOutput">Path of the sorted data.</param>
/// <exception cref="System.IO.IOException"/>
internal static void CheckRecords(Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput)
{
    JobConf jobConf = new JobConf(defaults, typeof(SortValidator.RecordChecker));
    jobConf.SetJobName("sortvalidate-record-checker");
    jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
    jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    jobConf.SetOutputKeyClass(typeof(BytesWritable));
    jobConf.SetOutputValueClass(typeof(IntWritable));
    jobConf.SetMapperClass(typeof(SortValidator.RecordChecker.Map));
    jobConf.SetReducerClass(typeof(SortValidator.RecordChecker.Reduce));

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.GetClusterStatus();
    if (noMaps == -1)
    {
        noMaps = cluster.GetTaskTrackers() * jobConf.GetInt(MapsPerHost, 10);
    }
    if (noReduces == -1)
    {
        noReduces = (int)(cluster.GetMaxReduceTasks() * 0.9);
        string sortReduces = jobConf.Get(ReducesPerHost);
        if (sortReduces != null)
        {
            noReduces = cluster.GetTaskTrackers() * System.Convert.ToInt32(sortReduces);
        }
    }
    jobConf.SetNumMapTasks(noMaps);
    jobConf.SetNumReduceTasks(noReduces);

    // Both the sort input and the sort output are inputs of the checker.
    FileInputFormat.SetInputPaths(jobConf, sortInput);
    FileInputFormat.AddInputPath(jobConf, sortOutput);

    Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
    FileSystem fs = FileSystem.Get(defaults);
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    FileOutputFormat.SetOutputPath(jobConf, outputPath);

    // Uncomment to run locally in a single process
    //job_conf.set(JTConfig.JT, "local");
    Path[] inputPaths = FileInputFormat.GetInputPaths(jobConf);
    System.Console.Out.WriteLine("\nSortValidator.RecordChecker: Running on " + cluster.GetTaskTrackers()
        + " nodes to validate sort from " + inputPaths[0] + ", " + inputPaths[1]
        + " into " + FileOutputFormat.GetOutputPath(jobConf)
        + " with " + noReduces + " reduces.");

    // DateTime.Now, not new DateTime(): the parameterless constructor yields
    // DateTime.MinValue, which made both stamps identical and the duration 0.
    DateTime startTime = DateTime.Now;
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(jobConf);
    DateTime end_time = DateTime.Now;
    System.Console.Out.WriteLine("Job ended: " + end_time);

    // Whole seconds, truncated, as the millisecond difference / 1000 would give.
    long elapsedSeconds = (long)(end_time - startTime).TotalSeconds;
    System.Console.Out.WriteLine("The job took " + elapsedSeconds + " seconds.");
}
/// <summary>
/// Runs the job built by <c>CreateJob()</c> with output under
/// <c>outDir/testJob</c>, asserts that it completed successfully, and validates
/// its counters.
/// </summary>
public virtual void TestJob()
{
    Job job = CreateJob();
    Path jobOutput = new Path(outDir, "testJob");

    FileInputFormat.SetInputPaths(job, inDir);
    FileOutputFormat.SetOutputPath(job, jobOutput);

    bool completedSuccessfully = job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue(completedSuccessfully);

    ValidateCounters(job.GetCounters(), 5, 25, 5, 5);
}
/// <summary>
/// Writes a fixed sequence of key/value combinations (including null and
/// <c>NullWritable</c> keys and values) through <c>TextOutputFormat</c> with the
/// separator set to <c>\u0001</c>, and compares the produced file with the
/// expected text.
/// </summary>
public virtual void TestFormatWithCustomSeparator()
{
    JobConf jobConf = new JobConf();
    string separator = "\u0001";
    jobConf.Set("mapreduce.output.textoutputformat.separator", separator);
    jobConf.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(jobConf, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(jobConf, workDir);

    FileSystem workFs = workDir.GetFileSystem(jobConf);
    if (!workFs.Mkdirs(workDir))
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }
    string fileName = "test_custom.txt";

    // A reporter that does nothing
    Reporter silentReporter = Reporter.Null;
    TextOutputFormat<object, object> textFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> writer = textFormat.GetRecordWriter(localFs, jobConf, fileName, silentReporter);

    Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nullWritable = NullWritable.Get();

    try
    {
        writer.Write(key1, val1);
        writer.Write(null, nullWritable);
        writer.Write(null, val1);
        writer.Write(nullWritable, val2);
        writer.Write(key2, nullWritable);
        writer.Write(key1, null);
        writer.Write(null, null);
        writer.Write(key2, val2);
    }
    finally
    {
        writer.Close(silentReporter);
    }

    FilePath writtenFile = new FilePath(new Path(workDir, fileName).ToString());

    // Expected content: six lines, one per write above that produces output
    // according to this expectation; only full key/value pairs carry the separator.
    StringBuilder expected = new StringBuilder();
    expected.Append(key1).Append(separator).Append(val1).Append("\n");
    expected.Append(val1).Append("\n");
    expected.Append(val2).Append("\n");
    expected.Append(key2).Append("\n");
    expected.Append(key1).Append("\n");
    expected.Append(key2).Append(separator).Append(val2).Append("\n");

    string actual = UtilsForTests.Slurp(writtenFile);
    NUnit.Framework.Assert.AreEqual(expected.ToString(), actual);
}
/// <summary>
/// Runs a small local job with the requested compression settings and checks
/// that the reduce output exists and that its compressed flag matches
/// <paramref name="redCompression"/>. The test directory is deleted afterwards.
/// </summary>
/// <param name="compressMapOutputs">Whether map outputs are compressed.</param>
/// <param name="redCompression">Compression type of the sequence-file output.</param>
/// <param name="includeCombine">Whether <c>IdentityReducer</c> is set as combiner.</param>
/// <exception cref="System.Exception"/>
private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType redCompression, bool includeCombine)
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);

    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapRed.MyMap));
    conf.SetReducerClass(typeof(TestMapRed.MyReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    if (includeCombine)
    {
        conf.SetCombinerClass(typeof(IdentityReducer));
    }
    conf.SetCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);

    try
    {
        if (!fs.Mkdirs(testdir))
        {
            throw new IOException("Mkdirs failed to create " + testdir.ToString());
        }
        if (!fs.Mkdirs(inDir))
        {
            throw new IOException("Mkdirs failed to create " + inDir.ToString());
        }

        Path inFile = new Path(inDir, "part0");
        DataOutputStream f = fs.Create(inFile);
        f.WriteBytes("Owen was here\n");
        f.WriteBytes("Hadoop is fun\n");
        f.WriteBytes("Is this done, yet?\n");
        f.Close();

        RunningJob rj = JobClient.RunJob(conf);
        // NUnit takes the condition/expected values first and the message last.
        NUnit.Framework.Assert.IsTrue(rj.IsComplete(), "job was complete");
        NUnit.Framework.Assert.IsTrue(rj.IsSuccessful(), "job was successful");

        Path output = new Path(outDir, Task.GetOutputName(0));
        NUnit.Framework.Assert.IsTrue(fs.Exists(output), "reduce output exists " + output);

        SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
        try
        {
            NUnit.Framework.Assert.AreEqual(redCompression != SequenceFile.CompressionType.None,
                rdr.IsCompressed(), "is reduce output compressed " + output);
        }
        finally
        {
            // Close the reader even when the assertion fails, before the
            // directory that contains its file is deleted below.
            rdr.Close();
        }
    }
    finally
    {
        fs.Delete(testdir, true);
    }
}
/// <summary>Creates and runs an MR job</summary>
/// <remarks>
/// The job uses <c>IdentityMapper</c> and <c>IdentityReducer</c>, reads from
/// <c>inputDir</c> and writes to <c>outputDir</c>.
/// </remarks>
/// <param name="conf">Configuration the job configuration is created from.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void CreateAndRunJob(Configuration conf)
{
    var jobConf = new JobConf(conf);

    jobConf.SetJarByClass(typeof(TestLineRecordReaderJobs));
    jobConf.SetMapperClass(typeof(IdentityMapper));
    jobConf.SetReducerClass(typeof(IdentityReducer));

    FileInputFormat.AddInputPath(jobConf, inputDir);
    FileOutputFormat.SetOutputPath(jobConf, outputDir);

    JobClient.RunJob(jobConf);
}
/// <summary>
/// Runs a job whose key class, mapper and reducer are referenced by name from
/// the test jar, and returns the concatenated content of its output files.
/// </summary>
/// <param name="uri">URI of the file system to use; also set as the default URI.</param>
/// <param name="conf">Job configuration to populate and run.</param>
/// <param name="input">Text written to the single input file.</param>
/// <param name="numMaps">Number of map tasks.</param>
/// <param name="numReduces">Number of reduce tasks.</param>
/// <returns>All output lines, each followed by a newline.</returns>
/// <exception cref="System.IO.IOException">If the input directory cannot be created.</exception>
internal static string LaunchExternal(URI uri, JobConf conf, string input, int numMaps, int numReduces)
{
    Path inputDir = new Path("/testing/ext/input");
    Path outputDir = new Path("/testing/ext/output");
    FileSystem fileSystem = FileSystem.Get(uri, conf);

    fileSystem.Delete(outputDir, true);
    if (!fileSystem.Mkdirs(inputDir))
    {
        throw new IOException("Mkdirs failed to create " + inputDir.ToString());
    }

    DataOutputStream inputStream = fileSystem.Create(new Path(inputDir, "part-0"));
    inputStream.WriteBytes(input);
    inputStream.Close();

    FileSystem.SetDefaultUri(conf, uri);
    conf.Set(JTConfig.FrameworkName, JTConfig.YarnFrameworkName);
    conf.SetJobName("wordcount");
    conf.SetInputFormat(typeof(TextInputFormat));

    // The values are counts; the key class comes from the test jar by name.
    conf.SetOutputValueClass(typeof(IntWritable));
    conf.Set(JobContext.OutputKeyClass, "testjar.ExternalWritable");

    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);
    conf.Set("mapred.mapper.class", "testjar.ExternalMapperReducer");
    conf.Set("mapred.reducer.class", "testjar.ExternalMapperReducer");

    // set the tests jar file
    conf.SetJarByClass(typeof(TestMiniMRClasspath));
    JobClient.RunJob(conf);

    // Collect every line of every output file, newline-terminated.
    StringBuilder collected = new StringBuilder();
    Path[] outputFiles = FileUtil.Stat2Paths(fileSystem.ListStatus(outputDir,
        new Utils.OutputFileUtils.OutputFilesFilter()));
    foreach (Path outputFile in outputFiles)
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.Open(outputFile)));
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            collected.Append(line);
            collected.Append("\n");
        }
        reader.Close();
    }
    return collected.ToString();
}
/// <summary>
/// Generates input and expected output for the field-selection test, runs a
/// single-reducer <c>FieldSelectionMapReduce</c> job over the input, compares
/// <c>part-00000</c> with the expected output, and removes both directories.
/// </summary>
/// <exception cref="System.Exception"/>
public static void Launch()
{
    JobConf baseConf = new JobConf(typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileSystem fileSystem = FileSystem.Get(baseConf);
    int numOfInputLines = 10;

    Path outputDir = new Path("build/test/output_for_field_selection_test");
    Path inputDir = new Path("build/test/input_for_field_selection_test");
    string inputFileName = "input.txt";

    // Fresh input directory, no leftover output.
    fileSystem.Delete(inputDir, true);
    fileSystem.Mkdirs(inputDir);
    fileSystem.Delete(outputDir, true);

    StringBuilder inputData = new StringBuilder();
    StringBuilder expectedOutput = new StringBuilder();
    TestMRFieldSelection.ConstructInputOutputData(inputData, expectedOutput, numOfInputLines);

    FSDataOutputStream inputStream = fileSystem.Create(new Path(inputDir, inputFileName));
    inputStream.Write(Sharpen.Runtime.GetBytesForString(inputData.ToString(), "utf-8"));
    inputStream.Close();

    System.Console.Out.WriteLine("inputData:");
    System.Console.Out.WriteLine(inputData.ToString());

    JobConf job = new JobConf(baseConf, typeof(Org.Apache.Hadoop.Mapred.TestFieldSelection));
    FileInputFormat.SetInputPaths(job, inputDir);
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapperClass(typeof(FieldSelectionMapReduce));
    job.SetReducerClass(typeof(FieldSelectionMapReduce));
    FileOutputFormat.SetOutputPath(job, outputDir);
    job.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(1);

    // Field separator and the map/reduce key-value selection specs.
    job.Set(FieldSelectionHelper.DataFieldSeperator, "-");
    job.Set(FieldSelectionHelper.MapOutputKeyValueSpec, "6,5,1-3:0-");
    job.Set(FieldSelectionHelper.ReduceOutputKeyValueSpec, ":4,3,2,1,0,0-");
    JobClient.RunJob(job);

    //
    // Finally, we compare the reconstructed answer key with the
    // original one. Remember, we need to ignore zero-count items
    // in the original key.
    //
    bool success = true;
    Path resultFile = new Path(outputDir, "part-00000");
    string actualOutput = MapReduceTestUtil.ReadOutput(resultFile, job);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), actualOutput);

    fileSystem.Delete(outputDir, true);
    fileSystem.Delete(inputDir, true);
}
/// <summary>
/// Writes ten distinct <c>Text</c> values under <c>NullWritable</c> keys, runs
/// them through a local job with <c>NullMapper</c> and <c>IdentityReducer</c>,
/// and checks that the output contains exactly those values, in any order.
/// </summary>
public virtual void TestNullKeys()
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    FileSystem fs = FileSystem.GetLocal(conf);

    // Build the value set: each step replaces one letter by its successor.
    HashSet<string> values = new HashSet<string>();
    string m = "AAAAAAAAAAAAAA";
    for (int i = 1; i < 11; ++i)
    {
        values.AddItem(m);
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }

    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    fs.Delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile, typeof(NullWritable),
        typeof(Text), SequenceFile.CompressionType.None);
    Text t = new Text();
    foreach (string s in values)
    {
        t.Set(s);
        w.Append(NullWritable.Get(), t);
    }
    w.Close();

    FileInputFormat.SetInputPaths(conf, inFile);
    FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
    conf.SetMapperClass(typeof(TestMapRed.NullMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetOutputKeyClass(typeof(NullWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetNumReduceTasks(1);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    JobClient.RunJob(conf);

    // Since null keys all equal, allow any ordering
    SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(testdir, "nullout/part-00000"), conf);
    try
    {
        while (r.Next(NullWritable.Get(), t))
        {
            // Every value read must be one that was written, and only once.
            // NUnit takes the condition first and the message second.
            NUnit.Framework.Assert.IsTrue(values.Remove(t.ToString()), "Unexpected value: " + t);
        }
    }
    finally
    {
        r.Close();
    }
    NUnit.Framework.Assert.IsTrue(values.IsEmpty(), "Missing values: " + values.ToString());
}
/// <summary>
/// Configures <paramref name="job"/> for the encrypted-intermediate-data test,
/// submits it, waits for it to finish, and then verifies the output.
/// </summary>
/// <param name="job">Job configuration to populate and submit.</param>
/// <param name="fileSystem">File system holding the output; passed on to the verification step.</param>
/// <param name="numMappers">Number of mappers; only forwarded to the verification step.</param>
/// <param name="numReducers">Number of reduce tasks to configure.</param>
/// <param name="numLines">Stored under "mapred.test.num_lines" and forwarded to the verification step.</param>
/// <param name="isUber">When true, enables "mapreduce.job.ubertask.enable".</param>
/// <exception cref="System.Exception"/>
private void RunMergeTest(JobConf job, FileSystem fileSystem, int numMappers, int numReducers,
    int numLines, bool isUber)
{
    fileSystem.Delete(Output, true);
    job.SetJobName("Test");
    JobClient client = new JobClient(job);
    RunningJob submittedJob = null;
    FileInputFormat.SetInputPaths(job, InputDir);
    FileOutputFormat.SetOutputPath(job, Output);
    job.Set("mapreduce.output.textoutputformat.separator", " ");
    job.SetInputFormat(typeof(TextInputFormat));
    job.SetMapOutputKeyClass(typeof(Text));
    job.SetMapOutputValueClass(typeof(Text));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(Text));
    job.SetMapperClass(typeof(TestMRIntermediateDataEncryption.MyMapper));
    job.SetPartitionerClass(typeof(TestMRIntermediateDataEncryption.MyPartitioner));
    job.SetOutputFormat(typeof(TextOutputFormat));
    job.SetNumReduceTasks(numReducers);
    // A single attempt per task: a failing task fails the job instead of being retried.
    job.SetInt("mapreduce.map.maxattempts", 1);
    job.SetInt("mapreduce.reduce.maxattempts", 1);
    job.SetInt("mapred.test.num_lines", numLines);
    if (isUber)
    {
        job.SetBoolean("mapreduce.job.ubertask.enable", true);
    }
    job.SetBoolean(MRJobConfig.MrEncryptedIntermediateData, true);
    try
    {
        submittedJob = client.SubmitJob(job);
        try
        {
            if (!client.MonitorAndPrintJob(job, submittedJob))
            {
                throw new IOException("Job failed!");
            }
        }
        catch (IOException)
        {
            // Let job failures reach the outer handler that reports them; without
            // this clause the broad catch below would swallow "Job failed!" and
            // merely interrupt the thread.
            throw;
        }
        catch (Exception)
        {
            // Any other failure while monitoring is handled as an interruption:
            // re-flag the current thread.
            Sharpen.Thread.CurrentThread().Interrupt();
        }
    }
    catch (IOException ioe)
    {
        System.Console.Error.WriteLine("Job failed with: " + ioe);
    }
    finally
    {
        // Verification runs regardless of how submission/monitoring ended;
        // submittedJob is still null if SubmitJob itself threw.
        VerifyOutput(submittedJob, fileSystem, numMappers, numLines);
    }
}
/// <summary>
/// Creates a small input file under "/testing/input", runs an identity
/// map/reduce job over it with <c>SpecialTextOutputFormat</c>, asserts that the
/// job completed successfully and produced the expected output file, and
/// returns the job's success flag.
/// </summary>
/// <param name="fileSys">URI of the file system to run against; also set as the default URI on <paramref name="conf"/>.</param>
/// <param name="conf">Job configuration to populate and run.</param>
/// <param name="numMaps">Number of map tasks to request.</param>
/// <param name="numReduces">Number of reduce tasks to request.</param>
/// <returns>
/// <c>false</c> if the input directory could not be created; otherwise the
/// value of <c>IsSuccessful()</c> on the finished job.
/// </returns>
/// <exception cref="System.IO.IOException"/>
public static bool LaunchJob(URI fileSys, JobConf conf, int numMaps, int numReduces)
{
    Path inputDir = new Path("/testing/input");
    Path outputDir = new Path("/testing/output");
    FileSystem fileSystem = FileSystem.Get(fileSys, conf);

    // Remove output of earlier runs, then make sure the input directory exists.
    fileSystem.Delete(outputDir, true);
    bool inputDirCreated = fileSystem.Mkdirs(inputDir);
    if (!inputDirCreated)
    {
        Log.Warn("Can't create " + inDirMessage(inputDir));
        return false;
    }

    // Generate the single input file.
    Path inputFile = new Path(inputDir, "part-0");
    DataOutputStream inputStream = fileSystem.Create(inputFile);
    inputStream.WriteBytes("foo foo2 foo3");
    inputStream.Close();

    // Identity mapper and reducer; only the output format is special.
    FileSystem.SetDefaultUri(conf, fileSys);
    conf.SetJobName("foo");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputFormat(typeof(TestSpecialCharactersInOutputPath.SpecialTextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(IdentityMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.SetInputPaths(conf, inputDir);
    FileOutputFormat.SetOutputPath(conf, outputDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReduces);

    // Run the job and wait for completion.
    RunningJob finishedJob = JobClient.RunJob(conf);
    try
    {
        NUnit.Framework.Assert.IsTrue(finishedJob.IsComplete());
        NUnit.Framework.Assert.IsTrue(finishedJob.IsSuccessful());
        Path expectedOutput = new Path("/testing/output/" + OutputFilename);
        NUnit.Framework.Assert.IsTrue("Output folder not found!", fileSystem.Exists(expectedOutput));
    }
    catch (ArgumentNullException)
    {
        // A null-reference style failure is not expected here any more.
        Fail("A NPE should not have happened.");
    }

    // Report and return the job result.
    Log.Info("job is complete: " + finishedJob.IsSuccessful());
    return finishedJob.IsSuccessful();

    // Local helper: yields the path operand for the warning message unchanged.
    Path inDirMessage(Path dir)
    {
        return dir;
    }
}
/// <summary>
/// Writes <paramref name="items"/> records, each carrying the same 1000-line
/// text value, to a sequence file and runs an identity map/reduce job over it
/// on the local framework with the sort buffer set to 1. Any exception is
/// turned into an assertion failure.
/// </summary>
/// <param name="items">Number of records to write into the input file.</param>
public virtual void RunJob(int items)
{
    try
    {
        JobConf jobConf = new JobConf(typeof(TestMapRed));
        Path root = new Path(TestDir.GetAbsolutePath());
        Path inputDir = new Path(root, "in");
        Path outputDir = new Path(root, "out");
        FileSystem fileSystem = FileSystem.Get(jobConf);
        fileSystem.Delete(root, true);

        // Job wiring: identity mapper/reducer over sequence files, run locally.
        jobConf.SetInt(JobContext.IoSortMb, 1);
        jobConf.SetInputFormat(typeof(SequenceFileInputFormat));
        FileInputFormat.SetInputPaths(jobConf, inputDir);
        FileOutputFormat.SetOutputPath(jobConf, outputDir);
        jobConf.SetMapperClass(typeof(IdentityMapper));
        jobConf.SetReducerClass(typeof(IdentityReducer));
        jobConf.SetOutputKeyClass(typeof(Text));
        jobConf.SetOutputValueClass(typeof(Text));
        jobConf.SetOutputFormat(typeof(SequenceFileOutputFormat));
        jobConf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);

        // Create the test root first, then the input directory beneath it.
        foreach (Path required in new Path[] { root, inputDir })
        {
            if (!fileSystem.Mkdirs(required))
            {
                throw new IOException("Mkdirs failed to create " + required.ToString());
            }
        }

        Path inputFile = new Path(inputDir, "part0");
        SequenceFile.Writer output = SequenceFile.CreateWriter(fileSystem, jobConf, inputFile,
            typeof(Text), typeof(Text));

        // One shared value made of 1000 numbered lines.
        StringBuilder body = new StringBuilder();
        int lineNo = 0;
        while (lineNo < 1000)
        {
            body.Append(lineNo);
            body.Append(": This is one more line of content\n");
            lineNo++;
        }
        Org.Apache.Hadoop.IO.Text payload = new Org.Apache.Hadoop.IO.Text(body.ToString());

        // Every record gets its own key and the shared value.
        for (int rec = 0; rec < items; rec++)
        {
            Org.Apache.Hadoop.IO.Text key = new Org.Apache.Hadoop.IO.Text("rec:" + rec);
            output.Append(key, payload);
        }
        output.Close();

        JobClient.RunJob(jobConf);
    }
    catch (Exception e)
    {
        NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
    }
}
/// <summary>
/// Checks that <c>SequenceFileAsBinaryOutputFormat.CheckOutputSpecs</c> accepts
/// block compression and rejects record compression with an
/// <c>InvalidJobConfException</c>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestcheckOutputSpecsForbidRecordCompression()
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path outputdir = new Path(Runtime.GetProperty("test.build.data", ".") + "/output");
    fs.Delete(dir, true);
    fs.Delete(outputdir, true);
    if (!fs.Mkdirs(dir))
    {
        Fail("Failed to create output directory");
    }
    FileOutputFormat.SetWorkOutputPath(job, dir);

    // Without outputpath, FileOutputFormat.checkoutputspecs will throw
    // InvalidJobConfException
    FileOutputFormat.SetOutputPath(job, outputdir);

    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.SetCompressOutput(job, true);

    // Case 1: block compression must pass the check.
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Block);
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
    }
    catch (Exception e)
    {
        Fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat: "
            + "Caught " + e.GetType().FullName);
    }

    // Case 2: record compression must be rejected.
    SequenceFileAsBinaryOutputFormat.SetOutputCompressionType(job, SequenceFile.CompressionType.Record);
    try
    {
        new SequenceFileAsBinaryOutputFormat().CheckOutputSpecs(fs, job);
        Fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    }
    catch (InvalidJobConfException)
    {
        // expected
    }
    catch (Exception e)
    {
        // Anything other than InvalidJobConfException ends up here and is
        // reported as a failure.
        Fail("Expected " + typeof(InvalidJobConfException).FullName + " but caught "
            + e.GetType().FullName);
    }
}
/// <summary>
/// Populates <paramref name="jc"/> for an identity map/reduce job that reads
/// text input from this fixture's <c>inDir</c> and writes to
/// <paramref name="outDir"/>.
/// </summary>
/// <param name="jc">Job configuration to fill in.</param>
/// <param name="jobName">Name to give the job.</param>
/// <param name="maps">Number of map tasks to request.</param>
/// <param name="reds">Number of reduce tasks to request.</param>
/// <param name="outDir">Output path for the job.</param>
private void ConfigureJob(JobConf jc, string jobName, int maps, int reds, Path outDir)
{
    // Identity and sizing.
    jc.SetJobName(jobName);
    jc.SetNumMapTasks(maps);
    jc.SetNumReduceTasks(reds);

    // Input side.
    jc.SetInputFormat(typeof(TextInputFormat));
    FileInputFormat.SetInputPaths(jc, inDir);

    // Processing: records pass through unchanged.
    jc.SetMapperClass(typeof(IdentityMapper));
    jc.SetReducerClass(typeof(IdentityReducer));

    // Output side.
    jc.SetOutputKeyClass(typeof(LongWritable));
    jc.SetOutputValueClass(typeof(Text));
    FileOutputFormat.SetOutputPath(jc, outDir);
}