public virtual void TestCombinerShouldUpdateTheReporter()
{
    // Configure a job whose combiner (MyCombinerToCheckReporter) is expected
    // to interact with the reporter; identity map/reduce surround it.
    JobConf jobConf = new JobConf(mrCluster.GetConfig());
    int mapTasks = 5;
    int reduceTasks = 2;
    Path inputDir = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-in");
    Path outputDir = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-out");
    CreateInputOutPutFolder(inputDir, outputDir, mapTasks);
    jobConf.SetJobName("test-job-with-combiner");
    jobConf.SetMapperClass(typeof(IdentityMapper));
    jobConf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
    jobConf.SetReducerClass(typeof(IdentityReducer));
    // Ship the MR app jar and use the custom committer, as sibling tests do.
    DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, jobConf);
    jobConf.SetOutputCommitter(typeof(CustomOutputCommitter));
    jobConf.SetInputFormat(typeof(TextInputFormat));
    jobConf.SetOutputKeyClass(typeof(LongWritable));
    jobConf.SetOutputValueClass(typeof(Text));
    FileInputFormat.SetInputPaths(jobConf, inputDir);
    FileOutputFormat.SetOutputPath(jobConf, outputDir);
    jobConf.SetNumMapTasks(mapTasks);
    jobConf.SetNumReduceTasks(reduceTasks);
    RunJob(jobConf);
}
/// <summary>Command-line entry point for the secondary-sort example.</summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Configuration conf = new Configuration();
    string[] remaining = new GenericOptionsParser(conf, args).GetRemainingArgs();
    if (remaining.Length != 2)
    {
        System.Console.Error.WriteLine("Usage: secondarysort <in> <out>");
        System.Environment.Exit(2);
    }
    Job sortJob = Job.GetInstance(conf, "secondary sort");
    sortJob.SetJarByClass(typeof(SecondarySort));
    sortJob.SetMapperClass(typeof(SecondarySort.MapClass));
    sortJob.SetReducerClass(typeof(SecondarySort.Reduce));
    // Partition and group on the first int of the pair key.
    sortJob.SetPartitionerClass(typeof(SecondarySort.FirstPartitioner));
    sortJob.SetGroupingComparatorClass(typeof(SecondarySort.FirstGroupingComparator));
    // Intermediate types (IntPair, IntWritable) differ from the final output.
    sortJob.SetMapOutputKeyClass(typeof(SecondarySort.IntPair));
    sortJob.SetMapOutputValueClass(typeof(IntWritable));
    sortJob.SetOutputKeyClass(typeof(Text));
    sortJob.SetOutputValueClass(typeof(IntWritable));
    FileInputFormat.AddInputPath(sortJob, new Path(remaining[0]));
    FileOutputFormat.SetOutputPath(sortJob, new Path(remaining[1]));
    System.Environment.Exit(sortJob.WaitForCompletion(true) ? 0 : 1);
}
/// <summary>Runs the word-mean job and caches the computed mean word length.</summary>
/// <param name="args">Exactly two arguments: input path and output path.</param>
/// <returns>0 on success, non-zero on job failure or bad usage.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length != 2)
    {
        System.Console.Error.WriteLine("Usage: wordmean <in> <out>");
        // Fix: previously returned 0 (success) on a usage error; return 2 so
        // callers and scripts can detect the failure, matching the other
        // example tools (e.g. MultiFileWordCount) in this file.
        return(2);
    }
    Configuration conf = GetConf();
    Job job = Job.GetInstance(conf, "word mean");
    job.SetJarByClass(typeof(WordMean));
    job.SetMapperClass(typeof(WordMean.WordMeanMapper));
    // The reducer class doubles as the combiner.
    job.SetCombinerClass(typeof(WordMean.WordMeanReducer));
    job.SetReducerClass(typeof(WordMean.WordMeanReducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(LongWritable));
    FileInputFormat.AddInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.SetOutputPath(job, outputpath);
    bool result = job.WaitForCompletion(true);
    // Read the reduced output back and cache the mean for callers.
    mean = ReadAndCalcMean(outputpath, conf);
    return(result ? 0 : 1);
}
public virtual void TestJobSuccessCleanup()
{
    Reset();
    Job job = Job.GetInstance();
    Path inputPath = CreateInput();
    Path outputPath = GetOutputPath();
    // Start from a clean output directory on the local filesystem.
    Configuration localConf = new Configuration();
    FileSystem fileSystem = FileSystem.GetLocal(localConf);
    if (fileSystem.Exists(outputPath))
    {
        fileSystem.Delete(outputPath, true);
    }
    // The "Tracking*" variants record whether their cleanup hooks ran.
    job.SetMapperClass(typeof(TestMapperReducerCleanup.TrackingTokenizerMapper));
    job.SetReducerClass(typeof(TestMapperReducerCleanup.TrackingIntSumReducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(IntWritable));
    job.SetInputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextInputFormat));
    job.SetOutputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextOutputFormat));
    job.SetNumReduceTasks(1);
    FileInputFormat.AddInputPath(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);
    job.WaitForCompletion(true);
    // Every stage must have performed its cleanup.
    NUnit.Framework.Assert.IsTrue(mapCleanup);
    NUnit.Framework.Assert.IsTrue(reduceCleanup);
    NUnit.Framework.Assert.IsTrue(recordReaderCleanup);
    NUnit.Framework.Assert.IsTrue(recordWriterCleanup);
}
/// <summary>
/// Runs a job exercising the reduce-side value iterator mark/reset support
/// and validates the output; any exception fails the test.
/// </summary>
public virtual void TestValueIterReset()
{
    try
    {
        Configuration conf = new Configuration();
        Job job = Job.GetInstance(conf, "TestValueIterReset");
        job.SetJarByClass(typeof(TestValueIterReset));
        job.SetMapperClass(typeof(TestValueIterReset.TestMapper));
        job.SetReducerClass(typeof(TestValueIterReset.TestReducer));
        job.SetNumReduceTasks(NumTests);
        job.SetMapOutputKeyClass(typeof(IntWritable));
        job.SetMapOutputValueClass(typeof(IntWritable));
        job.SetOutputKeyClass(typeof(IntWritable));
        job.SetOutputValueClass(typeof(IntWritable));
        // Small (128) mark/reset buffer — presumably to force the spill/reset
        // code path; confirm against ReduceMarkresetBufferSize semantics.
        job.GetConfiguration().SetInt(MRJobConfig.ReduceMarkresetBufferSize, 128);
        job.SetInputFormatClass(typeof(TextInputFormat));
        job.SetOutputFormatClass(typeof(TextOutputFormat));
        FileInputFormat.AddInputPath(job, new Path(TestRootDir + "/in"));
        Path output = new Path(TestRootDir + "/out");
        localFs.Delete(output, true);
        FileOutputFormat.SetOutputPath(job, output);
        CreateInput();
        NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true));
        ValidateOutput();
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        // Fix: Assert.IsTrue(false) failed with no diagnostic; fail with the
        // exception message so the cause appears in the test report.
        NUnit.Framework.Assert.Fail("TestValueIterReset failed: " + e.Message);
    }
}
/// <summary>Builds a configured (not yet submitted) sleep job.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual Job CreateJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount)
{
    // Stash the sleep parameters in the configuration for tasks to read.
    Configuration conf = GetConf();
    conf.SetLong(MapSleepTime, mapSleepTime);
    conf.SetLong(ReduceSleepTime, reduceSleepTime);
    conf.SetInt(MapSleepCount, mapSleepCount);
    conf.SetInt(ReduceSleepCount, reduceSleepCount);
    conf.SetInt(MRJobConfig.NumMaps, numMapper);
    Job sleepJob = Job.GetInstance(conf, "sleep");
    sleepJob.SetNumReduceTasks(numReducer);
    sleepJob.SetJarByClass(typeof(SleepJob));
    sleepJob.SetMapperClass(typeof(SleepJob.SleepMapper));
    sleepJob.SetMapOutputKeyClass(typeof(IntWritable));
    sleepJob.SetMapOutputValueClass(typeof(NullWritable));
    sleepJob.SetReducerClass(typeof(SleepJob.SleepReducer));
    sleepJob.SetOutputFormatClass(typeof(NullOutputFormat));
    sleepJob.SetInputFormatClass(typeof(SleepJob.SleepInputFormat));
    sleepJob.SetPartitionerClass(typeof(SleepJob.SleepJobPartitioner));
    sleepJob.SetSpeculativeExecution(false);
    sleepJob.SetJobName("Sleep job");
    FileInputFormat.AddInputPath(sleepJob, new Path("ignored"));
    return(sleepJob);
}
/// <summary>
/// Writes test files of varying lengths and checks the N-lines-per-map
/// input format against each.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestFormat()
{
    JobConf job = new JobConf();
    Path file = new Path(workDir, "test.txt");
    int seed = new Random().Next();
    Random rng = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int numLinesPerMap = 5;
    job.SetInt("mapreduce.input.lineinputformat.linespermap", numLinesPerMap);
    // Exercise a variety of file lengths, stepping by a random stride.
    for (int length = 0; length < MaxLength; length += rng.Next(MaxLength / 10) + 1)
    {
        // Write 'length' numbered lines into the test file.
        TextWriter writer = new OutputStreamWriter(localFs.Create(file));
        try
        {
            for (int lineNo = 0; lineNo < length; lineNo++)
            {
                writer.Write(Sharpen.Extensions.ToString(lineNo));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        CheckFormat(job, numLinesPerMap);
    }
}
/// <summary>Runs the multi-file word count job over args[0], writing to args[1].</summary>
/// <returns>0 on success, non-zero on failure or bad usage.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length < 2)
    {
        PrintUsage();
        return(2);
    }
    Job wordCountJob = Job.GetInstance(GetConf());
    wordCountJob.SetJobName("MultiFileWordCount");
    wordCountJob.SetJarByClass(typeof(MultiFileWordCount));
    // Use this example's custom input format.
    wordCountJob.SetInputFormatClass(typeof(MultiFileWordCount.MyInputFormat));
    // Output is word (Text) -> count (IntWritable).
    wordCountJob.SetOutputKeyClass(typeof(Text));
    wordCountJob.SetOutputValueClass(typeof(IntWritable));
    wordCountJob.SetMapperClass(typeof(MultiFileWordCount.MapClass));
    // IntSumReducer serves as both combiner and reducer.
    wordCountJob.SetCombinerClass(typeof(IntSumReducer));
    wordCountJob.SetReducerClass(typeof(IntSumReducer));
    FileInputFormat.AddInputPaths(wordCountJob, args[0]);
    FileOutputFormat.SetOutputPath(wordCountJob, new Path(args[1]));
    return(wordCountJob.WaitForCompletion(true) ? 0 : 1);
}
/// <summary>
/// Recreates inDir/outDir, writes numInputFiles identical input files, and
/// returns a Job configured to read them and write to outDir.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public static Job CreateJob(Configuration conf, Path inDir, Path outDir, int numInputFiles, int numReds, string input)
{
    Job job = Job.GetInstance(conf);
    FileSystem fs = FileSystem.Get(conf);
    // Wipe any prior state for both directories.
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    if (fs.Exists(inDir))
    {
        fs.Delete(inDir, true);
    }
    fs.Mkdirs(inDir);
    // One "part-i" file per requested input, all with the same content.
    for (int i = 0; i < numInputFiles; ++i)
    {
        DataOutputStream partFile = fs.Create(new Path(inDir, "part-" + i));
        partFile.WriteBytes(input);
        partFile.Close();
    }
    FileInputFormat.SetInputPaths(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    job.SetNumReduceTasks(numReds);
    return(job);
}
// no check for ll.isEmpty() to permit extension
/// <summary>
/// Applies this node's input paths to jconf via a Job wrapper and returns
/// the resulting configuration.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private Configuration GetConf(Configuration jconf)
{
    Job wrapper = Job.GetInstance(jconf);
    FileInputFormat.SetInputPaths(wrapper, indir);
    return(wrapper.GetConfiguration());
}
/// <summary>Runs the word-median job over args[0] and caches the median word length.</summary>
/// <returns>0 on success, non-zero on job failure or bad usage.</returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length != 2)
    {
        System.Console.Error.WriteLine("Usage: wordmedian <in> <out>");
        // Fix: previously returned 0 (success) on a usage error; return 2 so
        // scripts and ToolRunner callers can detect it.
        return(2);
    }
    // NOTE(review): this replaces any configuration injected before Run()
    // (e.g. by ToolRunner/GenericOptionsParser) — kept as-is to preserve
    // existing behavior, but worth confirming it is intentional.
    SetConf(new Configuration());
    Configuration conf = GetConf();
    Job job = Job.GetInstance(conf, "word median");
    job.SetJarByClass(typeof(WordMedian));
    job.SetMapperClass(typeof(WordMedian.WordMedianMapper));
    // The reducer class doubles as the combiner.
    job.SetCombinerClass(typeof(WordMedian.WordMedianReducer));
    job.SetReducerClass(typeof(WordMedian.WordMedianReducer));
    job.SetOutputKeyClass(typeof(IntWritable));
    job.SetOutputValueClass(typeof(IntWritable));
    FileInputFormat.AddInputPath(job, new Path(args[0]));
    FileOutputFormat.SetOutputPath(job, new Path(args[1]));
    bool result = job.WaitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median.
    // For an even total the two middle indices straddle the median; for an
    // odd total they coincide.
    long totalWords = job.GetCounters().GetGroup(typeof(TaskCounter).GetCanonicalName()).FindCounter("MAP_OUTPUT_RECORDS", "Map output records").GetValue();
    int medianIndex1 = (int)Math.Ceil((totalWords / 2.0));
    int medianIndex2 = (int)Math.Floor((totalWords / 2.0));
    median = ReadAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return(result ? 0 : 1);
}
/// <summary>
/// Writes numMaps identical input files, configures the job, submits it and
/// monitors it to completion.
/// </summary>
/// <returns>true if the job succeeded.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds)
{
    FileSystem fs = FileSystem.Get(conf);
    // Fresh output dir; create the input dir only if missing.
    if (fs.Exists(outDir))
    {
        fs.Delete(outDir, true);
    }
    if (!fs.Exists(inDir))
    {
        fs.Mkdirs(inDir);
    }
    string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n";
    for (int part = 0; part < numMaps; ++part)
    {
        DataOutputStream partFile = fs.Create(new Path(inDir, "part-" + part));
        partFile.WriteBytes(input);
        partFile.Close();
    }
    DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs);
    conf.SetOutputCommitter(typeof(CustomOutputCommitter));
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReds);
    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.SubmitJob(conf);
    return(jobClient.MonitorAndPrintJob(conf, job));
}
/// <summary>Entry point: word count over one or more input paths.</summary>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    Configuration conf = new Configuration();
    string[] remaining = new GenericOptionsParser(conf, args).GetRemainingArgs();
    if (remaining.Length < 2)
    {
        System.Console.Error.WriteLine("Usage: wordcount <in> [<in>...] <out>");
        System.Environment.Exit(2);
    }
    Job job = Job.GetInstance(conf, "word count");
    job.SetJarByClass(typeof(WordCount));
    job.SetMapperClass(typeof(WordCount.TokenizerMapper));
    job.SetCombinerClass(typeof(WordCount.IntSumReducer));
    job.SetReducerClass(typeof(WordCount.IntSumReducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(IntWritable));
    // Every argument except the last is an input; the last is the output dir.
    int lastIndex = remaining.Length - 1;
    for (int i = 0; i < lastIndex; ++i)
    {
        FileInputFormat.AddInputPath(job, new Path(remaining[i]));
    }
    FileOutputFormat.SetOutputPath(job, new Path(remaining[lastIndex]));
    System.Environment.Exit(job.WaitForCompletion(true) ? 0 : 1);
}
/// <summary>
/// Reads the configured input file end-to-end through its RecordReader and
/// returns the elapsed time in milliseconds.
/// </summary>
/// <exception cref="System.IO.IOException"/>
internal static long ReadBench(JobConf conf)
{
    // InputFormat instantiation
    InputFormat inf = conf.GetInputFormat();
    string fn = conf.Get("test.filebench.name", string.Empty);
    Path pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
    FileStatus @in = pin.GetFileSystem(conf).GetFileStatus(pin);
    RecordReader rr = inf.GetRecordReader(new FileSplit(pin, 0, @in.GetLen(), (string[])null), conf, Reporter.Null);
    try
    {
        object key = rr.CreateKey();
        object val = rr.CreateValue();
        // Fix: 'new DateTime()' in C# is DateTime.MinValue, not the current
        // time as Java's 'new Date()' is, so the measured duration was always
        // zero. Use a monotonic Stopwatch for the elapsed time instead.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        while (rr.Next(key, val))
        {
        }
        timer.Stop();
        return(timer.ElapsedMilliseconds);
    }
    finally
    {
        rr.Close();
    }
}
// no check for ll.isEmpty() to permit extension
/// <summary>
/// Returns a copy of the given JobConf with this node's input paths set and
/// the caller's classloader preserved.
/// </summary>
private JobConf GetConf(JobConf job)
{
    JobConf copy = new JobConf(job);
    FileInputFormat.SetInputPaths(copy, indir);
    // Keep the original classloader on the copy.
    copy.SetClassLoader(job.GetClassLoader());
    return(copy);
}
/// <summary>
/// Wraps a record reader for one chunk of a CombineFileSplit: reconstructs
/// the idx-th chunk as a plain FileSplit and delegates to the underlying
/// input format's reader for it.
/// </summary>
/// <exception cref="System.IO.IOException"/>
protected internal CombineFileRecordReaderWrapper(FileInputFormat<K, V> inputFormat, CombineFileSplit split, Configuration conf, Reporter reporter, int idx)
{
    // Rebuild the single-file split at index idx of the combined split.
    FileSplit fileSplit = new FileSplit(split.GetPath(idx), split.GetOffset(idx), split.GetLength(idx), split.GetLocations());
    // NOTE(review): assumes conf is actually a JobConf — the cast throws
    // otherwise; confirm all construction sites pass a JobConf.
    delegate_ = inputFormat.GetRecordReader(fileSplit, (JobConf)conf, reporter);
}
/// <summary>
/// Computes splits for all registered (path -> InputFormat, path -> Mapper)
/// bindings: paths are first bucketed by InputFormat class, then by Mapper
/// class; each bucket's splits are generated together and wrapped in
/// TaggedInputSplit so downstream code knows which format/mapper to use.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual InputSplit[] GetSplits(JobConf conf, int numSplits)
{
    // Work on a copy so input paths can be swapped per bucket without
    // mutating the caller's conf.
    JobConf confCopy = new JobConf(conf);
    IList<InputSplit> splits = new AList<InputSplit>();
    IDictionary<Path, InputFormat> formatMap = MultipleInputs.GetInputFormatMap(conf);
    IDictionary<Path, Type> mapperMap = MultipleInputs.GetMapperTypeMap(conf);
    IDictionary<Type, IList<Path>> formatPaths = new Dictionary<Type, IList<Path>>();
    // First, build a map of InputFormats to Paths
    foreach (KeyValuePair<Path, InputFormat> entry in formatMap)
    {
        if (!formatPaths.Contains(entry.Value.GetType()))
        {
            formatPaths[entry.Value.GetType()] = new List<Path>();
        }
        formatPaths[entry.Value.GetType()].AddItem(entry.Key);
    }
    foreach (KeyValuePair<Type, IList<Path>> formatEntry in formatPaths)
    {
        Type formatClass = formatEntry.Key;
        InputFormat format = (InputFormat)ReflectionUtils.NewInstance(formatClass, conf);
        IList<Path> paths = formatEntry.Value;
        IDictionary<Type, IList<Path>> mapperPaths = new Dictionary<Type, IList<Path>>();
        // Now, for each set of paths that have a common InputFormat, build
        // a map of Mappers to the paths they're used for
        foreach (Path path in paths)
        {
            Type mapperClass = mapperMap[path];
            if (!mapperPaths.Contains(mapperClass))
            {
                mapperPaths[mapperClass] = new List<Path>();
            }
            mapperPaths[mapperClass].AddItem(path);
        }
        // Now each set of paths that has a common InputFormat and Mapper can
        // be added to the same job, and split together.
        foreach (KeyValuePair<Type, IList<Path>> mapEntry in mapperPaths)
        {
            paths = mapEntry.Value;
            Type mapperClass = mapEntry.Key;
            // A null mapper in the map means "use the job's default mapper".
            if (mapperClass == null)
            {
                mapperClass = conf.GetMapperClass();
            }
            FileInputFormat.SetInputPaths(confCopy, Sharpen.Collections.ToArray(paths, new Path[paths.Count]));
            // Get splits for each input path and tag with InputFormat
            // and Mapper types by wrapping in a TaggedInputSplit.
            InputSplit[] pathSplits = format.GetSplits(confCopy, numSplits);
            foreach (InputSplit pathSplit in pathSplits)
            {
                splits.AddItem(new TaggedInputSplit(pathSplit, conf, format.GetType(), mapperClass));
            }
        }
    }
    return(Sharpen.Collections.ToArray(splits, new InputSplit[splits.Count]));
}
/// <summary>
/// Run a test with several mappers in parallel, operating at different
/// speeds.
/// </summary>
/// <remarks>
/// Run a test with several mappers in parallel, operating at different
/// speeds. Verify that the correct amount of output is created.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestMultiMaps()
{
    Job job = Job.GetInstance();
    Path inputPath = CreateMultiMapsInput();
    Path outputPath = GetOutputPath();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.GetLocal(conf);
    // Start with a clean local output directory.
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    job.SetMapperClass(typeof(TestLocalRunner.StressMapper));
    job.SetReducerClass(typeof(TestLocalRunner.CountingReducer));
    job.SetNumReduceTasks(1);
    // Allow up to 6 map tasks to run concurrently in the local runner.
    LocalJobRunner.SetLocalMaxRunningMaps(job, 6);
    job.GetConfiguration().Set(MRJobConfig.IoSortMb, "25");
    FileInputFormat.AddInputPath(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);
    // Watchdog: _Thread_311 interrupts this (main) thread after 2 minutes,
    // so a hung job fails the test instead of blocking forever.
    Sharpen.Thread toInterrupt = Sharpen.Thread.CurrentThread();
    Sharpen.Thread interrupter = new _Thread_311(toInterrupt);
    // 2m
    Log.Info("Submitting job...");
    job.Submit();
    Log.Info("Starting thread to interrupt main thread in 2 minutes");
    interrupter.Start();
    Log.Info("Waiting for job to complete...");
    try
    {
        job.WaitForCompletion(true);
    }
    catch (Exception ie)
    {
        // Interrupted by the watchdog: dump thread stacks repeatedly to aid
        // debugging, then propagate the interrupt to fail the test.
        Log.Fatal("Interrupted while waiting for job completion", ie);
        for (int i = 0; i < 10; i++)
        {
            Log.Fatal("Dumping stacks");
            ReflectionUtils.LogThreadInfo(Log, "multimap threads", 0);
            Sharpen.Thread.Sleep(1000);
        }
        throw;
    }
    Log.Info("Job completed, stopping interrupter");
    interrupter.Interrupt();
    try
    {
        interrupter.Join();
    }
    catch (Exception)
    {
    }
    // it might interrupt us right as we interrupt it
    Log.Info("Verifying output");
    VerifyOutput(outputPath);
}
/* Extracts matching regexs from input files and counts them. */
// singleton
/// <summary>Two-pass grep: count regex matches, then sort by descending frequency.</summary>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length < 3)
    {
        System.Console.Out.WriteLine("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.PrintGenericCommandUsage(System.Console.Out);
        return(2);
    }
    // Intermediate results of the search pass land in a randomly named dir.
    Path tempDir = new Path("grep-temp-" + Sharpen.Extensions.ToString(new Random().Next(int.MaxValue)));
    Configuration conf = GetConf();
    conf.Set(RegexMapper.Pattern, args[2]);
    if (args.Length == 4)
    {
        conf.Set(RegexMapper.Group, args[3]);
    }
    Job searchJob = Job.GetInstance(conf);
    try
    {
        // Pass 1: count regex matches into tempDir as sequence files.
        searchJob.SetJobName("grep-search");
        searchJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
        FileInputFormat.SetInputPaths(searchJob, args[0]);
        searchJob.SetMapperClass(typeof(RegexMapper));
        searchJob.SetCombinerClass(typeof(LongSumReducer));
        searchJob.SetReducerClass(typeof(LongSumReducer));
        FileOutputFormat.SetOutputPath(searchJob, tempDir);
        searchJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
        searchJob.SetOutputKeyClass(typeof(Text));
        searchJob.SetOutputValueClass(typeof(LongWritable));
        searchJob.WaitForCompletion(true);
        // Pass 2: invert (match, count) and sort by decreasing frequency.
        Job sortJob = Job.GetInstance(conf);
        sortJob.SetJobName("grep-sort");
        sortJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
        FileInputFormat.SetInputPaths(sortJob, tempDir);
        sortJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
        sortJob.SetMapperClass(typeof(InverseMapper));
        // write a single file
        sortJob.SetNumReduceTasks(1);
        FileOutputFormat.SetOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.SetSortComparatorClass(typeof(LongWritable.DecreasingComparator));
        sortJob.WaitForCompletion(true);
    }
    finally
    {
        // Always remove the intermediate directory.
        FileSystem.Get(conf).Delete(tempDir, true);
    }
    return(0);
}
/// <summary>Launches the distributed filesystem-check MapReduce job.</summary>
/// <exception cref="System.Exception"/>
private void RunDistributedFSCheck()
{
    JobConf checkJob = new JobConf(fs.GetConf(), typeof(DistributedFSCheck));
    FileInputFormat.SetInputPaths(checkJob, MapInputDir);
    checkJob.SetInputFormat(typeof(SequenceFileInputFormat));
    checkJob.SetMapperClass(typeof(DistributedFSCheck.DistributedFSCheckMapper));
    checkJob.SetReducerClass(typeof(AccumulatingReducer));
    FileOutputFormat.SetOutputPath(checkJob, ReadDir);
    checkJob.SetOutputKeyClass(typeof(Text));
    checkJob.SetOutputValueClass(typeof(Text));
    // Single reducer so all mapper results are accumulated together.
    checkJob.SetNumReduceTasks(1);
    JobClient.RunJob(checkJob);
}
/// <summary>Runs one I/O benchmark pass with the given mapper class.</summary>
/// <exception cref="System.IO.IOException"/>
private static void RunIOTest(Type mapperClass, Path outputDir)
{
    JobConf ioJob = new JobConf(fsConfig, typeof(DFSCIOTest));
    FileInputFormat.SetInputPaths(ioJob, ControlDir);
    ioJob.SetInputFormat(typeof(SequenceFileInputFormat));
    // The mapper class selects which I/O operation is benchmarked.
    ioJob.SetMapperClass(mapperClass);
    ioJob.SetReducerClass(typeof(AccumulatingReducer));
    FileOutputFormat.SetOutputPath(ioJob, outputDir);
    ioJob.SetOutputKeyClass(typeof(Text));
    ioJob.SetOutputValueClass(typeof(Text));
    // Single reducer so all mapper results are accumulated together.
    ioJob.SetNumReduceTasks(1);
    JobClient.RunJob(ioJob);
}
/// <summary>
/// Submits a job exercising the distributed cache: a plain cache file with a
/// symlink, jars on the classpath, an archive on the classpath, and a cache
/// archive. The DistributedCacheChecker mapper performs the verification;
/// the job must succeed for the test to pass.
/// </summary>
/// <param name="jobJarPath">Path to a "dummy" job jar used to verify jar extraction.</param>
/// <exception cref="System.Exception"/>
public virtual void _testDistributedCache(string jobJarPath)
{
    // Skip (not fail) when the MR app jar is unavailable in this build.
    if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists())
    {
        Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test.");
        return;
    }
    // Create a temporary file of length 1.
    Path first = CreateTempFile("distributed.first", "x");
    // Create two jars with a single file inside them.
    Path second = MakeJar(new Path(TestRootDir, "distributed.second.jar"), 2);
    Path third = MakeJar(new Path(TestRootDir, "distributed.third.jar"), 3);
    Path fourth = MakeJar(new Path(TestRootDir, "distributed.fourth.jar"), 4);
    Job job = Job.GetInstance(mrCluster.GetConfig());
    // Set the job jar to a new "dummy" jar so we can check that its extracted
    // properly
    job.SetJar(jobJarPath);
    // Because the job jar is a "dummy" jar, we need to include the jar with
    // DistributedCacheChecker or it won't be able to find it
    Path distributedCacheCheckerJar = new Path(JarFinder.GetJar(typeof(TestMRJobs.DistributedCacheChecker)));
    job.AddFileToClassPath(distributedCacheCheckerJar.MakeQualified(localFs.GetUri(), distributedCacheCheckerJar.GetParent()));
    job.SetMapperClass(typeof(TestMRJobs.DistributedCacheChecker));
    job.SetOutputFormatClass(typeof(NullOutputFormat));
    FileInputFormat.SetInputPaths(job, first);
    // Creates the Job Configuration
    job.AddCacheFile(new URI(first.ToUri().ToString() + "#distributed.first.symlink"));
    job.AddFileToClassPath(second);
    // The AppMaster jar itself
    job.AddFileToClassPath(AppJar.MakeQualified(localFs.GetUri(), AppJar.GetParent()));
    job.AddArchiveToClassPath(third);
    job.AddCacheArchive(fourth.ToUri());
    // speed up failures
    job.SetMaxMapAttempts(1);
    job.Submit();
    string trackingUrl = job.GetTrackingURL();
    string jobId = job.GetJobID().ToString();
    NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(false));
    // The tracking URL must end with the tail of the job id (from its last
    // '_' onward) followed by '/'.
    NUnit.Framework.Assert.IsTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.EndsWith(Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf("_")) + "/"));
}
/// <summary>Creates a simple fail job.</summary>
/// <param name="conf">Configuration object</param>
/// <param name="outdir">Output directory.</param>
/// <param name="indirs">Comma separated input directories.</param>
/// <returns>Job initialized for a simple kill job.</returns>
/// <exception cref="System.Exception">If an error occurs creating job configuration.</exception>
public static Job CreateKillJob(Configuration conf, Path outdir, params Path[] indirs)
{
    Job killJob = Job.GetInstance(conf);
    killJob.SetJobName("Kill-Job");
    FileInputFormat.SetInputPaths(killJob, indirs);
    // Map-only: KillMapper does the work, zero reduce tasks.
    killJob.SetMapperClass(typeof(MapReduceTestUtil.KillMapper));
    killJob.SetReducerClass(typeof(Reducer));
    killJob.SetNumReduceTasks(0);
    FileOutputFormat.SetOutputPath(killJob, outdir);
    killJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    killJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    return(killJob);
}
/// <summary>
/// Verifies JobConf default-resource behavior: a plain JobConf() loads
/// defaults (hadoop.tmp.dir present) while JobConf(false) loads none; then
/// runs an identity MR job using a defaults-free JobConf populated entirely
/// by hand and checks the two input lines pass through.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestNoDefaults()
{
    JobConf configuration = new JobConf();
    NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) != null);
    configuration = new JobConf(false);
    NUnit.Framework.Assert.IsTrue(configuration.Get("hadoop.tmp.dir", null) == null);
    Path inDir = new Path("testing/jobconf/input");
    Path outDir = new Path("testing/jobconf/output");
    // Two-line input file for the identity job.
    OutputStream os = GetFileSystem().Create(new Path(inDir, "text.txt"));
    TextWriter wr = new OutputStreamWriter(os);
    wr.Write("hello\n");
    wr.Write("hello\n");
    wr.Close();
    // Defaults-free conf: everything the job needs is set explicitly; only
    // fs.defaultFS is copied from the test cluster's configuration.
    JobConf conf = new JobConf(false);
    conf.Set("fs.defaultFS", CreateJobConf().Get("fs.defaultFS"));
    conf.SetJobName("mr");
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetMapOutputKeyClass(typeof(LongWritable));
    conf.SetMapOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(TextOutputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapperClass(typeof(IdentityMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    JobClient.RunJob(conf);
    Path[] outputFiles = FileUtil.Stat2Paths(GetFileSystem().ListStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    // NOTE(review): if no output files exist the assertions below are skipped
    // and the test passes silently — consider asserting outputFiles.Length > 0.
    if (outputFiles.Length > 0)
    {
        InputStream @is = GetFileSystem().Open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(@is));
        string line = reader.ReadLine();
        int counter = 0;
        // Each output line must contain "hello"; expect exactly two lines.
        while (line != null)
        {
            counter++;
            NUnit.Framework.Assert.IsTrue(line.Contains("hello"));
            line = reader.ReadLine();
        }
        reader.Close();
        NUnit.Framework.Assert.AreEqual(2, counter);
    }
}
/// <summary>Creates a simple copy job.</summary>
/// <param name="conf">Configuration object</param>
/// <param name="outdir">Output directory.</param>
/// <param name="indirs">Comma separated input directories.</param>
/// <returns>Job initialized for a data copy job.</returns>
/// <exception cref="System.Exception">If an error occurs creating job configuration.</exception>
public static Job CreateCopyJob(Configuration conf, Path outdir, params Path[] indirs)
{
    // Three mappers feed a single reducer.
    conf.SetInt(MRJobConfig.NumMaps, 3);
    Job copyJob = Job.GetInstance(conf);
    copyJob.SetJobName("DataMoveJob");
    FileInputFormat.SetInputPaths(copyJob, indirs);
    copyJob.SetMapperClass(typeof(MapReduceTestUtil.DataCopyMapper));
    copyJob.SetReducerClass(typeof(MapReduceTestUtil.DataCopyReducer));
    copyJob.SetNumReduceTasks(1);
    FileOutputFormat.SetOutputPath(copyJob, outdir);
    copyJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    copyJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    return(copyJob);
}
/// <summary>Runs the seek benchmark job, optionally with fast checking.</summary>
/// <exception cref="System.Exception"/>
public static void SeekTest(FileSystem fs, bool fastCheck)
{
    // Remove any previous results before launching.
    fs.Delete(ReadDir, true);
    JobConf seekJob = new JobConf(conf, typeof(TestFileSystem));
    seekJob.SetBoolean("fs.test.fastCheck", fastCheck);
    FileInputFormat.SetInputPaths(seekJob, ControlDir);
    seekJob.SetInputFormat(typeof(SequenceFileInputFormat));
    seekJob.SetMapperClass(typeof(TestFileSystem.SeekMapper));
    seekJob.SetReducerClass(typeof(LongSumReducer));
    FileOutputFormat.SetOutputPath(seekJob, ReadDir);
    seekJob.SetOutputKeyClass(typeof(Text));
    seekJob.SetOutputValueClass(typeof(LongWritable));
    seekJob.SetNumReduceTasks(1);
    JobClient.RunJob(seekJob);
}
/// <summary>
/// Writes a sequence file of identical records with the given compression
/// type, merges it with SequenceFile.Sorter, and asserts that every record
/// is iterated and that progress reaches 1.0.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void RunTest(SequenceFile.CompressionType compressionType)
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Path tempDir = new Path(dir, "tmp");
    // Start clean; the sorter needs the temp dir to exist.
    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    fs.Mkdirs(tempDir);
    LongWritable tkey = new LongWritable();
    Text tval = new Text();
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, job, file, typeof(LongWritable), typeof(Text), compressionType, new DefaultCodec());
    try
    {
        // Highly repetitive key/value data, so compression has an effect.
        for (int i = 0; i < Records; ++i)
        {
            tkey.Set(1234);
            tval.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
            writer.Append(tkey, tval);
        }
    }
    finally
    {
        writer.Close();
    }
    long fileLength = fs.GetFileStatus(file).GetLen();
    Log.Info("With compression = " + compressionType + ": " + "compressed length = " + fileLength);
    // Merge the single file; the iterator must yield exactly Records entries.
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, job.GetOutputKeyComparator(), job.GetMapOutputKeyClass(), job.GetMapOutputValueClass(), job);
    Path[] paths = new Path[] { file };
    SequenceFile.Sorter.RawKeyValueIterator rIter = sorter.Merge(paths, tempDir, false);
    int count = 0;
    while (rIter.Next())
    {
        count++;
    }
    NUnit.Framework.Assert.AreEqual(Records, count);
    NUnit.Framework.Assert.AreEqual(1.0f, rIter.GetProgress().Get());
}
/// <summary>Creates a simple copy job.</summary>
/// <param name="indirs">List of input directories.</param>
/// <param name="outdir">Output directory.</param>
/// <returns>JobConf initialised for a simple copy job.</returns>
/// <exception cref="System.Exception">If an error occurs creating job configuration.</exception>
internal static JobConf CreateCopyJob(IList<Path> indirs, Path outdir)
{
    Configuration defaults = new Configuration();
    JobConf copyJob = new JobConf(defaults, typeof(TestJobControl));
    copyJob.SetJobName("DataMoveJob");
    FileInputFormat.SetInputPaths(copyJob, Sharpen.Collections.ToArray(indirs, new Path[0]));
    // DataCopy acts as both the mapper and the reducer.
    copyJob.SetMapperClass(typeof(JobControlTestUtils.DataCopy));
    copyJob.SetReducerClass(typeof(JobControlTestUtils.DataCopy));
    copyJob.SetNumMapTasks(12);
    copyJob.SetNumReduceTasks(4);
    FileOutputFormat.SetOutputPath(copyJob, outdir);
    copyJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    copyJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    return(copyJob);
}
/// <summary>Run the test</summary>
/// <exception cref="System.IO.IOException">on error</exception>
public static void RunTests()
{
    config.SetLong("io.bytes.per.checksum", bytesPerChecksum);
    JobConf benchJob = new JobConf(config, typeof(NNBench));
    benchJob.SetJobName("NNBench-" + operation);
    FileInputFormat.SetInputPaths(benchJob, new Path(baseDir, ControlDirName));
    benchJob.SetInputFormat(typeof(SequenceFileInputFormat));
    // Explicitly disable retries and speculative execution.
    benchJob.SetMaxMapAttempts(1);
    benchJob.SetSpeculativeExecution(false);
    benchJob.SetMapperClass(typeof(NNBench.NNBenchMapper));
    benchJob.SetReducerClass(typeof(NNBench.NNBenchReducer));
    FileOutputFormat.SetOutputPath(benchJob, new Path(baseDir, OutputDirName));
    benchJob.SetOutputKeyClass(typeof(Text));
    benchJob.SetOutputValueClass(typeof(Text));
    benchJob.SetNumReduceTasks((int)numberOfReduces);
    JobClient.RunJob(benchJob);
}
/// <summary>Builds a job whose map and/or reduce tasks fail on demand.</summary>
/// <exception cref="System.IO.IOException"/>
public virtual Job CreateJob(bool failMappers, bool failReducers, Path inputFile)
{
    // Record the requested failure modes where the tasks can read them.
    Configuration conf = GetConf();
    conf.SetBoolean(FailMap, failMappers);
    conf.SetBoolean(FailReduce, failReducers);
    Job failJob = Job.GetInstance(conf, "fail");
    failJob.SetJarByClass(typeof(FailJob));
    failJob.SetMapperClass(typeof(FailJob.FailMapper));
    failJob.SetMapOutputKeyClass(typeof(LongWritable));
    failJob.SetMapOutputValueClass(typeof(NullWritable));
    failJob.SetReducerClass(typeof(FailJob.FailReducer));
    failJob.SetOutputFormatClass(typeof(NullOutputFormat));
    failJob.SetInputFormatClass(typeof(TextInputFormat));
    // No speculation, so induced failures behave deterministically.
    failJob.SetSpeculativeExecution(false);
    failJob.SetJobName("Fail job");
    FileInputFormat.AddInputPath(failJob, inputFile);
    return(failJob);
}