/// <exception cref="System.Exception"/> public static void Main(string[] args) { Configuration conf = new Configuration(); string[] otherArgs = new GenericOptionsParser(conf, args).GetRemainingArgs(); if (otherArgs.Length != 2) { System.Console.Error.WriteLine("Usage: secondarysort <in> <out>"); System.Environment.Exit(2); } Job job = Job.GetInstance(conf, "secondary sort"); job.SetJarByClass(typeof(SecondarySort)); job.SetMapperClass(typeof(SecondarySort.MapClass)); job.SetReducerClass(typeof(SecondarySort.Reduce)); // group and partition by the first int in the pair job.SetPartitionerClass(typeof(SecondarySort.FirstPartitioner)); job.SetGroupingComparatorClass(typeof(SecondarySort.FirstGroupingComparator)); // the map output is IntPair, IntWritable job.SetMapOutputKeyClass(typeof(SecondarySort.IntPair)); job.SetMapOutputValueClass(typeof(IntWritable)); // the reduce output is Text, IntWritable job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(IntWritable)); FileInputFormat.AddInputPath(job, new Path(otherArgs[0])); FileOutputFormat.SetOutputPath(job, new Path(otherArgs[1])); System.Environment.Exit(job.WaitForCompletion(true) ? 0 : 1); }
/// <exception cref="System.Exception"/> public static void Main(string[] args) { Configuration conf = new Configuration(); string[] otherArgs = new GenericOptionsParser(conf, args).GetRemainingArgs(); if (otherArgs.Length < 2) { System.Console.Error.WriteLine("Usage: wordcount <in> [<in>...] <out>"); System.Environment.Exit(2); } Job job = Job.GetInstance(conf, "word count"); job.SetJarByClass(typeof(WordCount)); job.SetMapperClass(typeof(WordCount.TokenizerMapper)); job.SetCombinerClass(typeof(WordCount.IntSumReducer)); job.SetReducerClass(typeof(WordCount.IntSumReducer)); job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(IntWritable)); for (int i = 0; i < otherArgs.Length - 1; ++i) { FileInputFormat.AddInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.SetOutputPath(job, new Path(otherArgs[otherArgs.Length - 1])); System.Environment.Exit(job.WaitForCompletion(true) ? 0 : 1); }
/// <exception cref="System.Exception"/> public virtual int Run(string[] args) { if (args.Length != 2) { System.Console.Error.WriteLine("Usage: wordmean <in> <out>"); return(0); } Configuration conf = GetConf(); Job job = Job.GetInstance(conf, "word mean"); job.SetJarByClass(typeof(WordMean)); job.SetMapperClass(typeof(WordMean.WordMeanMapper)); job.SetCombinerClass(typeof(WordMean.WordMeanReducer)); job.SetReducerClass(typeof(WordMean.WordMeanReducer)); job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(LongWritable)); FileInputFormat.AddInputPath(job, new Path(args[0])); Path outputpath = new Path(args[1]); FileOutputFormat.SetOutputPath(job, outputpath); bool result = job.WaitForCompletion(true); mean = ReadAndCalcMean(outputpath, conf); return(result ? 0 : 1); }
/// <summary>Run a map/reduce job to compute Pi.</summary>
/// <exception cref="System.IO.IOException"/>
private static void Compute(int startDigit, int nDigits, int nMaps, string workingDir, Configuration conf, TextWriter @out)
{
    string name = startDigit + "_" + nDigits;
    // setup working directory
    @out.WriteLine("Working Directory = " + workingDir);
    @out.WriteLine();
    FileSystem fs = FileSystem.Get(conf);
    Path dir = fs.MakeQualified(new Path(workingDir));
    if (fs.Exists(dir))
    {
        throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
    }
    else if (!fs.Mkdirs(dir))
    {
        throw new IOException("Cannot create working directory " + dir);
    }
    @out.WriteLine("Start Digit = " + startDigit);
    @out.WriteLine("Number of Digits = " + nDigits);
    @out.WriteLine("Number of Maps = " + nMaps);
    // setup a job
    Job job = CreateJob(name, conf);
    Path hexfile = new Path(dir, "pi_" + name + ".hex");
    FileOutputFormat.SetOutputPath(job, new Path(dir, "out"));
    // setup custom properties
    job.GetConfiguration().Set(WorkingDirProperty, dir.ToString());
    job.GetConfiguration().Set(HexFileProperty, hexfile.ToString());
    job.GetConfiguration().SetInt(DigitStartProperty, startDigit);
    job.GetConfiguration().SetInt(DigitSizeProperty, nDigits);
    job.GetConfiguration().SetInt(DigitPartsProperty, nMaps);
    // start a map/reduce job
    @out.WriteLine("\nStarting Job ...");
    long startTime = Runtime.CurrentTimeMillis();
    try
    {
        if (!job.WaitForCompletion(true))
        {
            @out.WriteLine("Job failed.");
            System.Environment.Exit(1);
        }
    }
    catch (Exception e)
    {
        throw new RuntimeException(e);
    }
    finally
    {
        double duration = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
        @out.WriteLine("Duration is " + duration + " seconds.");
    }
    @out.WriteLine("Output file: " + hexfile);
}
public virtual void TestCombinerShouldUpdateTheReporter()
{
    JobConf conf = new JobConf(mrCluster.GetConfig());
    int numMaps = 5;
    int numReds = 2;
    Path @in = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-in");
    Path @out = new Path(mrCluster.GetTestWorkDir().GetAbsolutePath(), "testCombinerShouldUpdateTheReporter-out");
    CreateInputOutPutFolder(@in, @out, numMaps);
    conf.SetJobName("test-job-with-combiner");
    conf.SetMapperClass(typeof(IdentityMapper));
    conf.SetCombinerClass(typeof(TestMRAppWithCombiner.MyCombinerToCheckReporter));
    //conf.setJarByClass(MyCombinerToCheckReporter.class);
    conf.SetReducerClass(typeof(IdentityReducer));
    DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf);
    conf.SetOutputCommitter(typeof(CustomOutputCommitter));
    conf.SetInputFormat(typeof(TextInputFormat));
    conf.SetOutputKeyClass(typeof(LongWritable));
    conf.SetOutputValueClass(typeof(Text));
    FileInputFormat.SetInputPaths(conf, @in);
    FileOutputFormat.SetOutputPath(conf, @out);
    conf.SetNumMapTasks(numMaps);
    conf.SetNumReduceTasks(numReds);
    RunJob(conf);
}
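// The combiner under test is referenced but not shown. A hedged sketch of
// what MyCombinerToCheckReporter plausibly does; the class body below is an
// assumption, not the actual fixture. All it needs to do is observe that
// the framework hands the combiner a real Reporter rather than the null one.
internal class MyCombinerToCheckReporter<K, V> : IdentityReducer<K, V>
{
    public override void Reduce(K key, IEnumerator<V> values, OutputCollector<K, V> output, Reporter reporter)
    {
        // Fail the test if the combiner was handed the null reporter
        // (Reporter.Null is the assumed Sharpen-port spelling of Reporter.NULL).
        NUnit.Framework.Assert.IsFalse(Reporter.Null == reporter, "A real Reporter should be passed to the combiner");
        base.Reduce(key, values, output, reporter);
    }
}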
/// <exception cref="System.IO.IOException"/> public static Job CreateJob(Configuration conf, Path inDir, Path outDir, int numInputFiles , int numReds, string input) { Job job = Job.GetInstance(conf); FileSystem fs = FileSystem.Get(conf); if (fs.Exists(outDir)) { fs.Delete(outDir, true); } if (fs.Exists(inDir)) { fs.Delete(inDir, true); } fs.Mkdirs(inDir); for (int i = 0; i < numInputFiles; ++i) { DataOutputStream file = fs.Create(new Path(inDir, "part-" + i)); file.WriteBytes(input); file.Close(); } FileInputFormat.SetInputPaths(job, inDir); FileOutputFormat.SetOutputPath(job, outDir); job.SetNumReduceTasks(numReds); return(job); }
/// <exception cref="System.Exception"/> public virtual int Run(string[] args) { if (args.Length != 2) { System.Console.Error.WriteLine("Usage: wordmedian <in> <out>"); return(0); } SetConf(new Configuration()); Configuration conf = GetConf(); Job job = Job.GetInstance(conf, "word median"); job.SetJarByClass(typeof(WordMedian)); job.SetMapperClass(typeof(WordMedian.WordMedianMapper)); job.SetCombinerClass(typeof(WordMedian.WordMedianReducer)); job.SetReducerClass(typeof(WordMedian.WordMedianReducer)); job.SetOutputKeyClass(typeof(IntWritable)); job.SetOutputValueClass(typeof(IntWritable)); FileInputFormat.AddInputPath(job, new Path(args[0])); FileOutputFormat.SetOutputPath(job, new Path(args[1])); bool result = job.WaitForCompletion(true); // Wait for JOB 1 -- get middle value to check for Median long totalWords = job.GetCounters().GetGroup(typeof(TaskCounter).GetCanonicalName ()).FindCounter("MAP_OUTPUT_RECORDS", "Map output records").GetValue(); int medianIndex1 = (int)Math.Ceil((totalWords / 2.0)); int medianIndex2 = (int)Math.Floor((totalWords / 2.0)); median = ReadAndFindMedian(args[1], medianIndex1, medianIndex2, conf); return(result ? 0 : 1); }
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> internal static bool RunJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds) { FileSystem fs = FileSystem.Get(conf); if (fs.Exists(outDir)) { fs.Delete(outDir, true); } if (!fs.Exists(inDir)) { fs.Mkdirs(inDir); } string input = "The quick brown fox\n" + "has many silly\n" + "red fox sox\n"; for (int i = 0; i < numMaps; ++i) { DataOutputStream file = fs.Create(new Path(inDir, "part-" + i)); file.WriteBytes(input); file.Close(); } DistributedCache.AddFileToClassPath(TestMRJobs.AppJar, conf, fs); conf.SetOutputCommitter(typeof(CustomOutputCommitter)); conf.SetInputFormat(typeof(TextInputFormat)); conf.SetOutputKeyClass(typeof(LongWritable)); conf.SetOutputValueClass(typeof(Text)); FileInputFormat.SetInputPaths(conf, inDir); FileOutputFormat.SetOutputPath(conf, outDir); conf.SetNumMapTasks(numMaps); conf.SetNumReduceTasks(numReds); JobClient jobClient = new JobClient(conf); RunningJob job = jobClient.SubmitJob(conf); return(jobClient.MonitorAndPrintJob(conf, job)); }
public virtual void TestJobSuccessCleanup()
{
    Reset();
    Job job = Job.GetInstance();
    Path inputPath = CreateInput();
    Path outputPath = GetOutputPath();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.GetLocal(conf);
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    job.SetMapperClass(typeof(TestMapperReducerCleanup.TrackingTokenizerMapper));
    job.SetReducerClass(typeof(TestMapperReducerCleanup.TrackingIntSumReducer));
    job.SetOutputKeyClass(typeof(Text));
    job.SetOutputValueClass(typeof(IntWritable));
    job.SetInputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextInputFormat));
    job.SetOutputFormatClass(typeof(TestMapperReducerCleanup.TrackingTextOutputFormat));
    job.SetNumReduceTasks(1);
    FileInputFormat.AddInputPath(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);
    job.WaitForCompletion(true);
    NUnit.Framework.Assert.IsTrue(mapCleanup);
    NUnit.Framework.Assert.IsTrue(reduceCleanup);
    NUnit.Framework.Assert.IsTrue(recordReaderCleanup);
    NUnit.Framework.Assert.IsTrue(recordWriterCleanup);
}
private static void AppendHeader(IReadOnlyList<ValidationLog> logs, FileOutputFormat outputFormat)
{
    switch (outputFormat)
    {
        case FileOutputFormat.Html:
            _stringBuilder.Append(HtmlDocAndHeaderStart);
            _stringBuilder.AppendLine(string.Format(HtmlHeaderInnerFormat, CreateHtmlStyles(logs)));
            _stringBuilder.AppendLine(string.Format(HtmlHeaderEndFormat, DateTime.Now));
            _stringBuilder.AppendLine(HtmlScript);
            foreach (var kvp in _validatorToHtmlStyle)
            {
                _stringBuilder.AppendLine(HtmlDivStart);
                _stringBuilder.Append(string.Format(HtmlDivInnerFormat, kvp.Key, kvp.Key.ToLowerInvariant()));
                _stringBuilder.Append(HtmlDivEnd);
            }
            _stringBuilder.AppendLine(HtmlTableStartAndHeader);
            break;
        case FileOutputFormat.Csv:
            _stringBuilder.AppendLine(CsvHeader);
            break;
        case FileOutputFormat.Text:
            _stringBuilder.AppendLine(PlainTextHeader);
            break;
        default:
            var msg = string.Format(EditorConstants.OutputFormatIsInvalid, outputFormat);
            // Pass the message explicitly; the single-string constructor of
            // ArgumentOutOfRangeException treats its argument as the
            // parameter name, not the message.
            throw new ArgumentOutOfRangeException(nameof(outputFormat), msg);
    }
}
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> /// <exception cref="System.TypeLoadException"/> protected internal virtual Job RunFailingMapperJob() { Configuration myConf = new Configuration(mrCluster.GetConfig()); myConf.SetInt(MRJobConfig.NumMaps, 1); myConf.SetInt(MRJobConfig.MapMaxAttempts, 2); //reduce the number of attempts Job job = Job.GetInstance(myConf); job.SetJarByClass(typeof(FailingMapper)); job.SetJobName("failmapper"); job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(Text)); job.SetInputFormatClass(typeof(RandomTextWriterJob.RandomInputFormat)); job.SetOutputFormatClass(typeof(TextOutputFormat)); job.SetMapperClass(typeof(FailingMapper)); job.SetNumReduceTasks(0); FileOutputFormat.SetOutputPath(job, new Path(OutputRootDir, "failmapper-output")); job.AddFileToClassPath(AppJar); // The AppMaster jar itself. job.Submit(); string trackingUrl = job.GetTrackingURL(); string jobId = job.GetJobID().ToString(); bool succeeded = job.WaitForCompletion(true); NUnit.Framework.Assert.IsFalse(succeeded); NUnit.Framework.Assert.IsTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.EndsWith(Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf ("_")) + "/")); return(job); }
/// <exception cref="System.Exception"/> public virtual int Run(string[] args) { if (args.Length < 2) { PrintUsage(); return(2); } Job job = Job.GetInstance(GetConf()); job.SetJobName("MultiFileWordCount"); job.SetJarByClass(typeof(MultiFileWordCount)); //set the InputFormat of the job to our InputFormat job.SetInputFormatClass(typeof(MultiFileWordCount.MyInputFormat)); // the keys are words (strings) job.SetOutputKeyClass(typeof(Text)); // the values are counts (ints) job.SetOutputValueClass(typeof(IntWritable)); //use the defined mapper job.SetMapperClass(typeof(MultiFileWordCount.MapClass)); //use the WordCount Reducer job.SetCombinerClass(typeof(IntSumReducer)); job.SetReducerClass(typeof(IntSumReducer)); FileInputFormat.AddInputPaths(job, args[0]); FileOutputFormat.SetOutputPath(job, new Path(args[1])); return(job.WaitForCompletion(true) ? 0 : 1); }
/// <exception cref="System.Exception"/> private static void JoinAs(string jointype, Type map, Type reduce) { int srcs = 4; Configuration conf = new Configuration(); Path @base = cluster.GetFileSystem().MakeQualified(new Path("/" + jointype)); Path[] src = WriteSimpleSrc(@base, conf, srcs); conf.Set(CompositeInputFormat.JoinExpr, CompositeInputFormat.Compose(jointype, typeof( SequenceFileInputFormat), src)); conf.SetInt("testdatamerge.sources", srcs); Job job = Job.GetInstance(conf); job.SetInputFormatClass(typeof(CompositeInputFormat)); FileOutputFormat.SetOutputPath(job, new Path(@base, "out")); job.SetMapperClass(map); job.SetReducerClass(reduce); job.SetOutputFormatClass(typeof(SequenceFileOutputFormat)); job.SetOutputKeyClass(typeof(IntWritable)); job.SetOutputValueClass(typeof(IntWritable)); job.WaitForCompletion(true); NUnit.Framework.Assert.IsTrue("Job failed", job.IsSuccessful()); if ("outer".Equals(jointype)) { CheckOuterConsistency(job, src); } @base.GetFileSystem(conf).Delete(@base, true); }
public virtual void TestValueIterReset()
{
    try
    {
        Configuration conf = new Configuration();
        Job job = Job.GetInstance(conf, "TestValueIterReset");
        job.SetJarByClass(typeof(TestValueIterReset));
        job.SetMapperClass(typeof(TestValueIterReset.TestMapper));
        job.SetReducerClass(typeof(TestValueIterReset.TestReducer));
        job.SetNumReduceTasks(NumTests);
        job.SetMapOutputKeyClass(typeof(IntWritable));
        job.SetMapOutputValueClass(typeof(IntWritable));
        job.SetOutputKeyClass(typeof(IntWritable));
        job.SetOutputValueClass(typeof(IntWritable));
        job.GetConfiguration().SetInt(MRJobConfig.ReduceMarkresetBufferSize, 128);
        job.SetInputFormatClass(typeof(TextInputFormat));
        job.SetOutputFormatClass(typeof(TextOutputFormat));
        FileInputFormat.AddInputPath(job, new Path(TestRootDir + "/in"));
        Path output = new Path(TestRootDir + "/out");
        localFs.Delete(output, true);
        FileOutputFormat.SetOutputPath(job, output);
        CreateInput();
        NUnit.Framework.Assert.IsTrue(job.WaitForCompletion(true));
        ValidateOutput();
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        NUnit.Framework.Assert.IsTrue(false);
    }
}
protected override void PreInitializeImpl(IExtractCommand request, IDataLoadEventListener listener)
{
    if (_request is ExtractGlobalsCommand)
    {
        listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Request is for the extraction of Globals."));
        OutputFile = _request.GetExtractionDirectory().FullName;
        return;
    }
    switch (FlatFileType)
    {
        case ExecuteExtractionToFlatFileType.CSV:
            OutputFile = Path.Combine(DirectoryPopulated.FullName, GetFilename() + ".csv");
            if (request.Configuration != null)
            {
                _output = new CSVOutputFormat(OutputFile, request.Configuration.Separator, DateFormat);
            }
            else
            {
                _output = new CSVOutputFormat(OutputFile, ",", DateFormat);
            }
            break;
        default:
            throw new ArgumentOutOfRangeException();
    }
    listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Information, "Setup data extraction destination as " + OutputFile + " (will not exist yet)"));
}
/// <summary>Start a job to compute sigma</summary>
/// <exception cref="System.IO.IOException"/>
private void Compute(string name, Summation sigma)
{
    if (sigma.GetValue() != null)
    {
        throw new IOException("sigma.getValue() != null, sigma=" + sigma);
    }
    // setup remote directory
    FileSystem fs = FileSystem.Get(GetConf());
    Path dir = fs.MakeQualified(new Path(parameters.remoteDir, name));
    if (!Org.Apache.Hadoop.Examples.PI.Util.CreateNonexistingDirectory(fs, dir))
    {
        return;
    }
    // setup a job
    Job job = CreateJob(name, sigma);
    Path outdir = new Path(dir, "out");
    FileOutputFormat.SetOutputPath(job, outdir);
    // start a map/reduce job
    string startmessage = "steps/parts = " + sigma.E.GetSteps() + "/" + parameters.nParts + " = " + Org.Apache.Hadoop.Examples.PI.Util.Long2string(sigma.E.GetSteps() / parameters.nParts);
    Org.Apache.Hadoop.Examples.PI.Util.RunJob(name, job, parameters.machine, startmessage, timer);
    IList<TaskResult> results = Org.Apache.Hadoop.Examples.PI.Util.ReadJobOutputs(fs, outdir);
    Org.Apache.Hadoop.Examples.PI.Util.WriteResults(name, results, fs, parameters.remoteDir);
    fs.Delete(dir, true);
    // combine results
    IList<TaskResult> combined = Org.Apache.Hadoop.Examples.PI.Util.Combine(results);
    PrintWriter @out = Org.Apache.Hadoop.Examples.PI.Util.CreateWriter(parameters.localDir, name);
    try
    {
        foreach (TaskResult r in combined)
        {
            string s = TaskResult2string(name, r);
            @out.WriteLine(s);
            @out.Flush();
            Org.Apache.Hadoop.Examples.PI.Util.@out.WriteLine(s);
        }
    }
    finally
    {
        @out.Close();
    }
    if (combined.Count == 1)
    {
        Summation s = combined[0].GetElement();
        if (sigma.Contains(s) && s.Contains(sigma))
        {
            sigma.SetValue(s.GetValue());
        }
    }
}
/// <exception cref="System.IO.IOException"/> private void WriteFile(JobConf conf, string filename) { System.Console.Out.WriteLine("writing file ----" + filename); Path outputPath = FileOutputFormat.GetOutputPath(conf); FileSystem fs = outputPath.GetFileSystem(conf); fs.Create(new Path(outputPath, filename)).Close(); }
public static void LaunchWindowWithValidation(SceneValidationMode validationMode, FileOutputFormat fileOutputFormat)
{
    var window = GetWindow<AssetValidatorEditorWindow>();
    window.Show();
    window.LaunchValidator(validationMode, fileOutputFormat, false, false, string.Empty);
}
/// <summary>
/// Run a test with several mappers in parallel, operating at different
/// speeds.
/// </summary>
/// <remarks>
/// Run a test with several mappers in parallel, operating at different
/// speeds. Verify that the correct amount of output is created.
/// </remarks>
/// <exception cref="System.Exception"/>
public virtual void TestMultiMaps()
{
    Job job = Job.GetInstance();
    Path inputPath = CreateMultiMapsInput();
    Path outputPath = GetOutputPath();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.GetLocal(conf);
    if (fs.Exists(outputPath))
    {
        fs.Delete(outputPath, true);
    }
    job.SetMapperClass(typeof(TestLocalRunner.StressMapper));
    job.SetReducerClass(typeof(TestLocalRunner.CountingReducer));
    job.SetNumReduceTasks(1);
    LocalJobRunner.SetLocalMaxRunningMaps(job, 6);
    job.GetConfiguration().Set(MRJobConfig.IoSortMb, "25");
    FileInputFormat.AddInputPath(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);
    Sharpen.Thread toInterrupt = Sharpen.Thread.CurrentThread();
    Sharpen.Thread interrupter = new _Thread_311(toInterrupt); // interrupts the main thread after 2m
    Log.Info("Submitting job...");
    job.Submit();
    Log.Info("Starting thread to interrupt main thread in 2 minutes");
    interrupter.Start();
    Log.Info("Waiting for job to complete...");
    try
    {
        job.WaitForCompletion(true);
    }
    catch (Exception ie)
    {
        Log.Fatal("Interrupted while waiting for job completion", ie);
        for (int i = 0; i < 10; i++)
        {
            Log.Fatal("Dumping stacks");
            ReflectionUtils.LogThreadInfo(Log, "multimap threads", 0);
            Sharpen.Thread.Sleep(1000);
        }
        throw;
    }
    Log.Info("Job completed, stopping interrupter");
    interrupter.Interrupt();
    try
    {
        interrupter.Join();
    }
    catch (Exception)
    {
        // it might interrupt us right as we interrupt it
    }
    Log.Info("Verifying output");
    VerifyOutput(outputPath);
}
/// <summary>This is the main routine for launching a distributed random write job.</summary>
/// <remarks>
/// This is the main routine for launching a distributed random write job.
/// It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
/// The reduce doesn't do anything.
/// </remarks>
/// <exception cref="System.IO.IOException"></exception>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length == 0)
    {
        System.Console.Out.WriteLine("Usage: writer <out-dir>");
        ToolRunner.PrintGenericCommandUsage(System.Console.Out);
        return 2;
    }
    Path outDir = new Path(args[0]);
    Configuration conf = GetConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.GetClusterStatus();
    int numMapsPerHost = conf.GetInt(MapsPerHost, 10);
    long numBytesToWritePerMap = conf.GetLong(BytesPerMap, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0)
    {
        System.Console.Error.WriteLine("Cannot have " + BytesPerMap + " set to 0");
        return -2;
    }
    long totalBytesToWrite = conf.GetLong(TotalBytes, numMapsPerHost * numBytesToWritePerMap * cluster.GetTaskTrackers());
    int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0)
    {
        numMaps = 1;
        conf.SetLong(BytesPerMap, totalBytesToWrite);
    }
    conf.SetInt(MRJobConfig.NumMaps, numMaps);
    Job job = Job.GetInstance(conf);
    job.SetJarByClass(typeof(RandomWriter));
    job.SetJobName("random-writer");
    FileOutputFormat.SetOutputPath(job, outDir);
    job.SetOutputKeyClass(typeof(BytesWritable));
    job.SetOutputValueClass(typeof(BytesWritable));
    job.SetInputFormatClass(typeof(RandomWriter.RandomInputFormat));
    job.SetMapperClass(typeof(RandomWriter.RandomMapper));
    job.SetReducerClass(typeof(Reducer));
    job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
    System.Console.Out.WriteLine("Running " + numMaps + " maps.");
    // reducer NONE
    job.SetNumReduceTasks(0);
    DateTime startTime = new DateTime();
    System.Console.Out.WriteLine("Job started: " + startTime);
    int ret = job.WaitForCompletion(true) ? 0 : 1;
    DateTime endTime = new DateTime();
    System.Console.Out.WriteLine("Job ended: " + endTime);
    System.Console.Out.WriteLine("The job took " + (endTime.GetTime() - startTime.GetTime()) / 1000 + " seconds.");
    return ret;
}
/// <summary>Creates and runs an MR job</summary>
/// <param name="conf"/>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
/// <exception cref="System.TypeLoadException"/>
public virtual void CreateAndRunJob(Configuration conf)
{
    Job job = Job.GetInstance(conf);
    job.SetJarByClass(typeof(TestLineRecordReaderJobs));
    job.SetMapperClass(typeof(Mapper));
    job.SetReducerClass(typeof(Reducer));
    FileInputFormat.AddInputPath(job, inputDir);
    FileOutputFormat.SetOutputPath(job, outputDir);
    job.WaitForCompletion(true);
}
/// <summary>
/// Generate the requested number of file splits, with the filename
/// set to the filename of the output file.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public override IList<InputSplit> GetSplits(JobContext job)
{
    IList<InputSplit> result = new AList<InputSplit>();
    Path outDir = FileOutputFormat.GetOutputPath(job);
    int numSplits = job.GetConfiguration().GetInt(MRJobConfig.NumMaps, 1);
    for (int i = 0; i < numSplits; ++i)
    {
        result.AddItem(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, null));
    }
    return result;
}
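// An input format that fabricates splits like the one above is normally
// paired with a RecordReader that never opens the placeholder file. A
// hedged sketch, modeled on RandomWriter's reader; the port signatures are
// assumptions.
public class DummyRecordReader : RecordReader<Text, Text>
{
    private Path name;
    private Text key;
    private readonly Text value = new Text();

    public override void Initialize(InputSplit split, TaskAttemptContext context)
    {
        name = ((FileSplit)split).GetPath();
    }

    // Hand the framework exactly one record per split; the only "data"
    // is the dummy split's filename.
    public override bool NextKeyValue()
    {
        if (name != null)
        {
            key = new Text(name.GetName());
            name = null;
            return true;
        }
        return false;
    }

    public override Text GetCurrentKey() { return key; }
    public override Text GetCurrentValue() { return value; }
    public override float GetProgress() { return 0.0f; }
    public override void Close() { }
}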
/// <summary>
/// Launches the validator; a single entry point for running validation in any mode.
/// </summary>
/// <param name="vMode">The <see cref="SceneValidationMode"/> the validation is run in.</param>
/// <param name="fileOutputFormat">The <see cref="FileOutputFormat"/> the log file will be written in, if any.</param>
/// <param name="doValidateProjectAssets">True if project assets should be validated, false if not.</param>
/// <param name="doValidateAcrossScenes">True if cross-scene validation should be performed.</param>
/// <param name="fileName">The log filename to use; the default name is used when empty.</param>
private void LaunchValidator(SceneValidationMode vMode, FileOutputFormat fileOutputFormat, bool doValidateProjectAssets, bool doValidateAcrossScenes, string fileName)
{
    _selectedFileOutputFormat = fileOutputFormat;
    _outputFilename = string.IsNullOrEmpty(fileName) ? EditorConstants.DefaultLogFilename : fileName;
    OnValidateSelectionClick(vMode, doValidateProjectAssets, doValidateAcrossScenes);
}
/// <summary>
/// Runs validation against the project in <see cref="SceneValidationMode"/>
/// <paramref name="validationMode"/> and writes the log file to a file with the default name.
/// </summary>
/// <param name="validationMode">The <see cref="SceneValidationMode"/> the validation is run in.</param>
/// <param name="fileOutputFormat">The <see cref="FileOutputFormat"/> the file will be written in, if any.</param>
/// <param name="doValidateProjectAssets">True if project assets should be validated, false if not.</param>
/// <param name="doValidateAcrossScenes">True if cross-scene validation should be performed.</param>
/// <returns>The <see cref="Result"/> of the validation run.</returns>
public static Result RunValidation(SceneValidationMode validationMode, FileOutputFormat fileOutputFormat, bool doValidateProjectAssets, bool doValidateAcrossScenes)
{
    return RunValidation(validationMode, fileOutputFormat, doValidateProjectAssets, doValidateAcrossScenes, string.Empty);
}
/// <exception cref="System.Exception"/> public virtual void TestDateSplits() { Statement s = connection.CreateStatement(); string DateTable = "datetable"; string Col = "foo"; try { // delete the table if it already exists. s.ExecuteUpdate("DROP TABLE " + DateTable); } catch (SQLException) { } // Create the table. s.ExecuteUpdate("CREATE TABLE " + DateTable + "(" + Col + " DATE)"); s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-01')"); s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-04-02')"); s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2010-05-01')"); s.ExecuteUpdate("INSERT INTO " + DateTable + " VALUES('2011-04-01')"); // commit this tx. connection.Commit(); Configuration conf = new Configuration(); conf.Set("fs.defaultFS", "file:///"); FileSystem fs = FileSystem.GetLocal(conf); fs.Delete(new Path(OutDir), true); // now do a dd import Job job = Job.GetInstance(conf); job.SetMapperClass(typeof(TestDataDrivenDBInputFormat.ValMapper)); job.SetReducerClass(typeof(Reducer)); job.SetMapOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol)); job.SetMapOutputValueClass(typeof(NullWritable)); job.SetOutputKeyClass(typeof(TestDataDrivenDBInputFormat.DateCol)); job.SetOutputValueClass(typeof(NullWritable)); job.SetNumReduceTasks(1); job.GetConfiguration().SetInt("mapreduce.map.tasks", 2); FileOutputFormat.SetOutputPath(job, new Path(OutDir)); DBConfiguration.ConfigureDB(job.GetConfiguration(), DriverClass, DbUrl, null, null ); DataDrivenDBInputFormat.SetInput(job, typeof(TestDataDrivenDBInputFormat.DateCol) , DateTable, null, Col, Col); bool ret = job.WaitForCompletion(true); NUnit.Framework.Assert.IsTrue("job failed", ret); // Check to see that we imported as much as we thought we did. NUnit.Framework.Assert.AreEqual("Did not get all the records", 4, job.GetCounters ().FindCounter(TaskCounter.ReduceOutputRecords).GetValue()); }
/* Extracts matching regexs from input files and counts them. */
// singleton
/// <exception cref="System.Exception"/>
public virtual int Run(string[] args)
{
    if (args.Length < 3)
    {
        System.Console.Out.WriteLine("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.PrintGenericCommandUsage(System.Console.Out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Sharpen.Extensions.ToString(new Random().Next(int.MaxValue)));
    Configuration conf = GetConf();
    conf.Set(RegexMapper.Pattern, args[2]);
    if (args.Length == 4)
    {
        conf.Set(RegexMapper.Group, args[3]);
    }
    Job grepJob = Job.GetInstance(conf);
    try
    {
        grepJob.SetJobName("grep-search");
        grepJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
        FileInputFormat.SetInputPaths(grepJob, args[0]);
        grepJob.SetMapperClass(typeof(RegexMapper));
        grepJob.SetCombinerClass(typeof(LongSumReducer));
        grepJob.SetReducerClass(typeof(LongSumReducer));
        FileOutputFormat.SetOutputPath(grepJob, tempDir);
        grepJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
        grepJob.SetOutputKeyClass(typeof(Text));
        grepJob.SetOutputValueClass(typeof(LongWritable));
        grepJob.WaitForCompletion(true);
        Job sortJob = Job.GetInstance(conf);
        sortJob.SetJobName("grep-sort");
        sortJob.SetJarByClass(typeof(Org.Apache.Hadoop.Examples.Grep));
        FileInputFormat.SetInputPaths(sortJob, tempDir);
        sortJob.SetInputFormatClass(typeof(SequenceFileInputFormat));
        sortJob.SetMapperClass(typeof(InverseMapper));
        sortJob.SetNumReduceTasks(1); // write a single file
        FileOutputFormat.SetOutputPath(sortJob, new Path(args[1]));
        sortJob.SetSortComparatorClass(typeof(LongWritable.DecreasingComparator)); // sort by decreasing freq
        sortJob.WaitForCompletion(true);
    }
    finally
    {
        FileSystem.Get(conf).Delete(tempDir, true);
    }
    return 0;
}
/// <exception cref="System.Exception"/> private void RunDistributedFSCheck() { JobConf job = new JobConf(fs.GetConf(), typeof(DistributedFSCheck)); FileInputFormat.SetInputPaths(job, MapInputDir); job.SetInputFormat(typeof(SequenceFileInputFormat)); job.SetMapperClass(typeof(DistributedFSCheck.DistributedFSCheckMapper)); job.SetReducerClass(typeof(AccumulatingReducer)); FileOutputFormat.SetOutputPath(job, ReadDir); job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(Text)); job.SetNumReduceTasks(1); JobClient.RunJob(job); }
/// <exception cref="System.IO.IOException"/> private static void RunIOTest(Type mapperClass, Path outputDir) { JobConf job = new JobConf(fsConfig, typeof(DFSCIOTest)); FileInputFormat.SetInputPaths(job, ControlDir); job.SetInputFormat(typeof(SequenceFileInputFormat)); job.SetMapperClass(mapperClass); job.SetReducerClass(typeof(AccumulatingReducer)); FileOutputFormat.SetOutputPath(job, outputDir); job.SetOutputKeyClass(typeof(Text)); job.SetOutputValueClass(typeof(Text)); job.SetNumReduceTasks(1); JobClient.RunJob(job); }
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> /// <exception cref="System.TypeLoadException"/> public virtual void TestRandomWriter() { Log.Info("\n\n\nStarting testRandomWriter()."); if (!(new FilePath(MiniMRYarnCluster.Appjar)).Exists()) { Log.Info("MRAppJar " + MiniMRYarnCluster.Appjar + " not found. Not running test." ); return; } RandomTextWriterJob randomWriterJob = new RandomTextWriterJob(); mrCluster.GetConfig().Set(RandomTextWriterJob.TotalBytes, "3072"); mrCluster.GetConfig().Set(RandomTextWriterJob.BytesPerMap, "1024"); Job job = randomWriterJob.CreateJob(mrCluster.GetConfig()); Path outputDir = new Path(OutputRootDir, "random-output"); FileOutputFormat.SetOutputPath(job, outputDir); job.SetSpeculativeExecution(false); job.AddFileToClassPath(AppJar); // The AppMaster jar itself. job.SetJarByClass(typeof(RandomTextWriterJob)); job.SetMaxMapAttempts(1); // speed up failures job.Submit(); string trackingUrl = job.GetTrackingURL(); string jobId = job.GetJobID().ToString(); bool succeeded = job.WaitForCompletion(true); NUnit.Framework.Assert.IsTrue(succeeded); NUnit.Framework.Assert.AreEqual(JobStatus.State.Succeeded, job.GetJobState()); NUnit.Framework.Assert.IsTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.EndsWith(Sharpen.Runtime.Substring(jobId, jobId.LastIndexOf ("_")) + "/")); // Make sure there are three files in the output-dir RemoteIterator <FileStatus> iterator = FileContext.GetFileContext(mrCluster.GetConfig ()).ListStatus(outputDir); int count = 0; while (iterator.HasNext()) { FileStatus file = iterator.Next(); if (!file.GetPath().GetName().Equals(FileOutputCommitter.SucceededFileName)) { count++; } } NUnit.Framework.Assert.AreEqual("Number of part files is wrong!", 3, count); VerifyRandomWriterCounters(job); }
/// <summary>Creates a simple kill job.</summary>
/// <param name="conf">Configuration object</param>
/// <param name="outdir">Output directory.</param>
/// <param name="indirs">Comma separated input directories.</param>
/// <returns>Job initialized for a simple kill job.</returns>
/// <exception cref="System.Exception">If an error occurs creating job configuration.</exception>
public static Job CreateKillJob(Configuration conf, Path outdir, params Path[] indirs)
{
    Job theJob = Job.GetInstance(conf);
    theJob.SetJobName("Kill-Job");
    FileInputFormat.SetInputPaths(theJob, indirs);
    theJob.SetMapperClass(typeof(MapReduceTestUtil.KillMapper));
    theJob.SetReducerClass(typeof(Reducer));
    theJob.SetNumReduceTasks(0);
    FileOutputFormat.SetOutputPath(theJob, outdir);
    theJob.SetOutputKeyClass(typeof(Org.Apache.Hadoop.IO.Text));
    theJob.SetOutputValueClass(typeof(Org.Apache.Hadoop.IO.Text));
    return theJob;
}