/// <summary>
/// Reads every record of the benchmark file through the job's configured
/// input format and reports how long the read loop took.
/// </summary>
/// <param name="conf">
/// Job configuration. Supplies the input format, the first configured input
/// path, and the file name under <c>test.filebench.name</c> (empty string
/// when unset).
/// </param>
/// <returns>Elapsed wall-clock time of the read loop, in milliseconds.</returns>
/// <exception cref="System.IO.IOException"/>
internal static long ReadBench(JobConf conf)
{
    // InputFormat instantiation
    InputFormat inf = conf.GetInputFormat();
    string fn = conf.Get("test.filebench.name", string.Empty);
    Path pin = new Path(FileInputFormat.GetInputPaths(conf)[0], fn);
    FileStatus @in = pin.GetFileSystem(conf).GetFileStatus(pin);

    // A single split covering the whole file, with no host hints.
    RecordReader rr = inf.GetRecordReader(
        new FileSplit(pin, 0, @in.GetLen(), (string[])null), conf, Reporter.Null);
    try
    {
        object key = rr.CreateKey();
        object val = rr.CreateValue();

        // `new DateTime()` is DateTime.MinValue in C#, not "now", so taking
        // the difference of two of them always yields zero. A Stopwatch
        // measures the real elapsed time of the loop.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        while (rr.Next(key, val))
        {
            // Records are consumed only to be timed; nothing to do per record.
        }
        timer.Stop();
        return timer.ElapsedMilliseconds;
    }
    finally
    {
        rr.Close();
    }
}
/// <summary>
/// Lists the input files through <c>DummyFileInputFormat</c> and checks that
/// the listing matches the created files minus those expected to be excluded.
/// </summary>
/// <param name="withFilter">
/// When true, <c>TestPathFilter</c> is installed as the input path filter and
/// the files <c>aa</c> and <c>bb</c> are expected to be absent from the listing.
/// </param>
/// <param name="withGlob">
/// When true, the input path is the glob <c>a*</c> under the work directory
/// instead of the work directory itself, and the files <c>b</c> and <c>bb</c>
/// are expected to be absent from the listing.
/// </param>
/// <exception cref="System.Exception"/>
private void _testInputFiles(bool withFilter, bool withGlob)
{
    ICollection<Path> createdFiles = CreateFiles();
    JobConf conf = new JobConf();
    Path inputDir = (withGlob) ? new Path(workDir, "a*") : workDir;
    FileInputFormat.SetInputPaths(conf, inputDir);
    conf.SetInputFormat(typeof(TestFileInputFormatPathFilter.DummyFileInputFormat));
    if (withFilter)
    {
        FileInputFormat.SetInputPathFilter(conf, typeof(TestFileInputFormatPathFilter.TestPathFilter));
    }

    TestFileInputFormatPathFilter.DummyFileInputFormat inputFormat =
        (TestFileInputFormatPathFilter.DummyFileInputFormat)conf.GetInputFormat();
    ICollection<Path> computedFiles = new HashSet<Path>();
    foreach (FileStatus file in inputFormat.ListStatus(conf))
    {
        computedFiles.AddItem(file.GetPath());
    }

    // These two are never expected in the listing, whatever the flags are.
    createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "_hello")));
    createdFiles.Remove(localFs.MakeQualified(new Path(workDir, ".hello")));
    if (withFilter)
    {
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "aa")));
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "bb")));
    }
    if (withGlob)
    {
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "b")));
        createdFiles.Remove(localFs.MakeQualified(new Path(workDir, "bb")));
    }

    // Assert.AreEqual walks both collections in enumeration order, which is
    // not defined for a HashSet. The check is about membership only, so
    // compare the two collections without regard to order.
    NUnit.Framework.CollectionAssert.AreEquivalent(createdFiles, computedFiles);
}
/// <summary>
/// Writes the split files for a job that uses the old mapper API.
/// </summary>
/// <param name="job">Job configuration that provides the input format and the number of map tasks.</param>
/// <param name="jobSubmitDir">Directory the split files are created in; its file system is used for the write.</param>
/// <returns>The number of splits produced by the input format.</returns>
/// <exception cref="System.IO.IOException"/>
private int WriteOldSplits(JobConf job, Path jobSubmitDir)
{
    var inputFormat = job.GetInputFormat();
    int requestedMaps = job.GetNumMapTasks();
    InputSplit[] orderedSplits = inputFormat.GetSplits(job, requestedMaps);

    // Order the splits by size so that the biggest go first
    // (the ordering itself lives in _IComparer_332, declared elsewhere).
    Arrays.Sort(orderedSplits, new _IComparer_332());

    var submitFs = jobSubmitDir.GetFileSystem(job);
    JobSplitWriter.CreateSplitFiles(jobSubmitDir, job, submitFs, orderedSplits);
    return orderedSplits.Length;
}
/// <summary>
/// Loads the field-selection settings from the job, parses the output
/// key/value specs and logs the resulting spec.
/// </summary>
/// <param name="job">
/// Job configuration. The field separator defaults to a tab and both
/// key/value specs default to <c>"0-:"</c> when unset.
/// </param>
public virtual void Configure(JobConf job)
{
    fieldSeparator = job.Get(FieldSelectionHelper.DataFieldSeperator, "\t");
    mapOutputKeyValueSpec = job.Get(FieldSelectionHelper.MapOutputKeyValueSpec, "0-:");

    // The input key is ignored exactly when the job's input format has the
    // same canonical type name as TextInputFormat.
    var textFormatName = typeof(TextInputFormat).GetCanonicalName();
    var configuredFormatName = job.GetInputFormat().GetType().GetCanonicalName();
    ignoreInputKey = textFormatName.Equals(configuredFormatName);

    reduceOutputKeyValueSpec = job.Get(FieldSelectionHelper.ReduceOutputKeyValueSpec, "0-:");

    ParseOutputKeyValueSpec();
    Log.Info(SpecToString());
}
/// <summary>
/// Prepares a job configuration for a pipes run: installs the pipes
/// runner, partitioner, reducer and input/output formats where the
/// corresponding component is not Java, defaults the key/value classes to
/// <c>Text</c>, and puts the executable first in the distributed cache file list.
/// </summary>
/// <param name="conf">Job configuration, modified in place.</param>
/// <exception cref="System.ArgumentException">No executable is configured.</exception>
/// <exception cref="System.IO.IOException">The executable cannot be parsed as a URI.</exception>
private static void SetupPipesJob(JobConf conf)
{
    if (!GetIsJavaMapper(conf))
    {
        conf.SetMapRunnerClass(typeof(PipesMapRunner));
        // Remember the user's partitioner before replacing it with the pipes one.
        SetJavaPartitioner(conf, conf.GetPartitionerClass());
        conf.SetPartitionerClass(typeof(PipesPartitioner));
    }

    if (!GetIsJavaReducer(conf))
    {
        conf.SetReducerClass(typeof(PipesReducer));
        if (!GetIsJavaRecordWriter(conf))
        {
            conf.SetOutputFormat(typeof(NullOutputFormat));
        }
    }

    // Default the map output and job output types to Text.
    string textClassname = typeof(Text).FullName;
    var typeKeys = new[]
    {
        MRJobConfig.MapOutputKeyClass,
        MRJobConfig.MapOutputValueClass,
        MRJobConfig.OutputKeyClass,
        MRJobConfig.OutputValueClass
    };
    foreach (var typeKey in typeKeys)
    {
        SetIfUnset(conf, typeKey, textClassname);
    }

    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!(GetIsJavaRecordReader(conf) || GetIsJavaMapper(conf)))
    {
        // Record the user's input format type before swapping in the pipes one.
        var userFormatType = conf.GetInputFormat().GetType();
        conf.SetClass(Org.Apache.Hadoop.Mapred.Pipes.Submitter.InputFormat, userFormatType, typeof(InputFormat));
        conf.SetInputFormat(typeof(PipesNonJavaInputFormat));
    }

    string exec = GetExecutable(conf);
    if (exec == null)
    {
        throw new ArgumentException("No application program defined.");
    }

    // Add the default debug script only when the executable is expressed as
    // <path>#<executable>.
    if (exec.Contains("#"))
    {
        // Default gdb commands for map and reduce tasks.
        string defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        SetIfUnset(conf, MRJobConfig.MapDebugScript, defScript);
        SetIfUnset(conf, MRJobConfig.ReduceDebugScript, defScript);
    }

    // Build the cache list with slot 0 reserved for the executable and any
    // previously configured files shifted up by one.
    URI[] existingFiles = DistributedCache.GetCacheFiles(conf);
    int existingCount = (existingFiles == null) ? 0 : existingFiles.Length;
    URI[] fileCache = new URI[existingCount + 1];
    if (existingFiles != null)
    {
        System.Array.Copy(existingFiles, 0, fileCache, 1, existingCount);
    }

    try
    {
        fileCache[0] = new URI(exec);
    }
    catch (URISyntaxException e)
    {
        IOException wrapped = new IOException("Problem parsing execable URI " + exec);
        Sharpen.Extensions.InitCause(wrapped, e);
        throw wrapped;
    }

    DistributedCache.SetCacheFiles(fileCache, conf);
}