/// <summary>Run the map task.</summary>
/// <remarks>
/// Creates the pipes application, tells it to run the map over the reporter's
/// input split, and, when the Java record reader is configured, pushes every
/// key/value pair read from <paramref name="input"/> down to it.
/// </remarks>
/// <param name="input">the set of inputs</param>
/// <param name="output">the object to collect the outputs of the map</param>
/// <param name="reporter">the object to update with status</param>
/// <exception cref="System.IO.IOException"/>
public override void Run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output, Reporter reporter)
{
    Application<K1, V1, K2, V2> application = null;
    try
    {
        // The reader is handed to the application only when neither the record
        // reader nor the mapper is Java; otherwise null is passed instead.
        RecordReader<FloatWritable, NullWritable> fakeInput = null;
        if (!Submitter.GetIsJavaRecordReader(job) && !Submitter.GetIsJavaMapper(job))
        {
            fakeInput = (RecordReader<FloatWritable, NullWritable>)input;
        }

        Type outputKeyClass = (Type)job.GetOutputKeyClass();
        Type outputValueClass = (Type)job.GetOutputValueClass();
        application = new Application<K1, V1, K2, V2>(
            job, fakeInput, output, reporter, outputKeyClass, outputValueClass);
    }
    catch (Exception ie)
    {
        throw new RuntimeException("interrupted", ie);
    }

    DownwardProtocol<K1, V1> toChild = application.GetDownlink();
    bool javaReader = Submitter.GetIsJavaRecordReader(job);
    toChild.RunMap(reporter.GetInputSplit(), job.GetNumReduceTasks(), javaReader);
    bool flushEachRecord = job.GetBoolean(MRJobConfig.SkipRecords, false);

    try
    {
        if (javaReader)
        {
            // One key and one value instance are allocated up front and re-used
            // for every entry read from the input.
            K1 reusableKey = input.CreateKey();
            V1 reusableValue = input.CreateValue();
            toChild.SetInputTypes(
                reusableKey.GetType().FullName,
                reusableValue.GetType().FullName);

            while (input.Next(reusableKey, reusableValue))
            {
                // Map the pair to output.
                toChild.MapItem(reusableKey, reusableValue);
                if (flushEachRecord)
                {
                    // In skip mode the streams are flushed after every record so
                    // that records surrounding a bad one are not left buffered.
                    toChild.Flush();
                }
            }

            toChild.EndOfInput();
        }

        application.WaitForFinish();
    }
    catch (Exception t)
    {
        application.Abort(t);
    }
    finally
    {
        application.Cleanup();
    }
}
/// <summary>Create a proxy object that will speak the binary protocol on a socket.</summary>
/// <remarks>
/// Upward messages are passed on to the specified handler; downward messages
/// are public methods on this object. Construction also starts the uplink
/// reader thread on the socket's input stream.
/// </remarks>
/// <param name="sock">The socket to communicate on.</param>
/// <param name="handler">The handler for the received messages.</param>
/// <param name="key">The object to read keys into.</param>
/// <param name="value">The object to read values into.</param>
/// <param name="config">The job's configuration</param>
/// <exception cref="System.IO.IOException"/>
public BinaryProtocol(Socket sock, UpwardProtocol<K2, V2> handler, K2 key, V2 value, JobConf config)
{
    OutputStream socketOut = sock.GetOutputStream();
    OutputStream downlinkSink = socketOut;

    // If we are debugging, save a copy of the downlink commands to a file.
    if (Submitter.GetKeepCommandFile(config))
    {
        downlinkSink = new BinaryProtocol.TeeOutputStream("downlink.data", socketOut);
    }

    BufferedOutputStream buffered = new BufferedOutputStream(downlinkSink, BufferSize);
    stream = new DataOutputStream(buffered);

    uplink = new BinaryProtocol.UplinkReaderThread<K2, V2>(
        sock.GetInputStream(), handler, key, value);
    uplink.SetName("pipe-uplink-handler");
    uplink.Start();
}
/// <summary>
/// Checks that <c>PipesPartitioner.GetPartition</c> returns 0 before any partition
/// has been set and returns the value given to <c>SetNextPartition</c> afterwards.
/// </summary>
public virtual void TestPipesPartitioner()
{
    PipesPartitioner<IntWritable, Text> underTest = new PipesPartitioner<IntWritable, Text>();

    // NOTE(review): the lookup result is discarded and Configure receives a
    // separate, freshly created JobConf -- confirm this is intended.
    JobConf lookupConf = new JobConf();
    Submitter.GetJavaPartitioner(lookupConf);
    underTest.Configure(new JobConf());

    IntWritable sampleKey = new IntWritable(4);

    // the cache empty
    int partitionBeforeSet = underTest.GetPartition(sampleKey, new Text("test"), 2);
    NUnit.Framework.Assert.AreEqual(0, partitionBeforeSet);

    // set data into cache
    PipesPartitioner.SetNextPartition(3);

    // get data from cache
    int partitionAfterSet = underTest.GetPartition(sampleKey, new Text("test"), 2);
    NUnit.Framework.Assert.AreEqual(3, partitionAfterSet);
}
/// <summary>
/// Starts the pipes application on first use and asks it to run the reduce.
/// Does nothing when the application has already been created.
/// </summary>
/// <param name="output">collector handed to the new application</param>
/// <param name="reporter">reporter handed to the new application</param>
/// <exception cref="System.IO.IOException"/>
private void StartApplication(OutputCollector<K3, V3> output, Reporter reporter)
{
    if (application != null)
    {
        // Already started by an earlier call.
        return;
    }

    try
    {
        Log.Info("starting application");
        Type outputKeyClass = (Type)job.GetOutputKeyClass();
        Type outputValueClass = (Type)job.GetOutputValueClass();
        application = new Application<K2, V2, K3, V3>(
            job, null, output, reporter, outputKeyClass, outputValueClass);
        downlink = application.GetDownlink();
    }
    catch (Exception ie)
    {
        throw new RuntimeException("interrupted", ie);
    }

    int reduceIndex = 0;
    bool javaRecordWriter = Submitter.GetIsJavaRecordWriter(job);
    downlink.RunReduce(reduceIndex, javaRecordWriter);
}
/// <summary>
/// Exercises <c>Submitter</c> in three steps: the Java partitioner setting is
/// stored and read back, <c>Main</c> without arguments prints the usage text,
/// and <c>Main</c> with a full argument list exits with status 0.
/// </summary>
/// <remarks>
/// System exit is disabled through <c>ExitUtil.DisableSystemExit</c>, so both
/// <c>Main</c> calls are expected to end in an <c>ExitUtil.ExitException</c>.
/// </remarks>
public virtual void TestSubmitter()
{
    JobConf conf = new JobConf();
    FilePath[] psw = CleanTokenPasswordFile();
    Runtime.SetProperty("test.build.data", "target/tmp/build/TEST_SUBMITTER_MAPPER/data");
    conf.Set("hadoop.log.dir", "target/tmp");

    // prepare configuration
    Submitter.SetIsJavaMapper(conf, false);
    Submitter.SetIsJavaReducer(conf, false);
    Submitter.SetKeepCommandFile(conf, false);
    Submitter.SetIsJavaRecordReader(conf, false);
    Submitter.SetIsJavaRecordWriter(conf, false);
    PipesPartitioner<IntWritable, Text> partitioner = new PipesPartitioner<IntWritable, Text>();
    partitioner.Configure(conf);
    Submitter.SetJavaPartitioner(conf, partitioner.GetType());
    NUnit.Framework.Assert.AreEqual(typeof(PipesPartitioner), Submitter.GetJavaPartitioner(conf));

    // The test is going to call the main method, which ends in a system exit;
    // remember the security manager and standard output so they can be restored.
    SecurityManager securityManager = Runtime.GetSecurityManager();
    TextWriter oldps = System.Console.Out;
    ByteArrayOutputStream @out = new ByteArrayOutputStream();
    ExitUtil.DisableSystemExit();

    // test without parameters
    try
    {
        Runtime.SetOut(new TextWriter(@out));
        Submitter.Main(new string[0]);
        NUnit.Framework.Assert.Fail();
    }
    catch (ExitUtil.ExitException)
    {
        // System.exit prohibited! Check the usage message that was printed.
        // The captured output is read once and re-used by every assertion.
        string usage = @out.ToString();
        NUnit.Framework.Assert.IsTrue(usage.Contains("bin/hadoop pipes"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-input <path>] // Input directory"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-output <path>] // Output directory"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-jar <jar file> // jar filename"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-inputformat <class>] // InputFormat class"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-map <class>] // Java Map class"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-partitioner <class>] // Java Partitioner"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-reduce <class>] // Java Reduce class"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-writer <class>] // Java RecordWriter"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-program <executable>] // executable URI"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-reduces <num>] // number of reduces"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("[-lazyOutput <true/false>] // createOutputLazily"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-conf <configuration file> specify an application configuration file"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-D <property=value> use value for given property"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-fs <local|namenode:port> specify a namenode"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-jt <local|resourcemanager:port> specify a ResourceManager"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-files <comma separated list of files> specify comma separated files to be copied to the map reduce cluster"));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-libjars <comma separated list of jars> specify comma separated jar files to include in the classpath."));
        NUnit.Framework.Assert.IsTrue(usage.Contains("-archives <comma separated list of archives> specify comma separated archives to be unarchived on the compute machines."));
    }
    finally
    {
        // restore
        Runtime.SetOut(oldps);
        Runtime.SetSecurityManager(securityManager);
        if (psw != null)
        {
            // remove password files
            foreach (FilePath file in psw)
            {
                file.DeleteOnExit();
            }
        }
    }

    // test calling Submitter from the command line
    try
    {
        FilePath fCommand = GetFileCommand(null);
        FilePath input = new FilePath(workSpace + FilePath.separator + "input");
        if (!input.Exists())
        {
            NUnit.Framework.Assert.IsTrue(input.CreateNewFile());
        }
        FilePath outPut = new FilePath(workSpace + FilePath.separator + "output");
        FileUtil.FullyDelete(outPut);

        // Option/value pairs, listed in the order they are passed to Main.
        string[] args = new string[]
        {
            "-input", input.GetAbsolutePath(),
            "-output", outPut.GetAbsolutePath(),
            "-inputformat", "org.apache.hadoop.mapred.TextInputFormat",
            "-map", "org.apache.hadoop.mapred.lib.IdentityMapper",
            "-partitioner", "org.apache.hadoop.mapred.pipes.PipesPartitioner",
            "-reduce", "org.apache.hadoop.mapred.lib.IdentityReducer",
            "-writer", "org.apache.hadoop.mapred.TextOutputFormat",
            "-program", fCommand.GetAbsolutePath(),
            "-reduces", "2",
            "-lazyOutput", "lazyOutput",
            "-jobconf", "mapreduce.pipes.isjavarecordwriter=false,mapreduce.pipes.isjavarecordreader=false"
        };
        Submitter.Main(args);
        NUnit.Framework.Assert.Fail();
    }
    catch (ExitUtil.ExitException e)
    {
        // status should be 0 (expected value first, actual value second)
        NUnit.Framework.Assert.AreEqual(0, e.status);
    }
    finally
    {
        Runtime.SetOut(oldps);
        Runtime.SetSecurityManager(securityManager);
    }
}
/// <summary>
/// Copies <paramref name="program"/> into the DFS, runs it as a pipes job with the
/// Java record reader and writer enabled, and compares the job output files
/// against <paramref name="expectedResults"/>.
/// </summary>
/// <param name="mr">cluster used to create the job configuration when <paramref name="conf"/> is null</param>
/// <param name="dfs">cluster whose file system holds the executable and the output</param>
/// <param name="program">local path of the executable to copy</param>
/// <param name="inputPath">job input path</param>
/// <param name="outputPath">job output path</param>
/// <param name="numMaps">number of map tasks to set on the job</param>
/// <param name="numReduces">number of reduce tasks; 0 submits the job and polls for completion</param>
/// <param name="expectedResults">expected content of each output file, in listing order</param>
/// <param name="conf">optional base configuration that is copied for the job</param>
/// <exception cref="System.IO.IOException"/>
internal static void RunProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, string[] expectedResults, JobConf conf)
{
    Path wordExec = new Path("testing/bin/application");
    JobConf job = (conf == null) ? mr.CreateJobConf() : new JobConf(conf);
    job.SetNumMapTasks(numMaps);
    job.SetNumReduceTasks(numReduces);

    // Stage the executable in the DFS and point the job at it.
    FileSystem stagingFs = dfs.GetFileSystem();
    stagingFs.Delete(wordExec.GetParent(), true);
    stagingFs.CopyFromLocalFile(program, wordExec);
    Submitter.SetExecutable(job, stagingFs.MakeQualified(wordExec).ToString());
    Submitter.SetIsJavaRecordReader(job, true);
    Submitter.SetIsJavaRecordWriter(job, true);
    FileInputFormat.SetInputPaths(job, inputPath);
    FileOutputFormat.SetOutputPath(job, outputPath);

    RunningJob running;
    if (numReduces != 0)
    {
        running = Submitter.RunJob(job);
    }
    else
    {
        // Map-only job: submit it, then poll once per second until it completes.
        running = Submitter.JobSubmit(job);
        while (!running.IsComplete())
        {
            try
            {
                Sharpen.Thread.Sleep(1000);
            }
            catch (Exception ie)
            {
                throw new RuntimeException(ie);
            }
        }
    }
    NUnit.Framework.Assert.IsTrue("pipes job failed", running.IsSuccessful());

    // Print every counter in the WORDCOUNT group; at least one must exist.
    Counters jobCounters = running.GetCounters();
    Counters.Group wordCountGroup = jobCounters.GetGroup("WORDCOUNT");
    int seenCounters = 0;
    foreach (Counters.Counter counter in wordCountGroup)
    {
        System.Console.Out.WriteLine(counter);
        ++seenCounters;
    }
    NUnit.Framework.Assert.IsTrue("No counters found!", (seenCounters > 0));

    // Read back each output file and compare it with the expectation.
    IList<string> actualResults = new AList<string>();
    Path[] outputFiles = FileUtil.Stat2Paths(
        dfs.GetFileSystem().ListStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()));
    foreach (Path outputFile in outputFiles)
    {
        actualResults.AddItem(MapReduceTestUtil.ReadOutput(outputFile, job));
    }
    NUnit.Framework.Assert.AreEqual("number of reduces is wrong", expectedResults.Length, actualResults.Count);
    for (int idx = 0; idx < actualResults.Count; idx++)
    {
        NUnit.Framework.Assert.AreEqual(
            "pipes program " + program + " output " + idx + " wrong",
            expectedResults[idx],
            actualResults[idx]);
    }
}
/// <summary>
/// Instantiates the partitioner class that <c>Submitter.GetJavaPartitioner</c>
/// returns for <paramref name="conf"/> and stores the instance in <c>part</c>.
/// </summary>
/// <param name="conf">job configuration used both for the lookup and for the new instance</param>
public virtual void Configure(JobConf conf)
{
    var partitionerClass = Submitter.GetJavaPartitioner(conf);
    part = ReflectionUtils.NewInstance(partitionerClass, conf);
}
/// <summary>Submit a pipes job based on the command line arguments.</summary>
/// <remarks>
/// The value returned by <c>Run</c> is passed to <c>ExitUtil.Terminate</c>
/// as the exit code.
/// </remarks>
/// <param name="args">the command line arguments forwarded to <c>Run</c></param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    ExitUtil.Terminate(new Submitter().Run(args));
}