/// <summary>
/// Configures a <c>JobConf</c> for the load generator from the command-line
/// arguments, optionally builds the indirect input file list, submits the job
/// and reports how long it ran.
/// </summary>
/// <param name="argv">Command-line arguments, handed to <c>ParseArgs</c>.</param>
/// <returns>
/// <c>-1</c> when <c>ParseArgs</c> rejects the arguments; <c>0</c> once
/// <c>JobClient.RunJob</c> has returned.
/// </returns>
/// <exception cref="System.Exception"/>
public virtual int Run(string[] argv)
{
    JobConf job = new JobConf(GetConf());
    job.SetJarByClass(typeof(GenericMRLoadGenerator));
    job.SetMapperClass(typeof(GenericMRLoadGenerator.SampleMapper));
    job.SetReducerClass(typeof(GenericMRLoadGenerator.SampleReducer));
    if (!ParseArgs(argv, job))
    {
        return -1;
    }

    if (null == FileOutputFormat.GetOutputPath(job))
    {
        // No output dir? No writes
        job.SetOutputFormat(typeof(NullOutputFormat));
    }

    if (0 == FileInputFormat.GetInputPaths(job).Length)
    {
        // No input dir? Generate random data
        System.Console.Error.WriteLine("No input path; ignoring InputFormat");
        ConfRandom(job);
    }
    else if (null != job.GetClass(GenericMRLoadGenerator.IndirectInputFormat, null))
    {
        // specified IndirectInputFormat? Build src list
        JobClient jClient = new JobClient(job);
        Path tmpDir = new Path(jClient.GetFs().GetHomeDirectory(), ".staging");
        Random r = new Random();
        // Random base-36 file name under the staging directory.
        Path indirInputFile = new Path(
            tmpDir,
            Sharpen.Extensions.ToString(r.Next(int.MaxValue), 36) + "_files");
        job.Set(GenericMRLoadGenerator.IndirectInputFile, indirInputFile.ToString());
        SequenceFile.Writer writer = SequenceFile.CreateWriter(
            tmpDir.GetFileSystem(job),
            job,
            indirInputFile,
            typeof(LongWritable),
            typeof(Text),
            SequenceFile.CompressionType.None);
        try
        {
            foreach (Path p in FileInputFormat.GetInputPaths(job))
            {
                FileSystem fs = p.GetFileSystem(job);
                // Iterative depth-first walk of the input path.
                Stack<Path> pathstack = new Stack<Path>();
                pathstack.Push(p);
                while (!pathstack.Empty())
                {
                    foreach (FileStatus stat in fs.ListStatus(pathstack.Pop()))
                    {
                        if (stat.IsDirectory())
                        {
                            // Directories whose name starts with "_" are not descended into.
                            // Ordinal comparison: this is a path-name prefix, not linguistic text.
                            if (!stat.GetPath().GetName().StartsWith("_", System.StringComparison.Ordinal))
                            {
                                pathstack.Push(stat.GetPath());
                            }
                        }
                        else
                        {
                            // One record per file: key = file length, value = file URI.
                            // NOTE(review): Sync() before every Append presumably places a sync
                            // marker ahead of each record - confirm against the reader.
                            writer.Sync();
                            writer.Append(
                                new LongWritable(stat.GetLen()),
                                new Text(stat.GetPath().ToUri().ToString()));
                        }
                    }
                }
            }
        }
        finally
        {
            // Close the writer even if listing or appending throws.
            writer.Close();
        }
    }

    // DateTime.Now, not "new DateTime()": the parameterless constructor yields
    // DateTime.MinValue, which made both timestamps wrong and the duration 0.
    DateTime startTime = DateTime.Now;
    // The stopwatch measures the duration so wall-clock adjustments cannot skew it.
    System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
    System.Console.Out.WriteLine("Job started: " + startTime);
    JobClient.RunJob(job);
    elapsed.Stop();
    DateTime endTime = DateTime.Now;
    System.Console.Out.WriteLine("Job ended: " + endTime);
    // Integer division: whole seconds only.
    System.Console.Out.WriteLine("The job took " + elapsed.ElapsedMilliseconds / 1000 + " seconds.");
    return 0;
}