/// <summary>
/// Verifies partition lookup via binary search: natural ordering is disabled,
/// so the partitioner must fall back to binary search over the split points.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestTotalOrderBinarySearch()
{
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Path partFile = TestTotalOrderPartitioner.WritePartitionFile<Text>(
        "totalorderbinarysearch", conf, splitStrings);
    conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
    conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));
    try
    {
        partitioner.SetConf(conf);
        NullWritable nullValue = NullWritable.Get();
        foreach (TestTotalOrderPartitioner.Check<Text> expected in testStrings)
        {
            // The key text doubles as the assertion message.
            NUnit.Framework.Assert.AreEqual(expected.data.ToString(), expected.part,
                partitioner.GetPartition(expected.data, nullValue, splitStrings.Length + 1));
        }
    }
    finally
    {
        // Remove the partition file even when an assertion fails.
        partFile.GetFileSystem(conf).Delete(partFile, true);
    }
}
/// <summary>
/// Writes the given split points to a local "_partition.lst" sequence file
/// under the test build directory and registers the file with the
/// TotalOrderPartitioner through the configuration.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private static Path WritePartitionFile<T>(string testname, Configuration conf, T[] splits)
    where T : WritableComparable<object>
{
    FileSystem fs = FileSystem.GetLocal(conf);
    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    Path partFile = new Path(testdir, testname + "/_partition.lst");
    TotalOrderPartitioner.SetPartitionFile(conf, partFile);
    // N cut points define N + 1 partitions, hence splits.Length + 1 reducers.
    conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);
    SequenceFile.Writer writer = null;
    try
    {
        writer = SequenceFile.CreateWriter(fs, conf, partFile, splits[0].GetType(),
            typeof(NullWritable), SequenceFile.CompressionType.None);
        foreach (T split in splits)
        {
            writer.Append(split, NullWritable.Get());
        }
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
    }
    return partFile;
}
/// <summary>Read the cut points from the given IFile.</summary>
/// <param name="fs">The file system</param>
/// <param name="p">The path to read</param>
/// <param name="keyClass">The map output key class</param>
/// <param name="conf">The job config</param>
/// <exception cref="System.IO.IOException"/>
private K[] ReadPartitions(FileSystem fs, Path p, Type keyClass, Configuration conf)
{
    // Matching key types are enforced by passing in the map output key class.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
    AList<K> cutPoints = new AList<K>();
    K key = ReflectionUtils.NewInstance(keyClass, conf);
    NullWritable nullValue = NullWritable.Get();
    try
    {
        while (reader.Next(key, nullValue))
        {
            cutPoints.AddItem(key);
            // Allocate a fresh key per record so stored references don't alias.
            key = ReflectionUtils.NewInstance(keyClass, conf);
        }
        reader.Close();
        // Null out the reader so the cleanup below does not close it twice.
        reader = null;
    }
    finally
    {
        IOUtils.Cleanup(Log, reader);
    }
    return Sharpen.Collections.ToArray(cutPoints,
        (K[])System.Array.CreateInstance(keyClass, cutPoints.Count));
}
/// <summary>
/// Creates a new value instance via reflection, or the NullWritable
/// singleton when no value class has been configured.
/// </summary>
internal override U CreateValue()
{
    if (valueclass == null)
    {
        return (U)NullWritable.Get();
    }
    return (U)ReflectionUtils.NewInstance(valueclass, conf);
}
/// <summary>Request new key from proxied RR.</summary>
internal override K CreateKey()
{
    if (keyclass == null)
    {
        // No key class configured: fall back to the NullWritable singleton.
        return (K)NullWritable.Get();
    }
    return (K)ReflectionUtils.NewInstance(keyclass, conf);
}
/// <summary>Create a new key common to all child RRs.</summary>
/// <exception cref="System.InvalidCastException">if key classes differ.</exception>
internal override K CreateKey()
{
    bool useNullKey = keyclass == null || keyclass.Equals(typeof(NullWritable));
    return useNullKey
        ? (K)NullWritable.Get()
        : (K)ReflectionUtils.NewInstance(keyclass, GetConf());
}
/// <summary>
/// Verifies that TextOutputFormat honors a custom key/value separator
/// ("\u0001") and the null-key/null-value line-formatting rules.
/// </summary>
public virtual void TestFormatWithCustomSeparator()
{
    JobConf job = new JobConf();
    string separator = "\u0001";
    job.Set("mapreduce.output.textoutputformat.separator", separator);
    job.Set(JobContext.TaskAttemptId, attempt);
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);
    FileSystem fs = workDir.GetFileSystem(job);
    if (!fs.Mkdirs(workDir))
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }
    string file = "test_custom.txt";
    // A reporter that does nothing.
    Reporter reporter = Reporter.Null;
    TextOutputFormat<object, object> outputFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> recordWriter =
        outputFormat.GetRecordWriter(localFs, job, file, reporter);
    Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nullWritable = NullWritable.Get();
    try
    {
        // Every combination of real, null, and NullWritable keys/values.
        recordWriter.Write(key1, val1);
        recordWriter.Write(null, nullWritable);
        recordWriter.Write(null, val1);
        recordWriter.Write(nullWritable, val2);
        recordWriter.Write(key2, nullWritable);
        recordWriter.Write(key1, null);
        recordWriter.Write(null, null);
        recordWriter.Write(key2, val2);
    }
    finally
    {
        recordWriter.Close(reporter);
    }
    FilePath expectedFile = new FilePath(new Path(workDir, file).ToString());
    // Null/NullWritable sides are omitted; fully-null pairs produce no line.
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append(separator).Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append(separator).Append(val2).Append("\n");
    string output = UtilsForTests.Slurp(expectedFile);
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), output);
}
/// <summary>
/// Runs a local identity map/reduce job whose keys are all NullWritable and
/// verifies every distinct value round-trips through the job output.
/// </summary>
public virtual void TestNullKeys()
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    FileSystem fs = FileSystem.GetLocal(conf);
    HashSet<string> values = new HashSet<string>();
    // Build ten distinct values by shifting one letter at a time:
    // "AAAA..." -> "BBBB..." -> ... (each Replace rewrites the whole string).
    string m = "AAAAAAAAAAAAAA";
    for (int i = 1; i < 11; ++i)
    {
        values.AddItem(m);
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }
    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    fs.Delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(NullWritable), typeof(Text), SequenceFile.CompressionType.None);
    Text text = new Text();
    foreach (string s in values)
    {
        text.Set(s);
        writer.Append(NullWritable.Get(), text);
    }
    writer.Close();
    FileInputFormat.SetInputPaths(conf, inFile);
    FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
    conf.SetMapperClass(typeof(TestMapRed.NullMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetOutputKeyClass(typeof(NullWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetNumReduceTasks(1);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    JobClient.RunJob(conf);
    // Since null keys all compare equal, allow any ordering on read-back.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    m = "AAAAAAAAAAAAAA";
    int step = 1;
    while (reader.Next(NullWritable.Get(), text))
    {
        NUnit.Framework.Assert.IsTrue("Unexpected value: " + text,
            values.Remove(text.ToString()));
        m = m.Replace((char)('A' + step - 1), (char)('A' + step));
        ++step;
    }
    NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty());
}
/// <summary>Compute sigma</summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
internal static void Compute<_T0>(Summation sigma, TaskInputOutputContext<_T0> context)
{
    // Surface progress both in the log and as the task status string.
    string status = "sigma=" + sigma;
    Log.Info(status);
    context.SetStatus(status);
    long startMillis = Runtime.CurrentTimeMillis();
    sigma.Compute();
    long duration = Runtime.CurrentTimeMillis() - startMillis;
    TaskResult result = new TaskResult(sigma, duration);
    status = "result=" + result;
    Log.Info(status);
    context.SetStatus(status);
    // Emit the result under a null key; downstream only cares about values.
    context.Write(NullWritable.Get(), result);
}
/// <summary>
/// Verifies partition lookup when a custom (reverse-order) comparator is
/// configured: split points are re-sorted in reverse, so high-sorting keys
/// land in low-numbered partitions.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestTotalOrderCustomComparator()
{
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Text[] revSplitStrings = Arrays.CopyOf(splitStrings, splitStrings.Length);
    Arrays.Sort(revSplitStrings, new TestTotalOrderPartitioner.ReverseStringComparator());
    Path partFile = TestTotalOrderPartitioner.WritePartitionFile<Text>(
        "totalordercustomcomparator", conf, revSplitStrings);
    conf.SetBoolean(TotalOrderPartitioner.NaturalOrder, false);
    conf.SetClass(MRJobConfig.MapOutputKeyClass, typeof(Text), typeof(object));
    conf.SetClass(MRJobConfig.KeyComparator,
        typeof(TestTotalOrderPartitioner.ReverseStringComparator), typeof(RawComparator));
    // Expected (key, partition) pairs under the reverse comparator.
    string[] keys = { "aaaaa", "aaabb", "aabbb", "aaaaa", "babbb", "baabb",
        "yai", "yak", "z", "ddngo", "hi" };
    int[] expectedParts = { 9, 9, 9, 9, 8, 8, 1, 1, 0, 4, 3 };
    AList<TestTotalOrderPartitioner.Check<Text>> revCheck =
        new AList<TestTotalOrderPartitioner.Check<Text>>();
    for (int i = 0; i < keys.Length; ++i)
    {
        revCheck.AddItem(new TestTotalOrderPartitioner.Check<Text>(
            new Text(keys[i]), expectedParts[i]));
    }
    try
    {
        partitioner.SetConf(conf);
        NullWritable nullValue = NullWritable.Get();
        foreach (TestTotalOrderPartitioner.Check<Text> expected in revCheck)
        {
            // The key text doubles as the assertion message.
            NUnit.Framework.Assert.AreEqual(expected.data.ToString(), expected.part,
                partitioner.GetPartition(expected.data, nullValue, splitStrings.Length + 1));
        }
    }
    finally
    {
        // Remove the partition file even when an assertion fails.
        partFile.GetFileSystem(conf).Delete(partFile, true);
    }
}
/// <summary>
/// Creates a value instance for the composite reader. The value class is
/// discovered lazily by scanning the child readers from last to first,
/// skipping children whose value type is NullWritable.
/// </summary>
internal override V CreateValue()
{
    // Explicit check for value class agreement
    if (null == valueclass)
    {
        Type cls = kids[kids.Length - 1].CreateValue().GetType();
        // NOTE(review): the first loop iteration re-queries kids[kids.Length - 1]
        // (the same child already sampled above), and if EVERY child yields
        // NullWritable, i underflows past 0 and kids[i] would throw — this
        // presumably relies on at least one child having a real value class;
        // confirm against callers.
        for (int i = kids.Length - 1; cls.Equals(typeof(NullWritable)); i--)
        {
            cls = kids[i].CreateValue().GetType();
        }
        valueclass = cls.AsSubclass<Writable>();
    }
    if (valueclass.Equals(typeof(NullWritable)))
    {
        // NullWritable has no public ctor; use the singleton.
        return ((V)NullWritable.Get());
    }
    return ((V)ReflectionUtils.NewInstance(valueclass, null));
}
/// <summary><inheritDoc/></summary>
/// <remarks>
/// Deserializes the tuple: reads the cardinality, the bitset of populated
/// slots, then each slot's class name followed by (for populated slots only)
/// that slot's serialized fields. Class names must be loadable and
/// default-constructible, except NullWritable which uses its singleton.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void ReadFields(DataInput @in)
{
    // No static typeinfo on Tuples
    int card = WritableUtils.ReadVInt(@in);
    values = new Writable[card];
    ReadBitSet(@in, card, written);
    Type[] cls = new Type[card];
    try
    {
        // First pass: resolve every slot's runtime class from its name.
        for (int i = 0; i < card; ++i)
        {
            cls[i] = Sharpen.Runtime.GetType(Org.Apache.Hadoop.IO.Text.ReadString(@in)).AsSubclass<Writable>();
        }
        // Second pass: instantiate each slot, then read its payload only if
        // the bitset marks the slot as written.
        for (int i_1 = 0; i_1 < card; ++i_1)
        {
            if (cls[i_1].Equals(typeof(NullWritable)))
            {
                // NullWritable has no public ctor; use the singleton.
                values[i_1] = NullWritable.Get();
            }
            else
            {
                values[i_1] = System.Activator.CreateInstance(cls[i_1]);
            }
            if (Has(i_1))
            {
                values[i_1].ReadFields(@in);
            }
        }
    }
    catch (TypeLoadException e)
    {
        // Wrap all reflection failures uniformly as IO errors for callers.
        throw new IOException("Failed tuple init", e);
    }
    catch (MemberAccessException e)
    {
        throw new IOException("Failed tuple init", e);
    }
    catch (InstantiationException e)
    {
        throw new IOException("Failed tuple init", e);
    }
}
/// <summary>
/// Fake pipes-protocol peer: reads the binary handshake and map-run messages
/// off the socket in strict wire order, drains all key/value records, then
/// writes a final code (54) and closes the stream. Any failure is printed
/// rather than propagated; the socket is always closed.
/// </summary>
public virtual void BinaryProtocolStub()
{
    try
    {
        InitSoket();
        System.Console.Out.WriteLine("start OK");
        // RUN_MAP.code
        // should be 3
        int answer = WritableUtils.ReadVInt(dataInput);
        System.Console.Out.WriteLine("RunMap:" + answer);
        TestPipeApplication.FakeSplit split = new TestPipeApplication.FakeSplit();
        ReadObject(split, dataInput);
        // Two trailing vints of the runMap message; values are discarded.
        WritableUtils.ReadVInt(dataInput);
        WritableUtils.ReadVInt(dataInput);
        // end runMap
        // get InputTypes
        WritableUtils.ReadVInt(dataInput);
        string inText = Text.ReadString(dataInput);
        System.Console.Out.WriteLine("Key class:" + inText);
        inText = Text.ReadString(dataInput);
        System.Console.Out.WriteLine("Value class:" + inText);
        int inCode = 0;
        // read all data from sender and write to output
        // NOTE(review): 4 presumably is the "record follows" opcode — confirm
        // against the pipes BinaryProtocol constants.
        while ((inCode = WritableUtils.ReadVInt(dataInput)) == 4)
        {
            FloatWritable key = new FloatWritable();
            NullWritable value = NullWritable.Get();
            ReadObject(key, dataInput);
            System.Console.Out.WriteLine("value:" + key.Get());
            ReadObject(value, dataInput);
        }
        // Reply with the closing code expected by the test harness.
        WritableUtils.WriteVInt(dataOut, 54);
        dataOut.Flush();
        dataOut.Close();
    }
    catch (Exception x)
    {
        // Test stub: report and continue so the socket still gets closed.
        Sharpen.Runtime.PrintStackTrace(x);
    }
    finally
    {
        CloseSoket();
    }
}
// A random task attempt id for testing.
/// <summary>
/// Writes the fixed test record set (every combination of real, null, and
/// NullWritable keys/values) through the given writer, closing it afterwards.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void WriteOutput(RecordWriter theRecordWriter, Reporter reporter)
{
    NullWritable nullValue = NullWritable.Get();
    try
    {
        theRecordWriter.Write(key1, val1);
        theRecordWriter.Write(null, nullValue);
        theRecordWriter.Write(null, val1);
        theRecordWriter.Write(nullValue, val2);
        theRecordWriter.Write(key2, nullValue);
        theRecordWriter.Write(key1, null);
        theRecordWriter.Write(null, null);
        theRecordWriter.Write(key2, val2);
    }
    finally
    {
        theRecordWriter.Close(reporter);
    }
}
/// <summary>
/// Writes the fixed test record set (every combination of real, null, and
/// NullWritable keys/values) through the given writer, closing it afterwards.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
private void WriteOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
{
    NullWritable nullValue = NullWritable.Get();
    try
    {
        theRecordWriter.Write(key1, val1);
        theRecordWriter.Write(null, nullValue);
        theRecordWriter.Write(null, val1);
        theRecordWriter.Write(nullValue, val2);
        theRecordWriter.Write(key2, nullValue);
        theRecordWriter.Write(key1, null);
        theRecordWriter.Write(null, null);
        theRecordWriter.Write(key2, val2);
    }
    finally
    {
        theRecordWriter.Close(context);
    }
}
/// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
/// <remarks>
/// Write a partition file for the given job, using the Sampler provided.
/// Queries the sampler for a sample keyset, sorts by the output key
/// comparator, selects the keys for each rank, and writes to the destination
/// returned from
/// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)
/// "/>
/// .
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
public static void WritePartitionFile<K, V>(Job job, InputSampler.Sampler<K, V> sampler)
{
    // getInputFormat, getOutputKeyComparator
    Configuration conf = job.GetConfiguration();
    InputFormat inf = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
    int numPartitions = job.GetNumReduceTasks();
    K[] samples = (K[])sampler.GetSample(inf, job);
    Log.Info("Using " + samples.Length + " samples");
    RawComparator<K> comparator = (RawComparator<K>)job.GetSortComparator();
    Arrays.Sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
    FileSystem fs = dst.GetFileSystem(conf);
    // Overwrite any stale partition file from a previous run.
    if (fs.Exists(dst))
    {
        fs.Delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, dst,
        job.GetMapOutputKeyClass(), typeof(NullWritable));
    NullWritable nullValue = NullWritable.Get();
    float stepSize = samples.Length / (float)numPartitions;
    int last = -1;
    // Emit numPartitions - 1 cut points at evenly-spaced ranks in the sorted
    // sample, skipping forward past duplicates of the previous cut point so
    // cut points stay strictly increasing.
    for (int i = 1; i < numPartitions; ++i)
    {
        int k = Math.Round(stepSize * i);
        // NOTE(review): if the tail of the sample is one long run of equal
        // keys, ++k can step past samples.Length and samples[k] would throw —
        // presumably the sampler guarantees enough distinct keys; confirm.
        while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
        {
            ++k;
        }
        writer.Append(samples[k], nullValue);
        last = k;
    }
    writer.Close();
}
/// <summary>
/// Exercises PipesNonJavaInputFormat: split computation over two input files
/// and the dummy record reader's empty key/value, position, progress, and
/// Next() behavior.
/// </summary>
public virtual void TestFormat()
{
    PipesNonJavaInputFormat inputFormat = new PipesNonJavaInputFormat();
    JobConf conf = new JobConf();
    Reporter reporter = Org.Mockito.Mockito.Mock<Reporter>();
    RecordReader<FloatWritable, NullWritable> reader = inputFormat.GetRecordReader(
        new TestPipeApplication.FakeSplit(), conf, reporter);
    NUnit.Framework.Assert.AreEqual(0.0f, reader.GetProgress(), 0.001);
    // Create the two (empty) input files the splits will be built from.
    FilePath input1 = new FilePath(workSpace + FilePath.separator + "input1");
    if (!input1.GetParentFile().Exists())
    {
        NUnit.Framework.Assert.IsTrue(input1.GetParentFile().Mkdirs());
    }
    if (!input1.Exists())
    {
        NUnit.Framework.Assert.IsTrue(input1.CreateNewFile());
    }
    FilePath input2 = new FilePath(workSpace + FilePath.separator + "input2");
    if (!input2.Exists())
    {
        NUnit.Framework.Assert.IsTrue(input2.CreateNewFile());
    }
    // Register both files as input; expect one split per file.
    conf.Set(FileInputFormat.InputDir,
        StringUtils.EscapeString(input1.GetAbsolutePath()) + "," +
        StringUtils.EscapeString(input2.GetAbsolutePath()));
    InputSplit[] splits = inputFormat.GetSplits(conf, 2);
    NUnit.Framework.Assert.AreEqual(2, splits.Length);
    PipesNonJavaInputFormat.PipesDummyRecordReader dummyReader =
        new PipesNonJavaInputFormat.PipesDummyRecordReader(conf, splits[0]);
    // A fresh dummy reader has no key/value and sits at the start.
    NUnit.Framework.Assert.IsNull(dummyReader.CreateKey());
    NUnit.Framework.Assert.IsNull(dummyReader.CreateValue());
    NUnit.Framework.Assert.AreEqual(0, dummyReader.GetPos());
    NUnit.Framework.Assert.AreEqual(0.0, dummyReader.GetProgress(), 0.001);
    // Next() stores the key's float as the reported progress.
    NUnit.Framework.Assert.IsTrue(dummyReader.Next(new FloatWritable(2.0f), NullWritable.Get()));
    NUnit.Framework.Assert.AreEqual(2.0, dummyReader.GetProgress(), 0.001);
    dummyReader.Close();
}
/// <summary>Read the next key in a set into <code>key</code>.</summary>
/// <remarks>
/// Read the next key in a set into <code>key</code>. Returns
/// true if such a key exists and false when at the end of the set.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual bool Next(WritableComparable key)
{
    // A set stores no values; supply the NullWritable singleton.
    NullWritable ignoredValue = NullWritable.Get();
    return Next(key, ignoredValue);
}
// no public ctor
/// <summary>Append a key to a set.</summary>
/// <remarks>
/// Append a key to a set. The key must be strictly greater than the
/// previous key added to the set.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
public virtual void Append(WritableComparable key)
{
    // A set stores no values; supply the NullWritable singleton.
    NullWritable emptyValue = NullWritable.Get();
    Append(key, emptyValue);
}
/// <summary>
/// Verifies TextOutputFormat with compression enabled: records are written
/// through the default codec and read back via a codec-wrapped line reader.
/// </summary>
public virtual void TestCompress()
{
    JobConf job = new JobConf();
    job.Set(JobContext.TaskAttemptId, attempt);
    job.Set(FileOutputFormat.Compress, "true");
    FileOutputFormat.SetOutputPath(job, workDir.GetParent().GetParent());
    FileOutputFormat.SetWorkOutputPath(job, workDir);
    FileSystem fs = workDir.GetFileSystem(job);
    if (!fs.Mkdirs(workDir))
    {
        NUnit.Framework.Assert.Fail("Failed to create output directory");
    }
    string file = "test_compress.txt";
    // A reporter that does nothing.
    Reporter reporter = Reporter.Null;
    TextOutputFormat<object, object> outputFormat = new TextOutputFormat<object, object>();
    RecordWriter<object, object> recordWriter =
        outputFormat.GetRecordWriter(localFs, job, file, reporter);
    Org.Apache.Hadoop.IO.Text key1 = new Org.Apache.Hadoop.IO.Text("key1");
    Org.Apache.Hadoop.IO.Text key2 = new Org.Apache.Hadoop.IO.Text("key2");
    Org.Apache.Hadoop.IO.Text val1 = new Org.Apache.Hadoop.IO.Text("val1");
    Org.Apache.Hadoop.IO.Text val2 = new Org.Apache.Hadoop.IO.Text("val2");
    NullWritable nullWritable = NullWritable.Get();
    try
    {
        // Every combination of real, null, and NullWritable keys/values.
        recordWriter.Write(key1, val1);
        recordWriter.Write(null, nullWritable);
        recordWriter.Write(null, val1);
        recordWriter.Write(nullWritable, val2);
        recordWriter.Write(key2, nullWritable);
        recordWriter.Write(key1, null);
        recordWriter.Write(null, null);
        recordWriter.Write(key2, val2);
    }
    finally
    {
        recordWriter.Close(reporter);
    }
    // Null/NullWritable sides are omitted; fully-null pairs produce no line.
    StringBuilder expectedOutput = new StringBuilder();
    expectedOutput.Append(key1).Append("\t").Append(val1).Append("\n");
    expectedOutput.Append(val1).Append("\n");
    expectedOutput.Append(val2).Append("\n");
    expectedOutput.Append(key2).Append("\n");
    expectedOutput.Append(key1).Append("\n");
    expectedOutput.Append(key2).Append("\t").Append(val2).Append("\n");
    DefaultCodec codec = new DefaultCodec();
    codec.SetConf(job);
    // Read the compressed output back line by line through the codec.
    Path expectedFile = new Path(workDir, file + codec.GetDefaultExtension());
    FileInputStream istream = new FileInputStream(expectedFile.ToString());
    CompressionInputStream cistream = codec.CreateInputStream(istream);
    LineReader reader = new LineReader(cistream);
    StringBuilder actual = new StringBuilder();
    Org.Apache.Hadoop.IO.Text line = new Org.Apache.Hadoop.IO.Text();
    while (reader.ReadLine(line) > 0)
    {
        actual.Append(line);
        actual.Append("\n");
    }
    reader.Close();
    NUnit.Framework.Assert.AreEqual(expectedOutput.ToString(), actual.ToString());
}