/// <exception cref="System.Exception"/> public virtual void TestAppendSort() { GenericTestUtils.AssumeInNativeProfile(); Path file = new Path(RootPath, "testseqappendSort.seq"); fs.Delete(file, true); Path sortedFile = new Path(RootPath, "testseqappendSort.seq.sort"); fs.Delete(sortedFile, true); SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new JavaSerializationComparator <long>(), typeof(long), typeof(string), conf); SequenceFile.Writer.Option compressOption = SequenceFile.Writer.Compression(SequenceFile.CompressionType .Block, new GzipCodec()); SequenceFile.Writer writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer. File(file), SequenceFile.Writer.KeyClass(typeof(long)), SequenceFile.Writer.ValueClass (typeof(string)), compressOption); writer.Append(2L, "two"); writer.Append(1L, "one"); writer.Close(); writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file), SequenceFile.Writer .KeyClass(typeof(long)), SequenceFile.Writer.ValueClass(typeof(string)), SequenceFile.Writer .AppendIfExists(true), compressOption); writer.Append(4L, "four"); writer.Append(3L, "three"); writer.Close(); // Sort file after append sorter.Sort(file, sortedFile); VerifyAll4Values(sortedFile); fs.DeleteOnExit(file); fs.DeleteOnExit(sortedFile); }
/// <exception cref="System.IO.IOException"/> public virtual void TestClose() { Configuration conf = new Configuration(); LocalFileSystem fs = FileSystem.GetLocal(conf); // create a sequence file 1 Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq"); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, path1, typeof(Text ), typeof(NullWritable), SequenceFile.CompressionType.Block); writer.Append(new Text("file1-1"), NullWritable.Get()); writer.Append(new Text("file1-2"), NullWritable.Get()); writer.Close(); Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq"); writer = SequenceFile.CreateWriter(fs, conf, path2, typeof(Text), typeof(NullWritable ), SequenceFile.CompressionType.Block); writer.Append(new Text("file2-1"), NullWritable.Get()); writer.Append(new Text("file2-2"), NullWritable.Get()); writer.Close(); // Create a reader which uses 4 BuiltInZLibInflater instances SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf); // Returns the 4 BuiltInZLibInflater instances to the CodecPool reader.Close(); // The second close _could_ erroneously returns the same // 4 BuiltInZLibInflater instances to the CodecPool again reader.Close(); // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf); // read first value from reader1 Text text = new Text(); reader1.Next(text); Assert.Equal("file1-1", text.ToString()); // The second reader _could_ get the same 4 BuiltInZLibInflater // instances from the CodePool as reader1 SequenceFile.Reader reader2 = new SequenceFile.Reader(fs, path2, conf); // read first value from reader2 reader2.Next(text); Assert.Equal("file2-1", text.ToString()); // read second value from reader1 reader1.Next(text); Assert.Equal("file1-2", text.ToString()); // read second value from reader2 (this throws an exception) reader2.Next(text); Assert.Equal("file2-2", text.ToString()); NUnit.Framework.Assert.IsFalse(reader1.Next(text)); NUnit.Framework.Assert.IsFalse(reader2.Next(text)); }
/// <summary>Create control files before a test run.</summary>
/// <remarks>
/// Create control files before a test run.
/// The number of files created equals the number of maps specified.
/// </remarks>
/// <exception cref="System.IO.IOException">on error</exception>
private static void CreateControlFiles() {
    FileSystem tempFS = FileSystem.Get(config);
    Log.Info("Creating " + numberOfMaps + " control files");
    for (int i = 0; i < numberOfMaps; i++) {
        string strFileName = "NNBench_Controlfile_" + i;
        Path filePath = new Path(new Path(baseDir, ControlDirName), strFileName);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.CreateWriter(tempFS, config, filePath,
                typeof(Text), typeof(LongWritable), SequenceFile.CompressionType.None);
            writer.Append(new Text(strFileName), new LongWritable(0L));
        } finally {
            if (writer != null) {
                writer.Close();
            }
        }
    }
}
public virtual void Configure() {
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);
    conf.SetInt(JobContext.IoSortMb, 1);
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
    conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    if (!fs.Mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(Text), typeof(Text));
    writer.Append(new Text("rec: 1"), new Text("Hello"));
    writer.Close();
    jc = new JobClient(conf);
}
/// <exception cref="System.IO.IOException"/> private static void CreateFiles(int length, int numFiles, Random random, Job job) { TestCombineSequenceFileInputFormat.Range[] ranges = CreateRanges(length, numFiles , random); for (int i = 0; i < numFiles; i++) { Path file = new Path(workDir, "test_" + i + ".seq"); // create a file with length entries SequenceFile.Writer writer = SequenceFile.CreateWriter(localFs, job.GetConfiguration (), file, typeof(IntWritable), typeof(BytesWritable)); TestCombineSequenceFileInputFormat.Range range = ranges[i]; try { for (int j = range.start; j < range.end; j++) { IntWritable key = new IntWritable(j); byte[] data = new byte[random.Next(10)]; random.NextBytes(data); BytesWritable value = new BytesWritable(data); writer.Append(key, value); } } finally { writer.Close(); } } }
/// <exception cref="System.IO.IOException"/> private static void CreateControlFile(FileSystem fs, int fileSize, int nrFiles) { // in MB Log.Info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files" ); fs.Delete(ControlDir, true); for (int i = 0; i < nrFiles; i++) { string name = GetFileName(i); Path controlFile = new Path(ControlDir, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.CreateWriter(fs, fsConfig, controlFile, typeof(Text), typeof( LongWritable), SequenceFile.CompressionType.None); writer.Append(new Text(name), new LongWritable(fileSize)); } catch (Exception e) { throw new IOException(e.GetLocalizedMessage()); } finally { if (writer != null) { writer.Close(); } writer = null; } } Log.Info("created control files for: " + nrFiles + " files"); }
/// <exception cref="System.IO.IOException"/> private static Path WritePartitionFile <T>(string testname, Configuration conf, T[] splits) where T : WritableComparable <object> { FileSystem fs = FileSystem.GetLocal(conf); Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified (fs); Path p = new Path(testdir, testname + "/_partition.lst"); TotalOrderPartitioner.SetPartitionFile(conf, p); conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1); SequenceFile.Writer w = null; try { w = SequenceFile.CreateWriter(fs, conf, p, splits[0].GetType(), typeof(NullWritable ), SequenceFile.CompressionType.None); for (int i = 0; i < splits.Length; ++i) { w.Append(splits[i], NullWritable.Get()); } } finally { if (null != w) { w.Close(); } } return(p); }
public virtual void TestSequenceFileSync() {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    FileSystem fs = cluster.GetFileSystem();
    Path p = new Path("/testSequenceFileSync/foo");
    int len = 1 << 16;
    FSDataOutputStream @out = fs.Create(p, FsPermission.GetDefault(),
        EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite, CreateFlag.SyncBlock),
        4096, (short)1, len, null);
    SequenceFile.Writer w = SequenceFile.CreateWriter(new Configuration(),
        SequenceFile.Writer.Stream(@out),
        SequenceFile.Writer.KeyClass(typeof(RandomDatum)),
        SequenceFile.Writer.ValueClass(typeof(RandomDatum)),
        SequenceFile.Writer.Compression(SequenceFile.CompressionType.None, new DefaultCodec()));
    w.Hflush();
    CheckSyncMetric(cluster, 0);
    w.Hsync();
    CheckSyncMetric(cluster, 1);
    int seed = new Random().Next();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    generator.Next();
    w.Append(generator.GetKey(), generator.GetValue());
    w.Hsync();
    CheckSyncMetric(cluster, 2);
    w.Close();
    CheckSyncMetric(cluster, 2);
    @out.Close();
    CheckSyncMetric(cluster, 3);
    cluster.Shutdown();
}
public virtual void Configure() {
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetOutputKeyClass(typeof(IntWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapOutputValueClass(typeof(IntWritable));
    // set up two map tasks, so we can also test the merge phase in the reduce
    conf.SetNumMapTasks(2);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    if (!fs.Mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }
    // set up input data in 2 files
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(IntWritable), typeof(IntWritable));
    writer.Append(new IntWritable(11), new IntWritable(999));
    writer.Append(new IntWritable(23), new IntWritable(456));
    writer.Append(new IntWritable(10), new IntWritable(780));
    writer.Close();
    inFile = new Path(inDir, "part1");
    writer = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(IntWritable), typeof(IntWritable));
    writer.Append(new IntWritable(45), new IntWritable(100));
    writer.Append(new IntWritable(18), new IntWritable(200));
    writer.Append(new IntWritable(27), new IntWritable(300));
    writer.Close();
    jc = new JobClient(conf);
}
/// <exception cref="System.IO.IOException"/> /// <exception cref="Sharpen.TimeoutException"/> /// <exception cref="System.Exception"/> internal static void WriteFile(NameNode namenode, Configuration conf, Path name, short replication) { FileSystem fileSys = FileSystem.Get(conf); SequenceFile.Writer writer = SequenceFile.CreateWriter(fileSys, conf, name, typeof( BytesWritable), typeof(BytesWritable), SequenceFile.CompressionType.None); writer.Append(new BytesWritable(), new BytesWritable()); writer.Close(); fileSys.SetReplication(name, replication); DFSTestUtil.WaitReplication(fileSys, name, replication); }
/// <summary>Test that makes sure the FileSystem passed to CreateWriter is actually used.</summary>
/// <exception cref="System.Exception"/>
public virtual void TestCreateUsesFsArg() {
    FileSystem fs = FileSystem.GetLocal(conf);
    FileSystem spyFs = Org.Mockito.Mockito.Spy(fs);
    Path p = new Path(Runtime.GetProperty("test.build.data", ".") + "/testCreateUsesFSArg.seq");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(spyFs, conf, p,
        typeof(NullWritable), typeof(NullWritable));
    writer.Close();
    Org.Mockito.Mockito.Verify(spyFs).GetDefaultReplication(p);
}
public virtual void TestNullKeys() {
    JobConf conf = new JobConf(typeof(TestMapRed));
    FileSystem fs = FileSystem.GetLocal(conf);
    HashSet<string> values = new HashSet<string>();
    string m = "AAAAAAAAAAAAAA";
    for (int i = 1; i < 11; ++i) {
        values.AddItem(m);
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }
    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    fs.Delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");
    SequenceFile.Writer w = SequenceFile.CreateWriter(fs, conf, inFile,
        typeof(NullWritable), typeof(Text), SequenceFile.CompressionType.None);
    Text t = new Text();
    foreach (string s in values) {
        t.Set(s);
        w.Append(NullWritable.Get(), t);
    }
    w.Close();
    FileInputFormat.SetInputPaths(conf, inFile);
    FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
    conf.SetMapperClass(typeof(TestMapRed.NullMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetOutputKeyClass(typeof(NullWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetNumReduceTasks(1);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    JobClient.RunJob(conf);
    // Since all null keys compare equal, allow any ordering
    SequenceFile.Reader r = new SequenceFile.Reader(fs,
        new Path(testdir, "nullout/part-00000"), conf);
    m = "AAAAAAAAAAAAAA";
    for (int i_1 = 1; r.Next(NullWritable.Get(), t); ++i_1) {
        NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString()));
        m = m.Replace((char)('A' + i_1 - 1), (char)('A' + i_1));
    }
    NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty());
}
/// <summary>Reduce task done, write output to a file.</summary>
/// <exception cref="System.IO.IOException"/>
protected override void Cleanup(Reducer.Context context) {
    // write output to a file
    Configuration conf = context.GetConfiguration();
    Path outDir = new Path(conf.Get(FileOutputFormat.Outdir));
    Path outFile = new Path(outDir, "reduce-out");
    FileSystem fileSys = FileSystem.Get(conf);
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fileSys, conf, outFile,
        typeof(LongWritable), typeof(LongWritable), SequenceFile.CompressionType.None);
    writer.Append(new LongWritable(numInside), new LongWritable(numOutside));
    writer.Close();
}
/// <exception cref="System.IO.IOException"/> public static void WriteSequenceFile(SequenceFile.Writer writer, int numRecords) { IntWritable key = new IntWritable(); Text val = new Text(); for (int numWritten = 0; numWritten < numRecords; ++numWritten) { key.Set(numWritten); RandomText(val, numWritten, Recordsize); writer.Append(key, val); } writer.Close(); }
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.TypeLoadException"/> /// <exception cref="InstantiationException"/> /// <exception cref="System.MemberAccessException"/> private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass , int blockSize) { Path filePath = new Path("SequenceFileCodecTest." + codecClass); // Configuration conf.SetInt("io.seqfile.compress.blocksize", blockSize); // Create the SequenceFile FileSystem fs = FileSystem.Get(conf); Log.Info("Creating SequenceFile with codec \"" + codecClass + "\""); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, filePath, typeof( Text), typeof(Text), SequenceFile.CompressionType.Block, (CompressionCodec)System.Activator.CreateInstance (Runtime.GetType(codecClass))); // Write some data Log.Info("Writing to SequenceFile..."); for (int i = 0; i < lines; i++) { Text key = new Text("key" + i); Text value = new Text("value" + i); writer.Append(key, value); } writer.Close(); // Read the data back and check Log.Info("Reading from the SequenceFile..."); SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf); Writable key_1 = (Writable)System.Activator.CreateInstance(reader.GetKeyClass()); Writable value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass ()); int lc = 0; try { while (reader.Next(key_1, value_1)) { Assert.Equal("key" + lc, key_1.ToString()); Assert.Equal("value" + lc, value_1.ToString()); lc++; } } finally { reader.Close(); } Assert.Equal(lines, lc); // Delete temporary files fs.Delete(filePath, false); Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\""); }
public virtual void RunJob(int items) {
    try {
        JobConf conf = new JobConf(typeof(TestMapRed));
        Path testdir = new Path(TestDir.GetAbsolutePath());
        Path inDir = new Path(testdir, "in");
        Path outDir = new Path(testdir, "out");
        FileSystem fs = FileSystem.Get(conf);
        fs.Delete(testdir, true);
        conf.SetInt(JobContext.IoSortMb, 1);
        conf.SetInputFormat(typeof(SequenceFileInputFormat));
        FileInputFormat.SetInputPaths(conf, inDir);
        FileOutputFormat.SetOutputPath(conf, outDir);
        conf.SetMapperClass(typeof(IdentityMapper));
        conf.SetReducerClass(typeof(IdentityReducer));
        conf.SetOutputKeyClass(typeof(Text));
        conf.SetOutputValueClass(typeof(Text));
        conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
        conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
        if (!fs.Mkdirs(testdir)) {
            throw new IOException("Mkdirs failed to create " + testdir.ToString());
        }
        if (!fs.Mkdirs(inDir)) {
            throw new IOException("Mkdirs failed to create " + inDir.ToString());
        }
        Path inFile = new Path(inDir, "part0");
        SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile,
            typeof(Text), typeof(Text));
        StringBuilder content = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            content.Append(i).Append(": This is one more line of content\n");
        }
        Org.Apache.Hadoop.IO.Text text = new Org.Apache.Hadoop.IO.Text(content.ToString());
        for (int i_1 = 0; i_1 < items; i_1++) {
            writer.Append(new Org.Apache.Hadoop.IO.Text("rec:" + i_1), text);
        }
        writer.Close();
        JobClient.RunJob(conf);
    } catch (Exception e) {
        NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
    }
}
/// <exception cref="System.Exception"/> public virtual void TestAppendRecordCompression() { GenericTestUtils.AssumeInNativeProfile(); Path file = new Path(RootPath, "testseqappendblockcompr.seq"); fs.Delete(file, true); SequenceFile.Writer.Option compressOption = SequenceFile.Writer.Compression(SequenceFile.CompressionType .Record, new GzipCodec()); SequenceFile.Writer writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer. File(file), SequenceFile.Writer.KeyClass(typeof(long)), SequenceFile.Writer.ValueClass (typeof(string)), compressOption); writer.Append(1L, "one"); writer.Append(2L, "two"); writer.Close(); Verify2Values(file); writer = SequenceFile.CreateWriter(conf, SequenceFile.Writer.File(file), SequenceFile.Writer .KeyClass(typeof(long)), SequenceFile.Writer.ValueClass(typeof(string)), SequenceFile.Writer .AppendIfExists(true), compressOption); writer.Append(3L, "three"); writer.Append(4L, "four"); writer.Close(); VerifyAll4Values(file); fs.DeleteOnExit(file); }
/// <exception cref="System.IO.IOException"/> public virtual void RunTest(SequenceFile.CompressionType compressionType) { JobConf job = new JobConf(); FileSystem fs = FileSystem.GetLocal(job); Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Path tempDir = new Path(dir, "tmp"); fs.Delete(dir, true); FileInputFormat.SetInputPaths(job, dir); fs.Mkdirs(tempDir); LongWritable tkey = new LongWritable(); Text tval = new Text(); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, job, file, typeof(LongWritable ), typeof(Text), compressionType, new DefaultCodec()); try { for (int i = 0; i < Records; ++i) { tkey.Set(1234); tval.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue"); writer.Append(tkey, tval); } } finally { writer.Close(); } long fileLength = fs.GetFileStatus(file).GetLen(); Log.Info("With compression = " + compressionType + ": " + "compressed length = " + fileLength); SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, job.GetOutputKeyComparator (), job.GetMapOutputKeyClass(), job.GetMapOutputValueClass(), job); Path[] paths = new Path[] { file }; SequenceFile.Sorter.RawKeyValueIterator rIter = sorter.Merge(paths, tempDir, false ); int count = 0; while (rIter.Next()) { count++; } NUnit.Framework.Assert.AreEqual(Records, count); NUnit.Framework.Assert.AreEqual(1.0f, rIter.GetProgress().Get()); }
/// <exception cref="System.IO.IOException"/> private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path dir, long fileSizeInMB) { // Check if the input path exists and is non-empty if (fs.Exists(dir)) { FileStatus[] list = fs.ListStatus(dir); if (list.Length > 0) { throw new IOException("Input path: " + dir + " already exists... "); } } Path file = new Path(dir, "part-0"); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(BytesWritable ), typeof(BytesWritable), SequenceFile.CompressionType.None); long numBytesToWrite = fileSizeInMB * 1024 * 1024; int minKeySize = conf.GetInt(MinKey, 10); int keySizeRange = conf.GetInt(MaxKey, 1000) - minKeySize; int minValueSize = conf.GetInt(MinValue, 0); int valueSizeRange = conf.GetInt(MaxValue, 20000) - minValueSize; BytesWritable randomKey = new BytesWritable(); BytesWritable randomValue = new BytesWritable(); Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: " + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize + " valueSizeRange: " + valueSizeRange); long start = Runtime.CurrentTimeMillis(); while (numBytesToWrite > 0) { int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0); randomKey.SetSize(keyLength); RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength()); int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange ) : 0); randomValue.SetSize(valueLength); RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength()); writer.Append(randomKey, randomValue); numBytesToWrite -= keyLength + valueLength; } writer.Close(); long end = Runtime.CurrentTimeMillis(); Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start ) / 1000 + "secs"); }
/// <exception cref="System.IO.IOException"/> private void WriteMetadataTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata) { fs.Delete(file, true); Log.Info("creating " + count + " records with metadata and with " + compressionType + " compression"); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(RandomDatum ), typeof(RandomDatum), compressionType, codec, null, metadata); RandomDatum.Generator generator = new RandomDatum.Generator(seed); for (int i = 0; i < count; i++) { generator.Next(); RandomDatum key = generator.GetKey(); RandomDatum value = generator.GetValue(); writer.Append(key, value); } writer.Close(); }
/// <exception cref="System.Exception"/> public virtual void TestJavaSerialization() { Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq" ); fs.Delete(file, true); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long ), typeof(string)); writer.Append(1L, "one"); writer.Append(2L, "two"); writer.Close(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf); Assert.Equal(1L, reader.Next((object)null)); Assert.Equal("one", reader.GetCurrentValue((object)null)); Assert.Equal(2L, reader.Next((object)null)); Assert.Equal("two", reader.GetCurrentValue((object)null)); NUnit.Framework.Assert.IsNull(reader.Next((object)null)); reader.Close(); }
// clean up after all tests to restore the system state
/// <exception cref="System.IO.IOException"/>
private void CreateInputFile(string rootName) {
    Cleanup(); // clean up if a previous run failed
    Path inputFile = new Path(MapInputDir, "in_file");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, fsConfig, inputFile,
        typeof(Text), typeof(LongWritable), SequenceFile.CompressionType.None);
    try {
        nrFiles = 0;
        ListSubtree(new Path(rootName), writer);
    } finally {
        writer.Close();
    }
    Log.Info("Created map input files.");
}
/// <exception cref="System.Exception"/> private static void CreateSequenceFile(int numRecords) { // create a file with length entries SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text ), typeof(BytesWritable)); try { for (int i = 1; i <= numRecords; i++) { Text key = new Text(Sharpen.Extensions.ToString(i)); byte[] data = new byte[random.Next(10)]; random.NextBytes(data); BytesWritable value = new BytesWritable(data); writer.Append(key, value); } } finally { writer.Close(); } }
/// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
/// <remarks>
/// Write a partition file for the given job, using the Sampler provided.
/// Queries the sampler for a sample keyset, sorts by the output key
/// comparator, selects the keys for each rank, and writes to the destination
/// returned from
/// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
public static void WritePartitionFile<K, V>(Job job, InputSampler.Sampler<K, V> sampler) {
    // getInputFormat, getOutputKeyComparator
    Configuration conf = job.GetConfiguration();
    InputFormat inf = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
    int numPartitions = job.GetNumReduceTasks();
    K[] samples = (K[])sampler.GetSample(inf, job);
    Log.Info("Using " + samples.Length + " samples");
    RawComparator<K> comparator = (RawComparator<K>)job.GetSortComparator();
    Arrays.Sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
    FileSystem fs = dst.GetFileSystem(conf);
    if (fs.Exists(dst)) {
        fs.Delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, dst,
        job.GetMapOutputKeyClass(), typeof(NullWritable));
    NullWritable nullValue = NullWritable.Get();
    float stepSize = samples.Length / (float)numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.Round(stepSize * i);
        while (last >= k && comparator.Compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.Append(samples[k], nullValue);
        last = k;
    }
    writer.Close();
}
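A hedged usage sketch of the sampler plumbing: point the job at a partition file, construct a sampler, and let WritePartitionFile do the sampling and writing. The sampler parameters (sampling frequency, maximum sample count, maximum splits sampled) and the partition-file path are illustrative values, not taken from the original source.

// Hedged sketch: wiring a sampler into a total-order sort job.
Job job = Job.GetInstance(conf);
job.SetPartitionerClass(typeof(TotalOrderPartitioner));
TotalOrderPartitioner.SetPartitionFile(job.GetConfiguration(), new Path("/tmp/partitions.lst"));
// sample ~10% of records, up to 10000 samples, from at most 10 splits
InputSampler.Sampler<Text, Text> sampler = new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10);
InputSampler.WritePartitionFile(job, sampler);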
/// <exception cref="System.IO.IOException"/> internal virtual void CreateTempFile(Path p, Configuration conf) { SequenceFile.Writer writer = null; try { writer = SequenceFile.CreateWriter(fs, conf, p, typeof(Text), typeof(Text), SequenceFile.CompressionType .None); writer.Append(new Text("text"), new Text("moretext")); } catch (Exception e) { throw new IOException(e.GetLocalizedMessage()); } finally { if (writer != null) { writer.Close(); } writer = null; } Log.Info("created: " + p); }
/// <exception cref="System.Exception"/> public static void CreateControlFile(FileSystem fs, long megaBytes, int numFiles, long seed) { Log.Info("creating control file: " + megaBytes + " bytes, " + numFiles + " files" ); Path controlFile = new Path(ControlDir, "files"); fs.Delete(controlFile, true); Random random = new Random(seed); SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, controlFile, typeof( Text), typeof(LongWritable), SequenceFile.CompressionType.None); long totalSize = 0; long maxSize = ((megaBytes / numFiles) * 2) + 1; try { while (totalSize < megaBytes) { Text name = new Text(System.Convert.ToString(random.NextLong())); long size = random.NextLong(); if (size < 0) { size = -size; } size = size % maxSize; //LOG.info(" adding: name="+name+" size="+size); writer.Append(name, new LongWritable(size)); totalSize += size; } } finally { writer.Close(); } Log.Info("created control file for: " + totalSize + " bytes"); }
/// <exception cref="System.IO.IOException"/> public virtual void Close() { writer.Close(); fsdos.Close(); }
/// <exception cref="System.Exception"/> private static void Launch() { // // Generate distribution of ints. This is the answer key. // Configuration conf = new Configuration(); int countsToGo = counts; int[] dist = new int[range]; for (int i = 0; i < range; i++) { double avgInts = (1.0 * countsToGo) / (range - i); dist[i] = (int)Math.Max(0, Math.Round(avgInts + (Math.Sqrt(avgInts) * r.NextGaussian ()))); countsToGo -= dist[i]; } if (countsToGo > 0) { dist[dist.Length - 1] += countsToGo; } // // Write the answer key to a file. // Path testdir = new Path(TestDir.GetAbsolutePath()); if (!fs.Mkdirs(testdir)) { throw new IOException("Mkdirs failed to create " + testdir.ToString()); } Path randomIns = new Path(testdir, "genins"); if (!fs.Mkdirs(randomIns)) { throw new IOException("Mkdirs failed to create " + randomIns.ToString()); } Path answerkey = new Path(randomIns, "answer.key"); SequenceFile.Writer @out = SequenceFile.CreateWriter(fs, conf, answerkey, typeof( IntWritable), typeof(IntWritable), SequenceFile.CompressionType.None); try { for (int i_1 = 0; i_1 < range; i_1++) { @out.Append(new IntWritable(i_1), new IntWritable(dist[i_1])); } } finally { @out.Close(); } PrintFiles(randomIns, conf); // // Now we need to generate the random numbers according to // the above distribution. // // We create a lot of map tasks, each of which takes at least // one "line" of the distribution. (That is, a certain number // X is to be generated Y number of times.) // // A map task emits Y key/val pairs. The val is X. The key // is a randomly-generated number. // // The reduce task gets its input sorted by key. That is, sorted // in random order. It then emits a single line of text that // for the given values. It does not emit the key. // // Because there's just one reduce task, we emit a single big // file of random numbers. // Path randomOuts = new Path(testdir, "genouts"); fs.Delete(randomOuts, true); Job genJob = Job.GetInstance(conf); FileInputFormat.SetInputPaths(genJob, randomIns); genJob.SetInputFormatClass(typeof(SequenceFileInputFormat)); genJob.SetMapperClass(typeof(TestMapReduce.RandomGenMapper)); FileOutputFormat.SetOutputPath(genJob, randomOuts); genJob.SetOutputKeyClass(typeof(IntWritable)); genJob.SetOutputValueClass(typeof(IntWritable)); genJob.SetReducerClass(typeof(TestMapReduce.RandomGenReducer)); genJob.SetNumReduceTasks(1); genJob.WaitForCompletion(true); PrintFiles(randomOuts, conf); // // Next, we read the big file in and regenerate the // original map. It's split into a number of parts. // (That number is 'intermediateReduces'.) // // We have many map tasks, each of which read at least one // of the output numbers. For each number read in, the // map task emits a key/value pair where the key is the // number and the value is "1". // // We have a single reduce task, which receives its input // sorted by the key emitted above. For each key, there will // be a certain number of "1" values. The reduce task sums // these values to compute how many times the given key was // emitted. // // The reduce task then emits a key/val pair where the key // is the number in question, and the value is the number of // times the key was emitted. This is the same format as the // original answer key (except that numbers emitted zero times // will not appear in the regenerated key.) The answer set // is split into a number of pieces. A final MapReduce job // will merge them. // // There's not really a need to go to 10 reduces here // instead of 1. 
But we want to test what happens when // you have multiple reduces at once. // int intermediateReduces = 10; Path intermediateOuts = new Path(testdir, "intermediateouts"); fs.Delete(intermediateOuts, true); Job checkJob = Job.GetInstance(conf); FileInputFormat.SetInputPaths(checkJob, randomOuts); checkJob.SetMapperClass(typeof(TestMapReduce.RandomCheckMapper)); FileOutputFormat.SetOutputPath(checkJob, intermediateOuts); checkJob.SetOutputKeyClass(typeof(IntWritable)); checkJob.SetOutputValueClass(typeof(IntWritable)); checkJob.SetOutputFormatClass(typeof(MapFileOutputFormat)); checkJob.SetReducerClass(typeof(TestMapReduce.RandomCheckReducer)); checkJob.SetNumReduceTasks(intermediateReduces); checkJob.WaitForCompletion(true); PrintFiles(intermediateOuts, conf); // // OK, now we take the output from the last job and // merge it down to a single file. The map() and reduce() // functions don't really do anything except reemit tuples. // But by having a single reduce task here, we end up merging // all the files. // Path finalOuts = new Path(testdir, "finalouts"); fs.Delete(finalOuts, true); Job mergeJob = Job.GetInstance(conf); FileInputFormat.SetInputPaths(mergeJob, intermediateOuts); mergeJob.SetInputFormatClass(typeof(SequenceFileInputFormat)); mergeJob.SetMapperClass(typeof(TestMapReduce.MergeMapper)); FileOutputFormat.SetOutputPath(mergeJob, finalOuts); mergeJob.SetOutputKeyClass(typeof(IntWritable)); mergeJob.SetOutputValueClass(typeof(IntWritable)); mergeJob.SetOutputFormatClass(typeof(SequenceFileOutputFormat)); mergeJob.SetReducerClass(typeof(TestMapReduce.MergeReducer)); mergeJob.SetNumReduceTasks(1); mergeJob.WaitForCompletion(true); PrintFiles(finalOuts, conf); // // Finally, we compare the reconstructed answer key with the // original one. Remember, we need to ignore zero-count items // in the original key. // bool success = true; Path recomputedkey = new Path(finalOuts, "part-r-00000"); SequenceFile.Reader @in = new SequenceFile.Reader(fs, recomputedkey, conf); int totalseen = 0; try { IntWritable key = new IntWritable(); IntWritable val = new IntWritable(); for (int i_1 = 0; i_1 < range; i_1++) { if (dist[i_1] == 0) { continue; } if ([email protected](key, val)) { System.Console.Error.WriteLine("Cannot read entry " + i_1); success = false; break; } else { if (!((key.Get() == i_1) && (val.Get() == dist[i_1]))) { System.Console.Error.WriteLine("Mismatch! Pos=" + key.Get() + ", i=" + i_1 + ", val=" + val.Get() + ", dist[i]=" + dist[i_1]); success = false; } totalseen += val.Get(); } } if (success) { if (@in.Next(key, val)) { System.Console.Error.WriteLine("Unnecessary lines in recomputed key!"); success = false; } } } finally { @in.Close(); } int originalTotal = 0; for (int i_2 = 0; i_2 < dist.Length; i_2++) { originalTotal += dist[i_2]; } System.Console.Out.WriteLine("Original sum: " + originalTotal); System.Console.Out.WriteLine("Recomputed sum: " + totalseen); // // Write to "results" whether the test succeeded or not. // Path resultFile = new Path(testdir, "results"); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.Create(resultFile ))); try { bw.Write("Success=" + success + "\n"); System.Console.Out.WriteLine("Success=" + success); } finally { bw.Close(); } NUnit.Framework.Assert.IsTrue("testMapRed failed", success); fs.Delete(testdir, true); }
/// <exception cref="System.IO.IOException"/> /// <exception cref="System.Exception"/> public virtual void TestBinary() { Job job = Job.GetInstance(); FileSystem fs = FileSystem.GetLocal(job.GetConfiguration()); Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "testbinary.seq"); Random r = new Random(); long seed = r.NextLong(); r.SetSeed(seed); fs.Delete(dir, true); FileInputFormat.SetInputPaths(job, dir); Text tkey = new Text(); Text tval = new Text(); SequenceFile.Writer writer = new SequenceFile.Writer(fs, job.GetConfiguration(), file, typeof(Text), typeof(Text)); try { for (int i = 0; i < Records; ++i) { tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36)); tval.Set(System.Convert.ToString(r.NextLong(), 36)); writer.Append(tkey, tval); } } finally { writer.Close(); } TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job .GetConfiguration()); InputFormat <BytesWritable, BytesWritable> bformat = new SequenceFileAsBinaryInputFormat (); int count = 0; r.SetSeed(seed); BytesWritable bkey = new BytesWritable(); BytesWritable bval = new BytesWritable(); Text cmpkey = new Text(); Text cmpval = new Text(); DataInputBuffer buf = new DataInputBuffer(); FileInputFormat.SetInputPaths(job, file); foreach (InputSplit split in bformat.GetSplits(job)) { RecordReader <BytesWritable, BytesWritable> reader = bformat.CreateRecordReader(split , context); MapContext <BytesWritable, BytesWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl <BytesWritable, BytesWritable, BytesWritable, BytesWritable>(job .GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil .CreateDummyReporter(), split); reader.Initialize(split, mcontext); try { while (reader.NextKeyValue()) { bkey = reader.GetCurrentKey(); bval = reader.GetCurrentValue(); tkey.Set(Sharpen.Extensions.ToString(r.Next(), 36)); tval.Set(System.Convert.ToString(r.NextLong(), 36)); buf.Reset(bkey.GetBytes(), bkey.GetLength()); cmpkey.ReadFields(buf); buf.Reset(bval.GetBytes(), bval.GetLength()); cmpval.ReadFields(buf); NUnit.Framework.Assert.IsTrue("Keys don't match: " + "*" + cmpkey.ToString() + ":" + tkey.ToString() + "*", cmpkey.ToString().Equals(tkey.ToString())); NUnit.Framework.Assert.IsTrue("Vals don't match: " + "*" + cmpval.ToString() + ":" + tval.ToString() + "*", cmpval.ToString().Equals(tval.ToString())); ++count; } } finally { reader.Close(); } } NUnit.Framework.Assert.AreEqual("Some records not found", Records, count); }
/// <summary>Run a map/reduce job for estimating Pi.</summary>
/// <returns>the estimated value of Pi</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
public static BigDecimal EstimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) {
    Job job = Job.GetInstance(conf);
    // set up the job conf
    job.SetJobName(typeof(QuasiMonteCarlo).Name);
    job.SetJarByClass(typeof(QuasiMonteCarlo));
    job.SetInputFormatClass(typeof(SequenceFileInputFormat));
    job.SetOutputKeyClass(typeof(BooleanWritable));
    job.SetOutputValueClass(typeof(LongWritable));
    job.SetOutputFormatClass(typeof(SequenceFileOutputFormat));
    job.SetMapperClass(typeof(QuasiMonteCarlo.QmcMapper));
    job.SetReducerClass(typeof(QuasiMonteCarlo.QmcReducer));
    job.SetNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file
    job.SetSpeculativeExecution(false);
    // set up the input/output directories
    Path inDir = new Path(tmpDir, "in");
    Path outDir = new Path(tmpDir, "out");
    FileInputFormat.SetInputPaths(job, inDir);
    FileOutputFormat.SetOutputPath(job, outDir);
    FileSystem fs = FileSystem.Get(conf);
    if (fs.Exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.MakeQualified(tmpDir) +
            " already exists. Please remove it first.");
    }
    if (!fs.Mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }
    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            Path file = new Path(inDir, "part" + i);
            LongWritable offset = new LongWritable(i * numPoints);
            LongWritable size = new LongWritable(numPoints);
            SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file,
                typeof(LongWritable), typeof(LongWritable), SequenceFile.CompressionType.None);
            try {
                writer.Append(offset, size);
            } finally {
                writer.Close();
            }
            System.Console.Out.WriteLine("Wrote input for Map #" + i);
        }
        // start the map/reduce job
        System.Console.Out.WriteLine("Starting Job");
        long startTime = Runtime.CurrentTimeMillis();
        job.WaitForCompletion(true);
        double duration = (Runtime.CurrentTimeMillis() - startTime) / 1000.0;
        System.Console.Out.WriteLine("Job Finished in " + duration + " seconds");
        // read the outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.Next(numInside, numOutside);
        } finally {
            reader.Close();
        }
        // compute the estimate: Pi ~= 4 * numInside / (numMaps * numPoints)
        BigDecimal numTotal = BigDecimal.ValueOf(numMaps).Multiply(BigDecimal.ValueOf(numPoints));
        return BigDecimal.ValueOf(4).SetScale(20)
            .Multiply(BigDecimal.ValueOf(numInside.Get()))
            .Divide(numTotal, RoundingMode.HalfUp);
    } finally {
        fs.Delete(tmpDir, true);
    }
}
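A hedged driver sketch, mirroring what the class's entry point presumably does; the scratch directory and the map/point counts are illustrative values, not taken from the original source.

// Hedged sketch: driving the estimator end to end.
Configuration conf = new Configuration();
Path tmpDir = new Path("/tmp/pi-estimate"); // hypothetical scratch dir; must not pre-exist
// 10 maps x 1,000,000 points each => 10,000,000 samples in total
BigDecimal pi = EstimatePi(10, 1000000L, tmpDir, conf);
System.Console.Out.WriteLine("Estimated value of Pi is " + pi);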