/// <summary>
/// Creates the record writer for a task attempt by wrapping the sequence-file
/// writer returned by <c>GetSequenceWriter</c>.
/// </summary>
/// <param name="context">Task attempt context used to look up the output key/value classes and to create the writer.</param>
/// <returns>A <c>_RecordWriter_140</c> wrapping the sequence-file writer.</returns>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<BytesWritable, BytesWritable> GetRecordWriter(TaskAttemptContext context)
{
    var keyClass = GetSequenceFileOutputKeyClass(context);
    var valueClass = GetSequenceFileOutputValueClass(context);
    SequenceFile.Writer sequenceWriter = GetSequenceWriter(context, keyClass, valueClass);
    return new _RecordWriter_140(sequenceWriter);
}
/// <summary>
/// Creates <paramref name="numFiles"/> sequence files named <c>test_N.seq</c> under
/// <c>workDir</c>. File N receives one record per integer in <c>ranges[N]</c>
/// (start inclusive, end exclusive), keyed by that integer.
/// </summary>
/// <param name="length">Passed through to <c>CreateRanges</c>.</param>
/// <param name="numFiles">Number of files to create.</param>
/// <param name="random">Source of the value sizes and value bytes.</param>
/// <param name="job">Job whose configuration is used to create the writers.</param>
/// <exception cref="System.IO.IOException"/>
private static void CreateFiles(int length, int numFiles, Random random, Job job)
{
    TestCombineSequenceFileInputFormat.Range[] ranges = CreateRanges(length, numFiles, random);
    for (int fileIndex = 0; fileIndex < numFiles; fileIndex++)
    {
        Path file = new Path(workDir, "test_" + fileIndex + ".seq");
        SequenceFile.Writer writer = SequenceFile.CreateWriter(
            localFs, job.GetConfiguration(), file, typeof(IntWritable), typeof(BytesWritable));
        TestCombineSequenceFileInputFormat.Range range = ranges[fileIndex];
        try
        {
            int recordKey = range.start;
            while (recordKey < range.end)
            {
                IntWritable key = new IntWritable(recordKey);
                // Value length comes from random.Next(10); the bytes are random as well.
                byte[] payload = new byte[random.Next(10)];
                random.NextBytes(payload);
                writer.Append(key, new BytesWritable(payload));
                recordKey++;
            }
        }
        finally
        {
            writer.Close();
        }
    }
}
/// <summary>
/// Writes two records to a block-compressed (Gzip) sequence file, appends two more
/// through a second writer opened with <c>AppendIfExists(true)</c>, sorts the file and
/// checks the result with <c>VerifyAll4Values</c>.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestAppendSort()
{
    GenericTestUtils.AssumeInNativeProfile();

    Path file = new Path(RootPath, "testseqappendSort.seq");
    fs.Delete(file, true);
    Path sortedFile = new Path(RootPath, "testseqappendSort.seq.sort");
    fs.Delete(sortedFile, true);

    SequenceFile.Sorter sorter = new SequenceFile.Sorter(
        fs, new JavaSerializationComparator<long>(), typeof(long), typeof(string), conf);
    SequenceFile.Writer.Option compressOption = SequenceFile.Writer.Compression(
        SequenceFile.CompressionType.Block, new GzipCodec());

    // First session: create the file with two out-of-order records.
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        conf,
        SequenceFile.Writer.File(file),
        SequenceFile.Writer.KeyClass(typeof(long)),
        SequenceFile.Writer.ValueClass(typeof(string)),
        compressOption);
    try
    {
        writer.Append(2L, "two");
        writer.Append(1L, "one");
    }
    finally
    {
        // Close even when an append fails so the file handle is not leaked.
        writer.Close();
    }

    // Second session: reopen the same file in append mode and add two more records.
    writer = SequenceFile.CreateWriter(
        conf,
        SequenceFile.Writer.File(file),
        SequenceFile.Writer.KeyClass(typeof(long)),
        SequenceFile.Writer.ValueClass(typeof(string)),
        SequenceFile.Writer.AppendIfExists(true),
        compressOption);
    try
    {
        writer.Append(4L, "four");
        writer.Append(3L, "three");
    }
    finally
    {
        writer.Close();
    }

    // Sort file after append
    sorter.Sort(file, sortedFile);
    VerifyAll4Values(sortedFile);

    fs.DeleteOnExit(file);
    fs.DeleteOnExit(sortedFile);
}
/// <summary>
/// Writes <paramref name="splits"/> as the keys of a partition file at
/// <c>&lt;test.build.data&gt;/&lt;testname&gt;/_partition.lst</c> on the local file system,
/// registers that path with <c>TotalOrderPartitioner</c> and sets the number of
/// reduces to <c>splits.Length + 1</c>.
/// </summary>
/// <param name="testname">Sub-directory name for the partition file.</param>
/// <param name="conf">Configuration that is updated and used to create the writer.</param>
/// <param name="splits">Split keys; the runtime type of the first element is used as the key class.</param>
/// <returns>The path of the partition file.</returns>
/// <exception cref="System.IO.IOException"/>
private static Path WritePartitionFile<T>(string testname, Configuration conf, T[] splits)
    where T : WritableComparable<object>
{
    FileSystem localFs = FileSystem.GetLocal(conf);
    Path baseDir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(localFs);
    Path partitionFile = new Path(baseDir, testname + "/_partition.lst");

    TotalOrderPartitioner.SetPartitionFile(conf, partitionFile);
    conf.SetInt(MRJobConfig.NumReduces, splits.Length + 1);

    SequenceFile.Writer partitionWriter = null;
    try
    {
        partitionWriter = SequenceFile.CreateWriter(
            localFs, conf, partitionFile, splits[0].GetType(), typeof(NullWritable),
            SequenceFile.CompressionType.None);
        foreach (T split in splits)
        {
            partitionWriter.Append(split, NullWritable.Get());
        }
    }
    finally
    {
        if (partitionWriter != null)
        {
            partitionWriter.Close();
        }
    }

    return partitionFile;
}
/// <summary>
/// Recreates <c>ControlDir</c> and writes one control file per test file. Each control
/// file is named <c>in_file_&lt;name&gt;</c> and holds a single record: the file name
/// and <paramref name="fileSize"/>.
/// </summary>
/// <param name="fs">File system on which the control directory lives.</param>
/// <param name="fileSize">File size, in MB, stored as the record value.</param>
/// <param name="nrFiles">Number of control files to create.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when a control file cannot be written; the original failure is kept as
/// <c>InnerException</c>.
/// </exception>
private static void CreateControlFile(FileSystem fs, int fileSize, int nrFiles)
{
    Log.Info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");
    fs.Delete(ControlDir, true);

    for (int i = 0; i < nrFiles; i++)
    {
        string name = GetFileName(i);
        Path controlFile = new Path(ControlDir, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try
        {
            writer = SequenceFile.CreateWriter(
                fs, fsConfig, controlFile, typeof(Text), typeof(LongWritable),
                SequenceFile.CompressionType.None);
            writer.Append(new Text(name), new LongWritable(fileSize));
        }
        catch (Exception e)
        {
            // Same exception type and message as before, but the cause is preserved
            // so its type and stack trace are not lost.
            throw new IOException(e.GetLocalizedMessage(), e);
        }
        finally
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
    }

    Log.Info("created control files for: " + nrFiles + " files");
}
/// <summary>
/// Opens a sequence-file writer over a stream created with <c>CreateFlag.SyncBlock</c>
/// on a mini DFS cluster and checks, via <c>CheckSyncMetric</c>, the expected sync
/// count after each <c>Hflush</c>, <c>Hsync</c> and <c>Close</c> call.
/// </summary>
public virtual void TestSequenceFileSync()
{
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).Build();
    try
    {
        FileSystem fs = cluster.GetFileSystem();
        Path p = new Path("/testSequenceFileSync/foo");
        int len = 1 << 16;
        FSDataOutputStream @out = fs.Create(
            p,
            FsPermission.GetDefault(),
            EnumSet.Of(CreateFlag.Create, CreateFlag.Overwrite, CreateFlag.SyncBlock),
            4096,
            (short)1,
            len,
            null);
        SequenceFile.Writer w = SequenceFile.CreateWriter(
            new Configuration(),
            SequenceFile.Writer.Stream(@out),
            SequenceFile.Writer.KeyClass(typeof(RandomDatum)),
            SequenceFile.Writer.ValueClass(typeof(RandomDatum)),
            SequenceFile.Writer.Compression(SequenceFile.CompressionType.None, new DefaultCodec()));

        w.Hflush();
        CheckSyncMetric(cluster, 0);
        w.Hsync();
        CheckSyncMetric(cluster, 1);

        int seed = new Random().Next();
        RandomDatum.Generator generator = new RandomDatum.Generator(seed);
        generator.Next();
        w.Append(generator.GetKey(), generator.GetValue());
        w.Hsync();
        CheckSyncMetric(cluster, 2);

        // Closing the writer leaves the expected count at 2; closing the stream raises it to 3.
        w.Close();
        CheckSyncMetric(cluster, 2);
        @out.Close();
        CheckSyncMetric(cluster, 3);
    }
    finally
    {
        // Always stop the mini cluster, even when a check above fails.
        cluster.Shutdown();
    }
}
/// <summary>
/// Recursively walks the tree rooted at <paramref name="rootStatus"/>. For each regular
/// file it increments <c>nrFiles</c> and appends one (file name, offset) record per
/// default-block-size step over the file length.
/// </summary>
/// <param name="rootStatus">Status of the file or directory to process.</param>
/// <param name="writer">Writer receiving the (file name, offset) records.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when listing a directory raises <c>FileNotFoundException</c>.
/// </exception>
private void ListSubtree(FileStatus rootStatus, SequenceFile.Writer writer)
{
    Path rootFile = rootStatus.GetPath();

    if (!rootStatus.IsFile())
    {
        // Not a regular file: list the entry and recurse into each child.
        FileStatus[] children;
        try
        {
            children = fs.ListStatus(rootFile);
        }
        catch (FileNotFoundException)
        {
            throw new IOException("Could not get listing for " + rootFile);
        }

        foreach (FileStatus child in children)
        {
            ListSubtree(child, writer);
        }

        return;
    }

    nrFiles++;

    // For a regular file generate <fName,offset> pairs
    long blockSize = fs.GetDefaultBlockSize(rootFile);
    long fileLength = rootStatus.GetLen();
    long offset = 0;
    while (offset < fileLength)
    {
        writer.Append(new Text(rootFile.ToString()), new LongWritable(offset));
        offset += blockSize;
    }
}
/// <summary>
/// Prepares the job: wipes and recreates the test directory, configures <c>conf</c> for
/// a local sequence-file job using <c>TextGen</c>/<c>TextReduce</c>, writes a
/// single-record input file <c>in/part0</c> and creates the <c>JobClient</c>.
/// </summary>
/// <exception cref="System.IO.IOException">Thrown when a directory cannot be created.</exception>
public virtual void Configure()
{
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);

    conf.SetInt(JobContext.IoSortMb, 1);
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapOutputType.TextGen));
    conf.SetReducerClass(typeof(TestMapOutputType.TextReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));

    if (!fs.Mkdirs(testdir))
    {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text), typeof(Text));
    try
    {
        writer.Append(new Text("rec: 1"), new Text("Hello"));
    }
    finally
    {
        // Close the writer even if the append fails.
        writer.Close();
    }

    jc = new JobClient(conf);
}
/// <summary>Create control files before a test run.</summary>
/// <remarks>
/// The number of files created equals the number of maps specified
/// (<c>numberOfMaps</c>). Each file is named <c>NNBench_Controlfile_N</c>, lives under
/// <c>baseDir/ControlDirName</c>, and contains one record: the file name with the
/// value zero.
/// </remarks>
/// <exception cref="System.IO.IOException">on error</exception>
private static void CreateControlFiles()
{
    FileSystem tempFS = FileSystem.Get(config);
    Log.Info("Creating " + numberOfMaps + " control files");

    for (int i = 0; i < numberOfMaps; i++)
    {
        string strFileName = "NNBench_Controlfile_" + i;
        Path filePath = new Path(new Path(baseDir, ControlDirName), strFileName);
        SequenceFile.Writer writer = null;
        try
        {
            writer = SequenceFile.CreateWriter(
                tempFS, config, filePath, typeof(Text), typeof(LongWritable),
                SequenceFile.CompressionType.None);
            // Upper-case L suffix: the lower-case form reads like the digit 1 and
            // triggers compiler warning CS0078.
            writer.Append(new Text(strFileName), new LongWritable(0L));
        }
        finally
        {
            if (writer != null)
            {
                writer.Close();
            }
        }
    }
}
/// <summary>
/// Writes a sequence file holding one empty key/value pair at <paramref name="name"/>,
/// sets its replication factor and waits (via <c>DFSTestUtil.WaitReplication</c>) for
/// that replication.
/// </summary>
/// <param name="namenode">Not used by this method body.</param>
/// <param name="conf">Configuration used to obtain the file system and create the writer.</param>
/// <param name="name">Path of the file to write.</param>
/// <param name="replication">Replication factor to set and wait for.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Sharpen.TimeoutException"/>
/// <exception cref="System.Exception"/>
internal static void WriteFile(NameNode namenode, Configuration conf, Path name, short replication)
{
    FileSystem fileSys = FileSystem.Get(conf);
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fileSys, conf, name, typeof(BytesWritable), typeof(BytesWritable),
        SequenceFile.CompressionType.None);
    try
    {
        writer.Append(new BytesWritable(), new BytesWritable());
    }
    finally
    {
        // Close the writer even if the append fails.
        writer.Close();
    }

    fileSys.SetReplication(name, replication);
    DFSTestUtil.WaitReplication(fileSys, name, replication);
}
/// <summary>
/// Test that makes sure the FileSystem passed to createWriter is the one that gets
/// used: a writer is created against a Mockito spy of the local file system and the
/// test verifies that <c>GetDefaultReplication</c> was called on the spy for the target path.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestCreateUsesFsArg()
{
    FileSystem localFs = FileSystem.GetLocal(conf);
    FileSystem spyFs = Org.Mockito.Mockito.Spy(localFs);
    string target = Runtime.GetProperty("test.build.data", ".") + "/testCreateUsesFSArg.seq";
    Path p = new Path(target);

    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        spyFs, conf, p, typeof(NullWritable), typeof(NullWritable));
    writer.Close();

    Org.Mockito.Mockito.Verify(spyFs).GetDefaultReplication(p);
}
/// <summary>
/// Appends a skipped record to the skip file. The block-compressed writer is created
/// on first use, at <c>&lt;skip output path&gt;/&lt;task id&gt;</c>.
/// </summary>
/// <param name="key">Key of the skipped record.</param>
/// <param name="value">Value of the skipped record.</param>
/// <exception cref="System.IO.IOException"/>
private void WriteSkippedRec(KEY key, VALUE value)
{
    if (this.skipWriter == null)
    {
        var jobConf = this._enclosing.conf;
        Path skipDir = SkipBadRecords.GetSkipOutputPath(jobConf);
        Path skipFile = new Path(skipDir, this._enclosing.GetTaskID().ToString());
        FileSystem skipFs = skipFile.GetFileSystem(jobConf);
        this.skipWriter = SequenceFile.CreateWriter(
            skipFs, jobConf, skipFile, this.keyClass, this.valClass,
            SequenceFile.CompressionType.Block, this.reporter);
    }

    this.skipWriter.Append(key, value);
}
/// <summary>
/// Runs a local job over a sequence file whose keys are all <c>NullWritable</c> and
/// checks that the output contains exactly the ten input values, in any order.
/// </summary>
public virtual void TestNullKeys()
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    FileSystem fs = FileSystem.GetLocal(conf);

    // Build ten distinct values: each step replaces the previous letter with the next one.
    HashSet<string> values = new HashSet<string>();
    string m = "AAAAAAAAAAAAAA";
    for (int i = 1; i < 11; ++i)
    {
        values.AddItem(m);
        m = m.Replace((char)('A' + i - 1), (char)('A' + i));
    }

    Path testdir = new Path(Runtime.GetProperty("test.build.data", "/tmp")).MakeQualified(fs);
    fs.Delete(testdir, true);
    Path inFile = new Path(testdir, "nullin/blah");

    Text t = new Text();
    SequenceFile.Writer w = SequenceFile.CreateWriter(
        fs, conf, inFile, typeof(NullWritable), typeof(Text), SequenceFile.CompressionType.None);
    try
    {
        foreach (string s in values)
        {
            t.Set(s);
            w.Append(NullWritable.Get(), t);
        }
    }
    finally
    {
        // Close the writer even if an append fails.
        w.Close();
    }

    FileInputFormat.SetInputPaths(conf, inFile);
    FileOutputFormat.SetOutputPath(conf, new Path(testdir, "nullout"));
    conf.SetMapperClass(typeof(TestMapRed.NullMapper));
    conf.SetReducerClass(typeof(IdentityReducer));
    conf.SetOutputKeyClass(typeof(NullWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.SetNumReduceTasks(1);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    JobClient.RunJob(conf);

    // Since null keys all equal, allow any ordering
    SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(testdir, "nullout/part-00000"), conf);
    try
    {
        while (r.Next(NullWritable.Get(), t))
        {
            NUnit.Framework.Assert.IsTrue("Unexpected value: " + t, values.Remove(t.ToString()));
        }
    }
    finally
    {
        // The reader was previously never closed.
        r.Close();
    }

    NUnit.Framework.Assert.IsTrue("Missing values: " + values.ToString(), values.IsEmpty());
}
/// <summary>
/// Appends <paramref name="numRecords"/> records to <paramref name="writer"/>, keyed
/// 0..numRecords-1 with values filled in by <c>RandomText</c>, then closes the writer.
/// </summary>
/// <param name="writer">Writer to append to; it is always closed by this method, including on failure.</param>
/// <param name="numRecords">Number of records to write.</param>
/// <exception cref="System.IO.IOException"/>
public static void WriteSequenceFile(SequenceFile.Writer writer, int numRecords)
{
    try
    {
        IntWritable key = new IntWritable();
        Text val = new Text();
        for (int numWritten = 0; numWritten < numRecords; ++numWritten)
        {
            key.Set(numWritten);
            RandomText(val, numWritten, Recordsize);
            writer.Append(key, val);
        }
    }
    finally
    {
        // This method owns closing the writer; do it even when an append fails.
        writer.Close();
    }
}
/// <summary>Reduce task done, write output to a file.</summary>
/// <remarks>
/// Writes one record (<c>numInside</c>, <c>numOutside</c>) to the sequence file
/// <c>reduce-out</c> in the directory configured under <c>FileOutputFormat.Outdir</c>.
/// </remarks>
/// <param name="context">Reducer context supplying the configuration.</param>
/// <exception cref="System.IO.IOException"/>
protected override void Cleanup(Reducer.Context context)
{
    Configuration conf = context.GetConfiguration();
    Path outDir = new Path(conf.Get(FileOutputFormat.Outdir));
    Path outFile = new Path(outDir, "reduce-out");
    FileSystem fileSys = FileSystem.Get(conf);

    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fileSys, conf, outFile, typeof(LongWritable), typeof(LongWritable),
        SequenceFile.CompressionType.None);
    try
    {
        writer.Append(new LongWritable(numInside), new LongWritable(numOutside));
    }
    finally
    {
        // Close the writer even if the append fails.
        writer.Close();
    }
}
/// <summary>
/// Writes <paramref name="items"/> records, all sharing one large text value, to
/// <c>in/part0</c> and runs a local identity map/reduce job over them. Any exception
/// is turned into a test failure.
/// </summary>
/// <param name="items">Number of input records to generate.</param>
public virtual void RunJob(int items)
{
    try
    {
        JobConf conf = new JobConf(typeof(TestMapRed));
        Path testdir = new Path(TestDir.GetAbsolutePath());
        Path inDir = new Path(testdir, "in");
        Path outDir = new Path(testdir, "out");
        FileSystem fs = FileSystem.Get(conf);
        fs.Delete(testdir, true);

        conf.SetInt(JobContext.IoSortMb, 1);
        conf.SetInputFormat(typeof(SequenceFileInputFormat));
        FileInputFormat.SetInputPaths(conf, inDir);
        FileOutputFormat.SetOutputPath(conf, outDir);
        conf.SetMapperClass(typeof(IdentityMapper));
        conf.SetReducerClass(typeof(IdentityReducer));
        conf.SetOutputKeyClass(typeof(Text));
        conf.SetOutputValueClass(typeof(Text));
        conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
        conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);

        if (!fs.Mkdirs(testdir))
        {
            throw new IOException("Mkdirs failed to create " + testdir.ToString());
        }
        if (!fs.Mkdirs(inDir))
        {
            throw new IOException("Mkdirs failed to create " + inDir.ToString());
        }

        Path inFile = new Path(inDir, "part0");
        SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(Text), typeof(Text));
        try
        {
            // One 1000-line value that is reused for every record.
            StringBuilder content = new StringBuilder();
            for (int i = 0; i < 1000; i++)
            {
                content.Append(i).Append(": This is one more line of content\n");
            }
            Org.Apache.Hadoop.IO.Text text = new Org.Apache.Hadoop.IO.Text(content.ToString());

            for (int i_1 = 0; i_1 < items; i_1++)
            {
                writer.Append(new Org.Apache.Hadoop.IO.Text("rec:" + i_1), text);
            }
        }
        finally
        {
            // Close the writer even if an append fails.
            writer.Close();
        }

        JobClient.RunJob(conf);
    }
    catch (Exception e)
    {
        NUnit.Framework.Assert.IsTrue("Threw exception:" + e, false);
    }
}
/// <summary>
/// Round-trips <paramref name="lines"/> key/value pairs through a block-compressed
/// sequence file using the codec named by <paramref name="codecClass"/>, then reads
/// them back and checks content and count.
/// </summary>
/// <param name="conf">Configuration; <c>io.seqfile.compress.blocksize</c> is set on it.</param>
/// <param name="lines">Number of records to write and expect back.</param>
/// <param name="codecClass">Type name of the compression codec to instantiate.</param>
/// <param name="blockSize">Value for <c>io.seqfile.compress.blocksize</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="InstantiationException"/>
/// <exception cref="System.MemberAccessException"/>
private static void SequenceFileCodecTest(Configuration conf, int lines, string codecClass, int blockSize)
{
    Path filePath = new Path("SequenceFileCodecTest." + codecClass);

    // Configuration
    conf.SetInt("io.seqfile.compress.blocksize", blockSize);

    // Create the SequenceFile
    FileSystem fs = FileSystem.Get(conf);
    Log.Info("Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, filePath, typeof(Text), typeof(Text), SequenceFile.CompressionType.Block,
        (CompressionCodec)System.Activator.CreateInstance(Runtime.GetType(codecClass)));
    try
    {
        // Write some data
        Log.Info("Writing to SequenceFile...");
        for (int i = 0; i < lines; i++)
        {
            Text key = new Text("key" + i);
            Text value = new Text("value" + i);
            writer.Append(key, value);
        }
    }
    finally
    {
        // Close the writer even if an append fails.
        writer.Close();
    }

    // Read the data back and check
    Log.Info("Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
    Writable key_1 = (Writable)System.Activator.CreateInstance(reader.GetKeyClass());
    Writable value_1 = (Writable)System.Activator.CreateInstance(reader.GetValueClass());
    int lc = 0;
    try
    {
        while (reader.Next(key_1, value_1))
        {
            Assert.Equal("key" + lc, key_1.ToString());
            Assert.Equal("value" + lc, value_1.ToString());
            lc++;
        }
    }
    finally
    {
        reader.Close();
    }
    Assert.Equal(lines, lc);

    // Delete temporary files
    fs.Delete(filePath, false);
    Log.Info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
}
/// <summary>
/// Fills the first <paramref name="srcs"/> entries of <paramref name="src"/> with
/// paths under <paramref name="testdir"/> (named by
/// <c>Sharpen.Extensions.ToString(i + 10, 36)</c>) and opens an
/// IntWritable/IntWritable sequence-file writer for each of them.
/// </summary>
/// <param name="testdir">Directory that will contain the files.</param>
/// <param name="conf">Configuration used to resolve the file system and create the writers.</param>
/// <param name="srcs">Number of files/writers to create.</param>
/// <param name="src">Output array that receives the generated paths.</param>
/// <returns>One open writer per generated path, in the same order.</returns>
/// <exception cref="System.IO.IOException"/>
private static SequenceFile.Writer[] CreateWriters(Path testdir, Configuration conf, int srcs, Path[] src)
{
    // All paths are assigned before any writer is opened.
    int index = 0;
    while (index < srcs)
    {
        src[index] = new Path(testdir, Sharpen.Extensions.ToString(index + 10, 36));
        ++index;
    }

    SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
    for (int slot = 0; slot < srcs; ++slot)
    {
        writers[slot] = new SequenceFile.Writer(
            testdir.GetFileSystem(conf), conf, src[slot], typeof(IntWritable), typeof(IntWritable));
    }

    return writers;
}
/// <summary>
/// Checks that closing a <c>SequenceFile.Reader</c> twice does not corrupt later
/// readers. Two block-compressed files are written, one reader is closed twice, and
/// two new readers are then read in an interleaved order, each expected to return
/// its own file's records.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestClose()
{
    Configuration conf = new Configuration();
    LocalFileSystem fs = FileSystem.GetLocal(conf);

    // create a sequence file 1
    Path path1 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test1.seq");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, path1, typeof(Text), typeof(NullWritable), SequenceFile.CompressionType.Block);
    try
    {
        writer.Append(new Text("file1-1"), NullWritable.Get());
        writer.Append(new Text("file1-2"), NullWritable.Get());
    }
    finally
    {
        writer.Close();
    }

    Path path2 = new Path(Runtime.GetProperty("test.build.data", ".") + "/test2.seq");
    writer = SequenceFile.CreateWriter(
        fs, conf, path2, typeof(Text), typeof(NullWritable), SequenceFile.CompressionType.Block);
    try
    {
        writer.Append(new Text("file2-1"), NullWritable.Get());
        writer.Append(new Text("file2-2"), NullWritable.Get());
    }
    finally
    {
        writer.Close();
    }

    // Create a reader which uses 4 BuiltInZLibInflater instances
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path1, conf);
    // Returns the 4 BuiltInZLibInflater instances to the CodecPool
    reader.Close();
    // The second close _could_ erroneously returns the same
    // 4 BuiltInZLibInflater instances to the CodecPool again
    reader.Close();

    // The first reader gets 4 BuiltInZLibInflater instances from the CodecPool
    SequenceFile.Reader reader1 = new SequenceFile.Reader(fs, path1, conf);
    try
    {
        // read first value from reader1
        Text text = new Text();
        reader1.Next(text);
        Assert.Equal("file1-1", text.ToString());

        // The second reader _could_ get the same 4 BuiltInZLibInflater
        // instances from the CodePool as reader1
        SequenceFile.Reader reader2 = new SequenceFile.Reader(fs, path2, conf);
        try
        {
            // read first value from reader2
            reader2.Next(text);
            Assert.Equal("file2-1", text.ToString());

            // read second value from reader1
            reader1.Next(text);
            Assert.Equal("file1-2", text.ToString());

            // read second value from reader2 (this throws an exception)
            reader2.Next(text);
            Assert.Equal("file2-2", text.ToString());

            NUnit.Framework.Assert.IsFalse(reader1.Next(text));
            NUnit.Framework.Assert.IsFalse(reader2.Next(text));
        }
        finally
        {
            // Previously never closed.
            reader2.Close();
        }
    }
    finally
    {
        // Previously never closed.
        reader1.Close();
    }
}
/// <summary>
/// Writes <c>Numrecords</c> records through <c>WriteSequenceFile</c>, then opens the
/// file five times, alternating between the path-based and the stream-based
/// <c>SequenceFile.Reader</c> constructors, and runs <c>ForOffset</c> with a fixed set
/// of offsets on each reader. The file is deleted afterwards.
/// </summary>
public virtual void TestLowSyncpoint()
{
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.GetLocal(conf);
    Path path = new Path(Runtime.GetProperty("test.build.data", "/tmp"), "sequencefile.sync.test");
    IntWritable input = new IntWritable();
    Text val = new Text();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, typeof(IntWritable), typeof(Text));

    try
    {
        WriteSequenceFile(writer, Numrecords);

        for (int round = 0; round < 5; round++)
        {
            // try different SequenceFile.Reader constructors
            SequenceFile.Reader reader;
            if (round % 2 != 0)
            {
                FSDataInputStream stream = fs.Open(path);
                long fileLength = fs.GetFileStatus(path).GetLen();
                int bufferSize = conf.GetInt("io.file.buffer.size", 4096);
                reader = new SequenceFile.Reader(stream, bufferSize, 0L, fileLength, conf);
            }
            else
            {
                reader = new SequenceFile.Reader(fs, path, conf);
            }

            try
            {
                ForOffset(reader, input, val, round, 0, 0);
                ForOffset(reader, input, val, round, 65, 0);
                ForOffset(reader, input, val, round, 2000, 21);
                ForOffset(reader, input, val, round, 0, 0);
            }
            finally
            {
                reader.Close();
            }
        }
    }
    finally
    {
        fs.Delete(path, false);
    }
}
/// <summary>
/// Creates <c>part-0</c> under <paramref name="dir"/> and appends random key/value
/// records until at least <paramref name="fileSizeInMB"/> MB of key and value bytes
/// have been written.
/// </summary>
/// <param name="conf">Configuration supplying the key/value size bounds (<c>MinKey</c>, <c>MaxKey</c>, <c>MinValue</c>, <c>MaxValue</c>).</param>
/// <param name="fs">File system on which the file is created.</param>
/// <param name="dir">Target directory; must not already exist with content.</param>
/// <param name="fileSizeInMB">Amount of key and value data to write, in MB.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when <paramref name="dir"/> already exists and is non-empty.
/// </exception>
private static void CreateBigMapInputFile(Configuration conf, FileSystem fs, Path dir, long fileSizeInMB)
{
    // Check if the input path exists and is non-empty
    if (fs.Exists(dir))
    {
        FileStatus[] list = fs.ListStatus(dir);
        if (list.Length > 0)
        {
            throw new IOException("Input path: " + dir + " already exists... ");
        }
    }

    Path file = new Path(dir, "part-0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, file, typeof(BytesWritable), typeof(BytesWritable),
        SequenceFile.CompressionType.None);
    long start;
    try
    {
        long numBytesToWrite = fileSizeInMB * 1024 * 1024;
        int minKeySize = conf.GetInt(MinKey, 10);
        int keySizeRange = conf.GetInt(MaxKey, 1000) - minKeySize;
        int minValueSize = conf.GetInt(MinValue, 0);
        int valueSizeRange = conf.GetInt(MaxValue, 20000) - minValueSize;
        BytesWritable randomKey = new BytesWritable();
        BytesWritable randomValue = new BytesWritable();

        Log.Info("Writing " + numBytesToWrite + " bytes to " + file + " with " + "minKeySize: "
            + minKeySize + " keySizeRange: " + keySizeRange + " minValueSize: " + minValueSize
            + " valueSizeRange: " + valueSizeRange);

        start = Runtime.CurrentTimeMillis();
        while (numBytesToWrite > 0)
        {
            // A zero range means a fixed size, so the random call is skipped.
            int keyLength = minKeySize + (keySizeRange != 0 ? random.Next(keySizeRange) : 0);
            randomKey.SetSize(keyLength);
            RandomizeBytes(randomKey.GetBytes(), 0, randomKey.GetLength());

            int valueLength = minValueSize + (valueSizeRange != 0 ? random.Next(valueSizeRange) : 0);
            randomValue.SetSize(valueLength);
            RandomizeBytes(randomValue.GetBytes(), 0, randomValue.GetLength());

            writer.Append(randomKey, randomValue);
            numBytesToWrite -= keyLength + valueLength;
        }
    }
    finally
    {
        // Close the writer even if the write loop fails part-way.
        writer.Close();
    }

    long end = Runtime.CurrentTimeMillis();
    Log.Info("Created " + file + " of size: " + fileSizeInMB + "MB in " + (end - start) / 1000 + "secs");
}
/// <summary>
/// Writes <c>Records</c> identical records to <c>test.seq</c> with the given
/// compression type, merges that single file through <c>SequenceFile.Sorter.Merge</c>
/// and asserts that the merged iterator yields <c>Records</c> entries and reports a
/// progress of 1.0.
/// </summary>
/// <param name="compressionType">Compression type used for the input file.</param>
/// <exception cref="System.IO.IOException"/>
public virtual void RunTest(SequenceFile.CompressionType compressionType)
{
    JobConf job = new JobConf();
    FileSystem fs = FileSystem.GetLocal(job);
    Path dir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path seqFile = new Path(dir, "test.seq");
    Path mergeTmpDir = new Path(dir, "tmp");

    fs.Delete(dir, true);
    FileInputFormat.SetInputPaths(job, dir);
    fs.Mkdirs(mergeTmpDir);

    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, job, seqFile, typeof(LongWritable), typeof(Text), compressionType, new DefaultCodec());
    try
    {
        int written = 0;
        while (written < Records)
        {
            key.Set(1234);
            value.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
            writer.Append(key, value);
            ++written;
        }
    }
    finally
    {
        writer.Close();
    }

    long fileLength = fs.GetFileStatus(seqFile).GetLen();
    Log.Info("With compression = " + compressionType + ": " + "compressed length = " + fileLength);

    SequenceFile.Sorter sorter = new SequenceFile.Sorter(
        fs, job.GetOutputKeyComparator(), job.GetMapOutputKeyClass(), job.GetMapOutputValueClass(), job);
    SequenceFile.Sorter.RawKeyValueIterator merged =
        sorter.Merge(new Path[] { seqFile }, mergeTmpDir, false);

    // Drain the iterator, counting the entries it yields.
    int count = 0;
    for (; merged.Next(); count++)
    {
    }

    NUnit.Framework.Assert.AreEqual(Records, count);
    NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
}
/// <summary>
/// Deletes <paramref name="file"/> and rewrites it with <paramref name="count"/>
/// records from a <c>RandomDatum.Generator</c> seeded with <paramref name="seed"/>,
/// using the given compression settings and metadata.
/// </summary>
/// <param name="fs">File system holding the file.</param>
/// <param name="count">Number of records to write.</param>
/// <param name="seed">Seed for the record generator.</param>
/// <param name="file">Path of the sequence file.</param>
/// <param name="compressionType">Compression type passed to the writer.</param>
/// <param name="codec">Compression codec passed to the writer.</param>
/// <param name="metadata">Metadata passed to the writer.</param>
/// <exception cref="System.IO.IOException"/>
private void WriteMetadataTest(FileSystem fs, int count, int seed, Path file,
    SequenceFile.CompressionType compressionType, CompressionCodec codec, SequenceFile.Metadata metadata)
{
    fs.Delete(file, true);
    Log.Info("creating " + count + " records with metadata and with " + compressionType + " compression");

    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, file, typeof(RandomDatum), typeof(RandomDatum), compressionType, codec, null, metadata);
    try
    {
        RandomDatum.Generator generator = new RandomDatum.Generator(seed);
        for (int i = 0; i < count; i++)
        {
            generator.Next();
            RandomDatum key = generator.GetKey();
            RandomDatum value = generator.GetValue();
            writer.Append(key, value);
        }
    }
    finally
    {
        // Close the writer even if an append fails.
        writer.Close();
    }
}
/// <summary>
/// Prepares the job: wipes and recreates the test directory, configures <c>conf</c> for
/// a local sequence-file job with two map tasks, writes the two input files
/// <c>in/part0</c> and <c>in/part1</c> (three IntWritable pairs each) and creates the
/// <c>JobClient</c>.
/// </summary>
/// <exception cref="System.IO.IOException">Thrown when a directory cannot be created.</exception>
public virtual void Configure()
{
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    fs.Delete(testdir, true);

    conf.SetInputFormat(typeof(SequenceFileInputFormat));
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetOutputKeyClass(typeof(IntWritable));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetMapOutputValueClass(typeof(IntWritable));
    // set up two map jobs, so we can test merge phase in Reduce also
    conf.SetNumMapTasks(2);
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));

    if (!fs.Mkdirs(testdir))
    {
        throw new IOException("Mkdirs failed to create " + testdir.ToString());
    }
    if (!fs.Mkdirs(inDir))
    {
        throw new IOException("Mkdirs failed to create " + inDir.ToString());
    }

    // set up input data in 2 files
    Path inFile = new Path(inDir, "part0");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, inFile, typeof(IntWritable), typeof(IntWritable));
    try
    {
        writer.Append(new IntWritable(11), new IntWritable(999));
        writer.Append(new IntWritable(23), new IntWritable(456));
        writer.Append(new IntWritable(10), new IntWritable(780));
    }
    finally
    {
        // Close the writer even if an append fails.
        writer.Close();
    }

    inFile = new Path(inDir, "part1");
    writer = SequenceFile.CreateWriter(fs, conf, inFile, typeof(IntWritable), typeof(IntWritable));
    try
    {
        writer.Append(new IntWritable(45), new IntWritable(100));
        writer.Append(new IntWritable(18), new IntWritable(200));
        writer.Append(new IntWritable(27), new IntWritable(300));
    }
    finally
    {
        writer.Close();
    }

    jc = new JobClient(conf);
}
/// <summary>
/// Writes two (long, string) records to a sequence file and reads them back through
/// <c>Reader.Next(object)</c> / <c>GetCurrentValue(object)</c>, expecting the same
/// keys and values followed by a null at end of file.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestJavaSerialization()
{
    Path file = new Path(Runtime.GetProperty("test.build.data", ".") + "/testseqser.seq");
    fs.Delete(file, true);

    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(long), typeof(string));
    try
    {
        writer.Append(1L, "one");
        writer.Append(2L, "two");
    }
    finally
    {
        // Close the writer even if an append fails.
        writer.Close();
    }

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try
    {
        Assert.Equal(1L, reader.Next((object)null));
        Assert.Equal("one", reader.GetCurrentValue((object)null));
        Assert.Equal(2L, reader.Next((object)null));
        Assert.Equal("two", reader.GetCurrentValue((object)null));
        NUnit.Framework.Assert.IsNull(reader.Next((object)null));
    }
    finally
    {
        // Close the reader even if an assertion fails.
        reader.Close();
    }
}
// clean up after all to restore the system state
/// <summary>
/// Runs <c>Cleanup</c>, then writes the map input file <c>in_file</c> under
/// <c>MapInputDir</c> by passing <paramref name="rootName"/> to <c>ListSubtree</c>.
/// <c>nrFiles</c> is reset to zero before the listing starts.
/// </summary>
/// <param name="rootName">Root path whose subtree is listed into the input file.</param>
/// <exception cref="System.IO.IOException"/>
private void CreateInputFile(string rootName)
{
    // clean up if previous run failed
    Cleanup();

    Path mapInput = new Path(MapInputDir, "in_file");
    SequenceFile.Writer listingWriter = SequenceFile.CreateWriter(
        fs, fsConfig, mapInput, typeof(Text), typeof(LongWritable),
        SequenceFile.CompressionType.None);
    try
    {
        nrFiles = 0;
        Path root = new Path(rootName);
        ListSubtree(root, listingWriter);
    }
    finally
    {
        listingWriter.Close();
    }

    Log.Info("Created map input files.");
}
/// <summary>
/// Writes <paramref name="numRecords"/> records to <c>inFile</c>. Keys are the text
/// form of 1..numRecords; each value is a random byte array whose length comes from
/// <c>random.Next(10)</c>.
/// </summary>
/// <param name="numRecords">Number of records to write.</param>
/// <exception cref="System.Exception"/>
private static void CreateSequenceFile(int numRecords)
{
    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, inFile, typeof(Text), typeof(BytesWritable));
    try
    {
        int recordNumber = 1;
        while (recordNumber <= numRecords)
        {
            Text key = new Text(Sharpen.Extensions.ToString(recordNumber));
            byte[] payload = new byte[random.Next(10)];
            random.NextBytes(payload);
            writer.Append(key, new BytesWritable(payload));
            recordNumber++;
        }
    }
    finally
    {
        writer.Close();
    }
}
/// <summary>
/// Creates a record writer over a sequence file at the task's output path. When
/// output compression is enabled for the job, the job's compression type and codec
/// class (default <c>DefaultCodec</c>) are used; otherwise the file is uncompressed.
/// </summary>
/// <param name="ignored">Not used; the file system is resolved from the output path.</param>
/// <param name="job">Job configuration.</param>
/// <param name="name">Name used to resolve the task output path.</param>
/// <param name="progress">Progress callback passed to the sequence-file writer.</param>
/// <returns>A <c>_RecordWriter_138</c> wrapping the sequence-file writer.</returns>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<BytesWritable, BytesWritable> GetRecordWriter(
    FileSystem ignored, JobConf job, string name, Progressable progress)
{
    // get the path of the temporary output file
    Path outputFile = FileOutputFormat.GetTaskOutputPath(job, name);
    FileSystem outputFs = outputFile.GetFileSystem(job);

    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.None;
    CompressionCodec codec = null;
    if (GetCompressOutput(job))
    {
        // find the kind of compression to do
        compressionType = GetOutputCompressionType(job);
        // find the right codec
        Type codecClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
        codec = ReflectionUtils.NewInstance(codecClass, job);
    }

    SequenceFile.Writer sequenceWriter = SequenceFile.CreateWriter(
        outputFs, job, outputFile,
        GetSequenceFileOutputKeyClass(job), GetSequenceFileOutputValueClass(job),
        compressionType, codec, progress);
    return new _RecordWriter_138(sequenceWriter);
}
/// <summary>Write a partition file for the given job, using the Sampler provided.</summary>
/// <remarks>
/// Queries the sampler for a sample keyset, sorts by the output key
/// comparator, selects the keys for each rank, and writes to the destination
/// returned from
/// <see cref="TotalOrderPartitioner{K, V}.GetPartitionFile(Org.Apache.Hadoop.Conf.Configuration)"/>.
/// An existing file at that destination is deleted first.
/// </remarks>
/// <param name="job">Job supplying the configuration, input format, comparator and reduce count.</param>
/// <param name="sampler">Sampler used to obtain the sample keys.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.Exception"/>
public static void WritePartitionFile<K, V>(Job job, InputSampler.Sampler<K, V> sampler)
{
    // getInputFormat, getOutputKeyComparator
    Configuration conf = job.GetConfiguration();
    InputFormat inf = ReflectionUtils.NewInstance(job.GetInputFormatClass(), conf);
    int numPartitions = job.GetNumReduceTasks();
    K[] samples = (K[])sampler.GetSample(inf, job);
    Log.Info("Using " + samples.Length + " samples");
    RawComparator<K> comparator = (RawComparator<K>)job.GetSortComparator();
    Arrays.Sort(samples, comparator);

    Path dst = new Path(TotalOrderPartitioner.GetPartitionFile(conf));
    FileSystem fs = dst.GetFileSystem(conf);
    if (fs.Exists(dst))
    {
        fs.Delete(dst, false);
    }

    SequenceFile.Writer writer = SequenceFile.CreateWriter(
        fs, conf, dst, job.GetMapOutputKeyClass(), typeof(NullWritable));
    try
    {
        NullWritable nullValue = NullWritable.Get();
        float stepSize = samples.Length / (float)numPartitions;
        int last = -1;
        for (int i = 1; i < numPartitions; ++i)
        {
            int k = Math.Round(stepSize * i);
            // Skip forward while the candidate compares equal to the previously written key.
            while (last >= k && comparator.Compare(samples[last], samples[k]) == 0)
            {
                ++k;
            }
            writer.Append(samples[k], nullValue);
            last = k;
        }
    }
    finally
    {
        // Close the writer even if an append or the sample indexing fails.
        writer.Close();
    }
}
/// <summary>
/// Opens <paramref name="path"/> for writing (overwriting any existing file) and wraps
/// the stream in a BytesWritable/BytesWritable sequence-file writer. <c>"lzo"</c> and
/// <c>"gz"</c> select block compression with the matching codec; <c>"none"</c> writes
/// uncompressed.
/// </summary>
/// <param name="fs">File system on which the file is created.</param>
/// <param name="path">Path of the file to create.</param>
/// <param name="osBufferSize">Buffer size passed to <c>fs.Create</c>.</param>
/// <param name="compress">Compression name: <c>"lzo"</c>, <c>"gz"</c> or <c>"none"</c>.</param>
/// <param name="minBlkSize">Not used by this constructor body.</param>
/// <exception cref="System.IO.IOException">
/// Thrown with "Codec not supported." when <paramref name="compress"/> is none of the supported names.
/// </exception>
public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize, string compress, int minBlkSize)
{
    Configuration conf = new Configuration();
    conf.SetBoolean("hadoop.native.lib", true);

    bool uncompressed = "none".Equals(compress);
    CompressionCodec codec = null;
    if ("lzo".Equals(compress))
    {
        codec = Compression.Algorithm.Lzo.GetCodec();
    }
    else if ("gz".Equals(compress))
    {
        codec = Compression.Algorithm.Gz.GetCodec();
    }
    else if (!uncompressed)
    {
        throw new IOException("Codec not supported.");
    }

    this.fsdos = fs.Create(path, true, osBufferSize);

    // The codec stays null on the uncompressed path, so one call covers both cases.
    SequenceFile.CompressionType compressionType = uncompressed
        ? SequenceFile.CompressionType.None
        : SequenceFile.CompressionType.Block;
    writer = SequenceFile.CreateWriter(
        conf, fsdos, typeof(BytesWritable), typeof(BytesWritable), compressionType, codec);
}