/// <summary>
/// Creates a writer for the directory named by <paramref name="dirName"/> by forwarding
/// to the option-based constructor with the key class, value class, compression
/// type/codec pair and progress callback.
/// </summary>
/// <remarks>
/// <paramref name="fs"/> is accepted for signature compatibility only; this overload
/// does not pass it on.
/// </remarks>
/// <param name="conf">configuration handed to the delegated constructor</param>
/// <param name="fs">unused by this overload</param>
/// <param name="dirName">directory name, wrapped in a <c>Path</c></param>
/// <param name="keyClass">type of the keys</param>
/// <param name="valClass">type of the values</param>
/// <param name="compress">compression type</param>
/// <param name="codec">compression codec paired with <paramref name="compress"/></param>
/// <param name="progress">progress callback</param>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    Type keyClass,
    Type valClass,
    SequenceFile.CompressionType compress,
    CompressionCodec codec,
    Progressable progress)
    : this(
        conf,
        new Path(dirName),
        KeyClass(keyClass),
        ValueClass(valClass),
        Compression(compress, codec),
        Progressable(progress))
{
}
/// <summary>
/// Creates a writer for the directory named by <paramref name="dirName"/> by forwarding
/// to the option-based constructor with the key comparator, value class, compression
/// type and progress callback.
/// </summary>
/// <remarks>
/// <paramref name="fs"/> is accepted for signature compatibility only; this overload
/// does not pass it on.
/// </remarks>
/// <param name="conf">configuration handed to the delegated constructor</param>
/// <param name="fs">unused by this overload</param>
/// <param name="dirName">directory name, wrapped in a <c>Path</c></param>
/// <param name="comparator">comparator for the keys</param>
/// <param name="valClass">type of the values</param>
/// <param name="compress">compression type</param>
/// <param name="progress">progress callback</param>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    WritableComparator comparator,
    Type valClass,
    SequenceFile.CompressionType compress,
    Progressable progress)
    : this(
        conf,
        new Path(dirName),
        Comparator(comparator),
        ValueClass(valClass),
        Compression(compress),
        Progressable(progress))
{
}
/// <summary>
/// Runs a three-line text input through a job configured with the local framework and
/// checks that the job completes successfully, that the first reduce output file exists,
/// and that the file's compressed flag matches <paramref name="redCompression"/>.
/// </summary>
/// <param name="compressMapOutputs">value passed to <c>SetCompressMapOutput</c></param>
/// <param name="redCompression">compression type configured for the sequence-file output</param>
/// <param name="includeCombine">when true, <c>IdentityReducer</c> is set as the combiner</param>
/// <exception cref="System.Exception"/>
private void CheckCompression(bool compressMapOutputs, SequenceFile.CompressionType redCompression, bool includeCombine)
{
    JobConf conf = new JobConf(typeof(TestMapRed));
    Path testdir = new Path(TestDir.GetAbsolutePath());
    Path inDir = new Path(testdir, "in");
    Path outDir = new Path(testdir, "out");
    FileSystem fs = FileSystem.Get(conf);
    // Start from a clean test directory.
    fs.Delete(testdir, true);
    FileInputFormat.SetInputPaths(conf, inDir);
    FileOutputFormat.SetOutputPath(conf, outDir);
    conf.SetMapperClass(typeof(TestMapRed.MyMap));
    conf.SetReducerClass(typeof(TestMapRed.MyReduce));
    conf.SetOutputKeyClass(typeof(Text));
    conf.SetOutputValueClass(typeof(Text));
    conf.SetOutputFormat(typeof(SequenceFileOutputFormat));
    conf.Set(MRConfig.FrameworkName, MRConfig.LocalFrameworkName);
    if (includeCombine)
    {
        conf.SetCombinerClass(typeof(IdentityReducer));
    }
    conf.SetCompressMapOutput(compressMapOutputs);
    SequenceFileOutputFormat.SetOutputCompressionType(conf, redCompression);
    try
    {
        if (!fs.Mkdirs(testdir))
        {
            throw new IOException("Mkdirs failed to create " + testdir.ToString());
        }
        if (!fs.Mkdirs(inDir))
        {
            throw new IOException("Mkdirs failed to create " + inDir.ToString());
        }
        Path inFile = new Path(inDir, "part0");
        DataOutputStream f = fs.Create(inFile);
        try
        {
            f.WriteBytes("Owen was here\n");
            f.WriteBytes("Hadoop is fun\n");
            f.WriteBytes("Is this done, yet?\n");
        }
        finally
        {
            // Close the input stream even when a write fails.
            f.Close();
        }
        RunningJob rj = JobClient.RunJob(conf);
        NUnit.Framework.Assert.IsTrue("job was complete", rj.IsComplete());
        NUnit.Framework.Assert.IsTrue("job was successful", rj.IsSuccessful());
        Path output = new Path(outDir, Task.GetOutputName(0));
        NUnit.Framework.Assert.IsTrue("reduce output exists " + output, fs.Exists(output));
        SequenceFile.Reader rdr = new SequenceFile.Reader(fs, output, conf);
        try
        {
            NUnit.Framework.Assert.AreEqual("is reduce output compressed " + output, redCompression != SequenceFile.CompressionType.None, rdr.IsCompressed());
        }
        finally
        {
            // Close the reader even when the assertion fails.
            rdr.Close();
        }
    }
    finally
    {
        // Always remove the test directory, whether the checks passed or not.
        fs.Delete(testdir, true);
    }
}
/// <summary>
/// Writes a MapFile at <paramref name="path"/> containing <paramref name="records"/>
/// entries whose key and value are both the record index formatted as a zero-padded,
/// at-least-three-digit decimal string ("000", "001", ...).
/// </summary>
/// <param name="conf">configuration passed to the writer</param>
/// <param name="fs">not used by this method</param>
/// <param name="path">location of the MapFile to create</param>
/// <param name="codec">compression codec passed to the writer</param>
/// <param name="type">compression type passed to the writer</param>
/// <param name="records">number of entries to append</param>
/// <exception cref="System.IO.IOException"/>
private static void CreateMapFile(Configuration conf, FileSystem fs, Path path, CompressionCodec codec, SequenceFile.CompressionType type, int records)
{
    MapFile.Writer writer = new MapFile.Writer(
        conf,
        path,
        MapFile.Writer.KeyClass(typeof(Text)),
        MapFile.Writer.ValueClass(typeof(Text)),
        MapFile.Writer.Compression(type, codec));
    try
    {
        Text key = new Text();
        for (int j = 0; j < records; j++)
        {
            // "%03d" is a Java format string; string.Format has no such placeholder and
            // would emit the literal text for every record. "D3" zero-pads to three digits.
            key.Set(j.ToString("D3", System.Globalization.CultureInfo.InvariantCulture));
            writer.Append(key, key);
        }
    }
    finally
    {
        // Close the writer even when an append fails.
        writer.Close();
    }
}
/// <summary>
/// Writes <c>Records</c> identical key/value pairs to a sequence file using the given
/// compression type, merges that single file through a <c>SequenceFile.Sorter</c>, and
/// asserts that the merge iterator yields <c>Records</c> entries and reports progress 1.0.
/// </summary>
/// <param name="compressionType">compression type used when writing the sequence file</param>
/// <exception cref="System.IO.IOException"/>
public virtual void RunTest(SequenceFile.CompressionType compressionType)
{
    JobConf jobConf = new JobConf();
    FileSystem localFs = FileSystem.GetLocal(jobConf);
    Path baseDir = new Path(Runtime.GetProperty("test.build.data", ".") + "/mapred");
    Path seqFile = new Path(baseDir, "test.seq");
    Path mergeTmp = new Path(baseDir, "tmp");

    // Reset the working directory before writing anything.
    localFs.Delete(baseDir, true);
    FileInputFormat.SetInputPaths(jobConf, baseDir);
    localFs.Mkdirs(mergeTmp);

    LongWritable recordKey = new LongWritable();
    Text recordValue = new Text();
    SequenceFile.Writer seqWriter = SequenceFile.CreateWriter(
        localFs, jobConf, seqFile, typeof(LongWritable), typeof(Text), compressionType, new DefaultCodec());
    try
    {
        int written = 0;
        while (written < Records)
        {
            recordKey.Set(1234);
            recordValue.Set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
            seqWriter.Append(recordKey, recordValue);
            ++written;
        }
    }
    finally
    {
        seqWriter.Close();
    }

    long writtenLength = localFs.GetFileStatus(seqFile).GetLen();
    Log.Info("With compression = " + compressionType + ": " + "compressed length = " + writtenLength);

    SequenceFile.Sorter merger = new SequenceFile.Sorter(
        localFs,
        jobConf.GetOutputKeyComparator(),
        jobConf.GetMapOutputKeyClass(),
        jobConf.GetMapOutputValueClass(),
        jobConf);
    Path[] inputs = new Path[] { seqFile };
    SequenceFile.Sorter.RawKeyValueIterator merged = merger.Merge(inputs, mergeTmp, false);

    // Drain the iterator, counting every entry it produces.
    int seen = 0;
    for (; merged.Next(); seen++)
    {
    }

    NUnit.Framework.Assert.AreEqual(Records, seen);
    NUnit.Framework.Assert.AreEqual(1.0f, merged.GetProgress().Get());
}
/// <summary>
/// Deletes <paramref name="file"/> and rewrites it as a sequence file holding
/// <paramref name="count"/> key/value pairs drawn from a <c>RandomDatum.Generator</c>
/// seeded with <paramref name="seed"/>.
/// </summary>
/// <param name="fs">file system holding <paramref name="file"/></param>
/// <param name="count">number of records to write</param>
/// <param name="seed">seed for the record generator</param>
/// <param name="file">sequence file to (re)create</param>
/// <param name="compressionType">compression type passed to the writer</param>
/// <param name="codec">compression codec passed to the writer</param>
/// <exception cref="System.IO.IOException"/>
private void WriteTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType compressionType, CompressionCodec codec)
{
    fs.Delete(file, true);
    Log.Info("creating " + count + " records with " + compressionType + " compression");
    SequenceFile.Writer writer = SequenceFile.CreateWriter(fs, conf, file, typeof(RandomDatum), typeof(RandomDatum), compressionType, codec);
    try
    {
        RandomDatum.Generator generator = new RandomDatum.Generator(seed);
        for (int i = 0; i < count; i++)
        {
            generator.Next();
            RandomDatum key = generator.GetKey();
            RandomDatum value = generator.GetValue();
            writer.Append(key, value);
        }
    }
    finally
    {
        // Close the writer even when generation or an append fails.
        writer.Close();
    }
}
/// <summary>
/// Creates a MapFile compressed with a new instance of the codec type
/// <paramref name="clazz"/>, then reopens it and asserts that lookups for the keys
/// "002" and "004" return non-null.
/// </summary>
/// <param name="clazz">codec type; instantiated through its parameterless constructor</param>
/// <param name="type">compression type passed to <c>CreateMapFile</c></param>
/// <param name="records">number of records to write</param>
/// <exception cref="System.Exception"/>
private void CodecTestMapFile(Type clazz, SequenceFile.CompressionType type, int records)
{
    FileSystem fs = FileSystem.Get(conf);
    Log.Info("Creating MapFiles with " + records + " records using codec " + clazz.Name);
    Path path = new Path(new Path(Runtime.GetProperty("test.build.data", "/tmp")), clazz.Name + "-" + type + "-" + records);
    Log.Info("Writing " + path);
    // Activator.CreateInstance returns object, so cast to the codec type the callee expects.
    CompressionCodec codec = (CompressionCodec)System.Activator.CreateInstance(clazz);
    CreateMapFile(conf, fs, path, codec, type, records);
    MapFile.Reader reader = new MapFile.Reader(path, conf);
    try
    {
        Text key1 = new Text("002");
        NUnit.Framework.Assert.IsNotNull(reader.Get(key1, new Text()));
        Text key2 = new Text("004");
        NUnit.Framework.Assert.IsNotNull(reader.Get(key2, new Text()));
    }
    finally
    {
        // Close the reader even when an assertion fails.
        reader.Close();
    }
}
/// <summary>
/// Opens a <c>SequenceFile.Writer</c> on the task output path for <paramref name="name"/>
/// and returns a record writer wrapping it. Compression type and codec are taken from
/// the job only when output compression is enabled; otherwise the type is
/// <c>None</c> and the codec is null.
/// </summary>
/// <param name="ignored">not used; the file system is obtained from the output path</param>
/// <param name="job">job configuration</param>
/// <param name="name">output name resolved to a task output path</param>
/// <param name="progress">progress callback passed to the sequence-file writer</param>
/// <returns>a record writer wrapping the created sequence-file writer</returns>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<BytesWritable, BytesWritable> GetRecordWriter(FileSystem ignored, JobConf job, string name, Progressable progress)
{
    // get the path of the temporary output file
    Path file = FileOutputFormat.GetTaskOutputPath(job, name);
    FileSystem fs = file.GetFileSystem(job);
    CompressionCodec codec = null;
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.None;
    if (GetCompressOutput(job))
    {
        // find the kind of compression to do
        compressionType = GetOutputCompressionType(job);
        // find the right codec
        Type codecClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
        // Explicit cast: the instance is created reflectively from a Type.
        codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, job);
    }
    SequenceFile.Writer @out = SequenceFile.CreateWriter(
        fs,
        job,
        file,
        GetSequenceFileOutputKeyClass(job),
        GetSequenceFileOutputValueClass(job),
        compressionType,
        codec,
        progress);
    return new _RecordWriter_138(@out);
}
/// <summary>
/// Builds a <c>SequenceFile.Writer</c> on the default work file of
/// <paramref name="context"/> for the given key and value types. When output
/// compression is enabled the compression type and codec come from the context;
/// otherwise the type is <c>None</c> and the codec is null.
/// </summary>
/// <param name="context">task attempt context; also passed to the writer factory</param>
/// <param name="keyClass">type of the keys to be written</param>
/// <param name="valueClass">type of the values to be written</param>
/// <returns>the newly created sequence-file writer</returns>
/// <exception cref="System.IO.IOException"/>
protected internal virtual SequenceFile.Writer GetSequenceWriter(TaskAttemptContext context, Type keyClass, Type valueClass)
{
    Configuration configuration = context.GetConfiguration();

    SequenceFile.CompressionType kind = SequenceFile.CompressionType.None;
    CompressionCodec compressor = null;
    if (GetCompressOutput(context))
    {
        // Compression requested: pick up the configured type, then instantiate the codec.
        kind = GetOutputCompressionType(context);
        Type compressorType = GetOutputCompressorClass(context, typeof(DefaultCodec));
        compressor = (CompressionCodec)ReflectionUtils.NewInstance(compressorType, configuration);
    }

    // Resolve the temporary output file and the file system that owns it.
    Path workFile = GetDefaultWorkFile(context, string.Empty);
    FileSystem workFs = workFile.GetFileSystem(configuration);

    SequenceFile.Writer sequenceWriter = SequenceFile.CreateWriter(
        workFs, configuration, workFile, keyClass, valueClass, kind, compressor, context);
    return sequenceWriter;
}
/// <summary>
/// Opens a <c>MapFile.Writer</c> on the task output path for <paramref name="name"/>
/// and returns a record writer wrapping it. Compression type and codec are taken from
/// the job only when output compression is enabled; otherwise the type is
/// <c>None</c> and the codec is null.
/// </summary>
/// <param name="ignored">not used; the file system is obtained from the output path</param>
/// <param name="job">job configuration supplying the key/value classes and compression settings</param>
/// <param name="name">output name resolved to a task output path</param>
/// <param name="progress">progress callback passed to the MapFile writer</param>
/// <returns>a record writer wrapping the created MapFile writer</returns>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<WritableComparable, Writable> GetRecordWriter(FileSystem ignored, JobConf job, string name, Progressable progress)
{
    // get the path of the temporary output file
    Path file = FileOutputFormat.GetTaskOutputPath(job, name);
    FileSystem fs = file.GetFileSystem(job);
    CompressionCodec codec = null;
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.None;
    if (GetCompressOutput(job))
    {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.GetOutputCompressionType(job);
        // find the right codec
        Type codecClass = GetOutputCompressorClass(job, typeof(DefaultCodec));
        // Explicit cast: the instance is created reflectively from a Type.
        codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, job);
    }
    // The progress callback is handed to the MapFile writer constructor below.
    MapFile.Writer @out = new MapFile.Writer(
        job,
        fs,
        file.ToString(),
        job.GetOutputKeyClass().AsSubclass<WritableComparable>(),
        job.GetOutputValueClass().AsSubclass<Writable>(),
        compressionType,
        codec,
        progress);
    return new _RecordWriter_72(@out);
}
/// <summary>
/// Distributes <paramref name="count"/> generated records round-robin across
/// <paramref name="factor"/> sequence files named <c>file.N</c>, sorts each into
/// <c>file.N.sorted</c>, and merges the sorted files into <c>file.sorted</c>.
/// </summary>
/// <param name="fs">file system holding the files</param>
/// <param name="count">total number of records to generate</param>
/// <param name="seed">seed for the record generator</param>
/// <param name="file">base path from which all file names are derived</param>
/// <param name="compressionType">compression type for the unsorted input files</param>
/// <param name="fast">passed through to <c>NewSorter</c></param>
/// <param name="factor">number of files to create; also passed to <c>NewSorter</c></param>
/// <param name="megabytes">passed through to <c>NewSorter</c></param>
/// <exception cref="System.IO.IOException"/>
private void MergeTest(FileSystem fs, int count, int seed, Path file, SequenceFile.CompressionType compressionType, bool fast, int factor, int megabytes)
{
    // Used only in log messages; computed once instead of in every message.
    int recordsPerFile = count / factor;
    Log.Debug("creating " + factor + " files with " + recordsPerFile + " records");
    SequenceFile.Writer[] writers = new SequenceFile.Writer[factor];
    Path[] names = new Path[factor];
    Path[] sortedNames = new Path[factor];
    try
    {
        for (int i = 0; i < factor; i++)
        {
            names[i] = file.Suffix("." + i);
            sortedNames[i] = names[i].Suffix(".sorted");
            fs.Delete(names[i], true);
            fs.Delete(sortedNames[i], true);
            writers[i] = SequenceFile.CreateWriter(fs, conf, names[i], typeof(RandomDatum), typeof(RandomDatum), compressionType);
        }
        RandomDatum.Generator generator = new RandomDatum.Generator(seed);
        for (int i = 0; i < count; i++)
        {
            generator.Next();
            RandomDatum key = generator.GetKey();
            RandomDatum value = generator.GetValue();
            // Round-robin the records over the writers.
            writers[i % factor].Append(key, value);
        }
    }
    finally
    {
        // Close every writer that was opened, even when creation or an append failed.
        for (int i = 0; i < factor; i++)
        {
            if (writers[i] != null)
            {
                writers[i].Close();
            }
        }
    }
    for (int i = 0; i < factor; i++)
    {
        Log.Debug("sorting file " + i + " with " + recordsPerFile + " records");
        NewSorter(fs, fast, megabytes, factor).Sort(names[i], sortedNames[i]);
    }
    // The message previously ended in "debug"; the number is a record count.
    Log.Info("merging " + factor + " files with " + recordsPerFile + " records");
    fs.Delete(new Path(file + ".sorted"), true);
    NewSorter(fs, fast, megabytes, factor).Merge(sortedNames, file.Suffix(".sorted"));
}
/// <summary>
/// Opens a <c>MapFile.Writer</c> on the default work file of <paramref name="context"/>
/// and returns a record writer wrapping it. Compression type and codec are read from
/// the context only when output compression is enabled; otherwise the type is
/// <c>None</c> and the codec is null.
/// </summary>
/// <param name="context">task attempt context; also passed to the MapFile writer</param>
/// <returns>a record writer wrapping the created MapFile writer</returns>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<WritableComparable<object>, Writable> GetRecordWriter(TaskAttemptContext context)
{
    Configuration configuration = context.GetConfiguration();

    SequenceFile.CompressionType kind = SequenceFile.CompressionType.None;
    CompressionCodec compressor = null;
    if (GetCompressOutput(context))
    {
        // Compression requested: pick up the configured type, then instantiate the codec.
        kind = SequenceFileOutputFormat.GetOutputCompressionType(context);
        Type compressorType = GetOutputCompressorClass(context, typeof(DefaultCodec));
        compressor = (CompressionCodec)ReflectionUtils.NewInstance(compressorType, configuration);
    }

    Path workFile = GetDefaultWorkFile(context, string.Empty);
    FileSystem workFs = workFile.GetFileSystem(configuration);

    // The context is handed to the MapFile writer as its final argument.
    MapFile.Writer mapWriter = new MapFile.Writer(
        configuration,
        workFs,
        workFile.ToString(),
        context.GetOutputKeyClass().AsSubclass<WritableComparable>(),
        context.GetOutputValueClass().AsSubclass<Writable>(),
        kind,
        compressor,
        context);
    return new _RecordWriter_75(mapWriter);
}
/// <summary>
/// Creates a set in the directory named by <paramref name="dirName"/>, specifying the
/// element comparator and the compression type. Values are fixed to
/// <c>NullWritable</c>.
/// </summary>
/// <param name="conf">configuration handed to the base constructor</param>
/// <param name="fs">not passed on by this overload</param>
/// <param name="dirName">directory name, wrapped in a <c>Path</c></param>
/// <param name="comparator">comparator for the set elements</param>
/// <param name="compress">compression type</param>
/// <exception cref="System.IO.IOException"/>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    WritableComparator comparator,
    SequenceFile.CompressionType compress)
    : base(
        conf,
        new Path(dirName),
        Comparator(comparator),
        ValueClass(typeof(NullWritable)),
        Compression(compress))
{
}
/// <summary>
/// Creates a set in the directory named by <paramref name="dirName"/>, specifying the
/// element class and the compression type. The comparator is obtained from
/// <c>WritableComparator.Get</c> for <paramref name="keyClass"/> and the call is
/// forwarded to the comparator-based overload.
/// </summary>
/// <param name="conf">configuration; also used to look up the comparator</param>
/// <param name="fs">file system forwarded to the comparator-based overload</param>
/// <param name="dirName">directory name of the set</param>
/// <param name="keyClass">type of the set elements</param>
/// <param name="compress">compression type</param>
/// <exception cref="System.IO.IOException"/>
public Writer(
    Configuration conf,
    FileSystem fs,
    string dirName,
    Type keyClass,
    SequenceFile.CompressionType compress)
    : this(
        conf,
        fs,
        dirName,
        WritableComparator.Get(keyClass, conf),
        compress)
{
}
/// <summary>
/// Enables output compression on <paramref name="conf"/> and records
/// <paramref name="style"/> as the
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
/// for the output
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>.
/// </summary>
/// <param name="conf">
/// the <see cref="JobConf"/> to modify
/// </param>
/// <param name="style">
/// the <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
/// to store under <c>FileOutputFormat.CompressType</c>
/// </param>
public static void SetOutputCompressionType(JobConf conf, SequenceFile.CompressionType style)
{
    // Choosing a compression type implies that compression is switched on.
    SetCompressOutput(conf, true);

    string styleName = style.ToString();
    conf.Set(FileOutputFormat.CompressType, styleName);
}
/// <summary>
/// Enables output compression on <paramref name="job"/> and records
/// <paramref name="style"/> in the job's configuration as the
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
/// for the output
/// <see cref="Org.Apache.Hadoop.IO.SequenceFile"/>.
/// </summary>
/// <param name="job">
/// the <see cref="Org.Apache.Hadoop.Mapreduce.Job"/> to modify
/// </param>
/// <param name="style">
/// the <see cref="Org.Apache.Hadoop.IO.SequenceFile.CompressionType"/>
/// to store under <c>FileOutputFormat.CompressType</c>
/// </param>
public static void SetOutputCompressionType(Job job, SequenceFile.CompressionType style)
{
    // Choosing a compression type implies that compression is switched on.
    SetCompressOutput(job, true);

    string styleName = style.ToString();
    job.GetConfiguration().Set(FileOutputFormat.CompressType, styleName);
}
/// <summary>For debugging and testing.</summary>
/// <remarks>
/// Parses the command line, logs the effective settings, then runs the write/read,
/// sort or merge, and optional check steps of <c>TestSequenceFile</c> against the given
/// file. Prints the usage text and exits with -1 when no arguments are given, when no
/// file argument is present, or when <c>-rwonly</c> is combined with
/// <c>-nocreate</c>, <c>-merge</c> or <c>-fast</c>.
/// </remarks>
/// <param name="args">command-line arguments; any argument that is not a known option is taken as the file</param>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    int count = 1024 * 1024;
    int megabytes = 1;
    int factor = 10;
    bool create = true;
    bool rwonly = false;
    bool check = false;
    bool fast = false;
    bool merge = false;
    string compressType = "NONE";
    string compressionCodec = "org.apache.hadoop.io.compress.DefaultCodec";
    Path file = null;
    int seed = new Random().Next();
    string usage = "Usage: SequenceFile " + "[-count N] " + "[-seed #] [-check] [-compressType <NONE|RECORD|BLOCK>] " + "-codec <compressionCodec> " + "[[-rwonly] | {[-megabytes M] [-factor F] [-nocreate] [-fast] [-merge]}] " + " file";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine(usage);
        System.Environment.Exit(-1);
    }
    FileSystem fs = null;
    try
    {
        // parse command line
        for (int i = 0; i < args.Length; ++i)
        {
            string arg = args[i];
            if (arg == null)
            {
                continue;
            }
            switch (arg)
            {
                case "-count":
                    count = System.Convert.ToInt32(args[++i]);
                    break;
                case "-megabytes":
                    megabytes = System.Convert.ToInt32(args[++i]);
                    break;
                case "-factor":
                    factor = System.Convert.ToInt32(args[++i]);
                    break;
                case "-seed":
                    seed = System.Convert.ToInt32(args[++i]);
                    break;
                case "-rwonly":
                    rwonly = true;
                    break;
                case "-nocreate":
                    create = false;
                    break;
                case "-check":
                    check = true;
                    break;
                case "-fast":
                    fast = true;
                    break;
                case "-merge":
                    merge = true;
                    break;
                case "-compressType":
                    compressType = args[++i];
                    break;
                case "-codec":
                    compressionCodec = args[++i];
                    break;
                default:
                    // file is required parameter
                    file = new Path(arg);
                    break;
            }
        }
        if (file == null)
        {
            // Only options were supplied; without a file there is nothing to operate on.
            System.Console.Error.WriteLine(usage);
            System.Environment.Exit(-1);
        }
        TestSequenceFile test = new TestSequenceFile();
        fs = file.GetFileSystem(test.conf);
        Log.Info("count = " + count);
        Log.Info("megabytes = " + megabytes);
        Log.Info("factor = " + factor);
        Log.Info("create = " + create);
        Log.Info("seed = " + seed);
        Log.Info("rwonly = " + rwonly);
        Log.Info("check = " + check);
        Log.Info("fast = " + fast);
        Log.Info("merge = " + merge);
        Log.Info("compressType = " + compressType);
        Log.Info("compressionCodec = " + compressionCodec);
        Log.Info("file = " + file);
        if (rwonly && (!create || merge || fast))
        {
            System.Console.Error.WriteLine(usage);
            System.Environment.Exit(-1);
        }
        SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.ValueOf(compressType);
        CompressionCodec codec = (CompressionCodec)ReflectionUtils.NewInstance(test.conf.GetClassByName(compressionCodec), test.conf);
        if (rwonly || (create && !merge))
        {
            test.WriteTest(fs, count, seed, file, compressionType, codec);
            test.ReadTest(fs, count, seed, file);
        }
        if (!rwonly)
        {
            if (merge)
            {
                test.MergeTest(fs, count, seed, file, compressionType, fast, factor, megabytes);
            }
            else
            {
                test.SortTest(fs, count, megabytes, factor, fast, file);
            }
        }
        if (check)
        {
            test.CheckSort(fs, count, seed, file);
        }
    }
    finally
    {
        // fs is still null when argument parsing failed; closing it unguarded would
        // replace the real error with a NullReferenceException.
        if (fs != null)
        {
            fs.Close();
        }
    }
}
/// <summary>
/// Creates the file named by <paramref name="file"/> for values of the given class.
/// Keys are fixed to <c>LongWritable</c>.
/// </summary>
/// <param name="conf">configuration handed to the base constructor</param>
/// <param name="fs">not passed on by this overload</param>
/// <param name="file">file name, wrapped in a <c>Path</c></param>
/// <param name="valClass">type of the values</param>
/// <param name="compress">compression type</param>
/// <param name="progress">progress callback</param>
/// <exception cref="System.IO.IOException"/>
public Writer(
    Configuration conf,
    FileSystem fs,
    string file,
    Type valClass,
    SequenceFile.CompressionType compress,
    Progressable progress)
    : base(
        conf,
        new Path(file),
        KeyClass(typeof(LongWritable)),
        ValueClass(valClass),
        Compression(compress),
        Progressable(progress))
{
}
/// <summary>
/// Deletes any existing set at <paramref name="file"/> and writes every element of
/// <paramref name="data"/>, in array order, to a new <c>SetFile</c>.
/// </summary>
/// <param name="fs">file system holding the set</param>
/// <param name="data">elements to append</param>
/// <param name="file">name of the set to (re)create</param>
/// <param name="compress">compression type passed to the writer</param>
/// <exception cref="System.IO.IOException"/>
private static void WriteTest(FileSystem fs, RandomDatum[] data, string file, SequenceFile.CompressionType compress)
{
    MapFile.Delete(fs, file);
    Log.Info("creating with " + data.Length + " records");
    SetFile.Writer writer = new SetFile.Writer(conf, fs, file, WritableComparator.Get(typeof(RandomDatum)), compress);
    try
    {
        foreach (RandomDatum datum in data)
        {
            writer.Append(datum);
        }
    }
    finally
    {
        // Close the writer even when an append fails.
        writer.Close();
    }
}