// Create a file containing fixed length records with random data
/// <summary>
/// Writes numRecords random records of recordLen characters each to
/// targetFile (compressed when a codec is given) and returns the records
/// that were written, in order.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private AList<string> CreateFile(Path targetFile, CompressionCodec codec, int recordLen, int numRecords)
{
    AList<string> records = new AList<string>(numRecords);
    OutputStream rawStream = localFs.Create(targetFile);
    // Wrap with the codec when compression is requested; otherwise write raw.
    TextWriter recordWriter = new OutputStreamWriter(
        codec == null ? rawStream : codec.CreateOutputStream(rawStream));
    try
    {
        StringBuilder line = new StringBuilder();
        for (int rec = 0; rec < numRecords; rec++)
        {
            // Reset the builder and fill it with recordLen random characters.
            line.Length = 0;
            for (int pos = 0; pos < recordLen; pos++)
            {
                line.Append(chars[charRand.Next(chars.Length)]);
            }
            string record = line.ToString();
            records.AddItem(record);
            recordWriter.Write(record);
        }
    }
    finally
    {
        recordWriter.Close();
    }
    return records;
}
/// <summary>
/// Serializes the namespace image to <paramref name="fout"/>: magic header,
/// then one section per component (name system, inodes, snapshots,
/// delegation tokens, cache pools, string table), then the FileSummary,
/// recording the MD5 digest of all bytes written.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private void SaveInternal(FileOutputStream fout, FSImageCompression compression, string filePath)
{
    StartupProgress prog = NameNode.GetStartupProgress();
    MessageDigest digester = MD5Hash.GetDigester();
    // Every byte goes through the digest stream so the image MD5 can be
    // captured once the file is complete.
    underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(fout), digester);
    underlyingOutputStream.Write(FSImageUtil.MagicHeader);
    fileChannel = fout.GetChannel();
    FsImageProto.FileSummary.Builder b = FsImageProto.FileSummary.NewBuilder().SetOndiskVersion(FSImageUtil.FileVersion).SetLayoutVersion(NameNodeLayoutVersion.CurrentLayoutVersion);
    codec = compression.GetImageCodec();
    if (codec != null)
    {
        // Record the codec name in the summary so readers can decompress;
        // section data is written through the compressing stream while the
        // header and summary stay uncompressed on underlyingOutputStream.
        b.SetCodec(codec.GetType().GetCanonicalName());
        sectionOutputStream = codec.CreateOutputStream(underlyingOutputStream);
    }
    else
    {
        sectionOutputStream = underlyingOutputStream;
    }
    SaveNameSystemSection(b);
    // Check for cancellation right after serializing the name system section.
    // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace
    // depends on this behavior.
    context.CheckCancelled();
    Step step = new Step(StepType.Inodes, filePath);
    prog.BeginStep(Phase.SavingCheckpoint, step);
    SaveInodes(b);
    SaveSnapshots(b);
    prog.EndStep(Phase.SavingCheckpoint, step);
    step = new Step(StepType.DelegationTokens, filePath);
    prog.BeginStep(Phase.SavingCheckpoint, step);
    SaveSecretManagerSection(b);
    prog.EndStep(Phase.SavingCheckpoint, step);
    step = new Step(StepType.CachePools, filePath);
    prog.BeginStep(Phase.SavingCheckpoint, step);
    SaveCacheManagerSection(b);
    prog.EndStep(Phase.SavingCheckpoint, step);
    SaveStringTableSection(b);
    // We use the underlyingOutputStream to write the header. Therefore flush
    // the buffered stream (which is potentially compressed) first.
    FlushSectionOutputStream();
    FsImageProto.FileSummary summary = ((FsImageProto.FileSummary)b.Build());
    SaveFileSummary(underlyingOutputStream, summary);
    underlyingOutputStream.Close();
    savedDigest = new MD5Hash(digester.Digest());
}
/// <summary>
/// Writes <paramref name="contents"/> to the file <paramref name="name"/>
/// on the given file system, compressing with <paramref name="codec"/>
/// when one is supplied.
/// </summary>
/// <exception cref="System.IO.IOException"/>
private static void WriteFile(FileSystem fs, Path name, CompressionCodec codec, string contents)
{
    OutputStream stm;
    if (codec == null)
    {
        stm = fs.Create(name);
    }
    else
    {
        stm = codec.CreateOutputStream(fs.Create(name));
    }
    try
    {
        stm.Write(Sharpen.Runtime.GetBytesForString(contents));
    }
    finally
    {
        // Close in finally so the stream is not leaked when Write throws.
        stm.Close();
    }
}
/// <summary>
/// Write out a header to the given stream that indicates the chosen
/// compression codec, and return the same stream wrapped with that codec.
/// </summary>
/// <remarks>
/// The header is a boolean flag (whether a codec is configured) followed,
/// when a codec is in use, by the codec's class name. If no codec is
/// specified, simply adds buffering to the stream, so that the returned
/// stream is always buffered.
/// </remarks>
/// <param name="os">
/// The stream to write header to and wrap. This stream should
/// be unbuffered.
/// </param>
/// <returns>
/// A stream wrapped with the specified compressor, or buffering
/// if compression is not enabled.
/// </returns>
/// <exception cref="System.IO.IOException">
/// if an IO error occurs or the compressor cannot be
/// instantiated
/// </exception>
internal virtual DataOutputStream WriteHeaderAndWrapStream(OutputStream os)
{
    DataOutputStream header = new DataOutputStream(os);
    bool compressed = imageCodec != null;
    header.WriteBoolean(compressed);
    if (!compressed)
    {
        // No codec configured: just ensure the returned stream is buffered.
        return new DataOutputStream(new BufferedOutputStream(os));
    }
    // Record which codec the reader must use, then wrap the stream with it.
    Text.WriteString(header, imageCodec.GetType().GetCanonicalName());
    return new DataOutputStream(imageCodec.CreateOutputStream(os));
}
/// <summary>
/// Finishes the section currently being written: flushes the section
/// stream, appends the section's name, length and starting offset to the
/// file summary, and re-wraps the output stream for the next section.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public void CommitSection(FsImageProto.FileSummary.Builder summary, FSImageFormatProtobuf.SectionName name)
{
    long oldOffset = currentOffset;
    // Flush pending section bytes so fileChannel.Position() below reflects
    // what has actually reached the file.
    FlushSectionOutputStream();
    if (codec != null)
    {
        // Each section gets a fresh compression stream; presumably one
        // compressed stream cannot span section boundaries — see the image
        // format's per-section compression handling.
        sectionOutputStream = codec.CreateOutputStream(underlyingOutputStream);
    }
    else
    {
        sectionOutputStream = underlyingOutputStream;
    }
    // Length of the committed section is the file-position delta since the
    // previous commit.
    long length = fileChannel.Position() - oldOffset;
    summary.AddSections(FsImageProto.FileSummary.Section.NewBuilder().SetName(name.name).SetLength(length).SetOffset(currentOffset));
    currentOffset += length;
}
/// <summary>
/// Creates an IFile writer over <paramref name="@out"/>, optionally
/// compressing record data with <paramref name="codec"/> and counting
/// written records in <paramref name="writesCounter"/>.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public Writer(Configuration conf, FSDataOutputStream @out, Type keyClass, Type valueClass, CompressionCodec codec, Counters.Counter writesCounter, bool ownOutputStream)
{
    this.writtenRecordsCounter = writesCounter;
    // The checksum stream sits below compression, so the checksum is
    // computed over the (possibly compressed) bytes written to it.
    this.checksumOut = new IFileOutputStream(@out);
    this.rawOut = @out;
    this.start = this.rawOut.GetPos();
    if (codec != null)
    {
        this.compressor = CodecPool.GetCompressor(codec);
        if (this.compressor != null)
        {
            // Pooled compressor may carry state from a previous use.
            this.compressor.Reset();
            this.compressedOut = codec.CreateOutputStream(checksumOut, compressor);
            this.@out = new FSDataOutputStream(this.compressedOut, null);
            this.compressOutput = true;
        }
        else
        {
            // Pool exhausted: fall back to uncompressed output rather than fail.
            Log.Warn("Could not obtain compressor from CodecPool");
            this.@out = new FSDataOutputStream(checksumOut, null);
        }
    }
    else
    {
        this.@out = new FSDataOutputStream(checksumOut, null);
    }
    this.keyClass = keyClass;
    this.valueClass = valueClass;
    if (keyClass != null)
    {
        // Key/value serializers both write into the shared buffer.
        SerializationFactory serializationFactory = new SerializationFactory(conf);
        this.keySerializer = serializationFactory.GetSerializer(keyClass);
        this.keySerializer.Open(buffer);
        this.valueSerializer = serializationFactory.GetSerializer(valueClass);
        this.valueSerializer.Open(buffer);
    }
    this.ownOutputStream = ownOutputStream;
}
/// <summary>
/// Creates a line record writer for the task's default work file, wrapping
/// the output with the configured compression codec when output compression
/// is enabled.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
public override RecordWriter<K, V> GetRecordWriter(TaskAttemptContext job)
{
    Configuration conf = job.GetConfiguration();
    bool isCompressed = GetCompressOutput(job);
    string keyValueSeparator = conf.Get(Seperator, "\t");
    CompressionCodec codec = null;
    string extension = string.Empty;
    if (isCompressed)
    {
        // Resolve the configured codec (GzipCodec by default) and use its
        // file extension for the output name.
        Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
        codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, conf);
        extension = codec.GetDefaultExtension();
    }
    Path file = GetDefaultWorkFile(job, extension);
    FileSystem fs = file.GetFileSystem(conf);
    FSDataOutputStream fileOut = fs.Create(file, false);
    if (isCompressed)
    {
        DataOutputStream compressedOut = new DataOutputStream(codec.CreateOutputStream(fileOut));
        return new TextOutputFormat.LineRecordWriter<K, V>(compressedOut, keyValueSeparator);
    }
    return new TextOutputFormat.LineRecordWriter<K, V>(fileOut, keyValueSeparator);
}
/// <summary>
/// Checks that a BZip2-compressed file is reported splittable by
/// KeyValueTextInputFormat and that, across the generated splits, every
/// record (key = 2*i, value = i) is read exactly once.
/// </summary>
public virtual void TestSplitableCodecs()
{
    Job job = Job.GetInstance(defaultConf);
    Configuration conf = job.GetConfiguration();
    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    }
    catch (TypeLoadException)
    {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    // Log the seed so a failing run can be reproduced deterministically.
    int seed = new Random().Next();
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(job, workDir);
    int MaxLength = 500000;
    FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
    // for a variety of lengths
    for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
    {
        Log.Info("creating; entries = " + length);
        // create a file with length entries: tab-separated "2*i \t i" lines
        TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
        try
        {
            for (int i = 0; i < length; i++)
            {
                writer.Write(Sharpen.Extensions.ToString(i * 2));
                writer.Write("\t");
                writer.Write(Sharpen.Extensions.ToString(i));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable", format.IsSplitable(job, file));
        for (int i_1 = 0; i_1 < 3; i_1++)
        {
            // NOTE(review): numSplits is only logged; the new-API
            // GetSplits(job) takes no requested split count.
            int numSplits = random.Next(MaxLength / 2000) + 1;
            Log.Info("splitting: requesting = " + numSplits);
            IList<InputSplit> splits = format.GetSplits(job);
            Log.Info("splitting: got = " + splits.Count);
            // check each split
            // bits records which values were seen; a repeat means a value
            // appeared in more than one split.
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Count; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job.GetConfiguration());
                RecordReader<Text, Text> reader = format.CreateRecordReader(splits[j], context);
                // NOTE(review): clazz is assigned but never used.
                Type clazz = reader.GetType();
                MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null, MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                reader.Initialize(splits[j], mcontext);
                Text key = null;
                Text value = null;
                try
                {
                    int count = 0;
                    while (reader.NextKeyValue())
                    {
                        key = reader.GetCurrentKey();
                        value = reader.GetCurrentValue();
                        int k = System.Convert.ToInt32(key.ToString());
                        int v = System.Convert.ToInt32(value.ToString());
                        // Keys were written as 2*i with value i; verify both halves.
                        NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                        NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                        Log.Debug("read " + k + "," + v);
                        NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        count++;
                    }
                    if (count > 0)
                    {
                        Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                    else
                    {
                        Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            // Every value 0..length-1 must have been read by exactly one split.
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
        }
    }
}
/// <summary>
/// Old-API (mapred) splittable-codec test: writes one integer per line into
/// a BZip2-compressed file and checks that the splits produced by
/// TextInputFormat together read every line exactly once.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public virtual void TestSplitableCodecs()
{
    JobConf conf = new JobConf(defaultConf);
    // Seed is logged below so a failing run can be reproduced.
    int seed = new Random().Next();
    // Create the codec
    CompressionCodec codec = null;
    try
    {
        codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    }
    catch (TypeLoadException)
    {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
    // A reporter that does nothing
    Reporter reporter = Reporter.Null;
    Log.Info("seed = " + seed);
    Random random = new Random(seed);
    FileSystem localFs = FileSystem.GetLocal(conf);
    localFs.Delete(workDir, true);
    FileInputFormat.SetInputPaths(conf, workDir);
    int MaxLength = 500000;
    // for a variety of lengths
    for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
    {
        Log.Info("creating; entries = " + length);
        // create a file with length entries, one integer value per line
        TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create(file)));
        try
        {
            for (int i = 0; i < length; i++)
            {
                writer.Write(Sharpen.Extensions.ToString(i));
                writer.Write("\n");
            }
        }
        finally
        {
            writer.Close();
        }
        // try splitting the file in a variety of sizes
        TextInputFormat format = new TextInputFormat();
        format.Configure(conf);
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i_1 = 0; i_1 < 3; i_1++)
        {
            int numSplits = random.Next(MaxLength / 2000) + 1;
            Log.Info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.GetSplits(conf, numSplits);
            Log.Info("splitting: got = " + splits.Length);
            // check each split
            // bits records which line values have been read; a repeat means
            // the value appeared in more than one split.
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.Length; j++)
            {
                Log.Debug("split[" + j + "]= " + splits[j]);
                RecordReader<LongWritable, Text> reader = format.GetRecordReader(splits[j], conf, reporter);
                try
                {
                    int counter = 0;
                    while (reader.Next(key, value))
                    {
                        int v = System.Convert.ToInt32(value.ToString());
                        Log.Debug("read " + v);
                        if (bits.Get(v))
                        {
                            // Log the reader position to help diagnose the
                            // duplicate before the assertion below fails.
                            Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos());
                        }
                        NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                        bits.Set(v);
                        counter++;
                    }
                    if (counter > 0)
                    {
                        Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter);
                    }
                    else
                    {
                        Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            // Every value 0..length-1 must have been read by exactly one split.
            NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality());
        }
    }
}
/// <summary>
/// Creates a line record writer for the task output path, compressing the
/// stream with the configured codec when output compression is enabled.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public override RecordWriter<K, V> GetRecordWriter(FileSystem ignored, JobConf job, string name, Progressable progress)
{
    bool isCompressed = GetCompressOutput(job);
    string keyValueSeparator = job.Get("mapreduce.output.textoutputformat.separator", "\t");
    if (isCompressed)
    {
        // Resolve the configured codec (GzipCodec by default) and append its
        // default extension to the output file name.
        Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
        // create the named codec
        CompressionCodec codec = ReflectionUtils.NewInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.GetTaskOutputPath(job, name + codec.GetDefaultExtension());
        FileSystem fs = file.GetFileSystem(job);
        FSDataOutputStream fileOut = fs.Create(file, progress);
        DataOutputStream compressedOut = new DataOutputStream(codec.CreateOutputStream(fileOut));
        return new TextOutputFormat.LineRecordWriter<K, V>(compressedOut, keyValueSeparator);
    }
    // Uncompressed: write the raw task output file directly.
    Path plainFile = FileOutputFormat.GetTaskOutputPath(job, name);
    FileSystem plainFs = plainFile.GetFileSystem(job);
    FSDataOutputStream plainOut = plainFs.Create(plainFile, progress);
    return new TextOutputFormat.LineRecordWriter<K, V>(plainOut, keyValueSeparator);
}