Example #1
0
        /// <summary>
        /// Creates <paramref name="targetFile"/> on the local file system and fills it
        /// with <paramref name="numRecords"/> back-to-back records, each made of
        /// <paramref name="recordLen"/> characters picked at random from <c>chars</c>.
        /// </summary>
        /// <param name="targetFile">Path of the file to create.</param>
        /// <param name="codec">
        /// Codec used to wrap the output stream; when null the data is written
        /// straight to the file stream.
        /// </param>
        /// <param name="recordLen">Number of characters in every record.</param>
        /// <param name="numRecords">Number of records to generate.</param>
        /// <returns>The generated records, in the order they were written.</returns>
        /// <exception cref="System.IO.IOException"/>
        private AList <string> CreateFile(Path targetFile, CompressionCodec codec, int recordLen
                                          , int numRecords)
        {
            AList <string> written = new AList <string>(numRecords);
            OutputStream   sink    = localFs.Create(targetFile);

            // Route the data through the codec only when one was supplied.
            if (codec != null)
            {
                sink = codec.CreateOutputStream(sink);
            }
            TextWriter output = new OutputStreamWriter(sink);

            try
            {
                StringBuilder record = new StringBuilder();
                for (int recordIndex = 0; recordIndex < numRecords; recordIndex++)
                {
                    // Reuse one buffer for every record; empty it before refilling.
                    record.Length = 0;
                    for (int position = 0; position < recordLen; position++)
                    {
                        int pick = charRand.Next(chars.Length);
                        record.Append(chars[pick]);
                    }
                    string text = record.ToString();
                    written.AddItem(text);
                    output.Write(text);
                }
            }
            finally
            {
                output.Close();
            }
            return written;
        }
            /// <summary>
            /// Writes a complete image to <paramref name="fout"/>: the magic header, then
            /// each section via the Save* helpers, and finally the file summary. On
            /// success the underlying stream is closed and the MD5 digest of the output
            /// is stored in <c>savedDigest</c>.
            /// </summary>
            /// <param name="fout">
            /// Destination file stream. It is wrapped in a buffered digest stream, and its
            /// channel is kept in <c>fileChannel</c>.
            /// </param>
            /// <param name="compression">Supplies the image codec; a null codec means sections are written uncompressed.</param>
            /// <param name="filePath">Used to label the startup-progress steps.</param>
            /// <exception cref="System.IO.IOException"/>
            private void SaveInternal(FileOutputStream fout, FSImageCompression compression,
                                      string filePath)
            {
                StartupProgress prog     = NameNode.GetStartupProgress();
                MessageDigest   digester = MD5Hash.GetDigester();

                // Everything written through underlyingOutputStream goes via the digest
                // stream built on `digester`, which is read back at the end of this method.
                underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(fout), digester
                                                                );
                underlyingOutputStream.Write(FSImageUtil.MagicHeader);
                fileChannel = fout.GetChannel();
                FsImageProto.FileSummary.Builder b = FsImageProto.FileSummary.NewBuilder().SetOndiskVersion
                                                         (FSImageUtil.FileVersion).SetLayoutVersion(NameNodeLayoutVersion.CurrentLayoutVersion
                                                                                                    );
                codec = compression.GetImageCodec();
                // Sections go through the codec when one is configured; the codec's type
                // name is recorded in the summary builder.
                if (codec != null)
                {
                    b.SetCodec(codec.GetType().GetCanonicalName());
                    sectionOutputStream = codec.CreateOutputStream(underlyingOutputStream);
                }
                else
                {
                    sectionOutputStream = underlyingOutputStream;
                }
                SaveNameSystemSection(b);
                // Check for cancellation right after serializing the name system section.
                // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace
                // depends on this behavior.
                context.CheckCancelled();
                // Each group of sections below is bracketed by BeginStep/EndStep so that
                // startup progress is reported per step type.
                Step step = new Step(StepType.Inodes, filePath);

                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveInodes(b);
                SaveSnapshots(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                step = new Step(StepType.DelegationTokens, filePath);
                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveSecretManagerSection(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                step = new Step(StepType.CachePools, filePath);
                prog.BeginStep(Phase.SavingCheckpoint, step);
                SaveCacheManagerSection(b);
                prog.EndStep(Phase.SavingCheckpoint, step);
                SaveStringTableSection(b);
                // We use the underlyingOutputStream to write the header. Therefore flush
                // the buffered stream (which is potentially compressed) first.
                FlushSectionOutputStream();
                FsImageProto.FileSummary summary = ((FsImageProto.FileSummary)b.Build());
                SaveFileSummary(underlyingOutputStream, summary);
                // NOTE(review): the stream is closed only on the success path here;
                // presumably the caller closes fout when an exception escapes - confirm.
                underlyingOutputStream.Close();
                savedDigest = new MD5Hash(digester.Digest());
            }
Example #3
0
        /// <summary>
        /// Creates <paramref name="name"/> on <paramref name="fs"/> and writes the bytes
        /// of <paramref name="contents"/> to it, optionally through a compression codec.
        /// </summary>
        /// <param name="fs">File system on which the file is created.</param>
        /// <param name="name">Path of the file to create.</param>
        /// <param name="codec">Codec used to wrap the file stream; null writes the bytes unwrapped.</param>
        /// <param name="contents">Text whose bytes are written to the file.</param>
        /// <exception cref="System.IO.IOException"/>
        private static void WriteFile(FileSystem fs, Path name, CompressionCodec codec, string
                                      contents)
        {
            OutputStream stm = fs.Create(name);

            try
            {
                // Wrap inside the try so the raw file stream is still closed if the
                // codec fails to create its stream.
                if (codec != null)
                {
                    stm = codec.CreateOutputStream(stm);
                }
                stm.Write(Sharpen.Runtime.GetBytesForString(contents));
            }
            finally
            {
                // Close on every path; previously a failed write leaked the stream.
                stm.Close();
            }
        }
        /// <summary>
        /// Writes a small header to <paramref name="os"/> describing the configured
        /// image codec and returns <paramref name="os"/> wrapped for further writing.
        /// </summary>
        /// <remarks>
        /// The header is a boolean telling whether a codec is configured, followed,
        /// when it is, by the codec's type name. With a codec the returned stream
        /// writes through that codec; without one it writes through a buffered
        /// stream instead.
        /// </remarks>
        /// <param name="os">
        /// The stream to write the header to and to wrap. This stream should
        /// be unbuffered.
        /// </param>
        /// <returns>
        /// A stream wrapped with the configured compressor, or with buffering
        /// if compression is not enabled.
        /// </returns>
        /// <exception cref="System.IO.IOException">
        /// if an IO error occurs or the compressor cannot be
        /// instantiated
        /// </exception>
        internal virtual DataOutputStream WriteHeaderAndWrapStream(OutputStream os)
        {
            bool             compressed = imageCodec != null;
            DataOutputStream header     = new DataOutputStream(os);

            header.WriteBoolean(compressed);
            if (!compressed)
            {
                // No codec: hand back a buffered stream.
                return new DataOutputStream(new BufferedOutputStream(os));
            }
            Text.WriteString(header, imageCodec.GetType().GetCanonicalName());
            return new DataOutputStream(imageCodec.CreateOutputStream(os));
        }
            /// <summary>
            /// Finishes the section currently being written: flushes the section stream,
            /// installs a fresh section stream for whatever is written next, and appends
            /// the finished section's name, length and offset to <paramref name="summary"/>.
            /// </summary>
            /// <param name="summary">File summary builder that receives the section entry.</param>
            /// <param name="name">Section name; its <c>name</c> member is stored in the entry.</param>
            /// <exception cref="System.IO.IOException"/>
            public void CommitSection(FsImageProto.FileSummary.Builder summary, FSImageFormatProtobuf.SectionName
                                      name)
            {
                long sectionStart = currentOffset;

                FlushSectionOutputStream();
                // Start the next section on a new stream: codec-wrapped when a codec is
                // configured, otherwise the underlying stream itself.
                if (codec == null)
                {
                    sectionOutputStream = underlyingOutputStream;
                }
                else
                {
                    sectionOutputStream = codec.CreateOutputStream(underlyingOutputStream);
                }
                // The section's length is how far the file channel has advanced since
                // the offset recorded on entry.
                long sectionLength = fileChannel.Position() - sectionStart;
                var  section       = FsImageProto.FileSummary.Section.NewBuilder().SetName(name.name)
                                         .SetLength(sectionLength).SetOffset(currentOffset);

                summary.AddSections(section);
                currentOffset += sectionLength;
            }
Example #6
0
 /// <summary>
 /// Builds a writer on top of <paramref name="out"/>. Output always passes through
 /// an <c>IFileOutputStream</c>; when a codec is given and a compressor can be
 /// obtained from the <c>CodecPool</c>, it additionally passes through that codec.
 /// </summary>
 /// <param name="conf">Configuration handed to the serialization factory.</param>
 /// <param name="out">Stream that receives the data; its current position is recorded as the start.</param>
 /// <param name="keyClass">Key type; when null no serializers are opened.</param>
 /// <param name="valueClass">Value type.</param>
 /// <param name="codec">Compression codec, or null for uncompressed output.</param>
 /// <param name="writesCounter">Counter stored as the written-records counter.</param>
 /// <param name="ownOutputStream">Stored as-is in the <c>ownOutputStream</c> field.</param>
 /// <exception cref="System.IO.IOException"/>
 public Writer(Configuration conf, FSDataOutputStream @out, Type keyClass, Type valueClass
               , CompressionCodec codec, Counters.Counter writesCounter, bool ownOutputStream)
 {
     // Plain field captures first; none of them is read by the code below.
     this.writtenRecordsCounter = writesCounter;
     this.ownOutputStream       = ownOutputStream;
     this.keyClass              = keyClass;
     this.valueClass            = valueClass;

     this.checksumOut = new IFileOutputStream(@out);
     this.rawOut      = @out;
     this.start       = this.rawOut.GetPos();

     if (codec == null)
     {
         this.@out = new FSDataOutputStream(checksumOut, null);
     }
     else
     {
         this.compressor = CodecPool.GetCompressor(codec);
         if (this.compressor == null)
         {
             // Fall back to uncompressed output when the pool has no compressor.
             Log.Warn("Could not obtain compressor from CodecPool");
             this.@out = new FSDataOutputStream(checksumOut, null);
         }
         else
         {
             this.compressor.Reset();
             this.compressedOut  = codec.CreateOutputStream(checksumOut, compressor);
             this.@out           = new FSDataOutputStream(this.compressedOut, null);
             this.compressOutput = true;
         }
     }

     if (keyClass == null)
     {
         return;
     }
     // Both serializers write into the shared buffer.
     SerializationFactory factory = new SerializationFactory(conf);
     this.keySerializer = factory.GetSerializer(keyClass);
     this.keySerializer.Open(buffer);
     this.valueSerializer = factory.GetSerializer(valueClass);
     this.valueSerializer.Open(buffer);
 }
Example #7
0
        /// <summary>
        /// Creates the line record writer for the task's default work file. When output
        /// compression is enabled the file name carries the codec's default extension
        /// and the file stream is wrapped by the codec.
        /// </summary>
        /// <param name="job">Task attempt context supplying the configuration.</param>
        /// <returns>A writer that separates keys and values with the configured separator (tab by default).</returns>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception"/>
        public override RecordWriter <K, V> GetRecordWriter(TaskAttemptContext job)
        {
            Configuration    jobConf   = job.GetConfiguration();
            bool             compress  = GetCompressOutput(job);
            string           separator = jobConf.Get(Seperator, "\t");
            CompressionCodec codec     = null;
            string           suffix    = string.Empty;

            if (compress)
            {
                // GzipCodec is the fallback when the job names no compressor class.
                Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
                codec  = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, jobConf);
                suffix = codec.GetDefaultExtension();
            }
            Path               workFile   = GetDefaultWorkFile(job, suffix);
            FileSystem         fileSystem = workFile.GetFileSystem(jobConf);
            // Passes false as the second Create argument in both modes.
            FSDataOutputStream fileOut    = fileSystem.Create(workFile, false);

            if (compress)
            {
                DataOutputStream compressedOut = new DataOutputStream(codec.CreateOutputStream(fileOut));
                return new TextOutputFormat.LineRecordWriter <K, V>(compressedOut, separator);
            }
            return new TextOutputFormat.LineRecordWriter <K, V>(fileOut, separator);
        }
Example #8
0
        /// <summary>
        /// Writes BZip2-compressed "key TAB value" files of various lengths and checks
        /// that <c>KeyValueTextInputFormat</c> reports them as splittable and that,
        /// across all splits, every record is read exactly once.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// if the BZip2 codec class cannot be loaded or file IO fails
        /// </exception>
        public virtual void TestSplitableCodecs()
        {
            Job           job  = Job.GetInstance(defaultConf);
            Configuration conf = job.GetConfiguration();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"
                                                                                          ), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure as the inner exception so the root cause
                // is visible in the test report.
                throw new IOException("Illegal codec!", e);
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            // The seed is logged so a failing run can be reproduced.
            int  seed = new Random().Next();

            Log.Info("seed = " + seed);
            Random random = new Random(seed);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(job, workDir);
            int MaxLength = 500000;

            FileInputFormat.SetMaxInputSplitSize(job, MaxLength / 20);
            // for a variety of lengths
            for (int length = 0; length < MaxLength; length += random.Next(MaxLength / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries; line i is "2*i TAB i"
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create
                                                                                        (file)));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i * 2));
                        writer.Write("\t");
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                KeyValueTextInputFormat format = new KeyValueTextInputFormat();
                NUnit.Framework.Assert.IsTrue("KVTIF claims not splittable", format.IsSplitable(job
                                                                                                , file));
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    // numSplits is only logged; GetSplits(job) takes no split count.
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    IList <InputSplit> splits = format.GetSplits(job);
                    Log.Info("splitting: got =        " + splits.Count);
                    // check each split; bit v is set once value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Count; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        TaskAttemptContext context = MapReduceTestUtil.CreateDummyMapTaskAttemptContext(job
                                                                                                        .GetConfiguration());
                        RecordReader <Text, Text> reader = format.CreateRecordReader(splits[j], context);
                        MapContext <Text, Text, Text, Text> mcontext = new MapContextImpl <Text, Text, Text
                                                                                           , Text>(job.GetConfiguration(), context.GetTaskAttemptID(), reader, null, null,
                                                                                                   MapReduceTestUtil.CreateDummyReporter(), splits[j]);
                        reader.Initialize(splits[j], mcontext);
                        Text key   = null;
                        Text value = null;
                        try
                        {
                            int count = 0;
                            while (reader.NextKeyValue())
                            {
                                key   = reader.GetCurrentKey();
                                value = reader.GetCurrentValue();
                                int k = System.Convert.ToInt32(key.ToString());
                                int v = System.Convert.ToInt32(value.ToString());
                                NUnit.Framework.Assert.AreEqual("Bad key", 0, k % 2);
                                NUnit.Framework.Assert.AreEqual("Mismatched key/value", k / 2, v);
                                Log.Debug("read " + k + "," + v);
                                NUnit.Framework.Assert.IsFalse(k + "," + v + " in multiple partitions.", bits.Get
                                                                   (v));
                                bits.Set(v);
                                count++;
                            }
                            if (count > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every value 0..length-1 must have been seen by some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }
        /// <summary>
        /// Writes BZip2-compressed files holding one integer per line and checks that,
        /// across the splits produced by <c>TextInputFormat</c>, every line is read
        /// exactly once.
        /// </summary>
        /// <exception cref="System.IO.IOException">
        /// if the BZip2 codec class cannot be loaded or file IO fails
        /// </exception>
        public virtual void TestSplitableCodecs()
        {
            JobConf conf = new JobConf(defaultConf);
            // The seed is logged below so a failing run can be reproduced.
            int     seed = new Random().Next();
            // Create the codec
            CompressionCodec codec = null;

            try
            {
                codec = (CompressionCodec)ReflectionUtils.NewInstance(conf.GetClassByName("org.apache.hadoop.io.compress.BZip2Codec"
                                                                                          ), conf);
            }
            catch (TypeLoadException e)
            {
                // Keep the original failure as the inner exception so the root cause
                // is visible in the test report.
                throw new IOException("Illegal codec!", e);
            }
            Path file = new Path(workDir, "test" + codec.GetDefaultExtension());
            // A reporter that does nothing
            Reporter reporter = Reporter.Null;

            Log.Info("seed = " + seed);
            Random     random  = new Random(seed);
            FileSystem localFs = FileSystem.GetLocal(conf);

            localFs.Delete(workDir, true);
            FileInputFormat.SetInputPaths(conf, workDir);
            int MaxLength = 500000;

            // for a variety of lengths
            for (int length = MaxLength / 2; length < MaxLength; length += random.Next(MaxLength
                                                                                       / 4) + 1)
            {
                Log.Info("creating; entries = " + length);
                // create a file with length entries; line i holds the number i
                TextWriter writer = new OutputStreamWriter(codec.CreateOutputStream(localFs.Create
                                                                                        (file)));
                try
                {
                    for (int i = 0; i < length; i++)
                    {
                        writer.Write(Sharpen.Extensions.ToString(i));
                        writer.Write("\n");
                    }
                }
                finally
                {
                    writer.Close();
                }
                // try splitting the file in a variety of sizes
                TextInputFormat format = new TextInputFormat();
                format.Configure(conf);
                LongWritable key   = new LongWritable();
                Text         value = new Text();
                for (int i_1 = 0; i_1 < 3; i_1++)
                {
                    int numSplits = random.Next(MaxLength / 2000) + 1;
                    Log.Info("splitting: requesting = " + numSplits);
                    InputSplit[] splits = format.GetSplits(conf, numSplits);
                    Log.Info("splitting: got =        " + splits.Length);
                    // check each split; bit v is set once value v has been read
                    BitSet bits = new BitSet(length);
                    for (int j = 0; j < splits.Length; j++)
                    {
                        Log.Debug("split[" + j + "]= " + splits[j]);
                        RecordReader <LongWritable, Text> reader = format.GetRecordReader(splits[j], conf,
                                                                                          reporter);
                        try
                        {
                            int counter = 0;
                            while (reader.Next(key, value))
                            {
                                int v = System.Convert.ToInt32(value.ToString());
                                Log.Debug("read " + v);
                                // Log where the duplicate was found before the assertion
                                // below fails the test.
                                if (bits.Get(v))
                                {
                                    Log.Warn("conflict with " + v + " in split " + j + " at position " + reader.GetPos
                                                 ());
                                }
                                NUnit.Framework.Assert.IsFalse("Key in multiple partitions.", bits.Get(v));
                                bits.Set(v);
                                counter++;
                            }
                            if (counter > 0)
                            {
                                Log.Info("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                            else
                            {
                                Log.Debug("splits[" + j + "]=" + splits[j] + " count=" + counter);
                            }
                        }
                        finally
                        {
                            reader.Close();
                        }
                    }
                    // Every value 0..length-1 must have been seen by some split.
                    NUnit.Framework.Assert.AreEqual("Some keys in no partition.", length, bits.Cardinality
                                                        ());
                }
            }
        }
Example #10
0
        /// <summary>
        /// Creates the line record writer for the task output file called
        /// <paramref name="name"/>. When output compression is enabled the codec's
        /// default extension is appended to the file name and the file stream is
        /// wrapped by the codec.
        /// </summary>
        /// <param name="ignored">Not used; the file system is taken from the output path.</param>
        /// <param name="job">Job configuration.</param>
        /// <param name="name">Base name of the task output file.</param>
        /// <param name="progress">Progress callback passed to the file creation call.</param>
        /// <returns>A writer that separates keys and values with the configured separator (tab by default).</returns>
        /// <exception cref="System.IO.IOException"/>
        public override RecordWriter <K, V> GetRecordWriter(FileSystem ignored, JobConf job
                                                            , string name, Progressable progress)
        {
            bool   isCompressed      = GetCompressOutput(job);
            string keyValueSeparator = job.Get("mapreduce.output.textoutputformat.separator",
                                               "\t");
            CompressionCodec codec    = null;
            string           fileName = name;

            if (isCompressed)
            {
                Type codecClass = GetOutputCompressorClass(job, typeof(GzipCodec));
                // create the named codec; the explicit cast matches the other call
                // sites of ReflectionUtils.NewInstance.
                codec = (CompressionCodec)ReflectionUtils.NewInstance(codecClass, job);
                // build the filename including the extension
                fileName = name + codec.GetDefaultExtension();
            }
            // Path, file system and stream creation are the same in both modes.
            Path               file    = FileOutputFormat.GetTaskOutputPath(job, fileName);
            FileSystem         fs      = file.GetFileSystem(job);
            FSDataOutputStream fileOut = fs.Create(file, progress);

            if (!isCompressed)
            {
                return new TextOutputFormat.LineRecordWriter <K, V>(fileOut, keyValueSeparator);
            }
            return new TextOutputFormat.LineRecordWriter <K, V>(new DataOutputStream(codec.CreateOutputStream
                                                                                         (fileOut)), keyValueSeparator);
        }